require 'nokogiri'

# Wraps every heading of test.html, together with the content that belongs
# to it, in a <section> element and prints the resulting HTML to stdout.
#
# A heading's section runs from the heading itself up to (but excluding) the
# next sibling heading of the same or a higher rank (numerically <= level).
# Levels are processed from h1 down to h6, so by the time an h2 is handled
# it already lives inside its h1 section and its own section nests there.

# Matches the element name of a heading and captures its level digit.
HEADING_NAME = /\Ah(\d)\z/

# Returns true when +node+ is a heading whose level is <= +level+, i.e. a
# node that terminates a section opened by a heading of that level.
def closes_section?(node, level)
  digit = node.name[HEADING_NAME, 1]
  digit ? digit.to_i <= level : false
end

# Collects the siblings following +header+ that belong to its section.
def section_members(header, level)
  members = []
  node = header.next_sibling
  while node && !closes_section?(node, level)
    members << node
    node = node.next_sibling
  end
  members
end

# Derives an id from heading text: lowercased, every run of characters other
# than ASCII word characters and '-' collapsed to a single '-', and hyphens
# trimmed from both ends (so "(Intro)" becomes "intro", not "-intro").
def section_slug(text)
  text.downcase.gsub(/[^\w-]+/, '-').gsub(/\A-+|-+\z/, '')
end

# Parse
doc = Nokogiri::HTML(File.read('test.html'))

# Add sections for all headers, outermost level first
(1..6).each do |level|
  # Query the document rather than doc.root so an empty input (nil root)
  # yields no headings instead of raising NoMethodError.
  doc.css("h#{level}").each do |header|
    # Determine the section's content before anything is moved
    members = section_members(header, level)

    # Create the section in the heading's current position
    section = Nokogiri::XML::Node.new('section', doc)
    slug = section_slug(header.content)
    # An empty id attribute is invalid HTML, so omit it when nothing is left
    section['id'] = slug unless slug.empty?
    header.add_previous_sibling(section)

    # Move the heading and its content into the section, preserving order
    [header, *members].each { |node| section.add_child(node.unlink) }
  end
end

# Done
# NOTE(review): the gsub presumably normalizes XHTML-style self-closing tags;
# it also rewrites any literal " />" text in the output - confirm it is needed.
puts doc.to_s.gsub(' />', '>')