# frozen_string_literal: true

require 'nokogiri'

# Reads test.html, wraps every heading (h1..h6) together with the content that
# follows it in a <section> element, and prints the resulting document.
#
# Levels are processed from h1 down to h6, so by the time a lower-level heading
# is handled it already lives inside its parent heading's section and the new
# section nests there.

# Returns true when +node+ is a heading element whose level is the same as or
# higher than +level+ (i.e. its number is <= level); such a node ends the
# section currently being collected.
def section_boundary?(node, level)
  rank = node.name[/\Ah(\d)/, 1]
  rank ? rank.to_i <= level : false
end

# Builds the section id from the heading text: lowercased, with every run of
# characters other than word characters, '-' and '_' collapsed into a single
# '-', and one trailing '-' removed (a leading '-' is left in place).
def section_id(header)
  header.content.downcase.gsub(/[^\w\d\-_]+/, '-').sub(/-$/, '')
end

# Parse
doc = Nokogiri::HTML(File.read('test.html'))
root = doc.root

# Add sections for all headers
(1..6).each do |level|
  # The node set is collected before the tree is modified, so moving nodes
  # around below does not disturb this iteration.
  root.css("h#{level}").each do |header|
    # Everything after the header among its siblings, up to (not including)
    # the next heading of the same or a higher level.
    members = header.parent.children
                    .drop_while { |node| node != header }
                    .drop(1)
                    .take_while { |node| !section_boundary?(node, level) }

    # Create the section and put it where the header currently sits
    section = Nokogiri::XML::Node.new('section', doc)
    section['id'] = section_id(header)
    header.add_previous_sibling(section)

    # Move the header and its content into the section, keeping their order
    [header, *members].each do |node|
      node.unlink
      section.add_child(node)
    end
  end
end

# Done
puts doc.to_s.gsub(' />', '>')