Pastie now auto-senses if line-wrap is a bad or good idea. Feedback?
## mark a section (Learn more)
require 'nokogiri'

# Wraps every heading (h1..h6) of test.html in a <section> element, together
# with the nodes that follow it up to the next heading of the same or a higher
# rank. Each section gets an id derived from the heading text. The resulting
# document is printed to stdout.

# Parse
doc = Nokogiri::HTML(File.read('test.html'))

# Rank (1..6) of a heading element, or nil for any other node. The pattern is
# anchored on both ends so that only the exact names h1..h6 count as headings.
heading_rank = lambda do |node|
  digit = node.name[/\Ah([1-6])\z/, 1]
  digit && digit.to_i
end

# Turns heading text into an id: lowercased, every run of characters other
# than word characters and hyphens becomes a single hyphen, and hyphens at
# either end are removed (surrounding whitespace in the heading would
# otherwise leave the id starting or ending with "-").
section_id = lambda do |text|
  text.downcase.gsub(/[^\w-]+/, '-').gsub(/\A-+|-+\z/, '')
end

# Add sections for all headers. Levels are processed from h1 down so that a
# lower-rank heading is wrapped after it has already been moved into the
# section of the heading above it, which produces nested sections.
(1..6).each do |level|
  # Search from the document itself; this does not depend on doc.root.
  doc.css("h#{level}").each do |header|
    # Collect the siblings following the header, stopping at the first heading
    # of the same or a higher rank. This is done before any node is moved so
    # the walk sees the tree in its original order.
    members = []
    node = header.next_sibling
    while node
      rank = heading_rank.call(node)
      break if rank && rank <= level

      members << node
      node = node.next_sibling
    end

    # Create section at the header's current position
    section = Nokogiri::XML::Node.new('section', doc)
    section['id'] = section_id.call(header.content)
    header.add_previous_sibling(section)

    # Move the header, then its content, into the section
    header.unlink
    section.add_child(header)
    members.each do |member|
      member.unlink
      section.add_child(member)
    end
  end
end

# Done. The gsub rewrites the serializer's " />" endings as ">".
puts doc.to_s.gsub(' />', '>')
This paste will be private.
From the Design Piracy series on my blog: