# (Stray "Report abuse" link text from the page this script was copied from; not part of the script.)

require 'rubygems'
require 'scrubyt'
require 'nokogiri'

# Scrape the Ruby Quiz archive with scRUBYt!
#
# Builds an extractor whose result is later serialised with data.to_xml.
# NOTE(review): the comments inside the block describe the patterns as they
# read here; the exact meaning of each option (:type, :resolve, ...) is
# defined by the scrubyt gem -- confirm against its documentation.
data = Scrubyt::Extractor.define do
  # Start page of the scrape.
  fetch 'http://splatbang.com/rubyquiz/'

  # One "quiz" record per first <a> in each <li> of the <ol> inside div#side.
  quiz "//div[@id='side']/ol/li/a[1]" do
    link_url do 
      # Digits that follow "id=".
      quiz_id /id=(\d+)/
      # Everything that follows "id="; presumably this is the value handed to
      # the lambdas below as quiz_dir -- TODO confirm.
      quiz_link /id=(.+)/ do
        # Build the URL of the quiz description text and download it into
        # the 'descriptions' directory.
        quiz_desc_url lambda {|quiz_dir| "http://splatbang.com/rubyquiz/#{quiz_dir}/quiz.txt"}, :type => :script do
          quiz_dl 'descriptions', :type => :download
        end
        # Same for the quiz summary text, downloaded into 'summaries'.
        quiz_summary_url lambda {|quiz_dir| "http://splatbang.com/rubyquiz/#{quiz_dir}/summ.txt"}, :type => :script do
          quiz_dl 'summaries', :type => :download
        end        
      end           
    end    
    # Presumably follows each quiz link to its detail page, resolving relative
    # links against the given base URL -- TODO confirm.
    quiz_detail :resolve => "http://splatbang.com/rubyquiz" do
      # One "solution" record per link in the second div's ordered list.
      solution "/html/body/div/div[2]/ol/li/a" do
        # The lambda returns its argument unchanged; presumably that argument
        # is the solution link's text.
        author lambda {|solution_link_text| solution_link_text}, :type => :script
        # The link's href attribute.
        ruby_talk_reference "href", :type => :attribute
        # Presumably follows the solution link and takes the first <pre> on
        # the target page -- TODO confirm.
        solution_detail :resolve => :full do
          text "//pre[1]"
        end
      end
    end
  # NOTE(review): looks like this restricts the scrape to the first three
  # quiz matches (indices 0..2) -- confirm.
  end.select_indices(0..2)
end

# Post-process with Nokogiri: for every <quiz> element, drop its <link_url>
# element, append <description> and <summary> elements filled from the
# downloaded text files, then write the document to ruby_quiz_archive.xml.
result = Nokogiri::XML(data.to_xml)

# [element name, path stem] for each downloaded text file attached to a quiz.
quiz_texts = [
  ["description", "descriptions/quiz"],
  ["summary",     "summaries/summ"]
]

result.xpath("//quiz").each do |quiz|
  # Read the id before removing anything: quiz.text spans the text of all
  # descendants, including the <link_url> subtree unlinked just below.
  quiz_id = quiz.text[/\s(\d+)\s/, 1].to_i

  # Ids above 157 map to files with a "_<n>" suffix, everything else to the
  # unsuffixed file.
  # NOTE(review): presumably this mirrors how the download step names
  # repeated files (quiz.txt, quiz_1.txt, ...) -- verify against the files
  # actually present in descriptions/ and summaries/.
  file_index = quiz_id > 157 ? "_#{quiz_id - 157}" : ""

  # ".//" keeps the lookup inside this quiz. A bare "//link_url" is an
  # absolute path and searches the whole document whatever the context node,
  # which only worked because earlier matches had already been unlinked.
  link_url = quiz.at_xpath(".//link_url")
  link_url.unlink if link_url

  quiz_texts.each do |tag, stem|
    node = Nokogiri::XML::Element.new(tag, quiz.document)
    # File.read opens, reads and closes the file; open(...).read left the
    # handle open until garbage collection.
    node.content = File.read("#{stem}#{file_index}.txt")
    quiz.add_child(node)
  end
end

File.open("ruby_quiz_archive.xml", "w") { |f| f.write(result) }