require 'rubygems'
require 'scrubyt'
require 'nokogiri'
# Scrubyt extractor for the Ruby Quiz archive hosted at splatbang.com.
# Every bare identifier inside the block (quiz, link_url, quiz_id, ...) is a
# Scrubyt pattern name; presumably each one becomes an element of the same
# name in the XML returned by data.to_xml -- confirm against Scrubyt's docs.
data = Scrubyt::Extractor.define do
  fetch('http://splatbang.com/rubyquiz/')

  # One "quiz" per first anchor of each entry in the sidebar's ordered list.
  quiz("//div[@id='side']/ol/li/a[1]") do
    link_url do
      # Numeric id taken from the "id=..." part of the link.
      quiz_id(/id=(\d+)/)

      # Everything after "id=" is used as the quiz directory name when
      # building the two text-file URLs below.
      quiz_link(/id=(.+)/) do
        quiz_desc_url(lambda {|quiz_dir| "http://splatbang.com/rubyquiz/#{quiz_dir}/quiz.txt"}, :type => :script) do
          # NOTE(review): presumably saves the file under ./descriptions -- confirm.
          quiz_dl('descriptions', :type => :download)
        end

        quiz_summary_url(lambda {|quiz_dir| "http://splatbang.com/rubyquiz/#{quiz_dir}/summ.txt"}, :type => :script) do
          # NOTE(review): presumably saves the file under ./summaries -- confirm.
          quiz_dl('summaries', :type => :download)
        end
      end
    end

    # Detail pattern resolved against the site root, collecting each listed solution.
    quiz_detail(:resolve => "http://splatbang.com/rubyquiz") do
      solution("/html/body/div/div[2]/ol/li/a") do
        # Identity script: the value passed in is kept unchanged as the author.
        author(lambda {|link_text| link_text}, :type => :script)
        ruby_talk_reference("href", :type => :attribute)

        # Detail pattern for the solution link; captures the first <pre> block.
        solution_detail(:resolve => :full) do
          text("//pre[1]")
        end
      end
    end
  end.select_indices(0..2) # NOTE(review): presumably limits scraping to the first three quizzes -- confirm.
end
# Post-process the scraped XML: for every <quiz> element, remove its
# <link_url> node and embed the text of the description and summary files
# as <description> and <summary> children, then write the whole document to
# ruby_quiz_archive.xml.
result = Nokogiri::XML(data.to_xml)

(result/"//quiz").each do |quiz|
  # Extract the id while <link_url> is still attached (presumably the id text
  # lives inside it -- confirm). When nothing matches, nil.to_i yields 0.
  quiz_id = quiz.text[/\s(\d+)\s/, 1].to_i

  # Ids above 157 read suffixed files (quiz_1.txt, summ_1.txt, ...); all other
  # ids read the unsuffixed file.
  # NOTE(review): presumably mirrors how the downloader names repeated
  # files -- confirm.
  file_index = quiz_id > 157 ? "_#{quiz_id - 157}" : ""

  # Use a node-relative XPath (".//") so only this quiz's own <link_url> is
  # removed; an absolute "//link_url" would select the first one in the whole
  # document regardless of which quiz is being processed.
  link_url = quiz.search(".//link_url").first
  link_url.unlink if link_url

  [
    ["description", "descriptions/quiz#{file_index}.txt"],
    ["summary", "summaries/summ#{file_index}.txt"]
  ].each do |tag, path|
    element = Nokogiri::XML::Element.new(tag, quiz.document)
    # File.read closes the file handle, unlike open(path).read.
    element.content = File.read(path)
    quiz.add_child(element)
  end
end

File.open("ruby_quiz_archive.xml", "w") { |f| f.write(result) }