# Erik Kastner 2008-02-12 Script to get our lost comments from google's cache
require 'rubygems'
require 'db'
require 'open-uri'
require 'hpricot'
# Connect to the "free" database through the local DB wrapper. A plain
# Mysql.new(host, user, pass, db) (plus require 'mysql') can be used instead.
db = DB.new(:db => "free")

# Prepare both statements once up front so they can be executed repeatedly
# with different bind values later on.
lookup_sql = "SELECT ID from wp_posts WHERE guid = ?"
fetch_id = db.prepare(lookup_sql)

# Adjacent string literals are joined into a single SQL string.
insert_sql = "INSERT INTO wp_comments " \
  "(comment_post_ID, comment_author, comment_date, comment_content, comment_approved, comment_agent) " \
  "VALUES (?, ?, ?, ?, '1', 'ruby script from erik')"
insert_comment = db.prepare(insert_sql)
# Pairs of [live post address (the wp_posts guid), google-cache address of
# that same post]. Each pair is written as a two-word %w list: first the
# guid, then the cache URL.
urls = [
  %w[
    http://free.winelibrary.com/2008/01/25/leocadie-fernand-averoux-2006/
    http://64.233.169.104/search?q=cache:R5nLZeQOqw0J:free.winelibrary.com/2008/01/25/leocadie-fernand-averoux-2006/+site:free.winelibrary.com&hl=en&ct=clnk&cd=3&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2008/01/31/legras-st-vincent-1990/
    http://64.233.169.104/search?q=cache:p-8QDChsJLcJ:free.winelibrary.com/2008/01/31/legras-st-vincent-1990/+site:free.winelibrary.com&hl=en&ct=clnk&cd=4&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2008/01/24/calvet-thunevin-les-dentelles-2004/
    http://64.233.169.104/search?q=cache:PeTf8KOjS-oJ:free.winelibrary.com/2008/01/24/calvet-thunevin-les-dentelles-2004/+site:free.winelibrary.com&hl=en&ct=clnk&cd=5&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2008/02/04/chateau-henye-tokaji-aszueszencia-2000/
    http://64.233.169.104/search?q=cache:r_sJIN-PwEkJ:free.winelibrary.com/2008/02/04/chateau-henye-tokaji-aszueszencia-2000/+site:free.winelibrary.com&hl=en&ct=clnk&cd=6&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2007/03/08/vinoce-mt-veeder-red-2003/
    http://64.233.169.104/search?q=cache:pDBHCk9jjZ4J:free.winelibrary.com/2007/03/08/vinoce-mt-veeder-red-2003/+site:free.winelibrary.com&hl=en&ct=clnk&cd=7&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2008/02/11/mcprice-myers-grenache-lange-2005/
    http://64.233.169.104/search?q=cache:aBqHslC7IbsJ:free.winelibrary.com/2008/02/11/mcprice-myers-grenache-lange-2005/+site:free.winelibrary.com&hl=en&ct=clnk&cd=8&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2008/02/01/black-pig-ribera-del-duero-2006/
    http://64.233.169.104/search?q=cache:fH0EpNkaSlEJ:free.winelibrary.com/2008/02/01/black-pig-ribera-del-duero-2006/+site:free.winelibrary.com&hl=en&ct=clnk&cd=9&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2007/04/13/parparoussis-the-gift-of-dionysos-sideritis-2005/
    http://64.233.169.104/search?q=cache:Xu_iIowmPDUJ:free.winelibrary.com/2007/04/13/parparoussis-the-gift-of-dionysos-sideritis-2005/+site:free.winelibrary.com&hl=en&ct=clnk&cd=10&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2008/01/30/karydas-naoussa-2003-2/
    http://64.233.169.104/search?q=cache:VY8kMw8WWRQJ:free.winelibrary.com/2008/01/30/karydas-naoussa-2003-2/+site:free.winelibrary.com&hl=en&ct=clnk&cd=11&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2008/02/05/rhone-rebel-gsm-2005/
    http://64.233.169.104/search?q=cache:yQiLGGbyHoEJ:free.winelibrary.com/2008/02/05/rhone-rebel-gsm-2005/+site:free.winelibrary.com&hl=en&ct=clnk&cd=12&gl=us&client=safari
  ],
  %w[
    http://free.winelibrary.com/2008/02/06/audelssa-tempest-2004/
    http://64.233.169.104/search?q=cache:B2-8Z8wU3mkJ:free.winelibrary.com/2008/02/06/audelssa-tempest-2004/+site:free.winelibrary.com&hl=en&ct=clnk&cd=13&gl=us&client=safari
  ]
]
# Walk every [post guid, google-cache url] pair and copy the comments found
# on the cached page back into wp_comments for the matching post.
urls.each do |guid, cache_url|
  # Look up the WordPress post ID by its guid (the live address).
  fetch_id.execute(guid)

  # Guard before indexing: if fetch yields nil when no row matched (as
  # Mysql::Stmt#fetch does), indexing it directly would raise instead of
  # reaching the skip message below.
  row = fetch_id.fetch
  id = row && row[0]
  unless id
    puts "Skipping #{guid}, no id found in the db"
    next
  end

  # Download and parse the cached page. The block form of open closes the
  # handle as soon as parsing is done.
  # NOTE(review): relies on open-uri patching Kernel#open, which newer Rubies
  # (3.0+) no longer do - switch to URI.open if this is ever run there.
  doc = open(cache_url) { |page| Hpricot(page) }

  # Each li under .commentlist is expected to be one comment.
  (doc / ".commentlist li").each do |comment|
    author = comment.at("cite")
    stamp = comment.at(".commentmetadata")

    # An li without an author or timestamp is not a full comment (e.g. a
    # list nested inside a comment body); skip it rather than abort the run
    # halfway through inserting.
    unless author && stamp
      puts "Skipping an entry on #{guid}, no author or date found"
      next
    end

    who = author.innerHTML
    # A comment body may span several paragraphs; keep them newline-separated.
    body = (comment / "p").map { |para| para.innerHTML }.join("\n")
    date = Time.parse(stamp.innerHTML)

    # Bind values in the order of the prepared INSERT's placeholders.
    insert_comment.execute(id, who, date, body)
  end
end