Pastie now auto-senses if line-wrap is a bad or good idea. Feedback?
## mark a section (Learn more)
# Scrape per-language pages linked from a saved GitHub listing.
#
# Reads "languages.html", downloads every page linked from its
# "all_languages" section into the "languages/" directory (which must already
# exist -- nothing here creates it), then parses each downloaded copy and
# collects a (number, language name) tuple in `ranks`, which is printed.
from lxml import etree

try:
    import urllib2
except ImportError:
    # Python 3 moved urlopen to urllib.request; alias it so the call sites
    # below read the same on either interpreter.
    import urllib.request as urllib2

urls = {}
ranks = []

parser = etree.HTMLParser()
# Binary mode so the parser receives the raw bytes; the `with` block closes
# the handle as soon as the tree has been built.
with open("languages.html", "rb") as f:
    tree = etree.parse(f, parser)
languages = tree.findall("//div[@class='all_languages']//a")


def norm(name):
    """Return *name* with spaces and slashes replaced by underscores.

    The result is used as a file name under "languages/".
    """
    return name.replace(' ', '_').replace('/', '_')


def _fetch(url):
    """Return the body of *url*, closing the response even if the read fails."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


# Create the urls list
for l in languages:
    name = l.text
    url = l.get('href')
    # An anchor without text cannot be turned into a file name and one
    # without an href cannot be downloaded, so both are skipped.
    if name is None or url is None:
        continue
    urls[name] = url

# Download the files
for name, url in urls.items():
    filename = 'languages/' + norm(name)
    print(filename)
    # The file is created before the download starts, so a failed download
    # leaves an empty file behind.  Leaving the `with` block flushes and
    # closes the file before the analysis pass below reopens it.
    with open(filename, 'wb') as fo:
        try:
            fo.write(_fetch("http://www.github.com" + url))
        except Exception:
            # Best effort: report and move on to the next language.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print("Error getting language %s" % name)

# Analyze them
for name in urls:
    filename = 'languages/' + norm(name)
    with open(filename, 'rb') as fo:
        try:
            page = etree.parse(fo, parser)
            heading = page.find("//div[@class='pagehead']/h1/em").text
            # Third whitespace-separated word of the heading, minus its first
            # character, converted to int (presumably something like "#12" --
            # confirm against a real downloaded page).
            pop = int(heading.split()[2][1:])
        except Exception:
            # Pages that could not be parsed, or that lack the expected
            # heading, are skipped rather than aborting the whole run.
            continue
    ranks.append((pop, name))

print(ranks)
This paste will be private.
From the Design Piracy series on my blog: