Pastie now auto-senses if line-wrap is a bad or good idea. Feedback?
## mark a section (Learn more)
require 'rubygems'
require 'grit'

# Ruby 1.8 switch for UTF-8 aware string handling (several names below are
# non-ASCII). NOTE(review): later Rubies ignore this variable with a warning.
$KCODE = 'u'

RAILS_DIR = '/Users/fxn/prj/rails'

# Cache of CHANGELOG lines added by each commit, keyed by commit id, so that
# `git show` is run at most once per commit across executions. A missing or
# unreadable cache file simply means starting from scratch. The file is read
# in binary mode (it is written with 'wb') and the handle is closed by the
# block form of File.open.
$changelogs =
  begin
    File.open('changelogs', 'rb') { |f| Marshal.load(f) }
  rescue StandardError
    {}
  end

# Persist the cache when the script exits.
at_exit do
  File.open('changelogs', 'wb') { |f| Marshal.dump($changelogs, f) }
end

# Simple trick to be able to post this script with readable addresses.
def email(user, domain)
  user + '@' + domain
end

# Some people appear in Rails logs under different names, there are nicks,
# typos, email addresses, shortenings, etc. This is a hand-made list to map
# them in order to be able to aggregate commits from the same real author.
#
# Keys are canonical names; values are a single alias or an array of aliases.
SEEN_IN_LOG_ALSO_AS = {
  'Aliaksey Kandratsenka'    => 'Aleksey Kondratenko',
  'Andrew Kaspick'           => [email('akaspick', 'gmail.com'), 'akaspick'],
  'Anthony Eden'             => 'aeden',
  'Austin Ziegler'           => 'Thanks to Austin Ziegler for Transaction::Simple',
  'Blaine'                   => ['blaine', email('blaine', 'odeo.com')],
  'Bob Silva'                => 'BobSilva',
  'Brad Greenlee'            => 'bgreenlee',
  'Cheah Chu Yeow'           => ['Chu Yeow', 'chuyeow'],
  'court3nay'                => ['courtenay', email('court3nay', 'gmail.com')],
  'Chris McGrath'            => [email('c.r.mcgrath', 'gmail.com'), 'c.r.mcgrath', email('chris', 'octopod.info'), 'octopod'],
  'Chris Roos'               => 'chrisroos',
  'Damian Janowski'          => 'djanowski',
  'Dan Manges'               => 'dcmanges',
  'Daniel Morrison'          => 'danielmorrison',
  'Daniel Von Fange'         => ['Suggested by Daniel Von Fange', 'Spotted by Daniel Von Fange'],
  'Dave Thomas'              => [email('dave', 'pragprog.com'), 'After much pestering from Dave Thomas'],
  'David Heinemeier Hansson' => 'DHH',
  'Don Park'                 => email('don.park', 'gmail.com'),
  'Dreamer3'                 => email('dreamer3', 'gmail.com'),
  'Duncan Beevers'           => 'duncanbeevers',
  'Eloy Duran'               => 'alloy',
  'Emilio Tagua'             => 'miloops',
  'Ernesto Jimenez'          => 'ernesto.jimenez',
  'Frederick Cheung'         => ['fcheung', 'Fred Cheung', 'frederick.cheung', email('frederick.cheung', 'gmail.com')],
  'Geoff Buesing'            => ['gbuesing', 'Geoffrey Buesing'],
  'Geoff Garside'            => 'ggarside',
  'Grant Hollingworth'       => email('grant', 'antiflux.org'),
  'Hampton Catlin'           => email('hcatlin', 'gmail.com'),
  'Hongli Lai (Phusion)'     => ['Hongli Lai (Phusion', 'FooBarWidget'],
  'Ian White'                => email('ian.w.white', 'gmail.com'),
  'Isaac Feliu'              => 'isaacfeliu',
  'Jack Danger Canty'        => %w(danger Danger),
  'Jarkko Laine'             => [email('jarkko', 'jlaine.net'), 'Jarkko', 'jarkko'],
  'James Adam'               => 'lazyatom',
  'James Mead'               => 'floehopper',
  'Jeremy Evans'             => [email('jeremyevans0', 'gmail.com'), 'jeremyevans'],
  'Jeremy Kemper'            => 'bitsweat',
  'Jeremy McAnally'          => ['jeremymcnally', 'jeremymcanally'],
  'John Barnette'            => 'jbarnette',
  'Jonathan del Strother'    => ['Catfish', 'catfish'],
  'Jonathan Weiss'           => 'jweiss',
  'Jordi Bunster'            => 'jordi',
  'José Valim'               => 'josevalim',
  'Josh Knowles'             => email('joshknowles', 'gmail.com'),
  'Josh Peek'                => ['josh', 'Josh', 'Joshua Peek', 'joshpeek', email('josh', 'joshpeek.com')],
  'Josh Starcher'            => email('josh.starcher', 'gmail.com'),
  'Josh Susser'              => ['hasmanyjosh', email('josh', 'hasmanythrough.com')],
  'Juanjo Bazan'             => 'juanjo.bazan',
  'Justin French'            => 'justinfrench',
  'Kamal Fariz Mahyuddin'    => 'kamal',
  'Ken Barker'               => email('ken.barker', 'gmail.com'),
  'Ken Miller'               => email('kenneth.miller', 'bitfield.net'),
  'Kevin Clark'              => "Kevin Clark #{email('kevin.clark', 'gmail.com')}",
  'Luke Redpath'             => email('contact', 'lukeredpath.co.uk'),
  'Manfred Stienstra'        => [email('m.stienstra', 'fngtps.com'), 'manfred'],
  'Marcel Molina'            => ['Marcel Molina Jr.', 'Marcel'],
  'Mark Somerville'          => 'Spakman',
  'Michael Klishin'          => 'Michael S. Klishin',
  'Michael Koziarski'        => %w(Koz nzkoz),
  'Michael Schoen'           => 'Michael A. Schoen',
  'Michael Schubert'         => [email('michael', 'schubert'), email('michael', 'schubert.cx')],
  'Michael Schuerig'         => [email('michael', 'schuerig.de'), 'Michael Shuerig'],
  'Mike Naberezny'           => 'mnaberez',
  'Mikel Lindsaar'           => ['mikel', 'raasdnil'],
  'Mislav Marohnić'          => ['mislav', 'mislaw', email('mislav', 'nippur.irb.hr')],
  'Nathan Weizenbaum'        => 'Nex3',
  'Nick Sieger'              => ['nicksieger', email('nicksieger', 'gmail.com')],
  'Nik Wakelin'              => 'nik.wakelin',
  'Obie Fernandez'           => 'ObieFernandez',
  'Philip Hallstrom'         => 'phallstrom',
  'Pratik Naik'              => %w(Pratik lifofifo lifo),
  'Rick Olson'               => ['rick', 'Rick', 'Rick Olsen', email('technoweenie', 'gmail.com')],
  'Rob Biedenharn'           => ['rabiedenharn', email('Rob', 'AgileConsultingLLC.com')],
  'Robby Russell'            => 'robbyrussell',
  'Ryan Bates'               => 'ryanb',
  'Ryan Davis'               => 'zenspider',
  'Ryan Tomayko'             => email('rtomayko', 'gmail.com'),
  'Sam Granieri'             => 'sjgman9',
  'Sebastian Kanthak'        => [email('sebastian.kanthak', 'muehlheim.de'), 'sebastian.kanthak', 'skanthak'],
  'Seth Rasmussen'           => 'loincloth',
  'Stefan Kaes'              => [email('skaes', 'web.de'), 'skaes', 'Stephan Kaes', 'Skaes', 'skaes.web.de'],
  'Steve Purcell'            => email('stephen_purcell', 'yahoo.com'),
  'Tarmo Tänav'              => ['tarmo', 'tarmo_t', 'Tarmo Täna'],
  'Thijs van der Vossen'     => ['thijsv', email('thijs', 'vandervossen.net'), email('thijs', 'fngtps.com')],
  'Tim Pope'                 => 'tpope',
  'Tobias Lütke'             => ['Tobias Luetke', 'TobiasLuetke'],
  'Tom Ward'                 => ['Tom ward', 'tomafro'],
  'Will Bryant'              => 'will.bryant',
  'Xavier Noria'             => 'fxn',
  'Zach Dennis'              => 'zdennis',
}

# Reverse SEEN_IN_LOG_ALSO_AS as NAME_NORMALIZER: alias => canonical name.
NAME_NORMALIZER = {}
SEEN_IN_LOG_ALSO_AS.each do |name, also_as|
  # [*x] accepts both a single alias and an array of aliases.
  [*also_as].each { |alt| NAME_NORMALIZER[alt] = name }
end

# Returns the canonical author name for +name+: the first "<...>" chunk
# (an email address in angles) is removed, surrounding whitespace is stripped,
# and the result is looked up in NAME_NORMALIZER. Unknown names are returned
# cleaned but otherwise untouched. The argument is not modified.
def normalize_name(name)
  cleaned = name.sub(/\s*<[^>]+>/, '').strip
  NAME_NORMALIZER.fetch(cleaned, cleaned)
end

# True if the commit message carries the 'git-svn-id:' marker.
def commit_from_svn?(commit)
  commit.message.include?('git-svn-id:')
end

# When Rails had a svn repo there was a convention for authors: the committer
# put their name between brackets at the end of the commit or changelog message.
# For example:
#
#   Fix case-sensitive validates_uniqueness_of. Closes #11366 [miloops]
#
# Of course this is not robust, but it is the best we can get.
#
# Returns the (possibly empty) array of bracketed names that pass
# looks_like_an_author_name.
def extract_svn_authors_from_message(message)
  # end-of-line anchor on purpose
  return [] unless message =~ /\[([^\]]+)\]\s*$/

  # Several authors may share the brackets:
  #
  #   [Adam Milligan, Pratik]
  #   [Rick Olson/Nicholas Seckar]
  #   [Kevin Clark & Jeremy Hopple]
  $1.split(%r{\s*[,/&]\s*}).select { |candidate| looks_like_an_author_name(candidate) }
end

# Returns a list of authors extracted from the corresponding svn commit.
#
# Sources are tried in order: the commit message, then CHANGELOG lines added
# by the commit, and finally the commit's own author name. The result is
# normalized and free of duplicates.
def extract_svn_authors(commit)
  svn_authors = extract_svn_authors_from_message(commit.message)
  svn_authors = extract_svn_authors_diffing(commit) if svn_authors.empty?
  svn_authors = [commit.author.name] if svn_authors.empty?
  svn_authors.map { |a| normalize_name(a) }.uniq
end

# Searches for author names in CHANGELOGs.
#
# The CHANGELOG lines added by the commit are taken from the $changelogs cache,
# which is filled from `git show` on first use. Each cached line is then parsed
# with extract_svn_authors_from_message.
def extract_svn_authors_diffing(commit)
  unless $changelogs.key?(commit.id)
    added_entries = []
    Dir.chdir(RAILS_DIR) do
      in_changelog = false
      # each_line rather than String#each, which only exists in Ruby 1.8.
      git_show(commit).each_line do |line|
        case line
        when /^diff --git/
          # A new file section starts; it is not a changelog until proven so.
          in_changelog = false
        when /^\+\+\+.*changelog$/i
          in_changelog = true
        when /^\+\s*\*/
          # Added line starting with "*" (a changelog entry).
          added_entries << line if in_changelog
        end
      end
    end
    # Stored only once the diff has been fully scanned, so an interrupted run
    # does not persist an empty or partial entry that would later be trusted.
    $changelogs[commit.id] = added_entries
  end

  $changelogs[commit.id].map { |line| extract_svn_authors_from_message(line) }.flatten
end

# Returns the output of `git show` for the commit, run in the current
# working directory.
def git_show(commit)
  `git show #{commit.id}`
end

# Author name extraction in svn commits returns a few strings we just ignore.
# Returns true unless +str+ is one of the known non-name strings that show up
# between brackets in svn-era messages.
def looks_like_an_author_name(str)
  str !~ /\A\d+\z/ && # Remove side effects of [5684]
    str !~ /\A\s*\z/ &&
    str != 'See rails ML' &&
    str != 'subject "Text::Format Licence Exception" on Oct 15' &&
    str !~ /RubyConf/ && # example: RubyConf '05
    str !~ /^Includes duplicates of changes/ # example: Includes duplicates of changes from 1.1.4 - 1.2.3
end

# Returns +name+ with every "user@domain" occurrence rewritten as
# "user ~ at ~ domain", so that addresses are not printed verbatim.
def with_munged_email_addresses(name)
  name.gsub(/([\w.]+)@([\w.]+)/, "\\1 ~ at ~ \\2")
end

# Prints one line per author, highest commit count first.
#
# +authors+ maps author name to commit count. Authors with the same count are
# listed alphabetically: Ruby's sort is not stable, so sorting on the count
# alone would give an unspecified order among ties.
def print_authors(authors)
  ranked = authors.sort_by { |name, count| [-count, name] }
  ranked.each do |name, count|
    puts format('%4d %s', count, with_munged_email_addresses(name))
  end
end

# Walks the 'master' history of the repository at RAILS_DIR in pages of
# 100 commits, counts commits per normalized author name, and prints the
# resulting ranking.
def process_commits!
  authors = Hash.new(0)
  repo = Grit::Repo.new(RAILS_DIR)
  offset = 0
  page_size = 100

  loop do
    # NOTE(review): the third argument is presumably the number of commits to
    # skip; it is advanced by the number of commits already processed.
    commits = repo.commits('master', page_size, offset)
    break if commits.empty?

    commits.each do |commit|
      if commit_from_svn?(commit)
        # svn-era commits may credit several people, each gets one count.
        extract_svn_authors(commit).each { |a| authors[normalize_name(a)] += 1 }
      else
        author_name = commit.author.name
        authors[normalize_name(author_name)] += 1 if looks_like_an_author_name(author_name)
      end
    end
    offset += commits.size
  end

  print_authors(authors)
end

process_commits!
This paste will be private.
From the Design Piracy series on my blog: