Report abuse


			
# == Schema Information
# Schema version: 42
#
# Table name: referers
#
#  id             :integer(11)   not null, primary key
#  created_at     :datetime      
#  host           :string(255)   
#  page           :string(255)   
#  domain_id      :integer(11)   
#  raw_hits_count :integer(11)   
#

# A referring page (the URL a visitor came from), belonging to a domain.
#
# Besides storing the normalised URL and its host, the class can recognise
# referers that come from known search engines / web-mail providers and
# extract the search terms (or other identifying query parameter) from them.
class Referer < ActiveRecord::Base
  belongs_to :domain
  has_many :stats

  # Country suffixes that may follow "google.", "yahoo." and "msn." in the
  # host patterns built by Referer.engines.
  GOOGLE_TLD = %w{ ae as at az be bg bi by ca cd cg ch ci cl co.cr co.hu co.id co.il co.in co.jp co.kr co.ls co.nz co.th co.uk co.ve co.yu com com.ar com.au com.br com.co com.cu com.do com.eg com.ec com.fj com.gi com.gr com.hk com.ly com.mt com.mx com.my com.na com.nf com.ni com.np com.pa com.pe com.ph com.pk com.pr com.py com.sa com.sg com.sv com.tr com.tw com.ua com.uy com.vc com.vn de dj dk ee es fi fm fr ge gg gl gm hn ie it je kz li lt lu lv ms mu mw nl no pl pn pt ro ru rw se sh sk sm td tt uz vg }.freeze
  YAHOO_TLD  = %w{ com com.au co.uk }.freeze
  MSN_TLD    = %w{ ca com es com.au }.freeze

  # Callback: normalises +page+ and derives +host+ from it.
  #
  # Spaces are percent-encoded, and a trailing "%" followed by any number of
  # digits is removed (presumably a percent-escape cut off by the column
  # length limit -- TODO confirm). A nil page is left untouched instead of
  # raising NoMethodError.
  #
  # Raises URI::InvalidURIError when the normalised page is not a valid URI.
  def before_create
    return true if page.nil?

    self.page = page.gsub(' ', '%20').gsub(/\%\d*$/, '')
    self.host = URI.parse(page).host
  end

  # Callback: counts every save of the record as one more raw hit
  # (a nil counter is treated as 0).
  def before_save
    self.raw_hits_count = raw_hits_count.to_i + 1
  end

  # Result of Referer.parse_terms for this record's page, memoised per
  # instance. A nil result is not cached, so it is recomputed on each call.
  def search_result
    @search_result ||= Referer.parse_terms(page)
  end

  # NOTE: the class methods below are public. The original file had a bare
  # `private` here, but `private` does not apply to `def self.` methods and
  # search_result calls Referer.parse_terms with an explicit receiver, so
  # they have to stay public.

  # Table of known referring services, built once and memoised.
  #
  # Each entry is a Hash with:
  #   :uri   - host to recognise; a String is compared for equality, a
  #            Regexp is matched against the host
  #   :path  - request path; a String must equal the path without its
  #            leading "/", a Regexp is matched against the whole path
  #   :query - name of the query-string parameter to extract
  #   :name  - label for the engine
  #
  # Entries are tried in order; the first whose host and path both match wins.
  #
  # NOTE(review): 'yahoo (imaage)' and 'altavsita' look like typos, but the
  # labels are kept byte-for-byte in case other code compares against them.
  def self.engines
    @engines ||= begin
      google   = tld_alternation(GOOGLE_TLD)
      yahoo    = tld_alternation(YAHOO_TLD)
      msn      = tld_alternation(MSN_TLD)
      msn_path = %r{\/(previewx|(sp)?results)\.aspx}

      [
        # IMAGE SEARCH
        { :query => 'prev',  :name => 'google (image)', :path => 'imgres', :uri => %r{^images\.google\.(#{google})$} },
        { :query => 'prev',  :name => 'google (image)', :path => 'imgres', :uri => %r{^www\.google\.(#{google})$} },
        { :query => 'query', :name => 'aol (image)',    :path => 'aolcom/imageDetails', :uri => %r{aolsearcht\d\.search\.aol\.com$} },
        { :query => 'back',  :name => 'yahoo (imaage)', :path => 'search/images/view',  :uri => "images.search.yahoo.com" },
        { :query => 'back',  :name => 'yahoo (imaage)', :path => 'search/images/view',  :uri => 'uk.search.yahoo.com' },

        # REGULAR SEARCH
        # (www.google.com needs no entry of its own: "com" is in GOOGLE_TLD.)
        { :query => 'q', :name => 'google',  :path => 'search', :uri => %r{^www\.google\.(#{google})$} },

        { :query => 'w', :name => 'seznam',  :path => 'searchGoogleScreen', :uri => "search1.seznam.cz" },

        { :query => 'query', :name => 'aol', :path => 'web',    :uri => %r{^aolsearch\.aol\.co\.uk$} },
        { :query => 'query', :name => 'aol', :path => 'aol/search', :uri => %r{aolsearch\.aol\.com$} },

        { :query => 'p', :name => 'yahoo',   :path => 'search', :uri => %r{yahoo\.(#{yahoo})$} },

        { :query => 'q', :name => 'msn',     :path => msn_path, :uri => %r{^search(\.latam)?\.(nine|sympatico\.)?msn\.(#{msn})$} },
        { :query => 'q', :name => 'live',    :path => msn_path, :uri => %r{^search\.live\.com$} },

        { :query => 'q', :name => 'comcast', :path => "", :uri => 'search.comcast.net' },

        { :query => 'q', :name => 'blingo',  :path => 'search', :uri => 'blingo.com' },
        { :query => 'kw', :name => 'voila',  :path => 'S/voila',  :uri => 'search.ke.voila.fr' },

        { :query => 'query', :name => 'lycos', :path => 'cgi-bin/pursuit', :uri => 'suche.lycos.de' },
        { :query => 'q', :name => 'picsearch', :path => 'info.cgi',        :uri => 'www.picsearch.com' },
        { :query => 'q', :name => 'altavsita', :path => 'image/results',   :uri => 'www.altavista.com' },
        { :query => 'q', :name => 'myway',     :path => 'search/GGmain.jhtml', :uri => 'search.myway.com' },
        { :query => 'q', :name => 'bbc',       :path => 'cgi-bin/search/results.pl', :uri => 'search.bbc.co.uk' },

        ## MAIL ##
        { :query => 'auth',  :name => 'gmail    (email)', :path => 'mail/', :uri => %r{^mail\.google\.com$} },
        { :query => 'len',   :name => 'hotmail  (email)', :path => 'cgi-bin/getmsg', :uri => %r{bay\d+\.hotmail\.msn\.com$} },
        { :query => 'hash',  :name => 'ms-live  (email)', :path => 'mail/ApplicationMain_11.10.0000.0095.aspx', :uri => %r{bay\d+\.mail\.live\.com$} },
        { :query => 'MsgId', :name => 'yahoo!   (email)', :path => %r{ym\/(\d\.com\/)?ShowLetter}, :uri => %r{\d+\.mail\.yahoo\.com$} },
        # String hosts are compared with ==, so they must not contain regexp
        # escapes (the previous 'www\.fastmail\.fm' could never match).
        { :query => 'MLS',   :name => 'fastmail (email)', :path => 'mail', :uri => 'www.fastmail.fm' },
        { :query => 'msgId', :name => 'bigpond  (email)', :path => 'webedge/do/mail/message/view', :uri => 'webedge.bigpond.com' },

        { :query => 'q', :name => 'mywestnet', :path => 'search.aspx', :uri => 'search.mywestnet.com.au' }

#      { :query => '', :name => '', :path => '', :uri => %r{} },
      ]
    end
  end

  # Extracts the search terms from the referer URL +ref+.
  #
  # Returns:
  #   "blank"    - when +ref+ is blank
  #   "nil path" - when the parsed URI has a host but no path
  #   a String   - the decoded value of the matching engine's query parameter,
  #                or "??" when that parameter is absent or empty
  #   nil        - when no engine matches, the URI has no host, or +ref+
  #                cannot be parsed as a URI
  def self.parse_terms(ref)
    return "blank" if ref.blank?

    uri = URI.parse(ref.gsub(' ', "%20"))
    # Relative or opaque references have no host and cannot match any engine.
    return nil if uri.host.nil?
    return "nil path" if uri.path.nil?

    host   = uri.host.downcase # host names are case-insensitive
    engine = engines.detect do |e|
      host_matches?(host, e[:uri]) && path_matches?(uri.path, e[:path])
    end
    return nil if engine.nil?

    find_terms_from_uri(uri.query, engine[:query], engine[:name])
  rescue URI::InvalidURIError
    # Bad URI: nothing can be extracted.
    nil
  end

  # Looks up parameter +key+ in the raw query string +query+ and returns it
  # decoded, with "+" and "%2C" (an encoded comma) turned into spaces.
  # Returns "??" when the parameter is missing or has no value.
  # +engine+ is accepted for interface compatibility and is not used.
  def self.find_terms_from_uri( query, key, engine )
    result = parameterize( query )[ key ]
    return "??" unless result # Couldn't determine search terms

    CGI.unescape( result.split(/\+|\%2C/).join(" ") )
  end

  # Splits a raw query string ("a=1&b=2") into a Hash of still-encoded values.
  #
  # A nil query yields an empty Hash. A parameter without a value ("a" or
  # "a=") maps to nil. Only the first "=" separates key from value, so values
  # that themselves contain "=" are kept intact. When a key repeats, the last
  # occurrence wins.
  def self.parameterize(query)
    query.to_s.split("&").inject({}) do |params, pair|
      key, value = pair.split("=", 2)
      unless key.nil?
        params[key] = (value.nil? || value.empty?) ? nil : value
      end
      params
    end
  end

  # Regexp source matching any one of +tlds+ literally (dots escaped).
  def self.tld_alternation(tlds)
    tlds.map { |tld| Regexp.escape(tld) }.join('|')
  end

  # True when +host+ satisfies an engine's :uri pattern.
  def self.host_matches?(host, pattern)
    pattern.is_a?(Regexp) ? !(host =~ pattern).nil? : host == pattern
  end

  # True when +path+ satisfies an engine's :path pattern.
  def self.path_matches?(path, pattern)
    pattern.is_a?(Regexp) ? !(path =~ pattern).nil? : path == "/#{pattern}"
  end

  private_class_method :tld_alternation, :host_matches?, :path_matches?

end