Report abuse


			
require 'rubygems'
require 'ferret'
include Ferret::Analysis

# Transliteration table handed to MappingFilter by NewAnalyzer#token_stream.
#
# Each key is either a single string or an array of strings; every key string
# is replaced by the plain-ASCII value on the right-hand side.
#
# Corrections relative to the earlier table:
# * 'œ' now maps to 'oe' (was 'oek') and 'ą' to 'a' (was 'q').
# * 'į' and 'ı' are variants of i and map to 'i' (were mapped to 'j').
# * the 'ĳ' ligature maps to 'ij'; the plain ASCII digraph "ij" is no longer
#   a key, so ordinary words containing "ij" are left untouched.
# * duplicate entries ('ì', 'ŏ') were removed.
#
# The hash is frozen so the shared constant cannot be modified by accident.
CHARACTER_MAPPINGS = {
  ['à','á','â','ã','ä','å','ā','ă','ą']     => 'a',
  'æ'                                       => 'ae',
  ['ď','đ']                                 => 'd',
  ['ç','ć','č','ĉ','ċ']                     => 'c',
  ['è','é','ê','ë','ē','ę','ě','ĕ','ė']     => 'e',
  ['ƒ']                                     => 'f',
  ['ĝ','ğ','ġ','ģ']                         => 'g',
  ['ĥ','ħ']                                 => 'h',
  ['ì','í','î','ï','ī','ĩ','ĭ','į','ı']     => 'i',
  'ĳ'                                       => 'ij',
  'ĵ'                                       => 'j',
  ['ķ','ĸ']                                 => 'k',
  ['ł','ľ','ĺ','ļ','ŀ']                     => 'l',
  ['ñ','ń','ň','ņ','ʼn','ŋ']                 => 'n',
  ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ']     => 'o',
  'œ'                                       => 'oe',
  ['ŕ','ř','ŗ']                             => 'r',
  ['ś','š','ş','ŝ','ș']                     => 's',
  ['ť','ţ','ŧ','ț']                         => 't',
  ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
  'ŵ'                                       => 'w',
  ['ý','ÿ','ŷ']                             => 'y',
  ['ž','ż','ź']                             => 'z'
}.freeze

module Ferret::Analysis
  # Custom analyzer whose token stream is a StandardTokenizer wrapped, in
  # order, by LowerCaseFilter, HyphenFilter and finally a MappingFilter
  # configured with CHARACTER_MAPPINGS.
  class NewAnalyzer
    # Builds the filter chain for one piece of text.
    #
    # field - accepted as part of the method signature but not used here.
    # data  - the value passed to StandardTokenizer.new.
    #
    # Returns the outermost MappingFilter.
    def token_stream(field, data)
      tokenizer   = StandardTokenizer.new(data)
      lowercased  = LowerCaseFilter.new(tokenizer)
      hyphen_free = HyphenFilter.new(lowercased)
      MappingFilter.new(hyphen_free, CHARACTER_MAPPINGS)
    end
  end
end

# Per-field analyzer built around a StandardAnalyzer, with the :name field
# assigned a NewAnalyzer instance.
pfa = PerFieldAnalyzer.new(StandardAnalyzer.new)
pfa[:name] = NewAnalyzer.new

# Request a token stream for the :name field 1000 times, pausing a tenth of a
# second after each request.
1000.times do
  pfa.token_stream(:name, "this is a query to be analyzed")
  sleep 0.1
end