Report abuse


			
require 'rubygems'
require 'ferret'

# Ferret analyzer that splits text with StandardTokenizer and then rewrites
# accented lowercase Latin letters to unaccented ASCII through MappingFilter,
# so that accented and unaccented spellings of a word yield the same token
# text (e.g. the 'í' in 'prejuízo' is replaced by 'i').
#
# NOTE(review): no LowerCaseFilter is applied and the table only lists
# lowercase forms, so uppercase accented letters appear to pass through
# unmapped -- confirm that is intended.
class PortugueseAnalyzer < Ferret::Analysis::Analyzer
  include Ferret::Analysis

  # Replacement table handed to MappingFilter. A key is either a single
  # string or an array of strings; every key is replaced by its value.
  MAPPING = {
    ['à','á','â','ã','ä','å','ā','ă','ą']     => 'a',
    'æ'                                       => 'ae',
    ['ď','đ']                                 => 'd',
    ['ç','ć','č','ĉ','ċ']                     => 'c',
    ['è','é','ê','ë','ē','ę','ě','ĕ','ė']     => 'e',
    ['ƒ']                                     => 'f',
    ['ĝ','ğ','ġ','ģ']                         => 'g',
    ['ĥ','ħ']                                 => 'h',
    ['ì','í','î','ï','ī','ĩ','ĭ','į','ı']     => 'i',
    # Only the single-codepoint ligature (U+0133) is expanded. The plain
    # two-letter sequence "ij" must NOT be a key: mapping it to 'j' would
    # corrupt ordinary words such as "beijo" or "queijo".
    ['ĳ']                                     => 'ij',
    ['ĵ']                                     => 'j',
    ['ķ','ĸ']                                 => 'k',
    ['ł','ľ','ĺ','ļ','ŀ']                     => 'l',
    # Both the precomposed U+0149 and its decomposed spelling are listed.
    ['ñ','ń','ň','ņ','ŉ','ʼn','ŋ']             => 'n',
    ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ']     => 'o',
    ['œ']                                     => 'oe',
    ['ŕ','ř','ŗ']                             => 'r',
    ['ś','š','ş','ŝ','ș']                     => 's',
    ['ť','ţ','ŧ','ț']                         => 't',
    ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
    ['ŵ']                                     => 'w',
    ['ý','ÿ','ŷ']                             => 'y',
    ['ž','ż','ź']                             => 'z'
  }.freeze

  # Builds the token stream for one field value.
  #
  # field  - field name supplied by Ferret; ignored, every field is
  #          analyzed the same way.
  # string - the text to tokenize.
  #
  # Returns a MappingFilter wrapping a StandardTokenizer over +string+.
  def token_stream(field, string)
    MappingFilter.new(StandardTokenizer.new(string), MAPPING)
  end
end

# Demo: run a sample Portuguese word through the analyzer and print every
# token the resulting stream yields, one per line.
a = PortugueseAnalyzer.new
ts = a.token_stream(nil, 'prejuízo')
# The loop stops at the first falsy value returned by ts.next, leaving
# `token` falsy afterwards.
while (token = ts.next)
  puts "#{token}\n"
end