Thank you to anyone who has already donated - your generous donations helped make three months of treatment possible.

My brother Nate continues to fight stage IV Hodgkin's lymphoma. He's just 31, with a wife and baby girl. They have no active income (since he's been unable to return to work), no insurance, and cannot afford the treatment he needs. Nate and his family need your help. Please consider a donation; every dollar helps. Thanks.



			
#!/usr/bin/env ruby19
# Ruby Quiz #167
# http://splatbang.com/rubyquiz/quiz.rhtml?id=167_Statistician_I
# http://groups.google.com/group/comp.lang.ruby/browse_frm/thread/bea26955698bce12?hl=en#

require 'stringio'


# Matches lines of input against a list of rules and reports, for every
# line, which rule matched and which field values were captured.
class Reporter
    # rules:: path of the rules file, one rule per line (see #read_rules).
    def initialize(rules)
        @rules = read_rules(rules)
    end

    # Reads +rules_file+ and compiles each of its lines into a Regexp.
    # The position of a rule in the returned array is the rule number
    # printed by #report.
    #
    # Syntax:
    # * <name>           - a field; captured as a named group
    # * [optional group] - text that may be missing from the input
    # * a backslash makes the following token literal (e.g. \< or \[)
    #
    # Raises RuntimeError ("Syntax error NN ...") for malformed rules.
    def read_rules(rules_file)
        File.readlines(rules_file).map { |rule_line| compile_rule(rule_line) }
    end

    # Returns a Fiber that yields one Hash per line of +io_data+:
    # :line is the chomped line; :match (MatchData) and :ruleno (index
    # into the rule list) are set only when some rule matched. The first
    # matching rule wins. Once the input is exhausted the fiber finishes
    # with nil.
    def data_reader(io_data)
        Fiber.new do
            until io_data.eof?
                Fiber.yield classify(io_data.gets.chomp)
            end
            nil
        end
    end

    # Writes one report line per input line to +io_out+, followed by a
    # listing of all unmatched input lines (if there were any).
    def report(io_data, io_out)
        # The leading nil becomes an empty line in front of the heading
        # when the array is joined.
        unmatched = [nil, 'Unmatched input:']
        reader = data_reader(io_data)
        while (d = reader.resume)
            if d[:match]
                # Fields inside optional groups that did not take part
                # in the match are nil; drop them.
                io_out.puts %{Rule #{d[:ruleno]}: #{d[:match].captures.compact.join(', ')}}
            else
                unmatched << d[:line]
                io_out.puts '# No Match'
            end
        end
        io_out.puts unmatched.join("\n") if unmatched.size > 2
    end

    private

    # Translates a single rule line into a Regexp that has to match a
    # whole input line.
    def compile_rule(rule_line)
        state = :text
        level = 0
        parts = rule_line.chomp.split(/([<>\[\]\\])/).map do |token|
            # Adjacent delimiters produce empty strings; ignore them.
            next if token.empty?
            case state
            when :escape
                state = :text
                Regexp.escape(token)
            when :group_name
                raise "Syntax error 01: #{token}: #{rule_line}" if /\W/ =~ token
                state = :group_name_end
                token
            when :group_name_end
                raise "Syntax error 02: #{token}: #{rule_line}" unless token == '>'
                state = :text
                '>.*?)'
            when :text
                case token
                when '['
                    level += 1
                    '(?:'
                when ']'
                    level -= 1
                    # A "]" without a preceding "[" can never be balanced.
                    raise "Syntax error 00 #{level}: #{rule_line}" if level < 0
                    ')?'
                when '<'
                    state = :group_name
                    '(?<'
                when '\\'
                    state = :escape
                    nil
                else
                    Regexp.escape(token)
                end
            end
        end
        raise "Syntax error 00 #{level}: #{rule_line}" unless level == 0
        # The rule ended in the middle of a <name> field.
        if state == :group_name || state == :group_name_end
            raise "Syntax error 03: #{rule_line}"
        end
        # Anchor at both ends: without the end anchor a rule would also
        # match every line it is only a prefix of, and a lazy field at
        # the end of a rule would always capture the empty string.
        Regexp.new('\A' + parts.join + '\z')
    end

    # Builds the record for one input line by trying the rules in order.
    def classify(line)
        record = {:line => line}
        @rules.each_with_index do |rule, ruleno|
            m = rule.match(line)
            next unless m
            record[:match]  = m
            record[:ruleno] = ruleno
            break
        end
        record
    end
end


if __FILE__ == $0
    # Command line: the rules file is mandatory, the data file optional.
    rules, data = ARGV
    if rules.nil? || rules == '-h' || rules == '--help'
        puts "#$0 RULES.TXT [DATA.TXT]"
        exit 1
    end

    # With a data file the report is collected in memory and printed in
    # one go at the end; without one the script filters STDIN to STDOUT.
    io_data, io_out = data ? [File.open(data), StringIO.open] : [STDIN, STDOUT]

    begin
        # Input that ruby19 tags as US-ASCII is switched to ascii-8bit
        # before it is read (the original author found ruby19's
        # encoding handling weird).
        if io_data.external_encoding == Encoding::US_ASCII
            io_data.set_encoding('ascii-8bit')
        end
        reporter = Reporter.new(rules)
        reporter.report(io_data, io_out)
    ensure
        # Only the streams opened here are flushed and closed; STDIN and
        # STDOUT are left alone.
        if data
            puts io_out.string
            io_out.close
            io_data.close
        end
    end
end