# parse_kif.tt
#
# http://treetop.rubyforge.org/
# http://treetop.rubyforge.org/semantic_interpretation.html
# http://treetop.rubyforge.org/syntactic_recognition.html
# http://treetop.rubyforge.org/using_in_ruby.html
# http://treetop.rubyforge.org/pitfalls_and_advanced_techniques.html
#
# http://en.wikipedia.org/wiki/Parsing_expression_grammar
#
# http://logic.stanford.edu/kif/dpans.html#4
#
# Many thanks to hagabaka and db-keen on #treetop !
grammar KifGrammar
# The exp* methods and explanations are courtesy of db-keen on #treetop,
# but any errors are almost undoubtedly my own.
#
# An exp_list is a list of exp_wraps, optionally separated by whitespace.
# An exp_wrap is either a parenthesized exp_list or a token.
# The two rules are mutually recursive...
#
rule exp_list
  space? exps:(exp_wrap ( space? exp_wrap )*)? space? {
    # Flatten the labelled 'exps' match into a plain array of exp_wrap
    # nodes.
    #
    # super() hands back the syntax node for the 'exps' label (the
    # label accessor has the same name as this method).  Its elements
    # are nil when the optional group matched nothing; otherwise they
    # are [ first_exp, repetition ], and each entry of
    # repetition.elements is a [ space?, exp ] pair whose second
    # element is the expression we want.
    def exps
      parts = super.elements
      return [] unless parts # '()' -> []
      followers = parts[1].elements.map { |pair| pair.elements[1] }
      [ parts[0] ] + followers
    end

    # Convert every child expression to its Ruby form, in order.
    #
    # When the global $rk_strip is set, any Symbol whose text begins
    # with ';' is left out of the result.
    def to_ruby
      converted = exps.map { |e| e.to_ruby }
      converted.reject do |val|
        $rk_strip && val.class == Symbol && val.to_s[0..0] == ';'
      end
    end
  }
end
rule exp_wrap
  '(' exp_list ')' {
    # A parenthesized list is a thin wrapper around its inner exp_list.
    # Any message this node does not itself understand (to_ruby, exps,
    # and so on) is forwarded there, so callers can ignore the fact
    # that exp_wrap exists.  A block, if given, is forwarded too.
    def method_missing(m, *args, &blk)
      exp_list.send(m, *args, &blk)
    end

    # Report the capabilities of the inner exp_list.
    #
    # The optional second argument matches the signature of
    # Object#respond_to?, so a caller that passes include_all no
    # longer triggers an ArgumentError.
    def respond_to?(m, include_all = false)
      exp_list.respond_to?(m, include_all)
    end
  }
  / token
end
# Noise (ie, '#|...|#' comments)
rule noise
# A block comment: '#|', a body (n_b), then '|#'.
# to_ruby returns the entire matched text, delimiters included,
# as a Symbol.
'#|' n_b '|#' { def to_ruby; text_value.to_sym; end }
end
rule n_b
# Noise body: zero or more items, each either a complete nested
# noise comment or any single character that does not begin a
# '#|' or '|#' delimiter.
(noise / !'#|' !'|#' .)*
end
# Notes (ie, ";...\n" comments)
rule note
  # A ';' followed by any run of non-newline characters (the !"\n"
  # guard stops '.' from matching a newline).
  #
  # The trailing positive lookahead requires a newline or
  # end-of-input next but does not consume it, so back-to-back
  # comment lines still parse.
  ';' contents:(!"\n" .)* &("\n" / !.) {
    # The comment as a Symbol: ';', the comment text, and a newline
    # (appended even when the note ended at end-of-input).
    def to_ruby
      ";#{contents.text_value}\n".to_sym
    end
  }
end
# Numbers (exponential, floating point, and integer numbers)
rule b_f # base - float
  # Integer part (optionally signed, see b_i), a '.', then one or
  # more digits.
  #
  # The fractional part is deliberately unsigned: when it reused b_i,
  # text such as "1.-5" was accepted as a float and String#to_f then
  # silently produced 1.0.
  b_i '.' [0-9]+
end
rule b_i # base - integer
# An optional leading '-' followed by one or more decimal digits.
'-'? [0-9]+
end
rule n_e
# number - exponential: a float (b_f) or integer (b_i) mantissa, a
# lowercase 'e', and an integer exponent.  The trailing !nd makes
# the match fail if a non-digit word character (nd) follows.
# to_ruby converts the matched text with String#to_f.
b_f 'e' b_i !nd { def to_ruby; text_value.to_f; end }
/
b_i 'e' b_i !nd { def to_ruby; text_value.to_f; end }
end
rule n_f # number - float
# A b_f not followed by a non-digit word character (nd); to_ruby
# converts the matched text with String#to_f.
b_f !nd { def to_ruby; text_value.to_f; end }
end
rule n_i # number - integer
# A b_i not followed by a non-digit word character (nd); to_ruby
# converts the matched text with String#to_i.
b_i !nd { def to_ruby; text_value.to_i; end }
end
rule number
# Alternatives are tried in order: exponential, then float, then
# integer.
n_e / n_f / n_i
end
# Space (any of blank, formfeed, tab, newline, or return)
rule space
# One or more consecutive whitespace characters: blank, formfeed,
# tab, newline, or carriage return.
(' ' / "\f" / "\t" / "\n" / "\r")+
end
# Strings
rule string
  # A double-quoted string.  A backslash escapes whatever character
  # follows it, so an escaped quote does not end the string.
  #
  # The escape alternative must come first: PEG choice is ordered,
  # and the catch-all (any character that is not a quote) would
  # otherwise consume the backslash on its own and leave the escaped
  # quote to be taken as the closing delimiter.
  '"' body:('\\' . / !'"' .)* '"' {
    # The text between the quotes, with escape sequences left exactly
    # as written.
    def to_ruby
      body.text_value
    end
  }
end
# Token (noise, note, string, number, word)
rule token
# Alternatives are tried in the order listed; number comes before
# word, so text that qualifies as a number is not read as a word.
noise / note / string / number / word
end
# Word
rule word
# A word character followed by any mix of escape sequences (es) and
# further word characters (wc).  to_ruby returns the matched text,
# escapes included as written, as a Symbol.
wc (es / wc)* { def to_ruby; text_value.to_sym; end }
end
rule dgt
# digit: a single decimal digit.
[0-9]
end
rule es # escape sequence
  '\\' . {
    # Render the escape as a backslash followed by the (first) byte
    # of the escaped character in three-digit octal.
    #
    # text_value[1] is an Integer on Ruby 1.8 but a one-character
    # String on 1.9 and later, where handing it to '%03o' either
    # raises ArgumentError or formats a digit character as its
    # numeric value.  Slicing a one-character String and unpacking
    # it yields the byte value on every Ruby version.
    def to_ruby
      '\\' + '%03o' % text_value[1, 1].unpack('C').first
    end
  }
end
rule etc
# Punctuation permitted in words: the characters in the class
# below, plus '*', '+', '-', '?', '$' and '.'.
[:!%&/<=>@_~] / '*' / '+' / '-' / '?' / '$' / '.'
end
rule ltr # letter
# A single ASCII letter, either case.
[a-zA-Z]
end
rule nd # non-digit
# Any word character other than a digit: punctuation (etc) or a
# letter (ltr).  The number rules use !nd as a negative lookahead.
etc / ltr
end
rule wc # word character
# A digit (dgt) or a non-digit word character (nd).
#
# NOTE(review): the inline block appears to attach only to the 'nd'
# alternative (choice binds more loosely than a trailing block), so
# to_ruby would be defined for non-digit matches but not for 'dgt'
# matches -- confirm against the Treetop docs.
dgt / nd { def to_ruby; text_value; end }
end
end