diff options
-rw-r--r-- | lib/nlp_ruby/grammar.rb | 122 | ||||
-rw-r--r-- | test/hg/hg-toy-with-target.json | 31 |
2 files changed, 153 insertions, 0 deletions
# Data structures for a rule-based grammar: terminal and non-terminal symbols,
# rules built from them, and a container that loads rules from a file and
# indexes them by the shape of their right-hand side.
module Grammar
  # Terminal symbol: wraps a single word.
  class T
    attr_accessor :word

    # @param word [String] the surface word
    def initialize(word)
      @word = word
    end

    # @return [String] e.g. "T<haus>"
    def to_s
      "T<#{@word}>"
    end
  end

  # Non-terminal symbol with an index and a (initially empty) span.
  class NT
    attr_accessor :symbol, :index, :span

    # @param symbol [String] category label, e.g. "NP"
    # @param index [Integer] defaults to 0
    def initialize(symbol, index = 0)
      @symbol = symbol
      @index = index
      @span = Span.new
    end

    # @return [String] e.g. "NT(0,2)<NP,0>"; unset span ends print as empty
    def to_s
      "NT(#{@span.left},#{@span.right})<#{@symbol},#{@index}>"
    end
  end

  # A single rule: a left-hand-side NT, a right-hand-side sequence of T/NT
  # items, and a third field +e+ that is stored verbatim.
  class Rule
    attr_accessor :lhs, :rhs, :e

    # @param lhs [NT, nil]
    # @param rhs [Array<T, NT>]
    # @param e [String] third field of the rule (presumably the target side;
    #   confirm against callers)
    def initialize(lhs = nil, rhs = [], e = '')
      @lhs = lhs
      @rhs = rhs
      @e = e
    end

    # @return [String] "<lhs> -> <rhs items> [arity=N] ||| <e>"
    def to_s
      "#{lhs} -> #{rhs.map(&:to_s).join(' ')} [arity=#{arity}] ||| #{@e}"
    end

    # @return [Integer] number of non-terminals on the right-hand side
    def arity
      rhs.count { |item| item.is_a?(NT) }
    end

    # Fills this rule from its string form: field 0 is the LHS, field 1 the
    # whitespace-separated RHS, field 2 is stored unchanged in +e+. Parsed RHS
    # items are appended to the existing +rhs+.
    #
    # Fields come from +splitpipe+, a helper defined outside this file
    # (presumably it splits on "|||" -- confirm).
    #
    # @param s [String] e.g. "[NP] ||| ein [NN,1] ||| a [1]"
    # @return [String, nil] the value assigned to +e+
    def from_s(s)
      fields = splitpipe s, 3
      # String#delete always returns a string. The destructive gsub! returns
      # nil when nothing was replaced, which turned a bracket-less LHS into
      # NT.new(nil).
      @lhs = NT.new(fields[0].strip.delete('[]'))
      fields[1].split.each do |token|
        @rhs << if token.start_with?('[') && token.end_with?(']')
                  # "[NN,1]" -> "NN"; the part after the comma is dropped, so
                  # the NT keeps its default index.
                  NT.new(token.delete('[]').split(',').first)
                else
                  T.new(token)
                end
      end
      @e = fields[2]
    end

    # Builds a new rule from its string form (see #from_s).
    #
    # @param s [String]
    # @return [Rule]
    def self.from_s(s)
      rule = new
      rule.from_s(s)
      rule
    end
  end

  # A pair of positions; both are nil until assigned.
  class Span
    attr_accessor :left, :right

    def initialize(left = nil, right = nil)
      @left = left
      @right = right
    end
  end

  # A rule set read from a file, indexed three ways:
  # * +startn+ -- rules whose RHS begins with a non-terminal
  # * +flat+   -- rules whose RHS begins with a terminal (or is empty) and
  #   contains no non-terminal
  # * +startt+ -- the remaining rules (begin with a terminal, arity > 0)
  class Grammar
    attr_accessor :rules, :startn, :startt, :flat

    # Reads one rule per line from +fn+ via ReadFile.readlines_strip (defined
    # outside this file). Writes a progress dot per rule to STDERR, with a
    # running count after every 80th rule.
    #
    # @param fn [String] path of the grammar file
    def initialize(fn)
      @rules = []
      @startn = []
      @startt = []
      @flat = []
      ReadFile.readlines_strip(fn).each_with_index do |line, i|
        STDERR.write '.'
        STDERR.write " #{i + 1}\n" if ((i + 1) % 80).zero?
        rule = Rule.from_s(line)
        @rules << rule
        if rule.rhs.first.is_a?(NT)
          @startn << rule
        elsif rule.arity.zero?
          @flat << rule
        else
          @startt << rule
        end
      end
      STDERR.write "\n"
    end

    # @return [String] every rule's string form, one per line
    def to_s
      @rules.map { |rule| "#{rule}\n" }.join
    end

    # For every distinct LHS symbol other than 'S', appends "S -> <symbol>"
    # and "S -> S X" to +rules+ and +startn+.
    #
    # NOTE(review): "S -> S X" does not depend on the symbol, so it is added
    # once per symbol. Kept as in the original; confirm whether one copy (or
    # "S -> S <symbol>") was intended.
    def add_glue_rules
      symbols = @rules.map { |rule| rule.lhs.symbol }.reject { |sym| sym == 'S' }.uniq
      symbols.each do |symbol|
        @rules << Rule.new(NT.new('S'), [NT.new(symbol)])
        @startn << @rules.last
        @rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')])
        @startn << @rules.last
      end
    end

    # Appends a flat rule "X -> <word>" (with empty +e+) for each word.
    #
    # @param s [Enumerable<String>] words to pass through
    def add_pass_through_rules(s)
      s.each do |word|
        @rules << Rule.new(NT.new('X'), [T.new(word)])
        @flat << @rules.last
      end
    end
  end
end # module
"f":{"logp":0.0, "use_a":1.0}, "weight":1.0 }, +{"head":"5", "trule":"[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0.0", "left":1, "right":5, "tails":[ "1","4" ], "f":{"logp":0.0}, "weight":1.0 }, +{"head":"6", "trule":"[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0.0", "left":0, "right":5, "tails":[ "0","5" ], "f":{"logp":0.0}, "weight":1.0 }, +{"head":"7", "trule":"[Goal] ||| [S,1] ||| [1] ||| ", "left":0, "right":5, "tails":[ "6" ], "f":{}, "weight":1.0 } +] +} + |