diff options
-rw-r--r-- | README.md | 5 | ||||
-rw-r--r-- | animate.rb | 17 | ||||
-rw-r--r-- | example/glue (renamed from example/grammar.x) | 0 | ||||
-rw-r--r-- | example/grammar-test | 22 | ||||
-rw-r--r-- | grammar.rb | 111 | ||||
-rw-r--r-- | hg.rb | 0 | ||||
-rw-r--r-- | parse.rb | 47 |
7 files changed, 46 insertions, 156 deletions
@@ -4,3 +4,8 @@ nothing to see here helpful stuff * https://github.com/jweese/thrax/wiki/Glue-grammar +todo + * animate? + * to json + * integrate with HG + diff --git a/animate.rb b/animate.rb deleted file mode 100644 index 558146f..0000000 --- a/animate.rb +++ /dev/null @@ -1,17 +0,0 @@ -s = 'ich sah ein kleines haus .' -def visit n, depth, skip=0 # FIXME - (depth-skip).times { |i| - i += skip - 0.upto(n-(i+1)) { |j| - yield j, j+i+1 - } - } -end - -0,5 - - -1 2 -0,1 1,2 2,3 3,4 4,5 -ich sah ein kleines haus - diff --git a/example/grammar.x b/example/glue index 7ff74b5..7ff74b5 100644 --- a/example/grammar.x +++ b/example/glue diff --git a/example/grammar-test b/example/grammar-test new file mode 100644 index 0000000..f49b999 --- /dev/null +++ b/example/grammar-test @@ -0,0 +1,22 @@ +[S] ||| [B,1] ||| [1] ||| logp=0 +[S] ||| ich [V,1] ein [JJ,2] haus ||| i [1] a [2] ||| +[S] ||| ich sah ein kleines haus ||| i saw a small house ||| +[S] ||| ich sah ein [JJ,1] ||| i saw a [1] ||| +[B] ||| [C,1] ||| [1] ||| logp=0 +[C] ||| [Q,1] ||| [1] ||| logp=0 +[Q] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 +[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0 +[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0 +[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 +[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 +[JJ] ||| [H,1] ||| [1] ||| +[H] ||| kleines [Z,1] ||| small [1] ||| +[Z] ||| [I,1] ||| [1] ||| +[I] ||| haus ||| house ||| +[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0 +[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0 +[JJ] ||| grosses ||| big ||| logp=0 +[JJ] ||| grosses ||| large ||| logp=0 +[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 +[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0 +[V] ||| fand ||| found ||| logp=0 diff --git a/grammar.rb b/grammar.rb deleted file mode 100644 index a674a7b..0000000 --- a/grammar.rb +++ /dev/null @@ -1,111 +0,0 @@ -require 'nlp_ruby' - - -class T - attr_accessor :word - - def initialize word - @word = word - end - - def to_s - "T<#{@word}>" - end -end - -class NT - attr_accessor :symbol, :index, :span - - def initialize symbol, index=0 - @symbol = symbol - @index = index - @span = Span.new - end - - def to_s - "NT(#{@span.left},#{@span.right})<#{@symbol},#{@index}>" - end -end - - - -class Rule - attr_accessor :lhs, :rhs - - def initialize lhs=nil, rhs=[] - @lhs = lhs - @rhs = rhs - end - - def to_s - "#{lhs} -> #{rhs.map{ |i| i.to_s }.join ' '} [arity=#{arity}]" - end - - def arity - rhs.select { |i| i.class == NT }.size - end - - def from_s s - _ = splitpipe s, 3 - @lhs = NT.new _[0].strip.gsub!(/(\[|\])/, "") - _[1].split.each { |x| - x.strip! - if x[0]=='[' && x[x.size-1] == ']' - @rhs << NT.new(x.gsub!(/(\[|\])/, "").split(',')[0]) - else - @rhs << T.new(x) - end - } - end - - def self.from_s s - r = self.new - r.from_s s - return r - end -end - -class Grammar - attr_accessor :rules, :startn, :startt, :flat - - def initialize fn - @rules = []; @startn = []; @startt = [] ;@flat = [] - ReadFile.readlines_strip(fn).each_with_index { |s,i| - STDERR.write '.'; STDERR.write " #{i+1}\n" if (i+1)%80==0 - @rules << Rule.from_s(s) - if @rules.last.rhs.first.class == NT - @startn << @rules.last - else - if rules.last.arity == 0 - @flat << @rules.last - else - @startt << @rules.last - end - end - } - STDERR.write "\n" - end - - def to_s - s = '' - @rules.each { |r| s += r.to_s+"\n" } - return s - end - - def add_glue_rules - @rules.map { |r| r.lhs.symbol }.select { |s| s != 'S' }.uniq.each { |symbol| - @rules << Rule.new(NT.new('S'), [NT.new(symbol)]) - @startn << @rules.last - @rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')]) - @startn << @rules.last - } - end - - def add_pass_through_rules s - s.each { |word| - @rules << Rule.new(NT.new('X'), [T.new(word)]) - @flat << @rules.last - } - end -end - @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -require_relative './grammar.rb' +require 'nlp_ruby' class Chart @@ -29,36 +29,28 @@ class Chart end end -class Span - attr_accessor :left, :right - - def initialize left=nil, right=nil - @left = left - @right = right - end -end - -class Item < Rule - attr_accessor :lhs, :rhs, :dot +class Item < Grammar::Rule + attr_accessor :lhs, :rhs, :dot, :e def initialize rule_or_item, left, right, dot - @lhs = NT.new rule_or_item.lhs.symbol - @lhs.span = Span.new left, right + @lhs = Grammar::NT.new rule_or_item.lhs.symbol + @lhs.span = Grammar::Span.new left, right @rhs = [] rule_or_item.rhs.each { |x| - if x.class == T - @rhs << T.new(x.word) + if x.class == Grammar::T + @rhs << Grammar::T.new(x.word) end - if x.class == NT - @rhs << NT.new(x.symbol) - @rhs.last.span = Span.new x.span.left, x.span.right + if x.class == Grammar::NT + @rhs << Grammar::NT.new(x.symbol) + @rhs.last.span = Grammar::Span.new x.span.left, x.span.right end } @dot = dot + @e = rule_or_item.e end def to_s - "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@lhs.span.left}, #{@lhs.span.right})" + "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@lhs.span.left}, #{@lhs.span.right}) ||| #{@e}" end end @@ -73,7 +65,7 @@ def init input, n, active_chart, passive_chart, grammar end def scan item, input, limit, passive_chart - while item.rhs[item.dot].class == T + while item.rhs[item.dot].class == Grammar::T return false if item.lhs.span.right==limit if item.rhs[item.dot].word == input[item.lhs.span.right] item.dot += 1 @@ -120,7 +112,7 @@ def parse input, n, active_chart, passive_chart, grammar if passive_chart.has active_item.rhs[active_item.dot].symbol, k, l if k == active_item.lhs.span.right new_item = Item.new active_item, active_item.lhs.span.left, l, active_item.dot+1 - new_item.rhs[new_item.dot-1].span = Span.new k, l + new_item.rhs[new_item.dot-1].span = Grammar::Span.new k, l if scan new_item, input, j, passive_chart if new_item.dot == new_item.rhs.size if new_item.lhs.span.left == i && new_item.lhs.span.right == j @@ -145,13 +137,12 @@ def parse input, n, active_chart, passive_chart, grammar # 'self-filling' step new_symbols.each { |s| - puts new_symbols.to_s if i==2&&j==5 remaining_items.each { |active_item| next if active_item.dot!=0 - next if active_item.rhs[active_item.dot].class!=NT + next if active_item.rhs[active_item.dot].class!=Grammar::NT if active_item.rhs[active_item.dot].symbol == s new_item = Item.new active_item, i, j, active_item.dot+1 - new_item.rhs[new_item.dot-1].span = Span.new i, j + new_item.rhs[new_item.dot-1].span = Grammar::Span.new i, j if new_item.dot==new_item.rhs.size new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol passive_chart.add new_item, i, j @@ -165,13 +156,13 @@ end def main STDERR.write "> reading input from TODO\n" - input = 'ich sah ein kleines haus'.split + #input = 'ich sah ein kleines haus'.split #input = 'lebensmittel schuld an europäischer inflation'.split - #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split + input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split n = input.size STDERR.write "> reading grammar\n" - grammar = Grammar.new 'example/grammarx' + grammar = Grammar::Grammar.new 'example/grammar.3.gz' STDERR.write ">> adding glue grammar\n" #grammar.add_glue_rules STDERR.write ">> adding pass-through grammar\n" |