From ea5a910c2e1cb8e22a6d21bc2566428469a6f53b Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Tue, 20 May 2014 16:38:11 +0200 Subject: need to fix self-filling --- animate.rb | 17 ++++++++++++++++ grammar.rb | 41 +++++++++++++++++++------------------- parse.rb | 66 +++++++++++++++++++++++++++++++++++++------------------------- 3 files changed, 77 insertions(+), 47 deletions(-) create mode 100644 animate.rb diff --git a/animate.rb b/animate.rb new file mode 100644 index 0000000..558146f --- /dev/null +++ b/animate.rb @@ -0,0 +1,17 @@ +s = 'ich sah ein kleines haus .' +def visit n, depth, skip=0 # FIXME + (depth-skip).times { |i| + i += skip + 0.upto(n-(i+1)) { |j| + yield j, j+i+1 + } + } +end + +0,5 + + +1 2 +0,1 1,2 2,3 3,4 4,5 +ich sah ein kleines haus + diff --git a/grammar.rb b/grammar.rb index 4183a8f..b7d2408 100644 --- a/grammar.rb +++ b/grammar.rb @@ -41,18 +41,18 @@ class Rule end def arity - rhs.reject { |i| i.class==T }.size + rhs.select { |i| i.class == NT }.size end def from_s s _ = splitpipe s, 3 @lhs = NT.new _[0].strip.gsub!(/(\[|\])/, "") - _[1].split.each { |i| - i.strip! - if i[0]=='[' && i[i.size-1] == ']' - @rhs << NT.new(i.gsub!(/(\[|\])/, "").split(',')[0]) + _[1].split.each { |x| + x.strip! + if x[0]=='[' && x[x.size-1] == ']' + @rhs << NT.new(x.gsub!(/(\[|\])/, "").split(',')[0]) else - @rhs << T.new(i) + @rhs << T.new(x) end } end @@ -60,26 +60,25 @@ class Rule def self.from_s s r = self.new r.from_s s - r + return r end end class Grammar - attr_accessor :rules, :rewrite, :flat, :mixed + attr_accessor :rules, :startn, :startt, :flat def initialize fn - @rules = []; @rewrite = []; @flat = []; @mixed = [] + @rules = []; @startn = []; @startt = [] ;@flat = [] ReadFile.readlines_strip(fn).each_with_index { |s,i| - STDERR.write '.' - STDERR.write "\n" if (i+1)%80==0 + STDERR.write '.'; STDERR.write "\n" if (i+1)%80==0 @rules << Rule.from_s(s) if @rules.last.rhs.first.class == NT - @rewrite << @rules.last + @startn << @rules.last else if rules.last.arity == 0 @flat << @rules.last else - @mixed << @rules.last + @startt << @rules.last end end } @@ -89,21 +88,21 @@ class Grammar def to_s s = '' @rules.each { |r| s += r.to_s+"\n" } - s + return s end def add_glue_rules - @rules.map { |r| r.lhs.symbol }.reject { |s| s=='S' }.uniq.each { |s| - @rules << Rule.new(NT.new('S'), [NT.new(s)]) - @rewrite << @rules.last + @rules.map { |r| r.lhs.symbol }.select { |s| s != 'S' }.uniq.each { |symbol| + @rules << Rule.new(NT.new('S'), [NT.new(symbol)]) + @startn << @rules.last @rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')]) - @rewrite << @rules.last + @startn << @rules.last } end - def add_pass_through_rules input - input.each { |terminal| - @rules << Rule.new(NT.new('X'), [T.new(terminal.word)]) + def add_pass_through_rules s + s.each { |word| + @rules << Rule.new(NT.new('X'), [T.new(word)]) @flat << @rules.last } end diff --git a/parse.rb b/parse.rb index 0d82c1d..cb428d1 100644 --- a/parse.rb +++ b/parse.rb @@ -1,7 +1,6 @@ #!/usr/bin/env ruby require_relative './grammar.rb' -STDOUT.sync = true class Chart @@ -20,7 +19,7 @@ class Chart @m[i][j] end - def add i, j, item + def add item, i, j at(i,j) << item @b["#{i},#{j},#{item.lhs.symbol}"] = true end @@ -59,14 +58,15 @@ def init input, n, active_chart, passive_chart, grammar input.each_index { |i| if input[i, r.rhs.size] == r.rhs.map { |x| x.word } new_item = Item.new r, i, i+r.rhs.size, r.rhs.size - passive_chart.add i, i+r.rhs.size, new_item + passive_chart.add new_item, i, i+r.rhs.size end } } end def scan item, input, limit, passive_chart - while item.rhs[item.dot].class == T && item.span.right < limit + while item.rhs[item.dot].class == T + return false if item.span.right==limit if item.rhs[item.dot].word == input[item.span.right] item.dot += 1 item.span.right += 1 @@ -74,44 +74,45 @@ def scan item, input, limit, passive_chart break end end + return true end def parse input, n, active_chart, passive_chart, grammar - 2.upto(n) { |span| # outer loop + 2.upto(n) { |span| # outer loops 0.upto(n-span) { |k| puts " span(#{k},#{k+span})" # try to apply rules starting with T - grammar.mixed.select { |r| r.rhs.first.word == input[k] }.each { |r| + grammar.startt.select { |r| r.rhs.first.word == input[k] }.each { |r| new_item = Item.new r, k, k, 0 - scan new_item, input, k+span, passive_chart - active_chart.at(k,k+span) << new_item + active_chart.at(k,k+span) << new_item if scan new_item, input, k+span, passive_chart } # seed active chart - grammar.rewrite.each { |r| + grammar.startn.each { |r| next if r.rhs.size > span active_chart.at(k,k+span) << Item.new(r, k, k, 0) } active_chart.at(k,k+span).each { |active_item| - next if active_item.rhs[active_item.dot].class==T - # inner loop - 1.upto(span-1) { |span2| + 1.upto(span-1) { |span2| # inner loops k.upto((k+span)-span2) { |l| if passive_chart.has active_item.rhs[active_item.dot].symbol, l, l+span2 if l == active_item.span.right new_item = Item.new active_item, active_item.span.left, l+span2, active_item.dot+1 - scan new_item, input, k+span, passive_chart - if new_item.dot == new_item.rhs.size # done with item - if new_item.span.left == k && new_item.span.right == k+span - passive_chart.add k, k+span, new_item - end - else - if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span - active_chart.at(k,k+span) << new_item + if scan new_item, input, k+span, passive_chart + if new_item.dot == new_item.rhs.size # done with item -> passive chart + if new_item.span.left == k && new_item.span.right == k+span + passive_chart.add new_item, k, k+span + end + else + if new_item.rhs[new_item.dot].class == NT + if new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span + active_chart.at(k,k+span) << new_item + end + end end end end @@ -121,6 +122,7 @@ def parse input, n, active_chart, passive_chart, grammar } # 'self-filling' step + # FIXME slow! passive_chart.at(k,k+span).each { |passive_item| active_chart.at(k,k+span).each { |active_item| next if active_item.rhs[active_item.dot].class!=NT @@ -130,9 +132,9 @@ def parse input, n, active_chart, passive_chart, grammar scan new_item, input, k+span, passive_chart if new_item.dot == new_item.rhs.size if new_item.span.left == k && new_item.span.right == k+span - passive_chart.add k, k+span, new_item + passive_chart.add new_item, k, k+span else - puts "#{new_item}" # FIXME never happens + puts "#{new_item}" end else if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span @@ -143,6 +145,7 @@ def parse input, n, active_chart, passive_chart, grammar end } } + } } end @@ -157,17 +160,28 @@ def visit n, depth, skip=0 # FIXME end def main + STDERR.write "> reading input from TODO\n" #input = 'ich sah ein kleines haus'.split - input = 'lebensmittel schuld an europäischer inflation'.split - #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split + #input = 'lebensmittel schuld an europäischer inflation'.split + input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split n = input.size - grammar = Grammar.new 'example/grammar.x' + + STDERR.write "> reading grammar\n" + grammar = Grammar.new 'example/grammar.3.gz' + STDERR.write ">> adding glue grammar\n" grammar.add_glue_rules + STDERR.write ">> adding pass-through grammar\n" + #grammar.add_pass_through_rules input + + STDERR.write "> initializing charts\n" passive_chart = Chart.new n active_chart = Chart.new n init input, n, active_chart, passive_chart, grammar + + STDERR.write "> parsing\n" parse input, n, active_chart, passive_chart, grammar - puts "---\npassive chart" + + puts "\n---\npassive chart" visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s }; puts } end -- cgit v1.2.3