From 92638dbe20317d2cccf8258c5859af91617f53bb Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 7 May 2014 16:46:39 +0200 Subject: ja, it's slow --- grammar.rb | 72 +++++++++++++++++++++++++++++++++--------------------------- intersect.rb | 61 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 85 insertions(+), 48 deletions(-) diff --git a/grammar.rb b/grammar.rb index b516912..cf321f8 100644 --- a/grammar.rb +++ b/grammar.rb @@ -1,68 +1,60 @@ require 'nlp_ruby' -class Terminal - attr_accessor :w +class T + attr_accessor :word - def initialize s - @w = s + def initialize word + @word = word end def to_s - "T<#{@w}>" + "T<#{@word}>" end end -class NonTerminal - attr_accessor :sym, :idx +class NT + attr_accessor :symbol, :index - def initialize sym, idx=0 - @sym = sym - @idx = idx + def initialize symbol, index=0 + @symbol = symbol + @index = index end def to_s - "NT<#{sym},#{idx}>" + "NT<#{@symbol},#{@index}>" end end -class Span - attr_accessor :left, :right - def initialize left=nil, right=nil - @left = left - @right = right - end -end class Rule - attr_accessor :lhs, :rhs, :span + attr_accessor :lhs, :rhs - def initialize lhs=nil, rhs=nil, span=nil - @lhs = '' - @rhs = [] + def initialize lhs=nil, rhs=[] + @lhs = lhs + @rhs = rhs end def to_s - "#{lhs} -> #{rhs.map{|i|i.to_s}.join ' '} a:#{arity} (#{@span.left}, #{@span.right})" + "#{lhs} -> #{rhs.map{ |i| i.to_s }.join ' '} [arity=#{arity}]" end def arity - rhs.reject { |i| i.class == Terminal }.size + rhs.reject { |i| i.class==T }.size end def from_s s - a = splitpipe s, 3 - @lhs = NonTerminal.new a[0].strip.gsub!(/(\[|\])/, "") - a[1].split.each { |i| + _ = splitpipe s, 3 + @lhs = NT.new _[0].strip.gsub!(/(\[|\])/, "") + _[1].split.each { |i| i.strip! if i[0]=='[' && i[i.size-1] == ']' - @rhs << NonTerminal.new(i.gsub!(/(\[|\])/, "").split(',')[0]) + @rhs << NT.new(i.gsub!(/(\[|\])/, "").split(',')[0]) else - @rhs << Terminal.new(i) + @rhs << T.new(i) end } - @span = Span.new end def self.from_s s @@ -77,8 +69,11 @@ class Grammar def initialize fn @rules = [] - a = ReadFile.readlines_strip fn - a.each { |s| @rules << Rule.from_s(s) } + ReadFile.readlines_strip(fn).each_with_index { |s,j| + STDERR.write '.' + STDERR.write "\n" if j%100==0&&j>0 + @rules << Rule.from_s(s) + } end def to_s @@ -86,5 +81,18 @@ class Grammar @rules.each { |r| s += r.to_s+"\n" } s end + + def add_glue_rules + @rules.map { |r| r.lhs.symbol }.reject { |s| s=='S' }.uniq.each { |s| + @rules << Rule.new(NT.new('S'), [NT.new(s)]) + @rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')]) + } + end + + def add_pass_through_rules input + input.each { |w| + @rules << Rule.new(NT.new('X'), [T.new(w)]) + } + end end diff --git a/intersect.rb b/intersect.rb index 5894f40..17e249f 100644 --- a/intersect.rb +++ b/intersect.rb @@ -27,14 +27,27 @@ class Chart end end +class Span + attr_accessor :left, :right + + def initialize left=nil, right=nil + @left = left + @right = right + end +end + class Item < Rule attr_accessor :lhs, :rhs, :span, :dot - def initialize rule - @lhs = rule.lhs.dup - @rhs = rule.rhs.dup - @span = Span.new rule.span.left, rule.span.right - @dot = rule.dot if rule.class==Item + def initialize rule_or_item + @lhs = rule_or_item.lhs.dup + @rhs = rule_or_item.rhs.dup + if rule_or_item.class == Item + @span = Span.new rule_or_item.span.left, rule_or_item.span.right + @dot = rule_or_item.dot + else + @span = Span.new + end end def to_s @@ -53,17 +66,17 @@ end def init active_chart, passive_chart, grammar, input, n # pre-fill passive chart w/ 0-arity rules - grammar.rules.select { |r| r.rhs.first.class==Terminal }.each { |r| - input.each_index.select { |i| input[i].w==r.rhs.first.w }.each { |j| + grammar.rules.select { |r| r.rhs.first.class==T }.each { |r| + input.each_index.select { |i| input[i].word==r.rhs.first.word }.each { |j| k = 1 if r.rhs.size > 1 - z = r.rhs.index { |i| i.class==NonTerminal } + z = r.rhs.index { |i| i.class==NT } if z z -= 1 else z = r.rhs.size-1 end - if input[j..j+z].map { |i| i.w } == r.rhs[0..z].map { |i| i.w } + if input[j..j+z].map { |i| i.word } == r.rhs[0..z].map { |i| i.word } k = z+1 else next @@ -77,15 +90,15 @@ def init active_chart, passive_chart, grammar, input, n } } # seed active chart - s = grammar.rules.reject { |r| r.rhs.first.class!=NonTerminal } + s = grammar.rules.reject { |r| r.rhs.first.class!=NT } visit(n, n, 1) { |i,j| s.each { |r| active_chart.add(r, i, j, i) } } end def scan item, passive_chart, input, i, j - while item.rhs[item.dot].class == Terminal - if item.rhs[item.dot].w == input[item.span.left+item.dot].w + while item.rhs[item.dot].class == T + if item.rhs[item.dot].word == input[item.span.left+item.dot].word item.dot += 1 item.span.right = item.span.left+item.dot if item.dot == item.rhs.size @@ -100,9 +113,11 @@ def parse i, j, sz, active_chart, passive_chart, g, input 1.upto(sz) { |span| break if span==(j-i) i.upto(j-span) { |k| + STDERR.write "active chart size #{active_chart.at(i,j).size}\n" active_chart.at(i,j).each { |active_item| passive_chart.at(k, k+span).each { |passive_item| - if active_item.rhs[active_item.dot].class==NonTerminal && passive_item.lhs.sym == active_item.rhs[active_item.dot].sym + STDERR.write " passive chart size #{passive_chart.at(k,k+span).size}\n" + if active_item.rhs[active_item.dot].class==NT && passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol next if not active_item.span.right==passive_item.span.left active_item.span.right = passive_item.span.right active_item.dot += 1 @@ -116,13 +131,27 @@ def parse i, j, sz, active_chart, passive_chart, g, input end def main - g = Grammar.new 'grammar' - input = "ich sah ein kleines haus".split.map { |i| Terminal.new i } + #input = "ich sah ein kleines haus".split.map { |i| T.new i } + #input = "musharrafs letzter akt ?".split.map { |i| T.new i } + input = "das ukrainische parlament verweigerte heute den antrag , im rahmen einer novelle des strafgesetzbuches denjenigen paragrafen abzuschaffen , auf dessen grundlage die oppositionsführerin yulia timoshenko verurteilt worden war .".split.map { |i| T.new i } n = input.size + #g = Grammar.new 'grammar' + STDERR.write "reading grammar ..\n" + #g = Grammar.new '/home/pks/src/examples/cdec/data/grammar.gz' + g = Grammar.new 'grammar.1.gz' + STDERR.write "\nadding glue rules ..\n" + g.add_glue_rules + STDERR.write "adding pass-through rules ..\n" + g.add_pass_through_rules input passive_chart = Chart.new n active_chart = Chart.new n + STDERR.write "initializing charts ..\n" init active_chart, passive_chart, g, input, n - visit(n, n, 1) { |i,j| parse i, j, n, active_chart, passive_chart, g, input } + STDERR.write "parsing ..\n\n" + visit(n, n, 1) { |i,j| + STDERR.write " span (#{i},#{j})\n\n" + parse i, j, n, active_chart, passive_chart, g, input + } visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts item.to_s } } end -- cgit v1.2.3