diff options
author | Patrick Simianer <p@simianer.de> | 2014-05-02 19:05:29 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-05-02 19:05:29 +0200 |
commit | f714aa1be20cc92d77cc3cffd203a848868d98e2 (patch) | |
tree | 4d8b4d63f9d618b8cf734b2ff18c653ea38b908e | |
parent | 2f65a33d9e55bd53de73389330a8c36665a98b03 (diff) |
arity-0 rules with rhs.size > 1
-rw-r--r-- | grammar | 1 | ||||
-rw-r--r-- | grammar.rb | 8 | ||||
-rw-r--r-- | intersect.rb | 100 |
3 files changed, 35 insertions, 74 deletions
@@ -2,6 +2,7 @@ [S] ||| ich [VP,2] ||| i [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0 +[NN] ||| kleines haus ||| small house ||| logp=0 use_house=1 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 [JJ] ||| kleines ||| small ||| logp=0 use_small=1.0 @@ -1,5 +1,6 @@ require 'nlp_ruby' + class Terminal attr_accessor :w @@ -44,7 +45,7 @@ class Rule end def to_s - "#{lhs} -> #{rhs.map{|i|i.to_s}.join ' '} (#{arity}) (#{@span.left}, #{@span.right})" + "#{lhs} -> #{rhs.map{|i|i.to_s}.join ' '} a:#{arity} (#{@span.left}, #{@span.right})" end def arity @@ -89,8 +90,3 @@ class Grammar end end - - - - - diff --git a/intersect.rb b/intersect.rb index 41ddfd6..d8404a2 100644 --- a/intersect.rb +++ b/intersect.rb @@ -2,13 +2,10 @@ require_relative './grammar.rb' -# interescts a WFSA (WFST) with a WSCFG to produce a hypergraph - - class Chart attr_accessor :m - + def initialize input @m = [] (input.size+1).times { @@ -18,10 +15,6 @@ class Chart } end - def at span - return @m[span.left][span.right] - end - def at i,j return @m[i][j] end @@ -41,47 +34,35 @@ class Item < Rule end def to_s - "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} (dot:#{@dot}) (a:#{arity}) (#{@span.left}, #{@span.right})" + "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} dot:#{@dot} a:#{arity} (#{@span.left}, #{@span.right})" end end - - -#def shift -#end -#def reduce -#end -#def init - #axioms,goals etc -#end -#def parse -#end - - +# set-up g = Grammar.new 'grammar' input = "ich sah ein kleines haus".split.map { |i| Terminal.new i } -#start = NonTerminal.new 'S' - passive_chart = Chart.new input active_chart = Chart.new input - - -# 1st row: arity0 rules -unary_rules = g.rules.reject { |r| r.arity>0 } -input.each_with_index { |t,j| - unary_rules.each { |r| - if r.rhs.first.w == t.w - passive_chart.at(j,j+1) << Item.new(r) - passive_chart.at(j,j+1).last.span.left = j - passive_chart.at(j,j+1).last.span.right = j+1 - passive_chart.at(j,j+1).last.dot = 1 +# pre-fill passive chart w/ arity-0 rules +g.rules.reject { |r| r.arity>0 }.each { |r| + input.each_index.select{ |j| input[j].w==r.rhs.first.w }.each { |j| + k = 1 + if r.rhs.size > 1 + slice = input[j,j+(r.rhs.size-1)].map { |i| i.w } + if slice == r.rhs.map { |i| i.w } + k = r.rhs.size + else + next + end end + passive_chart.at(j,j+k) << Item.new(r) + passive_chart.at(j,j+k).last.span.left = j + passive_chart.at(j,j+k).last.span.right = j+k + passive_chart.at(j,j+k).last.dot = k } } - - # pre-fill active chart with non 0arity rules starting with a (single(!)) T g.rules.reject{|r| r.rhs[0].class==NonTerminal||r.arity==0}.each {|r| input.each_with_index { |i,j| @@ -89,12 +70,13 @@ g.rules.reject{|r| r.rhs[0].class==NonTerminal||r.arity==0}.each {|r| (j+2).upto(input.size) { |k| # no empty NT! active_chart.at(j,k) << Item.new(r) active_chart.at(j,k).last.span.left = j - active_chart.at(j,k).last.span.right = j+1 # k + active_chart.at(j,k).last.span.right = j+1 active_chart.at(j,k).last.dot = 1 } end } } + # pre-fill active chart s = g.rules.reject { |r| r.rhs.first.class!=NonTerminal}#.reject{|r| r.lhs.sym == 'S'} (input.size).times { |k| @@ -102,22 +84,16 @@ s = g.rules.reject { |r| r.rhs.first.class!=NonTerminal}#.reject{|r| r.lhs.sym = s.each { |r| active_chart.at(i,i+k+2) << Item.new(r) active_chart.at(i,i+k+2).last.span.left = i - active_chart.at(i,i+k+2).last.span.right = i #i+k+2 + active_chart.at(i,i+k+2).last.span.right = i active_chart.at(i,i+k+2).last.dot = 0 } } } - - - - - -# parse - -def scan i, j, active_chart, input +# scan +def scan i, j, active_chart, passive_chart, input active_chart.at(i,j).each { |item| - if item.rhs[item.dot].class == Terminal + while item.rhs[item.dot].class == Terminal if item.rhs[item.dot].w == input[item.span.left+item.dot].w item.dot += 1 if item.dot == item.rhs.size @@ -129,14 +105,12 @@ def scan i, j, active_chart, input } end - -puts "parse" +# parse def visit i, j, sz, active_chart, passive_chart, g, input puts "| #{i},#{j}" # SCAN - scan i, j, active_chart, input - + scan i, j, active_chart, passive_chart, input 1.upto(sz) { |span| break if span==(j-i) @@ -147,7 +121,7 @@ def visit i, j, sz, active_chart, passive_chart, g, input passive_chart.at(k, k+span).each { |passive_item| if active_item.rhs[active_item.dot].class==NonTerminal && passive_item.lhs.sym == active_item.rhs[active_item.dot].sym next if not active_item.span.right==passive_item.span.left - active_item.span.right = passive_item.span.right #if passive_item.span.right > active_item.span.right + active_item.span.right = passive_item.span.right active_item.dot += 1 if active_item.dot == active_item.rhs.size passive_chart.at(i,j) << Item.new(active_item) @@ -157,30 +131,20 @@ def visit i, j, sz, active_chart, passive_chart, g, input } } - # SCAN - active_chart.at(i,j).each { |item| - if item.rhs[item.dot].class == Terminal - if item.rhs[item.dot].w == input[item.span.left+item.dot].w - item.dot += 1 - if item.dot == item.rhs.size - passive_chart.at(i,j) << Item.new(item) - passive_chart.at(i,j).last.span.right = item.span.left+item.dot - end - end - end - } + scan i, j, active_chart, passive_chart, input } end +# once for each cell (input.size).times { |k| 0.upto(input.size-(k+2)) { |i| - visit i, i+k+2, input.size, active_chart, passive_chart, g, input + visit i, i+k+2, input.size, active_chart, passive_chart, g, input } } -puts "---" -active_chart.at(0,5).each { |item| +puts "---" +passive_chart.at(3,5).each { |item| puts item.to_s } |