diff options
author | Patrick Simianer <p@simianer.de> | 2014-05-08 17:56:31 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-05-08 17:56:31 +0200 |
commit | c8b4abf55672fb27eca97e921ca0e12057dcc0cf (patch) | |
tree | f29553541df4d63e45246f1f3415070dfbd2aa59 | |
parent | 4400add706c01ebf1460701c651d66bbf592cfa5 (diff) |
correct, but still slow
-rw-r--r-- | example/grammar (renamed from grammar) | 4 | ||||
-rw-r--r-- | example/grammar.3.gz | bin | 0 -> 65565 bytes | |||
-rw-r--r-- | example/grammar.x | 5 | ||||
-rw-r--r-- | grammar.rb | 9 | ||||
-rw-r--r-- | intersect.rb | 62 |
5 files changed, 49 insertions, 31 deletions
diff --git a/grammar b/example/grammar index 45608e7..1d72ce5 100644 --- a/grammar +++ b/example/grammar @@ -1,8 +1,8 @@ [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 -[S] ||| ich sah ein kleines haus ||| - ||| - +[S] ||| ich sah ein kleines haus ||| [S] ||| ich [VP,2] ||| i [1] [2] ||| logp=0 [S] ||| ich sah ein [NN,1] haus ||| i saw a [NN,1] house ||| logp=0 -[S] ||| ich [V,1] ein [NN,1] haus ||| i saw a [NN,1] house ||| logp=0 +[S] ||| ich [V,1] ein [NN,1] haus ||| i [1] a [2] house ||| logp=0 [NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0 [NN] ||| kleines haus ||| small house ||| logp=0 use_house=1 diff --git a/example/grammar.3.gz b/example/grammar.3.gz Binary files differnew file mode 100644 index 0000000..59e6234 --- /dev/null +++ b/example/grammar.3.gz diff --git a/example/grammar.x b/example/grammar.x new file mode 100644 index 0000000..7ff74b5 --- /dev/null +++ b/example/grammar.x @@ -0,0 +1,5 @@ +[X] ||| lebensmittel [X,1] +[X] ||| schuld [X,1] +[X] ||| an +[X] ||| europäischer [X,1] +[X] ||| inflation @@ -69,11 +69,13 @@ class Grammar def initialize fn @rules = [] + @glue_rules = [] ReadFile.readlines_strip(fn).each_with_index { |s,j| - #STDERR.write '.' - #puts "\n" if j%100==0&&j>0 + STDERR.write '.' + STDERR.write "\n" if (j+1)%80==0 @rules << Rule.from_s(s) } + STDERR.write "\n" end def to_s @@ -83,9 +85,12 @@ class Grammar end def add_glue_rules + # see https://github.com/jweese/thrax/wiki/Glue-grammar @rules.map { |r| r.lhs.symbol }.reject { |s| s=='S' }.uniq.each { |s| @rules << Rule.new(NT.new('S'), [NT.new(s)]) + @glue_rules << @rules.last @rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')]) + @glue_rules << @rules.last } end diff --git a/intersect.rb b/intersect.rb index 0aff6c4..e4db7eb 100644 --- a/intersect.rb +++ b/intersect.rb @@ -48,6 +48,7 @@ class Item < Rule @dot = rule_or_item.dot else @span = Span.new + @dot = 0 end end @@ -60,7 +61,7 @@ def visit n, depth, skip=0 (depth-skip).times { |i| i += skip 0.upto(n-(i+1)) { |j| - yield j, j+i+1 if block_given? + yield j, j+i+1 } } end @@ -86,7 +87,7 @@ def init active_chart, passive_chart, grammar, input, n if k == r.rhs.size passive_chart.add(r, j, j+k, j+k, k) else - (j+k).upto(n) { |l| active_chart.add(r, j, l, j+k, k) } + (j+k).upto(n) { |l| active_chart.add r, j, l, j+k, k } end } } @@ -97,23 +98,21 @@ def init active_chart, passive_chart, grammar, input, n } end -def scan item, passive_chart, input, i, j - while item.rhs[item.dot].class == T - if item.rhs[item.dot].word == input[item.span.left+item.dot].word +def scan item, passive_chart, input + while item.rhs[item.dot].class == T + break if item.span.right > input.size-1 + if item.rhs[item.dot].word == input[item.span.right].word item.dot += 1 - item.span.right = item.span.left+item.dot - if item.dot == item.rhs.size - passive_chart.add(item, i, j, item.span.left+item.dot, item.dot) - break - end + item.span.right += 1 + break if item.dot == item.rhs.size else break end end end -def parse i, j, sz, active_chart, passive_chart, g, input - active_chart.at(i,j).each_with_index { |active_item,q| +def parse i, j, sz, active_chart, passive_chart, input + active_chart.at(i,j).each_with_index { |active_item,z| 1.upto(sz) { |span| break if span==(j-i) i.upto(j-span) { |k| @@ -123,9 +122,9 @@ def parse i, j, sz, active_chart, passive_chart, g, input new_item = Item.new active_item new_item.span.right = passive_item.span.right new_item.dot += 1 - scan new_item, passive_chart, input, i, j + scan new_item, passive_chart, input if new_item.dot == new_item.rhs.size - passive_chart.at(i,j) << new_item + passive_chart.at(i,j) << new_item if new_item.span.left==i&&new_item.span.right==j else active_chart.at(i,j) << new_item end @@ -139,13 +138,15 @@ def parse i, j, sz, active_chart, passive_chart, g, input to_add_passive = [] passive_chart.at(i,j).each { |passive_item| active_chart.at(i,j).each { |active_item| + next if active_item.rhs[active_item.dot].class!=NT if passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol + next if not active_item.span.right==passive_item.span.left new_item = Item.new active_item new_item.span.right = passive_item.span.right new_item.dot += 1 - scan new_item, passive_chart, input, i, j + scan new_item, passive_chart, input if new_item.dot == new_item.rhs.size - to_add_passive << new_item + to_add_passive << new_item if new_item.span.left==i&&new_item.span.right==j else to_add_active << new_item end @@ -161,29 +162,36 @@ def preprocess s end def main - #input = "ich sah ein kleines haus".split.map { |i| T.new i } - input = preprocess "lebensmittel schuld an europäischer inflation" + #input = preprocess 'ich sah ein kleines haus' + #input = preprocess 'lebensmittel schuld an europäischer inflation' + input = preprocess 'offizielle sind von nur' # 3 prozent' # ausgegangen , meldete bloomberg .' n = input.size - puts "reading grammar .." - g = Grammar.new 'grammar.x' + puts 'reading grammar' + #g = Grammar.new 'example/grammar' + #g = Grammar.new 'example/grammar.x' + g = Grammar.new 'example/grammar.3.gz' # 4th segment of newstest2008 - puts "adding glue rules .." + puts 'adding glue rules' g.add_glue_rules - #puts "adding pass-through rules .." + #puts 'adding pass-through rules' #g.add_pass_through_rules input - puts "initializing charts .." + puts 'initializing charts' passive_chart = Chart.new n active_chart = Chart.new n init active_chart, passive_chart, g, input, n - puts "parsing .." - visit(n, n, 1) { |i,j| - parse i, j, n, active_chart, passive_chart, g, input + active_chart.at(0, 1).each_with_index { |i,x| puts "#{x}. #{i.to_s}" } + puts passive_chart.at(0,1).size + + puts 'parsing' + visit(n, n, 1) { |i,j| + STDERR.write " span (#{i}, #{j})\n" + parse i, j, n, active_chart, passive_chart, input } - + puts "---\npassive chart" visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s if item.span.left==i&&item.span.right==j }; puts } end |