summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-05-08 17:56:31 +0200
committer Patrick Simianer <p@simianer.de> 2014-05-08 17:56:31 +0200
commit c8b4abf55672fb27eca97e921ca0e12057dcc0cf (patch)
tree f29553541df4d63e45246f1f3415070dfbd2aa59
parent 4400add706c01ebf1460701c651d66bbf592cfa5 (diff)
correct, but still slow
-rw-r--r-- example/grammar (renamed from grammar) 4
-rw-r--r-- example/grammar.3.gz bin 0 -> 65565 bytes
-rw-r--r-- example/grammar.x 5
-rw-r--r-- grammar.rb 9
-rw-r--r-- intersect.rb 62
5 files changed, 49 insertions, 31 deletions
diff --git a/grammar b/example/grammar
index 45608e7..1d72ce5 100644
--- a/grammar
+++ b/example/grammar
@@ -1,8 +1,8 @@
[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0
-[S] ||| ich sah ein kleines haus ||| - ||| -
+[S] ||| ich sah ein kleines haus |||
[S] ||| ich [VP,2] ||| i [1] [2] ||| logp=0
[S] ||| ich sah ein [NN,1] haus ||| i saw a [NN,1] house ||| logp=0
-[S] ||| ich [V,1] ein [NN,1] haus ||| i saw a [NN,1] house ||| logp=0
+[S] ||| ich [V,1] ein [NN,1] haus ||| i [1] a [2] house ||| logp=0
[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0
[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0
[NN] ||| kleines haus ||| small house ||| logp=0 use_house=1
diff --git a/example/grammar.3.gz b/example/grammar.3.gz
new file mode 100644
index 0000000..59e6234
--- /dev/null
+++ b/example/grammar.3.gz
Binary files differ
diff --git a/example/grammar.x b/example/grammar.x
new file mode 100644
index 0000000..7ff74b5
--- /dev/null
+++ b/example/grammar.x
@@ -0,0 +1,5 @@
+[X] ||| lebensmittel [X,1]
+[X] ||| schuld [X,1]
+[X] ||| an
+[X] ||| europäischer [X,1]
+[X] ||| inflation
diff --git a/grammar.rb b/grammar.rb
index 3ee8105..8a08cc1 100644
--- a/grammar.rb
+++ b/grammar.rb
@@ -69,11 +69,13 @@ class Grammar
def initialize fn
@rules = []
+ @glue_rules = []
ReadFile.readlines_strip(fn).each_with_index { |s,j|
- #STDERR.write '.'
- #puts "\n" if j%100==0&&j>0
+ STDERR.write '.'
+ STDERR.write "\n" if (j+1)%80==0
@rules << Rule.from_s(s)
}
+ STDERR.write "\n"
end
def to_s
@@ -83,9 +85,12 @@ class Grammar
end
def add_glue_rules
+ # see https://github.com/jweese/thrax/wiki/Glue-grammar
@rules.map { |r| r.lhs.symbol }.reject { |s| s=='S' }.uniq.each { |s|
@rules << Rule.new(NT.new('S'), [NT.new(s)])
+ @glue_rules << @rules.last
@rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')])
+ @glue_rules << @rules.last
}
end
diff --git a/intersect.rb b/intersect.rb
index 0aff6c4..e4db7eb 100644
--- a/intersect.rb
+++ b/intersect.rb
@@ -48,6 +48,7 @@ class Item < Rule
@dot = rule_or_item.dot
else
@span = Span.new
+ @dot = 0
end
end
@@ -60,7 +61,7 @@ def visit n, depth, skip=0
(depth-skip).times { |i|
i += skip
0.upto(n-(i+1)) { |j|
- yield j, j+i+1 if block_given?
+ yield j, j+i+1
}
}
end
@@ -86,7 +87,7 @@ def init active_chart, passive_chart, grammar, input, n
if k == r.rhs.size
passive_chart.add(r, j, j+k, j+k, k)
else
- (j+k).upto(n) { |l| active_chart.add(r, j, l, j+k, k) }
+ (j+k).upto(n) { |l| active_chart.add r, j, l, j+k, k }
end
}
}
@@ -97,23 +98,21 @@ def init active_chart, passive_chart, grammar, input, n
}
end
-def scan item, passive_chart, input, i, j
- while item.rhs[item.dot].class == T
- if item.rhs[item.dot].word == input[item.span.left+item.dot].word
+def scan item, passive_chart, input
+ while item.rhs[item.dot].class == T
+ break if item.span.right > input.size-1
+ if item.rhs[item.dot].word == input[item.span.right].word
item.dot += 1
- item.span.right = item.span.left+item.dot
- if item.dot == item.rhs.size
- passive_chart.add(item, i, j, item.span.left+item.dot, item.dot)
- break
- end
+ item.span.right += 1
+ break if item.dot == item.rhs.size
else
break
end
end
end
-def parse i, j, sz, active_chart, passive_chart, g, input
- active_chart.at(i,j).each_with_index { |active_item,q|
+def parse i, j, sz, active_chart, passive_chart, input
+ active_chart.at(i,j).each_with_index { |active_item,z|
1.upto(sz) { |span|
break if span==(j-i)
i.upto(j-span) { |k|
@@ -123,9 +122,9 @@ def parse i, j, sz, active_chart, passive_chart, g, input
new_item = Item.new active_item
new_item.span.right = passive_item.span.right
new_item.dot += 1
- scan new_item, passive_chart, input, i, j
+ scan new_item, passive_chart, input
if new_item.dot == new_item.rhs.size
- passive_chart.at(i,j) << new_item
+ passive_chart.at(i,j) << new_item if new_item.span.left==i&&new_item.span.right==j
else
active_chart.at(i,j) << new_item
end
@@ -139,13 +138,15 @@ def parse i, j, sz, active_chart, passive_chart, g, input
to_add_passive = []
passive_chart.at(i,j).each { |passive_item|
active_chart.at(i,j).each { |active_item|
+ next if active_item.rhs[active_item.dot].class!=NT
if passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol
+ next if not active_item.span.right==passive_item.span.left
new_item = Item.new active_item
new_item.span.right = passive_item.span.right
new_item.dot += 1
- scan new_item, passive_chart, input, i, j
+ scan new_item, passive_chart, input
if new_item.dot == new_item.rhs.size
- to_add_passive << new_item
+ to_add_passive << new_item if new_item.span.left==i&&new_item.span.right==j
else
to_add_active << new_item
end
@@ -161,29 +162,36 @@ def preprocess s
end
def main
- #input = "ich sah ein kleines haus".split.map { |i| T.new i }
- input = preprocess "lebensmittel schuld an europäischer inflation"
+ #input = preprocess 'ich sah ein kleines haus'
+ #input = preprocess 'lebensmittel schuld an europäischer inflation'
+ input = preprocess 'offizielle sind von nur' # 3 prozent' # ausgegangen , meldete bloomberg .'
n = input.size
- puts "reading grammar .."
- g = Grammar.new 'grammar.x'
+ puts 'reading grammar'
+ #g = Grammar.new 'example/grammar'
+ #g = Grammar.new 'example/grammar.x'
+ g = Grammar.new 'example/grammar.3.gz' # 4th segment of newstest2008
- puts "adding glue rules .."
+ puts 'adding glue rules'
g.add_glue_rules
- #puts "adding pass-through rules .."
+ #puts 'adding pass-through rules'
#g.add_pass_through_rules input
- puts "initializing charts .."
+ puts 'initializing charts'
passive_chart = Chart.new n
active_chart = Chart.new n
init active_chart, passive_chart, g, input, n
- puts "parsing .."
- visit(n, n, 1) { |i,j|
- parse i, j, n, active_chart, passive_chart, g, input
+ active_chart.at(0, 1).each_with_index { |i,x| puts "#{x}. #{i.to_s}" }
+ puts passive_chart.at(0,1).size
+
+ puts 'parsing'
+ visit(n, n, 1) { |i,j|
+ STDERR.write " span (#{i}, #{j})\n"
+ parse i, j, n, active_chart, passive_chart, input
}
-
+
puts "---\npassive chart"
visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s if item.span.left==i&&item.span.right==j }; puts }
end