summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-05-07 22:05:31 +0200
committer Patrick Simianer <p@simianer.de> 2014-05-07 22:05:31 +0200
commit 4400add706c01ebf1460701c651d66bbf592cfa5 (patch)
tree 99ff57cc48fbff7720b5a0c1507cf389122963c6
parent 92638dbe20317d2cccf8258c5859af91617f53bb (diff)
added self-filling, fixing stuff
-rw-r--r-- grammar.rb 8
-rw-r--r-- hg.rb 5
-rw-r--r-- intersect.rb 79
3 files changed, 60 insertions, 32 deletions
diff --git a/grammar.rb b/grammar.rb
index cf321f8..3ee8105 100644
--- a/grammar.rb
+++ b/grammar.rb
@@ -70,8 +70,8 @@ class Grammar
def initialize fn
@rules = []
ReadFile.readlines_strip(fn).each_with_index { |s,j|
- STDERR.write '.'
- STDERR.write "\n" if j%100==0&&j>0
+ #STDERR.write '.'
+ #puts "\n" if j%100==0&&j>0
@rules << Rule.from_s(s)
}
end
@@ -90,8 +90,8 @@ class Grammar
end
def add_pass_through_rules input
- input.each { |w|
- @rules << Rule.new(NT.new('X'), [T.new(w)])
+ input.each { |terminal|
+ @rules << Rule.new(NT.new('X'), [T.new(terminal.word)])
}
end
end
diff --git a/hg.rb b/hg.rb
index f4a9cc4..e69de29 100644
--- a/hg.rb
+++ b/hg.rb
@@ -1,5 +0,0 @@
-require nlp_ruby
-
-def s2lattice s="ich sah ein kleines haus"
- tokenize s
-end
diff --git a/intersect.rb b/intersect.rb
index 17e249f..0aff6c4 100644
--- a/intersect.rb
+++ b/intersect.rb
@@ -1,6 +1,7 @@
#!/usr/bin/env ruby
require_relative './grammar.rb'
+STDOUT.sync = true
class Chart
@@ -105,54 +106,86 @@ def scan item, passive_chart, input, i, j
passive_chart.add(item, i, j, item.span.left+item.dot, item.dot)
break
end
+ else
+ break
end
end
end
def parse i, j, sz, active_chart, passive_chart, g, input
- 1.upto(sz) { |span|
- break if span==(j-i)
- i.upto(j-span) { |k|
- STDERR.write "active chart size #{active_chart.at(i,j).size}\n"
- active_chart.at(i,j).each { |active_item|
+ active_chart.at(i,j).each_with_index { |active_item,q|
+ 1.upto(sz) { |span|
+ break if span==(j-i)
+ i.upto(j-span) { |k|
passive_chart.at(k, k+span).each { |passive_item|
- STDERR.write " passive chart size #{passive_chart.at(k,k+span).size}\n"
if active_item.rhs[active_item.dot].class==NT && passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol
next if not active_item.span.right==passive_item.span.left
- active_item.span.right = passive_item.span.right
- active_item.dot += 1
- scan active_item, passive_chart, input, i, j
- passive_chart.at(i,j) << Item.new(active_item) if active_item.dot==active_item.rhs.size
+ new_item = Item.new active_item
+ new_item.span.right = passive_item.span.right
+ new_item.dot += 1
+ scan new_item, passive_chart, input, i, j
+ if new_item.dot == new_item.rhs.size
+ passive_chart.at(i,j) << new_item
+ else
+ active_chart.at(i,j) << new_item
+ end
end
}
}
}
}
+ # self-filling
+ to_add_active = []
+ to_add_passive = []
+ passive_chart.at(i,j).each { |passive_item|
+ active_chart.at(i,j).each { |active_item|
+ if passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol
+ new_item = Item.new active_item
+ new_item.span.right = passive_item.span.right
+ new_item.dot += 1
+ scan new_item, passive_chart, input, i, j
+ if new_item.dot == new_item.rhs.size
+ to_add_passive << new_item
+ else
+ to_add_active << new_item
+ end
+ end
+ }
+ }
+ to_add_active.each { |item| active_chart.at(i,j) << item }
+ to_add_passive.each { |item| passive_chart.at(i,j) << item }
+end
+
+def preprocess s
+ s.split.map { |i| T.new i }
end
def main
#input = "ich sah ein kleines haus".split.map { |i| T.new i }
- #input = "musharrafs letzter akt ?".split.map { |i| T.new i }
- input = "das ukrainische parlament verweigerte heute den antrag , im rahmen einer novelle des strafgesetzbuches denjenigen paragrafen abzuschaffen , auf dessen grundlage die oppositionsfĂĽhrerin yulia timoshenko verurteilt worden war .".split.map { |i| T.new i }
+ input = preprocess "lebensmittel schuld an europäischer inflation"
n = input.size
- #g = Grammar.new 'grammar'
- STDERR.write "reading grammar ..\n"
- #g = Grammar.new '/home/pks/src/examples/cdec/data/grammar.gz'
- g = Grammar.new 'grammar.1.gz'
- STDERR.write "\nadding glue rules ..\n"
+
+ puts "reading grammar .."
+ g = Grammar.new 'grammar.x'
+
+ puts "adding glue rules .."
g.add_glue_rules
- STDERR.write "adding pass-through rules ..\n"
- g.add_pass_through_rules input
+
+ #puts "adding pass-through rules .."
+ #g.add_pass_through_rules input
+
+ puts "initializing charts .."
passive_chart = Chart.new n
active_chart = Chart.new n
- STDERR.write "initializing charts ..\n"
init active_chart, passive_chart, g, input, n
- STDERR.write "parsing ..\n\n"
+
+ puts "parsing .."
visit(n, n, 1) { |i,j|
- STDERR.write " span (#{i},#{j})\n\n"
parse i, j, n, active_chart, passive_chart, g, input
}
- visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts item.to_s } }
+
+ puts "---\npassive chart"
+ visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s if item.span.left==i&&item.span.right==j }; puts }
end