summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-05-20 16:38:11 +0200
committerPatrick Simianer <p@simianer.de>2014-05-20 16:38:11 +0200
commitea5a910c2e1cb8e22a6d21bc2566428469a6f53b (patch)
treeafe65053245dd8c6cc372e3a8533bfa0aafca5e0
parent71b3702396943872e13d109031b81c8a6fdc1d35 (diff)
need to fix self-filling
-rw-r--r--animate.rb17
-rw-r--r--grammar.rb41
-rw-r--r--parse.rb66
3 files changed, 77 insertions, 47 deletions
diff --git a/animate.rb b/animate.rb
new file mode 100644
index 0000000..558146f
--- /dev/null
+++ b/animate.rb
@@ -0,0 +1,17 @@
+s = 'ich sah ein kleines haus .'
+def visit n, depth, skip=0 # FIXME
+ (depth-skip).times { |i|
+ i += skip
+ 0.upto(n-(i+1)) { |j|
+ yield j, j+i+1
+ }
+ }
+end
+
+0,5
+
+
+1 2
+0,1 1,2 2,3 3,4 4,5
+ich sah ein kleines haus
+
diff --git a/grammar.rb b/grammar.rb
index 4183a8f..b7d2408 100644
--- a/grammar.rb
+++ b/grammar.rb
@@ -41,18 +41,18 @@ class Rule
end
def arity
- rhs.reject { |i| i.class==T }.size
+ rhs.select { |i| i.class == NT }.size
end
def from_s s
_ = splitpipe s, 3
@lhs = NT.new _[0].strip.gsub!(/(\[|\])/, "")
- _[1].split.each { |i|
- i.strip!
- if i[0]=='[' && i[i.size-1] == ']'
- @rhs << NT.new(i.gsub!(/(\[|\])/, "").split(',')[0])
+ _[1].split.each { |x|
+ x.strip!
+ if x[0]=='[' && x[x.size-1] == ']'
+ @rhs << NT.new(x.gsub!(/(\[|\])/, "").split(',')[0])
else
- @rhs << T.new(i)
+ @rhs << T.new(x)
end
}
end
@@ -60,26 +60,25 @@ class Rule
def self.from_s s
r = self.new
r.from_s s
- r
+ return r
end
end
class Grammar
- attr_accessor :rules, :rewrite, :flat, :mixed
+ attr_accessor :rules, :startn, :startt, :flat
def initialize fn
- @rules = []; @rewrite = []; @flat = []; @mixed = []
+ @rules = []; @startn = []; @startt = [] ;@flat = []
ReadFile.readlines_strip(fn).each_with_index { |s,i|
- STDERR.write '.'
- STDERR.write "\n" if (i+1)%80==0
+ STDERR.write '.'; STDERR.write "\n" if (i+1)%80==0
@rules << Rule.from_s(s)
if @rules.last.rhs.first.class == NT
- @rewrite << @rules.last
+ @startn << @rules.last
else
if rules.last.arity == 0
@flat << @rules.last
else
- @mixed << @rules.last
+ @startt << @rules.last
end
end
}
@@ -89,21 +88,21 @@ class Grammar
def to_s
s = ''
@rules.each { |r| s += r.to_s+"\n" }
- s
+ return s
end
def add_glue_rules
- @rules.map { |r| r.lhs.symbol }.reject { |s| s=='S' }.uniq.each { |s|
- @rules << Rule.new(NT.new('S'), [NT.new(s)])
- @rewrite << @rules.last
+ @rules.map { |r| r.lhs.symbol }.select { |s| s != 'S' }.uniq.each { |symbol|
+ @rules << Rule.new(NT.new('S'), [NT.new(symbol)])
+ @startn << @rules.last
@rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')])
- @rewrite << @rules.last
+ @startn << @rules.last
}
end
- def add_pass_through_rules input
- input.each { |terminal|
- @rules << Rule.new(NT.new('X'), [T.new(terminal.word)])
+ def add_pass_through_rules s
+ s.each { |word|
+ @rules << Rule.new(NT.new('X'), [T.new(word)])
@flat << @rules.last
}
end
diff --git a/parse.rb b/parse.rb
index 0d82c1d..cb428d1 100644
--- a/parse.rb
+++ b/parse.rb
@@ -1,7 +1,6 @@
#!/usr/bin/env ruby
require_relative './grammar.rb'
-STDOUT.sync = true
class Chart
@@ -20,7 +19,7 @@ class Chart
@m[i][j]
end
- def add i, j, item
+ def add item, i, j
at(i,j) << item
@b["#{i},#{j},#{item.lhs.symbol}"] = true
end
@@ -59,14 +58,15 @@ def init input, n, active_chart, passive_chart, grammar
input.each_index { |i|
if input[i, r.rhs.size] == r.rhs.map { |x| x.word }
new_item = Item.new r, i, i+r.rhs.size, r.rhs.size
- passive_chart.add i, i+r.rhs.size, new_item
+ passive_chart.add new_item, i, i+r.rhs.size
end
}
}
end
def scan item, input, limit, passive_chart
- while item.rhs[item.dot].class == T && item.span.right < limit
+ while item.rhs[item.dot].class == T
+ return false if item.span.right==limit
if item.rhs[item.dot].word == input[item.span.right]
item.dot += 1
item.span.right += 1
@@ -74,44 +74,45 @@ def scan item, input, limit, passive_chart
break
end
end
+ return true
end
def parse input, n, active_chart, passive_chart, grammar
- 2.upto(n) { |span| # outer loop
+ 2.upto(n) { |span| # outer loops
0.upto(n-span) { |k|
puts " span(#{k},#{k+span})"
# try to apply rules starting with T
- grammar.mixed.select { |r| r.rhs.first.word == input[k] }.each { |r|
+ grammar.startt.select { |r| r.rhs.first.word == input[k] }.each { |r|
new_item = Item.new r, k, k, 0
- scan new_item, input, k+span, passive_chart
- active_chart.at(k,k+span) << new_item
+ active_chart.at(k,k+span) << new_item if scan new_item, input, k+span, passive_chart
}
# seed active chart
- grammar.rewrite.each { |r|
+ grammar.startn.each { |r|
next if r.rhs.size > span
active_chart.at(k,k+span) << Item.new(r, k, k, 0)
}
active_chart.at(k,k+span).each { |active_item|
- next if active_item.rhs[active_item.dot].class==T
- # inner loop
- 1.upto(span-1) { |span2|
+ 1.upto(span-1) { |span2| # inner loops
k.upto((k+span)-span2) { |l|
if passive_chart.has active_item.rhs[active_item.dot].symbol, l, l+span2
if l == active_item.span.right
new_item = Item.new active_item, active_item.span.left, l+span2, active_item.dot+1
- scan new_item, input, k+span, passive_chart
- if new_item.dot == new_item.rhs.size # done with item
- if new_item.span.left == k && new_item.span.right == k+span
- passive_chart.add k, k+span, new_item
- end
- else
- if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span
- active_chart.at(k,k+span) << new_item
+ if scan new_item, input, k+span, passive_chart
+ if new_item.dot == new_item.rhs.size # done with item -> passive chart
+ if new_item.span.left == k && new_item.span.right == k+span
+ passive_chart.add new_item, k, k+span
+ end
+ else
+ if new_item.rhs[new_item.dot].class == NT
+ if new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span
+ active_chart.at(k,k+span) << new_item
+ end
+ end
end
end
end
@@ -121,6 +122,7 @@ def parse input, n, active_chart, passive_chart, grammar
}
# 'self-filling' step
+ # FIXME slow!
passive_chart.at(k,k+span).each { |passive_item|
active_chart.at(k,k+span).each { |active_item|
next if active_item.rhs[active_item.dot].class!=NT
@@ -130,9 +132,9 @@ def parse input, n, active_chart, passive_chart, grammar
scan new_item, input, k+span, passive_chart
if new_item.dot == new_item.rhs.size
if new_item.span.left == k && new_item.span.right == k+span
- passive_chart.add k, k+span, new_item
+ passive_chart.add new_item, k, k+span
else
- puts "#{new_item}" # FIXME never happens
+ puts "#{new_item}"
end
else
if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span
@@ -143,6 +145,7 @@ def parse input, n, active_chart, passive_chart, grammar
end
}
}
+
}
}
end
@@ -157,17 +160,28 @@ def visit n, depth, skip=0 # FIXME
end
def main
+ STDERR.write "> reading input from TODO\n"
#input = 'ich sah ein kleines haus'.split
- input = 'lebensmittel schuld an europäischer inflation'.split
- #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
+ #input = 'lebensmittel schuld an europäischer inflation'.split
+ input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
n = input.size
- grammar = Grammar.new 'example/grammar.x'
+
+ STDERR.write "> reading grammar\n"
+ grammar = Grammar.new 'example/grammar.3.gz'
+ STDERR.write ">> adding glue grammar\n"
grammar.add_glue_rules
+ STDERR.write ">> adding pass-through grammar\n"
+ #grammar.add_pass_through_rules input
+
+ STDERR.write "> initializing charts\n"
passive_chart = Chart.new n
active_chart = Chart.new n
init input, n, active_chart, passive_chart, grammar
+
+ STDERR.write "> parsing\n"
parse input, n, active_chart, passive_chart, grammar
- puts "---\npassive chart"
+
+ puts "\n---\npassive chart"
visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s }; puts }
end