summaryrefslogtreecommitdiff
path: root/parse.rb
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-05-20 14:42:33 +0200
committerPatrick Simianer <p@simianer.de>2014-05-20 14:42:33 +0200
commitda646adc508083d69dea995ef8692840377fa61e (patch)
tree23d742d9de664db95bf01a858b0acbf6334aa599 /parse.rb
parentb385228969382185afee33d0661790b2831ac6c1 (diff)
cleanup
Diffstat (limited to 'parse.rb')
-rw-r--r--parse.rb176
1 files changed, 176 insertions, 0 deletions
diff --git a/parse.rb b/parse.rb
new file mode 100644
index 0000000..0d82c1d
--- /dev/null
+++ b/parse.rb
@@ -0,0 +1,176 @@
+#!/usr/bin/env ruby
+
+require_relative './grammar.rb'
+STDOUT.sync = true
+
+
+class Chart
+
+ def initialize n
+ @m = []
+ (n+1).times {
+ _ = []
+ (n+1).times { _ << [] }
+ @m << _
+ }
+ @b = {}
+ end
+
+ def at i, j
+ @m[i][j]
+ end
+
+ def add i, j, item
+ at(i,j) << item
+ @b["#{i},#{j},#{item.lhs.symbol}"] = true
+ end
+
+ def has symbol, i, j
+ return @b["#{i},#{j},#{symbol}"]
+ end
+end
+
+class Span
+ attr_accessor :left, :right
+
+ def initialize left=nil, right=nil
+ @left = left
+ @right = right
+ end
+end
+
+class Item < Rule
+ attr_accessor :lhs, :rhs, :span, :dot
+
+ def initialize rule_or_item, left, right, dot
+ @lhs = rule_or_item.lhs
+ @rhs = rule_or_item.rhs.dup
+ @span = Span.new left, right
+ @dot = dot
+ end
+
+ def to_s
+ "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@span.left}, #{@span.right})"
+ end
+end
+
+def init input, n, active_chart, passive_chart, grammar
+ grammar.flat.each { |r|
+ input.each_index { |i|
+ if input[i, r.rhs.size] == r.rhs.map { |x| x.word }
+ new_item = Item.new r, i, i+r.rhs.size, r.rhs.size
+ passive_chart.add i, i+r.rhs.size, new_item
+ end
+ }
+ }
+end
+
+def scan item, input, limit, passive_chart
+ while item.rhs[item.dot].class == T && item.span.right < limit
+ if item.rhs[item.dot].word == input[item.span.right]
+ item.dot += 1
+ item.span.right += 1
+ else
+ break
+ end
+ end
+end
+
+def parse input, n, active_chart, passive_chart, grammar
+ 2.upto(n) { |span| # outer loop
+ 0.upto(n-span) { |k|
+
+ puts " span(#{k},#{k+span})"
+
+ # try to apply rules starting with T
+ grammar.mixed.select { |r| r.rhs.first.word == input[k] }.each { |r|
+ new_item = Item.new r, k, k, 0
+ scan new_item, input, k+span, passive_chart
+ active_chart.at(k,k+span) << new_item
+ }
+
+ # seed active chart
+ grammar.rewrite.each { |r|
+ next if r.rhs.size > span
+ active_chart.at(k,k+span) << Item.new(r, k, k, 0)
+ }
+
+ active_chart.at(k,k+span).each { |active_item|
+ next if active_item.rhs[active_item.dot].class==T
+ # inner loop
+ 1.upto(span-1) { |span2|
+ k.upto((k+span)-span2) { |l|
+
+ if passive_chart.has active_item.rhs[active_item.dot].symbol, l, l+span2
+ if l == active_item.span.right
+ new_item = Item.new active_item, active_item.span.left, l+span2, active_item.dot+1
+ scan new_item, input, k+span, passive_chart
+ if new_item.dot == new_item.rhs.size # done with item
+ if new_item.span.left == k && new_item.span.right == k+span
+ passive_chart.add k, k+span, new_item
+ end
+ else
+ if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span
+ active_chart.at(k,k+span) << new_item
+ end
+ end
+ end
+ end
+ }
+ }
+ }
+
+ # 'self-filling' step
+ passive_chart.at(k,k+span).each { |passive_item|
+ active_chart.at(k,k+span).each { |active_item|
+ next if active_item.rhs[active_item.dot].class!=NT
+ if passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol
+ next if not active_item.span.right==passive_item.span.left
+ new_item = Item.new active_item, active_item.span.left, passive_item.span.right, active_item.dot+1
+ scan new_item, input, k+span, passive_chart
+ if new_item.dot == new_item.rhs.size
+ if new_item.span.left == k && new_item.span.right == k+span
+ passive_chart.add k, k+span, new_item
+ else
+ puts "#{new_item}" # FIXME never happens
+ end
+ else
+ if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span
+ puts "NA@#{k},#{k+span} #{new_item}"
+ active_chart.at(k,k+span) << new_item
+ end
+ end
+ end
+ }
+ }
+ }
+ }
+end
+
+def visit n, depth, skip=0 # FIXME
+ (depth-skip).times { |i|
+ i += skip
+ 0.upto(n-(i+1)) { |j|
+ yield j, j+i+1
+ }
+ }
+end
+
+def main
+ #input = 'ich sah ein kleines haus'.split
+ input = 'lebensmittel schuld an europäischer inflation'.split
+ #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
+ n = input.size
+ grammar = Grammar.new 'example/grammar.x'
+ grammar.add_glue_rules
+ passive_chart = Chart.new n
+ active_chart = Chart.new n
+ init input, n, active_chart, passive_chart, grammar
+ parse input, n, active_chart, passive_chart, grammar
+ puts "---\npassive chart"
+ visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s }; puts }
+end
+
+
+main
+