diff options
author | Patrick Simianer <p@simianer.de> | 2014-05-20 14:42:33 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-05-20 14:42:33 +0200 |
commit | da646adc508083d69dea995ef8692840377fa61e (patch) | |
tree | 23d742d9de664db95bf01a858b0acbf6334aa599 /parse.rb | |
parent | b385228969382185afee33d0661790b2831ac6c1 (diff) |
cleanup
Diffstat (limited to 'parse.rb')
-rw-r--r-- | parse.rb | 176 |
1 files changed, 176 insertions, 0 deletions
#!/usr/bin/env ruby

# Bottom-up chart parser driver. Rule, T, NT and Grammar are expected to come
# from grammar.rb (not visible here).

require_relative './grammar.rb'
STDOUT.sync = true


# Two-dimensional table of item lists indexed by span boundaries (i, j),
# plus a hash index recording which LHS symbols have been added to which cell.
class Chart

  # Builds an (n+1) x (n+1) table whose cells are empty arrays.
  def initialize n
    @m = (0..n).map { (0..n).map { [] } }
    @b = {}
  end

  # Returns the (mutable) array of items stored for span (i, j).
  def at i, j
    @m[i][j]
  end

  # Appends item to cell (i, j) and records its LHS symbol in the index.
  def add i, j, item
    at(i, j) << item
    @b[key(i, j, item.lhs.symbol)] = true
  end

  # Returns true if an item with the given LHS symbol was added to cell
  # (i, j) via #add, nil otherwise. Items pushed directly onto #at are not seen.
  def has symbol, i, j
    @b[key(i, j, symbol)]
  end

  private

  # Index key; the symbol is interpolated so lookups compare by string form.
  def key i, j, symbol
    "#{i},#{j},#{symbol}"
  end
end

# Pair of positions (left, right) delimiting the input covered by an item.
class Span
  attr_accessor :left, :right

  def initialize left=nil, right=nil
    @left = left
    @right = right
  end
end

# A (possibly partially recognized) rule: lhs/rhs copied from a rule or from
# another item, the span covered so far, and the dot position inside rhs.
class Item < Rule
  attr_accessor :lhs, :rhs, :span, :dot

  # NOTE(review): does not call super; presumably Rule needs no further
  # initialization for the methods used here -- confirm against grammar.rb.
  def initialize rule_or_item, left, right, dot
    @lhs = rule_or_item.lhs
    @rhs = rule_or_item.rhs.dup # own copy, so the source's rhs array is not shared
    @span = Span.new left, right
    @dot = dot
  end

  # Human-readable form with '*' inserted at the dot position.
  # `arity` is not defined in this file; presumably inherited from Rule.
  def to_s
    "#{lhs} -> #{rhs.map(&:to_s).insert(@dot, '*').join(' ')} [dot@#{@dot}] [arity=#{arity}] (#{@span.left}, #{@span.right})"
  end
end

# Seeds the passive chart: for every rule in grammar.flat and every input
# position where the rule's rhs words match the input, adds a completed item.
# n and active_chart are accepted but not used here.
def init input, n, active_chart, passive_chart, grammar
  grammar.flat.each do |r|
    width = r.rhs.size
    words = r.rhs.map(&:word) # computed once per rule, not once per position
    input.each_index do |i|
      next unless input[i, width] == words
      passive_chart.add i, i + width, Item.new(r, i, i + width, width)
    end
  end
end

# Advances item's dot (and the right edge of its span) over T symbols for as
# long as they match the input and the right edge stays below limit.
# Mutates item in place. passive_chart is accepted but not used here.
def scan item, input, limit, passive_chart
  while item.rhs[item.dot].class == T && item.span.right < limit
    break unless item.rhs[item.dot].word == input[item.span.right]
    item.dot += 1
    item.span.right += 1
  end
end

# Fills both charts for all spans of width 2..n, narrowest first.
# Prints progress/debug lines to stdout.
def parse input, n, active_chart, passive_chart, grammar
  2.upto(n) do |span| # outer loop
    0.upto(n - span) do |k|
      right_edge = k + span
      active_cell = active_chart.at(k, right_edge)

      puts " span(#{k},#{right_edge})"

      # try to apply rules starting with T
      grammar.mixed.select { |r| r.rhs.first.word == input[k] }.each do |r|
        new_item = Item.new r, k, k, 0
        scan new_item, input, right_edge, passive_chart
        active_cell << new_item
      end

      # seed active chart
      grammar.rewrite.each do |r|
        next if r.rhs.size > span
        active_cell << Item.new(r, k, k, 0)
      end

      # inner loop; items appended to active_cell while iterating are
      # visited as well, since Array#each re-checks the length on each step
      active_cell.each do |active_item|
        next_sym = active_item.rhs[active_item.dot]
        next if next_sym.class == T
        wanted = next_sym.symbol
        # A sub-constituent can only extend the item if it starts exactly
        # where the item currently ends, so that is the only start position
        # that has to be looked up for each width.
        start = active_item.span.right
        1.upto(span - 1) do |span2|
          next unless start.between?(k, right_edge - span2)
          next unless passive_chart.has wanted, start, start + span2

          new_item = Item.new active_item, active_item.span.left, start + span2, active_item.dot + 1
          scan new_item, input, right_edge, passive_chart
          if new_item.dot == new_item.rhs.size # done with item
            if new_item.span.left == k && new_item.span.right == right_edge
              passive_chart.add k, right_edge, new_item
            end
          elsif new_item.rhs[new_item.dot].class == NT &&
                new_item.span.right + (new_item.rhs.size - new_item.dot) <= right_edge
            # keep only items whose remaining symbols can still fit in the cell
            active_cell << new_item
          end
        end
      end

      # 'self-filling' step: combine active items with passive items of the
      # same cell; both lists may grow while being iterated
      passive_chart.at(k, right_edge).each do |passive_item|
        active_cell.each do |active_item|
          next_sym = active_item.rhs[active_item.dot]
          next unless next_sym.class == NT
          next unless passive_item.lhs.symbol == next_sym.symbol
          next unless active_item.span.right == passive_item.span.left

          new_item = Item.new active_item, active_item.span.left, passive_item.span.right, active_item.dot + 1
          scan new_item, input, right_edge, passive_chart
          if new_item.dot == new_item.rhs.size
            if new_item.span.left == k && new_item.span.right == right_edge
              passive_chart.add k, right_edge, new_item
            else
              puts "#{new_item}" # FIXME never happens
            end
          elsif new_item.rhs[new_item.dot].class == NT &&
                new_item.span.right + (new_item.rhs.size - new_item.dot) <= right_edge
            puts "NA@#{k},#{right_edge} #{new_item}"
            active_cell << new_item
          end
        end
      end
    end
  end
end

# Yields (left, right) for every span of width skip+1 .. depth over an input
# of length n, narrowest widths first, left to right within a width.
def visit n, depth, skip=0 # FIXME
  (depth - skip).times do |i|
    width = i + skip + 1
    0.upto(n - width) { |j| yield j, j + width }
  end
end

# Parses a hard-coded example sentence with the grammar in
# 'example/grammar.x' and prints the passive chart.
def main
  #input = 'ich sah ein kleines haus'.split
  input = 'lebensmittel schuld an europäischer inflation'.split
  #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
  n = input.size
  grammar = Grammar.new 'example/grammar.x'
  grammar.add_glue_rules
  passive_chart = Chart.new n
  active_chart = Chart.new n
  init input, n, active_chart, passive_chart, grammar
  parse input, n, active_chart, passive_chart, grammar
  puts "---\npassive chart"
  visit(n, n, 0) do |i, j|
    puts "#{i},#{j}"
    passive_chart.at(i, j).each { |item| puts ' ' + item.to_s }
    puts
  end
end


main