From 129a22cfcc7651daa4b11ed52e7870249f6373a5 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 16 Sep 2014 10:23:14 +0100 Subject: spring cleaning --- prototype/parse.rb | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 prototype/parse.rb (limited to 'prototype/parse.rb') diff --git a/prototype/parse.rb b/prototype/parse.rb new file mode 100644 index 0000000..adf2b91 --- /dev/null +++ b/prototype/parse.rb @@ -0,0 +1,207 @@ +require 'zipf' +require_relative 'grammar' +require_relative 'hypergraph' + +module Parse + +def Parse::visit i, l, r, x=0 + i.upto(r-x) { |span| + l.upto(r-span) { |k| + yield k, k+span + } + } +end + +class Chart + + def initialize n + @n = n + @m = [] + (n+1).times { + a = [] + (n+1).times { a << [] } + @m << a + } + @b = {} + end + + def at i, j + @m[i][j] + end + + def add item, i, j + at(i,j) << item + @b["#{i},#{j},#{item.lhs.symbol}"] = true + end + + def has symbol, i, j + return @b["#{i},#{j},#{symbol}"] + end + + def to_hg weights=SparseVector.new + nodes = [] + edges = [] + nodes_by_id = {} + nodes << HG::Node.new(-1, "root", [-1,-1]) + nodes_by_id[-1] = nodes.last + id = 0 + seen = {} + Parse::visit(1, 0, @n) { |i,j| + self.at(i,j).each { |item| + _ = "#{item.lhs.symbol},#{i},#{j}" + if !seen[_] + nodes << HG::Node.new(id, item.lhs.symbol, [i,j]) + nodes_by_id[id] = nodes.last + seen[_] = id + id += 1 + end + } + } + + Parse::visit(1, 0, @n) { |i,j| + self.at(i,j).each { |item| + edges << HG::Hyperedge.new(nodes_by_id[seen[item.lhs.symbol+','+i.to_s+','+j.to_s]], \ + (item.tail_spans.empty? ? [nodes_by_id[-1]] : item.rhs.zip((0..item.rhs.size-1).map{|q| item.tail_spans[q] }).select{|x| x[0].class==Grammar::NT }.map{|x| nodes_by_id[seen["#{x[0].symbol},#{x[1].left},#{x[1].right}"]]}), \ + Math.exp(weights.dot(item.f)), + item.f, + Grammar::Rule.new(item.lhs, item.rhs, item.target, item.map, item.f), \ + ) + edges.last.head.incoming << edges.last + edges.last.tails.each { |n| n.outgoing << edges.last } + } + } + return HG::Hypergraph.new(nodes, edges, nodes_by_id) + end +end + +Span = Struct.new(:left, :right) + +class Item < Grammar::Rule + attr_accessor :left, :right, :tail_spans, :dot, :f + + def initialize rule_or_item, left, right, dot + @lhs = Grammar::NT.new rule_or_item.lhs.symbol, rule_or_item.lhs.index + @left = left + @right = right + @rhs = [] + @tail_spans = {} # refers to source side, use @map + @f = rule_or_item.f + @map = (rule_or_item.map ? rule_or_item.map.dup : []) + rule_or_item.rhs.each_with_index { |x,i| # duplicate rhs partially + @rhs << x + if x.class == Grammar::NT + begin + if i >= dot + @tail_spans[i] = Span.new(-1, -1) + else + @tail_spans[i] = rule_or_item.tail_spans[i].dup + end + rescue + @tail_spans[i] = Span.new(-1, -1) + end + end + } + @dot = dot + @target = rule_or_item.target + end + + def to_s + "(#{@left}, #{@right}) [#{tail_spans.map{|k,v| k.to_s+'('+v.left.to_s+','+v.right.to_s+')'}.join ' '}] {#{@map.to_s.delete('[]')}} #{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] ||| #{@target.map{|x|x.to_s}.join ' '}" + end +end + +def Parse::init input, n, active_chart, passive_chart, grammar + grammar.flat.each { |r| + input.each_index { |i| + if input[i, r.rhs.size] == r.rhs.map { |x| x.word } + passive_chart.add Item.new(r, i, i+r.rhs.size, r.rhs.size), i, i+r.rhs.size + end + } + } +end + +def Parse::scan item, input, limit, passive_chart + while item.rhs[item.dot].class == Grammar::T + return false if item.right==limit + if item.rhs[item.dot].word == input[item.right] + item.dot += 1 + item.right += 1 + else + return false + end + end + return true +end + +def Parse::parse input, n, active_chart, passive_chart, grammar + visit(1, 0, n) { |i,j| + + STDERR.write " span(#{i},#{j})\n" + + # try to apply rules starting with T + grammar.start_t.select { |r| r.rhs.first.word == input[i] }.each { |r| + new_item = Item.new r, i, i, 0 + active_chart.at(i,j) << new_item if scan new_item, input, j, passive_chart + } + + # seed active chart + grammar.start_nt.each { |r| + next if r.rhs.size > j-i + active_chart.at(i,j) << Item.new(r, i, i, 0) + } + + # parse + new_symbols = [] + remaining_items = [] + while !active_chart.at(i,j).empty? + active_item = active_chart.at(i,j).pop + advanced = false + visit(1, i, j, 1) { |k,l| + if passive_chart.has active_item.rhs[active_item.dot].symbol, k, l + if k == active_item.right + new_item = Item.new active_item, active_item.left, l, active_item.dot+1 + new_item.tail_spans[new_item.dot-1] = Span.new(k,l) + if scan new_item, input, j, passive_chart + if new_item.dot == new_item.rhs.size + if new_item.left == i && new_item.right == j + new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol + passive_chart.add new_item, i, j + advanced = true + end + else + if new_item.right+(new_item.rhs.size-(new_item.dot)) <= j + active_chart.at(i,j) << new_item + advanced = true + end + end + end + end + end + } + if !advanced + remaining_items << active_item + end + end + + + # 'self-filling' step + new_symbols.each { |s| + remaining_items.each { |item| + next if item.dot!=0 + next if item.rhs[item.dot].class!=Grammar::NT + if item.rhs[item.dot].symbol == s + new_item = Item.new item, i, j, item.dot+1 + new_item.tail_spans[new_item.dot-1] = Span.new(i,j) + if new_item.dot==new_item.rhs.size + new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol + passive_chart.add new_item, i, j + end + end + } + } + + } +end + +end #module + -- cgit v1.2.3