summaryrefslogtreecommitdiff
path: root/parse.rb
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-09-16 10:23:14 +0100
committerPatrick Simianer <p@simianer.de>2014-09-16 10:23:14 +0100
commit129a22cfcc7651daa4b11ed52e7870249f6373a5 (patch)
tree78de4649396ab0d37a325b7598f9873c2d65f4c9 /parse.rb
parentdf70006a07fb67b17fb39aa56762c50c2e7b8131 (diff)
spring cleaning
Diffstat (limited to 'parse.rb')
-rw-r--r--parse.rb210
1 files changed, 0 insertions, 210 deletions
diff --git a/parse.rb b/parse.rb
deleted file mode 100644
index 1f69c91..0000000
--- a/parse.rb
+++ /dev/null
@@ -1,210 +0,0 @@
-require 'zipf'
-require_relative 'grammar'
-require_relative 'hg'
-
-
-module Parse
-
-
-def Parse::visit i, l, r, x=0
- i.upto(r-x) { |span|
- l.upto(r-span) { |k|
- yield k, k+span
- }
- }
-end
-
-class Chart
-
- def initialize n
- @n = n
- @m = []
- (n+1).times {
- a = []
- (n+1).times { a << [] }
- @m << a
- }
- @b = {}
- end
-
- def at i, j
- @m[i][j]
- end
-
- def add item, i, j
- at(i,j) << item
- @b["#{i},#{j},#{item.lhs.symbol}"] = true
- end
-
- def has symbol, i, j
- return @b["#{i},#{j},#{symbol}"]
- end
-
- def to_hg weights=SparseVector.new
- nodes = []
- edges = []
- nodes_by_id = {}
- nodes << HG::Node.new(-1, "root", [-1,-1])
- nodes_by_id[-1] = nodes.last
- id = 0
- seen = {}
- Parse::visit(1, 0, @n) { |i,j|
- self.at(i,j).each { |item|
- _ = "#{item.lhs.symbol},#{i},#{j}"
- if !seen[_]
- nodes << HG::Node.new(id, item.lhs.symbol, [i,j])
- nodes_by_id[id] = nodes.last
- seen[_] = id
- id += 1
- end
- }
- }
-
- Parse::visit(1, 0, @n) { |i,j|
- self.at(i,j).each { |item|
- edges << HG::Hyperedge.new(nodes_by_id[seen[item.lhs.symbol+','+i.to_s+','+j.to_s]], \
- (item.tail_spans.empty? ? [nodes_by_id[-1]] : item.rhs.zip((0..item.rhs.size-1).map{|q| item.tail_spans[q] }).select{|x| x[0].class==Grammar::NT }.map{|x| nodes_by_id[seen["#{x[0].symbol},#{x[1].left},#{x[1].right}"]]}), \
- Math.exp(weights.dot(item.f)),
- item.f,
- Grammar::Rule.new(item.lhs, item.rhs, item.target, item.map, item.f), \
- )
- edges.last.head.incoming << edges.last
- edges.last.tails.each { |n| n.outgoing << edges.last }
- }
- }
- return HG::Hypergraph.new(nodes, edges, nodes_by_id)
- end
-end
-
-Span = Struct.new(:left, :right)
-
-class Item < Grammar::Rule
- attr_accessor :left, :right, :tail_spans, :dot, :f
-
- def initialize rule_or_item, left, right, dot
- @lhs = Grammar::NT.new rule_or_item.lhs.symbol, rule_or_item.lhs.index
- @left = left
- @right = right
- @rhs = []
- @tail_spans = {} # refers to source side, use @map
- @f = rule_or_item.f
- @map = (rule_or_item.map ? rule_or_item.map.dup : [])
- rule_or_item.rhs.each_with_index { |x,i| # duplicate rhs partially
- @rhs << x
- if x.class == Grammar::NT
- begin
- if i >= dot
- @tail_spans[i] = Span.new(-1, -1)
- else
- @tail_spans[i] = rule_or_item.tail_spans[i].dup
- end
- rescue
- @tail_spans[i] = Span.new(-1, -1)
- end
- end
- }
- @dot = dot
- @target = rule_or_item.target
- end
-
- def to_s
- "(#{@left}, #{@right}) [#{tail_spans.map{|k,v| k.to_s+'('+v.left.to_s+','+v.right.to_s+')'}.join ' '}] {#{@map.to_s.delete('[]')}} #{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] ||| #{@target.map{|x|x.to_s}.join ' '}"
- end
-end
-
-def Parse::init input, n, active_chart, passive_chart, grammar
- grammar.flat.each { |r|
- input.each_index { |i|
- if input[i, r.rhs.size] == r.rhs.map { |x| x.word }
- passive_chart.add Item.new(r, i, i+r.rhs.size, r.rhs.size), i, i+r.rhs.size
- end
- }
- }
-end
-
-def Parse::scan item, input, limit, passive_chart
- while item.rhs[item.dot].class == Grammar::T
- return false if item.right==limit
- if item.rhs[item.dot].word == input[item.right]
- item.dot += 1
- item.right += 1
- else
- return false
- end
- end
- return true
-end
-
-def Parse::parse input, n, active_chart, passive_chart, grammar
- visit(1, 0, n) { |i,j|
-
- STDERR.write " span(#{i},#{j})\n"
-
- # try to apply rules starting with T
- grammar.start_t.select { |r| r.rhs.first.word == input[i] }.each { |r|
- new_item = Item.new r, i, i, 0
- active_chart.at(i,j) << new_item if scan new_item, input, j, passive_chart
- }
-
- # seed active chart
- grammar.start_nt.each { |r|
- next if r.rhs.size > j-i
- active_chart.at(i,j) << Item.new(r, i, i, 0)
- }
-
- # parse
- new_symbols = []
- remaining_items = []
- while !active_chart.at(i,j).empty?
- active_item = active_chart.at(i,j).pop
- advanced = false
- visit(1, i, j, 1) { |k,l|
- if passive_chart.has active_item.rhs[active_item.dot].symbol, k, l
- if k == active_item.right
- new_item = Item.new active_item, active_item.left, l, active_item.dot+1
- new_item.tail_spans[new_item.dot-1] = Span.new(k,l)
- if scan new_item, input, j, passive_chart
- if new_item.dot == new_item.rhs.size
- if new_item.left == i && new_item.right == j
- new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol
- passive_chart.add new_item, i, j
- advanced = true
- end
- else
- if new_item.right+(new_item.rhs.size-(new_item.dot)) <= j
- active_chart.at(i,j) << new_item
- advanced = true
- end
- end
- end
- end
- end
- }
- if !advanced
- remaining_items << active_item
- end
- end
-
-
- # 'self-filling' step
- new_symbols.each { |s|
- remaining_items.each { |item|
- next if item.dot!=0
- next if item.rhs[item.dot].class!=Grammar::NT
- if item.rhs[item.dot].symbol == s
- new_item = Item.new item, i, j, item.dot+1
- new_item.tail_spans[new_item.dot-1] = Span.new(i,j)
- if new_item.dot==new_item.rhs.size
- new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol
- passive_chart.add new_item, i, j
- end
- end
- }
- }
-
- }
-end
-
-
-end #module
-