summaryrefslogtreecommitdiff
path: root/hg.rb
diff options
context:
space:
mode:
Diffstat (limited to 'hg.rb')
-rw-r--r--hg.rb222
1 files changed, 0 insertions, 222 deletions
diff --git a/hg.rb b/hg.rb
deleted file mode 100644
index a68407c..0000000
--- a/hg.rb
+++ /dev/null
@@ -1,222 +0,0 @@
-require 'zipf'
-require_relative 'grammar'
-
-
-module HG
-
-
-class HG::Node
- attr_accessor :id, :symbol, :left, :right, :outgoing, :incoming, :score
-
- def initialize id=nil, symbol='', span=[-1,-1], outgoing=[], incoming=[], score=nil
- @id = id
- @symbol = symbol
- @left = span[0]
- @right = span[1]
- @outgoing = outgoing
- @incoming = incoming
- @score = score
- end
-
- def to_s
- "Node<id=#{@id}, symbol='#{symbol}', span=(#{@left},#{@right}), outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>"
- end
-end
-
-class HG::Hypergraph
- attr_accessor :nodes, :edges, :nodes_by_id
-
- def initialize nodes=[], edges=[], nodes_by_id={}
- @nodes = nodes
- @edges = edges
- @nodes_by_id = nodes_by_id
- @arity_ = nil
- end
-
- def arity
- @arity_ = @edges.map { |e| e.arity }.max if !@arity_
- return @arity_
- end
-
- def reset
- @edges.each { |e| e.mark = 0 }
- end
-
- def to_s
- "Hypergraph<nodes:#{@nodes.size}, edges:#{@edges.size}, arity=#{arity}>"
- end
-
- def to_json weights=nil
- json_s = "{\n"
- json_s += "\"weights\":#{(weights ? weights.to_json : '{}')},\n"
- json_s += "\"nodes\":\n"
- json_s += "[\n"
- json_s += @nodes.map { |n|
- "{ \"id\":#{n.id}, \"cat\":\"#{n.symbol.to_json.slice(1..-1).chomp('"')}\", \"span\":[#{n.left},#{n.right}] }"
- }.join ",\n"
- json_s += "\n],\n"
- json_s += "\"edges\":\n"
- json_s += "[\n"
- json_s += @edges.map { |e|
- "{ \"head\":#{e.head.id}, \"rule\":#{e.rule.to_json}, \"tails\":#{e.tails.map{ |n| n.id }}, \"f\":#{(e.f ? e.f.to_json : '{}')} }"
- }.join ",\n"
- json_s += "\n]\n"
- json_s += "}\n"
-
- return json_s
-
- end
-end
-
-class HG::Hyperedge
- attr_accessor :head, :tails, :score, :f, :mark, :rule
-
- def initialize head=Node.new, tails=[], score=0.0, f=SparseVector.new, rule=nil
- @head = head
- @tails = tails
- @score = score
- @f = f
- @mark = 0
- @rule = (rule.class==String ? Grammar::Rule.from_s(rule) : rule)
- end
-
- def arity
- return @tails.size
- end
-
- def marked?
- arity == @mark
- end
-
- def to_s
- "Hyperedge<head=#{@head.id}, rule:'#{@rule.to_s}', tails=#{@tails.map{|n|n.id}}, arity=#{arity}, score=#{@score}, f=#{f.to_s}, mark=#{@mark}>"
- end
-end
-
-def HG::topological_sort nodes
- sorted = []
- s = nodes.reject { |n| !n.incoming.empty? }
- while !s.empty?
- sorted << s.shift
- sorted.last.outgoing.each { |e|
- next if e.marked?
- e.mark += 1
- s << e.head if e.head.incoming.reject{ |f| f.marked? }.empty?
- }
- end
- return sorted
-end
-
-def HG::init nodes, semiring, root
- nodes.each { |n| n.score=semiring.null }
- root.score = semiring.one
-end
-
-def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new
- toposorted = topological_sort hypergraph.nodes
- init toposorted, semiring, root
- toposorted.each { |n|
- n.incoming.each { |e|
- s = semiring.one
- e.tails.each { |m|
- s = semiring.multiply.call(s, m.score)
- }
- n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score))
- }
- }
-end
-
-def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new
- toposorted = topological_sort hypergraph.nodes
- init toposorted, semiring, root
- best_path = []
- toposorted.each { |n|
- best_edge = nil
- n.incoming.each { |e|
- s = semiring.one
- e.tails.each { |m|
- s = semiring.multiply.call(s, m.score)
- }
- if n.score < semiring.multiply.call(s, e.score) # ViterbiSemiring add
- best_edge = e
- end
- n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score))
- }
- best_path << best_edge if best_edge
- }
- return best_path, toposorted.last.score
-end
-
-def HG::k_best hypergraph, root, semiring=nil
- #TODO
-end
-
-def HG::all_paths hypergraph, root
- toposorted = topological_sort hypergraph.nodes
- paths = [[]]
- toposorted.each { |n|
- next if n.incoming.empty?
- new_paths = []
- while !paths.empty?
- p = paths.pop
- n.incoming.each { |e|
- new_paths << p+[e]
- }
- end
- paths = new_paths
- }
- return paths
-end
-
-def HG::derive path, cur, carry
- edge = path.select { |e| e.head.symbol==cur.symbol \
- && e.head.left==cur.left \
- && e.head.right==cur.right }.first
- j = 0
- edge.rule.target.each { |i|
- if i.class == Grammar::NT
- derive path, edge.tails[edge.rule.map[j]], carry
- j += 1
- else
- carry << i
- end
- }
- return carry
-end
-
-def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false
- nodes = []
- edges = []
- nodes_by_id = {}
- h = JSON.parse File.new(fn).read
- w = SparseVector.from_h h['weights']
- h['nodes'].each { |x|
- n = Node.new x['id'], x['symbol'], x['span']
- nodes << n
- nodes_by_id[n.id] = n
- }
- h['edges'].each { |x|
- e = Hyperedge.new(nodes_by_id[x['head']], \
- x['tails'].map { |j| nodes_by_id[j] }.to_a, \
- (x['score'] ? semiring.convert.call(x['score'].to_f) : nil), \
- (x['f'] ? SparseVector.from_h(x['f']) : SparseVector.new), \
- x['rule'])
- if x['f']
- if log_weights
- e.score = Math.exp(w.dot(e.f))
- else
- e.score = w.dot(e.f)
- end
- end
- e.tails.each { |m|
- m.outgoing << e
- }
- e.head.incoming << e
- edges << e
- }
- return Hypergraph.new(nodes, edges), nodes_by_id
-end
-
-
-end #module
-