summaryrefslogtreecommitdiff
path: root/hg.rb
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-06-03 17:11:43 +0200
committerPatrick Simianer <p@simianer.de>2014-06-03 17:11:43 +0200
commit7e965fce5245a79c2c8736b3f996198d09602b3b (patch)
treecab54433cbc96089a9684287f97674ce0464d200 /hg.rb
parent3b0d4622865e71c49c4efc4db4ac35538a373a47 (diff)
getting serious
Diffstat (limited to 'hg.rb')
-rw-r--r--hg.rb199
1 files changed, 199 insertions, 0 deletions
diff --git a/hg.rb b/hg.rb
new file mode 100644
index 0000000..7b39339
--- /dev/null
+++ b/hg.rb
@@ -0,0 +1,199 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+require 'json'
+require_relative 'grammar'
+
+
+module HG
+
+
+class HG::Node
+ attr_accessor :label, :cat, :outgoing, :incoming, :score
+
+ def initialize label=nil, cat=nil, outgoing=[], incoming=[], score=nil
+ @label = label
+ @cat = cat
+ @outgoing = outgoing
+ @incoming = incoming
+ @score = nil
+ end
+
+ def to_s
+ "Node<label:\"#{@label}\", cat:\"#{@cat}\", outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>"
+ end
+end
+
+class HG::Hypergraph
+ attr_accessor :nodes, :edges
+
+ def initialize nodes=[], edges=[]
+ @nodes = nodes
+ @edges = edges
+ end
+
+ def arity
+ @edges.map { |e| e.arity }.max
+ end
+
+ def to_s
+ "Hypergraph<nodes:[#{@nodes.to_s}], edges:[#{@edges.to_s}], arity:#{arity}>"
+ end
+end
+
+class HG::Hyperedge
+ attr_accessor :head, :tails, :weight, :f, :mark, :rule, :left, :right
+
+ def initialize head=nil, tails=[], weight=0.0, f={}, rule=nil, left=nil, right=nil, tail_spans=nil
+ @head = head
+ @tails = tails
+ @weight = weight
+ @f = f
+ @mark = 0
+ @rule = Grammar::Rule.from_s rule, tail_spans if rule
+ @rule.lhs.span.left = left if left
+ @rule.lhs.span.right = right if right
+ end
+
+ def arity
+ return @tails.size
+ end
+
+ def marked?
+ arity == @mark
+ end
+
+ def to_s
+ "Hyperedge<head:\"#{@head.label}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.label}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>"
+ end
+end
+
+def HG::topological_sort nodes
+ sorted = []
+ s = nodes.reject { |n| !n.incoming.empty? }
+ while !s.empty?
+ sorted << s.shift
+ sorted.last.outgoing.each { |e|
+ next if e.marked?
+ e.mark += 1
+ s << e.head if e.head.incoming.reject{ |f| f.mark==f.arity }.empty?
+ }
+ end
+ return sorted
+end
+
+def HG::init nodes, semiring, root
+ nodes.each { |n| n.score=semiring.null }
+ root.score = semiring.one
+end
+
+def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ init toposorted, semiring, root
+ toposorted.each { |n|
+ n.incoming.each { |e|
+ s = semiring.one
+ e.tails.each { |m|
+ s = semiring.multiply.call(s, m.score)
+ }
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ }
+ }
+end
+
+def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ init toposorted, semiring, root
+ best_path = []
+ toposorted.each { |n|
+ best_edge = nil
+ n.incoming.each { |e|
+ s = semiring.one
+ e.tails.each { |m|
+ s = semiring.multiply.call(s, m.score)
+ }
+ if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add
+ best_edge = e
+ end
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ }
+ best_path << best_edge if best_edge
+ }
+ return best_path, toposorted.last.score
+end
+
+def HG::viterbi_string hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ init toposorted, semiring, root
+ s = ''
+ toposorted.each { |n|
+ best_s = nil
+ n.incoming.each { |e|
+ s = semiring.one
+ e.tails.each { |m|
+ s = semiring.multiply.call(s, m.score)
+ }
+ if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add
+ best_s = e.e
+ end
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ }
+ s += best_s if best_s
+ }
+ return s, toposorted.last.score
+end
+
+def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false
+ nodes = []
+ edges = []
+ nodes_by_label = {}
+ nodes_by_index = []
+ h = JSON.parse File.new(fn).read
+ w = SparseVector.from_h h['weights']
+ h['nodes'].each { |i|
+ n = Node.new i['label'], i['cat']
+ nodes << n
+ nodes_by_label[n.label] = n
+ nodes_by_index << n
+ }
+ h['edges'].each { |i|
+ e = Hyperedge.new(nodes_by_label[i['head']], \
+ i['tails'].map{|j| nodes_by_label[j]}.to_a, \
+ semiring.convert.call(i['weight'].to_f), \
+ {}, \
+ i['rule'], i['left'], i['right'], i['spans'])
+ e.f = SparseVector.from_h i['f']
+ if log_weights
+ e.weight = Math.exp(w.dot(e.f))
+ else
+ e.weight = w.dot(e.f)
+ end
+ e.tails.each { |m|
+ m.outgoing << e
+ }
+ e.head.incoming << e
+ edges << e
+ }
+ return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index
+end
+
+def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ paths = [[]]
+ toposorted.each { |n|
+ next if n.incoming.empty?
+ new_paths = []
+ while !paths.empty?
+ p = paths.pop
+ n.incoming.each { |e|
+ new_paths << p+[e]
+ }
+ end
+ paths = new_paths
+ }
+ return paths
+end
+
+
+end #module
+