diff options
Diffstat (limited to 'lib')
| -rwxr-xr-x | lib/nlp_ruby.rb | 1 | ||||
| -rw-r--r-- | lib/nlp_ruby/hg.rb | 131 | ||||
| -rw-r--r-- | lib/nlp_ruby/semirings.rb | 10 | 
3 files changed, 142 insertions, 0 deletions
| diff --git a/lib/nlp_ruby.rb b/lib/nlp_ruby.rb index a3f9f1a..f57a302 100755 --- a/lib/nlp_ruby.rb +++ b/lib/nlp_ruby.rb @@ -9,6 +9,7 @@ require 'nlp_ruby/dag'  require 'nlp_ruby/semirings'  require 'nlp_ruby/bleu'  require 'nlp_ruby/misc' +require 'nlp_ruby/hg'  STDIN.set_encoding 'utf-8'  STDOUT.set_encoding 'utf-8' diff --git a/lib/nlp_ruby/hg.rb b/lib/nlp_ruby/hg.rb new file mode 100644 index 0000000..b49cc6e --- /dev/null +++ b/lib/nlp_ruby/hg.rb @@ -0,0 +1,131 @@ +#!/usr/bin/env ruby + +require 'nlp_ruby' +require 'json' + +module HG + + +class HG::Node +  attr_accessor :label, :cat, :outgoing, :incoming, :score + +  def initialize label=nil, cat=nil, outgoing=[], incoming=[], score=nil +    @label    = label +    @cat      = cat +    @outgoing = outgoing +    @incoming = incoming +    @score    = nil +  end + +  def to_s +    "Node<label:\"#{@label}\", cat:\"#{@cat}\", outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>" +  end +end + +class HG::Hypergraph +  attr_accessor :nodes, :edges + +  def initialize nodes=[], edges=[] +    @nodes = nodes +    @edges = edges +  end + +  def arity +    @edges.map { |e| e.arity }.max +  end + +  def to_s +    "Hypergraph<nodes:[#{@nodes.to_s}], edges:[#{@edges.to_s}], arity:#{arity}>" +  end +end + +class HG::Hyperedge +  attr_accessor :head, :tails, :weight, :f, :mark + +  def initialize head=nil, tails=[], weight=0.0, f={} +    @head   = head +    @tails  = tails +    @weight = weight +    @f      = f +    @mark   = 0 +  end + +  def arity +    return @tails.size +  end + +  def marked? +    arity == @mark +  end + +  def to_s +    "Hyperedge<head:\"#{@head.label}\", tails:#{@tails.map{|n|n.label}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>" +  end +end + +def HG::topological_sort nodes +  sorted = [] +  s = nodes.reject { |n| !n.incoming.empty? } +  while !s.empty? +    sorted << s.shift +    sorted.last.outgoing.each { |e| +      next if e.marked? +      e.mark += 1 +      s << e.head if e.head.incoming.reject{ |f| f.mark==f.arity }.empty? +    } +  end +  return sorted +end + +def HG::init nodes, semiring, root +  nodes.each { |n| n.score=semiring.null } +  root.score = semiring.one +end + +def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new +  toposorted = topological_sort hypergraph.nodes +  init toposorted, semiring, root +  toposorted.each { |n| +    n.incoming.each { |e| +      s = semiring.one +      e.tails.each { |m| +        s = semiring.multiply.call(s, m.score) +      } +      n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) +    } +  } +end + +def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false +  nodes = [] +  edges = [] +  nodes_by_label = {} +  nodes_by_index = [] +  h = JSON.parse File.new(fn).read +  w = SparseVector.from_h h['weights'] +  h['nodes'].each { |i| +    n = Node.new i['label'], i['cat'] +    nodes << n +    nodes_by_label[n.label] = n +    nodes_by_index << n +  } +  h['edges'].each { |i| +    e = Hyperedge.new nodes_by_label[i['head']], i['tails'].map{|j| nodes_by_label[j]}.to_a, semiring.convert.call(i['weight'].to_f) +    e.f = SparseVector.from_h i['f'] +    if log_weights +      e.weight = Math.exp(w.dot(e.f)) +    else +      e.weight = w.dot(e.f) +    end +    e.tails.each { |m| +      m.outgoing << e +    } +    e.head.incoming << e +    edges << e +  } +  return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index +end + + +end #module + diff --git a/lib/nlp_ruby/semirings.rb b/lib/nlp_ruby/semirings.rb index 83551a9..fda4683 100644 --- a/lib/nlp_ruby/semirings.rb +++ b/lib/nlp_ruby/semirings.rb @@ -26,6 +26,16 @@ class ViterbiSemiring < Semiring    end  end +class ViterbiLogSemiring < Semiring +  def initialize +    @add = Proc.new { |a,b| [a,b].max } +    @multiply =  Proc.new { |a,b| a+b } +    @one = 0.0 +    @null = -1.0/0.0 +    @convert = Proc.new { |v| v } +  end +end +  class InsideSemiring < Semiring    def initialize      @add = Proc.new { |a,b| a+b } | 
