From 94fef2d3aac6d7380e92771c46cc0e4655afaea4 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 1 Jun 2014 23:23:14 +0200 Subject: add grammar.rb, hg.rb: viterbi_path(), all_paths() --- README.md | 1 + lib/nlp_ruby.rb | 3 ++- lib/nlp_ruby/hg.rb | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- nlp_ruby.gemspec | 2 +- 4 files changed, 52 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 338109c..cc8d3ef 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ The .gem can be found here: https://rubygems.org/gems/nlp_ruby bleu.rb : BLEU implementation, also per-sentence-BLEU dag.rb : implementation of a directed acyclic graph and various algorithms fileutil.rb : file utilities + grammar.rb : classes to model grammars (mostly for translation grammars) hg.rb : data structures and algorithms (viterbi currently) for hypergraphs misc.rb : misc. stuff (e.g. monkey patches for Array and String) semirings.rb : semirings (used in dags.rb) diff --git a/lib/nlp_ruby.rb b/lib/nlp_ruby.rb index f57a302..0e26e97 100755 --- a/lib/nlp_ruby.rb +++ b/lib/nlp_ruby.rb @@ -10,8 +10,9 @@ require 'nlp_ruby/semirings' require 'nlp_ruby/bleu' require 'nlp_ruby/misc' require 'nlp_ruby/hg' +require 'nlp_ruby/grammar' -STDIN.set_encoding 'utf-8' +STDIN.set_encoding 'utf-8' STDOUT.set_encoding 'utf-8' STDERR.set_encoding 'utf-8' diff --git a/lib/nlp_ruby/hg.rb b/lib/nlp_ruby/hg.rb index b49cc6e..66e3b13 100644 --- a/lib/nlp_ruby/hg.rb +++ b/lib/nlp_ruby/hg.rb @@ -3,6 +3,7 @@ require 'nlp_ruby' require 'json' + module HG @@ -40,14 +41,15 @@ class HG::Hypergraph end class HG::Hyperedge - attr_accessor :head, :tails, :weight, :f, :mark + attr_accessor :head, :tails, :weight, :f, :mark, :rule, :left, :right - def initialize head=nil, tails=[], weight=0.0, f={} + def initialize head=nil, tails=[], weight=0.0, f={}, rule=nil, left=nil, right=nil @head = head @tails = tails @weight = weight @f = f @mark = 0 + @rule = Grammar::Rule.from_s rule if rule end def arity @@ -59,7 +61,7 @@ class HG::Hyperedge end def to_s - "Hyperedge" + "Hyperedge" end end @@ -96,6 +98,27 @@ def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new } end +def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new + toposorted = topological_sort hypergraph.nodes + init toposorted, semiring, root + best_path = [] + toposorted.each { |n| + best_edge = nil + n.incoming.each { |e| + s = semiring.one + e.tails.each { |m| + s = semiring.multiply.call(s, m.score) + } + if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add + best_edge = e + end + n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) + } + best_path << best_edge + } + return best_path, toposorted.last.score +end + def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false nodes = [] edges = [] @@ -110,7 +133,11 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal nodes_by_index << n } h['edges'].each { |i| - e = Hyperedge.new nodes_by_label[i['head']], i['tails'].map{|j| nodes_by_label[j]}.to_a, semiring.convert.call(i['weight'].to_f) + e = Hyperedge.new(nodes_by_label[i['head']], \ + i['tails'].map{|j| nodes_by_label[j]}.to_a, \ + semiring.convert.call(i['weight'].to_f), \ + {}, \ + i['rule'], i['left'], i['right']) e.f = SparseVector.from_h i['f'] if log_weights e.weight = Math.exp(w.dot(e.f)) @@ -126,6 +153,23 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index end +def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new + toposorted = topological_sort hypergraph.nodes + paths = [[]] + toposorted.each { |n| + next if n.incoming.empty? + new_paths = [] + while !paths.empty? + p = paths.pop + n.incoming.each { |e| + new_paths << p+[e] + } + end + paths = new_paths + } + return paths +end + end #module diff --git a/nlp_ruby.gemspec b/nlp_ruby.gemspec index 1b7ac8d..a5a26df 100644 --- a/nlp_ruby.gemspec +++ b/nlp_ruby.gemspec @@ -1,6 +1,6 @@ Gem::Specification.new do |s| s.name = 'nlp_ruby' - s.version = '0.4.1' + s.version = '0.4.2' s.date = '2014-03-10' s.summary = 'nlp_ruby' s.description = 'NLP related tools and classes' -- cgit v1.2.3