summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de>, 2014-06-01 23:23:14 +0200
committer: Patrick Simianer <p@simianer.de>, 2014-06-01 23:23:14 +0200
commit: 94fef2d3aac6d7380e92771c46cc0e4655afaea4 (patch)
tree: 8caf3e6b14b366ac8c420419eff611d71a297b86
parent: aba7c124e3402a39a8237a7ae0ec4bc15f4016ce (diff)
add grammar.rb, hg.rb: viterbi_path(), all_paths()
-rw-r--r-- README.md 1
-rwxr-xr-x lib/nlp_ruby.rb 3
-rw-r--r-- lib/nlp_ruby/hg.rb 52
-rw-r--r-- nlp_ruby.gemspec 2
4 files changed, 52 insertions, 6 deletions
diff --git a/README.md b/README.md
index 338109c..cc8d3ef 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ The .gem can be found here: https://rubygems.org/gems/nlp_ruby
bleu.rb : BLEU implementation, also per-sentence-BLEU
dag.rb : implementation of a directed acyclic graph and various algorithms
fileutil.rb : file utilities
+ grammar.rb : classes to model grammars (mostly for translation grammars)
hg.rb : data structures and algorithms (viterbi currently) for hypergraphs
misc.rb : misc. stuff (e.g. monkey patches for Array and String)
semirings.rb : semirings (used in dags.rb)
diff --git a/lib/nlp_ruby.rb b/lib/nlp_ruby.rb
index f57a302..0e26e97 100755
--- a/lib/nlp_ruby.rb
+++ b/lib/nlp_ruby.rb
@@ -10,8 +10,9 @@ require 'nlp_ruby/semirings'
require 'nlp_ruby/bleu'
require 'nlp_ruby/misc'
require 'nlp_ruby/hg'
+require 'nlp_ruby/grammar'
-STDIN.set_encoding 'utf-8'
+STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'
STDERR.set_encoding 'utf-8'
diff --git a/lib/nlp_ruby/hg.rb b/lib/nlp_ruby/hg.rb
index b49cc6e..66e3b13 100644
--- a/lib/nlp_ruby/hg.rb
+++ b/lib/nlp_ruby/hg.rb
@@ -3,6 +3,7 @@
require 'nlp_ruby'
require 'json'
+
module HG
@@ -40,14 +41,15 @@ class HG::Hypergraph
end
class HG::Hyperedge
- attr_accessor :head, :tails, :weight, :f, :mark
+ attr_accessor :head, :tails, :weight, :f, :mark, :rule, :left, :right
- def initialize head=nil, tails=[], weight=0.0, f={}
+ def initialize head=nil, tails=[], weight=0.0, f={}, rule=nil, left=nil, right=nil
@head = head
@tails = tails
@weight = weight
@f = f
@mark = 0
+ @rule = Grammar::Rule.from_s rule if rule
end
def arity
@@ -59,7 +61,7 @@ class HG::Hyperedge
end
def to_s
- "Hyperedge<head:\"#{@head.label}\", tails:#{@tails.map{|n|n.label}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>"
+ "Hyperedge<head:\"#{@head.label}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.label}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>"
end
end
@@ -96,6 +98,27 @@ def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new
}
end
+def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ init toposorted, semiring, root
+ best_path = []
+ toposorted.each { |n|
+ best_edge = nil
+ n.incoming.each { |e|
+ s = semiring.one
+ e.tails.each { |m|
+ s = semiring.multiply.call(s, m.score)
+ }
+ if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add
+ best_edge = e
+ end
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ }
+ best_path << best_edge
+ }
+ return best_path, toposorted.last.score
+end
+
def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false
nodes = []
edges = []
@@ -110,7 +133,11 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal
nodes_by_index << n
}
h['edges'].each { |i|
- e = Hyperedge.new nodes_by_label[i['head']], i['tails'].map{|j| nodes_by_label[j]}.to_a, semiring.convert.call(i['weight'].to_f)
+ e = Hyperedge.new(nodes_by_label[i['head']], \
+ i['tails'].map{|j| nodes_by_label[j]}.to_a, \
+ semiring.convert.call(i['weight'].to_f), \
+ {}, \
+ i['rule'], i['left'], i['right'])
e.f = SparseVector.from_h i['f']
if log_weights
e.weight = Math.exp(w.dot(e.f))
@@ -126,6 +153,23 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal
return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index
end
+def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ paths = [[]]
+ toposorted.each { |n|
+ next if n.incoming.empty?
+ new_paths = []
+ while !paths.empty?
+ p = paths.pop
+ n.incoming.each { |e|
+ new_paths << p+[e]
+ }
+ end
+ paths = new_paths
+ }
+ return paths
+end
+
end #module
diff --git a/nlp_ruby.gemspec b/nlp_ruby.gemspec
index 1b7ac8d..a5a26df 100644
--- a/nlp_ruby.gemspec
+++ b/nlp_ruby.gemspec
@@ -1,6 +1,6 @@
Gem::Specification.new do |s|
s.name = 'nlp_ruby'
- s.version = '0.4.1'
+ s.version = '0.4.2'
s.date = '2014-03-10'
s.summary = 'nlp_ruby'
s.description = 'NLP related tools and classes'