5 files changed, 76 insertions, 41 deletions
diff --git a/lib/nlp_ruby/SparseVector.rb b/lib/nlp_ruby/SparseVector.rb
index b80373c..3096412 100644
--- a/lib/nlp_ruby/SparseVector.rb
+++ b/lib/nlp_ruby/SparseVector.rb
@@ -12,14 +12,32 @@ class SparseVector < Hash
     a.each_with_index { |i,j| self[j] = i }
   end
 
+  # Factory counterpart of #from_a: returns a new SparseVector in which
+  # every element of the array a is stored under its index.
+  def self.from_a a
+    SparseVector.new.tap { |vec| vec.from_a a }
+  end
+
   def from_h h
     h.each_pair { |k,v| self[k] = v }
   end
 
+  # Factory counterpart of #from_h: returns a new SparseVector holding
+  # every key/value pair of the hash h.
+  def self.from_h h
+    SparseVector.new.tap { |vec| vec.from_h h }
+  end
+
   def from_s s
     from_h eval(s)
   end
 
+  # Factory counterpart of #from_s: returns a new SparseVector built from
+  # the string s. NOTE: #from_s eval()s s, so only pass trusted input.
+  def self.from_s s
+    SparseVector.new.tap { |vec| vec.from_s s }
+  end
+
   def to_kv sep='=', join=' '
     a = []
     self.each_pair { |k,v|
@@ -35,6 +53,12 @@ class SparseVector < Hash
     }
   end
 
+  # Factory counterpart of #from_kv: returns a new SparseVector filled by
+  # calling #from_kv with the string s as its only argument.
+  def self.from_kv s
+    SparseVector.new.tap { |vec| vec.from_kv s }
+  end
+
   def from_file fn, sep='='
     f = ReadFile.new(fn)
     while line = f.gets
@@ -44,6 +68,12 @@ class SparseVector < Hash
     end
   end
 
+  # Factory counterpart of #from_file: returns a new SparseVector filled by
+  # #from_file from the file named fn; sep is forwarded unchanged.
+  def self.from_file fn, sep='='
+    SparseVector.new.tap { |vec| vec.from_file fn, sep }
+  end
+
   def join_keys other
     self.keys + other.keys
   end
@@ -126,24 +156,17 @@ class SparseVector < Hash
     }
     return new
   end
-end
-
-
-module SparseVector
-
 
-def SparseVector::mean a
-  mean = SparseVector.new
-  a.each { |i|
-    i.each_pair { |k,v|
-      mean[k] += v
+  # Returns a new SparseVector holding, per key, the sum of that key's values
+  # over all vectors in a, divided by a.size (absent keys add nothing).
+  def self.mean a
+    acc = SparseVector.new # += below presumes missing keys default to a number (0?) -- confirm
+    a.each { |vec|
+      vec.each_pair { |key,val| acc[key] += val }
     }
-  }
-  n = array_of_vectors.size.to_f
-  mean.each_pair { |k,v| mean[k] = v/n }
-  return mean
-end
-
-
+    count = a.size.to_f
+    acc.each_pair { |key,sum| acc[key] = sum/count } # turn each sum into an average in place
+    acc
+  end
 end
 
diff --git a/lib/nlp_ruby/Translation.rb b/lib/nlp_ruby/Translation.rb
index 0c346a4..34effe0 100644
--- a/lib/nlp_ruby/Translation.rb
+++ b/lib/nlp_ruby/Translation.rb
@@ -1,5 +1,5 @@
 class Translation
-  attr_accessor :id, :s, :raw, :f, :score, :rank, :other_score
+  attr_accessor :id, :s, :raw, :f, :scores, :rank
 
   def initialize id=nil, raw=nil, s=nil, f=nil, scores={}, rank=nil
     @id = id
@@ -21,11 +21,17 @@ class Translation
       @s = raw
     end
     @id = id.to_i
-    @f = read_feature_string features
+    @f = SparseVector.from_kv features
     @scores['decoder'] = score.to_f
     @rank = rank
   end
 
+  # Factory counterpart of #from_s: returns a new Translation (created with
+  # the constructor's default arguments) populated by #from_s from s.
+  def self.from_s s
+    new.tap { |translation| translation.from_s s }
+  end
+
   def to_s include_features=true
     [@id, @s, @f.to_kv('=', ' '), @scores['decoder']].join(' ||| ') if include_features
     [@id, @s, @scores['decoder']].join(' ||| ') if !include_features
diff --git a/lib/nlp_ruby/dag.rb b/lib/nlp_ruby/dag.rb
index cca35c5..6f514c7 100644
--- a/lib/nlp_ruby/dag.rb
+++ b/lib/nlp_ruby/dag.rb
@@ -4,27 +4,27 @@ require 'json'
 
 
 class DAG::Node
-  attr_accessor :label, :edges, :incoming, :score, :mark
+  attr_accessor :label, :outgoing, :incoming, :score, :mark # :outgoing supersedes the former :edges accessor
 
-  def initialize label=nil, edges=[], incoming=[], score=nil
+  def initialize label=nil, outgoing=[], incoming=[], score=nil # NOTE(review): score is accepted but @score is always set to nil below
     @label    = label
-    @edges    = edges # outgoing
+    @outgoing = outgoing # edges leaving this node; add_edge appends to this list
     @incoming = incoming
     @score    = nil
   end
 
   def add_edge head, weight=0
     exit if self==head # no self-cycles!
-    @edges << DAG::Edge.new(self, head, weight)
-    return @edges.last
+    @outgoing << DAG::Edge.new(self, head, weight) # record a new edge built from (self, head, weight)
+    return @outgoing.last # hand the edge just appended back to the caller
   end
 
   def to_s
-    "DAG::Node<label:#{label}, edges:#{edges.size}, incoming:#{incoming.size}>"
+    "DAG::Node<label:#{label}, outgoing:#{outgoing.size}, incoming:#{incoming.size}>"
   end
 
   def repr
-    "#{to_s} #{@score} out:#{@edges} in:[#{@incoming.map{|e| e.to_s}.join ', '}]"
+    "#{to_s} #{@score} out:#{@outgoing} in:[#{@incoming.map{|e| e.to_s}.join ', '}]"
   end
 end
 
@@ -50,7 +50,7 @@ end
 #  w/o markings as we do not have cycles
 def DAG::dfs n, target_label
   return n if n.label==target_label # assumes uniq labels!
-  stack = n.edges.map { |i| i.head }
+  stack = n.outgoing.map { |i| i.head }
   while !stack.empty?
     m = stack.pop
     return DAG::dfs m, target_label
@@ -65,7 +65,7 @@ def DAG::bfs n, target_label
   while !queue.empty?
     m = queue.shift
     return m if m.label==target_label
-    m.edges.each { |e| queue << e.head }
+    m.outgoing.each { |e| queue << e.head }
   end
   return nil
 end
@@ -76,7 +76,7 @@ def DAG::topological_sort graph
   s = graph.reject { |n| !n.incoming.empty? }
   while !s.empty?
     sorted << s.shift
-    sorted.last.edges.each { |e|
+    sorted.last.outgoing.each { |e|
       e.mark = true
       s << e.head if e.head.incoming.reject{|f| f.mark}.empty?
     }
@@ -110,7 +110,7 @@ def DAG::viterbi_forward graph, semiring=ViterbiSemiring, source_node
   toposorted = DAG::topological_sort(graph)
   DAG::init(graph, semiring, source_node)
   toposorted.each { |n|
-    n.edges.each { |e|
+    n.outgoing.each { |e|
       e.head.score = \
         semiring.add.call(e.head.score, \
                           semiring.multiply.call(n.score, e.weight)
@@ -127,7 +127,7 @@ def DAG::dijkstra graph, semiring=RealSemiring.new, source_node
   q = PriorityQueue.new graph
   while !q.empty?
     n = q.pop
-    n.edges.each { |e|
+    n.outgoing.each { |e|
       e.head.score = \
         semiring.add.call(e.head.score, \
                           semiring.multiply.call(n.score, e.weight))
diff --git a/lib/nlp_ruby/misc.rb b/lib/nlp_ruby/misc.rb
index 0f58100..b2ab885 100644
--- a/lib/nlp_ruby/misc.rb
+++ b/lib/nlp_ruby/misc.rb
@@ -1,3 +1,6 @@
+require 'timeout'
+
+
 class Array
   def max_index
     self.index(self.max)
@@ -23,8 +26,8 @@ end
 
 class String
 
-  def downcase? s
-    s[/[[:lower:]]/]
+  def downcase? # truthy iff the receiver contains at least one lowercase character
+    slice(/[[:lower:]]/) # first lowercase character, or nil when there is none
   end
 end
 
@@ -56,16 +59,13 @@ class PriorityQueue
 end
 
 def spawn_with_timeout cmd, t=4, debug=false
-  require 'timeout'
   STDERR.write cmd+"\n" if debug
   pipe_in, pipe_out = IO.pipe
   pid = Process.spawn(cmd, :out => pipe_out)
   begin
     Timeout.timeout(t) { Process.wait pid }
   rescue Timeout::Error
-    return ""
-    # accept the zombies
-    #Process.kill('TERM', pid)
+    Process.kill('TERM', pid)
   end
   pipe_out.close
   return pipe_in.read
@@ -76,7 +76,7 @@ def read_phrase_table fn
   f = ReadFile.new fn
   while raw_rule = f.gets
     french, english, features = splitpipe(raw_rule)
-    feature_map = read_feature_string(features)
+    feature_map = SparseVector.from_kv  features
     if table.has_key? french
       table[french] << [english, feature_map ]
     else
diff --git a/lib/nlp_ruby/tfidf.rb b/lib/nlp_ruby/tfidf.rb
index 84d55a5..13a40a3 100644
--- a/lib/nlp_ruby/tfidf.rb
+++ b/lib/nlp_ruby/tfidf.rb
@@ -1,6 +1,9 @@
+module TFIDF
+
+
 # returns key='raw frequency' for an
 # array-like object
-def tf array, stopwords=[]
+def TFIDF::tf array, stopwords=[]
   v = {}; v.default = 0
   array.uniq.each { |i|
    next if stopwords.include? i
@@ -11,7 +14,7 @@ end
 
 # smoothes raw frequencies of tf() in-place
 # a is a smoothing term
-def ntf hash, a=0.4
+def TFIDF::ntf hash, a=0.4
   max = hash.values.max.to_f
   hash.each_pair { |k,v|
     hash[k] = a + (1-a)*(v/max)
@@ -19,7 +22,7 @@ def ntf hash, a=0.4
 end
 
 # returns idf value for each word in a vocabulary
-def idf list_of_hashes
+def TFIDF::idf list_of_hashes
   vocab = list_of_hashes.values.flatten.uniq
   n = list_of_hashes.size.to_f
   idf = {}
@@ -30,3 +33,6 @@ def idf list_of_hashes
   return idf
 end
 
+
+end #module
+