From 062eda911830c779aa685885b8e15ecceabfc085 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 14 Feb 2014 18:56:47 +0100
Subject: some class methods; Translation scores dict; DAG edges->outgoing;
 TFIDF module
---
 lib/nlp_ruby/SparseVector.rb | 57 +++++++++++++++++++++++++++++++-------------
 lib/nlp_ruby/Translation.rb  | 10 ++++++--
 lib/nlp_ruby/dag.rb          | 24 +++++++++----------
 lib/nlp_ruby/misc.rb         | 14 +++++------
 lib/nlp_ruby/tfidf.rb        | 12 +++++++---
 5 files changed, 76 insertions(+), 41 deletions(-)
diff --git a/lib/nlp_ruby/SparseVector.rb b/lib/nlp_ruby/SparseVector.rb
index b80373c..3096412 100644
--- a/lib/nlp_ruby/SparseVector.rb
+++ b/lib/nlp_ruby/SparseVector.rb
@@ -12,14 +12,32 @@ class SparseVector < Hash
     a.each_with_index { |i,j| self[j] = i }
   end
 
+  def self.from_a a
+    v = SparseVector.new
+    v.from_a a
+    return v
+  end
+
   def from_h h
     h.each_pair { |k,v| self[k] = v }
   end
 
+  def self.from_h h
+    v = SparseVector.new
+    v.from_h h
+    return v
+  end
+
   def from_s s
     from_h eval(s)
   end
 
+  def self.from_s s
+    v = SparseVector.new
+    v.from_s s
+    return v
+  end
+
   def to_kv sep='=', join=' '
     a = []
     self.each_pair { |k,v|
@@ -35,6 +53,12 @@ class SparseVector < Hash
     }
   end
 
+  def self.from_kv s
+    v = SparseVector.new
+    v.from_kv s
+    return v
+  end
+
   def from_file fn, sep='='
     f = ReadFile.new(fn)
     while line = f.gets
@@ -44,6 +68,12 @@ class SparseVector < Hash
     end
   end
 
+  def self.from_file fn, sep='='
+    v = SparseVector.new
+    v.from_file fn, sep
+    return v
+  end
+
   def join_keys other
     self.keys + other.keys
   end
@@ -126,24 +156,17 @@ class SparseVector < Hash
     }
     return new
   end
-end
-
-
-module SparseVector
-
 
-def SparseVector::mean a
-  mean = SparseVector.new
-  a.each { |i|
-    i.each_pair { |k,v|
-      mean[k] += v
+  def self.mean a
+    mean = SparseVector.new
+    a.each { |i|
+      i.each_pair { |k,v|
+        mean[k] += v
+      }
     }
-  }
-  n = array_of_vectors.size.to_f
-  mean.each_pair { |k,v| mean[k] = v/n }
-  return mean
-end
-
-
+    n = a.size.to_f
+    mean.each_pair { |k,v| mean[k] = v/n }
+    return mean
+  end
 end
 
diff --git a/lib/nlp_ruby/Translation.rb b/lib/nlp_ruby/Translation.rb
index 0c346a4..34effe0 100644
--- a/lib/nlp_ruby/Translation.rb
+++ b/lib/nlp_ruby/Translation.rb
@@ -1,5 +1,5 @@
 class Translation
-  attr_accessor :id, :s, :raw, :f, :score, :rank, :other_score
+  attr_accessor :id, :s, :raw, :f, :scores, :rank
 
   def initialize id=nil, raw=nil, s=nil, f=nil, scores={}, rank=nil
     @id = id
@@ -21,11 +21,17 @@ class Translation
       @s = raw
     end
     @id = id.to_i
-    @f = read_feature_string features
+    @f = SparseVector.from_kv features
     @scores['decoder'] = score.to_f
     @rank = rank
   end
 
+  def self.from_s s
+    t = self.new
+    t.from_s s
+    return t
+  end
+
   def to_s include_features=true
     [@id, @s, @f.to_kv('=', ' '), @scores['decoder']].join(' ||| ') if include_features
     [@id, @s, @scores['decoder']].join(' ||| ') if !include_features
diff --git a/lib/nlp_ruby/dag.rb b/lib/nlp_ruby/dag.rb
index cca35c5..6f514c7 100644
--- a/lib/nlp_ruby/dag.rb
+++ b/lib/nlp_ruby/dag.rb
@@ -4,27 +4,27 @@ require 'json'
 
 
 class DAG::Node
-  attr_accessor :label, :edges, :incoming, :score, :mark
+  attr_accessor :label, :outgoing, :incoming, :score, :mark
 
-  def initialize label=nil, edges=[], incoming=[], score=nil
+  def initialize label=nil, outgoing=[], incoming=[], score=nil
     @label    = label
-    @edges    = edges # outgoing
+    @outgoing = outgoing
     @incoming = incoming
     @score    = nil
   end
 
   def add_edge head, weight=0
     exit if self==head # no self-cycles!
-    @edges << DAG::Edge.new(self, head, weight)
-    return @edges.last
+    @outgoing << DAG::Edge.new(self, head, weight)
+    return @outgoing.last
   end
 
   def to_s
-    "DAG::Node