From 062eda911830c779aa685885b8e15ecceabfc085 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 14 Feb 2014 18:56:47 +0100
Subject: some class methods; Translation scores dict; DAG edges->outgoing; TFIDF module
---
lib/nlp_ruby/SparseVector.rb | 57 +++++++++++++++++++++++++++++++-------------
lib/nlp_ruby/Translation.rb | 10 ++++++--
lib/nlp_ruby/dag.rb | 24 +++++++++----------
lib/nlp_ruby/misc.rb | 14 +++++------
lib/nlp_ruby/tfidf.rb | 12 +++++++---
5 files changed, 76 insertions(+), 41 deletions(-)
(limited to 'lib')
diff --git a/lib/nlp_ruby/SparseVector.rb b/lib/nlp_ruby/SparseVector.rb
index b80373c..3096412 100644
--- a/lib/nlp_ruby/SparseVector.rb
+++ b/lib/nlp_ruby/SparseVector.rb
@@ -12,14 +12,32 @@ class SparseVector < Hash
a.each_with_index { |i,j| self[j] = i }
end
+ def self.from_a a
+ v = SparseVector.new
+ v.from_a a
+ return v
+ end
+
def from_h h
h.each_pair { |k,v| self[k] = v }
end
+ def self.from_h h
+ v = SparseVector.new
+ v.from_h h
+ return v
+ end
+
def from_s s
from_h eval(s)
end
+ def self.from_s s
+ v = SparseVector.new
+ v.from_s s
+ return v
+ end
+
def to_kv sep='=', join=' '
a = []
self.each_pair { |k,v|
@@ -35,6 +53,12 @@ class SparseVector < Hash
}
end
+ def self.from_kv s
+ v = SparseVector.new
+ v.from_kv s
+ return v
+ end
+
def from_file fn, sep='='
f = ReadFile.new(fn)
while line = f.gets
@@ -44,6 +68,12 @@ class SparseVector < Hash
end
end
+ def self.from_file fn, sep='='
+ v = SparseVector.new
+ v.from_file fn, sep
+ return v
+ end
+
def join_keys other
self.keys + other.keys
end
@@ -126,24 +156,17 @@ class SparseVector < Hash
}
return new
end
-end
-
-
-module SparseVector
-
-def SparseVector::mean a
- mean = SparseVector.new
- a.each { |i|
- i.each_pair { |k,v|
- mean[k] += v
+ def self.mean a
+ mean = SparseVector.new
+ a.each { |i|
+ i.each_pair { |k,v|
+ mean[k] += v
+ }
}
- }
- n = array_of_vectors.size.to_f
- mean.each_pair { |k,v| mean[k] = v/n }
- return mean
-end
-
-
+ n = a.size.to_f
+ mean.each_pair { |k,v| mean[k] = v/n }
+ return mean
+ end
end
diff --git a/lib/nlp_ruby/Translation.rb b/lib/nlp_ruby/Translation.rb
index 0c346a4..34effe0 100644
--- a/lib/nlp_ruby/Translation.rb
+++ b/lib/nlp_ruby/Translation.rb
@@ -1,5 +1,5 @@
class Translation
- attr_accessor :id, :s, :raw, :f, :score, :rank, :other_score
+ attr_accessor :id, :s, :raw, :f, :scores, :rank
def initialize id=nil, raw=nil, s=nil, f=nil, scores={}, rank=nil
@id = id
@@ -21,11 +21,17 @@ class Translation
@s = raw
end
@id = id.to_i
- @f = read_feature_string features
+ @f = SparseVector.from_kv features
@scores['decoder'] = score.to_f
@rank = rank
end
+ def self.from_s s
+ t = self.new
+ t.from_s s
+ return t
+ end
+
def to_s include_features=true
[@id, @s, @f.to_kv('=', ' '), @scores['decoder']].join(' ||| ') if include_features
[@id, @s, @scores['decoder']].join(' ||| ') if !include_features
diff --git a/lib/nlp_ruby/dag.rb b/lib/nlp_ruby/dag.rb
index cca35c5..6f514c7 100644
--- a/lib/nlp_ruby/dag.rb
+++ b/lib/nlp_ruby/dag.rb
@@ -4,27 +4,27 @@ require 'json'
class DAG::Node
- attr_accessor :label, :edges, :incoming, :score, :mark
+ attr_accessor :label, :outgoing, :incoming, :score, :mark
- def initialize label=nil, edges=[], incoming=[], score=nil
+ def initialize label=nil, outgoing=[], incoming=[], score=nil
@label = label
- @edges = edges # outgoing
+ @outgoing = outgoing
@incoming = incoming
@score = nil
end
def add_edge head, weight=0
exit if self==head # no self-cycles!
- @edges << DAG::Edge.new(self, head, weight)
- return @edges.last
+ @outgoing << DAG::Edge.new(self, head, weight)
+ return @outgoing.last
end
def to_s
- "DAG::Node