summaryrefslogtreecommitdiff
path: root/lib/nlp_ruby/SparseVector.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/nlp_ruby/SparseVector.rb')
-rw-r--r--lib/nlp_ruby/SparseVector.rb64
1 files changed, 64 insertions, 0 deletions
diff --git a/lib/nlp_ruby/SparseVector.rb b/lib/nlp_ruby/SparseVector.rb
new file mode 100644
index 0000000..0033690
--- /dev/null
+++ b/lib/nlp_ruby/SparseVector.rb
@@ -0,0 +1,64 @@
+class SparseVector < Hash
+
+ def initialize
+ super
+ self.default = 0
+ end
+
+ def from_hash h
+ h.each_pair { |k,v| self[k] = v }
+ end
+
+ def sum
+ self.values.inject(:+)
+ end
+
+ def average
+ self.sum/self.size.to_f
+ end
+
+ def variance
+ avg = self.average
+ var = 0.0
+ self.values.each { |i| var += (avg - i)**2 }
+ return var
+ end
+
+ def stddev
+ Math.sqrt self.variance
+ end
+
+ def dot other
+ sum = 0.0
+ self.each_pair { |k,v| sum += v * other[k] }
+ return sum
+ end
+
+ def magnitude
+ Math.sqrt self.values.inject { |sum,i| sum+i**2 }
+ end
+
+ def cosinus_sim other
+ self.dot(other)/(self.magnitude*other.magnitude)
+ end
+
+ def euclidian_dist other
+ dims = [self.keys, other.keys].flatten.uniq
+ sum = 0.0
+ dims.each { |d| sum += (self[d] - other[d])**2 }
+ return Math.sqrt(sum)
+ end
+end
+
+def mean_sparse_vector array_of_vectors
+ mean = SparseVector.new
+ array_of_vectors.each { |i|
+ i.each_pair { |k,v|
+ mean[k] += v
+ }
+ }
+ n = array_of_vectors.size.to_f
+ mean.each_pair { |k,v| mean[k] = v/n }
+ return mean
+end
+