diff options
author | Patrick Simianer <p@simianer.de> | 2014-01-29 19:22:56 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-01-29 19:22:56 +0100 |
commit | d9d72e06db07087aa54401fae8b259f0c4ccd649 (patch) | |
tree | 97f0444314c40d2894ac0892d5559101eda01acf /lib/nlp_ruby/SparseVector.rb | |
parent | 22644ed1365e566c8bf806bfff4ecd43c46ce089 (diff) |
first usable version, name change => nlp_ruby
Diffstat (limited to 'lib/nlp_ruby/SparseVector.rb')
-rw-r--r-- | lib/nlp_ruby/SparseVector.rb | 64 |
1 files changed, 64 insertions, 0 deletions
# lib/nlp_ruby/SparseVector.rb

# A sparse numeric vector backed by a Hash: keys are dimensions, values are
# the components stored for them. Dimensions that were never set read as 0.
#
# All statistics (sum, average, variance, stddev) are computed over the
# stored components only; implicit zeros of unset dimensions are not counted.
class SparseVector < Hash

  # Creates an empty vector whose missing dimensions read as 0.
  def initialize
    super(0)
  end

  # Copies every key/value pair of +h+ into this vector, overwriting
  # components that are already present.
  #
  # @param h [Hash] dimension => value pairs to copy
  # @return [Hash] +h+ itself (the result of +each_pair+)
  def from_hash h
    h.each_pair { |dim, value| self[dim] = value }
  end

  # NOTE: this zero-argument method shadows Enumerable#sum on Ruby >= 2.4.
  #
  # @return [Numeric] sum of all stored components; 0 for an empty vector
  def sum
    # Seeded with 0 so that an empty vector yields 0 instead of nil.
    values.inject(0, :+)
  end

  # @return [Float] arithmetic mean of the stored components;
  #   0.0 for an empty vector
  def average
    return 0.0 if empty?
    sum / size.to_f
  end

  # Population variance of the stored components, i.e. the mean of the
  # squared deviations from the average.
  #
  # @return [Float] the variance; 0.0 for an empty vector
  def variance
    return 0.0 if empty?
    avg = average
    squared_deviations = values.inject(0.0) { |acc, value| acc + (avg - value)**2 }
    squared_deviations / size
  end

  # @return [Float] population standard deviation (square root of #variance)
  def stddev
    Math.sqrt(variance)
  end

  # Dot product with +other+. Only the dimensions stored in this vector are
  # visited; +other+ is read with +[]+ for each of them.
  #
  # @param other [SparseVector]
  # @return [Float]
  def dot other
    each_pair.inject(0.0) { |acc, (dim, value)| acc + value * other[dim] }
  end

  # Euclidean (L2) norm of the vector.
  #
  # @return [Float] 0.0 for an empty vector
  def magnitude
    # The explicit 0.0 seed matters: without it inject would start from the
    # first component unsquared and return nil for an empty vector.
    Math.sqrt(values.inject(0.0) { |acc, value| acc + value**2 })
  end

  # Cosine similarity between this vector and +other+.
  #
  # @param other [SparseVector]
  # @return [Float] 0.0 when either vector has zero magnitude (the
  #   similarity is undefined there and would otherwise come out as NaN)
  def cosinus_sim other
    norm_product = magnitude * other.magnitude
    return 0.0 if norm_product.zero?
    dot(other) / norm_product
  end

  # Euclidean distance between this vector and +other+, taken over the
  # union of the dimensions stored in either vector.
  #
  # @param other [SparseVector]
  # @return [Float]
  def euclidian_dist other
    # Array#| unions the key lists without flattening, so keys that are
    # themselves arrays stay intact.
    dims = keys | other.keys
    Math.sqrt(dims.inject(0.0) { |acc, dim| acc + (self[dim] - other[dim])**2 })
  end
end

# Computes the component-wise mean of several vectors: every dimension is
# summed across all vectors and divided by the number of vectors.
#
# @param array_of_vectors [Array<SparseVector>] vectors to average
# @return [SparseVector] the mean vector; empty when no vectors are given
def mean_sparse_vector array_of_vectors
  mean = SparseVector.new
  array_of_vectors.each do |vector|
    vector.each_pair { |dim, value| mean[dim] += value }
  end
  n = array_of_vectors.size.to_f
  # Iterate over a snapshot of the keys so the hash is not modified while
  # it is being traversed.
  mean.keys.each { |dim| mean[dim] /= n }
  mean
end