diff options
author | Patrick Simianer <p@simianer.de> | 2014-06-16 17:44:07 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-06-16 17:44:07 +0200 |
commit | 4059a5d048cb0f72872c98073ef1ce120a30d78c (patch) | |
tree | 4fbff0dc62c5ef3deea0ffdec578e3f2c0ed74b6 /lib/nlp_ruby/SparseVector.rb | |
parent | 912ff6aebcf4f89f9e64b5f59956dbf7d8f624e3 (diff) |
renaming to zipf
Diffstat (limited to 'lib/nlp_ruby/SparseVector.rb')
-rw-r--r-- | lib/nlp_ruby/SparseVector.rb | 172 |
1 files changed, 0 insertions, 172 deletions
# lib/nlp_ruby/SparseVector.rb
#
# Sparse numeric vector stored as a Hash from dimension (any hashable key)
# to value. Dimensions that were never set read as 0 via the Hash default.
class SparseVector < Hash
  # @param arg [Array, nil] when an Array is given, its elements are stored
  #   under their integer indices; any other argument is ignored
  def initialize(arg = nil)
    # Explicit arguments: a bare `super` would forward +arg+ to Hash.new,
    # where it would be taken as the default value.
    super(0)
    from_a(arg) if arg.is_a?(Array)
  end

  # Stores every element of +a+ under its index.
  #
  # @param a [Array]
  def from_a(a)
    a.each_with_index { |value, index| self[index] = value }
  end

  # @param a [Array]
  # @return [SparseVector] new vector filled from +a+
  def self.from_a(a)
    v = SparseVector.new
    v.from_a(a)
    v
  end

  # Copies every key/value pair of +h+ into this vector.
  #
  # @param h [Hash]
  def from_h(h)
    h.each_pair { |k, v| self[k] = v }
  end

  # @param h [Hash]
  # @return [SparseVector] new vector filled from +h+
  def self.from_h(h)
    v = SparseVector.new
    v.from_h(h)
    v
  end

  # Fills the vector from the Ruby source of a Hash literal.
  #
  # SECURITY: +s+ is passed to +eval+, which executes arbitrary Ruby code.
  # Never call this with untrusted input.
  #
  # @param s [String] Ruby expression that evaluates to a Hash
  def from_s(s)
    from_h(eval(s))
  end

  # @param s [String] see {#from_s}, including its security warning
  # @return [SparseVector]
  def self.from_s(s)
    v = SparseVector.new
    v.from_s(s)
    v
  end

  # Serialises the vector as "key<sep>value" pairs.
  #
  # @param sep [String] placed between a key and its value
  # @param join [String] placed between pairs
  # @return [String]
  def to_kv(sep = '=', join = ' ')
    map { |k, v| "#{k}#{sep}#{v}" }.join(join)
  end

  # Parses whitespace-separated "key<sep>value" pairs; values are converted
  # with +to_f+ and keys stay Strings.
  #
  # @param s [String]
  # @param sep [String] key/value separator (defaults to '=', matching #to_kv)
  def from_kv(s, sep = '=')
    s.split.each do |pair|
      key, value = pair.split(sep)
      self[key] = value.to_f
    end
  end

  # @param s [String]
  # @param sep [String] key/value separator
  # @return [SparseVector]
  def self.from_kv(s, sep = '=')
    v = SparseVector.new
    v.from_kv(s, sep)
    v
  end

  # Reads one "key<sep>value" pair per line from +fn+.
  #
  # NOTE(review): ReadFile is not defined in this class; presumably it is a
  # project-level reader that responds to +gets+ - confirm.
  #
  # @param fn [String] file name handed to ReadFile.new
  # @param sep [String] key/value separator
  def from_file(fn, sep = '=')
    f = ReadFile.new(fn)
    while (line = f.gets)
      line = line.strip
      # A blank line would otherwise be stored as a nil => 0.0 entry.
      next if line.empty?

      key, value = line.split(sep)
      self[key] = value.to_f
    end
  ensure
    # Release the reader even when parsing raises; guarded because the
    # reader's interface is not visible from here.
    f.close if f.respond_to?(:close)
  end

  # @param fn [String]
  # @param sep [String]
  # @return [SparseVector]
  def self.from_file(fn, sep = '=')
    v = SparseVector.new
    v.from_file(fn, sep)
    v
  end

  # @param other [Hash]
  # @return [Array] every dimension present in either vector, each listed once
  def join_keys(other)
    keys | other.keys
  end

  # @return [Numeric] sum of all stored values (0 for an empty vector)
  def sum
    values.inject(0, :+)
  end

  # Tolerant equality: both vectors must have exactly the same dimensions
  # and no pair of values may differ by more than +p+.
  #
  # @param other [Hash, nil]
  # @param p [Numeric] largest accepted absolute difference per dimension
  # @return [Boolean]
  def approx_eql?(other, p = 10**-10)
    return false unless other
    return false if other.size != size

    # With equal sizes, every key of self being present in other implies the
    # key sets are identical; this avoids sorting, which fails on mixed types.
    each_key do |k|
      return false unless other.key?(k)
      return false if (self[k] - other[k]).abs > p
    end
    true
  end

  # @return [Float] arithmetic mean of the stored values
  def average
    sum / size.to_f
  end

  # NOTE(review): this is the sum of squared deviations from the mean and is
  # not divided by the number of values - confirm that callers expect this.
  #
  # @return [Float]
  def variance
    avg = average
    values.inject(0.0) { |acc, v| acc + (avg - v)**2 }
  end

  # @return [Float] square root of {#variance}
  def stddev
    Math.sqrt(variance)
  end

  # @param other [Hash] must yield a number for every dimension of self
  # @return [Float] dot product over the dimensions stored in self
  def dot(other)
    inject(0.0) { |acc, (k, v)| acc + v * other[k] }
  end

  # Sets dimensions 0...n to 0.0.
  #
  # @param n [Integer]
  def zeros(n)
    0.upto(n - 1) { |i| self[i] = 0.0 }
  end

  # @return [Float] Euclidean norm (0.0 for an empty vector)
  def magnitude
    # Seed the fold: without an initial value the first element would be
    # used as the accumulator and never squared.
    Math.sqrt(values.inject(0.0) { |acc, v| acc + v**2 })
  end

  # @param other [SparseVector]
  # @return [Float] cosine similarity of the two vectors
  def cosinus_sim(other)
    dot(other) / (magnitude * other.magnitude)
  end

  # @param other [SparseVector]
  # @return [Float] Euclidean distance over the union of dimensions
  def euclidian_dist(other)
    Math.sqrt(join_keys(other).inject(0.0) { |acc, d| acc + (self[d] - other[d])**2 })
  end

  # @param other [SparseVector]
  # @return [SparseVector] element-wise sum over the union of dimensions
  def +(other)
    result = SparseVector.new
    join_keys(other).each { |k| result[k] = self[k] + other[k] }
    result
  end

  # @param other [SparseVector]
  # @return [SparseVector] element-wise difference over the union of dimensions
  def -(other)
    result = SparseVector.new
    join_keys(other).each { |k| result[k] = self[k] - other[k] }
    result
  end

  # @param scalar [Numeric]
  # @return [SparseVector] copy with every value multiplied by +scalar+
  # @raise [ArgumentError] if +scalar+ is not Numeric
  def *(scalar)
    raise ArgumentError, "Arg is not numeric #{scalar}" unless scalar.is_a? Numeric

    result = SparseVector.new
    each_pair { |k, v| result[k] = v * scalar }
    result
  end

  # Averages a collection of vectors dimension by dimension; a dimension
  # missing from a vector contributes nothing to that dimension's total.
  #
  # @param a [Array<Hash>]
  # @return [SparseVector]
  def self.mean(a)
    mean = SparseVector.new
    a.each do |vec|
      vec.each_pair { |k, v| mean[k] += v }
    end
    n = a.size.to_f
    # Only existing keys are reassigned, which is allowed during iteration.
    mean.each_pair { |k, v| mean[k] = v / n }
    mean
  end
end