summaryrefslogtreecommitdiff
path: root/lib/nlp_ruby/SparseVector.rb
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-06-16 17:44:07 +0200
committerPatrick Simianer <p@simianer.de>2014-06-16 17:44:07 +0200
commit4059a5d048cb0f72872c98073ef1ce120a30d78c (patch)
tree4fbff0dc62c5ef3deea0ffdec578e3f2c0ed74b6 /lib/nlp_ruby/SparseVector.rb
parent912ff6aebcf4f89f9e64b5f59956dbf7d8f624e3 (diff)
renaming to zipf
Diffstat (limited to 'lib/nlp_ruby/SparseVector.rb')
-rw-r--r--lib/nlp_ruby/SparseVector.rb172
1 files changed, 0 insertions, 172 deletions
diff --git a/lib/nlp_ruby/SparseVector.rb b/lib/nlp_ruby/SparseVector.rb
deleted file mode 100644
index 3096412..0000000
--- a/lib/nlp_ruby/SparseVector.rb
+++ /dev/null
@@ -1,172 +0,0 @@
# Sparse numeric vector stored as a Hash that maps a dimension key to its
# value. Dimensions that were never assigned read as 0 (the hash default).
class SparseVector < Hash

  # Builds an empty vector. When +arg+ is an Array, its elements are loaded
  # with their indices as keys (see #from_a); any other argument is ignored.
  def initialize(arg = nil)
    super(0)
    from_a(arg) if arg.is_a?(Array)
  end

  # Stores every element of +a+ under its index.
  def from_a(a)
    a.each_with_index { |value, index| self[index] = value }
  end

  # Returns a new vector filled from the Array +a+.
  def self.from_a(a)
    v = SparseVector.new
    v.from_a(a)
    v
  end

  # Copies every key/value pair of the Hash +h+ into this vector.
  def from_h(h)
    h.each_pair { |k, v| self[k] = v }
  end

  # Returns a new vector filled from the Hash +h+.
  def self.from_h(h)
    v = SparseVector.new
    v.from_h(h)
    v
  end

  # Evaluates +s+ as Ruby source and loads the resulting Hash.
  #
  # SECURITY: this calls +eval+ on its argument, so +s+ must never come from
  # an untrusted source. NOTE(review): consider a real parser here.
  def from_s(s)
    from_h(eval(s))
  end

  # Returns a new vector filled from the Ruby Hash literal in +s+.
  # The same +eval+ caveat as for #from_s applies.
  def self.from_s(s)
    v = SparseVector.new
    v.from_s(s)
    v
  end

  # Serialises the vector as "key<sep>value" items glued together by +join+.
  def to_kv(sep = '=', join = ' ')
    map { |k, v| "#{k}#{sep}#{v}" }.join(join)
  end

  # Parses whitespace-separated "key<sep>value" items from +s+ and stores
  # each value as a Float. +sep+ defaults to '=', mirroring #to_kv and
  # #from_file. An item without a value part is stored as 0.0.
  def from_kv(s, sep = '=')
    s.split.each do |item|
      key, value = item.split(sep)
      self[key] = value.to_f
    end
  end

  # Returns a new vector parsed from the key/value string +s+.
  def self.from_kv(s, sep = '=')
    v = SparseVector.new
    v.from_kv(s, sep)
    v
  end

  # Reads one "key<sep>value" pair per line from the file +fn+ (opened via
  # ReadFile) and stores each value as a Float. Blank lines are skipped so
  # that they do not create a +nil+ key.
  def from_file(fn, sep = '=')
    f = ReadFile.new(fn)
    begin
      while (line = f.gets)
        line = line.strip
        next if line.empty?
        key, value = line.split(sep)
        self[key] = value.to_f
      end
    ensure
      # Release the handle even when parsing fails. ReadFile is defined
      # outside this file, hence the respond_to? guard.
      f.close if f.respond_to?(:close)
    end
  end

  # Returns a new vector read from the file +fn+.
  def self.from_file(fn, sep = '=')
    v = SparseVector.new
    v.from_file(fn, sep)
    v
  end

  # Returns the union of this vector's keys and +other+'s keys, each key
  # listed once, own keys first.
  def join_keys(other)
    keys | other.keys
  end

  # Sum of all stored values; 0 for an empty vector.
  def sum
    values.inject(0, :+)
  end

  # True when +other+ has exactly the same keys and no value differs from
  # ours by more than +p+. Keys are compared by membership rather than by
  # sorting, so vectors whose keys are not mutually comparable work too.
  def approx_eql?(other, p = 10**-10)
    return false unless other
    return false unless other.size == size
    each_pair do |k, v|
      return false unless other.key?(k)
      return false if (v - other[k]).abs > p
    end
    true
  end

  # Arithmetic mean of the stored values (NaN for an empty vector).
  def average
    sum / size.to_f
  end

  # Sum of squared deviations from the average.
  # NOTE(review): the result is not divided by the number of values, so it
  # is not a normalised variance; kept as is because callers may rely on it.
  def variance
    avg = average
    values.inject(0.0) { |acc, x| acc + (avg - x)**2 }
  end

  # Square root of #variance (see the note there about normalisation).
  def stddev
    Math.sqrt(variance)
  end

  # Dot product with +other+, iterating over this vector's keys only.
  def dot(other)
    inject(0.0) { |acc, (k, v)| acc + v * other[k] }
  end

  # Sets the keys 0..n-1 to 0.0.
  def zeros(n)
    0.upto(n - 1) { |i| self[i] = 0.0 }
  end

  # Euclidean norm of the vector; 0.0 for an empty vector.
  def magnitude
    # The explicit 0.0 seed makes sure the first value is squared as well.
    Math.sqrt(values.inject(0.0) { |acc, x| acc + x**2 })
  end

  # Cosine similarity with +other+ (NaN when either magnitude is zero).
  def cosinus_sim(other)
    dot(other) / (magnitude * other.magnitude)
  end

  # Euclidean distance to +other+ over the union of both key sets.
  def euclidian_dist(other)
    squared = join_keys(other).inject(0.0) do |acc, d|
      acc + (self[d] - other[d])**2
    end
    Math.sqrt(squared)
  end

  # Element-wise sum as a new vector over the union of both key sets.
  def +(other)
    result = SparseVector.new
    join_keys(other).each { |k| result[k] = self[k] + other[k] }
    result
  end

  # Element-wise difference as a new vector over the union of both key sets.
  def -(other)
    result = SparseVector.new
    join_keys(other).each { |k| result[k] = self[k] - other[k] }
    result
  end

  # Returns a new vector with every value multiplied by +scalar+.
  # Raises ArgumentError unless +scalar+ is Numeric.
  def *(scalar)
    raise ArgumentError, "Arg is not numeric #{scalar}" unless scalar.is_a? Numeric
    result = SparseVector.new
    each_pair { |k, v| result[k] = v * scalar }
    result
  end

  # Component-wise mean of the vectors in the collection +a+. A key missing
  # from a vector contributes nothing to that key's total, but the divisor
  # is always the number of vectors.
  def self.mean(a)
    mean = SparseVector.new
    a.each do |vec|
      vec.each_pair { |k, v| mean[k] += v }
    end
    n = a.size.to_f
    mean.each_pair { |k, v| mean[k] = v / n }
    mean
  end
end
-