summary refs log tree commit diff
path: root/kmeans
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-02-16 00:12:32 +0100
committer Patrick Simianer <p@simianer.de> 2014-02-16 00:12:32 +0100
commit 81a637ae52d2a1d0bc751b44c193765cdc1091f1 (patch)
tree 19708fb523ef32cbeccc4d87133f115650e13280 /kmeans
parent 99ae15932eae7e727b74f723107cf42aad80ba3f (diff)
nlp_ruby 0.3
Diffstat (limited to 'kmeans')
-rwxr-xr-x kmeans 7
1 files changed, 4 insertions, 3 deletions
diff --git a/kmeans b/kmeans
index 5c49d9a..02c9c42 100755
--- a/kmeans
+++ b/kmeans
@@ -8,7 +8,8 @@ def read_data fn
data = {}
ReadFile.new(fn).readlines_strip.map{ |i|
a = i.split ' ', 2
- data[a.first] = read_feature_string a.last
+ v = SparseVector.from_kv a.last
+ data[a.first] = v
}
return data
end
@@ -22,7 +23,7 @@ def rand_means_init data, k
prng = Random.new
a = []
0.upto(k-1) do
- a << mean_sparse_vector(data.values.sample k, random:prng)
+ a << SparseVector.mean(data.values.sample k, random:prng)
end
return a
end
@@ -51,7 +52,7 @@ end
def update assignment, data
new_centroids = []
assignment.each_pair { |centroid_index,a|
- new_centroids << mean_sparse_vector(assignment[centroid_index].map{ |i| data[i] })
+ new_centroids << SparseVector.mean(assignment[centroid_index].map{ |i| data[i] })
}
return new_centroids
end