# frozen_string_literal: true

# lplp.rb
#
# Provenance (patch header this file was extracted from):
#   commit  e35bc000fd461dfdef3d2abfa550be84ad97acc3
#   author  Patrick Simianer
#   date    Tue, 29 Nov 2011 10:51:17 +0100
#   subject lplp.rb
#   path    dtrain/hstreaming/lplp.rb (new file, mode 100755)
#
# Reads tab-separated "key<TAB>value" lines from STDIN. Lines whose key is
# "__SHARD_COUNT__" are counted; every other line contributes one numeric
# value to the column collected for its key. The script then prints, for the
# highest-ranked keys under the l1 norm, "key<TAB>mean" where the mean is
# taken over the number of counted shard lines.

# norms

# Indicator norm: 1 if the column has exactly n entries, 0 otherwise.
def l0(feature_column, n)
  feature_column.size == n ? 1 : 0
end

# Sum of absolute values. n is accepted (and ignored) so that every norm can
# be called as normfn.call(column, n).
def l1(feature_column, n=-1)
  # An explicit initial value keeps the accumulator correct for one-element
  # columns and makes the empty column yield 0.
  feature_column.reduce(0) { |sum, i| sum + i.abs }
end

# Euclidean norm: square root of the sum of squares. n is ignored.
def l2(feature_column, n=-1)
  Math.sqrt(feature_column.reduce(0) { |sum, i| sum + i**2 })
end

# Maximum absolute value; 0 for an empty column. n is ignored.
def linfty(feature_column, n=-1)
  feature_column.map(&:abs).max || 0
end

# stats

# Median-like statistic: pads the column with zeros up to n entries, sorts it
# and returns the element at index size/2 of the padded column. The caller's
# array is left untouched. Returns nil when the padded column is empty.
def M(feature_column, n)
  missing = [n - feature_column.size, 0].max
  padded = feature_column + Array.new(missing, 0)
  padded.sort[padded.size / 2]
end

# Sum of the column divided by n, always computed in floating point so that
# integer input is not truncated. n must be non-zero for a finite result.
def mean(feature_column, n)
  feature_column.reduce(0.0) { |sum, i| sum + i } / n
end

# selection

# Prints "key<TAB>mean" for the k entries of weights (key => column) with the
# largest normfn.call(column, n), in descending order of that norm.
# Nothing is printed when k <= 0.
def select_k(weights, normfn, n, k=10000)
  # sort_by evaluates the norm once per entry instead of once per comparison.
  ranked = weights.sort_by { |_key, column| -normfn.call(column, n) }
  ranked.first([k, 0].max).each do |key, column|
    puts "#{key}\t#{mean(column, n)}"
  end
end

# Prints "key<TAB>mean" for every entry of weights whose norm exceeds epsilon
# in absolute value, in the iteration order of weights.
def cut(weights, normfn, n, epsilon=0.0001)
  weights.each do |key, column|
    # Pass n like select_k does, so norms that require it (l0) work here too.
    next unless normfn.call(column, n).abs > epsilon

    puts "#{key}\t#{mean(column, n)}"
  end
end

shard_count_key = "__SHARD_COUNT__"

STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'

w = {}
shard_count = 0
STDIN.each_line do |line|
  next if line.strip.empty?

  key, val = line.chomp.split("\t")
  # Comparison, not assignment: only marker lines count as shards.
  if key == shard_count_key
    shard_count += 1
    next
  end
  # Store numbers, not raw strings; Float raises on malformed values instead
  # of silently turning them into 0.0.
  (w[key] ||= []) << Float(val)
end

# Without any shard marker the mean would be a division by zero.
abort "lplp.rb: no #{shard_count_key} line seen on STDIN" if shard_count.zero? && !w.empty?

select_k(w, method(:l1), shard_count, 100000)

# Manual smoke test: prints the selections next to the keys they are expected
# to contain. Not run by default (see the commented-out call below).
def _test
  puts
  w = {}
  w["a"] = [1, 2, 3]
  w["b"] = [1, 2]
  w["c"] = [66]
  w["d"] = [10, 20, 30]
  n = 3
  puts w.to_s
  puts
  puts "select_k"
  puts "l0 expect ad"
  select_k(w, method(:l0), n, 2)
  puts "l1 expect c"
  select_k(w, method(:l1), n, 1)
  # l2 norms: a = sqrt(14), b = sqrt(5), c = 66, d = sqrt(1400) ~ 37.4
  puts "l2 expect c"
  select_k(w, method(:l2), n, 1)
  puts
  puts "cut"
  puts "l1 expect cd"
  cut(w, method(:l1), n, 7)
  puts
  puts "M"
  a = [1, 3, 4, 5, 6]
  puts a.to_s
  puts M(a, 7)
  puts "that's because we add missing 0s"
  padded = a + Array.new(7 - a.size, 0)
  puts padded.to_s
  puts
end

# _test