summary | refs | log | tree | commit | diff
path: root/dtrain/hstreaming/lplp.rb
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2011-11-30 14:34:22 +0100
committer: Patrick Simianer <p@simianer.de> 2011-11-30 14:34:22 +0100
commit: 470ec6988a00e77d361115c4bff850fcb12e7539 (patch)
tree: 8da9e523b463ab15520e73ba2bac4ba7b7d29245 /dtrain/hstreaming/lplp.rb
parent: 06a270c8d3f061bc7fed062cb28605dd8c6e3a8f (diff)
lplp.rb doc
Diffstat (limited to 'dtrain/hstreaming/lplp.rb')
-rwxr-xr-x dtrain/hstreaming/lplp.rb 6
1 files changed, 5 insertions, 1 deletions
diff --git a/dtrain/hstreaming/lplp.rb b/dtrain/hstreaming/lplp.rb
index 0ec21a46..40409bbd 100755
--- a/dtrain/hstreaming/lplp.rb
+++ b/dtrain/hstreaming/lplp.rb
@@ -37,7 +37,7 @@ end
def cut(weights, norm_fun, n, epsilon=0.0001)
weights.each { |k,v|
- if norm_fun.call(v, n).abs > epsilon
+ if norm_fun.call(v, n).abs >= epsilon
puts "#{k}\t#{mean(v, n)}"
end
}
@@ -87,6 +87,10 @@ _test()
# actually do something
def usage()
puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> [n] < <input>"
+ puts " l0...: norms for selection"
+ puts "select_k: only output top k (according to the norm of their column vector) features"
+ puts " cut: output features with weight >= threshold"
+ puts " n: if we do not have a shard count use this number for averaging"
exit
end