summary refs log tree commit diff
path: root/training/dtrain/lplp.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-09-19 11:05:23 +0200
committer Patrick Simianer <p@simianer.de> 2015-09-19 11:05:23 +0200
commit 92a376c3479a18e2a6f674ecc6955a7eb29f217a (patch)
tree 3ac35108b8ca53299db638bf7f7695c93f4806f5 /training/dtrain/lplp.rb
parent 86ea4ed498d96c1d988f2287afa580dcf558ddb0 (diff)
dtrain: current version
Diffstat (limited to 'training/dtrain/lplp.rb')
-rwxr-xr-x training/dtrain/lplp.rb 12
1 files changed, 6 insertions, 6 deletions
diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb
index 86e835e8..cf28b477 100755
--- a/training/dtrain/lplp.rb
+++ b/training/dtrain/lplp.rb
@@ -1,4 +1,4 @@
-# lplp.rb
+#!/usr/bin/env ruby
# norms
def l0(feature_column, n)
@@ -19,7 +19,8 @@ end
# stats
def median(feature_column, n)
- return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2]
+ return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0})
+ .sort[feature_column.size/2]
end
def mean(feature_column, n)
@@ -28,7 +29,7 @@ end
# selection
def select_k(weights, norm_fun, n, k=10000)
- weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p|
+ weights.sort{|a,b| norm_fun.call(b[1], n)<=>norm_fun.call(a[1], n)}.each { |p|
puts "#{p[0]}\t#{mean(p[1], n)}"
k -= 1
if k == 0 then break end
@@ -84,17 +85,16 @@ def _test()
end
#_test()
-
def usage()
puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> <#shards> < <input>"
puts " l0...: norms for selection"
puts "select_k: only output top k (according to the norm of their column vector) features"
puts " cut: output features with weight >= threshold"
- puts " n: if we do not have a shard count use this number for averaging"
+ puts " n: number of shards for averaging"
exit 1
end
-if ARGV.size < 4 then usage end
+usage if ARGV.size<4
norm_fun = method(ARGV[0].to_sym)
type = ARGV[1]
x = ARGV[2].to_f