From 851f1582e839c272cbc0b6e29f5f911e281911d9 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 8 Apr 2016 14:10:41 +0200
Subject: dtrain: select features explicitly
---
training/dtrain/lplp.rb | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb
index cf28b477..ac3fb758 100755
--- a/training/dtrain/lplp.rb
+++ b/training/dtrain/lplp.rb
@@ -1,5 +1,7 @@
#!/usr/bin/env ruby
+require 'zipf'
+
# norms
def l0(feature_column, n)
if feature_column.size >= n then return 1 else return 0 end
@@ -86,7 +88,7 @@ end
#_test()
def usage()
- puts "lplp.rb <#shards> < "
+ puts "lplp.rb <#shards> < "
puts " l0...: norms for selection"
puts "select_k: only output top k (according to the norm of their column vector) features"
puts " cut: output features with weight >= threshold"
@@ -95,8 +97,14 @@ def usage()
end
usage if ARGV.size<4
-norm_fun = method(ARGV[0].to_sym)
+norm_fun = nil # set only when ARGV[0] names a norm function (every mode except 'feature_names')
+feature_names = nil # set only in 'feature_names' mode; presumably a path to a file of feature names -- confirm against caller
type = ARGV[1]
+if type == 'feature_names' # in this mode ARGV[0] is not a norm name, so it must not be passed to method()
+ feature_names = ARGV[0]
+else
+ norm_fun = method(ARGV[0].to_sym) # look up the norm function by the name given on the command line
+end
x = ARGV[2].to_f
shard_count = ARGV[3].to_f
@@ -117,6 +125,17 @@ if type == 'cut'
cut(w, norm_fun, shard_count, x)
elsif type == 'select_k'
select_k(w, norm_fun, shard_count, x)
+elsif type == 'feature_names'
+ a = ReadFile.readlines_strip "#{fnames}"
+ h = {}
+ a.each { |i|
+ h[i] = true
+ }
+ w.each_pair { |k,v|
+ if h[k]
+ puts "#{k}\t#{mean(v, shard_count)}"
+ end
+ }
else
puts "oh oh"
end
--
cgit v1.2.3