From 851f1582e839c272cbc0b6e29f5f911e281911d9 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 8 Apr 2016 14:10:41 +0200 Subject: dtrain: select features explicitly --- training/dtrain/lplp.rb | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb index cf28b477..ac3fb758 100755 --- a/training/dtrain/lplp.rb +++ b/training/dtrain/lplp.rb @@ -1,5 +1,7 @@ #!/usr/bin/env ruby +require 'zipf' + # norms def l0(feature_column, n) if feature_column.size >= n then return 1 else return 0 end @@ -86,7 +88,7 @@ end #_test() def usage() - puts "lplp.rb <#shards> < " + puts "lplp.rb <#shards> < " puts " l0...: norms for selection" puts "select_k: only output top k (according to the norm of their column vector) features" puts " cut: output features with weight >= threshold" @@ -95,8 +97,14 @@ def usage() end usage if ARGV.size<4 -norm_fun = method(ARGV[0].to_sym) +norm_fun = nil +feature_names = nil type = ARGV[1] +if type == 'feature_names' + feature_names = ARGV[0] +else + norm_fun = method(ARGV[0].to_sym) +end x = ARGV[2].to_f shard_count = ARGV[3].to_f @@ -117,6 +125,17 @@ if type == 'cut' cut(w, norm_fun, shard_count, x) elsif type == 'select_k' select_k(w, norm_fun, shard_count, x) +elsif type == 'feature_names' + a = ReadFile.readlines_strip "#{fnames}" + h = {} + a.each { |i| + h[i] = true + } + w.each_pair { |k,v| + if h[k] + puts "#{k}\t#{mean(v, shard_count)}" + end + } else puts "oh oh" end -- cgit v1.2.3