summary | refs | log | tree | commit | diff
path: root/avg_weights
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2013-12-05 07:56:38 +0100
committer Patrick Simianer <p@simianer.de> 2013-12-05 07:56:38 +0100
commit db6a6ecfa350cae29739c59df1210d8f76a479c9 (patch)
tree f137a001f57f170455c28ce97b5abb2726006cf6 /avg_weights
init
Diffstat (limited to 'avg_weights')
-rwxr-xr-x avg_weights 46
1 files changed, 46 insertions, 0 deletions
diff --git a/avg_weights b/avg_weights
new file mode 100755
index 0000000..2b72747
--- /dev/null
+++ b/avg_weights
@@ -0,0 +1,46 @@
+#!/usr/bin/env ruby
+
+require 'trollop'
+require 'zlib'
+
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+opts = Trollop::options do
+ opt :filter, "Filter if key does not appear in every file.", :type => :bool, :default => false
+end
+
+def usage
+ puts "avg_weights_filter [--filter] <filename>+"
+ exit 1
+end
+usage if ARGV.size==0
+
+
+h = {}
+ARGV.each { |fn|
+if File.extname(fn)=='.gz'
+ f = Zlib::GzipReader.new(File.new(fn, 'rb'))
+else
+ f = File.new fn, 'r'
+end
+while line = f.gets
+ k, v = line.split
+ v = v.to_f
+ if h.has_key? k
+ h[k] << v
+ else
+ h[k] = [v]
+ end
+end
+f.close
+}
+
+n = ARGV.size.to_f
+
+h.each_pair { |k,a|
+ next if opts[:filter] and a.size < n
+ puts "#{k} #{a.inject(:+)/n}"
+}
+