diff options
author | Patrick Simianer <p@simianer.de> | 2011-09-27 00:14:27 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-09-27 00:14:27 +0200 |
commit | c04c964b2a3c4e0f0c5a85acad5404fa6b7fb976 (patch) | |
tree | e7e35f0e1193d61c82140b6d06ab469f15367dc3 /dtrain/hstreaming/red-avg.rb | |
parent | 63fe5ea76c52f76a924f1d3df9f6bff6a2c0d93d (diff) | |
parent | 40c884b3d74a1779be80974fc6fc926b0812813c (diff) |
more streaming
Diffstat (limited to 'dtrain/hstreaming/red-avg.rb')
-rwxr-xr-x | dtrain/hstreaming/red-avg.rb | 26 |
1 files changed, 26 insertions, 0 deletions
#!/usr/bin/env ruby1.9.1
# dtrain/hstreaming/red-avg.rb
#
# Reads "key<TAB>value" lines from STDIN, sums the values per key, and prints
# each key's sum divided by the summed value of the special __SHARD_COUNT__
# key. The accumulated shard count itself is printed last, undivided.
# NOTE(review): presumably the reduce step of a streaming job (the file is
# named "red-avg" and lives under hstreaming/) -- confirm against the caller.

# Key whose summed value is used as the divisor for every other key.
SHARD_COUNT_KEY = "__SHARD_COUNT__".freeze

# Sums tab-separated key/value lines and divides each sum by the shard count.
#
# lines - any object responding to #each that yields "key\tvalue" Strings
#         (an IO line enumerator, an Array of Strings, ...).
#
# Returns an Array of "key\tvalue" output Strings: one per key in first-seen
# order (excluding the shard-count key), followed by a final entry carrying
# the summed shard count.
#
# Values are parsed with String#to_f, so a missing or non-numeric value
# counts as 0.0. No guard is applied when the shard count is absent or zero:
# the division then yields Infinity/NaN, as Float division does.
def average_weights(lines)
  sums = Hash.new(0)
  lines.each do |line|
    key, val = line.split(/\t/)
    sums[key] += val.to_f
  end

  shard_count = sums[SHARD_COUNT_KEY]
  averaged = []
  sums.each do |key, total|
    next if key == SHARD_COUNT_KEY
    # The original emitted the literal text "{w[k]/shard_count}" here because
    # the interpolation marker '#' was missing; emit the computed value.
    averaged << "#{key}\t#{total / shard_count}"
  end
  averaged << "#{SHARD_COUNT_KEY}\t#{sums[SHARD_COUNT_KEY]}"
end

if __FILE__ == $PROGRAM_NAME
  STDIN.set_encoding 'utf-8'
  puts average_weights(STDIN.each_line)
end