summary | refs | log | tree | commit | diff
path: root/dtrain/hstreaming/red-avg.rb
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2011-11-13 22:12:46 +0100
committer: Patrick Simianer <p@simianer.de> 2011-11-13 22:12:46 +0100
commit: 7b79fc9e6e6c9c2bb7f977978e319abe2143bbd9 (patch)
tree: d7272bb7db6c214f9cf66f665f142470e5aee9ed /dtrain/hstreaming/red-avg.rb
parent: c57425bbdbb9cbc437ed6cd2774702ecb2617d22 (diff)
new reducer: active on all tasks
Diffstat (limited to 'dtrain/hstreaming/red-avg.rb')
-rwxr-xr-x dtrain/hstreaming/red-avg.rb 9
1 files changed, 5 insertions, 4 deletions
diff --git a/dtrain/hstreaming/red-avg.rb b/dtrain/hstreaming/red-avg.rb
index 048128f5..771f4c0e 100755
--- a/dtrain/hstreaming/red-avg.rb
+++ b/dtrain/hstreaming/red-avg.rb
@@ -1,10 +1,11 @@
#!/usr/bin/env ruby1.9.1
-STDIN.set_encoding 'utf-8'
-
shard_count_key = "__SHARD_COUNT__"
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
w = {}
c = {}
w.default = 0
@@ -12,11 +13,11 @@ c.default = 0
while line = STDIN.gets
key, val = line.split /\t/
w[key] += val.to_f
- c[key] += 1.0
+ c[key] += 1
end
+puts "# dtrain reducer: average"
shard_count = w["__SHARD_COUNT__"]
-
w.each_key { |k|
if k == shard_count_key then next end
puts "#{k}\t#{w[k]/shard_count}"