diff options
Diffstat (limited to 'dtrain/hstreaming/red-avg.rb')
-rwxr-xr-x | dtrain/hstreaming/red-avg.rb | 27 |
1 files changed, 27 insertions, 0 deletions
#!/usr/bin/env ruby1.9.1
#
# dtrain reducer: average.
#
# Reads "key<TAB>value" records, sums the values per key, and divides every
# sum by the shard count. The shard count is the sum of all values seen for
# the special key __SHARD_COUNT__.
# NOTE(review): presumably run as a Hadoop streaming reducer (the script
# lives under dtrain/hstreaming) -- confirm against the job configuration.
#
# Output:
#   # dtrain reducer: average
#   <key>\t<sum / shard count>\t# <number of records seen for key>
#   # shard count: <n>     (emitted where the key first appeared in the input)

# Key whose summed values give the number of shards that contributed.
SHARD_COUNT_KEY = "__SHARD_COUNT__".freeze

# Sums the values and counts the records for every key in +input+.
#
# input   - any object responding to each_line (IO, StringIO, ...).
#
# Blank lines and records without both a key and a value are skipped, so a
# stray newline can never end up inside a key and corrupt the output.
#
# Returns [sums, counts]: two hashes keyed by the record key, in order of
# first appearance.
def accumulate_weights(input)
  sums = Hash.new(0.0)
  counts = Hash.new(0)
  input.each_line do |line|
    key, val = line.chomp.split("\t")
    next if key.nil? || key.empty? || val.nil?

    sums[key] += val.to_f
    counts[key] += 1
  end
  [sums, counts]
end

# Writes the averaged weights to +output+.
#
# sums    - hash of key => summed value (including SHARD_COUNT_KEY).
# counts  - hash of key => number of records seen.
# output  - any object responding to puts.
#
# Raises ArgumentError if there are weights to average but the shard count
# is missing or not positive; dividing by it would only produce
# Infinity/NaN weights. Nothing is written in that case.
def write_averages(sums, counts, output)
  shard_count = sums[SHARD_COUNT_KEY]
  has_weights = sums.any? { |key, _| key != SHARD_COUNT_KEY }
  if has_weights && shard_count <= 0
    raise ArgumentError,
          "missing or non-positive #{SHARD_COUNT_KEY} (got #{shard_count}); cannot average"
  end

  output.puts "# dtrain reducer: average"
  sums.each do |key, total|
    if key == SHARD_COUNT_KEY
      output.puts "# shard count: #{shard_count.to_i}"
    else
      output.puts "#{key}\t#{total / shard_count}\t# #{counts[key]}"
    end
  end
end

# Reads records from +input+ and writes the per-key averages to +output+.
def reduce_average(input, output)
  sums, counts = accumulate_weights(input)
  write_averages(sums, counts, output)
end

if __FILE__ == $PROGRAM_NAME
  STDIN.set_encoding 'utf-8'
  STDOUT.set_encoding 'utf-8'
  reduce_average(STDIN, STDOUT)
end