#!/usr/bin/env ruby1.9.1
#
# dtrain/hstreaming/red-all.rb
#
# Provenance (from the commit that introduced this file):
#   commit:  f99ba621e5bd10b069b453d11b3b4981dc482b6c
#   author:  Patrick Simianer
#   date:    Sun, 13 Nov 2011 22:12:46 +0100
#   subject: new reducer: active on all tasks
#
# Streaming reducer: reads "key<TAB>value" records from STDIN, sums the
# values per key and counts how often each key occurs. A key is printed
# (with its summed value divided by the shard count) only when it occurred
# exactly as many times as the summed value stored under SHARD_COUNT_KEY.
# NOTE(review): presumably every shard emits one "__SHARD_COUNT__\t1"
# record and each key at most once per shard -- confirm against the mapper.

# Reserved key whose summed value is interpreted as the number of shards.
SHARD_COUNT_KEY = "__SHARD_COUNT__"

# Sums the values and counts the occurrences of every key.
#
# lines - any object whose #each yields "key<TAB>value" strings (a trailing
#         line terminator is allowed).
#
# Records without a tab separator (including blank lines) are skipped so
# they cannot turn into bogus keys. Only the text up to the first tab is
# the key; the value is parsed with String#to_f, so unparsable values
# count as 0.0.
#
# Returns a two-element Array [weights, counts], both Hashes keyed by the
# record key: weights holds the summed values, counts the occurrences.
def accumulate(lines)
  weights = Hash.new(0)
  counts = Hash.new(0)
  lines.each do |line|
    key, val = line.chomp.split("\t", 2)
    next if val.nil? # no tab separator: malformed or blank record
    weights[key] += val.to_f
    counts[key] += 1
  end
  [weights, counts]
end

# Builds the output records for all keys that occurred once per shard.
#
# weights - Hash of key => summed value (must contain SHARD_COUNT_KEY for
#           anything to be emitted).
# counts  - Hash of key => number of occurrences.
#
# Returns an Array of "key<TAB>average" Strings, where average is the
# summed value divided by the shard count. SHARD_COUNT_KEY itself is never
# emitted. With a missing (zero) shard count the result is empty, because
# every seen key has a count of at least one; hence no division by zero.
def averaged_lines(weights, counts)
  shard_count = weights.fetch(SHARD_COUNT_KEY, 0)
  selected = weights.keys.reject do |key|
    key == SHARD_COUNT_KEY || counts[key] != shard_count
  end
  selected.map { |key| "#{key}\t#{weights[key] / shard_count}" }
end

# Script entry point: reduces STDIN to STDOUT.
def main
  STDIN.set_encoding 'utf-8'
  STDOUT.set_encoding 'utf-8'

  weights, counts = accumulate(STDIN.each_line)

  puts "# dtrain reducer: active on all"
  puts "shard count #{weights.fetch(SHARD_COUNT_KEY, 0)}"
  averaged_lines(weights, counts).each { |record| puts record }
end

main if __FILE__ == $PROGRAM_NAME