diff options
author | Michael Denkowski <michael.j.denkowski@gmail.com> | 2012-12-22 16:01:23 -0500 |
---|---|---|
committer | Michael Denkowski <michael.j.denkowski@gmail.com> | 2012-12-22 16:01:23 -0500 |
commit | 778a4cec55f82bcc66b3f52de7cc871e8daaeb92 (patch) | |
tree | 2a5bccaa85965855104c4e8ac3738b2e1c77f164 /training/dtrain/hstreaming/avg.rb | |
parent | 57fff9eea5ba0e71fb958fdb4f32d17f2fe31108 (diff) | |
parent | d21491daa5e50b4456c7c5f9c2e51d25afd2a757 (diff) |
Merge branch 'master' of git://github.com/redpony/cdec
Diffstat (limited to 'training/dtrain/hstreaming/avg.rb')
-rwxr-xr-x | training/dtrain/hstreaming/avg.rb | 32 |
1 files changed, 32 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# training/dtrain/hstreaming/avg.rb
#
# Reads "key<whitespace>value" lines from standard input, sums the values per
# key, divides every sum by the shard count and prints "key\taverage" lines to
# standard output in first-seen key order.
#
# The shard count is the summed value of the special "__SHARD_COUNT__" key,
# unless a first command-line argument is given, in which case that number is
# used instead (first arg may be an int of custom shard count).

SHARD_COUNT_KEY = '__SHARD_COUNT__'

# Sums the numeric value of every key found in +input+ (any object responding
# to #each_line). Blank lines and lines with an empty key are skipped so they
# cannot produce a spurious empty-key entry. A missing or non-numeric value
# counts as 0.0 (String#to_f / NilClass#to_f semantics).
# Returns a Hash of key => Float sum.
def sum_weights(input)
  totals = Hash.new(0.0)
  input.each_line do |line|
    key, val = line.split(/\s/)
    next if key.nil? || key.empty?

    totals[key] += val.to_f
  end
  totals
end

# Determines the divisor: the first element of +argv+ when present (parsed
# strictly, so a malformed argument raises ArgumentError instead of silently
# becoming 0.0), otherwise the accumulated SHARD_COUNT_KEY total.
def resolve_shard_count(totals, argv)
  return totals.fetch(SHARD_COUNT_KEY, 0.0) if argv.empty?

  Float(argv.first)
end

# Returns a Hash of key => averaged weight for every key except
# SHARD_COUNT_KEY. Returns an empty Hash when there is nothing to average.
# Raises ArgumentError when weights exist but the shard count is not a
# positive number, because dividing by it would emit Infinity/NaN weights.
def average_weights(input, argv)
  totals = sum_weights(input)
  features = totals.reject { |key, _| key == SHARD_COUNT_KEY }
  return {} if features.empty?

  shard_count = resolve_shard_count(totals, argv)
  unless shard_count > 0
    raise ArgumentError,
          "shard count must be positive (got #{shard_count}); supply " \
          "#{SHARD_COUNT_KEY} lines on stdin or pass the count as the first argument"
  end

  features.each_with_object({}) do |(key, total), averages|
    averages[key] = total / shard_count
  end
end

# Writes one "key\taverage" line per averaged key to +output+.
def print_averages(input, output, argv)
  average_weights(input, argv).each do |key, average|
    output.puts "#{key}\t#{average}"
  end
end

if __FILE__ == $PROGRAM_NAME
  STDIN.set_encoding 'utf-8'
  STDOUT.set_encoding 'utf-8'
  begin
    print_averages(STDIN, STDOUT, ARGV)
  rescue ArgumentError => e
    # Report bad input on stderr and exit non-zero instead of printing
    # Infinity/NaN weights.
    abort "avg.rb: #{e.message}"
  end
end