diff options
Diffstat (limited to 'dtrain/hstreaming')
| -rw-r--r-- | dtrain/hstreaming/nc-wmt11.en.srilm.3.gz | bin | 12173238 -> 0 bytes | |||
| -rwxr-xr-x | dtrain/hstreaming/red-all.rb | 26 | ||||
| -rwxr-xr-x | dtrain/hstreaming/red-avg.rb | 9 | ||||
| -rw-r--r-- | dtrain/hstreaming/red-test | 1 | 
4 files changed, 32 insertions, 4 deletions
diff --git a/dtrain/hstreaming/nc-wmt11.en.srilm.3.gz b/dtrain/hstreaming/nc-wmt11.en.srilm.3.gz Binary files differdeleted file mode 100644 index 5a50f8fb..00000000 --- a/dtrain/hstreaming/nc-wmt11.en.srilm.3.gz +++ /dev/null diff --git a/dtrain/hstreaming/red-all.rb b/dtrain/hstreaming/red-all.rb new file mode 100755 index 00000000..bbc65945 --- /dev/null +++ b/dtrain/hstreaming/red-all.rb @@ -0,0 +1,26 @@ +#!/usr/bin/env ruby1.9.1 + + +shard_count_key = "__SHARD_COUNT__" + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +w = {} +c = {} +w.default = 0 +c.default = 0 +while line = STDIN.gets +  key, val = line.split /\t/ +  w[key] += val.to_f +  c[key] += 1 +end + +puts "# dtrain reducer: active on all" +shard_count = w["__SHARD_COUNT__"] +puts "shard count #{shard_count}" +w.each_key { |k| +  if k == shard_count_key then next end +  if c[k] == shard_count then puts "#{k}\t#{w[k]/shard_count}" end +} + diff --git a/dtrain/hstreaming/red-avg.rb b/dtrain/hstreaming/red-avg.rb index 048128f5..771f4c0e 100755 --- a/dtrain/hstreaming/red-avg.rb +++ b/dtrain/hstreaming/red-avg.rb @@ -1,10 +1,11 @@  #!/usr/bin/env ruby1.9.1 -STDIN.set_encoding 'utf-8' -  shard_count_key = "__SHARD_COUNT__" +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' +  w = {}  c = {}  w.default = 0 @@ -12,11 +13,11 @@ c.default = 0  while line = STDIN.gets    key, val = line.split /\t/    w[key] += val.to_f -  c[key] += 1.0 +  c[key] += 1  end +puts "# dtrain reducer: average"  shard_count = w["__SHARD_COUNT__"] -  w.each_key { |k|    if k == shard_count_key then next end    puts "#{k}\t#{w[k]/shard_count}" diff --git a/dtrain/hstreaming/red-test b/dtrain/hstreaming/red-test index b86e7894..a2a0edb1 100644 --- a/dtrain/hstreaming/red-test +++ b/dtrain/hstreaming/red-test @@ -4,4 +4,5 @@ c	3.5  a	1  b	2  c	3.5 +d	1  __SHARD_COUNT__	2  | 
