diff options
Diffstat (limited to 'dtrain/hstreaming')
-rwxr-xr-x  dtrain/hstreaming/rule_count/map.sh       |  4
-rw-r--r--  dtrain/hstreaming/rule_count/red.rb       | 22
-rw-r--r--  dtrain/hstreaming/rule_count/rulecount.rb | 11
-rw-r--r--  dtrain/hstreaming/rule_count/test         |  8
4 files changed, 45 insertions, 0 deletions
diff --git a/dtrain/hstreaming/rule_count/map.sh b/dtrain/hstreaming/rule_count/map.sh
new file mode 100755
index 00000000..ae75fece
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/map.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+ruby rulecount.rb | sort | ruby red.rb
+
diff --git a/dtrain/hstreaming/rule_count/red.rb b/dtrain/hstreaming/rule_count/red.rb
new file mode 100644
index 00000000..8f9109cc
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/red.rb
@@ -0,0 +1,22 @@
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+def output(key, val)
+  puts "#{key}\t#{val}"
+end
+
+prev_key = nil
+sum = 0
+while line = STDIN.gets
+   key, val = line.strip.split /\t/
+   if key != prev_key && sum > 0
+      output prev_key, sum
+      prev_key = key
+      sum = 0
+   elsif !prev_key
+      prev_key = key
+   end
+   sum += val.to_i
+end
+output prev_key, sum
+
diff --git a/dtrain/hstreaming/rule_count/rulecount.rb b/dtrain/hstreaming/rule_count/rulecount.rb
new file mode 100644
index 00000000..035bdf06
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/rulecount.rb
@@ -0,0 +1,11 @@
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+while line = STDIN.gets
+  a = line.strip.chomp.split "\t"
+  a[3..a.size].each { |r|
+    id = r.split("|||")[0..2].join("|||").to_s.strip.gsub("\s", "_")
+    puts "#{id}\t1"
+  }
+end
+
diff --git a/dtrain/hstreaming/rule_count/test b/dtrain/hstreaming/rule_count/test
new file mode 100644
index 00000000..acd00a5e
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/test
@@ -0,0 +1,8 @@
+a	1
+a	1
+a	1
+b	1
+b	1
+c	1
+d	1
+a	1
