diff options
author | Patrick Simianer <p@simianer.de> | 2011-12-03 21:38:59 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-12-03 21:38:59 +0100 |
commit | 0f9024d49f7622d1c135aa2e3f9ddc6bc4349fb9 (patch) | |
tree | fc087cb9a222d00fc8b2f6d4484a62c581b84e54 /dtrain/hstreaming/rule_count/rulecount.rb | |
parent | 68fd129f5f69162fc2385bd3e57335968dfc74c2 (diff) |
new rule count
Diffstat (limited to 'dtrain/hstreaming/rule_count/rulecount.rb')
-rw-r--r-- | dtrain/hstreaming/rule_count/rulecount.rb | 11 |
1 files changed, 11 insertions, 0 deletions
# dtrain/hstreaming/rule_count/rulecount.rb
#
# Reads tab-separated lines from STDIN and, for every field after the third,
# prints "<rule id>\t1" to STDOUT (one output line per field).

STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'

# Builds the rule ids for one input line.
#
# The line is stripped and split on tabs. Every field from the fourth onwards
# is split on "|||"; its first three parts are re-joined with "|||", stripped,
# and every space is replaced by "_".
#
# @param line [String] one raw input line (trailing newline allowed)
# @return [Array<String>] one id per field after the third; empty when the
#   line has fewer than four fields
def rule_ids(line)
  fields = line.strip.split("\t")
  # drop(3) yields [] for short or blank lines, whereas slicing with
  # fields[3..fields.size] yields nil there and would crash on iteration.
  fields.drop(3).map do |rule|
    rule.split('|||').first(3).join('|||').strip.tr(' ', '_')
  end
end

while (line = STDIN.gets)
  rule_ids(line).each { |id| puts "#{id}\t1" }
end