summaryrefslogtreecommitdiff
path: root/dtrain/hstreaming/rule_count/rulecount.rb
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2011-12-03 21:38:59 +0100
committerPatrick Simianer <p@simianer.de>2011-12-03 21:38:59 +0100
commit0f9024d49f7622d1c135aa2e3f9ddc6bc4349fb9 (patch)
treefc087cb9a222d00fc8b2f6d4484a62c581b84e54 /dtrain/hstreaming/rule_count/rulecount.rb
parent68fd129f5f69162fc2385bd3e57335968dfc74c2 (diff)
new rule count
Diffstat (limited to 'dtrain/hstreaming/rule_count/rulecount.rb')
-rw-r--r--dtrain/hstreaming/rule_count/rulecount.rb11
1 files changed, 11 insertions, 0 deletions
diff --git a/dtrain/hstreaming/rule_count/rulecount.rb b/dtrain/hstreaming/rule_count/rulecount.rb
new file mode 100644
index 00000000..035bdf06
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/rulecount.rb
@@ -0,0 +1,11 @@
+# For each tab-separated STDIN line, print "<id>\t1" for every field from the 4th on.
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+while line = STDIN.gets
+ # drop(3) yields [] for short or blank lines, so they are skipped instead of raising.
+ line.strip.split("\t").drop(3).each do |r|
+  # id: first three "|||"-separated parts of r, with spaces replaced by "_".
+  puts "#{r.split('|||')[0..2].join('|||').strip.tr(' ', '_')}\t1"
+ end
+end
+