# frozen_string_literal: true

# rulecount.rb (dtrain/hstreaming/rule_count/rulecount.rb)
#
# Provenance: added in commit 0f9024d49f7622d1c135aa2e3f9ddc6bc4349fb9
# ("new rule count", Patrick Simianer, 2011-12-03).
#
# Reads tab-separated lines from STDIN. Every field from the fourth one
# onward is treated as a "|||"-delimited rule; the first three "|||" parts of
# each rule are turned into an identifier and written to STDOUT as
# "<identifier>\t1", one per line.
# NOTE(review): the path suggests this is a streaming mapper whose output is
# summed by a separate reducer -- confirm against the job setup.

STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'

# Builds the rule identifiers for a single input line.
#
# @param line [String] one raw input line (trailing newline allowed)
# @return [Array<String>] one identifier per field at index 3 and beyond;
#   empty when the line has three fields or fewer (including blank lines)
def rule_ids(line)
  # NOTE(review): strip also removes leading tabs, so a line whose first
  # field is empty has its fields shifted left -- kept as in the original.
  fields = line.strip.split("\t")

  # drop(3) yields [] for short lines, whereas fields[3..fields.size] is nil
  # there and made the original script crash on nil.each.
  fields.drop(3).map do |rule|
    # Keep only the first three "|||" parts, then replace spaces with
    # underscores so the identifier contains no blanks.
    rule.split('|||').first(3).join('|||').strip.tr(' ', '_')
  end
end

STDIN.each_line do |line|
  rule_ids(line).each { |id| puts "#{id}\t1" }
end