From 0f9024d49f7622d1c135aa2e3f9ddc6bc4349fb9 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Sat, 3 Dec 2011 21:38:59 +0100
Subject: new rule count

---
 dtrain/hstreaming/rule_count/map.sh       |  6 ++++++
 dtrain/hstreaming/rule_count/red.rb       | 29 +++++++++++++++++++++++++++++
 dtrain/hstreaming/rule_count/rulecount.rb | 18 ++++++++++++++++++
 dtrain/hstreaming/rule_count/test         |  8 ++++++++
 4 files changed, 61 insertions(+)
 create mode 100755 dtrain/hstreaming/rule_count/map.sh
 create mode 100644 dtrain/hstreaming/rule_count/red.rb
 create mode 100644 dtrain/hstreaming/rule_count/rulecount.rb
 create mode 100644 dtrain/hstreaming/rule_count/test

(limited to 'dtrain/hstreaming/rule_count')

diff --git a/dtrain/hstreaming/rule_count/map.sh b/dtrain/hstreaming/rule_count/map.sh
new file mode 100755
index 00000000..ae75fece
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/map.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Local run of the rule count: rulecount.rb emits one "<id>\t1" line per rule,
+# sort makes equal ids adjacent, and red.rb sums each run of equal ids.
+ruby rulecount.rb | sort | ruby red.rb
+
diff --git a/dtrain/hstreaming/rule_count/red.rb b/dtrain/hstreaming/rule_count/red.rb
new file mode 100644
index 00000000..8f9109cc
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/red.rb
@@ -0,0 +1,29 @@
+# Sums the integer counts of consecutive STDIN lines that share a key.
+#
+# Input:  "<key>\t<count>" lines in which equal keys are adjacent (e.g. sorted).
+# Output: one "<key>\t<sum>" line on STDOUT per run of equal keys.
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+# Prints one tab-separated "key<TAB>val" record to STDOUT.
+def output(key, val)
+  puts "#{key}\t#{val}"
+end
+
+prev_key = nil
+sum = 0
+STDIN.each_line do |line|
+  key, val = line.strip.split("\t")
+  next unless key # blank line: nothing to count
+  if key != prev_key
+    # Flush on every key change, even when the finished run summed to 0;
+    # otherwise the next key's counts would be added to the previous key.
+    output prev_key, sum if prev_key
+    prev_key = key
+    sum = 0
+  end
+  sum += val.to_i
+end
+# Empty input leaves prev_key nil, so there is no record to flush.
+output prev_key, sum if prev_key
+
diff --git a/dtrain/hstreaming/rule_count/rulecount.rb b/dtrain/hstreaming/rule_count/rulecount.rb
new file mode 100644
index 00000000..035bdf06
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/rulecount.rb
@@ -0,0 +1,18 @@
+# Emits one "<id>\t1" line on STDOUT for every rule found on STDIN.
+#
+# Each input line is split on tabs and every field from index 3 onwards is
+# treated as a rule. A rule's id is its first three "|||"-separated parts,
+# re-joined with "|||", stripped, and with spaces replaced by underscores.
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+STDIN.each_line do |line|
+  fields = line.strip.split("\t")
+  # drop(3) yields [] for lines with fewer than four fields, whereas
+  # fields[3..fields.size] is nil (and .each raises) with fewer than three.
+  fields.drop(3).each do |rule|
+    id = rule.split('|||')[0..2].join('|||').strip.tr(' ', '_')
+    puts "#{id}\t1"
+  end
+end
+
diff --git a/dtrain/hstreaming/rule_count/test b/dtrain/hstreaming/rule_count/test
new file mode 100644
index 00000000..acd00a5e
--- /dev/null
+++ b/dtrain/hstreaming/rule_count/test
@@ -0,0 +1,8 @@
+a	1
+a	1
+a	1
+b	1
+b	1
+c	1
+d	1
+a	1
-- 
cgit v1.2.3