summary | refs | log | tree | commit | diff
path: root/overlapping_rules/merge_rules_mapred.sh
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
committer: Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
commit: 26c490f404731d053a6205719b6246502c07b449 (patch)
tree: 3aa721098f1251dfbf2249ecd2736434c13b1d48 /overlapping_rules/merge_rules_mapred.sh
init
Diffstat (limited to 'overlapping_rules/merge_rules_mapred.sh')
-rwxr-xr-x overlapping_rules/merge_rules_mapred.sh 12
1 files changed, 12 insertions, 0 deletions
diff --git a/overlapping_rules/merge_rules_mapred.sh b/overlapping_rules/merge_rules_mapred.sh
new file mode 100755
index 0000000..e1329f4
--- /dev/null
+++ b/overlapping_rules/merge_rules_mapred.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.0.0-mr1-cdh4.1.2.jar \
+ -D mapred.reduce.tasks=23 \
+ -D mapred.task.timeout=120000000 \
+ -input overlap/g120+index-16mb \
+ -cacheFile 'hdfs://10.0.0.1:8020/user/simianer/overlap/g120+index#g120+index' \
+ -output overlap/test-g120 \
+ -mapper merge_rules.rb \
+ -reducer /usr/bin/uniq \
+ -file merge_rules.rb \
+