summary | refs | log | tree | commit | diff
path: root/hadoop-uniq
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de>, 2016-07-05 11:01:46 +0200
committer: Patrick Simianer <p@simianer.de>, 2016-07-05 11:01:46 +0200
commit: 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 (patch)
tree: 5a06ee7de98640a39244b57bb369697176b44ebf /hadoop-uniq
parent: 69949dda35c3ea21d8e926e5f0a596a0a0f61c6a (diff)
mv
Diffstat (limited to 'hadoop-uniq')
-rwxr-xr-x  hadoop-uniq  11
1 files changed, 11 insertions, 0 deletions
diff --git a/hadoop-uniq b/hadoop-uniq
new file mode 100755
index 0000000..5052419
--- /dev/null
+++ b/hadoop-uniq
@@ -0,0 +1,11 @@
+#!/bin/zsh
+
+HADOOP_HOME=/usr/lib/hadoop
+
+$HADOOP_HOME/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \
+ -D mapred.reduce.tasks=98 \
+ -input d \
+ -output d.uniq \
+ -mapper 'cut -d " " -f 1' \
+ -reducer /usr/bin/uniq
+