summary | refs | log | tree | commit | diff
path: root/hadoop-uniq
diff options
context:
space:
mode:
Diffstat (limited to 'hadoop-uniq')
-rwxr-xr-x hadoop-uniq 11
1 files changed, 11 insertions, 0 deletions
diff --git a/hadoop-uniq b/hadoop-uniq
new file mode 100755
index 0000000..5052419
--- /dev/null
+++ b/hadoop-uniq
@@ -0,0 +1,33 @@
+#!/bin/zsh
+#
+# hadoop-uniq: extract the first space-separated field of every input line
+# and write the distinct values, using a Hadoop Streaming job.
+#
+# Usage: hadoop-uniq [INPUT [OUTPUT]]
+#   INPUT   path passed to -input  (default: d)
+#   OUTPUT  path passed to -output (default: INPUT.uniq)
+#
+# With no arguments this runs the original fixed job: d -> d.uniq.
+
+HADOOP_HOME=/usr/lib/hadoop
+
+if (( $# > 2 )); then
+  echo "usage: ${0:t} [INPUT [OUTPUT]]" >&2
+  exit 2
+fi
+
+input=${1:-d}
+output=${2:-${input}.uniq}
+
+# The mapper emits only field 1, so the whole mapper output line is the key.
+# uniq drops only *adjacent* duplicates; that is sufficient here provided each
+# reducer receives its keys sorted, with all copies of a key on one reducer.
+# NOTE(review): streaming may hand the reducer "key<TAB>" for an empty value,
+# leaving a trailing tab on output lines -- confirm against real output.
+$HADOOP_HOME/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \
+ -D mapred.reduce.tasks=98 \
+ -input "$input" \
+ -output "$output" \
+ -mapper 'cut -d " " -f 1' \
+ -reducer /usr/bin/uniq
+