summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xhadoop_uniq11
1 files changed, 11 insertions, 0 deletions
diff --git a/hadoop_uniq b/hadoop_uniq
new file mode 100755
index 0000000..5052419
--- /dev/null
+++ b/hadoop_uniq
@@ -0,0 +1,11 @@
+#!/bin/zsh
+
+HADOOP_HOME=/usr/lib/hadoop
+
+$HADOOP_HOME/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \
+ -D mapred.reduce.tasks=98 \
+ -input d \
+ -output d.uniq \
+ -mapper 'cut -d " " -f 1' \
+ -reducer /usr/bin/uniq
+