diff options
author | Patrick Simianer <p@simianer.de> | 2016-07-05 11:01:46 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2016-07-05 11:01:46 +0200 |
commit | 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 (patch) | |
tree | 5a06ee7de98640a39244b57bb369697176b44ebf /hadoop-uniq | |
parent | 69949dda35c3ea21d8e926e5f0a596a0a0f61c6a (diff) |
mv
Diffstat (limited to 'hadoop-uniq')
-rwxr-xr-x | hadoop-uniq | 11 |
1 files changed, 11 insertions, 0 deletions
```diff
diff --git a/hadoop-uniq b/hadoop-uniq
new file mode 100755
index 0000000..5052419
--- /dev/null
+++ b/hadoop-uniq
@@ -0,0 +1,11 @@
+#!/bin/zsh
+
+HADOOP_HOME=/usr/lib/hadoop
+
+$HADOOP_HOME/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \
+  -D mapred.reduce.tasks=98 \
+  -input d \
+  -output d.uniq \
+  -mapper 'cut -d " " -f 1' \
+  -reducer /usr/bin/uniq
+
```