summary | refs | log | tree | commit | diff
path: root/hadoop/streaming/secondary_sort.sh
diff options
context:
space:
mode:
Diffstat (limited to 'hadoop/streaming/secondary_sort.sh')
-rwxr-xr-x hadoop/streaming/secondary_sort.sh 30
1 files changed, 0 insertions, 30 deletions
diff --git a/hadoop/streaming/secondary_sort.sh b/hadoop/streaming/secondary_sort.sh
deleted file mode 100755
index 7fa0c6d..0000000
--- a/hadoop/streaming/secondary_sort.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
-
-
-hadoop dfs -put secondary_sort.input secondary_sort.input
-
-HADOOP_HOME=/usr/lib/hadoop
-HADOOP_VERSION=0.20.2-cdh3u1
-JAR=contrib/streaming/hadoop-streaming-$HADOOP_VERSION.jar
-HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
-
-OUT=comp_out
-
-$HSTREAMING \
- -input secondary_sort.input \
- -output $OUT \
- -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
- -jobconf map.output.key.field.separator="-*-" \
- -jobconf mapred.text.key.partitioner.options="-k1,1n" \
- -mapper /bin/cat \
- -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
- -jobconf mapred.output.key.comparator.class=org.apache.hadoop.mapred.lib.KeyFieldBasedComparator \
- -jobconf stream.num.map.output.key.fields=2 \
- -jobconf stream.map.output.field.separator="\t" \
- -jobconf mapred.text.key.comparator.options="-k1,1n -k2,2nr" \
- -jobconf mapred.reduce.tasks=3
-
-hadoop dfs -get $OUT .
-hadoop dfs -rmr $OUT
-hadoop dfs -rm secondary_sort.input
-