summary | refs | log | tree | commit | diff
path: root/hadoop/streaming/secondary_sort.sh
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-14 19:03:21 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-14 19:03:21 +0200
commit 5ddc763ab9953eebdaf78af4eb72288d7955b310 (patch)
tree fffaf3d22173feae684b7c02ce86c67cf77c7fec /hadoop/streaming/secondary_sort.sh
parent 26c490f404731d053a6205719b6246502c07b449 (diff)
cleanup
Diffstat (limited to 'hadoop/streaming/secondary_sort.sh')
-rwxr-xr-x hadoop/streaming/secondary_sort.sh 30
1 files changed, 0 insertions, 30 deletions
diff --git a/hadoop/streaming/secondary_sort.sh b/hadoop/streaming/secondary_sort.sh
deleted file mode 100755
index 7fa0c6d..0000000
--- a/hadoop/streaming/secondary_sort.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
- # Uploads secondary_sort.input, runs a Hadoop streaming job (mapper /bin/cat, IdentityReducer)
- # with KeyFieldBasedPartitioner and KeyFieldBasedComparator, fetches the output, then cleans up.
-hadoop dfs -put secondary_sort.input secondary_sort.input
- # Build the command prefix that runs the streaming jar found under HADOOP_HOME.
-HADOOP_HOME=/usr/lib/hadoop
-HADOOP_VERSION=0.20.2-cdh3u1
-JAR=contrib/streaming/hadoop-streaming-$HADOOP_VERSION.jar
-HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
- # Name of the job's output directory; reused below by the -get and -rmr steps.
-OUT=comp_out
- # Launch the job; $HSTREAMING is left unquoted so it word-splits into command + arguments.
-$HSTREAMING \
- -input secondary_sort.input \
- -output $OUT \
- -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
- -jobconf map.output.key.field.separator="-*-" \
- -jobconf mapred.text.key.partitioner.options="-k1,1n" \
- -mapper /bin/cat \
- -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
- -jobconf mapred.output.key.comparator.class=org.apache.hadoop.mapred.lib.KeyFieldBasedComparator \
- -jobconf stream.num.map.output.key.fields=2 \
- -jobconf stream.map.output.field.separator="\t" \
- -jobconf mapred.text.key.comparator.options="-k1,1n -k2,2nr" \
- -jobconf mapred.reduce.tasks=3
- # Copy the job output into the current directory, then remove the uploaded input and the output.
-hadoop dfs -get $OUT .
-hadoop dfs -rmr $OUT
-hadoop dfs -rm secondary_sort.input
-