#!/bin/sh
#
# Runs a Hadoop Streaming job over secondary_sort.input using
# KeyFieldBasedPartitioner and KeyFieldBasedComparator, then copies the
# job output into the current directory.
#
# Steps:
#   1. Upload secondary_sort.input from the current directory to HDFS.
#   2. Run the streaming job (mapper /bin/cat, IdentityReducer, 3 reducers).
#   3. Download the output directory (comp_out) to the current directory.
#   4. Remove the HDFS output directory and the uploaded input.
#
# Requires `hadoop` on PATH and a Hadoop install under $HADOOP_HOME that
# ships the streaming jar named below.

# Stop at the first failing step instead of fetching/deleting after a
# failed upload or job; treat unset variables as errors.
set -eu

HADOOP_HOME=/usr/lib/hadoop
HADOOP_VERSION=0.20.2-cdh3u1
JAR=contrib/streaming/hadoop-streaming-$HADOOP_VERSION.jar
INPUT=secondary_sort.input
OUT=comp_out

# Best-effort removal of everything this script put into HDFS. Failures are
# ignored on purpose: after an aborted run either path may not exist.
cleanup() {
  hadoop dfs -rmr "$OUT" || :
  hadoop dfs -rm "$INPUT" || :
}

hadoop dfs -put "$INPUT" "$INPUT"

# From here on the input is staged in HDFS, so always clean up on exit,
# whether the remaining steps succeed or not.
trap cleanup EXIT

# Partitioner option -k1,1n: partition on the first key field only.
# Comparator options -k1,1n -k2,2nr: sort by field 1 numerically ascending,
# then by field 2 numerically descending.
# NOTE(review): "\t" reaches Hadoop as the two characters backslash + t (the
# shell does not expand it) - confirm Hadoop interprets that as a tab.
# NOTE(review): the key field separator is "-*-" while the map output field
# separator is "\t" - confirm this matches the layout of the input data.
"$HADOOP_HOME/bin/hadoop" jar "$HADOOP_HOME/$JAR" \
  -input "$INPUT" \
  -output "$OUT" \
  -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
  -jobconf map.output.key.field.separator="-*-" \
  -jobconf mapred.text.key.partitioner.options="-k1,1n" \
  -mapper /bin/cat \
  -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
  -jobconf mapred.output.key.comparator.class=org.apache.hadoop.mapred.lib.KeyFieldBasedComparator \
  -jobconf stream.num.map.output.key.fields=2 \
  -jobconf stream.map.output.field.separator="\t" \
  -jobconf mapred.text.key.comparator.options="-k1,1n -k2,2nr" \
  -jobconf mapred.reduce.tasks=3

# Copy the job output next to the script's working directory; the EXIT trap
# then removes the HDFS copies.
hadoop dfs -get "$OUT" .