summary refs log tree commit diff
path: root/dtrain/hstreaming/hadoop-streaming-job.sh
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain/hstreaming/hadoop-streaming-job.sh')
-rwxr-xr-x dtrain/hstreaming/hadoop-streaming-job.sh 7
1 files changed, 3 insertions, 4 deletions
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh
index 90c2b790..92419956 100755
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ b/dtrain/hstreaming/hadoop-streaming-job.sh
@@ -6,17 +6,16 @@ EXP=a_simple_test
HADOOP_HOME=/usr/lib/hadoop-0.20
JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
-# ^^^
IN=input_on_hdfs
OUT=output_weights_on_hdfs
-# you can remove the -reducer line if you want to
+# you can set -reducer to NONE if you want to
# do feature selection/averaging locally (e.g. to
-# keep weights of the iterations)
+# keep weights of all epochs)
$HSTREAMING \
-mapper "dtrain.sh" \
- -reducer "lplp.rb l2 select_k 100000" \
+ -reducer "ruby lplp.rb l2 select_k 100000" \
-input $IN \
-output $OUT \
-file dtrain.sh \