diff options
Diffstat (limited to 'dtrain/hstreaming/hadoop-streaming-job.sh')
-rwxr-xr-x | dtrain/hstreaming/hadoop-streaming-job.sh | 7 |
1 files changed, 3 insertions, 4 deletions
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh
index 90c2b790..92419956 100755
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ b/dtrain/hstreaming/hadoop-streaming-job.sh
@@ -6,17 +6,16 @@ EXP=a_simple_test
 HADOOP_HOME=/usr/lib/hadoop-0.20
 JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
 HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
-# ^^^
 
 IN=input_on_hdfs
 OUT=output_weights_on_hdfs
 
-# you can remove the -reducer line if you want to
+# you can -reducer to NONE if you want to
 # do feature selection/averaging locally (e.g. to
-# keep weights of the iterations)
+# keep weights of all epochs)
 $HSTREAMING \
 -mapper "dtrain.sh" \
--reducer "lplp.rb l2 select_k 100000" \
+-reducer "ruby lplp.rb l2 select_k 100000" \
 -input $IN \
 -output $OUT \
 -file dtrain.sh \