summaryrefslogtreecommitdiff
path: root/training/dtrain/hstreaming/hadoop-streaming-job.sh
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-04-24 17:18:10 +0100
committerPaul Baltescu <pauldb89@gmail.com>2013-04-24 17:18:10 +0100
commitba206aaac1d95e76126443c9e7ccc5941e879849 (patch)
tree13a918da3f3983fd8e4cb74e7cdc3f5e1fc01cd1 /training/dtrain/hstreaming/hadoop-streaming-job.sh
parentc2aede0f19b7a5e43581768b8c4fbfae8b92c68c (diff)
parentdb960a8bba81df3217660ec5a96d73e0d6baa01b (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'training/dtrain/hstreaming/hadoop-streaming-job.sh')
-rwxr-xr-xtraining/dtrain/hstreaming/hadoop-streaming-job.sh30
1 files changed, 0 insertions, 30 deletions
diff --git a/training/dtrain/hstreaming/hadoop-streaming-job.sh b/training/dtrain/hstreaming/hadoop-streaming-job.sh
deleted file mode 100755
index 92419956..00000000
--- a/training/dtrain/hstreaming/hadoop-streaming-job.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
-
-EXP=a_simple_test
-
-# change these vars to fit your hadoop installation
-HADOOP_HOME=/usr/lib/hadoop-0.20
-JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
-HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
-
- IN=input_on_hdfs
-OUT=output_weights_on_hdfs
-
-# you can -reducer to NONE if you want to
-# do feature selection/averaging locally (e.g. to
-# keep weights of all epochs)
-$HSTREAMING \
- -mapper "dtrain.sh" \
- -reducer "ruby lplp.rb l2 select_k 100000" \
- -input $IN \
- -output $OUT \
- -file dtrain.sh \
- -file lplp.rb \
- -file ../dtrain \
- -file dtrain.ini \
- -file cdec.ini \
- -file ../test/example/nc-wmt11.en.srilm.gz \
- -jobconf mapred.reduce.tasks=30 \
- -jobconf mapred.max.map.failures.percent=0 \
- -jobconf mapred.job.name="dtrain $EXP"
-