summary | refs | log | tree | commit | diff
path: root/training/dtrain/hstreaming/hadoop-streaming-job.sh
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2013-03-15 10:29:13 +0100
committer Patrick Simianer <p@simianer.de> 2013-03-15 10:29:13 +0100
commit 2b4b3adc764085bccc6ddbde96b8cc7ba4287a9f (patch)
tree bbe72e5e3f425d2389b1f037a83aefd2c40269eb /training/dtrain/hstreaming/hadoop-streaming-job.sh
parent 08d5de939f85075fc1569ddfa545b5d815231c3f (diff)
removed hadoop/hstreaming mode
Diffstat (limited to 'training/dtrain/hstreaming/hadoop-streaming-job.sh')
-rwxr-xr-x training/dtrain/hstreaming/hadoop-streaming-job.sh 30
1 files changed, 0 insertions, 30 deletions
diff --git a/training/dtrain/hstreaming/hadoop-streaming-job.sh b/training/dtrain/hstreaming/hadoop-streaming-job.sh
deleted file mode 100755
index 92419956..00000000
--- a/training/dtrain/hstreaming/hadoop-streaming-job.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
-
-EXP=a_simple_test
-
-# change these vars to fit your hadoop installation
-HADOOP_HOME=/usr/lib/hadoop-0.20
-JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
-HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
-
- IN=input_on_hdfs
-OUT=output_weights_on_hdfs
-
-# you can set -reducer to NONE if you want to
-# do feature selection/averaging locally (e.g. to
-# keep weights of all epochs)
-$HSTREAMING \
- -mapper "dtrain.sh" \
- -reducer "ruby lplp.rb l2 select_k 100000" \
- -input $IN \
- -output $OUT \
- -file dtrain.sh \
- -file lplp.rb \
- -file ../dtrain \
- -file dtrain.ini \
- -file cdec.ini \
- -file ../test/example/nc-wmt11.en.srilm.gz \
- -jobconf mapred.reduce.tasks=30 \
- -jobconf mapred.max.map.failures.percent=0 \
- -jobconf mapred.job.name="dtrain $EXP"
-