summaryrefslogtreecommitdiff
path: root/dtrain/hstreaming/hadoop-streaming-job.sh
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2012-11-18 13:35:42 -0500
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2012-11-18 13:35:42 -0500
commit1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 (patch)
tree33e5f3aa5abff1f41314cf8f6afbd2c2c40e4bfd /dtrain/hstreaming/hadoop-streaming-job.sh
parent7c4665949fb93fb3de402e4ce1d19bef67850d05 (diff)
major restructure of the training code
Diffstat (limited to 'dtrain/hstreaming/hadoop-streaming-job.sh')
-rwxr-xr-xdtrain/hstreaming/hadoop-streaming-job.sh30
1 files changed, 0 insertions, 30 deletions
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh
deleted file mode 100755
index 92419956..00000000
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
-
-EXP=a_simple_test
-
-# change these vars to fit your hadoop installation
-HADOOP_HOME=/usr/lib/hadoop-0.20
-JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
-HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
-
- IN=input_on_hdfs
-OUT=output_weights_on_hdfs
-
-# you can -reducer to NONE if you want to
-# do feature selection/averaging locally (e.g. to
-# keep weights of all epochs)
-$HSTREAMING \
- -mapper "dtrain.sh" \
- -reducer "ruby lplp.rb l2 select_k 100000" \
- -input $IN \
- -output $OUT \
- -file dtrain.sh \
- -file lplp.rb \
- -file ../dtrain \
- -file dtrain.ini \
- -file cdec.ini \
- -file ../test/example/nc-wmt11.en.srilm.gz \
- -jobconf mapred.reduce.tasks=30 \
- -jobconf mapred.max.map.failures.percent=0 \
- -jobconf mapred.job.name="dtrain $EXP"
-