summary refs log tree commit diff
path: root/dtrain/hstreaming/hadoop-streaming-job.sh
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2011-11-28 11:34:41 +0100
committer Patrick Simianer <p@simianer.de> 2011-11-28 11:34:41 +0100
commit 6e4367db4136e7a7de3771edf964a16c2fdf08a6 (patch)
tree d8e705ae35ecfae235c39e799740d62c289bd3d5 /dtrain/hstreaming/hadoop-streaming-job.sh
parent 8aa98e9aa2a786151aab1e9398bb03a6ba1ca383 (diff)
hstreaming stuff
Diffstat (limited to 'dtrain/hstreaming/hadoop-streaming-job.sh')
-rwxr-xr-x dtrain/hstreaming/hadoop-streaming-job.sh 16
1 files changed, 8 insertions, 8 deletions
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh
index 788c9fd1..4c0238f3 100755
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ b/dtrain/hstreaming/hadoop-streaming-job.sh
@@ -1,26 +1,26 @@
#!/bin/bash
-ID=
EXP=test
HADOOP_HOME=/usr/lib/hadoop-0.20
JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
-IN=nc-v6.de-en/nc-v6.de-en-dtrain.1500m
-OUT=nc-v6.de-en/nc-v6.de-en-dtrain.1500m-weights
+ IN=nc-v6.de-en.cs.giza.loo/nc-v6.de-en.cs.giza.loo-dtrain1.sz2
+OUT=out/$EXP-weights
$HSTREAMING \
- -mapper "dtrain -c dtrain.ini --hstreaming" \
+ -mapper "dtrain.sh" \
-reducer "red-avg.rb" \
-input $IN \
-output $OUT \
+ -file dtrain.sh \
-file red-avg.rb \
- -file ../dtrain \
+ -file ~/exp/cdec-dtrain-ro/dtrain/dtrain \
-file dtrain.ini \
-file cdec.ini \
- -file nc-wmt11.en.srilm.3.gz \
+ -file ~/exp/data/nc-v6.en.3.unk.probing.kenv5 \
-jobconf mapred.reduce.tasks=1 \
- -jobconf mapred.max.map.failures.percent=100 \
- -jobconf mapred.job.name="dtrain $ID $EXP"
+ -jobconf mapred.max.map.failures.percent=0 \
+ -jobconf mapred.job.name="dtrain $EXP"