#!/bin/bash
#
# hadoop-streaming-job.sh -- submit the dtrain job through Hadoop Streaming.
#
# Provenance: dtrain/hstreaming/hadoop-streaming-job.sh, created by commit
# dc9fd7a3adc863510d79a718e919b6833a86729c ("begin refactoring",
# Patrick Simianer, Fri, 23 Sep 2011 20:53:15 +0200).
#
# Usage: ./hadoop-streaming-job.sh        (takes no arguments)
#
# The job runs dtrain.sh as the mapper and avgweights.rb as the reducer,
# reading from IN and writing to OUT. Every path handed to -file is relative,
# so the script is expected to be started from the directory that holds
# those files (NOTE(review): confirm against how the job is launched).
#
# Exit status: that of the `hadoop jar` invocation.

set -euo pipefail

# Hadoop installation and the streaming jar inside it.
HADOOP_HOME=/usr/lib/hadoop-0.20
JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
readonly HADOOP_HOME JAR

# Job input and output locations.
IN=in/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain.1400m
OUT=out/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain-weights-1400m
readonly IN OUT

# Keep the launcher as an array rather than a flat string: each element is
# passed as exactly one argument, with no reliance on word-splitting.
hstreaming=("$HADOOP_HOME/bin/hadoop" jar "$HADOOP_HOME/$JAR")

# -file ships the listed local files along with the job.
# mapred.reduce.tasks=1 requests a single reduce task.
# mapred.max.map.failures.percent=100 is kept from the original; presumably
# it lets the job continue even if map tasks fail -- TODO confirm intent.
"${hstreaming[@]}" \
  -mapper "dtrain.sh" \
  -reducer "avgweights.rb" \
  -input "$IN" \
  -output "$OUT" \
  -file avgweights.rb \
  -file dtrain.sh \
  -file dtrain \
  -file dtrain.ini \
  -file cdec.ini \
  -file nc-wmt11.en.srilm.3.gz \
  -jobconf mapred.reduce.tasks=1 \
  -jobconf mapred.max.map.failures.percent=100