summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2011-09-30 00:33:36 +0200
committer Patrick Simianer <p@simianer.de> 2011-09-30 00:33:36 +0200
commit abc2919ccf6cb57dd0320716cad378866b08054a (patch)
tree 5c46017bd810a36796e5338d99d1bdab2020529f
parent 58f4ff5b79a545d59e21e77511a4b74c99b63d56 (diff)
parent afdb78976b938bcbc1220135fc63a9c49ca564bb (diff)
Merge branch 'master' of github.com:qlt/cdec_dtrain
-rw-r--r-- dtrain/hstreaming/dtrain.ini 2
-rwxr-xr-x dtrain/hstreaming/hadoop-streaming-job.sh 2
-rwxr-xr-x dtrain/hstreaming/red-avg.rb 4
3 files changed, 3 insertions, 5 deletions
diff --git a/dtrain/hstreaming/dtrain.ini b/dtrain/hstreaming/dtrain.ini
index bb594653..708bbe46 100644
--- a/dtrain/hstreaming/dtrain.ini
+++ b/dtrain/hstreaming/dtrain.ini
@@ -6,5 +6,5 @@ input=-
output=-
scorer=stupid_bleu
sample_from=forest
-pair_sampling=rand
+pair_sampling=all
tmp=/var/hadoop/mapred/local
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh
index 9da0a6c3..f51b6024 100755
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ b/dtrain/hstreaming/hadoop-streaming-job.sh
@@ -5,7 +5,7 @@ JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
IN=in/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain.1400m
-OUT=out/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain-weights-1400m-NEW
+OUT=out/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain-weights-1400m-weights
$HSTREAMING \
-mapper "dtrain -c dtrain.ini --hstreaming" \
diff --git a/dtrain/hstreaming/red-avg.rb b/dtrain/hstreaming/red-avg.rb
index 11dc0d71..048128f5 100755
--- a/dtrain/hstreaming/red-avg.rb
+++ b/dtrain/hstreaming/red-avg.rb
@@ -19,8 +19,6 @@ shard_count = w["__SHARD_COUNT__"]
w.each_key { |k|
if k == shard_count_key then next end
- puts "#{k}\t{w[k]/shard_count}"
+ puts "#{k}\t#{w[k]/shard_count}"
}
-puts "#{shard_count_key}\t#{w[shard_count_key]}"
-