summary refs log tree commit diff
path: root/dtrain
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2011-09-28 13:08:59 +0200
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2011-09-28 13:08:59 +0200
commit b19aa739311b8e9ee120f02746677584db3b3af9 (patch)
tree e10ed97f654f0d221a9b97a54a9e94f301a1a8c2 /dtrain
parent a016c260bba1a7411af21264079ce670fb0ad3a6 (diff)
hstreaming bug
Diffstat (limited to 'dtrain')
-rw-r--r-- dtrain/hstreaming/dtrain.ini 2
-rwxr-xr-x dtrain/hstreaming/hadoop-streaming-job.sh 2
-rwxr-xr-x dtrain/hstreaming/red-avg.rb 4
3 files changed, 3 insertions, 5 deletions
diff --git a/dtrain/hstreaming/dtrain.ini b/dtrain/hstreaming/dtrain.ini
index bb594653..708bbe46 100644
--- a/dtrain/hstreaming/dtrain.ini
+++ b/dtrain/hstreaming/dtrain.ini
@@ -6,5 +6,5 @@ input=-
output=-
scorer=stupid_bleu
sample_from=forest
-pair_sampling=rand
+pair_sampling=all
tmp=/var/hadoop/mapred/local
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh
index 9da0a6c3..f51b6024 100755
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ b/dtrain/hstreaming/hadoop-streaming-job.sh
@@ -5,7 +5,7 @@ JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
IN=in/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain.1400m
-OUT=out/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain-weights-1400m-NEW
+OUT=out/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain-weights-1400m-weights
$HSTREAMING \
-mapper "dtrain -c dtrain.ini --hstreaming" \
diff --git a/dtrain/hstreaming/red-avg.rb b/dtrain/hstreaming/red-avg.rb
index 11dc0d71..048128f5 100755
--- a/dtrain/hstreaming/red-avg.rb
+++ b/dtrain/hstreaming/red-avg.rb
@@ -19,8 +19,6 @@ shard_count = w["__SHARD_COUNT__"]
w.each_key { |k|
if k == shard_count_key then next end
- puts "#{k}\t{w[k]/shard_count}"
+ puts "#{k}\t#{w[k]/shard_count}"
}
-puts "#{shard_count_key}\t#{w[shard_count_key]}"
-