summary | refs | log | tree | commit | diff
path: root/training/dtrain
diff options
context:
space:
mode:
author: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2013-01-24 16:28:23 +0100
committer: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2013-01-24 16:28:23 +0100
commit: 2ddb66613dcc83093e409b03d50ca777b452b147 (patch)
tree: 955e3bd573f5cdf2e8fd47633a5f7ca706ac5ac5 /training/dtrain
parent: e952a226ced8041d6e33eb5312668f598648ee2f (diff)
made examples work again
Diffstat (limited to 'training/dtrain')
-rwxr-xr-x training/dtrain/parallelize.rb 12
-rw-r--r-- training/dtrain/test/example/README 4
-rw-r--r-- training/dtrain/test/example/cdec.ini 2
-rw-r--r-- training/dtrain/test/example/dtrain.ini 4
-rw-r--r-- training/dtrain/test/parallelize/README 5
-rw-r--r-- training/dtrain/test/parallelize/cdec.ini 2
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.0.gz bin 0 -> 8318 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.1.gz bin 0 -> 358560 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.2.gz bin 0 -> 1014466 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.3.gz bin 0 -> 391811 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.4.gz bin 0 -> 149590 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.5.gz bin 0 -> 537024 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.6.gz bin 0 -> 291286 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.7.gz bin 0 -> 1038140 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.8.gz bin 0 -> 419889 bytes
-rw-r--r-- training/dtrain/test/parallelize/g/grammar.out.9.gz bin 0 -> 409140 bytes
16 files changed, 19 insertions, 10 deletions
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 6e30cf9d..9b0923f6 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -7,10 +7,10 @@ if ARGV.size != 7
exit
end
-cdec_dir = '~/MAREC/cdec-dtrain/'
-dtrain_bin = "~/MAREC/cdec-dtrain/training/dtrain/dtrain"
+dtrain_dir = File.expand_path File.dirname(__FILE__)
+dtrain_bin = "#{dtrain_dir}/dtrain"
ruby = '/usr/bin/ruby'
-lplp_rb = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb"
+lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb"
lplp_args = 'l2 select_k 100000'
cat = '/bin/cat'
@@ -96,15 +96,19 @@ end
while remaining_shards > 0
shards_at_once.times {
qsub_str_start = qsub_str_end = ''
+ local_end = ''
if use_qsub
qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \""
qsub_str_end = "\""
+ local_end = ''
+ else
+ local_end = "&>work/out.#{shard}.#{epoch}"
end
pids << Kernel.fork {
`#{qsub_str_start}#{dtrain_bin} -c #{ini}\
--input #{input_files[shard]}\
--refs #{refs_files[shard]} #{input_weights}\
- --output work/weights.#{shard}.#{epoch}#{qsub_str_end}`
+ --output work/weights.#{shard}.#{epoch}#{qsub_str_end} #{local_end}`
}
weights_files << "work/weights.#{shard}.#{epoch}"
shard += 1
diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README
index 6937b11b..2df77086 100644
--- a/training/dtrain/test/example/README
+++ b/training/dtrain/test/example/README
@@ -1,8 +1,8 @@
Small example of input format for distributed training.
-Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini .
+Call dtrain from this folder with ../../dtrain -c ./dtrain.ini .
For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
and recompile.
-Data is here: http://simianer.de/#dtrain
+Data can be found here: http://simianer.de/#dtrain
diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini
index d5955f0e..068ebd4d 100644
--- a/training/dtrain/test/example/cdec.ini
+++ b/training/dtrain/test/example/cdec.ini
@@ -4,7 +4,7 @@ scfg_max_span_limit=15
intersection_strategy=cube_pruning
cubepruning_pop_limit=30
feature_function=WordPenalty
-feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz
+feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
# all currently working feature functions for translation:
# (with those features active that were used in the ACL paper)
#feature_function=ArityPenalty
diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini
index 72d50ca1..97fce7f0 100644
--- a/training/dtrain/test/example/dtrain.ini
+++ b/training/dtrain/test/example/dtrain.ini
@@ -1,7 +1,7 @@
-input=test/example/nc-wmt11.1k.gz # use '-' for STDIN
+input=./nc-wmt11.1k.gz # use '-' for STDIN
output=- # a weights file (add .gz for gzip compression) or STDOUT '-'
select_weights=VOID # don't output weights
-decoder_config=test/example/cdec.ini # config for cdec
+decoder_config=./cdec.ini # config for cdec
# weights for these features will be printed on each iteration
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
tmp=/tmp
diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README
new file mode 100644
index 00000000..89715105
--- /dev/null
+++ b/training/dtrain/test/parallelize/README
@@ -0,0 +1,5 @@
+Run, for example:
+ ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs
+
+The final weights will be in the file work/weights.3
+
diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini
index e118374b..e43ba1c4 100644
--- a/training/dtrain/test/parallelize/cdec.ini
+++ b/training/dtrain/test/parallelize/cdec.ini
@@ -4,7 +4,7 @@ intersection_strategy=cube_pruning
cubepruning_pop_limit=200
scfg_max_span_limit=15
feature_function=WordPenalty
-feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
+feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz
#feature_function=ArityPenalty
#feature_function=CMR2008ReorderingFeatures
#feature_function=Dwarf
diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz
new file mode 100644
index 00000000..1e28a24b
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.0.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz
new file mode 100644
index 00000000..372f5675
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.1.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz
new file mode 100644
index 00000000..145d0dc0
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.2.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz
new file mode 100644
index 00000000..105593ff
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.3.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/test/parallelize/g/grammar.out.4.gz
new file mode 100644
index 00000000..30781f48
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.4.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz
new file mode 100644
index 00000000..834ee759
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.5.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/test/parallelize/g/grammar.out.6.gz
new file mode 100644
index 00000000..2e76f348
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.6.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz
new file mode 100644
index 00000000..3741a887
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.7.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz
new file mode 100644
index 00000000..ebf6bd0c
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.8.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz
new file mode 100644
index 00000000..c1791059
--- /dev/null
+++ b/training/dtrain/test/parallelize/g/grammar.out.9.gz
Binary files differ