diff options
16 files changed, 19 insertions, 10 deletions
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 6e30cf9d..9b0923f6 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -7,10 +7,10 @@ if ARGV.size != 7 exit end -cdec_dir = '~/MAREC/cdec-dtrain/' -dtrain_bin = "~/MAREC/cdec-dtrain/training/dtrain/dtrain" +dtrain_dir = File.expand_path File.dirname(__FILE__) +dtrain_bin = "#{dtrain_dir}/dtrain" ruby = '/usr/bin/ruby' -lplp_rb = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb" +lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb" lplp_args = 'l2 select_k 100000' cat = '/bin/cat' @@ -96,15 +96,19 @@ end while remaining_shards > 0 shards_at_once.times { qsub_str_start = qsub_str_end = '' + local_end = '' if use_qsub qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \"" qsub_str_end = "\"" + local_end = '' + else + local_end = "&>work/out.#{shard}.#{epoch}" end pids << Kernel.fork { `#{qsub_str_start}#{dtrain_bin} -c #{ini}\ --input #{input_files[shard]}\ --refs #{refs_files[shard]} #{input_weights}\ - --output work/weights.#{shard}.#{epoch}#{qsub_str_end}` + --output work/weights.#{shard}.#{epoch}#{qsub_str_end} #{local_end}` } weights_files << "work/weights.#{shard}.#{epoch}" shard += 1 diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README index 6937b11b..2df77086 100644 --- a/training/dtrain/test/example/README +++ b/training/dtrain/test/example/README @@ -1,8 +1,8 @@ Small example of input format for distributed training. -Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini . +Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini . For this to work, undef 'DTRAIN_LOCAL' in dtrain.h and recompile. 
-Data is here: http://simianer.de/#dtrain +data can be found here: http://simianer.de/#dtrain diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini index d5955f0e..068ebd4d 100644 --- a/training/dtrain/test/example/cdec.ini +++ b/training/dtrain/test/example/cdec.ini @@ -4,7 +4,7 @@ scfg_max_span_limit=15 intersection_strategy=cube_pruning cubepruning_pop_limit=30 feature_function=WordPenalty -feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz +feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz # all currently working feature functions for translation: # (with those features active that were used in the ACL paper) #feature_function=ArityPenalty diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini index 72d50ca1..97fce7f0 100644 --- a/training/dtrain/test/example/dtrain.ini +++ b/training/dtrain/test/example/dtrain.ini @@ -1,7 +1,7 @@ -input=test/example/nc-wmt11.1k.gz # use '-' for STDIN +input=./nc-wmt11.1k.gz # use '-' for STDIN output=- # a weights file (add .gz for gzip compression) or STDOUT '-' select_weights=VOID # don't output weights -decoder_config=test/example/cdec.ini # config for cdec +decoder_config=./cdec.ini # config for cdec # weights for these features will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README new file mode 100644 index 00000000..89715105 --- /dev/null +++ b/training/dtrain/test/parallelize/README @@ -0,0 +1,5 @@ +run for example + ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs + +final weights will be in the file work/weights.3 + diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini index e118374b..e43ba1c4 100644 --- 
a/training/dtrain/test/parallelize/cdec.ini +++ b/training/dtrain/test/parallelize/cdec.ini @@ -4,7 +4,7 @@ intersection_strategy=cube_pruning cubepruning_pop_limit=200 scfg_max_span_limit=15 feature_function=WordPenalty -feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz +feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz #feature_function=ArityPenalty #feature_function=CMR2008ReorderingFeatures #feature_function=Dwarf diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz Binary files differ new file mode 100644 index 00000000..1e28a24b --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.0.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz Binary files differ new file mode 100644 index 00000000..372f5675 --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.1.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz Binary files differ new file mode 100644 index 00000000..145d0dc0 --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.2.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz Binary files differ new file mode 100644 index 00000000..105593ff --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.3.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/test/parallelize/g/grammar.out.4.gz Binary files differ new file mode 100644 index 00000000..30781f48 --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.4.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz Binary files differ new file mode 100644 index 00000000..834ee759 --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.5.gz diff --git 
a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/test/parallelize/g/grammar.out.6.gz Binary files differ new file mode 100644 index 00000000..2e76f348 --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.6.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz Binary files differ new file mode 100644 index 00000000..3741a887 --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.7.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz Binary files differ new file mode 100644 index 00000000..ebf6bd0c --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.8.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz Binary files differ new file mode 100644 index 00000000..c1791059 --- /dev/null +++ b/training/dtrain/test/parallelize/g/grammar.out.9.gz |