diff options
Diffstat (limited to 'training/dtrain')
-rw-r--r-- | training/dtrain/dtrain.cc | 4 | ||||
-rw-r--r-- | training/dtrain/dtrain.h | 6 | ||||
-rw-r--r-- | training/dtrain/examples/standard/cdec.ini | 2 | ||||
-rw-r--r-- | training/dtrain/examples/standard/expected-output | 115 | ||||
-rw-r--r-- | training/dtrain/examples/toy/cdec.ini | 1 | ||||
-rw-r--r-- | training/dtrain/examples/toy/dtrain.ini | 2 | ||||
-rwxr-xr-x | training/dtrain/parallelize.rb | 19 |
7 files changed, 78 insertions, 71 deletions
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index b01cf421..ccb50af2 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -438,7 +438,7 @@ main(int argc, char** argv) score_t model_diff = it->first.model - it->second.model; score_t loss = max(0.0, -1.0 * model_diff); - if (check && ki == 1) cout << losses[pair_idx] - loss << endl; + if (check && ki==repeat-1) cout << losses[pair_idx] - loss << endl; pair_idx++; if (repeat > 1) { @@ -455,7 +455,7 @@ main(int argc, char** argv) margin = fabs(model_diff); if (!rank_error && margin < loss_margin) margin_violations++; } - if (rank_error && ki==1) rank_errors++; + if (rank_error && ki==0) rank_errors++; if (scale_bleu_diff) eta = it->first.score - it->second.score; if (rank_error || margin < loss_margin) { SparseVector<weight_t> diff_vec = it->first.f - it->second.f; diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h index eb23b813..07bd9b65 100644 --- a/training/dtrain/dtrain.h +++ b/training/dtrain/dtrain.h @@ -116,11 +116,11 @@ inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } inline ostream& _p2(ostream& out) { return out << setprecision(2); } inline ostream& _p5(ostream& out) { return out << setprecision(5); } -inline void printWordIDVec(vector<WordID>& v) +inline void printWordIDVec(vector<WordID>& v, ostream& os=cerr) { for (unsigned i = 0; i < v.size(); i++) { - cerr << TD::Convert(v[i]); - if (i < v.size()-1) cerr << " "; + os << TD::Convert(v[i]); + if (i < v.size()-1) os << " "; } } diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini index 6cba9e1e..3330dd71 100644 --- a/training/dtrain/examples/standard/cdec.ini +++ b/training/dtrain/examples/standard/cdec.ini @@ -21,7 +21,7 @@ feature_function=RuleIdentityFeatures feature_function=RuleSourceBigramFeatures feature_function=RuleTargetBigramFeatures feature_function=RuleShape -feature_function=RuleWordAlignmentFeatures +feature_function=LexicalFeatures 1 1 1 #feature_function=SourceSpanSizeFeatures #feature_function=SourceWordPenalty #feature_function=SpanFeatures diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output index fa831221..2460cfbb 100644 --- a/training/dtrain/examples/standard/expected-output +++ b/training/dtrain/examples/standard/expected-output @@ -4,7 +4,8 @@ Reading ./nc-wmt11.en.srilm.gz ----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 **************************************************************************************************** Example feature: Shape_S00000_T00000 -Seeding random number sequence to 4138446869 +T=1 I=1 D=1 +Seeding random number sequence to 2327685089 dtrain Parameters: @@ -36,87 +37,87 @@ Iteration #1 of 3. . 10 Stopping after 10 input sentences. WEIGHTS - Glue = -80.3 - WordPenalty = -51.247 - LanguageModel = +282.46 - LanguageModel_OOV = -85.8 - PhraseModel_0 = -100.06 - PhraseModel_1 = -98.692 - PhraseModel_2 = -9.4958 - PhraseModel_3 = +18.535 - PhraseModel_4 = +62.35 - PhraseModel_5 = +7 - PhraseModel_6 = +31.4 - PassThrough = -126.5 + Glue = +6.9 + WordPenalty = -46.426 + LanguageModel = +535.12 + LanguageModel_OOV = -123.5 + PhraseModel_0 = -160.73 + PhraseModel_1 = -350.13 + PhraseModel_2 = -187.81 + PhraseModel_3 = +172.04 + PhraseModel_4 = +0.90108 + PhraseModel_5 = +21.6 + PhraseModel_6 = +67.2 + PassThrough = -149.7 --- - 1best avg score: 0.25631 (+0.25631) - 1best avg model score: -4843.6 (-4843.6) - avg # pairs: 744.4 + 1best avg score: 0.23327 (+0.23327) + 1best avg model score: -9084.9 (-9084.9) + avg # pairs: 780.7 avg # rank err: 0 (meaningless) avg # margin viol: 0 k-best loss imp: 100% - non0 feature count: 1274 + non0 feature count: 1389 avg list sz: 91.3 - avg f count: 143.72 -(time 0.4 min, 2.4 s/S) + avg f count: 146.2 +(time 0.37 min, 2.2 s/S) Iteration #2 of 3. . 10 WEIGHTS - Glue = -117.4 - WordPenalty = -99.584 - LanguageModel = +395.05 - LanguageModel_OOV = -136.8 - PhraseModel_0 = +40.614 - PhraseModel_1 = -123.29 - PhraseModel_2 = -152 - PhraseModel_3 = -161.13 - PhraseModel_4 = -76.379 - PhraseModel_5 = +39.1 - PhraseModel_6 = +137.7 - PassThrough = -162.1 + Glue = -43 + WordPenalty = -22.019 + LanguageModel = +591.53 + LanguageModel_OOV = -252.1 + PhraseModel_0 = -120.21 + PhraseModel_1 = -43.589 + PhraseModel_2 = +73.53 + PhraseModel_3 = +113.7 + PhraseModel_4 = -223.81 + PhraseModel_5 = +64 + PhraseModel_6 = +54.8 + PassThrough = -331.1 --- - 1best avg score: 0.26751 (+0.011198) - 1best avg model score: -10061 (-5216.9) - avg # pairs: 639.1 + 1best avg score: 0.29568 (+0.062413) + 1best avg model score: -15879 (-6794.1) + avg # pairs: 566.1 avg # rank err: 0 (meaningless) avg # margin viol: 0 k-best loss imp: 100% - non0 feature count: 1845 + non0 feature count: 1931 avg list sz: 91.3 - avg f count: 139.88 -(time 0.35 min, 2.1 s/S) + avg f count: 139.89 +(time 0.33 min, 2 s/S) Iteration #3 of 3. . 10 WEIGHTS - Glue = -101.1 - WordPenalty = -139.97 - LanguageModel = +327.98 - LanguageModel_OOV = -234.7 - PhraseModel_0 = -144.49 - PhraseModel_1 = -263.88 - PhraseModel_2 = -149.25 - PhraseModel_3 = -38.805 - PhraseModel_4 = +50.575 - PhraseModel_5 = -52.4 - PhraseModel_6 = +41.6 - PassThrough = -230.2 + Glue = -44.3 + WordPenalty = -131.85 + LanguageModel = +230.91 + LanguageModel_OOV = -285.4 + PhraseModel_0 = -194.27 + PhraseModel_1 = -294.83 + PhraseModel_2 = -92.043 + PhraseModel_3 = -140.24 + PhraseModel_4 = +85.613 + PhraseModel_5 = +238.1 + PhraseModel_6 = +158.7 + PassThrough = -359.6 --- - 1best avg score: 0.36222 (+0.094717) - 1best avg model score: -17416 (-7355.5) - avg # pairs: 661.2 + 1best avg score: 0.37375 (+0.078067) + 1best avg model score: -14519 (+1359.7) + avg # pairs: 545.4 avg # rank err: 0 (meaningless) avg # margin viol: 0 k-best loss imp: 100% - non0 feature count: 2163 + non0 feature count: 2218 avg list sz: 91.3 - avg f count: 132.53 -(time 0.33 min, 2 s/S) + avg f count: 137.77 +(time 0.35 min, 2.1 s/S) Writing weights file to '-' ... done --- -Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.36222]. -This took 1.0833 min. +Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.37375]. +This took 1.05 min. diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini index b14f4819..e6c19abe 100644 --- a/training/dtrain/examples/toy/cdec.ini +++ b/training/dtrain/examples/toy/cdec.ini @@ -1,3 +1,4 @@ formalism=scfg add_pass_through_rules=true grammar=grammar.gz +#add_extra_pass_through_features=6 diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini index cd715f26..ef956df7 100644 --- a/training/dtrain/examples/toy/dtrain.ini +++ b/training/dtrain/examples/toy/dtrain.ini @@ -2,7 +2,7 @@ decoder_config=cdec.ini input=src refs=tgt output=- -print_weights=logp shell_rule house_rule small_rule little_rule PassThrough +print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6 k=4 N=4 epochs=2 diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 60ca9422..5fc8b04e 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -4,7 +4,7 @@ require 'trollop' def usage STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l virtual_free=24G\"]\n" + STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l mem_free=24G\"]\n" exit 1 end @@ -26,7 +26,6 @@ opts = Trollop::options do end usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references] - dtrain_dir = File.expand_path File.dirname(__FILE__) if not opts[:dtrain_binary] dtrain_bin = "#{dtrain_dir}/dtrain" @@ -56,6 +55,7 @@ refs = opts[:references] use_qsub = opts[:qsub] shards_at_once = opts[:processes_at_once] first_input_weights = opts[:first_input_weights] +opts[:extra_qsub] = "-l #{opts[:extra_qsub]}" if opts[:extra_qsub]!="" `mkdir work` @@ -64,8 +64,9 @@ def make_shards(input, refs, num_shards, epoch, rand) index = (0..lc-1).to_a index.reverse! index.shuffle! if rand - shard_sz = lc / num_shards - leftover = lc % num_shards + shard_sz = (lc / num_shards.to_f).round 0 + leftover = lc - (num_shards*shard_sz) + leftover = 0 if leftover < 0 in_f = File.new input, 'r' in_lines = in_f.readlines refs_f = File.new refs, 'r' @@ -74,7 +75,10 @@ def make_shards(input, refs, num_shards, epoch, rand) shard_refs_files = [] in_fns = [] refs_fns = [] + new_num_shards = 0 0.upto(num_shards-1) { |shard| + break if index.size==0 + new_num_shards += 1 in_fn = "work/shard.#{shard}.#{epoch}.in" shard_in = File.new in_fn, 'w+' in_fns << in_fn @@ -83,6 +87,7 @@ def make_shards(input, refs, num_shards, epoch, rand) refs_fns << refs_fn 0.upto(shard_sz-1) { |i| j = index.pop + break if !j shard_in.write in_lines[j] shard_refs.write refs_lines[j] } @@ -98,7 +103,7 @@ def make_shards(input, refs, num_shards, epoch, rand) (shard_in_files + shard_refs_files).each do |f| f.close end in_f.close refs_f.close - return [in_fns, refs_fns] + return in_fns, refs_fns, new_num_shards end input_files = [] @@ -111,7 +116,7 @@ if predefined_shards end num_shards = input_files.size else - input_files, refs_files = make_shards input, refs, num_shards, 0, rand + input_files, refs_files, num_shards = make_shards input, refs, num_shards, 0, rand end 0.upto(epochs-1) { |epoch| @@ -158,7 +163,7 @@ end `#{cat} work/weights.*.#{epoch} > work/weights_cat` `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}` if rand and reshard and epoch+1!=epochs - input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand + input_files, refs_files, num_shards = make_shards input, refs, num_shards, epoch+1, rand end } |