Diffstat (limited to 'training')
-rw-r--r--  training/dtrain/dtrain.cc                          |   4
-rw-r--r--  training/dtrain/dtrain.h                           |   6
-rw-r--r--  training/dtrain/examples/standard/cdec.ini         |   2
-rw-r--r--  training/dtrain/examples/standard/expected-output  | 115
-rw-r--r--  training/dtrain/examples/toy/cdec.ini              |   1
-rw-r--r--  training/dtrain/examples/toy/dtrain.ini            |   2
-rwxr-xr-x  training/dtrain/parallelize.rb                     |  16
-rw-r--r--  training/mira/kbest_cut_mira.cc                    |   8
-rwxr-xr-x  training/mira/mira.py                              |  19
-rwxr-xr-x  training/pro/pro.pl                                |   1
10 files changed, 99 insertions(+), 75 deletions(-)
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index b01cf421..ccb50af2 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -438,7 +438,7 @@ main(int argc, char** argv)
         score_t model_diff = it->first.model - it->second.model;
         score_t loss = max(0.0, -1.0 * model_diff);
-        if (check && ki == 1) cout << losses[pair_idx] - loss << endl;
+        if (check && ki==repeat-1) cout << losses[pair_idx] - loss << endl;
         pair_idx++;
         if (repeat > 1) {
@@ -455,7 +455,7 @@ main(int argc, char** argv)
           margin = fabs(model_diff);
           if (!rank_error && margin < loss_margin) margin_violations++;
         }
-        if (rank_error && ki==1) rank_errors++;
+        if (rank_error && ki==0) rank_errors++;
         if (scale_bleu_diff) eta = it->first.score - it->second.score;
         if (rank_error || margin < loss_margin) {
           SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index eb23b813..07bd9b65 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -116,11 +116,11 @@ inline ostream& _p(ostream& out)  { return out << setiosflags(ios::showpos); }
 inline ostream& _p2(ostream& out) { return out << setprecision(2); }
 inline ostream& _p5(ostream& out) { return out << setprecision(5); }
-inline void printWordIDVec(vector<WordID>& v)
+inline void printWordIDVec(vector<WordID>& v, ostream& os=cerr)
 {
   for (unsigned i = 0; i < v.size(); i++) {
-    cerr << TD::Convert(v[i]);
-    if (i < v.size()-1) cerr << " ";
+    os << TD::Convert(v[i]);
+    if (i < v.size()-1) os << " ";
   }
 }
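[Aside, not part of the commit: a minimal sketch of what the defaulted ostream parameter in printWordIDVec buys. WordID and TD::Convert live in the cdec headers and are stubbed out with plain strings here; the point is only that unchanged call sites keep logging to cerr, while callers can now hand in any stream, e.g. cout.]

    #include <iostream>
    #include <string>
    #include <vector>
    using namespace std;

    // Stand-in for the patched helper: vector<string> replaces
    // vector<WordID> + TD::Convert from the cdec headers.
    inline void printWordIDVec(vector<string>& v, ostream& os=cerr)
    {
      for (unsigned i = 0; i < v.size(); i++) {
        os << v[i];
        if (i < v.size()-1) os << " ";
      }
    }

    int main()
    {
      vector<string> v;
      v.push_back("ein");
      v.push_back("haus");
      printWordIDVec(v);        // old call sites: unchanged, still writes to cerr
      cerr << endl;
      printWordIDVec(v, cout);  // new: redirect to any ostream
      cout << endl;
      return 0;
    }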
diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini
index 6cba9e1e..3330dd71 100644
--- a/training/dtrain/examples/standard/cdec.ini
+++ b/training/dtrain/examples/standard/cdec.ini
@@ -21,7 +21,7 @@ feature_function=RuleIdentityFeatures
 feature_function=RuleSourceBigramFeatures
 feature_function=RuleTargetBigramFeatures
 feature_function=RuleShape
-feature_function=RuleWordAlignmentFeatures
+feature_function=LexicalFeatures 1 1 1
 #feature_function=SourceSpanSizeFeatures
 #feature_function=SourceWordPenalty
 #feature_function=SpanFeatures
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output
index fa831221..2460cfbb 100644
--- a/training/dtrain/examples/standard/expected-output
+++ b/training/dtrain/examples/standard/expected-output
@@ -4,7 +4,8 @@ Reading ./nc-wmt11.en.srilm.gz
 ----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
 ****************************************************************************************************
   Example feature: Shape_S00000_T00000
-Seeding random number sequence to 4138446869
+T=1 I=1 D=1
+Seeding random number sequence to 2327685089
 
 dtrain
 Parameters:
@@ -36,87 +37,87 @@ Iteration #1 of 3.
  . 10
 Stopping after 10 input sentences.
 WEIGHTS
-              Glue = -80.3
-       WordPenalty = -51.247
-     LanguageModel = +282.46
- LanguageModel_OOV = -85.8
-     PhraseModel_0 = -100.06
-     PhraseModel_1 = -98.692
-     PhraseModel_2 = -9.4958
-     PhraseModel_3 = +18.535
-     PhraseModel_4 = +62.35
-     PhraseModel_5 = +7
-     PhraseModel_6 = +31.4
-       PassThrough = -126.5
+              Glue = +6.9
+       WordPenalty = -46.426
+     LanguageModel = +535.12
+ LanguageModel_OOV = -123.5
+     PhraseModel_0 = -160.73
+     PhraseModel_1 = -350.13
+     PhraseModel_2 = -187.81
+     PhraseModel_3 = +172.04
+     PhraseModel_4 = +0.90108
+     PhraseModel_5 = +21.6
+     PhraseModel_6 = +67.2
+       PassThrough = -149.7
         ---
-       1best avg score: 0.25631 (+0.25631)
- 1best avg model score: -4843.6 (-4843.6)
-           avg # pairs: 744.4
+       1best avg score: 0.23327 (+0.23327)
+ 1best avg model score: -9084.9 (-9084.9)
+           avg # pairs: 780.7
         avg # rank err: 0 (meaningless)
      avg # margin viol: 0
        k-best loss imp: 100%
-    non0 feature count: 1274
+    non0 feature count: 1389
            avg list sz: 91.3
-           avg f count: 143.72
-(time 0.4 min, 2.4 s/S)
+           avg f count: 146.2
+(time 0.37 min, 2.2 s/S)
 
 Iteration #2 of 3.
  . 10
 WEIGHTS
-              Glue = -117.4
-       WordPenalty = -99.584
-     LanguageModel = +395.05
- LanguageModel_OOV = -136.8
-     PhraseModel_0 = +40.614
-     PhraseModel_1 = -123.29
-     PhraseModel_2 = -152
-     PhraseModel_3 = -161.13
-     PhraseModel_4 = -76.379
-     PhraseModel_5 = +39.1
-     PhraseModel_6 = +137.7
-       PassThrough = -162.1
+              Glue = -43
+       WordPenalty = -22.019
+     LanguageModel = +591.53
+ LanguageModel_OOV = -252.1
+     PhraseModel_0 = -120.21
+     PhraseModel_1 = -43.589
+     PhraseModel_2 = +73.53
+     PhraseModel_3 = +113.7
+     PhraseModel_4 = -223.81
+     PhraseModel_5 = +64
+     PhraseModel_6 = +54.8
+       PassThrough = -331.1
         ---
-       1best avg score: 0.26751 (+0.011198)
- 1best avg model score: -10061 (-5216.9)
-           avg # pairs: 639.1
+       1best avg score: 0.29568 (+0.062413)
+ 1best avg model score: -15879 (-6794.1)
+           avg # pairs: 566.1
         avg # rank err: 0 (meaningless)
      avg # margin viol: 0
        k-best loss imp: 100%
-    non0 feature count: 1845
+    non0 feature count: 1931
            avg list sz: 91.3
-           avg f count: 139.88
-(time 0.35 min, 2.1 s/S)
+           avg f count: 139.89
+(time 0.33 min, 2 s/S)
 
 Iteration #3 of 3.
  . 10
 WEIGHTS
-              Glue = -101.1
-       WordPenalty = -139.97
-     LanguageModel = +327.98
- LanguageModel_OOV = -234.7
-     PhraseModel_0 = -144.49
-     PhraseModel_1 = -263.88
-     PhraseModel_2 = -149.25
-     PhraseModel_3 = -38.805
-     PhraseModel_4 = +50.575
-     PhraseModel_5 = -52.4
-     PhraseModel_6 = +41.6
-       PassThrough = -230.2
+              Glue = -44.3
+       WordPenalty = -131.85
+     LanguageModel = +230.91
+ LanguageModel_OOV = -285.4
+     PhraseModel_0 = -194.27
+     PhraseModel_1 = -294.83
+     PhraseModel_2 = -92.043
+     PhraseModel_3 = -140.24
+     PhraseModel_4 = +85.613
+     PhraseModel_5 = +238.1
+     PhraseModel_6 = +158.7
+       PassThrough = -359.6
         ---
-       1best avg score: 0.36222 (+0.094717)
- 1best avg model score: -17416 (-7355.5)
-           avg # pairs: 661.2
+       1best avg score: 0.37375 (+0.078067)
+ 1best avg model score: -14519 (+1359.7)
+           avg # pairs: 545.4
         avg # rank err: 0 (meaningless)
      avg # margin viol: 0
        k-best loss imp: 100%
-    non0 feature count: 2163
+    non0 feature count: 2218
            avg list sz: 91.3
-           avg f count: 132.53
-(time 0.33 min, 2 s/S)
+           avg f count: 137.77
+(time 0.35 min, 2.1 s/S)
 
 Writing weights file to '-' ...
 done
 ---
-Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.36222].
-This took 1.0833 min.
+Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.37375].
+This took 1.05 min.
diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini
index b14f4819..e6c19abe 100644
--- a/training/dtrain/examples/toy/cdec.ini
+++ b/training/dtrain/examples/toy/cdec.ini
@@ -1,3 +1,4 @@
 formalism=scfg
 add_pass_through_rules=true
 grammar=grammar.gz
+#add_extra_pass_through_features=6
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
index cd715f26..ef956df7 100644
--- a/training/dtrain/examples/toy/dtrain.ini
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -2,7 +2,7 @@ decoder_config=cdec.ini
 input=src
 refs=tgt
 output=-
-print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
+print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6
 k=4
 N=4
 epochs=2
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 60ca9422..82600009 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -26,7 +26,6 @@ opts = Trollop::options do
 end
 
 usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references]
-
 dtrain_dir = File.expand_path File.dirname(__FILE__)
 if not opts[:dtrain_binary]
   dtrain_bin = "#{dtrain_dir}/dtrain"
@@ -56,6 +55,7 @@ refs  = opts[:references]
 use_qsub       = opts[:qsub]
 shards_at_once = opts[:processes_at_once]
 first_input_weights  = opts[:first_input_weights]
+opts[:extra_qsub] = "-l #{opts[:extra_qsub]}" if opts[:extra_qsub]!=""
 
 `mkdir work`
 
@@ -64,8 +64,9 @@ def make_shards(input, refs, num_shards, epoch, rand)
   index = (0..lc-1).to_a
   index.reverse!
   index.shuffle! if rand
-  shard_sz = lc / num_shards
-  leftover = lc % num_shards
+  shard_sz = (lc / num_shards.to_f).round 0
+  leftover = lc - (num_shards*shard_sz)
+  leftover = 0 if leftover < 0
   in_f = File.new input, 'r'
   in_lines = in_f.readlines
   refs_f = File.new refs, 'r'
@@ -74,7 +75,10 @@ def make_shards(input, refs, num_shards, epoch, rand)
   shard_refs_files = []
   in_fns = []
   refs_fns = []
+  new_num_shards = 0
   0.upto(num_shards-1) { |shard|
+    break if index.size==0
+    new_num_shards += 1
     in_fn = "work/shard.#{shard}.#{epoch}.in"
     shard_in = File.new in_fn, 'w+'
     in_fns << in_fn
@@ -98,7 +102,7 @@ def make_shards(input, refs, num_shards, epoch, rand)
   (shard_in_files + shard_refs_files).each do |f| f.close end
   in_f.close
   refs_f.close
-  return [in_fns, refs_fns]
+  return in_fns, refs_fns, new_num_shards
 end
 
 input_files = []
@@ -111,7 +115,7 @@ if predefined_shards
   end
   num_shards = input_files.size
 else
-  input_files, refs_files = make_shards input, refs, num_shards, 0, rand
+  input_files, refs_files, num_shards = make_shards input, refs, num_shards, 0, rand
 end
 
 0.upto(epochs-1) { |epoch|
@@ -158,7 +162,7 @@ end
   `#{cat} work/weights.*.#{epoch} > work/weights_cat`
   `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}`
   if rand and reshard and epoch+1!=epochs
-    input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand
+    input_files, refs_files, num_shards = make_shards input, refs, num_shards, epoch+1, rand
   end
 }
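[Aside, not part of the commit: the new shard arithmetic in parallelize.rb is easier to see with numbers. A small C++ re-statement of the Ruby logic above, using made-up values lc=10 and num_shards=4.]

    #include <cmath>
    #include <iostream>

    int main()
    {
      int lc = 10, num_shards = 4;  // illustrative values only

      // Old: shard_sz = lc / num_shards  -> 2 (integer division),
      //      leftover = lc % num_shards  -> 2.
      // New: round the float quotient, derive the leftover from it,
      //      and clamp at zero since rounding up can overshoot.
      int shard_sz = (int)std::round((double)lc / num_shards);  // 3
      int leftover = lc - num_shards * shard_sz;                // -2
      if (leftover < 0) leftover = 0;                           // 0

      std::cout << "shard_sz=" << shard_sz
                << " leftover=" << leftover << std::endl;
      return 0;
    }

Because a rounded-up shard size can exhaust the input before all requested shards are filled (the loop now breaks once index is empty), make_shards also returns the actual shard count, and both call sites assign it back into num_shards.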
diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc
index 56206593..724b1853 100644
--- a/training/mira/kbest_cut_mira.cc
+++ b/training/mira/kbest_cut_mira.cc
@@ -95,7 +95,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
     ("stream,t", "Stream mode (used for realtime)")
     ("weights_output,O",po::value<string>(),"Directory to write weights to")
     ("output_dir,D",po::value<string>(),"Directory to place output in")
-    ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+    ("decoder_config,c",po::value<string>(),"Decoder configuration file")
+    ("verbose,v",po::value<bool>()->zero_tokens(),"verbose stderr output");
   po::options_description clo("Command line options");
   clo.add_options()
     ("config", po::value<string>(), "Configuration file")
@@ -621,6 +622,7 @@ int main(int argc, char** argv) {
   vector<string> corpus;
 
+  const bool VERBOSE = conf.count("verbose");
   const string metric_name = conf["mt_metric"].as<string>();
   optimizer = conf["optimizer"].as<int>();
   fear_select = conf["fear"].as<int>();
@@ -783,7 +785,8 @@ int main(int argc, char** argv) {
 	  double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights);
 	  double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric);
 	  const double loss = margin +  mt_loss;
-	  cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) <<endl;
+	  cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << endl;
+	  if (VERBOSE) cerr << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) << endl;
 	  if (loss > 0.0 || !checkloss) {
 	    SparseVector<double> diff = cur_good.features;
 	    diff -= cur_bad.features;
@@ -920,6 +923,7 @@ int main(int argc, char** argv) {
 			lambdas += (cur_pair[1]->features) * step_size;
 			lambdas -= (cur_pair[0]->features) * step_size;
+			if (VERBOSE) cerr << " Lambdas " << lambdas << endl;
 
 			//reload weights based on update
 			dense_weights.clear();
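[Aside, not part of the commit: the new --verbose switch uses the boost::program_options idiom po::value<bool>()->zero_tokens(), an argument-less flag whose presence is tested with conf.count(). A stripped-down sketch of that idiom, independent of the rest of kbest_cut_mira.cc:]

    #include <boost/program_options.hpp>
    #include <iostream>
    namespace po = boost::program_options;

    int main(int argc, char** argv)
    {
      po::options_description opts("Options");
      opts.add_options()
        // zero_tokens(): the switch consumes no argument on the command line
        ("verbose,v", po::value<bool>()->zero_tokens(), "verbose stderr output");
      po::variables_map conf;
      po::store(po::parse_command_line(argc, argv, opts), conf);
      const bool VERBOSE = conf.count("verbose");  // same presence test as the patch
      if (VERBOSE) std::cerr << "verbose stderr output enabled" << std::endl;
      return 0;
    }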
diff --git a/training/mira/mira.py b/training/mira/mira.py
index 3e6aa2db..ec9c2d64 100755
--- a/training/mira/mira.py
+++ b/training/mira/mira.py
@@ -143,6 +143,12 @@ def main():
   parser.add_argument('--pass-suffix',
                       help='multipass decoding iteration. see documentation '
                            'at www.cdec-decoder.org for more information')
+  parser.add_argument('--qsub',
+                      help='use qsub', action='store_true')
+  parser.add_argument('--pmem',
+                      help='memory for qsub', type=str, default='5G')
+  parser.add_argument('-v', '--verbose',
+                      help='more verbose mira optimizers')
   args = parser.parse_args()
 
   args.metric = args.metric.upper()
@@ -315,6 +321,8 @@ def split_devset(dev, outdir):
 
 def optimize(args, script_dir, dev_size):
   parallelize = script_dir+'/../utils/parallelize.pl'
+  if args.qsub:
+    parallelize += " -p %s"%args.pmem
   decoder = script_dir+'/kbest_cut_mira'
   (source, refs) = split_devset(args.devset, args.output_dir)
   port = random.randint(15000,50000)
@@ -353,10 +361,15 @@ def optimize(args, script_dir, dev_size):
       decoder_cmd += ' -a'
     if not args.no_pseudo:
       decoder_cmd += ' -e'
+    if args.verbose:
+      decoder_cmd += ' -v'
 
-    #always use fork
-    parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format(
-                    parallelize, logdir, args.jobs)
+    if args.qsub:
+      parallel_cmd = '{0} -e {1} -j {2} --'.format(
+                      parallelize, logdir, args.jobs)
+    else:
+      parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format(
+                      parallelize, logdir, args.jobs)
 
     cmd = parallel_cmd + ' ' + decoder_cmd
     logging.info('OPTIMIZATION COMMAND: {}'.format(cmd))
diff --git a/training/pro/pro.pl b/training/pro/pro.pl
index 3b30c379..a059477d 100755
--- a/training/pro/pro.pl
+++ b/training/pro/pro.pl
@@ -79,6 +79,7 @@ if (GetOptions(
 	"help" => \$help,
 	"reg=f" => \$reg,
 	"reg-previous=f" => \$reg_previous,
+  "pmem=s" => \$pmem,
 	"output-dir=s" => \$dir,
 ) == 0 || @ARGV!=0 || $help) {
 	print_help();
