From 32dea3f24e56ac7c17343457c48f750f16838742 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Fri, 23 Jan 2015 15:50:27 +0100
Subject: dtrain: multi-reference BLEU

---
 training/dtrain/dtrain.cc                          |  67 ++++-------
 training/dtrain/dtrain.h                           |   6 +-
 training/dtrain/examples/standard/expected-output  | 123 ---------------------
 .../dtrain/examples/standard/expected-output.gz    | Bin 0 -> 625304 bytes
 training/dtrain/examples/standard/nc-wmt11.de.gz   | Bin 58324 -> 0 bytes
 training/dtrain/examples/standard/nc-wmt11.en.gz   | Bin 49600 -> 0 bytes
 training/dtrain/examples/toy/dtrain.ini            |   3 +-
 training/dtrain/examples/toy/expected-output       |  31 ++++--
 training/dtrain/examples/toy/in                    |   2 +
 training/dtrain/examples/toy/src                   |   2 -
 training/dtrain/examples/toy/tgt                   |   2 -
 training/dtrain/kbestget.h                         |   4 +-
 training/dtrain/ksampler.h                         |   2 +-
 training/dtrain/parallelize.rb                     |  35 ++----
 training/dtrain/score.cc                           |  63 ++++++-----
 training/dtrain/score.h                            |  37 ++++---
 16 files changed, 123 insertions(+), 254 deletions(-)
 delete mode 100644 training/dtrain/examples/standard/expected-output
 create mode 100644 training/dtrain/examples/standard/expected-output.gz
 delete mode 100644 training/dtrain/examples/standard/nc-wmt11.de.gz
 delete mode 100644 training/dtrain/examples/standard/nc-wmt11.en.gz
 create mode 100644 training/dtrain/examples/toy/in
 delete mode 100644 training/dtrain/examples/toy/src
 delete mode 100644 training/dtrain/examples/toy/tgt
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 823a50de..737326f8 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -12,9 +12,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
 {
   po::options_description ini("Configuration File Options");
   ini.add_options()
-    ("input",             po::value<string>(),                                                                 "input file (src)")
-    ("refs,r",            po::value<string>(),                                                                       "references")
-    ("bitext,b",          po::value<string>(),                                                            "bitext: 'src ||| tgt'")
+    ("bitext,b",          po::value<string>(),                                                            "bitext: 'src ||| tgt ||| tgt ||| ...'")
     ("output",            po::value<string>()->default_value("-"),                          "output weights file, '-' for STDOUT")
     ("input_weights",     po::value<string>(),                                "input weights file (e.g. from previous iteration)")
     ("decoder_config",    po::value<string>(),                                                      "configuration file for cdec")
@@ -84,8 +82,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     cerr << "hi_lo must lie in [0.01, 0.5]" << endl;
     return false;
   }
-  if ((cfg->count("input")>0 || cfg->count("refs")>0) && cfg->count("bitext")>0) {
-    cerr << "Provide 'input' and 'refs' or 'bitext', not both." << endl;
+  if (!cfg->count("bitext")) {
+    cerr << "No training data given." << endl;
     return false;
   }
   if ((*cfg)["pair_threshold"].as<score_t>() < 0) {
@@ -221,24 +219,11 @@ main(int argc, char** argv)
   // output
   string output_fn = cfg["output"].as<string>();
   // input
-  bool read_bitext = false;
-  string input_fn;
-  if (cfg.count("bitext")) {
-    read_bitext = true;
-    input_fn = cfg["bitext"].as<string>();
-  } else {
-    input_fn = cfg["input"].as<string>();
-  }
-  ReadFile input(input_fn);
+  string input_fn = cfg["bitext"].as<string>(); // bitext path; also echoed in the parameter summary
+  ReadFile input(input_fn);
   // buffer input for t > 0
   vector<string> src_str_buf;          // source strings (decoder takes only strings)
-  vector<vector<WordID> > ref_ids_buf; // references as WordID vecs
-  ReadFile refs;
-  string refs_fn;
-  if (!read_bitext) {
-    refs_fn = cfg["refs"].as<string>();
-    refs.Init(refs_fn);
-  }
+  vector<vector<vector<WordID> > > refs_as_ids_buf; // per input line: all references as WordID vecs
 
   unsigned in_sz = std::numeric_limits<unsigned>::max(); // input index, input size
   vector<pair<score_t, score_t> > all_scores;
@@ -280,8 +265,6 @@ main(int argc, char** argv)
     //cerr << setw(25) << "test k-best " << test_k_best << endl;
     cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl;
     cerr << setw(25) << "input " << "'" << input_fn << "'" << endl;
-    if (!read_bitext)
-      cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl;
     cerr << setw(25) << "output " << "'" << output_fn << "'" << endl;
     if (cfg.count("input_weights"))
       cerr << setw(25) << "weights in " << "'" << cfg["input_weights"].as<string>() << "'" << endl;
@@ -311,17 +294,13 @@ main(int argc, char** argv)
   {
 
     string in;
-    vector<string> ref;
+    vector<string> refs;
     bool next = false, stop = false; // next iteration or premature stop
     if (t == 0) {
       if(!getline(*input, in)) next = true;
-      if(read_bitext) {
-        vector<string> strs;
-        boost::algorithm::split_regex(strs, in, boost::regex(" \\|\\|\\| "));
-        in = strs[0];
-        strs.erase(strs.begin());
-        ref = strs;
-      }
+        boost::algorithm::split_regex(refs, in, boost::regex(" \\|\\|\\| "));
+        in = refs[0];
+        refs.erase(refs.begin());
     } else {
       if (ii == in_sz) next = true; // stop if we reach the end of our input
     }
@@ -356,20 +335,19 @@ main(int argc, char** argv)
     lambdas.init_vector(&decoder_weights);
 
     // getting input
-    vector<vector<WordID> ref_ids; // reference as vector<WordID>
     if (t == 0) {
-      if (!read_bitext) {
-        getline(*refs, ref);
+      vector<vector<WordID> > cur_refs;
+      for (auto r: refs) {
+        vector<WordID> cur_ref;
+        vector<string> tok;
+        boost::split(tok, r, boost::is_any_of(" "));
+        register_and_convert(tok, cur_ref);
+        cur_refs.push_back(cur_ref);
       }
-      vector<string> ref_tok;
-      boost::split(ref_tok, ref, boost::is_any_of(" "));
-      register_and_convert(ref_tok, ref_ids);
-      ref_ids_buf.push_back(ref_ids);
+      refs_as_ids_buf.push_back(cur_refs);
       src_str_buf.push_back(in);
-    } else {
-      ref_ids = ref_ids_buf[ii];
     }
-    observer->SetRef(ref_ids);
+    observer->SetRef(refs_as_ids_buf[ii]);
     if (t == 0)
       decoder.Decode(in, observer);
     else
@@ -379,10 +357,11 @@ main(int argc, char** argv)
     vector<ScoredHyp>* samples = observer->GetSamples();
 
     if (verbose) {
-      cerr << "--- ref for " << ii << ": ";
-      if (t > 0) printWordIDVec(ref_ids_buf[ii]);
-      else printWordIDVec(ref_ids);
-      cerr << endl;
+      cerr << "--- refs for " << ii << ": ";
+      for (auto r: refs_as_ids_buf[ii]) {
+        printWordIDVec(r);
+        cerr << endl;
+      }
       for (unsigned u = 0; u < samples->size(); u++) {
         cerr << _p2 << _np << "[" << u << ". '";
         printWordIDVec((*samples)[u].w);
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index 07bd9b65..d7980688 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -64,7 +64,7 @@ struct LocalScorer
   vector<score_t> w_;
 
   virtual score_t
-  Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned src_len)=0;
+  Score(const vector<WordID>& hyp, const vector<vector<WordID> >& ref, const unsigned rank, const unsigned src_len)=0;
 
   virtual void Reset() {} // only for ApproxBleuScorer, LinearBleuScorer
 
@@ -88,11 +88,11 @@ struct LocalScorer
 struct HypSampler : public DecoderObserver
 {
   LocalScorer* scorer_;
-  vector<WordID>* ref_;
+  vector<vector<WordID> >* refs_;
   unsigned f_count_, sz_;
   virtual vector<ScoredHyp>* GetSamples()=0;
   inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; }
-  inline void SetRef(vector<WordID>& ref) { ref_ = &ref; }
+  inline void SetRef(vector<vector<WordID> >& refs) { refs_ = &refs; }
   inline unsigned get_f_count() { return f_count_; }
   inline unsigned get_sz() { return sz_; }
 };
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output
deleted file mode 100644
index 2460cfbb..00000000
--- a/training/dtrain/examples/standard/expected-output
+++ /dev/null
@@ -1,123 +0,0 @@
-                cdec cfg './cdec.ini'
-Loading the LM will be faster if you build a binary file.
-Reading ./nc-wmt11.en.srilm.gz
-----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
-****************************************************************************************************
-  Example feature: Shape_S00000_T00000
-T=1 I=1 D=1
-Seeding random number sequence to 2327685089
-
-dtrain
-Parameters:
-                       k 100
-                       N 4
-                       T 3
-                   batch 0
-                  scorer 'fixed_stupid_bleu'
-             sample from 'kbest'
-                  filter 'uniq'
-           learning rate 0.1
-                   gamma 0
-             loss margin 0
-       faster perceptron 1
-                   pairs 'XYX'
-                   hi lo 0.1
-          pair threshold 0
-          select weights 'avg'
-                  l1 reg 0 'none'
-                    pclr no
-               max pairs 4294967295
-                  repeat 1
-                cdec cfg './cdec.ini'
-                   input './nc-wmt11.gz'
-                  output '-'
-              stop_after 10
-(a dot represents 10 inputs)
-Iteration #1 of 3.
- . 10
-Stopping after 10 input sentences.
-WEIGHTS
-              Glue = +6.9
-       WordPenalty = -46.426
-     LanguageModel = +535.12
- LanguageModel_OOV = -123.5
-     PhraseModel_0 = -160.73
-     PhraseModel_1 = -350.13
-     PhraseModel_2 = -187.81
-     PhraseModel_3 = +172.04
-     PhraseModel_4 = +0.90108
-     PhraseModel_5 = +21.6
-     PhraseModel_6 = +67.2
-       PassThrough = -149.7
-        ---
-       1best avg score: 0.23327 (+0.23327)
- 1best avg model score: -9084.9 (-9084.9)
-           avg # pairs: 780.7
-        avg # rank err: 0 (meaningless)
-     avg # margin viol: 0
-       k-best loss imp: 100%
-    non0 feature count: 1389
-           avg list sz: 91.3
-           avg f count: 146.2
-(time 0.37 min, 2.2 s/S)
-
-Iteration #2 of 3.
- . 10
-WEIGHTS
-              Glue = -43
-       WordPenalty = -22.019
-     LanguageModel = +591.53
- LanguageModel_OOV = -252.1
-     PhraseModel_0 = -120.21
-     PhraseModel_1 = -43.589
-     PhraseModel_2 = +73.53
-     PhraseModel_3 = +113.7
-     PhraseModel_4 = -223.81
-     PhraseModel_5 = +64
-     PhraseModel_6 = +54.8
-       PassThrough = -331.1
-        ---
-       1best avg score: 0.29568 (+0.062413)
- 1best avg model score: -15879 (-6794.1)
-           avg # pairs: 566.1
-        avg # rank err: 0 (meaningless)
-     avg # margin viol: 0
-       k-best loss imp: 100%
-    non0 feature count: 1931
-           avg list sz: 91.3
-           avg f count: 139.89
-(time 0.33 min, 2 s/S)
-
-Iteration #3 of 3.
- . 10
-WEIGHTS
-              Glue = -44.3
-       WordPenalty = -131.85
-     LanguageModel = +230.91
- LanguageModel_OOV = -285.4
-     PhraseModel_0 = -194.27
-     PhraseModel_1 = -294.83
-     PhraseModel_2 = -92.043
-     PhraseModel_3 = -140.24
-     PhraseModel_4 = +85.613
-     PhraseModel_5 = +238.1
-     PhraseModel_6 = +158.7
-       PassThrough = -359.6
-        ---
-       1best avg score: 0.37375 (+0.078067)
- 1best avg model score: -14519 (+1359.7)
-           avg # pairs: 545.4
-        avg # rank err: 0 (meaningless)
-     avg # margin viol: 0
-       k-best loss imp: 100%
-    non0 feature count: 2218
-           avg list sz: 91.3
-           avg f count: 137.77
-(time 0.35 min, 2.1 s/S)
-
-Writing weights file to '-' ...
-done
-
----
-Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.37375].
-This took 1.05 min.
diff --git a/training/dtrain/examples/standard/expected-output.gz b/training/dtrain/examples/standard/expected-output.gz
new file mode 100644
index 00000000..f93a253e
Binary files /dev/null and b/training/dtrain/examples/standard/expected-output.gz differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz
deleted file mode 100644
index 0741fd92..00000000
Binary files a/training/dtrain/examples/standard/nc-wmt11.de.gz and /dev/null differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz
deleted file mode 100644
index 1c0bd401..00000000
Binary files a/training/dtrain/examples/standard/nc-wmt11.en.gz and /dev/null differ
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
index ef956df7..70c7331c 100644
--- a/training/dtrain/examples/toy/dtrain.ini
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -1,6 +1,5 @@
 decoder_config=cdec.ini
-input=src
-refs=tgt
+bitext=in
 output=-
 print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6
 k=4
diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output
index 1da2aadd..fbee24e3 100644
--- a/training/dtrain/examples/toy/expected-output
+++ b/training/dtrain/examples/toy/expected-output
@@ -1,26 +1,29 @@
 Warning: hi_lo only works with pair_sampling XYX.
                 cdec cfg 'cdec.ini'
-Seeding random number sequence to 1664825829
+Seeding random number sequence to 3626026233
 
 dtrain
 Parameters:
                        k 4
                        N 4
                        T 2
+                   batch 0
                   scorer 'bleu'
              sample from 'kbest'
                   filter 'uniq'
            learning rate 1
                    gamma 0
              loss margin 0
+       faster perceptron 1
                    pairs 'all'
           pair threshold 0
           select weights 'last'
                   l1 reg 0 'none'
+                    pclr no
                max pairs 4294967295
+                  repeat 1
                 cdec cfg 'cdec.ini'
-                   input 'src'
-                    refs 'tgt'
+                   input ''
                   output '-'
 (a dot represents 10 inputs)
 Iteration #1 of 2.
@@ -32,12 +35,19 @@ WEIGHTS
         small_rule = -2
        little_rule = +3
        PassThrough = -5
+     PassThrough_1 = +0
+     PassThrough_2 = +0
+     PassThrough_3 = +0
+     PassThrough_4 = +0
+     PassThrough_5 = +0
+     PassThrough_6 = +0
         ---
        1best avg score: 0.5 (+0.5)
  1best avg model score: 2.5 (+2.5)
-           avg # pairs: 4
-        avg # rank err: 1.5
+           avg # pairs: 1.5
+        avg # rank err: 1.5 (meaningless)
      avg # margin viol: 0
+       k-best loss imp: 100%
     non0 feature count: 6
            avg list sz: 4
            avg f count: 2.875
@@ -52,12 +62,19 @@ WEIGHTS
         small_rule = -2
        little_rule = +3
        PassThrough = -5
+     PassThrough_1 = +0
+     PassThrough_2 = +0
+     PassThrough_3 = +0
+     PassThrough_4 = +0
+     PassThrough_5 = +0
+     PassThrough_6 = +0
         ---
        1best avg score: 1 (+0.5)
  1best avg model score: 5 (+2.5)
-           avg # pairs: 5
-        avg # rank err: 0
+           avg # pairs: 0
+        avg # rank err: 0 (meaningless)
      avg # margin viol: 0
+       k-best loss imp: 100%
     non0 feature count: 6
            avg list sz: 4
            avg f count: 3
diff --git a/training/dtrain/examples/toy/in b/training/dtrain/examples/toy/in
new file mode 100644
index 00000000..5d70795d
--- /dev/null
+++ b/training/dtrain/examples/toy/in
@@ -0,0 +1,2 @@
+ich sah ein kleines haus ||| i saw a little house
+ich fand ein kleines haus ||| i found a little house
diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src
deleted file mode 100644
index 87e39ef2..00000000
--- a/training/dtrain/examples/toy/src
+++ /dev/null
@@ -1,2 +0,0 @@
-ich sah ein kleines haus
-ich fand ein kleines haus
diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt
deleted file mode 100644
index 174926b3..00000000
--- a/training/dtrain/examples/toy/tgt
+++ /dev/null
@@ -1,2 +0,0 @@
-i saw a little house
-i found a little house
diff --git a/training/dtrain/kbestget.h b/training/dtrain/kbestget.h
index 85252db3..25f02273 100644
--- a/training/dtrain/kbestget.h
+++ b/training/dtrain/kbestget.h
@@ -52,7 +52,7 @@ struct KBestGetter : public HypSampler
       h.f = d->feature_values;
       h.model = log(d->score);
       h.rank = i;
-      h.score = scorer_->Score(h.w, *ref_, i, src_len_);
+      h.score = scorer_->Score(h.w, *refs_, i, src_len_);
       s_.push_back(h);
       sz_++;
       f_count_ += h.f.size();
@@ -73,7 +73,7 @@ struct KBestGetter : public HypSampler
       h.f = d->feature_values;
       h.model = log(d->score);
       h.rank = i;
-      h.score = scorer_->Score(h.w, *ref_, i, src_len_);
+      h.score = scorer_->Score(h.w, *refs_, i, src_len_);
       s_.push_back(h);
       sz_++;
       f_count_ += h.f.size();
diff --git a/training/dtrain/ksampler.h b/training/dtrain/ksampler.h
index 29dab667..9eedc74f 100644
--- a/training/dtrain/ksampler.h
+++ b/training/dtrain/ksampler.h
@@ -43,7 +43,7 @@ struct KSampler : public HypSampler
       h.f = samples[i].fmap;
       h.model = log(samples[i].model_score);
       h.rank = i;
-      h.score = scorer_->Score(h.w, *ref_, i, src_len_);
+      h.score = scorer_->Score(h.w, *refs_, i, src_len_);
       s_.push_back(h);
       sz_++;
       f_count_ += h.f.size();
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 5fc8b04e..fe3a6cf5 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -1,10 +1,11 @@
 #!/usr/bin/env ruby
 
 require 'trollop'
+require 'zipf'
 
 def usage
   STDERR.write "Usage: "
-  STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l mem_free=24G\"]\n"
+  STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l mem_free=24G\"]\n"
   exit 1
 end
 
@@ -16,15 +17,14 @@ opts = Trollop::options do
   opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false
   opt :shards, "number of shards", :type => :int
   opt :processes_at_once, "have this number (max) running at the same time", :type => :int, :default => 9999
-  opt :input, "input", :type => :string
-  opt :references, "references", :type => :string
+  opt :input, "input (bitext f ||| e ||| ...)", :type => :string
   opt :qsub, "use qsub", :type => :bool, :default => false
   opt :dtrain_binary, "path to dtrain binary", :type => :string
   opt :extra_qsub, "extra qsub args", :type => :string, :default => ""
   opt :per_shard_decoder_configs, "give special decoder config per shard", :type => :string, :short => '-o'
   opt :first_input_weights, "input weights for first iter", :type => :string, :default => '', :short => '-w'
 end
-usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references]
+usage if not opts[:config]&&opts[:shards]&&opts[:input]
 
 dtrain_dir = File.expand_path File.dirname(__FILE__)
 if not opts[:dtrain_binary]
@@ -51,7 +51,6 @@ else
   num_shards = opts[:shards]
 end
 input = opts[:input]
-refs  = opts[:references]
 use_qsub       = opts[:qsub]
 shards_at_once = opts[:processes_at_once]
 first_input_weights  = opts[:first_input_weights]
@@ -59,7 +58,7 @@ opts[:extra_qsub] = "-l #{opts[:extra_qsub]}" if opts[:extra_qsub]!=""
 
 `mkdir work`
 
-def make_shards(input, refs, num_shards, epoch, rand)
+def make_shards(input, num_shards, epoch, rand)
   lc = `wc -l #{input}`.split.first.to_i
   index = (0..lc-1).to_a
   index.reverse!
@@ -69,12 +68,8 @@ def make_shards(input, refs, num_shards, epoch, rand)
   leftover = 0 if leftover < 0
   in_f = File.new input, 'r'
   in_lines = in_f.readlines
-  refs_f = File.new refs, 'r'
-  refs_lines = refs_f.readlines
   shard_in_files = []
-  shard_refs_files = []
   in_fns = []
-  refs_fns = []
   new_num_shards = 0
   0.upto(num_shards-1) { |shard|
     break if index.size==0
@@ -82,41 +77,32 @@ def make_shards(input, refs, num_shards, epoch, rand)
     in_fn = "work/shard.#{shard}.#{epoch}.in"
     shard_in = File.new in_fn, 'w+'
     in_fns << in_fn
-    refs_fn = "work/shard.#{shard}.#{epoch}.refs"
-    shard_refs = File.new refs_fn, 'w+'
-    refs_fns << refs_fn
     0.upto(shard_sz-1) { |i|
       j = index.pop
       break if !j
       shard_in.write in_lines[j]
-      shard_refs.write refs_lines[j]
     }
     shard_in_files << shard_in
-    shard_refs_files << shard_refs
   }
   while leftover > 0
     j = index.pop
     shard_in_files[-1].write in_lines[j]
-    shard_refs_files[-1].write refs_lines[j]
     leftover -= 1
   end
-  (shard_in_files + shard_refs_files).each do |f| f.close end
+  shard_in_files.each do |f| f.close end
   in_f.close
-  refs_f.close
-  return in_fns, refs_fns, new_num_shards
+  return in_fns, new_num_shards
 end
 
 input_files = []
-refs_files = []
 if predefined_shards
   input_files = File.new(input).readlines.map {|i| i.strip }
-  refs_files = File.new(refs).readlines.map {|i| i.strip }
   if per_shard_decoder_configs
     decoder_configs = File.new(opts[:per_shard_decoder_configs]).readlines.map {|i| i.strip}
   end
   num_shards = input_files.size
 else
-  input_files, refs_files, num_shards = make_shards input, refs, num_shards, 0, rand
+  input_files, num_shards = make_shards input, num_shards, 0, rand
 end
 
 0.upto(epochs-1) { |epoch|
@@ -149,8 +135,7 @@ end
       end
       pids << Kernel.fork {
         `#{qsub_str_start}#{dtrain_bin} -c #{ini} #{cdec_cfg} #{input_weights}\
-          --input #{input_files[shard]}\
-          --refs #{refs_files[shard]}\
+          --bitext #{input_files[shard]}\
           --output work/weights.#{shard}.#{epoch}#{qsub_str_end} #{local_end}`
       }
       weights_files << "work/weights.#{shard}.#{epoch}"
@@ -163,7 +148,7 @@ end
   `#{cat} work/weights.*.#{epoch} > work/weights_cat`
   `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}`
   if rand and reshard and epoch+1!=epochs
-    input_files, refs_files, num_shards = make_shards input, refs, num_shards, epoch+1, rand
+    input_files, num_shards = make_shards input, num_shards, epoch+1, rand
   end
 }
 
diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc
index 127f34d2..d81eafcb 100644
--- a/training/dtrain/score.cc
+++ b/training/dtrain/score.cc
@@ -31,13 +31,22 @@ BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref
   return brevity_penalty(hyp_len, ref_len) * exp(sum);
 }
 
+size_t
+RefLen(const vector<vector<WordID> >& refs) // returns the length of the longest reference, 0 if refs is empty
+{
+  size_t ref_len = 0;
+  for (const auto& r: refs) // by reference: called from every Score(), so avoid copying each reference
+    ref_len = max(ref_len, r.size());
+  return ref_len;
+}
+
 score_t
-BleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+BleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                   const unsigned /*rank*/, const unsigned /*src_len*/)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (hyp_len == 0 || ref_len == 0) return 0.;
-  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  NgramCounts counts = make_ngram_counts(hyp, refs, N_);
   return Bleu(counts, hyp_len, ref_len);
 }
 
@@ -52,12 +61,12 @@ BleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
  * NOTE: 0 iff no 1gram match ('grounded')
  */
 score_t
-StupidBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+StupidBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                         const unsigned /*rank*/, const unsigned /*src_len*/)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (hyp_len == 0 || ref_len == 0) return 0.;
-  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  NgramCounts counts = make_ngram_counts(hyp, refs, N_);
   unsigned M = N_;
   vector<score_t> v = w_;
   if (ref_len < N_) {
@@ -81,12 +90,12 @@ StupidBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
  * (Nakov et al. '12)
  */
 score_t
-FixedStupidBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+FixedStupidBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                         const unsigned /*rank*/, const unsigned /*src_len*/)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (hyp_len == 0 || ref_len == 0) return 0.;
-  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  NgramCounts counts = make_ngram_counts(hyp, refs, N_);
   unsigned M = N_;
   vector<score_t> v = w_;
   if (ref_len < N_) {
@@ -112,12 +121,12 @@ FixedStupidBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& re
  * NOTE: max is 0.9375 (with N=4)
  */
 score_t
-SmoothBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+SmoothBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                         const unsigned /*rank*/, const unsigned /*src_len*/)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (hyp_len == 0 || ref_len == 0) return 0.;
-  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  NgramCounts counts = make_ngram_counts(hyp, refs, N_);
   unsigned M = N_;
   if (ref_len < N_) M = ref_len;
   score_t sum = 0.;
@@ -143,12 +152,12 @@ SmoothBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
  * sum up Ngram precisions
  */
 score_t
-SumBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+SumBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                         const unsigned /*rank*/, const unsigned /*src_len*/)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (hyp_len == 0 || ref_len == 0) return 0.;
-  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  NgramCounts counts = make_ngram_counts(hyp, refs, N_);
   unsigned M = N_;
   if (ref_len < N_) M = ref_len;
   score_t sum = 0.;
@@ -167,12 +176,12 @@ SumBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
  * sum up exp(Ngram precisions)
  */
 score_t
-SumExpBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+SumExpBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                         const unsigned /*rank*/, const unsigned /*src_len*/)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (hyp_len == 0 || ref_len == 0) return 0.;
-  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  NgramCounts counts = make_ngram_counts(hyp, refs, N_);
   unsigned M = N_;
   if (ref_len < N_) M = ref_len;
   score_t sum = 0.;
@@ -191,12 +200,12 @@ SumExpBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
  * sum up exp(weight * log(Ngram precisions))
  */
 score_t
-SumWhateverBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+SumWhateverBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                         const unsigned /*rank*/, const unsigned /*src_len*/)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (hyp_len == 0 || ref_len == 0) return 0.;
-  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  NgramCounts counts = make_ngram_counts(hyp, refs, N_);
   unsigned M = N_;
   vector<score_t> v = w_;
   if (ref_len < N_) {
@@ -224,15 +233,15 @@ SumWhateverBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& re
  *       No scaling by src len.
  */
 score_t
-ApproxBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+ApproxBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                         const unsigned rank, const unsigned src_len)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (ref_len == 0) return 0.;
   score_t score = 0.;
   NgramCounts counts(N_);
   if (hyp_len > 0) {
-    counts = make_ngram_counts(hyp, ref, N_);
+    counts = make_ngram_counts(hyp, refs, N_);
     NgramCounts tmp = glob_onebest_counts_ + counts;
     score = Bleu(tmp, hyp_len, ref_len);
   }
@@ -255,16 +264,16 @@ ApproxBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
  *
  */
 score_t
-LinearBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
+LinearBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
                         const unsigned rank, const unsigned /*src_len*/)
 {
-  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
   if (ref_len == 0) return 0.;
   unsigned M = N_;
   if (ref_len < N_) M = ref_len;
   NgramCounts counts(M);
   if (hyp_len > 0)
-    counts = make_ngram_counts(hyp, ref, M);
+    counts = make_ngram_counts(hyp, refs, M);
   score_t ret = 0.;
   for (unsigned i = 0; i < M; i++) {
     if (counts.sum_[i] == 0 || onebest_counts_.sum_[i] == 0) break;
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index 1cdd3fa9..7d88cb61 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -117,20 +117,25 @@ make_ngrams(const vector<WordID>& s, const unsigned N)
 }
 
 inline NgramCounts
-make_ngram_counts(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned N)
+make_ngram_counts(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned N)
 {
   Ngrams hyp_ngrams = make_ngrams(hyp, N);
-  Ngrams ref_ngrams = make_ngrams(ref, N);
+  vector<Ngrams> refs_ngrams; // one n-gram table per reference, same order as refs
+  refs_ngrams.reserve(refs.size());
+  for (const auto& r: refs) { // bind by reference: no copy of the reference sentence
+    refs_ngrams.push_back(make_ngrams(r, N));
+  }
   NgramCounts counts(N);
   Ngrams::iterator it;
   Ngrams::iterator ti;
   for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) {
-    ti = ref_ngrams.find(it->first);
-    if (ti != ref_ngrams.end()) {
-      counts.Add(it->second, ti->second, it->first.size() - 1);
-    } else {
-      counts.Add(it->second, 0, it->first.size() - 1);
+    unsigned max_ref_count = 0; // highest count of this n-gram in any single reference; stays 0 if none has it
+    for (auto& ref_ngrams: refs_ngrams) { // by reference (non-const so find() yields Ngrams::iterator): no table copy per n-gram
+      ti = ref_ngrams.find(it->first);
+      if (ti != ref_ngrams.end())
+        max_ref_count = max(max_ref_count, ti->second);
     }
+    counts.Add(it->second, max_ref_count, it->first.size() - 1);
   }
   return counts;
 }
@@ -138,43 +143,43 @@ make_ngram_counts(const vector<WordID>& hyp, const vector<WordID>& ref, const un
 struct BleuScorer : public LocalScorer
 {
   score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len);
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
   void Reset() {}
 };
 
 struct StupidBleuScorer : public LocalScorer
 {
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
   void Reset() {}
 };
 
 struct FixedStupidBleuScorer : public LocalScorer
 {
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
   void Reset() {}
 };
 
 struct SmoothBleuScorer : public LocalScorer
 {
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
   void Reset() {}
 };
 
 struct SumBleuScorer : public LocalScorer
 {
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
   void Reset() {}
 };
 
 struct SumExpBleuScorer : public LocalScorer
 {
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
   void Reset() {}
 };
 
 struct SumWhateverBleuScorer : public LocalScorer
 {
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
   void Reset() {};
 };
 
@@ -194,7 +199,7 @@ struct ApproxBleuScorer : public BleuScorer
     glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.;
   }
 
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned src_len);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned rank, const unsigned src_len);
 };
 
 struct LinearBleuScorer : public BleuScorer
@@ -207,7 +212,7 @@ struct LinearBleuScorer : public BleuScorer
     onebest_counts_.One();
   }
 
-  score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned /*src_len*/);
+  score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned rank, const unsigned /*src_len*/);
 
   inline void Reset() {
     onebest_len_ = 1;
-- 
cgit v1.2.3