-rw-r--r--  training/dtrain/Makefile.am    2
-rw-r--r--  training/dtrain/dtrain.cc     61
-rw-r--r--  training/dtrain/dtrain.h     100
-rw-r--r--  training/dtrain/sample.h      62
-rw-r--r--  training/dtrain/score.cc     292
-rw-r--r--  training/dtrain/score.h      136
6 files changed, 101 insertions, 552 deletions
diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am
index 3c072ffc..7717ec86 100644
--- a/training/dtrain/Makefile.am
+++ b/training/dtrain/Makefile.am
@@ -1,6 +1,6 @@
bin_PROGRAMS = dtrain
-dtrain_SOURCES = dtrain.cc score.cc dtrain.h sample.h pairs.h score.h
+dtrain_SOURCES = dtrain.cc dtrain.h sample.h pairs.h score.h
dtrain_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a
AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 67e16d23..18addcb0 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -30,10 +30,9 @@ dtrain_init(int argc, char** argv, po::variables_map* conf)
("gamma", po::value<weight_t>()->default_value(0.), "gamma for SVM (0 for perceptron)")
("select_weights", po::value<string>()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)")
("rescale", po::value<bool>()->zero_tokens(), "(re)scale data and weight vector to unit length")
- ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)")
+ ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization with clipping as in 'Tsuroka et al' (2010)")
("l1_reg_strength", po::value<weight_t>(), "l1 regularization strength")
("fselect", po::value<weight_t>()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO
- ("approx_bleu_d", po::value<score_t>()->default_value(0.9), "discount for approx. BLEU")
("loss_margin", po::value<weight_t>()->default_value(0.), "update if no error in pref pair but model scores this near")
("max_pairs", po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.")
("pclr", po::value<string>()->default_value("no"), "use a (simple|adagrad) per-coordinate learning rate")
@@ -107,13 +106,11 @@ main(int argc, char** argv)
const unsigned N = conf["N"].as<unsigned>();
const unsigned T = conf["epochs"].as<unsigned>();
const unsigned stop_after = conf["stop_after"].as<unsigned>();
- const string filter_type = conf["filter"].as<string>();
const string pair_sampling = conf["pair_sampling"].as<string>();
const score_t pair_threshold = conf["pair_threshold"].as<score_t>();
const string select_weights = conf["select_weights"].as<string>();
const string output_ranking = conf["output_ranking"].as<string>();
const float hi_lo = conf["hi_lo"].as<float>();
- const score_t approx_bleu_d = conf["approx_bleu_d"].as<score_t>();
const unsigned max_pairs = conf["max_pairs"].as<unsigned>();
int repeat = conf["repeat"].as<unsigned>();
weight_t loss_margin = conf["loss_margin"].as<weight_t>();
@@ -136,39 +133,8 @@ main(int argc, char** argv)
cerr << setw(25) << "cdec conf " << "'" << conf["decoder_config"].as<string>() << "'" << endl;
Decoder decoder(ini_rf.stream());
- // scoring metric/scorer
- string scorer_str = conf["scorer"].as<string>();
- LocalScorer* scorer;
- if (scorer_str == "bleu") {
- scorer = static_cast<BleuScorer*>(new BleuScorer);
- } else if (scorer_str == "stupid_bleu") {
- scorer = static_cast<StupidBleuScorer*>(new StupidBleuScorer);
- } else if (scorer_str == "fixed_stupid_bleu") {
- scorer = static_cast<FixedStupidBleuScorer*>(new FixedStupidBleuScorer);
- } else if (scorer_str == "smooth_bleu") {
- scorer = static_cast<SmoothBleuScorer*>(new SmoothBleuScorer);
- } else if (scorer_str == "sum_bleu") {
- scorer = static_cast<SumBleuScorer*>(new SumBleuScorer);
- } else if (scorer_str == "sumexp_bleu") {
- scorer = static_cast<SumExpBleuScorer*>(new SumExpBleuScorer);
- } else if (scorer_str == "sumwhatever_bleu") {
- scorer = static_cast<SumWhateverBleuScorer*>(new SumWhateverBleuScorer);
- } else if (scorer_str == "approx_bleu") {
- scorer = static_cast<ApproxBleuScorer*>(new ApproxBleuScorer(N, approx_bleu_d));
- } else if (scorer_str == "lc_bleu") {
- scorer = static_cast<LinearBleuScorer*>(new LinearBleuScorer(N));
- } else {
- cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl;
- exit(1);
- }
- vector<score_t> bleu_weights;
- scorer->Init(N, bleu_weights);
-
// setup decoder observer
- MT19937 rng; // random number generator, only for forest sampling
- HypSampler* observer;
- observer = static_cast<KBestGetter*>(new KBestGetter(k, filter_type));
- observer->SetScorer(scorer);
+ ScoredKbest* observer = new ScoredKbest(k, new PerSentenceBleuScorer(N));
// init weights
vector<weight_t>& decoder_weights = decoder.CurrentWeightVector();
@@ -222,10 +188,6 @@ main(int argc, char** argv)
cerr << setw(25) << "N " << N << endl;
cerr << setw(25) << "T " << T << endl;
cerr << setw(25) << "batch " << batch << endl;
- cerr << setw(26) << "scorer '" << scorer_str << "'" << endl;
- if (scorer_str == "approx_bleu")
- cerr << setw(25) << "approx. B discount " << approx_bleu_d << endl;
- cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl;
cerr << setw(25) << "learning rate " << eta << endl;
cerr << setw(25) << "gamma " << gamma << endl;
cerr << setw(25) << "loss margin " << loss_margin << endl;
@@ -242,7 +204,6 @@ main(int argc, char** argv)
cerr << setw(25) << "pclr " << pclr << endl;
cerr << setw(25) << "max pairs " << max_pairs << endl;
cerr << setw(25) << "repeat " << repeat << endl;
- //cerr << setw(25) << "test k-best " << test_k_best << endl;
cerr << setw(25) << "cdec conf " << "'" << conf["decoder_config"].as<string>() << "'" << endl;
cerr << setw(25) << "input " << "'" << input_fn << "'" << endl;
cerr << setw(25) << "output " << "'" << output_fn << "'" << endl;
@@ -321,13 +282,13 @@ main(int argc, char** argv)
vector<WordID> cur_ref;
vector<string> tok;
boost::split(tok, r, boost::is_any_of(" "));
- register_and_convert(tok, cur_ref);
+ RegisterAndConvert(tok, cur_ref);
cur_refs.push_back(cur_ref);
}
refs_as_ids_buf.push_back(cur_refs);
src_str_buf.push_back(in);
}
- observer->SetRef(refs_as_ids_buf[ii]);
+ observer->SetReference(refs_as_ids_buf[ii]);
if (t == 0)
decoder.Decode(in, observer);
else
@@ -341,7 +302,7 @@ main(int argc, char** argv)
stringstream ss;
for (auto s: *samples) {
ss << ii << " ||| ";
- printWordIDVec(s.w, ss);
+ PrintWordIDVec(s.w, ss);
ss << " ||| " << s.model << " ||| " << s.score << endl;
}
of.get() << ss.str();
@@ -350,12 +311,12 @@ main(int argc, char** argv)
if (verbose) {
cerr << "--- refs for " << ii << ": ";
for (auto r: refs_as_ids_buf[ii]) {
- printWordIDVec(r);
+ PrintWordIDVec(r);
cerr << endl;
}
for (unsigned u = 0; u < samples->size(); u++) {
cerr << _p2 << _np << "[" << u << ". '";
- printWordIDVec((*samples)[u].w);
+ PrintWordIDVec((*samples)[u].w);
cerr << "'" << endl;
cerr << "SCORE=" << (*samples)[u].score << ",model="<< (*samples)[u].model << endl;
cerr << "F{" << (*samples)[u].f << "} ]" << endl << endl;
@@ -367,8 +328,8 @@ main(int argc, char** argv)
model_sum += (*samples)[0].model;
}
- f_count += observer->get_f_count();
- list_sz += observer->get_sz();
+ f_count += observer->GetFeatureCount();
+ list_sz += observer->GetSize();
// weight updates
if (!noup) {
@@ -552,8 +513,6 @@ main(int argc, char** argv)
if (average) w_average += lambdas;
- if (scorer_str == "approx_bleu" || scorer_str == "lc_bleu") scorer->Reset();
-
// print some stats
score_t score_avg = score_sum/(score_t)in_sz;
score_t model_avg = model_sum/(score_t)in_sz;
@@ -665,7 +624,7 @@ main(int argc, char** argv)
if (!quiet) {
cerr << _p5 << _np << endl << "---" << endl << "Best iteration: ";
- cerr << best_it+1 << " [SCORE '" << scorer_str << "'=" << max_score << "]." << endl;
+ cerr << best_it+1 << " [SCORE = " << max_score << "]." << endl;
cerr << "This took " << overall_time/60. << " min." << endl;
}
}
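The l1_reg help text above refers to the clipping scheme of Tsuruoka et al. (2010): after each gradient step, every updated weight is moved toward zero by the regularization penalty and clipped at zero. The update code itself lies outside the shown hunks; a minimal sketch of the per-coordinate rule, reusing this codebase's weight_t (illustration only, not the exact dtrain implementation):

  // clipped L1, per coordinate: w <- sign(w) * max(0, |w| - eta*lambda)
  inline weight_t ClipL1(weight_t w, weight_t eta_times_lambda)
  {
    if (w > 0) return std::max((weight_t)0., w - eta_times_lambda);
    else       return std::min((weight_t)0., w + eta_times_lambda);
  }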
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index e25c6f24..2b466930 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -15,7 +15,6 @@
#include "decoder.h"
#include "ff_register.h"
-#include "sampler.h"
#include "sentence_metadata.h"
#include "verbose.h"
#include "viterbi.h"
@@ -26,113 +25,46 @@ namespace po = boost::program_options;
namespace dtrain
{
-
-inline void register_and_convert(const vector<string>& strs, vector<WordID>& ids)
-{
- vector<string>::const_iterator it;
- for (it = strs.begin(); it < strs.end(); it++)
- ids.push_back(TD::Convert(*it));
-}
-
-inline string gettmpf(const string path, const string infix)
-{
- char fn[path.size() + infix.size() + 8];
- strcpy(fn, path.c_str());
- strcat(fn, "/");
- strcat(fn, infix.c_str());
- strcat(fn, "-XXXXXX");
- if (!mkstemp(fn)) {
- cerr << "Cannot make temp file in" << path << " , exiting." << endl;
- exit(1);
- }
- return string(fn);
-}
-
typedef double score_t;
struct ScoredHyp
{
vector<WordID> w;
- SparseVector<double> f;
- score_t model;
- score_t score;
+ SparseVector<weight_t> f;
+ score_t model, score;
unsigned rank;
};
-struct LocalScorer
+inline void
+RegisterAndConvert(const vector<string>& strs, vector<WordID>& ids)
{
- unsigned N_;
- vector<score_t> w_;
-
- virtual score_t
- Score(const vector<WordID>& hyp, const vector<vector<WordID> >& ref, const unsigned rank, const unsigned src_len)=0;
-
- virtual void Reset() {} // only for ApproxBleuScorer, LinearBleuScorer
-
- inline void
- Init(unsigned N, vector<score_t> weights)
- {
- assert(N > 0);
- N_ = N;
- if (weights.empty()) for (unsigned i = 0; i < N_; i++) w_.push_back(1./N_);
- else w_ = weights;
- }
-
- inline score_t
- brevity_penalty(const unsigned hyp_len, const unsigned ref_len)
- {
- if (hyp_len > ref_len) return 1;
- return exp(1 - (score_t)ref_len/hyp_len);
- }
-};
-
-struct HypSampler : public DecoderObserver
-{
- LocalScorer* scorer_;
- vector<vector<WordID> >* refs_;
- unsigned f_count_, sz_;
- virtual vector<ScoredHyp>* GetSamples()=0;
- inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; }
- inline void SetRef(vector<vector<WordID> >& refs) { refs_ = &refs; }
- inline unsigned get_f_count() { return f_count_; }
- inline unsigned get_sz() { return sz_; }
-};
+ vector<string>::const_iterator it;
+ for (auto s: strs)
+ ids.push_back(TD::Convert(s));
+}
-struct HSReporter
+inline void
+PrintWordIDVec(vector<WordID>& v, ostream& os=cerr)
{
- string task_id_;
-
- HSReporter(string task_id) : task_id_(task_id) {}
-
- inline void update_counter(string name, unsigned amount) {
- cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl;
- }
- inline void update_gcounter(string name, unsigned amount) {
- cerr << "reporter:counter:Global," << name << "," << amount << endl;
+ for (unsigned i = 0; i < v.size(); i++) {
+ os << TD::Convert(v[i]);
+ if (i < v.size()-1) os << " ";
}
-};
+}
inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); }
inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); }
inline ostream& _p2(ostream& out) { return out << setprecision(2); }
inline ostream& _p5(ostream& out) { return out << setprecision(5); }
-inline void printWordIDVec(vector<WordID>& v, ostream& os=cerr)
-{
- for (unsigned i = 0; i < v.size(); i++) {
- os << TD::Convert(v[i]);
- if (i < v.size()-1) os << " ";
- }
-}
-
template<typename T>
-inline T sign(T z)
+inline T
+sign(T z)
{
if (z == 0) return 0;
return z < 0 ? -1 : +1;
}
-
} // namespace
#endif
diff --git a/training/dtrain/sample.h b/training/dtrain/sample.h
index 25f02273..64d93cb0 100644
--- a/training/dtrain/sample.h
+++ b/training/dtrain/sample.h
@@ -1,5 +1,5 @@
-#ifndef _DTRAIN_KBESTGET_H_
-#define _DTRAIN_KBESTGET_H_
+#ifndef _DTRAIN_SAMPLE_H_
+#define _DTRAIN_SAMPLE_H_
#include "kbest.h"
@@ -7,78 +7,46 @@ namespace dtrain
{
-struct KBestGetter : public HypSampler
+struct ScoredKbest : public DecoderObserver
{
const unsigned k_;
- const string filter_type_;
vector<ScoredHyp> s_;
unsigned src_len_;
+ PerSentenceBleuScorer* scorer_;
+ vector<vector<WordID> >* refs_;
+ unsigned f_count_, sz_;
- KBestGetter(const unsigned k, const string filter_type) :
- k_(k), filter_type_(filter_type) {}
+ ScoredKbest(const unsigned k, PerSentenceBleuScorer* scorer) :
+ k_(k), scorer_(scorer) {}
virtual void
NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
{
src_len_ = smeta.GetSourceLength();
- KBestScored(*hg);
- }
-
- vector<ScoredHyp>* GetSamples() { return &s_; }
-
- void
- KBestScored(const Hypergraph& forest)
- {
- if (filter_type_ == "uniq") {
- KBestUnique(forest);
- } else if (filter_type_ == "not") {
- KBestNoFilter(forest);
- }
- }
-
- void
- KBestUnique(const Hypergraph& forest)
- {
s_.clear(); sz_ = f_count_ = 0;
KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,
- KBest::FilterUnique, prob_t, EdgeProb> kbest(forest, k_);
+ KBest::FilterUnique, prob_t, EdgeProb> kbest(*hg, k_);
for (unsigned i = 0; i < k_; ++i) {
const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, KBest::FilterUnique,
prob_t, EdgeProb>::Derivation* d =
- kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+ kbest.LazyKthBest(hg->nodes_.size() - 1, i);
if (!d) break;
ScoredHyp h;
h.w = d->yield;
h.f = d->feature_values;
h.model = log(d->score);
h.rank = i;
- h.score = scorer_->Score(h.w, *refs_, i, src_len_);
+ h.score = scorer_->Score(h.w, *refs_);
s_.push_back(h);
sz_++;
f_count_ += h.f.size();
}
}
- void
- KBestNoFilter(const Hypergraph& forest)
- {
- s_.clear(); sz_ = f_count_ = 0;
- KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, k_);
- for (unsigned i = 0; i < k_; ++i) {
- const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
- kbest.LazyKthBest(forest.nodes_.size() - 1, i);
- if (!d) break;
- ScoredHyp h;
- h.w = d->yield;
- h.f = d->feature_values;
- h.model = log(d->score);
- h.rank = i;
- h.score = scorer_->Score(h.w, *refs_, i, src_len_);
- s_.push_back(h);
- sz_++;
- f_count_ += h.f.size();
- }
- }
+ vector<ScoredHyp>* GetSamples() { return &s_; }
+ inline void SetReference(vector<vector<WordID> >& refs) { refs_ = &refs; }
+ inline unsigned GetFeatureCount() { return f_count_; }
+ inline unsigned GetSize() { return sz_; }
};
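Taken together with the dtrain.cc hunks above, the per-sentence calling pattern is now: convert the reference, hand it to the observer, decode, read back the scored k-best list. A minimal sketch (names as in this commit; k, N, src, ref_line, and the configured Decoder instance decoder are assumed from the surrounding setup in dtrain.cc):

  // sketch only: one training example through the new observer
  vector<string> tok;
  boost::split(tok, ref_line, boost::is_any_of(" "));   // tokenize reference
  vector<WordID> ref;
  RegisterAndConvert(tok, ref);                         // string tokens -> WordIDs
  vector<vector<WordID> > refs(1, ref);                 // >1 reference allowed
  ScoredKbest* observer = new ScoredKbest(k, new PerSentenceBleuScorer(N));
  observer->SetReference(refs);                         // must precede Decode()
  decoder.Decode(src, observer);                        // fills & scores the k-best list
  for (auto& h: *observer->GetSamples())                // ScoredHyp entries
    cerr << h.rank << " ||| " << h.model << " ||| " << h.score << endl;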
diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc
deleted file mode 100644
index 8a28771f..00000000
--- a/training/dtrain/score.cc
+++ /dev/null
@@ -1,292 +0,0 @@
-#include "score.h"
-
-namespace dtrain
-{
-
-
-/*
- * bleu
- *
- * as in "BLEU: a Method for Automatic Evaluation
- * of Machine Translation"
- * (Papineni et al. '02)
- *
- * NOTE: 0 if for one n \in {1..N} count is 0
- */
-score_t
-BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len)
-{
- if (hyp_len == 0 || ref_len == 0) return 0.;
- unsigned M = N_;
- vector<score_t> v = w_;
- if (ref_len < N_) {
- M = ref_len;
- for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M);
- }
- score_t sum = 0;
- for (unsigned i = 0; i < M; i++) {
- if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.;
- sum += v[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]);
- }
- return brevity_penalty(hyp_len, ref_len) * exp(sum);
-}
-
-size_t
-RefLen(vector<vector<WordID> > refs)
-{
- size_t ref_len = 0;
- for (auto r: refs)
- ref_len = max(ref_len, r.size()); // FIXME
- return ref_len;
-}
-
-score_t
-BleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned /*rank*/, const unsigned /*src_len*/)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (hyp_len == 0 || ref_len == 0) return 0.;
- NgramCounts counts = make_ngram_counts(hyp, refs, N_);
- return Bleu(counts, hyp_len, ref_len);
-}
-
-/*
- * 'stupid' bleu
- *
- * as in "ORANGE: a Method for Evaluating
- * Automatic Evaluation Metrics
- * for Machine Translation"
- * (Lin & Och '04)
- *
- * NOTE: 0 iff no 1gram match ('grounded')
- */
-score_t
-StupidBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned /*rank*/, const unsigned /*src_len*/)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (hyp_len == 0 || ref_len == 0) return 0.;
- NgramCounts counts = make_ngram_counts(hyp, refs, N_);
- unsigned M = N_;
- vector<score_t> v = w_;
- if (ref_len < N_) {
- M = ref_len;
- for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M);
- }
- score_t sum = 0, add = 0;
- for (unsigned i = 0; i < M; i++) {
- if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.;
- if (i == 1) add = 1;
- sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add)));
- }
- return brevity_penalty(hyp_len, ref_len) * exp(sum);
-}
-
-/*
- * fixed 'stupid' bleu
- *
- * as in "Optimizing for Sentence-Level BLEU+1
- * Yields Short Translations"
- * (Nakov et al. '12)
- */
-score_t
-FixedStupidBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned /*rank*/, const unsigned /*src_len*/)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (hyp_len == 0 || ref_len == 0) return 0.;
- NgramCounts counts = make_ngram_counts(hyp, refs, N_);
- unsigned M = N_;
- vector<score_t> v = w_;
- if (ref_len < N_) {
- M = ref_len;
- for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M);
- }
- score_t sum = 0, add = 0;
- for (unsigned i = 0; i < M; i++) {
- if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.;
- if (i == 1) add = 1;
- sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add)));
- }
- return brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix
-}
-
-/*
- * smooth bleu
- *
- * as in "An End-to-End Discriminative Approach
- * to Machine Translation"
- * (Liang et al. '06)
- *
- * NOTE: max is 0.9375 (with N=4)
- */
-score_t
-SmoothBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned /*rank*/, const unsigned /*src_len*/)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (hyp_len == 0 || ref_len == 0) return 0.;
- NgramCounts counts = make_ngram_counts(hyp, refs, N_);
- unsigned M = N_;
- if (ref_len < N_) M = ref_len;
- score_t sum = 0.;
- vector<score_t> i_bleu;
- for (unsigned i = 0; i < M; i++) i_bleu.push_back(0.);
- for (unsigned i = 0; i < M; i++) {
- if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) {
- break;
- } else {
- score_t i_ng = log((score_t)counts.clipped_[i]/counts.sum_[i]);
- for (unsigned j = i; j < M; j++) {
- i_bleu[j] += (1/((score_t)j+1)) * i_ng;
- }
- }
- sum += exp(i_bleu[i])/pow(2.0, (double)(N_-i));
- }
- return brevity_penalty(hyp_len, ref_len) * sum;
-}
-
-/*
- * 'sum' bleu
- *
- * sum up Ngram precisions
- */
-score_t
-SumBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned /*rank*/, const unsigned /*src_len*/)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (hyp_len == 0 || ref_len == 0) return 0.;
- NgramCounts counts = make_ngram_counts(hyp, refs, N_);
- unsigned M = N_;
- if (ref_len < N_) M = ref_len;
- score_t sum = 0.;
- unsigned j = 1;
- for (unsigned i = 0; i < M; i++) {
- if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break;
- sum += ((score_t)counts.clipped_[i]/counts.sum_[i])/pow(2.0, (double) (N_-j+1));
- j++;
- }
- return brevity_penalty(hyp_len, ref_len) * sum;
-}
-
-/*
- * 'sum' (exp) bleu
- *
- * sum up exp(Ngram precisions)
- */
-score_t
-SumExpBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned /*rank*/, const unsigned /*src_len*/)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (hyp_len == 0 || ref_len == 0) return 0.;
- NgramCounts counts = make_ngram_counts(hyp, refs, N_);
- unsigned M = N_;
- if (ref_len < N_) M = ref_len;
- score_t sum = 0.;
- unsigned j = 1;
- for (unsigned i = 0; i < M; i++) {
- if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break;
- sum += exp(((score_t)counts.clipped_[i]/counts.sum_[i]))/pow(2.0, (double) (N_-j+1));
- j++;
- }
- return brevity_penalty(hyp_len, ref_len) * sum;
-}
-
-/*
- * 'sum' (whatever) bleu
- *
- * sum up exp(weight * log(Ngram precisions))
- */
-score_t
-SumWhateverBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned /*rank*/, const unsigned /*src_len*/)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (hyp_len == 0 || ref_len == 0) return 0.;
- NgramCounts counts = make_ngram_counts(hyp, refs, N_);
- unsigned M = N_;
- vector<score_t> v = w_;
- if (ref_len < N_) {
- M = ref_len;
- for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M);
- }
- score_t sum = 0.;
- unsigned j = 1;
- for (unsigned i = 0; i < M; i++) {
- if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break;
- sum += exp(v[i] * log(((score_t)counts.clipped_[i]/counts.sum_[i])))/pow(2.0, (double) (N_-j+1));
- j++;
- }
- return brevity_penalty(hyp_len, ref_len) * sum;
-}
-
-/*
- * approx. bleu
- *
- * as in "Online Large-Margin Training of Syntactic
- * and Structural Translation Features"
- * (Chiang et al. '08)
- *
- * NOTE: Needs some more code in dtrain.cc .
- * No scaling by src len.
- */
-score_t
-ApproxBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned rank, const unsigned src_len)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (ref_len == 0) return 0.;
- score_t score = 0.;
- NgramCounts counts(N_);
- if (hyp_len > 0) {
- counts = make_ngram_counts(hyp, refs, N_);
- NgramCounts tmp = glob_onebest_counts_ + counts;
- score = Bleu(tmp, hyp_len, ref_len);
- }
- if (rank == 0) { // 'context of 1best translations'
- glob_onebest_counts_ += counts;
- glob_onebest_counts_ *= discount_;
- glob_hyp_len_ = discount_ * (glob_hyp_len_ + hyp_len);
- glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len);
- glob_src_len_ = discount_ * (glob_src_len_ + src_len);
- }
- return score;
-}
-
-/*
- * Linear (Corpus) Bleu
- *
- * as in "Lattice Minimum Bayes-Risk Decoding
- * for Statistical Machine Translation"
- * (Tromble et al. '08)
- *
- */
-score_t
-LinearBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs,
- const unsigned rank, const unsigned /*src_len*/)
-{
- unsigned hyp_len = hyp.size(), ref_len = RefLen(refs);
- if (ref_len == 0) return 0.;
- unsigned M = N_;
- if (ref_len < N_) M = ref_len;
- NgramCounts counts(M);
- if (hyp_len > 0)
- counts = make_ngram_counts(hyp, refs, M);
- score_t ret = 0.;
- for (unsigned i = 0; i < M; i++) {
- if (counts.sum_[i] == 0 || onebest_counts_.sum_[i] == 0) break;
- ret += counts.sum_[i]/onebest_counts_.sum_[i];
- }
- ret = -(hyp_len/(score_t)onebest_len_) + (1./M) * ret;
- if (rank == 0) {
- onebest_len_ += hyp_len;
- onebest_counts_ += counts;
- }
- return ret;
-}
-
-
-} // namespace
-
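Every scorer deleted above instantiates the same template quantity, in the notation of BleuScorer::Bleu:

  \mathrm{BLEU}(h,r) = \mathrm{BP}(|h|,|r|) \cdot \exp\Big(\sum_{n=1}^{N} w_n \log p_n\Big), \qquad \mathrm{BP}(h,r) = \min\big(1,\; e^{\,1 - r/h}\big)

with modified n-gram precisions p_n = clipped_n / sum_n and uniform weights w_n = 1/N. The variants differ only in how they stay defined when some p_n is 0: (fixed) stupid BLEU adds 1 to numerator and denominator for n >= 2, smooth BLEU sums geometrically discounted partial scores, the sum_* scorers replace the log-linear combination by a discounted sum of (transformed) precisions, and approx/lc BLEU smooth against discounted running counts of earlier 1-best hypotheses.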
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index 62d8f587..c727dd30 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -6,7 +6,6 @@
namespace dtrain
{
-
struct NgramCounts
{
unsigned N_;
@@ -30,6 +29,7 @@ struct NgramCounts
{
NgramCounts result = *this;
result += other;
+
return result;
}
@@ -102,7 +102,7 @@ struct NgramCounts
typedef map<vector<WordID>, unsigned> Ngrams;
inline Ngrams
-make_ngrams(const vector<WordID>& s, const unsigned N)
+MakeNgrams(const vector<WordID>& s, const unsigned N)
{
Ngrams ngrams;
vector<WordID> ng;
@@ -113,21 +113,21 @@ make_ngrams(const vector<WordID>& s, const unsigned N)
ngrams[ng]++;
}
}
+
return ngrams;
}
inline NgramCounts
-make_ngram_counts(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned N)
+MakeNgramCounts(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned N)
{
- Ngrams hyp_ngrams = make_ngrams(hyp, N);
+ Ngrams hyp_ngrams = MakeNgrams(hyp, N);
vector<Ngrams> refs_ngrams;
for (auto r: refs) {
- Ngrams r_ng = make_ngrams(r, N);
+ Ngrams r_ng = MakeNgrams(r, N);
refs_ngrams.push_back(r_ng);
}
NgramCounts counts(N);
- Ngrams::iterator it;
- Ngrams::iterator ti;
+ Ngrams::iterator it, ti;
for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) {
unsigned max_ref_count = 0;
for (auto ref_ngrams: refs_ngrams) {
@@ -137,90 +137,72 @@ make_ngram_counts(const vector<WordID>& hyp, const vector<vector<WordID> >& refs
}
counts.Add(it->second, min(it->second, max_ref_count), it->first.size() - 1);
}
+
return counts;
}
-struct BleuScorer : public LocalScorer
-{
- score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len);
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
- void Reset() {}
-};
-
-struct StupidBleuScorer : public LocalScorer
-{
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
- void Reset() {}
-};
-
-struct FixedStupidBleuScorer : public LocalScorer
+/*
+ * per-sentence BLEU
+ * as in "Optimizing for Sentence-Level BLEU+1
+ * Yields Short Translations"
+ * (Nakov et al. '12)
+ *
+ * [simply add 1 to reference length for calculation of BP]
+ *
+ */
+
+struct PerSentenceBleuScorer
{
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
- void Reset() {}
-};
-
-struct SmoothBleuScorer : public LocalScorer
-{
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
- void Reset() {}
-};
-
-struct SumBleuScorer : public LocalScorer
-{
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
- void Reset() {}
-};
-
-struct SumExpBleuScorer : public LocalScorer
-{
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
- void Reset() {}
-};
+ const unsigned N_;
+ vector<score_t> w_;
-struct SumWhateverBleuScorer : public LocalScorer
-{
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/);
- void Reset() {};
-};
-
-struct ApproxBleuScorer : public BleuScorer
-{
- NgramCounts glob_onebest_counts_;
- unsigned glob_hyp_len_, glob_ref_len_, glob_src_len_;
- score_t discount_;
-
- ApproxBleuScorer(unsigned N, score_t d) : glob_onebest_counts_(NgramCounts(N)), discount_(d)
+ PerSentenceBleuScorer(unsigned n) : N_(n)
{
- glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0;
+ for (auto i = 1; i <= N_; i++)
+ w_.push_back(1.0/N_);
}
- inline void Reset() {
- glob_onebest_counts_.Zero();
- glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.;
- }
-
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned rank, const unsigned src_len);
-};
-
-struct LinearBleuScorer : public BleuScorer
-{
- unsigned onebest_len_;
- NgramCounts onebest_counts_;
-
- LinearBleuScorer(unsigned N) : onebest_len_(1), onebest_counts_(N)
+ inline score_t
+ BrevityPenalty(const unsigned hyp_len, const unsigned ref_len)
{
- onebest_counts_.One();
+ if (hyp_len > ref_len) return 1;
+ return exp(1 - (score_t)ref_len/hyp_len);
}
- score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned rank, const unsigned /*src_len*/);
-
- inline void Reset() {
- onebest_len_ = 1;
- onebest_counts_.One();
+ score_t
+ Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs)
+ {
+ unsigned hyp_len = hyp.size(), ref_len = 0;
+ // best match reference length
+ if (refs.size() == 1) {
+ ref_len = refs[0].size();
+ } else {
+ unsigned i = 0, best_idx = 0;
+ unsigned best = std::numeric_limits<unsigned>::max();
+ for (auto r: refs) {
+ unsigned d = abs((int)hyp_len - (int)r.size());
+ if (d < best) { best = d; best_idx = i; }
+ i++;
+ }
+ ref_len = refs[best_idx].size();
+ }
+ if (hyp_len == 0 || ref_len == 0) return 0.;
+ NgramCounts counts = MakeNgramCounts(hyp, refs, N_);
+ unsigned M = N_;
+ vector<score_t> v = w_;
+ if (ref_len < N_) {
+ M = ref_len;
+ for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M);
+ }
+ score_t sum = 0, add = 0;
+ for (unsigned i = 0; i < M; i++) {
+ if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.;
+ if (i == 1) add = 1;
+ sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add)));
+ }
+ return BrevityPenalty(hyp_len, ref_len+1) * exp(sum);
}
};
-
} // namespace
#endif
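The effect of the "+1" smoothing and the lengthened brevity penalty is easiest to verify on a toy pair. The following standalone sketch mirrors the arithmetic of PerSentenceBleuScorer::Score above, with plain string tokens instead of WordIDs and a single reference; it illustrates the formula and is not code from this commit:

  #include <algorithm>
  #include <cmath>
  #include <iostream>
  #include <map>
  #include <string>
  #include <vector>
  using namespace std;

  // Toy re-implementation of the BLEU+1 arithmetic in
  // PerSentenceBleuScorer::Score: single reference, uniform weights 1/M.
  double PerSentenceBleu(const vector<string>& hyp,
                         const vector<string>& ref, unsigned N)
  {
    unsigned hyp_len = hyp.size(), ref_len = ref.size();
    if (hyp_len == 0 || ref_len == 0) return 0.;
    unsigned M = min(ref_len, N);
    double sum = 0., add = 0.;
    for (unsigned n = 1; n <= M; n++) {
      map<vector<string>, unsigned> hyp_ngrams, ref_ngrams;
      for (unsigned i = 0; i + n <= hyp_len; i++)
        hyp_ngrams[vector<string>(hyp.begin()+i, hyp.begin()+i+n)]++;
      for (unsigned i = 0; i + n <= ref_len; i++)
        ref_ngrams[vector<string>(ref.begin()+i, ref.begin()+i+n)]++;
      unsigned clipped = 0, total = 0;
      for (auto& p: hyp_ngrams) {
        total += p.second;
        clipped += min(p.second, ref_ngrams[p.first]);   // clip to ref count
      }
      if (n == 1 && (total == 0 || clipped == 0)) return 0.; // grounded
      if (n == 2) add = 1;               // the "+1" smoothing for n >= 2
      sum += (1./M) * log(((double)clipped + add)/(total + add));
    }
    // brevity penalty computed against ref_len+1: the Nakov et al. fix
    double bp = (hyp_len > ref_len+1) ? 1.
                                      : exp(1. - (double)(ref_len+1)/hyp_len);
    return bp * exp(sum);
  }

  int main()
  {
    vector<string> hyp = {"a", "b", "c"}, ref = {"a", "b", "d"};
    cout << PerSentenceBleu(hyp, ref, 2) << endl;
    // prints ~0.478: unigram p = 2/3, smoothed bigram p = (1+1)/(2+1),
    // BP = exp(1 - 4/3) since hyp_len = 3 <= ref_len+1 = 4
    return 0;
  }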