5 files changed, 45 insertions, 68 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 3dee10f2..e817e7ab 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -30,7 +30,6 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     ("rescale",           po::value<bool>()->zero_tokens(),                           "rescale weight vector after each input")
     ("l1_reg",            po::value<string>()->default_value("none"),   "apply l1 regularization as in 'Tsuroka et al' (2010)")
     ("l1_reg_strength",   po::value<weight_t>(),                                                  "l1 regularization strength")
-    ("inc_correct",       po::value<bool>()->zero_tokens(),                      "include correctly ranked pairs into updates")
     ("fselect",           po::value<weight_t>()->default_value(-1),   "TODO select top x percent of features after each epoch")
     ("approx_bleu_d",     po::value<score_t>()->default_value(0.9),                                "discount for approx. BLEU")
 #ifdef DTRAIN_LOCAL
@@ -122,9 +121,6 @@ main(int argc, char** argv)
   HSReporter rep(task_id);
   bool keep = false;
   if (cfg.count("keep")) keep = true;
-  bool inc_correct = false;
-  if (cfg.count("inc_correct"))
-    inc_correct = true;
 
   const unsigned k = cfg["k"].as<unsigned>();
   const unsigned N = cfg["N"].as<unsigned>();
@@ -226,7 +222,6 @@ main(int argc, char** argv)
   score_t max_score = 0.;
   unsigned best_it = 0;
   float overall_time = 0.;
-  unsigned pair_count = 0, feature_count = 0;
 
   // output cfg
   if (!quiet) {
@@ -250,8 +245,6 @@ main(int argc, char** argv)
     cerr << setw(25) << "select weights " << "'" << select_weights << "'" << endl;
     if (cfg.count("l1_reg"))
       cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as<string>() << "'" << endl;
-    if (inc_correct)
-      cerr << setw(25) << "inc. correct " << inc_correct << endl;
     if (rescale)
       cerr << setw(25) << "rescale " << rescale << endl;
     cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl;
@@ -420,36 +413,18 @@ main(int argc, char** argv)
       if (pair_sampling == "PRO")
         PROsampling(samples, pairs, pair_threshold);
       npairs += pairs.size();
-      pair_count += 2*pairs.size();
 
       for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
            it != pairs.end(); it++) {
-        score_t rank_error = it->second.score - it->first.score;
-        feature_count += it->first.f.size() + it->second.f.size();
-        if (!gamma) {
-          // perceptron
-          if (rank_error > 0) {
-            SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
-            lambdas.plus_eq_v_times_s(diff_vec, eta);
-            rank_errors++;
-          } else {
-            if (inc_correct) {
-              SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
-              lambdas.plus_eq_v_times_s(diff_vec, eta);
-            }
-          }
-          if (it->first.model - it->second.model < 1) margin_violations++;
-        } else {
-          // SVM
-          score_t margin = it->first.model - it->second.model;
-          if (rank_error > 0 || margin < 1) {
-            SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
-            lambdas.plus_eq_v_times_s(diff_vec, eta);
-            if (rank_error > 0) rank_errors++;
-            if (margin < 1) margin_violations++;
-          }
-          // regularization
-          lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs));
+        bool rank_error = it->first.model <= it->second.model; // NOTE(review): treats it->first as the hyp. that should score higher -- confirm every sampler builds pairs that way
+        if (rank_error) rank_errors++;
+        score_t margin = fabs(it->first.model - it->second.model); // absolute model score difference of the pair
+        if (!rank_error && margin < 1) margin_violations++; // ordered correctly, but by less than 1
+        if (rank_error || (gamma && margin<1)) { // gamma == 0: update on rank errors only; otherwise also when margin < 1
+          SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
+          lambdas.plus_eq_v_times_s(diff_vec, eta); // presumably lambdas += eta * diff_vec (going by the method name) -- confirm
+          if (gamma)
+            lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs)); // regularization; only for pairs that triggered an update, npairs is the running total
         }
       }
 
@@ -553,8 +528,6 @@ main(int argc, char** argv)
     cerr << "     avg # margin viol: ";
     cerr << margin_violations/(float)in_sz << endl;
     cerr << "    non0 feature count: " <<  nonz << endl;
-    cerr << "           avg f count: ";
-    cerr << feature_count/(float)pair_count << endl;
   }
 
   if (hstreaming) {
@@ -580,7 +553,7 @@ main(int argc, char** argv)
   overall_time += time_diff;
   if (!quiet) {
     cerr << _p2 << _np << "(time " << time_diff/60. << " min, ";
-    cerr << time_diff/(float)in_sz<< " s/S)" << endl;
+    cerr << time_diff/in_sz << " s/S)" << endl; // NOTE(review): (float) cast dropped -- this is an integer division unless time_diff is floating-point; confirm its type
   }
   if (t+1 != T && !quiet) cerr << endl;
 
diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h
index 7b03d258..15d32e36 100644
--- a/dtrain/dtrain.h
+++ b/dtrain/dtrain.h
@@ -13,7 +13,7 @@
 
 #include "filelib.h"
 
-//#define DTRAIN_LOCAL
+#define DTRAIN_LOCAL // NOTE(review): enables every #ifdef DTRAIN_LOCAL section (e.g. in dtrain_init) -- confirm it is meant to be committed switched on
 
 #define DTRAIN_DOTS 10 // after how many inputs to display a '.'
 #define DTRAIN_GRAMMAR_DELIM "########EOS########"
@@ -85,7 +85,8 @@ inline void printWordIDVec(vector<WordID>& v)
 }
 
 template<typename T>
-inline T sign(T z) {
+inline T sign(T z) // signum: 0 if z == 0, -1 if z < 0, +1 otherwise (result converted to T)
+{
   if (z == 0) return 0;
   return z < 0 ? -1 : +1;
 }
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
index 56702b86..bb01cf4f 100644
--- a/dtrain/pairsampling.h
+++ b/dtrain/pairsampling.h
@@ -15,12 +15,12 @@ accept_pair(score_t a, score_t b, score_t threshold)
 inline void
 all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, float _unused = 1)
 {
-  for (unsigned i = 0; i < s->size()-1; i++) {
-    for (unsigned j = i+1; j < s->size(); j++) {
+  unsigned sz = s->size(); // number of hypotheses, read once for both loops
+  for (unsigned i = 0; i+1 < sz; i++) { // i+1 < sz, not i < sz-1: sz is unsigned, so sz-1 wraps around for an empty list
+    for (unsigned j = i+1; j < sz; j++) { // every j after i, i.e. each unordered pair exactly once
       if (threshold > 0) {
-        if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) {
+        if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
           training.push_back(make_pair((*s)[i], (*s)[j]));
-        }
       } else {
         training.push_back(make_pair((*s)[i], (*s)[j]));
       }
@@ -30,14 +30,14 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
 
 /*
  * multipartite ranking
- *  sort by bleu
- *  compare top 10% to middle 80% and low 10%
- *  cmp middle 80% to low 10%
+ *  sort hypotheses by BLEU, best first
+ *  compare the top X to the middle Y and the low X
+ *  compare the middle Y to the low X
  */
 bool
 _XYX_cmp_hyp_by_score(ScoredHyp a, ScoredHyp b)
 {
-  return a.score < b.score;
+  return a.score > b.score; // true when a has the higher score, i.e. a descending order when used as a sort predicate
 }
 inline void
 partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, float hi_lo)
@@ -47,27 +47,23 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
   unsigned sep = round(sz*hi_lo);
   for (unsigned i = 0; i < sep; i++) {
     for (unsigned j = sep; j < sz; j++) {
-      if ((*s)[i].rank < (*s)[j].rank) {
-        if (threshold > 0) {
-          if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) {
-            training.push_back(make_pair((*s)[i], (*s)[j]));
-          }
-        } else {
+      if (threshold > 0) { // here i runs over the first sep entries and j over all entries from sep on
+        if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) // with a threshold, accept_pair() decides whether the pair is kept
+          training.push_back(make_pair((*s)[i], (*s)[j]));
+      } else {
+        if((*s)[i].score != (*s)[j].score) // no threshold: keep every pair except exact score ties
+          training.push_back(make_pair((*s)[i], (*s)[j]));
-        }
       }
     }
   }
   for (unsigned i = sep; i < sz-sep; i++) {
     for (unsigned j = sz-sep; j < sz; j++) {
-      if ((*s)[i].rank < (*s)[j].rank) {
-        if (threshold > 0) {
-          if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) {
-            training.push_back(make_pair((*s)[i], (*s)[j]));
-          }
-        } else {
+      if (threshold > 0) { // here i runs over sep..sz-sep-1 and j over the last sep entries
+        if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) // with a threshold, accept_pair() decides whether the pair is kept
+          training.push_back(make_pair((*s)[i], (*s)[j]));
+      } else {
+        if((*s)[i].score != (*s)[j].score) // no threshold: keep every pair except exact score ties
+          training.push_back(make_pair((*s)[i], (*s)[j]));
-        }
       }
     }
   }
@@ -83,7 +79,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
 bool
 _PRO_cmp_pair_by_diff(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b)
 {
-  // descending order
   return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score));
 }
 inline void
diff --git a/dtrain/score.cc b/dtrain/score.cc
index d0f9e8a0..b09d32ba 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -18,11 +18,15 @@ BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref
 {
   if (hyp_len == 0 || ref_len == 0) return 0.;
   unsigned M = N_;
-  if (ref_len < N_) M = ref_len;
+  vector<score_t> v = w_; // local copy of the weights so that w_ is left unmodified
+  if (ref_len < N_) { // reference length below N_: shrink the number of scored entries
+    M = ref_len; // the sum below then runs over the first ref_len entries only
+    for (unsigned i = 0; i < M; i++) v[i] = 1./((score_t)M); // those M entries get uniform weight 1/M
+  }
   score_t sum = 0;
   for (unsigned i = 0; i < M; i++) {
     if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.;
-    sum += w_[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]);
+    sum += v[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]);
   }
   return brevity_penalty(hyp_len, ref_len) * exp(sum);
 }
@@ -55,12 +59,16 @@ StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
   if (hyp_len == 0 || ref_len == 0) return 0.;
   NgramCounts counts = make_ngram_counts(hyp, ref, N_);
   unsigned M = N_;
-  if (ref_len < N_) M = ref_len;
+  vector<score_t> v = w_; // local copy of the weights so that w_ is left unmodified
+  if (ref_len < N_) { // reference length below N_: shrink the number of scored entries
+    M = ref_len; // the sum below then runs over the first ref_len entries only
+    for (unsigned i = 0; i < M; i++) v[i] = 1./((score_t)M); // those M entries get uniform weight 1/M
+  }
   score_t sum = 0, add = 0;
   for (unsigned i = 0; i < M; i++) {
     if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.;
     if (i == 1) add = 1;
-    sum += w_[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add)));
+    sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add)));
   }
   return  brevity_penalty(hyp_len, ref_len) * exp(sum);
 }
diff --git a/dtrain/score.h b/dtrain/score.h
index d0e79f65..eb8ad912 100644
--- a/dtrain/score.h
+++ b/dtrain/score.h
@@ -61,8 +61,8 @@ struct NgramCounts
   {
     unsigned i;
     for (i = 0; i < N_; i++) {
-      clipped_[i] = 0;
-      sum_[i] = 0;
+      clipped_[i] = 0.;
+      sum_[i] = 0.;
     }
   }