From 1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Sun, 18 Nov 2012 13:35:42 -0500
Subject: major restructure of the training code

---
 training/dtrain/pairsampling.h | 149 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 training/dtrain/pairsampling.h

(limited to 'training/dtrain/pairsampling.h')
diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h
new file mode 100644
index 00000000..84be1efb
--- /dev/null
+++ b/training/dtrain/pairsampling.h
@@ -0,0 +1,149 @@
+#ifndef _DTRAIN_PAIRSAMPLING_H_
+#define _DTRAIN_PAIRSAMPLING_H_
+
+namespace dtrain
+{
+
+
+inline bool // inline: a header-defined function must not emit one definition per includer
+accept_pair(score_t a, score_t b, score_t threshold)
+{
+  // reject a pair only if its score gap is strictly below threshold (a gap == threshold passes)
+  return !(fabs(a - b) < threshold);
+}
+
+inline bool // inline: defined in a header; const refs: no ScoredHyp copies per comparison
+cmp_hyp_by_score_d(const ScoredHyp& a, const ScoredHyp& b)
+{
+  return a.score > b.score; // strict '>' puts the higher-scoring hypothesis first
+}
+
+inline void
+all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1)
+{
+  sort(s->begin(), s->end(), cmp_hyp_by_score_d);
+  unsigned sz = s->size();
+  if (sz < 2) return; // no pair possible; for sz == 0 the unsigned sz-1 below wrapped and the loop spun
+  unsigned count = 0;
+  for (unsigned i = 0; i < sz-1; i++) {
+    for (unsigned j = i+1; j < sz; j++) {
+      if (threshold > 0) {
+        if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
+          training.push_back(make_pair((*s)[i], (*s)[j]));
+      } else {
+        // no threshold given: keep every pair whose scores differ at all
+        if ((*s)[i].score != (*s)[j].score)
+          training.push_back(make_pair((*s)[i], (*s)[j]));
+      }
+      // count advances for each pair examined, kept or not, so max limits
+      // the comparisons made, not the number of pairs appended to training;
+      // *s was sorted descending, so .first never scores below .second
+      if (++count == max) return;
+    }
+  }
+}
+
+/*
+ * multipartite ranking: sort hyps by score (descending), then pair up
+ *  every hyp of the top partition with every hyp ranked below it, and
+ *  the middle partition with the low one; hi_lo sizes the outer partitions
+ *  NOTE(review): 2nd loop starts at sz-sep_lo though sep_lo is an index - verify
+ */
+
+inline void
+partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float hi_lo)
+{
+  unsigned sz = s->size();
+  if (sz < 2) return;
+  sort(s->begin(), s->end(), cmp_hyp_by_score_d);
+  unsigned sep = round(sz*hi_lo);
+  if (sep < 1) sep = 1; else if (sep > sz) sep = sz; // outside [1,sz] the tie scans index past *s
+  unsigned sep_hi = (sz > 4) ? sep : 1; // short lists: the top partition is the best hyp only
+  if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi; // keep ties together
+  bool b = false;
+  unsigned count = 0;
+  for (unsigned i = 0; i < sep_hi; i++) {
+    for (unsigned j = sep_hi; j < sz; j++) {
+#ifdef DTRAIN_FASTER_PERCEPTRON
+      if ((*s)[i].model <= (*s)[j].model) {
+#endif
+      if (threshold > 0) {
+        if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
+          training.push_back(make_pair((*s)[i], (*s)[j]));
+      } else {
+        if ((*s)[i].score != (*s)[j].score)
+          training.push_back(make_pair((*s)[i], (*s)[j]));
+      }
+      if (++count == max) {
+        b = true;
+        break;
+      }
+#ifdef DTRAIN_FASTER_PERCEPTRON
+      }
+#endif
+    }
+    if (b) break;
+  }
+  unsigned sep_lo = sz-sep; // cannot wrap or equal sz now that 1 <= sep <= sz
+  while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo;
+  for (unsigned i = sep_hi; i < sz-sep_lo; i++) {
+    for (unsigned j = sz-sep_lo; j < sz; j++) {
+#ifdef DTRAIN_FASTER_PERCEPTRON
+      if ((*s)[i].model <= (*s)[j].model) {
+#endif
+      if (threshold > 0) {
+        if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
+          training.push_back(make_pair((*s)[i], (*s)[j]));
+      } else {
+        if ((*s)[i].score != (*s)[j].score)
+          training.push_back(make_pair((*s)[i], (*s)[j]));
+      }
+      if (++count == max) return;
+#ifdef DTRAIN_FASTER_PERCEPTRON
+      }
+#endif
+    }
+  }
+}
+
+/*
+ * pair sampling as in
+ * 'Tuning as Ranking' (Hopkins & May, 2011)
+ *     count = 5000
+ * threshold = 5% BLEU (0.05 for param 3)
+ *       cut = top 50
+ */
+inline bool // inline: defined in a header; const refs: sort no longer copies four ScoredHyp per compare
+_PRO_cmp_pair_by_diff_d(const pair<ScoredHyp,ScoredHyp>& a, const pair<ScoredHyp,ScoredHyp>& b)
+{
+  return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score));
+}
+inline void
+PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1)
+{
+  unsigned max_count = 5000, count = 0, sz = s->size();
+  bool b = false;
+  // collect up to max_count pairs whose score gap passes threshold; the max
+  // parameter is not consulted here. i+1 < sz replaces i < sz-1 because sz is
+  // unsigned: for an empty *s, sz-1 wrapped and the outer loop spun uselessly
+  for (unsigned i = 0; !b && i+1 < sz; i++) {
+    for (unsigned j = i+1; j < sz; j++) {
+      if (!accept_pair((*s)[i].score, (*s)[j].score, threshold)) continue;
+      training.push_back(make_pair((*s)[i], (*s)[j]));
+      if (++count == max_count) {
+        b = true;
+        break;
+      }
+    }
+  }
+  if (training.size() <= 50) return;
+  // more than 50 entries: keep only the 50 with the largest score difference
+  sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff_d);
+  training.erase(training.begin()+50, training.end());
+}
+
+
+} // namespace
+
+#endif
+
-- 
cgit v1.2.3


From ae6a76dfc04698029616232b39d9f47347ec9d4b Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Fri, 15 Mar 2013 12:46:03 +0100
Subject: make perceptron automatically faster

---
 training/dtrain/dtrain.cc      | 36 ++++++++++++++++++++----------------
 training/dtrain/pairsampling.h | 21 ++++++---------------
 2 files changed, 26 insertions(+), 31 deletions(-)

(limited to 'training/dtrain/pairsampling.h')

diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index fcb46db2..2bb4ec98 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -6,7 +6,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
 {
   po::options_description ini("Configuration File Options");
   ini.add_options()
-    ("input",             po::value<string>()->default_value("-"),                                                   "input file")
+    ("input",             po::value<string>()->default_value("-"),                                             "input file (src)")
+    ("refs,r",            po::value<string>(),                                                                       "references")
     ("output",            po::value<string>()->default_value("-"),                          "output weights file, '-' for STDOUT")
     ("input_weights",     po::value<string>(),                                "input weights file (e.g. from previous iteration)")
     ("decoder_config",    po::value<string>(),                                                      "configuration file for cdec")
@@ -33,8 +34,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     ("scale_bleu_diff",   po::value<bool>()->zero_tokens(),                      "learning rate <- bleu diff of a misranked pair")
     ("loss_margin",       po::value<weight_t>()->default_value(0.),  "update if no error in pref pair but model scores this near")
     ("max_pairs",         po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.")
-    ("refs,r",            po::value<string>(),                                                         "references in local mode")
-    ("noup",              po::value<bool>()->zero_tokens(),                                               "do not update weights");
+    ("noup",              po::value<bool>()->zero_tokens(),                                               "do not update weights")
+    ("pair_stats",        po::value<bool>()->zero_tokens(), "stats about correctly ranked/misranked pairs even if loss_margin=0 and gamma=0");
   po::options_description cl("Command Line Options");
   cl.add_options()
     ("config,c",         po::value<string>(),              "dtrain config file")
@@ -124,6 +125,10 @@ main(int argc, char** argv)
   vector<string> print_weights;
   if (cfg.count("print_weights"))
     boost::split(print_weights, cfg["print_weights"].as<string>(), boost::is_any_of(" "));
+  bool pair_stats = false;
+  if (cfg.count("pair_stats")) pair_stats = true;
+  bool faster_perceptron = false;
+  if (gamma==0 && loss_margin==0 && !pair_stats) faster_perceptron = true;
 
   // setup decoder
   register_feature_functions();
@@ -346,25 +351,26 @@ main(int argc, char** argv)
       // get pairs
       vector<pair<ScoredHyp,ScoredHyp> > pairs;
       if (pair_sampling == "all")
-        all_pairs(samples, pairs, pair_threshold, max_pairs);
+        all_pairs(samples, pairs, pair_threshold, max_pairs, faster_perceptron);
       if (pair_sampling == "XYX")
-        partXYX(samples, pairs, pair_threshold, max_pairs, hi_lo);
+        partXYX(samples, pairs, pair_threshold, max_pairs, faster_perceptron, hi_lo);
       if (pair_sampling == "PRO")
         PROsampling(samples, pairs, pair_threshold, max_pairs);
       npairs += pairs.size();
 
       for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
            it != pairs.end(); it++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
-        bool rank_error = true; // pair sampling already did this for us
-        rank_errors++;
-        score_t margin = std::numeric_limits<float>::max();
-#else
-        bool rank_error = it->first.model <= it->second.model;
+        bool rank_error;
+        score_t margin;
+        if (faster_perceptron) { // we are only considering misranked pairs
+          rank_error = true; // pair sampling already did this for us
+          margin = std::numeric_limits<float>::max();
+        } else {
+          rank_error = it->first.model <= it->second.model;
+          margin = fabs(fabs(it->first.model) - fabs(it->second.model));
+          if (!rank_error && margin < loss_margin) margin_violations++;
+        }
         if (rank_error) rank_errors++;
-        score_t margin = fabs(fabs(it->first.model) - fabs(it->second.model));
-        if (!rank_error && margin < loss_margin) margin_violations++;
-#endif
         if (scale_bleu_diff) eta = it->first.score - it->second.score;
         if (rank_error || margin < loss_margin) {
           SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
@@ -458,10 +464,8 @@ main(int argc, char** argv)
     cerr << _np << npairs/(float)in_sz << endl;
     cerr << "        avg # rank err: ";
     cerr << rank_errors/(float)in_sz << endl;
-#ifndef DTRAIN_FASTER_PERCEPTRON
     cerr << "     avg # margin viol: ";
     cerr << margin_violations/(float)in_sz << endl;
-#endif
     cerr << "    non0 feature count: " <<  nonz << endl;
     cerr << "           avg list sz: " << list_sz/(float)in_sz << endl;
     cerr << "           avg f count: " << f_count/(float)list_sz << endl;
diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h
index 84be1efb..3f67e209 100644
--- a/training/dtrain/pairsampling.h
+++ b/training/dtrain/pairsampling.h
@@ -19,7 +19,7 @@ cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b)
 }
 
 inline void
-all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1)
+all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float _unused=1)
 {
   sort(s->begin(), s->end(), cmp_hyp_by_score_d);
   unsigned sz = s->size();
@@ -27,6 +27,7 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
   unsigned count = 0;
   for (unsigned i = 0; i < sz-1; i++) {
     for (unsigned j = i+1; j < sz; j++) {
+      if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
       if (threshold > 0) {
         if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
           training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -51,7 +52,7 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
  */
 
 inline void
-partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float hi_lo)
+partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float hi_lo)
 {
   unsigned sz = s->size();
   if (sz < 2) return;
@@ -64,9 +65,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
   unsigned count = 0;
   for (unsigned i = 0; i < sep_hi; i++) {
     for (unsigned j = sep_hi; j < sz; j++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
-      if ((*s)[i].model <= (*s)[j].model) {
-#endif
+      if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
       if (threshold > 0) {
         if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
           training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -78,9 +77,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
         b = true;
         break;
       }
-#ifdef DTRAIN_FASTER_PERCEPTRON
-      }
-#endif
     }
     if (b) break;
   }
@@ -88,9 +84,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
   while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo;
   for (unsigned i = sep_hi; i < sz-sep_lo; i++) {
     for (unsigned j = sz-sep_lo; j < sz; j++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
-      if ((*s)[i].model <= (*s)[j].model) {
-#endif
+      if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
       if (threshold > 0) {
         if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
           training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -99,9 +93,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
           training.push_back(make_pair((*s)[i], (*s)[j]));
       }
       if (++count == max) return;
-#ifdef DTRAIN_FASTER_PERCEPTRON
-      }
-#endif
     }
   }
 }
@@ -119,7 +110,7 @@ _PRO_cmp_pair_by_diff_d(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b
   return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score));
 }
 inline void
-PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1)
+PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0)
 {
   unsigned max_count = 5000, count = 0, sz = s->size();
   bool b = false;
-- 
cgit v1.2.3