From ae6a76dfc04698029616232b39d9f47347ec9d4b Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 15 Mar 2013 12:46:03 +0100
Subject: make perceptron automatically faster
---
training/dtrain/dtrain.cc | 36 ++++++++++++++++++++----------------
training/dtrain/pairsampling.h | 21 ++++++---------------
2 files changed, 26 insertions(+), 31 deletions(-)
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index fcb46db2..2bb4ec98 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -6,7 +6,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
{
po::options_description ini("Configuration File Options");
ini.add_options()
- ("input", po::value()->default_value("-"), "input file")
+ ("input", po::value()->default_value("-"), "input file (src)")
+ ("refs,r", po::value(), "references")
("output", po::value()->default_value("-"), "output weights file, '-' for STDOUT")
("input_weights", po::value(), "input weights file (e.g. from previous iteration)")
("decoder_config", po::value(), "configuration file for cdec")
@@ -33,8 +34,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
("scale_bleu_diff", po::value()->zero_tokens(), "learning rate <- bleu diff of a misranked pair")
("loss_margin", po::value()->default_value(0.), "update if no error in pref pair but model scores this near")
("max_pairs", po::value()->default_value(std::numeric_limits::max()), "max. # of pairs per Sent.")
- ("refs,r", po::value(), "references in local mode")
- ("noup", po::value()->zero_tokens(), "do not update weights");
+ ("noup", po::value()->zero_tokens(), "do not update weights")
+ ("pair_stats", po::value()->zero_tokens(), "stats about correctly ranked/misranked pairs even if loss_margin=0 and gamma=0");
po::options_description cl("Command Line Options");
cl.add_options()
("config,c", po::value(), "dtrain config file")
@@ -124,6 +125,10 @@ main(int argc, char** argv)
vector print_weights;
if (cfg.count("print_weights"))
boost::split(print_weights, cfg["print_weights"].as(), boost::is_any_of(" "));
+ bool pair_stats = false;
+ if (cfg.count("pair_stats")) pair_stats = true;
+ bool faster_perceptron = false;
+ if (gamma==0 && loss_margin==0 && !pair_stats) faster_perceptron = true;
// setup decoder
register_feature_functions();
@@ -346,25 +351,26 @@ main(int argc, char** argv)
// get pairs
vector > pairs;
if (pair_sampling == "all")
- all_pairs(samples, pairs, pair_threshold, max_pairs);
+ all_pairs(samples, pairs, pair_threshold, max_pairs, faster_perceptron);
if (pair_sampling == "XYX")
- partXYX(samples, pairs, pair_threshold, max_pairs, hi_lo);
+ partXYX(samples, pairs, pair_threshold, max_pairs, faster_perceptron, hi_lo);
if (pair_sampling == "PRO")
PROsampling(samples, pairs, pair_threshold, max_pairs);
npairs += pairs.size();
for (vector >::iterator it = pairs.begin();
it != pairs.end(); it++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
- bool rank_error = true; // pair sampling already did this for us
- rank_errors++;
- score_t margin = std::numeric_limits::max();
-#else
- bool rank_error = it->first.model <= it->second.model;
+ bool rank_error;
+ score_t margin;
+ if (faster_perceptron) { // we only have considering misranked pairs
+ rank_error = true; // pair sampling already did this for us
+ margin = std::numeric_limits::max();
+ } else {
+ rank_error = it->first.model <= it->second.model;
+ margin = fabs(fabs(it->first.model) - fabs(it->second.model));
+ if (!rank_error && margin < loss_margin) margin_violations++;
+ }
if (rank_error) rank_errors++;
- score_t margin = fabs(fabs(it->first.model) - fabs(it->second.model));
- if (!rank_error && margin < loss_margin) margin_violations++;
-#endif
if (scale_bleu_diff) eta = it->first.score - it->second.score;
if (rank_error || margin < loss_margin) {
SparseVector diff_vec = it->first.f - it->second.f;
@@ -458,10 +464,8 @@ main(int argc, char** argv)
cerr << _np << npairs/(float)in_sz << endl;
cerr << " avg # rank err: ";
cerr << rank_errors/(float)in_sz << endl;
-#ifndef DTRAIN_FASTER_PERCEPTRON
cerr << " avg # margin viol: ";
cerr << margin_violations/(float)in_sz << endl;
-#endif
cerr << " non0 feature count: " << nonz << endl;
cerr << " avg list sz: " << list_sz/(float)in_sz << endl;
cerr << " avg f count: " << f_count/(float)list_sz << endl;
diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h
index 84be1efb..3f67e209 100644
--- a/training/dtrain/pairsampling.h
+++ b/training/dtrain/pairsampling.h
@@ -19,7 +19,7 @@ cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b)
}
inline void
-all_pairs(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1)
+all_pairs(vector* s, vector >& training, score_t threshold, unsigned max, bool misranked_only, float _unused=1)
{
sort(s->begin(), s->end(), cmp_hyp_by_score_d);
unsigned sz = s->size();
@@ -27,6 +27,7 @@ all_pairs(vector* s, vector >& training, sc
unsigned count = 0;
for (unsigned i = 0; i < sz-1; i++) {
for (unsigned j = i+1; j < sz; j++) {
+ if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
if (threshold > 0) {
if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -51,7 +52,7 @@ all_pairs(vector* s, vector >& training, sc
*/
inline void
-partXYX(vector* s, vector >& training, score_t threshold, unsigned max, float hi_lo)
+partXYX(vector* s, vector >& training, score_t threshold, unsigned max, bool misranked_only, float hi_lo)
{
unsigned sz = s->size();
if (sz < 2) return;
@@ -64,9 +65,7 @@ partXYX(vector* s, vector >& training, scor
unsigned count = 0;
for (unsigned i = 0; i < sep_hi; i++) {
for (unsigned j = sep_hi; j < sz; j++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
- if ((*s)[i].model <= (*s)[j].model) {
-#endif
+ if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
if (threshold > 0) {
if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -78,9 +77,6 @@ partXYX(vector* s, vector >& training, scor
b = true;
break;
}
-#ifdef DTRAIN_FASTER_PERCEPTRON
- }
-#endif
}
if (b) break;
}
@@ -88,9 +84,7 @@ partXYX(vector* s, vector >& training, scor
while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo;
for (unsigned i = sep_hi; i < sz-sep_lo; i++) {
for (unsigned j = sz-sep_lo; j < sz; j++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
- if ((*s)[i].model <= (*s)[j].model) {
-#endif
+ if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
if (threshold > 0) {
if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -99,9 +93,6 @@ partXYX(vector* s, vector >& training, scor
training.push_back(make_pair((*s)[i], (*s)[j]));
}
if (++count == max) return;
-#ifdef DTRAIN_FASTER_PERCEPTRON
- }
-#endif
}
}
}
@@ -119,7 +110,7 @@ _PRO_cmp_pair_by_diff_d(pair a, pair b
return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score));
}
inline void
-PROsampling(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1)
+PROsampling(vector* s, vector >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0)
{
unsigned max_count = 5000, count = 0, sz = s->size();
bool b = false;
--
cgit v1.2.3