From 1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:35:42 -0500 Subject: major restructure of the training code --- training/dtrain/pairsampling.h | 149 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 training/dtrain/pairsampling.h (limited to 'training/dtrain/pairsampling.h') diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h new file mode 100644 index 00000000..84be1efb --- /dev/null +++ b/training/dtrain/pairsampling.h @@ -0,0 +1,149 @@ +#ifndef _DTRAIN_PAIRSAMPLING_H_ +#define _DTRAIN_PAIRSAMPLING_H_ + +namespace dtrain +{ + + +bool +accept_pair(score_t a, score_t b, score_t threshold) +{ + if (fabs(a - b) < threshold) return false; + return true; +} + +bool +cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b) +{ + return a.score > b.score; +} + +inline void +all_pairs(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1) +{ + sort(s->begin(), s->end(), cmp_hyp_by_score_d); + unsigned sz = s->size(); + bool b = false; + unsigned count = 0; + for (unsigned i = 0; i < sz-1; i++) { + for (unsigned j = i+1; j < sz; j++) { + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) { + b = true; + break; + } + } + if (b) break; + } +} + +/* + * multipartite ranking + * sort (descending) by bleu + * compare top X to middle Y and low X + * cmp middle Y to low X + */ + +inline void +partXYX(vector* s, vector >& training, score_t threshold, unsigned max, float hi_lo) +{ + unsigned sz = s->size(); + if (sz < 2) return; + sort(s->begin(), s->end(), cmp_hyp_by_score_d); + unsigned sep = round(sz*hi_lo); + unsigned sep_hi = sep; + if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi; + else sep_hi = 1; + bool b = false; + unsigned count = 0; + for (unsigned i = 0; i < sep_hi; i++) { + for (unsigned j = sep_hi; j < sz; j++) { +#ifdef DTRAIN_FASTER_PERCEPTRON + if ((*s)[i].model <= (*s)[j].model) { +#endif + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) { + b = true; + break; + } +#ifdef DTRAIN_FASTER_PERCEPTRON + } +#endif + } + if (b) break; + } + unsigned sep_lo = sz-sep; + while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo; + for (unsigned i = sep_hi; i < sz-sep_lo; i++) { + for (unsigned j = sz-sep_lo; j < sz; j++) { +#ifdef DTRAIN_FASTER_PERCEPTRON + if ((*s)[i].model <= (*s)[j].model) { +#endif + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) return; +#ifdef DTRAIN_FASTER_PERCEPTRON + } +#endif + } + } +} + +/* + * pair sampling as in + * 'Tuning as Ranking' (Hopkins & May, 2011) + * count = 5000 + * threshold = 5% BLEU (0.05 for param 3) + * cut = top 50 + */ +bool +_PRO_cmp_pair_by_diff_d(pair a, pair b) +{ + return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); +} +inline void +PROsampling(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1) +{ + unsigned max_count = 5000, count = 0, sz = s->size(); + bool b = false; + for (unsigned i = 0; i < sz-1; i++) { + for (unsigned j = i+1; j < sz; j++) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { + training.push_back(make_pair((*s)[i], (*s)[j])); + if (++count == max_count) { + b = true; + break; + } + } + } + if (b) break; + } + if (training.size() > 50) { + sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff_d); + training.erase(training.begin()+50, training.end()); + } + return; +} + + +} // namespace + +#endif + -- cgit v1.2.3 From ae6a76dfc04698029616232b39d9f47347ec9d4b Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 15 Mar 2013 12:46:03 +0100 Subject: make perceptron automatically faster --- training/dtrain/dtrain.cc | 36 ++++++++++++++++++++---------------- training/dtrain/pairsampling.h | 21 ++++++--------------- 2 files changed, 26 insertions(+), 31 deletions(-) (limited to 'training/dtrain/pairsampling.h') diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index fcb46db2..2bb4ec98 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -6,7 +6,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) { po::options_description ini("Configuration File Options"); ini.add_options() - ("input", po::value()->default_value("-"), "input file") + ("input", po::value()->default_value("-"), "input file (src)") + ("refs,r", po::value(), "references") ("output", po::value()->default_value("-"), "output weights file, '-' for STDOUT") ("input_weights", po::value(), "input weights file (e.g. from previous iteration)") ("decoder_config", po::value(), "configuration file for cdec") @@ -33,8 +34,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("scale_bleu_diff", po::value()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") ("loss_margin", po::value()->default_value(0.), "update if no error in pref pair but model scores this near") ("max_pairs", po::value()->default_value(std::numeric_limits::max()), "max. # of pairs per Sent.") - ("refs,r", po::value(), "references in local mode") - ("noup", po::value()->zero_tokens(), "do not update weights"); + ("noup", po::value()->zero_tokens(), "do not update weights") + ("pair_stats", po::value()->zero_tokens(), "stats about correctly ranked/misranked pairs even if loss_margin=0 and gamma=0"); po::options_description cl("Command Line Options"); cl.add_options() ("config,c", po::value(), "dtrain config file") @@ -124,6 +125,10 @@ main(int argc, char** argv) vector print_weights; if (cfg.count("print_weights")) boost::split(print_weights, cfg["print_weights"].as(), boost::is_any_of(" ")); + bool pair_stats = false; + if (cfg.count("pair_stats")) pair_stats = true; + bool faster_perceptron = false; + if (gamma==0 && loss_margin==0 && !pair_stats) faster_perceptron = true; // setup decoder register_feature_functions(); @@ -346,25 +351,26 @@ main(int argc, char** argv) // get pairs vector > pairs; if (pair_sampling == "all") - all_pairs(samples, pairs, pair_threshold, max_pairs); + all_pairs(samples, pairs, pair_threshold, max_pairs, faster_perceptron); if (pair_sampling == "XYX") - partXYX(samples, pairs, pair_threshold, max_pairs, hi_lo); + partXYX(samples, pairs, pair_threshold, max_pairs, faster_perceptron, hi_lo); if (pair_sampling == "PRO") PROsampling(samples, pairs, pair_threshold, max_pairs); npairs += pairs.size(); for (vector >::iterator it = pairs.begin(); it != pairs.end(); it++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - bool rank_error = true; // pair sampling already did this for us - rank_errors++; - score_t margin = std::numeric_limits::max(); -#else - bool rank_error = it->first.model <= it->second.model; + bool rank_error; + score_t margin; + if (faster_perceptron) { // we only have considering misranked pairs + rank_error = true; // pair sampling already did this for us + margin = std::numeric_limits::max(); + } else { + rank_error = it->first.model <= it->second.model; + margin = fabs(fabs(it->first.model) - fabs(it->second.model)); + if (!rank_error && margin < loss_margin) margin_violations++; + } if (rank_error) rank_errors++; - score_t margin = fabs(fabs(it->first.model) - fabs(it->second.model)); - if (!rank_error && margin < loss_margin) margin_violations++; -#endif if (scale_bleu_diff) eta = it->first.score - it->second.score; if (rank_error || margin < loss_margin) { SparseVector diff_vec = it->first.f - it->second.f; @@ -458,10 +464,8 @@ main(int argc, char** argv) cerr << _np << npairs/(float)in_sz << endl; cerr << " avg # rank err: "; cerr << rank_errors/(float)in_sz << endl; -#ifndef DTRAIN_FASTER_PERCEPTRON cerr << " avg # margin viol: "; cerr << margin_violations/(float)in_sz << endl; -#endif cerr << " non0 feature count: " << nonz << endl; cerr << " avg list sz: " << list_sz/(float)in_sz << endl; cerr << " avg f count: " << f_count/(float)list_sz << endl; diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h index 84be1efb..3f67e209 100644 --- a/training/dtrain/pairsampling.h +++ b/training/dtrain/pairsampling.h @@ -19,7 +19,7 @@ cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b) } inline void -all_pairs(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1) +all_pairs(vector* s, vector >& training, score_t threshold, unsigned max, bool misranked_only, float _unused=1) { sort(s->begin(), s->end(), cmp_hyp_by_score_d); unsigned sz = s->size(); @@ -27,6 +27,7 @@ all_pairs(vector* s, vector >& training, sc unsigned count = 0; for (unsigned i = 0; i < sz-1; i++) { for (unsigned j = i+1; j < sz; j++) { + if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; if (threshold > 0) { if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); @@ -51,7 +52,7 @@ all_pairs(vector* s, vector >& training, sc */ inline void -partXYX(vector* s, vector >& training, score_t threshold, unsigned max, float hi_lo) +partXYX(vector* s, vector >& training, score_t threshold, unsigned max, bool misranked_only, float hi_lo) { unsigned sz = s->size(); if (sz < 2) return; @@ -64,9 +65,7 @@ partXYX(vector* s, vector >& training, scor unsigned count = 0; for (unsigned i = 0; i < sep_hi; i++) { for (unsigned j = sep_hi; j < sz; j++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - if ((*s)[i].model <= (*s)[j].model) { -#endif + if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; if (threshold > 0) { if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); @@ -78,9 +77,6 @@ partXYX(vector* s, vector >& training, scor b = true; break; } -#ifdef DTRAIN_FASTER_PERCEPTRON - } -#endif } if (b) break; } @@ -88,9 +84,7 @@ partXYX(vector* s, vector >& training, scor while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo; for (unsigned i = sep_hi; i < sz-sep_lo; i++) { for (unsigned j = sz-sep_lo; j < sz; j++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - if ((*s)[i].model <= (*s)[j].model) { -#endif + if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; if (threshold > 0) { if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); @@ -99,9 +93,6 @@ partXYX(vector* s, vector >& training, scor training.push_back(make_pair((*s)[i], (*s)[j])); } if (++count == max) return; -#ifdef DTRAIN_FASTER_PERCEPTRON - } -#endif } } } @@ -119,7 +110,7 @@ _PRO_cmp_pair_by_diff_d(pair a, pair b return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); } inline void -PROsampling(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1) +PROsampling(vector* s, vector >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0) { unsigned max_count = 5000, count = 0, sz = s->size(); bool b = false; -- cgit v1.2.3