diff options
-rw-r--r-- | dtrain/dtrain.cc | 47 | ||||
-rw-r--r-- | dtrain/dtrain.h | 5 | ||||
-rw-r--r-- | dtrain/pairsampling.h | 41 | ||||
-rw-r--r-- | dtrain/score.cc | 16 | ||||
-rw-r--r-- | dtrain/score.h | 4 |
5 files changed, 45 insertions, 68 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index 3dee10f2..e817e7ab 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -30,7 +30,6 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("rescale", po::value<bool>()->zero_tokens(), "rescale weight vector after each input") ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)") ("l1_reg_strength", po::value<weight_t>(), "l1 regularization strength") - ("inc_correct", po::value<bool>()->zero_tokens(), "include correctly ranked pairs into updates") ("fselect", po::value<weight_t>()->default_value(-1), "TODO select top x percent of features after each epoch") ("approx_bleu_d", po::value<score_t>()->default_value(0.9), "discount for approx. BLEU") #ifdef DTRAIN_LOCAL @@ -122,9 +121,6 @@ main(int argc, char** argv) HSReporter rep(task_id); bool keep = false; if (cfg.count("keep")) keep = true; - bool inc_correct = false; - if (cfg.count("inc_correct")) - inc_correct = true; const unsigned k = cfg["k"].as<unsigned>(); const unsigned N = cfg["N"].as<unsigned>(); @@ -226,7 +222,6 @@ main(int argc, char** argv) score_t max_score = 0.; unsigned best_it = 0; float overall_time = 0.; - unsigned pair_count = 0, feature_count = 0; // output cfg if (!quiet) { @@ -250,8 +245,6 @@ main(int argc, char** argv) cerr << setw(25) << "select weights " << "'" << select_weights << "'" << endl; if (cfg.count("l1_reg")) cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as<string>() << "'" << endl; - if (inc_correct) - cerr << setw(25) << "inc. correct " << inc_correct << endl; if (rescale) cerr << setw(25) << "rescale " << rescale << endl; cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl; @@ -420,36 +413,18 @@ main(int argc, char** argv) if (pair_sampling == "PRO") PROsampling(samples, pairs, pair_threshold); npairs += pairs.size(); - pair_count += 2*pairs.size(); for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin(); it != pairs.end(); it++) { - score_t rank_error = it->second.score - it->first.score; - feature_count += it->first.f.size() + it->second.f.size(); - if (!gamma) { - // perceptron - if (rank_error > 0) { - SparseVector<weight_t> diff_vec = it->second.f - it->first.f; - lambdas.plus_eq_v_times_s(diff_vec, eta); - rank_errors++; - } else { - if (inc_correct) { - SparseVector<weight_t> diff_vec = it->first.f - it->second.f; - lambdas.plus_eq_v_times_s(diff_vec, eta); - } - } - if (it->first.model - it->second.model < 1) margin_violations++; - } else { - // SVM - score_t margin = it->first.model - it->second.model; - if (rank_error > 0 || margin < 1) { - SparseVector<weight_t> diff_vec = it->second.f - it->first.f; - lambdas.plus_eq_v_times_s(diff_vec, eta); - if (rank_error > 0) rank_errors++; - if (margin < 1) margin_violations++; - } - // regularization - lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs)); + bool rank_error = it->first.model <= it->second.model; + if (rank_error) rank_errors++; + score_t margin = fabs(it->first.model - it->second.model); + if (!rank_error && margin < 1) margin_violations++; + if (rank_error || (gamma && margin<1)) { + SparseVector<weight_t> diff_vec = it->first.f - it->second.f; + lambdas.plus_eq_v_times_s(diff_vec, eta); + if (gamma) + lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs)); } } @@ -553,8 +528,6 @@ main(int argc, char** argv) cerr << " avg # margin viol: "; cerr << margin_violations/(float)in_sz << endl; cerr << " non0 feature count: " << nonz << endl; - cerr << " avg f count: "; - cerr << feature_count/(float)pair_count << endl; } if (hstreaming) { @@ -580,7 +553,7 @@ main(int argc, char** argv) overall_time += time_diff; if (!quiet) { cerr << _p2 << _np << "(time " << time_diff/60. << " min, "; - cerr << time_diff/(float)in_sz<< " s/S)" << endl; + cerr << time_diff/in_sz << " s/S)" << endl; } if (t+1 != T && !quiet) cerr << endl; diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h index 7b03d258..15d32e36 100644 --- a/dtrain/dtrain.h +++ b/dtrain/dtrain.h @@ -13,7 +13,7 @@ #include "filelib.h" -//#define DTRAIN_LOCAL +#define DTRAIN_LOCAL #define DTRAIN_DOTS 10 // after how many inputs to display a '.' #define DTRAIN_GRAMMAR_DELIM "########EOS########" @@ -85,7 +85,8 @@ inline void printWordIDVec(vector<WordID>& v) } template<typename T> -inline T sign(T z) { +inline T sign(T z) +{ if (z == 0) return 0; return z < 0 ? -1 : +1; } diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h index 56702b86..bb01cf4f 100644 --- a/dtrain/pairsampling.h +++ b/dtrain/pairsampling.h @@ -15,12 +15,12 @@ accept_pair(score_t a, score_t b, score_t threshold) inline void all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, float _unused = 1) { - for (unsigned i = 0; i < s->size()-1; i++) { - for (unsigned j = i+1; j < s->size(); j++) { + unsigned sz = s->size(); + for (unsigned i = 0; i < sz-1; i++) { + for (unsigned j = i+1; j < sz; j++) { if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); - } } else { training.push_back(make_pair((*s)[i], (*s)[j])); } @@ -30,14 +30,14 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc /* * multipartite ranking - * sort by bleu - * compare top 10% to middle 80% and low 10% - * cmp middle 80% to low 10% + * sort (descending) by bleu + * compare top X to middle Y and low X + * cmp middle Y to low X */ bool _XYX_cmp_hyp_by_score(ScoredHyp a, ScoredHyp b) { - return a.score < b.score; + return a.score > b.score; } inline void partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, float hi_lo) @@ -47,27 +47,23 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor unsigned sep = round(sz*hi_lo); for (unsigned i = 0; i < sep; i++) { for (unsigned j = sep; j < sz; j++) { - if ((*s)[i].rank < (*s)[j].rank) { - if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { - training.push_back(make_pair((*s)[i], (*s)[j])); - } - } else { + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if((*s)[i].score != (*s)[j].score) training.push_back(make_pair((*s)[i], (*s)[j])); - } } } } for (unsigned i = sep; i < sz-sep; i++) { for (unsigned j = sz-sep; j < sz; j++) { - if ((*s)[i].rank < (*s)[j].rank) { - if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { - training.push_back(make_pair((*s)[i], (*s)[j])); - } - } else { + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if((*s)[i].score != (*s)[j].score) training.push_back(make_pair((*s)[i], (*s)[j])); - } } } } @@ -83,7 +79,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor bool _PRO_cmp_pair_by_diff(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b) { - // descending order return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); } inline void diff --git a/dtrain/score.cc b/dtrain/score.cc index d0f9e8a0..b09d32ba 100644 --- a/dtrain/score.cc +++ b/dtrain/score.cc @@ -18,11 +18,15 @@ BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref { if (hyp_len == 0 || ref_len == 0) return 0.; unsigned M = N_; - if (ref_len < N_) M = ref_len; + vector<score_t> v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1./((score_t)M); + } score_t sum = 0; for (unsigned i = 0; i < M; i++) { if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.; - sum += w_[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]); + sum += v[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]); } return brevity_penalty(hyp_len, ref_len) * exp(sum); } @@ -55,12 +59,16 @@ StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, if (hyp_len == 0 || ref_len == 0) return 0.; NgramCounts counts = make_ngram_counts(hyp, ref, N_); unsigned M = N_; - if (ref_len < N_) M = ref_len; + vector<score_t> v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1./((score_t)M); + } score_t sum = 0, add = 0; for (unsigned i = 0; i < M; i++) { if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; if (i == 1) add = 1; - sum += w_[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); + sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); } return brevity_penalty(hyp_len, ref_len) * exp(sum); } diff --git a/dtrain/score.h b/dtrain/score.h index d0e79f65..eb8ad912 100644 --- a/dtrain/score.h +++ b/dtrain/score.h @@ -61,8 +61,8 @@ struct NgramCounts { unsigned i; for (i = 0; i < N_; i++) { - clipped_[i] = 0; - sum_[i] = 0; + clipped_[i] = 0.; + sum_[i] = 0.; } } |