diff options
-rw-r--r-- | dtrain/dtrain.cc | 2 | ||||
-rw-r--r-- | dtrain/pairsampling.h | 15 | ||||
-rw-r--r-- | dtrain/score.cc | 6 |
3 files changed, 20 insertions, 3 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 88413a1d..7f44d4cf 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -261,7 +261,7 @@ main(int argc, char** argv)
   cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as<string>() << "'" << endl;
   if (rescale)
     cerr << setw(25) << "rescale " << rescale << endl;
-  cerr << "max pairs " << max_pairs << endl;
+  cerr << setw(25) << "max pairs " << max_pairs << endl;
   cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl;
   cerr << setw(25) << "input " << "'" << input_fn << "'" << endl;
 #ifdef DTRAIN_LOCAL
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
index 71c8ae59..84be1efb 100644
--- a/dtrain/pairsampling.h
+++ b/dtrain/pairsampling.h
@@ -23,6 +23,8 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
 {
   sort(s->begin(), s->end(), cmp_hyp_by_score_d);
   unsigned sz = s->size();
+  bool b = false;
+  unsigned count = 0;
   for (unsigned i = 0; i < sz-1; i++) {
     for (unsigned j = i+1; j < sz; j++) {
       if (threshold > 0) {
@@ -32,7 +34,12 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
         if ((*s)[i].score != (*s)[j].score)
           training.push_back(make_pair((*s)[i], (*s)[j]));
       }
+      if (++count == max) {
+        b = true;
+        break;
+      }
     }
+    if (b) break;
   }
 }

@@ -53,6 +60,8 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
   unsigned sep_hi = sep;
   if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi;
   else sep_hi = 1;
+  bool b = false;
+  unsigned count = 0;
   for (unsigned i = 0; i < sep_hi; i++) {
     for (unsigned j = sep_hi; j < sz; j++) {
 #ifdef DTRAIN_FASTER_PERCEPTRON
@@ -65,10 +74,15 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
         if ((*s)[i].score != (*s)[j].score)
           training.push_back(make_pair((*s)[i], (*s)[j]));
       }
+      if (++count == max) {
+        b = true;
+        break;
+      }
 #ifdef DTRAIN_FASTER_PERCEPTRON
       }
 #endif
     }
+    if (b) break;
   }
   unsigned sep_lo = sz-sep;
   while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo;
@@ -84,6 +98,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
         if ((*s)[i].score != (*s)[j].score)
           training.push_back(make_pair((*s)[i], (*s)[j]));
       }
+      if (++count == max) return;
 #ifdef DTRAIN_FASTER_PERCEPTRON
       }
 #endif
diff --git a/dtrain/score.cc b/dtrain/score.cc
index 5c356c0f..5bb0bcaa 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -128,6 +128,7 @@ SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
   return brevity_penalty(hyp_len, ref_len) * sum;
 }

+// TODO single variants!

 /*
  * approx. bleu
@@ -136,7 +137,8 @@ SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
  * and Structural Translation Features"
  * (Chiang et al. '08)
  *
- * NOTE: needs some more code in dtrain.cc
+ * NOTE: Needs some more code in dtrain.cc .
+ *       No scaling by src len.
  */
 score_t
 ApproxBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
@@ -158,7 +160,7 @@ ApproxBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
     glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len);
     glob_src_len_ = discount_ * (glob_src_len_ + src_len);
   }
-  return (score_t)glob_src_len_ * score;
+  return score;
 }

 /*