diff options
Diffstat (limited to 'dtrain/pairsampling.h')
-rw-r--r-- | dtrain/pairsampling.h | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h index bac132c6..71c8ae59 100644 --- a/dtrain/pairsampling.h +++ b/dtrain/pairsampling.h @@ -19,7 +19,7 @@ cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b) } inline void -all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, float _unused=1) +all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1) { sort(s->begin(), s->end(), cmp_hyp_by_score_d); unsigned sz = s->size(); @@ -44,13 +44,20 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc */ inline void -partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, float hi_lo) +partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float hi_lo) { - sort(s->begin(), s->end(), cmp_hyp_by_score_d); unsigned sz = s->size(); + if (sz < 2) return; + sort(s->begin(), s->end(), cmp_hyp_by_score_d); unsigned sep = round(sz*hi_lo); - for (unsigned i = 0; i < sep; i++) { - for (unsigned j = sep; j < sz; j++) { + unsigned sep_hi = sep; + if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi; + else sep_hi = 1; + for (unsigned i = 0; i < sep_hi; i++) { + for (unsigned j = sep_hi; j < sz; j++) { +#ifdef DTRAIN_FASTER_PERCEPTRON + if ((*s)[i].model <= (*s)[j].model) { +#endif if (threshold > 0) { if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); @@ -58,10 +65,18 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor if ((*s)[i].score != (*s)[j].score) training.push_back(make_pair((*s)[i], (*s)[j])); } +#ifdef DTRAIN_FASTER_PERCEPTRON + } +#endif } } - for (unsigned i = sep; i < sz-sep; i++) { - for (unsigned j = sz-sep; j < sz; j++) { + unsigned sep_lo = sz-sep; + while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo; + for (unsigned i = sep_hi; i < sz-sep_lo; i++) { + for (unsigned j = sz-sep_lo; j < sz; j++) { +#ifdef DTRAIN_FASTER_PERCEPTRON + if ((*s)[i].model <= (*s)[j].model) { +#endif if (threshold > 0) { if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); @@ -69,6 +84,9 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor if ((*s)[i].score != (*s)[j].score) training.push_back(make_pair((*s)[i], (*s)[j])); } +#ifdef DTRAIN_FASTER_PERCEPTRON + } +#endif } } } @@ -86,7 +104,7 @@ _PRO_cmp_pair_by_diff_d(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); } inline void -PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, float _unused=1) +PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1) { unsigned max_count = 5000, count = 0, sz = s->size(); bool b = false; |