diff options
Diffstat (limited to 'dtrain/pairsampling.h')
-rw-r--r-- | dtrain/pairsampling.h | 46 |
1 files changed, 43 insertions, 3 deletions
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
index 6db0c045..131e90ca 100644
--- a/dtrain/pairsampling.h
+++ b/dtrain/pairsampling.h
@@ -1,13 +1,12 @@
 #ifndef _DTRAIN_PAIRSAMPLING_H_
 #define _DTRAIN_PAIRSAMPLING_H_
 
-
 namespace dtrain
 {
 
 
 inline void
-sample_all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> > &training)
+sample_all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training)
 {
   for (unsigned i = 0; i < s->size()-1; i++) {
     for (unsigned j = i+1; j < s->size(); j++) {
@@ -20,7 +19,7 @@ sample_all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> > &train
 }
 
 inline void
-sample_rand_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> > &training,
+sample_rand_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training,
                   MT19937* prng)
 {
   for (unsigned i = 0; i < s->size()-1; i++) {
@@ -35,6 +34,47 @@ sample_rand_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> > &trai
   }
 }
 
+// Comparison predicate for sort(): orders hypotheses by ascending score.
+// Declared inline because this header may be included from several
+// translation units; arguments are taken by const reference so sorting does
+// not copy a ScoredHyp per comparison.
+inline bool
+sort_samples_by_score(const ScoredHyp& a, const ScoredHyp& b)
+{
+  return a.score < b.score;
+}
+
+// "10/80/10" sampling. Sorts *s in place by ascending score (the caller's
+// vector is reordered), cuts it into a low block of sep = size/10 entries,
+// a middle block and a high block of sep entries, and appends to training
+// every pair (low, anything above the low block) and (middle, high) whose
+// first element has the smaller rank value. Lists with fewer than 10
+// entries yield no pairs.
+inline void
+sample108010(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training)
+{
+  sort(s->begin(), s->end(), sort_samples_by_score);
+  const unsigned sz = s->size();
+  const unsigned slice = 10;
+  // One tenth of the list, rounded down. (sz%slice would make the block
+  // size depend on the remainder, e.g. 5 of 15 entries instead of 1.)
+  const unsigned sep = sz/slice;
+  // Low block against everything above it.
+  for (unsigned i = 0; i < sep; i++) {
+    for (unsigned j = sep; j < sz; j++) {
+      if ((*s)[i].rank < (*s)[j].rank)
+        training.push_back(pair<ScoredHyp,ScoredHyp>((*s)[i], (*s)[j]));
+    }
+  }
+  // Middle block against the high block.
+  for (unsigned i = sep; i < sz-sep; i++) {
+    for (unsigned j = sz-sep; j < sz; j++) {
+      if ((*s)[i].rank < (*s)[j].rank)
+        training.push_back(pair<ScoredHyp,ScoredHyp>((*s)[i], (*s)[j]));
+    }
+  }
+}
+
 
 } // namespace
 