summary | refs | log | tree | commit | diff
path: root/dtrain/pairsampling.h
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain/pairsampling.h')
-rw-r--r-- dtrain/pairsampling.h 70
1 files changed, 67 insertions, 3 deletions
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
index 9b88a4be..0951f8e9 100644
--- a/dtrain/pairsampling.h
+++ b/dtrain/pairsampling.h
@@ -49,17 +49,17 @@ multpart108010(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& trainin
unsigned sep = sz%slice;
if (sep == 0) sep = sz/slice;
for (unsigned i = 0; i < sep; i++) {
- for(unsigned j = sep; j < sz; j++) {
+ for (unsigned j = sep; j < sz; j++) {
p.first = (*s)[i];
p.second = (*s)[j];
- if(p.first.rank < p.second.rank) training.push_back(p);
+ if (p.first.rank < p.second.rank) training.push_back(p);
}
}
for (unsigned i = sep; i < sz-sep; i++) {
for (unsigned j = sz-sep; j < sz; j++) {
p.first = (*s)[i];
p.second = (*s)[j];
- if(p.first.rank < p.second.rank) training.push_back(p);
+ if (p.first.rank < p.second.rank) training.push_back(p);
}
}
}
@@ -118,6 +118,70 @@ all_pairs_discard(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& trai
}
}
+// Helper for multpart108010_discard(): appends to `training` every pair
+// ((*s)[i], (*s)[j]) with i in [i_begin, i_end) and j in [j_begin, j_end)
+// whose first hypothesis has the smaller rank and for which
+// _PRO_accept_pair() returns true.
+inline void
+_multpart_push_accepted_pairs(vector<ScoredHyp>* s,
+                              unsigned i_begin, unsigned i_end,
+                              unsigned j_begin, unsigned j_end,
+                              vector<pair<ScoredHyp,ScoredHyp> >& training)
+{
+  pair<ScoredHyp,ScoredHyp> p;
+  for (unsigned i = i_begin; i < i_end; i++) {
+    for (unsigned j = j_begin; j < j_end; j++) {
+      // Test the ranks in place so that hypotheses are only copied for
+      // pairs that can actually become training examples.
+      if (!((*s)[i].rank < (*s)[j].rank)) continue;
+      p.first = (*s)[i];
+      p.second = (*s)[j];
+      if (_PRO_accept_pair(p)) training.push_back(p);
+    }
+  }
+}
+
+// Multipartite pair sampling with discarding.
+//
+// Sorts *s in place with _multpart_cmp_hyp_by_score, splits it at index
+// `sep` into a head [0, sep), a middle [sep, sz-sep) and a tail [sz-sep, sz),
+// and collects the pairs (head, everything after the head) and
+// (middle, tail). A pair is kept only if first.rank < second.rank and
+// _PRO_accept_pair() accepts it. Finally the whole `training` vector
+// (including any entries it held on entry) is sorted with
+// _PRO_cmp_pair_by_diff and cut down to its first 50 elements.
+//
+// s        - hypotheses to sample from; reordered by this call.
+// training - output vector the sampled pairs are appended to.
+inline void
+multpart108010_discard(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training)
+{
+  const unsigned slice = 10;      // divisor used to derive the split index
+  const unsigned max_pairs = 50;  // upper bound on training.size() on return
+  sort(s->begin(), s->end(), _multpart_cmp_hyp_by_score);
+  const unsigned sz = s->size();
+  // NOTE(review): sep is the remainder sz%slice and only falls back to
+  // sz/slice when that remainder is 0. The function name suggests a
+  // 10%/80%/10% split, for which sz/slice would be expected in all cases.
+  // Kept as is to match multpart108010() -- please confirm the intent.
+  unsigned sep = sz%slice;
+  if (sep == 0) sep = sz/slice;
+  // sep <= sz always holds here, so sz-sep cannot wrap around.
+  _multpart_push_accepted_pairs(s, 0, sep, sep, sz, training);
+  _multpart_push_accepted_pairs(s, sep, sz-sep, sz-sep, sz, training);
+  sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff);
+  if (training.size() > max_pairs)
+    training.erase(training.begin()+max_pairs, training.end());
+}
+
+// Variant entry point that behaves exactly like multpart108010_discard():
+// *s is sorted in place, accepted pairs are appended to `training`, and
+// `training` is then sorted and truncated to at most 50 entries.
+// Forwards to multpart108010_discard() instead of repeating its body;
+// replace the call once this variant needs to diverge.
+//
+// s        - hypotheses to sample from; reordered by this call.
+// training - output vector the sampled pairs are appended to.
+inline void
+multpart108010_discard1(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training)
+{
+  multpart108010_discard(s, training);
+}
+
} // namespace