summaryrefslogtreecommitdiff
path: root/dtrain/pairsampling.h
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain/pairsampling.h')
-rw-r--r--dtrain/pairsampling.h72
1 files changed, 72 insertions, 0 deletions
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
new file mode 100644
index 00000000..131e90ca
--- /dev/null
+++ b/dtrain/pairsampling.h
@@ -0,0 +1,72 @@
+#ifndef _DTRAIN_PAIRSAMPLING_H_
+#define _DTRAIN_PAIRSAMPLING_H_
+
+namespace dtrain
+{
+
+
+inline void
+sample_all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training)
+{
+ for (unsigned i = 0; i < s->size()-1; i++) {
+ for (unsigned j = i+1; j < s->size(); j++) {
+ pair<ScoredHyp,ScoredHyp> p;
+ p.first = (*s)[i];
+ p.second = (*s)[j];
+ training.push_back(p);
+ }
+ }
+}
+
+inline void
+sample_rand_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training,
+ MT19937* prng)
+{
+ for (unsigned i = 0; i < s->size()-1; i++) {
+ for (unsigned j = i+1; j < s->size(); j++) {
+ if (prng->next() < .5) {
+ pair<ScoredHyp,ScoredHyp> p;
+ p.first = (*s)[i];
+ p.second = (*s)[j];
+ training.push_back(p);
+ }
+ }
+ }
+}
+
+bool
+sort_samples_by_score(ScoredHyp a, ScoredHyp b)
+{
+ return a.score < b.score;
+}
+
+inline void
+sample108010(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training)
+{
+ sort(s->begin(), s->end(), sort_samples_by_score);
+ pair<ScoredHyp,ScoredHyp> p;
+ unsigned sz = s->size();
+ unsigned slice = 10;
+ unsigned sep = sz%slice;
+ if (sep == 0) sep = sz/slice;
+ for (unsigned i = 0; i < sep; i++) {
+ for(unsigned j = sep; j < sz; j++) {
+ p.first = (*s)[i];
+ p.second = (*s)[j];
+ if(p.first.rank < p.second.rank) training.push_back(p);
+ }
+ }
+ for (unsigned i = sep; i < sz-sep; i++) {
+ for (unsigned j = sz-sep; j < sz; j++) {
+ p.first = (*s)[i];
+ p.second = (*s)[j];
+ if(p.first.rank < p.second.rank) training.push_back(p);
+ }
+ }
+}
+
+
+} // namespace
+
+#endif
+