summary | refs | log | tree | commit | diff
path: root/training/dtrain/pairsampling.h
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2013-03-15 12:46:03 +0100
committer: Patrick Simianer <p@simianer.de> 2013-03-15 12:46:03 +0100
commit: ce2f5608e15a3d3e080ab4b26b5f263fead215e2 (patch)
tree: 6be551752b56ea8c66987e2fe898b30cdc6f6e17 /training/dtrain/pairsampling.h
parent: 4f191a593918c7889f2c27cd9f596bec591bf736 (diff)
make perceptron automatically faster
Diffstat (limited to 'training/dtrain/pairsampling.h')
-rw-r--r-- training/dtrain/pairsampling.h 21
1 files changed, 6 insertions, 15 deletions
diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h
index 84be1efb..3f67e209 100644
--- a/training/dtrain/pairsampling.h
+++ b/training/dtrain/pairsampling.h
@@ -19,7 +19,7 @@ cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b)
}
inline void
-all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1)
+all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float _unused=1)
{
sort(s->begin(), s->end(), cmp_hyp_by_score_d);
unsigned sz = s->size();
@@ -27,6 +27,7 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
unsigned count = 0;
for (unsigned i = 0; i < sz-1; i++) {
for (unsigned j = i+1; j < sz; j++) {
+ if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
if (threshold > 0) {
if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -51,7 +52,7 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
*/
inline void
-partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float hi_lo)
+partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float hi_lo)
{
unsigned sz = s->size();
if (sz < 2) return;
@@ -64,9 +65,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
unsigned count = 0;
for (unsigned i = 0; i < sep_hi; i++) {
for (unsigned j = sep_hi; j < sz; j++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
- if ((*s)[i].model <= (*s)[j].model) {
-#endif
+ if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
if (threshold > 0) {
if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -78,9 +77,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
b = true;
break;
}
-#ifdef DTRAIN_FASTER_PERCEPTRON
- }
-#endif
}
if (b) break;
}
@@ -88,9 +84,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo;
for (unsigned i = sep_hi; i < sz-sep_lo; i++) {
for (unsigned j = sz-sep_lo; j < sz; j++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
- if ((*s)[i].model <= (*s)[j].model) {
-#endif
+ if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
if (threshold > 0) {
if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -99,9 +93,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
training.push_back(make_pair((*s)[i], (*s)[j]));
}
if (++count == max) return;
-#ifdef DTRAIN_FASTER_PERCEPTRON
- }
-#endif
}
}
}
@@ -119,7 +110,7 @@ _PRO_cmp_pair_by_diff_d(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b
return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score));
}
inline void
-PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1)
+PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0)
{
unsigned max_count = 5000, count = 0, sz = s->size();
bool b = false;