summary | refs | log | tree | commit | diff
path: root/dtrain/pairsampling.h
diff options
context:
space:
mode:
author: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-05-15 00:44:03 +0200
committer: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-05-15 00:44:03 +0200
commit: 37050e861de2b216d3a28f79e111b674c5d142ac (patch)
tree: f830d51ca55af1d1a67f2e7bad687cc46480317b /dtrain/pairsampling.h
parent: 7c344de97edac0aa2a6a90c2de9bcf60f15ac000 (diff)
loss margin cfg, XYX improved, smooth bleu variant
Diffstat (limited to 'dtrain/pairsampling.h')
-rw-r--r-- dtrain/pairsampling.h 16
1 files changed, 11 insertions, 5 deletions
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
index 5085738e..32006a41 100644
--- a/dtrain/pairsampling.h
+++ b/dtrain/pairsampling.h
@@ -46,11 +46,15 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
inline void
partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, float hi_lo)
{
- sort(s->begin(), s->end(), cmp_hyp_by_score_d);
unsigned sz = s->size();
+ if (sz < 2) return;
+ sort(s->begin(), s->end(), cmp_hyp_by_score_d);
unsigned sep = round(sz*hi_lo);
- for (unsigned i = 0; i < sep; i++) {
- for (unsigned j = sep; j < sz; j++) {
+ unsigned sep_hi = sep;
+ if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi;
+ else sep_hi = 1;
+ for (unsigned i = 0; i < sep_hi; i++) {
+ for (unsigned j = sep_hi; j < sz; j++) {
#ifdef DTRAIN_FASTER_PERCEPTRON
if ((*s)[i].model <= (*s)[j].model) {
#endif
@@ -66,8 +70,10 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
#endif
}
}
- for (unsigned i = sep; i < sz-sep; i++) {
- for (unsigned j = sz-sep; j < sz; j++) {
+ unsigned sep_lo = sz-sep;
+ while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo;
+ for (unsigned i = sep_hi; i < sz-sep_lo; i++) {
+ for (unsigned j = sz-sep_lo; j < sz; j++) {
#ifdef DTRAIN_FASTER_PERCEPTRON
if ((*s)[i].model <= (*s)[j].model) {
#endif