summaryrefslogtreecommitdiff
path: root/dtrain
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain')
-rw-r--r--dtrain/dtrain.cc2
-rw-r--r--dtrain/pairsampling.h15
-rw-r--r--dtrain/score.cc6
3 files changed, 20 insertions, 3 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 88413a1d..7f44d4cf 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -261,7 +261,7 @@ main(int argc, char** argv)
cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as<string>() << "'" << endl;
if (rescale)
cerr << setw(25) << "rescale " << rescale << endl;
- cerr << "max pairs " << max_pairs << endl;
+ cerr << setw(25) << "max pairs " << max_pairs << endl;
cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl;
cerr << setw(25) << "input " << "'" << input_fn << "'" << endl;
#ifdef DTRAIN_LOCAL
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
index 71c8ae59..84be1efb 100644
--- a/dtrain/pairsampling.h
+++ b/dtrain/pairsampling.h
@@ -23,6 +23,8 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
{
sort(s->begin(), s->end(), cmp_hyp_by_score_d);
unsigned sz = s->size();
+ bool b = false;
+ unsigned count = 0;
for (unsigned i = 0; i < sz-1; i++) {
for (unsigned j = i+1; j < sz; j++) {
if (threshold > 0) {
@@ -32,7 +34,12 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
if ((*s)[i].score != (*s)[j].score)
training.push_back(make_pair((*s)[i], (*s)[j]));
}
+ if (++count == max) {
+ b = true;
+ break;
+ }
}
+ if (b) break;
}
}
@@ -53,6 +60,8 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
unsigned sep_hi = sep;
if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi;
else sep_hi = 1;
+ bool b = false;
+ unsigned count = 0;
for (unsigned i = 0; i < sep_hi; i++) {
for (unsigned j = sep_hi; j < sz; j++) {
#ifdef DTRAIN_FASTER_PERCEPTRON
@@ -65,10 +74,15 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
if ((*s)[i].score != (*s)[j].score)
training.push_back(make_pair((*s)[i], (*s)[j]));
}
+ if (++count == max) {
+ b = true;
+ break;
+ }
#ifdef DTRAIN_FASTER_PERCEPTRON
}
#endif
}
+ if (b) break;
}
unsigned sep_lo = sz-sep;
while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo;
@@ -84,6 +98,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
if ((*s)[i].score != (*s)[j].score)
training.push_back(make_pair((*s)[i], (*s)[j]));
}
+ if (++count == max) return;
#ifdef DTRAIN_FASTER_PERCEPTRON
}
#endif
diff --git a/dtrain/score.cc b/dtrain/score.cc
index 5c356c0f..5bb0bcaa 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -128,6 +128,7 @@ SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
return brevity_penalty(hyp_len, ref_len) * sum;
}
+// TODO single variants!
/*
* approx. bleu
@@ -136,7 +137,8 @@ SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* and Structural Translation Features"
* (Chiang et al. '08)
*
- * NOTE: needs some more code in dtrain.cc
+ * NOTE: Needs some more code in dtrain.cc.
+ * No scaling by src len.
*/
score_t
ApproxBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
@@ -158,7 +160,7 @@ ApproxBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len);
glob_src_len_ = discount_ * (glob_src_len_ + src_len);
}
- return (score_t)glob_src_len_ * score;
+ return score;
}
/*