Diffstat (limited to 'dtrain/dtrain.cc')
-rw-r--r--  dtrain/dtrain.cc | 22 ++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 8b1fc953..717d47a2 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -33,6 +33,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
("fselect", po::value<weight_t>()->default_value(-1), "TODO select top x percent (or by threshold) of features after each epoch")
("approx_bleu_d", po::value<score_t>()->default_value(0.9), "discount for approx. BLEU")
("scale_bleu_diff", po::value<bool>()->zero_tokens(), "learning rate <- bleu diff of a misranked pair")
+ ("loss_margin", po::value<weight_t>()->default_value(0.), "update if no error in pref pair but model scores this near")
#ifdef DTRAIN_LOCAL
("refs,r", po::value<string>(), "references in local mode")
#endif
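The new loss_margin option follows the boost::program_options conventions already used above: default_value() supplies a typed default read back via as<weight_t>(), while zero_tokens() (as on scale_bleu_diff) declares a valueless flag tested with cfg.count(). A minimal self-contained sketch of the same pattern, with toy option names and double standing in for weight_t:

    #include <boost/program_options.hpp>
    #include <iostream>

    namespace po = boost::program_options;

    int main(int argc, char** argv) {
      po::options_description opts("sketch");
      opts.add_options()
        ("loss_margin", po::value<double>()->default_value(0.), "margin")  // typed, with default
        ("scale_bleu_diff", po::value<bool>()->zero_tokens(), "flag");     // present/absent flag
      po::variables_map cfg;
      po::store(po::parse_command_line(argc, argv, opts), cfg);
      if (cfg.count("scale_bleu_diff")) std::cout << "scale by bleu diff" << std::endl;
      std::cout << "loss_margin = " << cfg["loss_margin"].as<double>() << std::endl;
      return 0;
    }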
@@ -134,6 +135,8 @@ main(int argc, char** argv)
const string select_weights = cfg["select_weights"].as<string>();
const float hi_lo = cfg["hi_lo"].as<float>();
const score_t approx_bleu_d = cfg["approx_bleu_d"].as<score_t>();
+ weight_t loss_margin = cfg["loss_margin"].as<weight_t>();
+ if (loss_margin > 9998.) loss_margin = std::numeric_limits<float>::max();
bool scale_bleu_diff = false;
if (cfg.count("scale_bleu_diff")) scale_bleu_diff = true;
bool average = false;
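Note the sentinel here: any loss_margin above 9998 is promoted to an effectively infinite margin, so every sampled pair triggers an update regardless of its model-score gap. The cap is float's maximum rather than weight_t's, matching the margin constant used in the DTRAIN_FASTER_PERCEPTRON branch further down. A restatement of the mapping as a hypothetical helper (not in the patch):

    #include <limits>

    // Sketch: map the user-supplied margin to the value the update loop
    // compares against; anything above 9998 means "update on every pair".
    inline double effective_loss_margin(double requested) {
      if (requested > 9998.)
        return std::numeric_limits<float>::max();
      return requested;
    }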
@@ -160,6 +163,8 @@ main(int argc, char** argv)
scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer);
} else if (scorer_str == "smooth_bleu") {
scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer);
+ } else if (scorer_str == "smooth_single_bleu") {
+ scorer = dynamic_cast<SmoothSingleBleuScorer*>(new SmoothSingleBleuScorer);
} else if (scorer_str == "approx_bleu") {
scorer = dynamic_cast<ApproxBleuScorer*>(new ApproxBleuScorer(N, approx_bleu_d));
} else {
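As in the neighboring branches, the freshly constructed scorer is cast to its own type, which makes the dynamic_cast a no-op kept only for symmetry. Assuming the scorers all derive from the base type of the scorer variable (presumably LocalScorer in dtrain's score.h), the added line is equivalent to:

    // Equivalent without the redundant self-cast (assumption: an ordinary
    // derived-to-base assignment is all that is needed here).
    scorer = new SmoothSingleBleuScorer;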
@@ -220,7 +225,7 @@ main(int argc, char** argv)
grammar_buf_out.open(grammar_buf_fn.c_str());
#endif
- unsigned in_sz = UINT_MAX; // input index, input size
+ unsigned in_sz = std::numeric_limits<unsigned>::max(); // input index, input size
vector<pair<score_t, score_t> > all_scores;
score_t max_score = 0.;
unsigned best_it = 0;
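The UINT_MAX swap in this hunk is behavior-preserving: std::numeric_limits<unsigned>::max() names the same constant without the <climits> macro, and the template form also works in generic code where the integer type is a parameter. A one-line illustration (C++11 static_assert):

    #include <climits>
    #include <limits>

    // Both spellings denote the same value.
    static_assert(std::numeric_limits<unsigned>::max() == UINT_MAX,
                  "numeric_limits<unsigned>::max() equals UINT_MAX");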
@@ -242,6 +247,7 @@ main(int argc, char** argv)
if (!scale_bleu_diff) cerr << setw(25) << "learning rate " << eta << endl;
else cerr << setw(25) << "learning rate " << "bleu diff" << endl;
cerr << setw(25) << "gamma " << gamma << endl;
+ cerr << setw(25) << "loss margin " << loss_margin << endl;
cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl;
if (pair_sampling == "XYX")
cerr << setw(25) << "hi lo " << hi_lo << endl;
@@ -424,12 +430,18 @@ main(int argc, char** argv)
for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
it != pairs.end(); it++) {
+#ifdef DTRAIN_FASTER_PERCEPTRON
+ bool rank_error = true; // pair filtering already did this for us
+ rank_errors++;
+ score_t margin = std::numeric_limits<float>::max();
+#else
bool rank_error = it->first.model <= it->second.model;
if (rank_error) rank_errors++;
- score_t margin = fabs(it->first.model - it->second.model);
- if (!rank_error && margin < 1) margin_violations++;
+ score_t margin = fabs(fabs(it->first.model) - fabs(it->second.model));
+ if (!rank_error && margin < loss_margin) margin_violations++;
+#endif
if (scale_bleu_diff) eta = it->first.score - it->second.score;
- if (rank_error || (gamma && margin<1)) {
+ if (rank_error || margin < loss_margin) {
SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
lambdas.plus_eq_v_times_s(diff_vec, eta);
if (gamma)
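Taken together, the rewritten loop is a margin perceptron: it updates on a genuine rank error, or on a correctly ranked pair whose model-score gap still falls inside loss_margin. Under DTRAIN_FASTER_PERCEPTRON the pair sampler has already filtered to misranked pairs, so the margin test is bypassed by pinning margin to float's maximum; with the default loss_margin of 0 the condition reduces to plain perceptron updates, since the margin is never negative. A condensed sketch of the decision rule, with a simplified struct standing in for ScoredHyp:

    #include <cmath>

    // Simplified stand-in for dtrain's ScoredHyp (for illustration only).
    struct Hyp { double model; double score; };

    // Update trigger as in the patched loop: a rank error, or a correctly
    // ranked pair whose gap is below the margin. Note the patch compares
    // absolute model scores rather than the raw difference.
    bool needs_update(const Hyp& better, const Hyp& worse, double loss_margin) {
      bool rank_error = better.model <= worse.model;
      double margin = std::fabs(std::fabs(better.model) - std::fabs(worse.model));
      return rank_error || margin < loss_margin;
    }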
@@ -534,8 +546,10 @@ main(int argc, char** argv)
cerr << _np << npairs/(float)in_sz << endl;
cerr << " avg # rank err: ";
cerr << rank_errors/(float)in_sz << endl;
+#ifndef DTRAIN_FASTER_PERCEPTRON
cerr << " avg # margin viol: ";
cerr << margin_violations/(float)in_sz << endl;
+#endif
cerr << " non0 feature count: " << nonz << endl;
cerr << " avg list sz: " << list_sz/(float)in_sz << endl;
cerr << " avg f count: " << f_count/(float)list_sz << endl;