summary | refs | log | tree | commit | diff
path: root/training
diff options
context:
space:
mode:
Diffstat (limited to 'training')
-rw-r--r-- training/dtrain/dtrain.cc 20
1 files changed, 10 insertions, 10 deletions
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index ae5b630a..4f29c499 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -12,7 +12,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
{
po::options_description ini("Configuration File Options");
ini.add_options()
- ("bitext,b", po::value<string>(), "bitext: 'src ||| tgt ||| tgt ||| ...'")
+ ("bitext,b", po::value<string>(), "bitext: 'src ||| tgt ||| tgt ||| ...'")
("output", po::value<string>()->default_value("-"), "output weights file, '-' for STDOUT")
("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)")
("decoder_config", po::value<string>(), "configuration file for cdec")
@@ -31,12 +31,11 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
("learning_rate", po::value<weight_t>()->default_value(1.0), "learning rate")
("gamma", po::value<weight_t>()->default_value(0.), "gamma for SVM (0 for perceptron)")
("select_weights", po::value<string>()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)")
- ("rescale", po::value<bool>()->zero_tokens(), "rescale weight vector after each input")
- ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010) UNTESTED")
+ ("rescale", po::value<bool>()->zero_tokens(), "(re)scale data and weight vector to unit length")
+ ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)")
("l1_reg_strength", po::value<weight_t>(), "l1 regularization strength")
("fselect", po::value<weight_t>()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO
("approx_bleu_d", po::value<score_t>()->default_value(0.9), "discount for approx. BLEU")
- ("scale_bleu_diff", po::value<bool>()->zero_tokens(), "learning rate <- bleu diff of a misranked pair")
("loss_margin", po::value<weight_t>()->default_value(0.), "update if no error in pref pair but model scores this near")
("max_pairs", po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.")
("pclr", po::value<string>()->default_value("no"), "use a (simple|adagrad) per-coordinate learning rate")
@@ -106,6 +105,7 @@ main(int argc, char** argv)
// handle most parameters
po::variables_map cfg;
if (!dtrain_init(argc, argv, &cfg)) exit(1); // something is wrong
+
bool quiet = false;
if (cfg.count("quiet")) quiet = true;
bool verbose = false;
@@ -139,8 +139,6 @@ main(int argc, char** argv)
bool batch = false;
if (cfg.count("batch")) batch = true;
if (loss_margin > 9998.) loss_margin = std::numeric_limits<float>::max();
- bool scale_bleu_diff = false;
- if (cfg.count("scale_bleu_diff")) scale_bleu_diff = true;
const string pclr = cfg["pclr"].as<string>();
bool average = false;
if (select_weights == "avg")
@@ -260,8 +258,7 @@ main(int argc, char** argv)
cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl;
if (sample_from == "kbest")
cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl;
- if (!scale_bleu_diff) cerr << setw(25) << "learning rate " << eta << endl;
- else cerr << setw(25) << "learning rate " << "bleu diff" << endl;
+ cerr << setw(25) << "learning rate " << eta << endl;
cerr << setw(25) << "gamma " << gamma << endl;
cerr << setw(25) << "loss margin " << loss_margin << endl;
cerr << setw(25) << "faster perceptron " << faster_perceptron << endl;
@@ -438,6 +435,10 @@ main(int argc, char** argv)
for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
it != pairs.end(); it++) {
+ if (rescale) {
+ it->first.f /= it->first.f.l2norm();
+ it->second.f /= it->second.f.l2norm();
+ }
score_t model_diff = it->first.model - it->second.model;
score_t loss = max(0.0, -1.0 * model_diff);
losses.push_back(loss);
@@ -475,7 +476,6 @@ main(int argc, char** argv)
if (!rank_error && margin < loss_margin) margin_violations++;
}
if (rank_error && ki==0) rank_errors++;
- if (scale_bleu_diff) eta = it->first.score - it->second.score;
if (rank_error || margin < loss_margin) {
SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
if (batch) {
@@ -673,7 +673,7 @@ main(int argc, char** argv)
if (!noup) {
if (!quiet) cerr << endl << "Writing weights file to '" << output_fn << "' ..." << endl;
if (select_weights == "last" || average) { // last, average
- WriteFile of(output_fn); // works with '-'
+ WriteFile of(output_fn);
ostream& o = *of.stream();
o.precision(17);
o << _np;