Diffstat (limited to 'training/dtrain/dtrain.cc')
-rw-r--r--  training/dtrain/dtrain.cc | 99
1 file changed, 54 insertions(+), 45 deletions(-)
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 1b7047b0..63b154b4 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -1,6 +1,6 @@
#include "dtrain.h"
-#include "score.h"
#include "sample.h"
+#include "score.h"
#include "update.h"
using namespace dtrain;
@@ -16,21 +16,20 @@ main(int argc, char** argv)
const size_t N = conf["N"].as<size_t>();
const size_t T = conf["iterations"].as<size_t>();
const weight_t eta = conf["learning_rate"].as<weight_t>();
- const weight_t error_margin = conf["error_margin"].as<weight_t>();
+ const weight_t margin = conf["margin"].as<weight_t>();
const bool average = conf["average"].as<bool>();
- const bool keep = conf["keep"].as<bool>();
const weight_t l1_reg = conf["l1_reg"].as<weight_t>();
+ const bool keep = conf["keep"].as<bool>();
const string output_fn = conf["output"].as<string>();
vector<string> print_weights;
- boost::split(print_weights, conf["print_weights"].as<string>(), boost::is_any_of(" "));
+ boost::split(print_weights, conf["print_weights"].as<string>(),
+ boost::is_any_of(" "));
// setup decoder
register_feature_functions();
SetSilent(true);
- ReadFile f(conf["decoder_config"].as<string>());
+ ReadFile f(conf["decoder_conf"].as<string>());
Decoder decoder(f.stream());
-
- // setup decoder observer
ScoredKbest* observer = new ScoredKbest(k, new PerSentenceBleuScorer(N));
// weights
@@ -44,25 +43,29 @@ main(int argc, char** argv)
// input
string input_fn = conf["bitext"].as<string>();
ReadFile input(input_fn);
- vector<string> buf; // source strings (decoder takes only strings)
- vector<vector<Ngrams> > buf_ngs; // compute ngrams and lengths of references
- vector<vector<size_t> > buf_ls; // just once
+ vector<string> buf; // decoder only accepts strings as input
+ vector<vector<Ngrams> > buf_ngs; // compute ngrams and lengths of references
+ vector<vector<size_t> > buf_ls; // just once
size_t input_sz = 0;
+ cerr << _p4;
// output configuration
- cerr << _p5 << "dtrain" << endl << "Parameters:" << endl;
+ cerr << "dtrain" << endl << "Parameters:" << endl;
cerr << setw(25) << "k " << k << endl;
cerr << setw(25) << "N " << N << endl;
cerr << setw(25) << "T " << T << endl;
cerr << setw(25) << "learning rate " << eta << endl;
- cerr << setw(25) << "error margin " << error_margin << endl;
+ cerr << setw(25) << "margin " << margin << endl;
cerr << setw(25) << "l1 reg " << l1_reg << endl;
- cerr << setw(25) << "decoder conf " << "'" << conf["decoder_config"].as<string>() << "'" << endl;
+ cerr << setw(25) << "decoder conf " << "'"
+ << conf["decoder_conf"].as<string>() << "'" << endl;
cerr << setw(25) << "input " << "'" << input_fn << "'" << endl;
cerr << setw(25) << "output " << "'" << output_fn << "'" << endl;
- if (conf.count("input_weights"))
- cerr << setw(25) << "weights in " << "'" << conf["input_weights"].as<string>() << "'" << endl;
- cerr << "(a dot per input)" << endl;
+ if (conf.count("input_weights")) {
+ cerr << setw(25) << "weights in " << "'"
+ << conf["input_weights"].as<string>() << "'" << endl;
+ }
+ cerr << "(1 dot per processed input)" << endl;
// meta
weight_t best=0., gold_prev=0.;
@@ -75,7 +78,7 @@ main(int argc, char** argv)
time_t start, end;
time(&start);
weight_t gold_sum=0., model_sum=0.;
- size_t i = 0, num_pairs = 0, feature_count = 0, list_sz = 0;
+ size_t i=0, num_up=0, feature_count=0, list_sz=0;
cerr << "Iteration #" << t+1 << " of " << T << "." << endl;
@@ -97,9 +100,10 @@ main(int argc, char** argv)
buf_ls.push_back({});
for (auto s: parts) {
vector<WordID> r;
- vector<string> tok;
- boost::split(tok, s, boost::is_any_of(" "));
- RegisterAndConvert(tok, r);
+ vector<string> toks;
+ boost::split(toks, s, boost::is_any_of(" "));
+ for (auto tok: toks)
+ r.push_back(TD::Convert(tok));
buf_ngs.back().emplace_back(MakeNgrams(r, N));
buf_ls.back().push_back(r.size());
}
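
(For readers following the hunk above: a minimal, self-contained sketch of what MakeNgrams plausibly computes, i.e. counts of all n-grams up to order N over a reference, as used later for per-sentence BLEU. The map-based NgramCounts typedef and the function name below are illustrative assumptions, not cdec's actual types.)

// Sketch only: count all n-grams up to order N in a token sequence.
// The map-based counts type is an assumption; cdec's real Ngrams
// structure may differ.
#include <cstddef>
#include <map>
#include <vector>

typedef unsigned WordID;
typedef std::map<std::vector<WordID>, size_t> NgramCounts;

NgramCounts
MakeNgramsSketch(const std::vector<WordID>& r, size_t N)
{
  NgramCounts ngs;
  for (size_t i = 0; i < r.size(); i++)
    for (size_t n = 1; n <= N && i+n <= r.size(); n++)
      ngs[std::vector<WordID>(r.begin()+i, r.begin()+i+n)]++; // one more occurrence
  return ngs;
}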
@@ -109,12 +113,16 @@ main(int argc, char** argv)
}
// produce some pretty output
- if (i == 0 || (i+1)%20==0)
- cerr << " ";
- cerr << ".";
+ if (next) {
+ if (i%20==0)
+ cerr << " ";
+ cerr << ".";
+ if ((i+1)%20==0)
+ cerr << " " << i+1 << endl;
+ } else {
+ cerr << " " << i << endl;
+ }
cerr.flush();
- if (!next)
- if (i%20 != 0) cerr << " " << i << endl;
// stop iterating
if (!next) break;
@@ -133,9 +141,8 @@ main(int argc, char** argv)
list_sz += observer->GetSize();
// get pairs and update
- vector<pair<ScoredHyp,ScoredHyp> > pairs;
SparseVector<weight_t> updates;
- num_pairs += CollectUpdates(samples, updates, error_margin);
+ num_up += CollectUpdates(samples, updates, margin);
SparseVector<weight_t> lambdas_copy;
if (l1_reg)
lambdas_copy = lambdas;
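
(The hunk above renames num_pairs to num_up and drops the unused pairs vector, since CollectUpdates generates the pairs internally. A hedged sketch of what a margin-based CollectUpdates typically does, not the actual implementation: for every k-best pair whose gold ordering is not respected by at least `margin` in model score, accumulate the feature-vector difference as a perceptron-style update. The Hyp struct and FeatVec typedef are stand-ins for dtrain's ScoredHyp and SparseVector<weight_t>.)

#include <cstddef>
#include <map>
#include <utility>
#include <vector>

typedef double weight_t;
typedef std::map<int, weight_t> FeatVec;         // stand-in for SparseVector<weight_t>
struct Hyp { weight_t gold, model; FeatVec f; }; // stand-in for ScoredHyp

size_t
CollectUpdatesSketch(const std::vector<Hyp>& kbest,
                     FeatVec& updates, weight_t margin)
{
  size_t num_up = 0;
  for (size_t i = 0; i < kbest.size(); i++) {
    for (size_t j = i+1; j < kbest.size(); j++) {
      const Hyp* hi = &kbest[i];                      // gold-better hypothesis
      const Hyp* lo = &kbest[j];
      if (hi->gold == lo->gold) continue;             // no preference, no update
      if (hi->gold < lo->gold) std::swap(hi, lo);
      if (hi->model - lo->model > margin) continue;   // margin already satisfied
      for (const auto& kv : hi->f) updates[kv.first] += kv.second;
      for (const auto& kv : lo->f) updates[kv.first] -= kv.second;
      num_up++;
    }
  }
  return num_up;
}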
@@ -147,11 +154,12 @@ main(int argc, char** argv)
if (l1_reg) {
SparseVector<weight_t>::iterator it = lambdas.begin();
for (; it != lambdas.end(); ++it) {
- if (it->second == 0) continue;
- if (!lambdas_copy.get(it->first) // new or..
- || lambdas_copy.get(it->first)!=it->second) // updated feature
+ weight_t v = it->second;
+ if (!v)
+ continue;
+ if (!lambdas_copy.get(it->first) // new or..
+ || lambdas_copy.get(it->first)!=v) // updated feature
{
- weight_t v = it->second;
if (v > 0) {
it->second = max(0., v - l1_reg);
} else {
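
(The hunk above hoists weight_t v out of the branch so the clipped-L1 step only touches features that are new or were updated this round. The else branch is cut off by the diff context; the standard complement of max(0., v - l1_reg) is min(0., v + l1_reg), i.e. soft-thresholding that shrinks each weight toward zero without letting it cross the origin. A compact sketch of that step; the elided branch is an assumption, not the verbatim code.)

#include <algorithm>

typedef double weight_t;

// Soft-threshold a single weight: shrink it toward zero by l1_reg,
// clamping at zero so regularization never flips the sign.
weight_t
ClipL1(weight_t v, weight_t l1_reg)
{
  if (v > 0) return std::max(0., v - l1_reg); // positive weights shrink down
  else       return std::min(0., v + l1_reg); // negative weights shrink up
}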
@@ -174,19 +182,19 @@ main(int argc, char** argv)
// stats
weight_t gold_avg = gold_sum/(weight_t)input_sz;
- size_t non_zero = (size_t)lambdas.num_nonzero();
- cerr << _p5 << _p << "WEIGHTS" << endl;
+ cerr << _p << "WEIGHTS" << endl;
for (auto name: print_weights)
cerr << setw(18) << name << " = " << lambdas.get(FD::Convert(name)) << endl;
cerr << " ---" << endl;
- cerr << _np << " 1best avg score: " << gold_avg;
- cerr << _p << " (" << gold_avg-gold_prev << ")" << endl;
- cerr << _np << " 1best avg model score: " << model_sum/(weight_t)input_sz << endl;
- cerr << " avg # pairs: ";
- cerr << _np << num_pairs/(float)input_sz << endl;
- cerr << " non-0 feature count: " << non_zero << endl;
- cerr << " avg list sz: " << list_sz/(float)input_sz << endl;
+ cerr << _np << " 1best avg score: " << gold_avg*100;
+ cerr << _p << " (" << (gold_avg-gold_prev)*100 << ")" << endl;
+ cerr << " 1best avg model score: "
+ << model_sum/(weight_t)input_sz << endl;
+ cerr << " avg # updates: ";
+ cerr << _np << num_up/(float)input_sz << endl;
+ cerr << " non-0 feature count: " << lambdas.num_nonzero() << endl;
cerr << " avg f count: " << feature_count/(float)list_sz << endl;
+ cerr << " avg list sz: " << list_sz/(float)input_sz << endl;
if (gold_avg > best) {
best = gold_avg;
@@ -197,7 +205,7 @@ main(int argc, char** argv)
time (&end);
time_t time_diff = difftime(end, start);
total_time += time_diff;
- cerr << _p2 << _np << "(time " << time_diff/60. << " min, ";
+ cerr << "(time " << time_diff/60. << " min, ";
cerr << time_diff/input_sz << " s/S)" << endl;
if (t+1 != T) cerr << endl;
@@ -211,15 +219,16 @@ main(int argc, char** argv)
// final weights
if (average) {
- w_average /= (weight_t)T;
+ w_average /= T;
w_average.init_vector(decoder_weights);
} else if (!keep) {
lambdas.init_vector(decoder_weights);
}
- Weights::WriteToFile(output_fn, decoder_weights, true);
+ if (average || !keep)
+ Weights::WriteToFile(output_fn, decoder_weights, true);
- cerr << _p5 << _np << endl << "---" << endl << "Best iteration: ";
- cerr << best_iteration+1 << " [GOLD = " << best << "]." << endl;
+ cerr << endl << "---" << endl << "Best iteration: ";
+ cerr << best_iteration+1 << " [GOLD = " << best*100 << "]." << endl;
cerr << "This took " << total_time/60. << " min." << endl;
return 0;