diff options
author | Patrick Simianer <p@simianer.de> | 2015-02-26 22:12:23 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-02-26 22:12:23 +0100 |
commit | b20c6665542cbfa1b4328b349d6912944a1483f2 (patch) | |
tree | ba909fb624c8a735c279788ef102fc24693ef2ae /training/dtrain/dtrain.cc | |
parent | 29ddfafb0dea599965e6a881c25b396a6db2f40f (diff) |
last tweaks & fixes
Diffstat (limited to 'training/dtrain/dtrain.cc')
-rw-r--r-- | training/dtrain/dtrain.cc | 99 |
1 files changed, 54 insertions, 45 deletions
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index 1b7047b0..63b154b4 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -1,6 +1,6 @@ #include "dtrain.h" -#include "score.h" #include "sample.h" +#include "score.h" #include "update.h" using namespace dtrain; @@ -16,21 +16,20 @@ main(int argc, char** argv) const size_t N = conf["N"].as<size_t>(); const size_t T = conf["iterations"].as<size_t>(); const weight_t eta = conf["learning_rate"].as<weight_t>(); - const weight_t error_margin = conf["error_margin"].as<weight_t>(); + const weight_t margin = conf["margin"].as<weight_t>(); const bool average = conf["average"].as<bool>(); - const bool keep = conf["keep"].as<bool>(); const weight_t l1_reg = conf["l1_reg"].as<weight_t>(); + const bool keep = conf["keep"].as<bool>(); const string output_fn = conf["output"].as<string>(); vector<string> print_weights; - boost::split(print_weights, conf["print_weights"].as<string>(), boost::is_any_of(" ")); + boost::split(print_weights, conf["print_weights"].as<string>(), + boost::is_any_of(" ")); // setup decoder register_feature_functions(); SetSilent(true); - ReadFile f(conf["decoder_config"].as<string>()); + ReadFile f(conf["decoder_conf"].as<string>()); Decoder decoder(f.stream()); - - // setup decoder observer ScoredKbest* observer = new ScoredKbest(k, new PerSentenceBleuScorer(N)); // weights @@ -44,25 +43,29 @@ main(int argc, char** argv) // input string input_fn = conf["bitext"].as<string>(); ReadFile input(input_fn); - vector<string> buf; // source strings (decoder takes only strings) - vector<vector<Ngrams> > buf_ngs; // compute ngrams and lengths of references - vector<vector<size_t> > buf_ls; // just once + vector<string> buf; // decoder only accepts strings as input + vector<vector<Ngrams> > buf_ngs; // compute ngrams and lengths of references + vector<vector<size_t> > buf_ls; // just once size_t input_sz = 0; + cerr << _p4; // output configuration - cerr << _p5 << "dtrain" << endl << "Parameters:" << endl; + cerr << "dtrain" << endl << "Parameters:" << endl; cerr << setw(25) << "k " << k << endl; cerr << setw(25) << "N " << N << endl; cerr << setw(25) << "T " << T << endl; cerr << setw(25) << "learning rate " << eta << endl; - cerr << setw(25) << "error margin " << error_margin << endl; + cerr << setw(25) << "margin " << margin << endl; cerr << setw(25) << "l1 reg " << l1_reg << endl; - cerr << setw(25) << "decoder conf " << "'" << conf["decoder_config"].as<string>() << "'" << endl; + cerr << setw(25) << "decoder conf " << "'" + << conf["decoder_conf"].as<string>() << "'" << endl; cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; - if (conf.count("input_weights")) - cerr << setw(25) << "weights in " << "'" << conf["input_weights"].as<string>() << "'" << endl; - cerr << "(a dot per input)" << endl; + if (conf.count("input_weights")) { + cerr << setw(25) << "weights in " << "'" + << conf["input_weights"].as<string>() << "'" << endl; + } + cerr << "(1 dot per processed input)" << endl; // meta weight_t best=0., gold_prev=0.; @@ -75,7 +78,7 @@ main(int argc, char** argv) time_t start, end; time(&start); weight_t gold_sum=0., model_sum=0.; - size_t i = 0, num_pairs = 0, feature_count = 0, list_sz = 0; + size_t i=0, num_up=0, feature_count=0, list_sz=0; cerr << "Iteration #" << t+1 << " of " << T << "." << endl; @@ -97,9 +100,10 @@ main(int argc, char** argv) buf_ls.push_back({}); for (auto s: parts) { vector<WordID> r; - vector<string> tok; - boost::split(tok, s, boost::is_any_of(" ")); - RegisterAndConvert(tok, r); + vector<string> toks; + boost::split(toks, s, boost::is_any_of(" ")); + for (auto tok: toks) + r.push_back(TD::Convert(tok)); buf_ngs.back().emplace_back(MakeNgrams(r, N)); buf_ls.back().push_back(r.size()); } @@ -109,12 +113,16 @@ main(int argc, char** argv) } // produce some pretty output - if (i == 0 || (i+1)%20==0) - cerr << " "; - cerr << "."; + if (next) { + if (i%20==0) + cerr << " "; + cerr << "."; + if ((i+1)%20==0) + cerr << " " << i+1 << endl; + } else { + cerr << " " << i << endl; + } cerr.flush(); - if (!next) - if (i%20 != 0) cerr << " " << i << endl; // stop iterating if (!next) break; @@ -133,9 +141,8 @@ main(int argc, char** argv) list_sz += observer->GetSize(); // get pairs and update - vector<pair<ScoredHyp,ScoredHyp> > pairs; SparseVector<weight_t> updates; - num_pairs += CollectUpdates(samples, updates, error_margin); + num_up += CollectUpdates(samples, updates, margin); SparseVector<weight_t> lambdas_copy; if (l1_reg) lambdas_copy = lambdas; @@ -147,11 +154,12 @@ main(int argc, char** argv) if (l1_reg) { SparseVector<weight_t>::iterator it = lambdas.begin(); for (; it != lambdas.end(); ++it) { - if (it->second == 0) continue; - if (!lambdas_copy.get(it->first) // new or.. - || lambdas_copy.get(it->first)!=it->second) // updated feature + weight_t v = it->second; + if (!v) + continue; + if (!lambdas_copy.get(it->first) // new or.. + || lambdas_copy.get(it->first)!=v) // updated feature { - weight_t v = it->second; if (v > 0) { it->second = max(0., v - l1_reg); } else { @@ -174,19 +182,19 @@ main(int argc, char** argv) // stats weight_t gold_avg = gold_sum/(weight_t)input_sz; - size_t non_zero = (size_t)lambdas.num_nonzero(); - cerr << _p5 << _p << "WEIGHTS" << endl; + cerr << _p << "WEIGHTS" << endl; for (auto name: print_weights) cerr << setw(18) << name << " = " << lambdas.get(FD::Convert(name)) << endl; cerr << " ---" << endl; - cerr << _np << " 1best avg score: " << gold_avg; - cerr << _p << " (" << gold_avg-gold_prev << ")" << endl; - cerr << _np << " 1best avg model score: " << model_sum/(weight_t)input_sz << endl; - cerr << " avg # pairs: "; - cerr << _np << num_pairs/(float)input_sz << endl; - cerr << " non-0 feature count: " << non_zero << endl; - cerr << " avg list sz: " << list_sz/(float)input_sz << endl; + cerr << _np << " 1best avg score: " << gold_avg*100; + cerr << _p << " (" << (gold_avg-gold_prev)*100 << ")" << endl; + cerr << " 1best avg model score: " + << model_sum/(weight_t)input_sz << endl; + cerr << " avg # updates: "; + cerr << _np << num_up/(float)input_sz << endl; + cerr << " non-0 feature count: " << lambdas.num_nonzero() << endl; cerr << " avg f count: " << feature_count/(float)list_sz << endl; + cerr << " avg list sz: " << list_sz/(float)input_sz << endl; if (gold_avg > best) { best = gold_avg; @@ -197,7 +205,7 @@ main(int argc, char** argv) time (&end); time_t time_diff = difftime(end, start); total_time += time_diff; - cerr << _p2 << _np << "(time " << time_diff/60. << " min, "; + cerr << "(time " << time_diff/60. << " min, "; cerr << time_diff/input_sz << " s/S)" << endl; if (t+1 != T) cerr << endl; @@ -211,15 +219,16 @@ main(int argc, char** argv) // final weights if (average) { - w_average /= (weight_t)T; + w_average /= T; w_average.init_vector(decoder_weights); } else if (!keep) { lambdas.init_vector(decoder_weights); } - Weights::WriteToFile(output_fn, decoder_weights, true); + if (average || !keep) + Weights::WriteToFile(output_fn, decoder_weights, true); - cerr << _p5 << _np << endl << "---" << endl << "Best iteration: "; - cerr << best_iteration+1 << " [GOLD = " << best << "]." << endl; + cerr << endl << "---" << endl << "Best iteration: "; + cerr << best_iteration+1 << " [GOLD = " << best*100 << "]." << endl; cerr << "This took " << total_time/60. << " min." << endl; return 0; |