From aaeb2dec23ff9257a9fc86ba49ee8d97f18138cd Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Thu, 27 Oct 2011 15:08:08 +0200
Subject: fix

---
 dtrain/dtrain.cc               | 89 ++++++++++++++++--------------------------
 dtrain/dtrain.h                |  9 +++++
 dtrain/test/example/dtrain.ini | 12 +++---
 3 files changed, 48 insertions(+), 62 deletions(-)

diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 05c3728d..27315358 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -7,21 +7,21 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
   po::options_description ini("Configuration File Options");
   ini.add_options()
     ("input",          po::value<string>()->default_value("-"), "input file")
-    ("output",         po::value<string>()->default_value("-"), "output weights file")
+    ("output",         po::value<string>()->default_value("-"), "output weights file, '-' for STDOUT")
     ("input_weights",  po::value<string>(), "input weights file (e.g. from previous iteration)")
     ("decoder_config", po::value<string>(), "configuration file for cdec")
-    ("k",              po::value<unsigned>()->default_value(100), "size of kbest or sample from forest")
-    ("sample_from",    po::value<string>()->default_value("kbest"), "where to get translations from")
-    ("filter",         po::value<string>()->default_value("unique"), "filter kbest list")
-    ("pair_sampling",  po::value<string>()->default_value("all"), "how to sample pairs: all, rand")
-    ("N",              po::value<unsigned>()->default_value(3), "N for Ngrams")
-    ("epochs",         po::value<unsigned>()->default_value(2), "# of iterations T")
-    ("scorer",         po::value<string>()->default_value("stupid_bleu"), "scoring metric")
+    ("sample_from",    po::value<string>()->default_value("kbest"), "where to sample translations from: kbest, forest")
+    ("k",              po::value<unsigned>()->default_value(100), "how many translations to sample")
+    ("filter",         po::value<string>()->default_value("unique"), "filter kbest list: no, unique")
+    ("pair_sampling",  po::value<string>()->default_value("all"), "how to sample pairs: all, rand, 108010")
+    ("N",              po::value<unsigned>()->default_value(3), "N for Ngrams (BLEU)")
+    ("epochs",         po::value<unsigned>()->default_value(2), "# of iterations T (per shard)")
+    ("scorer",         po::value<string>()->default_value("stupid_bleu"), "scoring: bleu, stupid_*, smooth_*, approx_*")
     ("stop_after",     po::value<unsigned>()->default_value(0), "stop after X input sentences")
     ("print_weights",  po::value<string>(), "weights to print on each iteration")
     ("hstreaming",     po::value<bool>()->zero_tokens(), "run in hadoop streaming mode")
-    ("learning_rate",  po::value<double>()->default_value(0.0005), "learning rate")
-    ("gamma",          po::value<double>()->default_value(0), "gamma for SVM (0 for perceptron)")
+    ("learning_rate",  po::value<weight_t>()->default_value(0.0005), "learning rate")
+    ("gamma",          po::value<weight_t>()->default_value(0), "gamma for SVM (0 for perceptron)")
     ("tmp",            po::value<string>()->default_value("/tmp"), "temp dir to use")
     ("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)")
     ("noup",           po::value<bool>()->zero_tokens(), "do not update weights");
@@ -142,8 +142,6 @@ main(int argc, char** argv)
   // meta params for perceptron, SVM
   weight_t eta = cfg["learning_rate"].as<weight_t>();
   weight_t gamma = cfg["gamma"].as<weight_t>();
-  WordID __bias = FD::Convert("__bias");
-  lambdas.add_value(__bias, 0);
 
   string output_fn = cfg["output"].as<string>();
   // input
@@ -158,7 +156,7 @@ main(int argc, char** argv)
   ogzstream grammar_buf_out;
   grammar_buf_out.open(grammar_buf_fn.c_str());
 
-  unsigned in_sz = 999999999; // input index, input size
+  unsigned in_sz = UINT_MAX; // input index, input size
   vector<pair<score_t, score_t> > all_scores;
   score_t max_score = 0.;
   unsigned best_it = 0;
@@ -286,23 +284,18 @@ main(int argc, char** argv)
 
     // get (scored) samples
     vector<ScoredHyp>* samples = observer->GetSamples();
 
-    // FIXME
-    /*if (verbose) {
-      cout << "[ref: '";
-      if (t > 0) cout << ref_ids_buf[ii]; <---
-      else cout << ref_ids;
-      cout << endl;
-      cout << _p5 << _np << "1best: " << "'" << (*samples)[0].w << "'" << endl;
-      cout << "SCORE=" << (*samples)[0].score << ",model="<< (*samples)[0].model << endl;
-      cout << "F{" << (*samples)[0].f << "} ]" << endl << endl;
-    }*/
-    /*cout << lambdas.get(FD::Convert("PhraseModel_0")) << endl;
-    cout << (*samples)[0].model << endl;
-    cout << "1best: ";
-    for (unsigned u = 0; u < (*samples)[0].w.size(); u++) cout << TD::Convert((*samples)[0].w[u]) << " ";
-    cout << endl;
-    cout << (*samples)[0].f << endl;
-    cout << "___" << endl;*/
+    if (verbose) {
+      cerr << "--- ref for " << ii << " ";
+      if (t > 0) printWordIDVec(ref_ids_buf[ii]);
+      else printWordIDVec(ref_ids);
+      for (unsigned u = 0; u < samples->size(); u++) {
+        cerr << _p5 << _np << "[" << u << ". '";
+        printWordIDVec((*samples)[u].w);
+        cerr << "'" << endl;
+        cerr << "SCORE=" << (*samples)[u].score << ",model=" << (*samples)[u].model << endl;
+        cerr << "F{" << (*samples)[u].f << "} ]" << endl << endl;
+      }
+    }
 
     score_sum += (*samples)[0].score;
     model_sum += (*samples)[0].model;
@@ -320,43 +313,28 @@ main(int argc, char** argv)
         for (vector<pair<ScoredHyp, ScoredHyp> >::iterator it = pairs.begin();
              it != pairs.end(); it++) {
+          score_t rank_error = it->second.score - it->first.score;
           if (!gamma) {
             // perceptron
-            if (it->first.score - it->second.score < 0) { // rank error
-              SparseVector<weight_t> dv = it->second.f - it->first.f;
-              dv.add_value(__bias, -1);
-              lambdas.plus_eq_v_times_s(dv, eta);
+            if (rank_error > 0) {
+              SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
+              lambdas.plus_eq_v_times_s(diff_vec, eta);
               nup++;
             }
           } else {
             // SVM
-            score_t rank_error = it->second.score - it->first.score;
-            if (rank_error > 0) {
-              SparseVector<weight_t> dv = it->second.f - it->first.f;
-              dv.add_value(__bias, -1);
-              lambdas.plus_eq_v_times_s(dv, eta);
-            }
-            // regularization
             score_t margin = it->first.model - it->second.model;
-            if (rank_error || margin < 1) {
-              lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta); // reg /= #EXAMPLES or #UPDATES ?
+            if (rank_error > 0 || margin < 1) {
+              SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
+              lambdas.plus_eq_v_times_s(diff_vec, eta);
               nup++;
             }
+            // regularization
+            lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs));
           }
         }
       }
 
-      // DEBUG
-      vector<weight_t> x;
-      lambdas.init_vector(&x);
-      //cout << "[" << ii << "]" << endl;
-      for (int jj = 0; jj < x.size(); jj++) {
-        //if (x[jj] != 0)
-        //cout << FD::Convert(jj) << " " << x[jj] << endl;
-      }
-      //cout << " --- " << endl;
-      // /DEBUG
-
       ++ii;
 
       if (hstreaming) cerr << "reporter:counter:dtrain,sid," << ii << endl;
@@ -375,8 +353,7 @@ main(int argc, char** argv)
     // print some stats
     score_t score_avg = score_sum/(score_t)in_sz;
     score_t model_avg = model_sum/(score_t)in_sz;
-    score_t score_diff;
-    score_t model_diff;
+    score_t score_diff, model_diff;
     if (t > 0) {
       score_diff = score_avg - all_scores[t-1].first;
       model_diff = model_avg - all_scores[t-1].second;
diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h
index 7c1509e4..f4d32ecb 100644
--- a/dtrain/dtrain.h
+++ b/dtrain/dtrain.h
@@ -3,6 +3,7 @@
 
 #include <iomanip>
 
+#include <climits>
 #include <iostream>
 #include <vector>
 
@@ -58,5 +59,13 @@
 inline ostream& _p2(ostream& out) { return out << setprecision(2); }
 inline ostream& _p5(ostream& out) { return out << setprecision(5); }
 inline ostream& _p9(ostream& out) { return out << setprecision(9); }
 
+inline void printWordIDVec(vector<WordID>& v)
+{
+  for (unsigned i = 0; i < v.size(); i++) {
+    cerr << TD::Convert(v[i]);
+    if (i < v.size()-1) cerr << " ";
+  }
+}
+
 #endif
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index 185d6d90..40f8e03f 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,14 +1,14 @@
 decoder_config=test/example/cdec.ini
 k=100
 N=3
-gamma=0
-epochs=5
+gamma=0.001
+epochs=20
 input=test/example/nc-1k-tabs.gz
 scorer=stupid_bleu
-output=-
-stop_after=100
-sample_from=kbest
-pair_sampling=all
+output=weights.gz
+#stop_after=100
+sample_from=forest
+pair_sampling=108010
 select_weights=VOID
 print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
 tmp=/tmp
-- 
cgit v1.2.3
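For readers of the dtrain.cc hunk above: a pair is mis-ranked when its second hypothesis (the one ranked lower by the model) has the better metric score. The perceptron branch updates only on such rank errors; the SVM branch also updates on margin violations (margin < 1) and now applies its L2 regularization once per pair, scaled by 1/npairs. What follows is a minimal, self-contained C++ sketch of that loop under stated assumptions: a plain std::map stands in for cdec's SparseVector<weight_t>, and the names Hyp, FeatVec, and update() are hypothetical, not cdec's API.

    // Sketch of the pairwise update loop above -- not cdec's actual API.
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    typedef double weight_t;
    typedef double score_t;
    typedef std::map<std::string, weight_t> FeatVec; // stand-in for SparseVector<weight_t>

    struct Hyp {       // stand-in for ScoredHyp
      FeatVec f;       // feature vector
      score_t score;   // metric score (e.g. stupid BLEU)
      score_t model;   // model score under the current weights
    };

    // w += scale * v; plays the role of plus_eq_v_times_s in the patch.
    // Safe when v aliases w: all keys already exist, so no insertion
    // happens during iteration and iterators stay valid.
    void plus_eq_v_times_s(FeatVec& w, const FeatVec& v, weight_t scale) {
      for (FeatVec::const_iterator it = v.begin(); it != v.end(); ++it)
        w[it->first] += scale * it->second;
    }

    // One pass over the sampled pairs. By construction 'first' outranks
    // 'second' under the model, so a positive rank_error means the model
    // preferred the hypothesis with the worse metric score.
    void update(FeatVec& lambdas, const std::vector<std::pair<Hyp, Hyp> >& pairs,
                weight_t eta, weight_t gamma) {
      const score_t npairs = pairs.size();
      for (std::vector<std::pair<Hyp, Hyp> >::const_iterator it = pairs.begin();
           it != pairs.end(); ++it) {
        score_t rank_error = it->second.score - it->first.score;
        if (gamma == 0) { // perceptron: update on rank errors only
          if (rank_error > 0) {
            FeatVec diff_vec = it->second.f;              // second.f - first.f
            plus_eq_v_times_s(diff_vec, it->first.f, -1);
            plus_eq_v_times_s(lambdas, diff_vec, eta);    // move toward the better hyp
          }
        } else { // SVM: also update when the margin is violated
          score_t margin = it->first.model - it->second.model;
          if (rank_error > 0 || margin < 1) {
            FeatVec diff_vec = it->second.f;
            plus_eq_v_times_s(diff_vec, it->first.f, -1);
            plus_eq_v_times_s(lambdas, diff_vec, eta);
          }
          // per-pair L2 regularization, scaled by 1/npairs as in the diff
          plus_eq_v_times_s(lambdas, lambdas, -2 * gamma * eta * (1. / npairs));
        }
      }
    }

The self-application plus_eq_v_times_s(lambdas, lambdas, -2*gamma*eta*(1./npairs)) simply scales every weight by (1 - 2*gamma*eta/npairs), i.e. a standard L2 shrinkage step, which is why the commit can drop the old "reg /= #EXAMPLES or #UPDATES ?" question in favor of a fixed per-pair scaling.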