From b9641702ba7aa86e9cc7ed0d4fffa4dd6271cc8f Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 13 Oct 2011 19:20:20 +0200 Subject: svm impl, faster --- dtrain/dtrain.cc | 140 ++++++++++++++++++++--------------------- dtrain/test/example/dtrain.ini | 8 +-- dtrain/test/toy/dtrain.ini | 13 ++-- dtrain/test/toy/in | 4 +- 4 files changed, 81 insertions(+), 84 deletions(-) (limited to 'dtrain') diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index 5d84f250..25858738 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -6,25 +6,25 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) { po::options_description ini("Configuration File Options"); ini.add_options() - ("input", po::value()->default_value("-"), "input file") - ("output", po::value()->default_value("-"), "output weights file (or VOID)") - ("input_weights", po::value(), "input weights file (e.g. from previous iteration)") - ("decoder_config", po::value(), "configuration file for cdec") - ("k", po::value()->default_value(100), "size of kbest or sample from forest") - ("sample_from", po::value()->default_value("kbest"), "where to get translations from") - ("filter", po::value()->default_value("unique"), "filter kbest list") - ("pair_sampling", po::value()->default_value("all"), "how to sample pairs: all, rand") - ("N", po::value()->default_value(3), "N for Ngrams") - ("epochs", po::value()->default_value(2), "# of iterations T") - ("scorer", po::value()->default_value("stupid_bleu"), "scoring metric") - ("stop_after", po::value()->default_value(0), "stop after X input sentences") - ("print_weights", po::value(), "weights to print on each iteration") - ("hstreaming", po::value()->zero_tokens(), "run in hadoop streaming mode") - ("learning_rate", po::value()->default_value(0.0005), "learning rate") - ("gamma", po::value()->default_value(0.), "gamma for SVM (0 for perceptron)") - ("tmp", po::value()->default_value("/tmp"), "temp dir to use") - ("select_weights", po::value()->default_value("last"), "output 'best' or 'last' weights") - ("noup", po::value()->zero_tokens(), "do not update weights"); + ("input", po::value()->default_value("-"), "input file") + ("output", po::value()->default_value("-"), "output weights file") + ("input_weights", po::value(), "input weights file (e.g. from previous iteration)") + ("decoder_config", po::value(), "configuration file for cdec") + ("k", po::value()->default_value(100), "size of kbest or sample from forest") + ("sample_from", po::value()->default_value("kbest"), "where to get translations from") + ("filter", po::value()->default_value("unique"), "filter kbest list") + ("pair_sampling", po::value()->default_value("all"), "how to sample pairs: all, rand") + ("N", po::value()->default_value(3), "N for Ngrams") + ("epochs", po::value()->default_value(2), "# of iterations T") + ("scorer", po::value()->default_value("stupid_bleu"), "scoring metric") + ("stop_after", po::value()->default_value(0), "stop after X input sentences") + ("print_weights", po::value(), "weights to print on each iteration") + ("hstreaming", po::value()->zero_tokens(), "run in hadoop streaming mode") + ("learning_rate", po::value()->default_value(0.0005), "learning rate") + ("gamma", po::value()->default_value(0), "gamma for SVM (0 for perceptron)") + ("tmp", po::value()->default_value("/tmp"), "temp dir to use") + ("select_weights", po::value()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)") + ("noup", po::value()->zero_tokens(), "do not update weights"); po::options_description cl("Command Line Options"); cl.add_options() ("config,c", po::value(), "dtrain config file") @@ -108,9 +108,9 @@ main(int argc, char** argv) } else if (scorer_str == "stupid_bleu") { scorer = dynamic_cast(new StupidBleuScorer); } else if (scorer_str == "smooth_bleu") { - scorer = dynamic_cast(new SmoothBleuScorer); + scorer = dynamic_cast(new SmoothBleuScorer); } else if (scorer_str == "approx_bleu") { - scorer = dynamic_cast(new StupidBleuScorer); // FIXME + scorer = dynamic_cast(new StupidBleuScorer); // FIXME } else { cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl; exit(1); @@ -138,8 +138,10 @@ main(int argc, char** argv) // meta params for perceptron, SVM double eta = cfg["learning_rate"].as(); double gamma = cfg["gamma"].as(); - lambdas.add_value(FD::Convert("__bias"), 0); + WordID __bias = FD::Convert("__bias"); + lambdas.add_value(__bias, 0); + string output_fn = cfg["output"].as(); // input string input_fn = cfg["input"].as(); ReadFile input(input_fn); @@ -169,8 +171,8 @@ main(int argc, char** argv) cerr << setw(25) << "stop_after " << stop_after << endl; if (cfg.count("input_weights")) cerr << setw(25) << "weights in" << cfg["input_weights"].as() << endl; - cerr << setw(25) << "input " << "'" << cfg["input"].as() << "'" << endl; - cerr << setw(25) << "output " << "'" << cfg["output"].as() << "'" << endl; + cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; + cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; if (sample_from == "kbest") cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; cerr << setw(25) << "learning rate " << eta << endl; @@ -190,9 +192,9 @@ main(int argc, char** argv) igzstream grammar_buf_in; if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str()); score_t score_sum = 0., model_sum = 0.; - unsigned ii = 0; + unsigned ii = 0, nup = 0, npairs = 0; if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl; - + while(true) { @@ -294,8 +296,7 @@ main(int argc, char** argv) score_sum += (*samples)[0].score; model_sum += (*samples)[0].model; -////////////////////////////////////////////////////////// - // UPDATE WEIGHTS + // weight updates if (!noup) { vector > pairs; if (pair_sampling == "all") @@ -304,45 +305,35 @@ main(int argc, char** argv) sample_rand_pairs(samples, pairs, &rng); if (pair_sampling == "108010") sample108010(samples, pairs); + npairs += pairs.size(); for (vector >::iterator it = pairs.begin(); it != pairs.end(); it++) { - - SparseVector dv; - if (it->first.score - it->second.score < 0) { - dv = it->second.f - it->first.f; - //} else { - //dv = it->first - it->second; - //} - dv.add_value(FD::Convert("__bias"), -1); - - //SparseVector reg; - //reg = lambdas * (2 * gamma); - //dv -= reg; - lambdas += dv * eta; - - if (verbose) { - /*cerr << "{{ f("<< it->first_rank <<") > f(" << it->second_rank << ") but g(i)="<< it->first_score <<" < g(j)="<< it->second_score << " so update" << endl; - cerr << " i " << TD::GetString(samples->sents[ti->first_rank]) << endl; - cerr << " " << samples->feats[ti->first_rank] << endl; - cerr << " j " << TD::GetString(samples->sents[ti->second_rank]) << endl; - cerr << " " << samples->feats[ti->second_rank] << endl; - cerr << " diff vec: " << dv << endl; - cerr << " lambdas after update: " << lambdas << endl; - cerr << "}}" << endl;*/ + if (!gamma) { + // perceptron + if (it->first.score - it->second.score < 0) { // rank error + SparseVector dv = it->second.f - it->first.f; + dv.add_value(__bias, -1); + lambdas.plus_eq_v_times_s(dv, eta); + nup++; } } else { - //SparseVector reg; - //reg = lambdas * (2 * gamma); - //lambdas += reg * (-eta); + // SVM + double rank_error = it->second.score - it->first.score; + if (rank_error > 0) { + SparseVector dv = it->second.f - it->first.f; + dv.add_value(__bias, -1); + lambdas.plus_eq_v_times_s(dv, eta); + } + // regularization + double margin = it->first.model - it->second.model; + if (rank_error || margin < 1) { + lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta); // reg /= #EXAMPLES or #UPDATES ? + nup++; + } } - } - - //double l2 = lambdas.l2norm(); - //if (l2) lambdas /= lambdas.l2norm(); } -////////////////////////////////////////////////////////// ++ii; @@ -369,15 +360,19 @@ main(int argc, char** argv) model_diff = model_avg; } if (!quiet) { - cerr << _p5 << _p << "WEIGHTS" << endl; - for (vector::iterator it = print_weights.begin(); it != print_weights.end(); it++) { - cerr << setw(16) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl; - } - cerr << " ---" << endl; - cerr << _np << " 1best avg score: " << score_avg; - cerr << _p << " (" << score_diff << ")" << endl; - cerr << _np << "1best avg model score: " << model_avg; - cerr << _p << " (" << model_diff << ")" << endl; + cerr << _p5 << _p << "WEIGHTS" << endl; + for (vector::iterator it = print_weights.begin(); it != print_weights.end(); it++) { + cerr << setw(18) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl; + } + cerr << " ---" << endl; + cerr << _np << " 1best avg score: " << score_avg; + cerr << _p << " (" << score_diff << ")" << endl; + cerr << _np << "1best avg model score: " << model_avg; + cerr << _p << " (" << model_diff << ")" << endl; + cerr << " avg #pairs: "; + cerr << _np << npairs/(float)in_sz << endl; + cerr << " avg #up: "; + cerr << nup/(float)in_sz << endl; } pair remember; remember.first = score_avg; @@ -412,9 +407,9 @@ main(int argc, char** argv) unlink(grammar_buf_fn.c_str()); if (!noup) { - if (!quiet) cerr << endl << "writing weights file to '" << cfg["output"].as() << "' ..."; + if (!quiet) cerr << endl << "writing weights file to '" << output_fn << "' ..." << endl; if (select_weights == "last") { // last - WriteFile out(cfg["output"].as()); + WriteFile out(output_fn); ostream& o = *out.stream(); o.precision(17); o << _np; @@ -423,9 +418,10 @@ main(int argc, char** argv) o << FD::Convert(it->first) << '\t' << it->second << endl; } if (hstreaming) cout << "__SHARD_COUNT__\t1" << endl; + } else if (select_weights == "VOID") { // do nothing } else { // best - if (cfg["output"].as() != "-") { - CopyFile(weights_files[best_it], cfg["output"].as()); + if (output_fn != "-") { + CopyFile(weights_files[best_it], output_fn); } else { ReadFile(weights_files[best_it]); string o; diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index 09c876d9..3e5c2cd1 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -1,15 +1,15 @@ decoder_config=test/example/cdec.ini k=100 N=3 +gamma=0 +#gamma=0.00001 epochs=4 -#input=test/example/nc-1k.gz input=test/example/nc-1k-tabs.gz scorer=stupid_bleu -output=VOID -#/tmp/weights.gz +output=- stop_after=100 sample_from=kbest pair_sampling=all +select_weights=VOID print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough tmp=/tmp -select_weights=best diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini index 3ab4f8d4..5bfa5b2d 100644 --- a/dtrain/test/toy/dtrain.ini +++ b/dtrain/test/toy/dtrain.ini @@ -1,8 +1,9 @@ -decoder_config=test/toy_example/cdec.ini -ksamples=4 -ngrams=3 +decoder_config=test/toy/cdec.ini +k=4 +N=3 epochs=2 -input=test/toy_example/toy.in -scorer=bleu +input=test/toy/in +scorer=stupid_bleu +sample_from=forest output=- -wprint=logp use_shell use_house PassThrough +print_weights=logp use_shell use_house PassThrough diff --git a/dtrain/test/toy/in b/dtrain/test/toy/in index 63f97158..d7b7d080 100644 --- a/dtrain/test/toy/in +++ b/dtrain/test/toy/in @@ -1,2 +1,2 @@ -0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 -1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 +0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 [JJ] ||| kleines ||| small ||| logp=0 [JJ] ||| kleines ||| little ||| logp=0 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 +1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 [JJ] ||| kleines ||| small ||| logp=0 [JJ] ||| kleines ||| little ||| logp=0 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 -- cgit v1.2.3