diff options
author | Patrick Simianer <p@simianer.de> | 2011-10-13 19:20:20 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-10-13 19:20:20 +0200 |
commit | c7735ab60e22bfec7245dc7af7f14b74459dada8 (patch) | |
tree | 52e08c54b0709ac98100265b2eab223f79686cd9 | |
parent | 3e8f5cd6191b700df08867e3eb0b8c03d2324fe3 (diff) |
svm impl, faster
-rw-r--r-- | dtrain/dtrain.cc | 140 | ||||
-rw-r--r-- | dtrain/test/example/dtrain.ini | 8 | ||||
-rw-r--r-- | dtrain/test/toy/dtrain.ini | 13 | ||||
-rw-r--r-- | dtrain/test/toy/in | 4 | ||||
-rw-r--r-- | utils/fast_sparse_vector.h | 8 |
5 files changed, 89 insertions, 84 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index 5d84f250..25858738 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -6,25 +6,25 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) { po::options_description ini("Configuration File Options"); ini.add_options() - ("input", po::value<string>()->default_value("-"), "input file") - ("output", po::value<string>()->default_value("-"), "output weights file (or VOID)") - ("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)") - ("decoder_config", po::value<string>(), "configuration file for cdec") - ("k", po::value<unsigned>()->default_value(100), "size of kbest or sample from forest") - ("sample_from", po::value<string>()->default_value("kbest"), "where to get translations from") - ("filter", po::value<string>()->default_value("unique"), "filter kbest list") - ("pair_sampling", po::value<string>()->default_value("all"), "how to sample pairs: all, rand") - ("N", po::value<unsigned>()->default_value(3), "N for Ngrams") - ("epochs", po::value<unsigned>()->default_value(2), "# of iterations T") - ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring metric") - ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences") - ("print_weights", po::value<string>(), "weights to print on each iteration") - ("hstreaming", po::value<bool>()->zero_tokens(), "run in hadoop streaming mode") - ("learning_rate", po::value<double>()->default_value(0.0005), "learning rate") - ("gamma", po::value<double>()->default_value(0.), "gamma for SVM (0 for perceptron)") - ("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use") - ("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights") - ("noup", po::value<bool>()->zero_tokens(), "do not update weights"); + ("input", po::value<string>()->default_value("-"), "input file") + ("output", po::value<string>()->default_value("-"), "output weights file") + ("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)") + ("decoder_config", po::value<string>(), "configuration file for cdec") + ("k", po::value<unsigned>()->default_value(100), "size of kbest or sample from forest") + ("sample_from", po::value<string>()->default_value("kbest"), "where to get translations from") + ("filter", po::value<string>()->default_value("unique"), "filter kbest list") + ("pair_sampling", po::value<string>()->default_value("all"), "how to sample pairs: all, rand") + ("N", po::value<unsigned>()->default_value(3), "N for Ngrams") + ("epochs", po::value<unsigned>()->default_value(2), "# of iterations T") + ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring metric") + ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences") + ("print_weights", po::value<string>(), "weights to print on each iteration") + ("hstreaming", po::value<bool>()->zero_tokens(), "run in hadoop streaming mode") + ("learning_rate", po::value<double>()->default_value(0.0005), "learning rate") + ("gamma", po::value<double>()->default_value(0), "gamma for SVM (0 for perceptron)") + ("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use") + ("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)") + ("noup", po::value<bool>()->zero_tokens(), "do not update weights"); po::options_description cl("Command Line Options"); cl.add_options() ("config,c", po::value<string>(), "dtrain config file") @@ -108,9 +108,9 @@ main(int argc, char** argv) } else if (scorer_str == "stupid_bleu") { scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer); } else if (scorer_str == "smooth_bleu") { - scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer); + scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer); } else if (scorer_str == "approx_bleu") { - scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer); // FIXME + scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer); // FIXME } else { cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl; exit(1); @@ -138,8 +138,10 @@ main(int argc, char** argv) // meta params for perceptron, SVM double eta = cfg["learning_rate"].as<double>(); double gamma = cfg["gamma"].as<double>(); - lambdas.add_value(FD::Convert("__bias"), 0); + WordID __bias = FD::Convert("__bias"); + lambdas.add_value(__bias, 0); + string output_fn = cfg["output"].as<string>(); // input string input_fn = cfg["input"].as<string>(); ReadFile input(input_fn); @@ -169,8 +171,8 @@ main(int argc, char** argv) cerr << setw(25) << "stop_after " << stop_after << endl; if (cfg.count("input_weights")) cerr << setw(25) << "weights in" << cfg["input_weights"].as<string>() << endl; - cerr << setw(25) << "input " << "'" << cfg["input"].as<string>() << "'" << endl; - cerr << setw(25) << "output " << "'" << cfg["output"].as<string>() << "'" << endl; + cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; + cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; if (sample_from == "kbest") cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; cerr << setw(25) << "learning rate " << eta << endl; @@ -190,9 +192,9 @@ main(int argc, char** argv) igzstream grammar_buf_in; if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str()); score_t score_sum = 0., model_sum = 0.; - unsigned ii = 0; + unsigned ii = 0, nup = 0, npairs = 0; if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl; - + while(true) { @@ -294,8 +296,7 @@ main(int argc, char** argv) score_sum += (*samples)[0].score; model_sum += (*samples)[0].model; -////////////////////////////////////////////////////////// - // UPDATE WEIGHTS + // weight updates if (!noup) { vector<pair<ScoredHyp,ScoredHyp> > pairs; if (pair_sampling == "all") @@ -304,45 +305,35 @@ main(int argc, char** argv) sample_rand_pairs(samples, pairs, &rng); if (pair_sampling == "108010") sample108010(samples, pairs); + npairs += pairs.size(); for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin(); it != pairs.end(); it++) { - - SparseVector<double> dv; - if (it->first.score - it->second.score < 0) { - dv = it->second.f - it->first.f; - //} else { - //dv = it->first - it->second; - //} - dv.add_value(FD::Convert("__bias"), -1); - - //SparseVector<double> reg; - //reg = lambdas * (2 * gamma); - //dv -= reg; - lambdas += dv * eta; - - if (verbose) { - /*cerr << "{{ f("<< it->first_rank <<") > f(" << it->second_rank << ") but g(i)="<< it->first_score <<" < g(j)="<< it->second_score << " so update" << endl; - cerr << " i " << TD::GetString(samples->sents[ti->first_rank]) << endl; - cerr << " " << samples->feats[ti->first_rank] << endl; - cerr << " j " << TD::GetString(samples->sents[ti->second_rank]) << endl; - cerr << " " << samples->feats[ti->second_rank] << endl; - cerr << " diff vec: " << dv << endl; - cerr << " lambdas after update: " << lambdas << endl; - cerr << "}}" << endl;*/ + if (!gamma) { + // perceptron + if (it->first.score - it->second.score < 0) { // rank error + SparseVector<double> dv = it->second.f - it->first.f; + dv.add_value(__bias, -1); + lambdas.plus_eq_v_times_s(dv, eta); + nup++; } } else { - //SparseVector<double> reg; - //reg = lambdas * (2 * gamma); - //lambdas += reg * (-eta); + // SVM + double rank_error = it->second.score - it->first.score; + if (rank_error > 0) { + SparseVector<double> dv = it->second.f - it->first.f; + dv.add_value(__bias, -1); + lambdas.plus_eq_v_times_s(dv, eta); + } + // regularization + double margin = it->first.model - it->second.model; + if (rank_error || margin < 1) { + lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta); // reg /= #EXAMPLES or #UPDATES ? + nup++; + } } - } - - //double l2 = lambdas.l2norm(); - //if (l2) lambdas /= lambdas.l2norm(); } -////////////////////////////////////////////////////////// ++ii; @@ -369,15 +360,19 @@ main(int argc, char** argv) model_diff = model_avg; } if (!quiet) { - cerr << _p5 << _p << "WEIGHTS" << endl; - for (vector<string>::iterator it = print_weights.begin(); it != print_weights.end(); it++) { - cerr << setw(16) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl; - } - cerr << " ---" << endl; - cerr << _np << " 1best avg score: " << score_avg; - cerr << _p << " (" << score_diff << ")" << endl; - cerr << _np << "1best avg model score: " << model_avg; - cerr << _p << " (" << model_diff << ")" << endl; + cerr << _p5 << _p << "WEIGHTS" << endl; + for (vector<string>::iterator it = print_weights.begin(); it != print_weights.end(); it++) { + cerr << setw(18) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl; + } + cerr << " ---" << endl; + cerr << _np << " 1best avg score: " << score_avg; + cerr << _p << " (" << score_diff << ")" << endl; + cerr << _np << "1best avg model score: " << model_avg; + cerr << _p << " (" << model_diff << ")" << endl; + cerr << " avg #pairs: "; + cerr << _np << npairs/(float)in_sz << endl; + cerr << " avg #up: "; + cerr << nup/(float)in_sz << endl; } pair<score_t,score_t> remember; remember.first = score_avg; @@ -412,9 +407,9 @@ main(int argc, char** argv) unlink(grammar_buf_fn.c_str()); if (!noup) { - if (!quiet) cerr << endl << "writing weights file to '" << cfg["output"].as<string>() << "' ..."; + if (!quiet) cerr << endl << "writing weights file to '" << output_fn << "' ..." << endl; if (select_weights == "last") { // last - WriteFile out(cfg["output"].as<string>()); + WriteFile out(output_fn); ostream& o = *out.stream(); o.precision(17); o << _np; @@ -423,9 +418,10 @@ main(int argc, char** argv) o << FD::Convert(it->first) << '\t' << it->second << endl; } if (hstreaming) cout << "__SHARD_COUNT__\t1" << endl; + } else if (select_weights == "VOID") { // do nothing } else { // best - if (cfg["output"].as<string>() != "-") { - CopyFile(weights_files[best_it], cfg["output"].as<string>()); + if (output_fn != "-") { + CopyFile(weights_files[best_it], output_fn); } else { ReadFile(weights_files[best_it]); string o; diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index 09c876d9..3e5c2cd1 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -1,15 +1,15 @@ decoder_config=test/example/cdec.ini k=100 N=3 +gamma=0 +#gamma=0.00001 epochs=4 -#input=test/example/nc-1k.gz input=test/example/nc-1k-tabs.gz scorer=stupid_bleu -output=VOID -#/tmp/weights.gz +output=- stop_after=100 sample_from=kbest pair_sampling=all +select_weights=VOID print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough tmp=/tmp -select_weights=best diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini index 3ab4f8d4..5bfa5b2d 100644 --- a/dtrain/test/toy/dtrain.ini +++ b/dtrain/test/toy/dtrain.ini @@ -1,8 +1,9 @@ -decoder_config=test/toy_example/cdec.ini -ksamples=4 -ngrams=3 +decoder_config=test/toy/cdec.ini +k=4 +N=3 epochs=2 -input=test/toy_example/toy.in -scorer=bleu +input=test/toy/in +scorer=stupid_bleu +sample_from=forest output=- -wprint=logp use_shell use_house PassThrough +print_weights=logp use_shell use_house PassThrough diff --git a/dtrain/test/toy/in b/dtrain/test/toy/in index 63f97158..d7b7d080 100644 --- a/dtrain/test/toy/in +++ b/dtrain/test/toy/in @@ -1,2 +1,2 @@ -0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 -1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 +0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 [JJ] ||| kleines ||| small ||| logp=0 [JJ] ||| kleines ||| little ||| logp=0 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 +1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 [JJ] ||| kleines ||| small ||| logp=0 [JJ] ||| kleines ||| little ||| logp=0 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 1301581a..8daaac85 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -214,6 +214,14 @@ class FastSparseVector { } return *this; } + template <typename O> + inline FastSparseVector<O>& plus_eq_v_times_s(const FastSparseVector<O>& other, const O scalar) { + const typename FastSparseVector<O>::const_iterator end = other.end(); + for (typename FastSparseVector<O>::const_iterator it = other.begin(); it != end; ++it) { + get_or_create_bin(it->first) += it->second * scalar; + } + return *this; + } inline FastSparseVector& operator-=(const FastSparseVector& other) { const typename FastSparseVector::const_iterator end = other.end(); for (typename FastSparseVector::const_iterator it = other.begin(); it != end; ++it) { |