summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2011-10-13 19:20:20 +0200
committerPatrick Simianer <p@simianer.de>2011-10-13 19:20:20 +0200
commitc7735ab60e22bfec7245dc7af7f14b74459dada8 (patch)
tree52e08c54b0709ac98100265b2eab223f79686cd9
parent3e8f5cd6191b700df08867e3eb0b8c03d2324fe3 (diff)
svm impl, faster
-rw-r--r--dtrain/dtrain.cc140
-rw-r--r--dtrain/test/example/dtrain.ini8
-rw-r--r--dtrain/test/toy/dtrain.ini13
-rw-r--r--dtrain/test/toy/in4
-rw-r--r--utils/fast_sparse_vector.h8
5 files changed, 89 insertions, 84 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 5d84f250..25858738 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -6,25 +6,25 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
{
po::options_description ini("Configuration File Options");
ini.add_options()
- ("input", po::value<string>()->default_value("-"), "input file")
- ("output", po::value<string>()->default_value("-"), "output weights file (or VOID)")
- ("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)")
- ("decoder_config", po::value<string>(), "configuration file for cdec")
- ("k", po::value<unsigned>()->default_value(100), "size of kbest or sample from forest")
- ("sample_from", po::value<string>()->default_value("kbest"), "where to get translations from")
- ("filter", po::value<string>()->default_value("unique"), "filter kbest list")
- ("pair_sampling", po::value<string>()->default_value("all"), "how to sample pairs: all, rand")
- ("N", po::value<unsigned>()->default_value(3), "N for Ngrams")
- ("epochs", po::value<unsigned>()->default_value(2), "# of iterations T")
- ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring metric")
- ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences")
- ("print_weights", po::value<string>(), "weights to print on each iteration")
- ("hstreaming", po::value<bool>()->zero_tokens(), "run in hadoop streaming mode")
- ("learning_rate", po::value<double>()->default_value(0.0005), "learning rate")
- ("gamma", po::value<double>()->default_value(0.), "gamma for SVM (0 for perceptron)")
- ("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use")
- ("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights")
- ("noup", po::value<bool>()->zero_tokens(), "do not update weights");
+ ("input", po::value<string>()->default_value("-"), "input file")
+ ("output", po::value<string>()->default_value("-"), "output weights file")
+ ("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)")
+ ("decoder_config", po::value<string>(), "configuration file for cdec")
+ ("k", po::value<unsigned>()->default_value(100), "size of kbest or sample from forest")
+ ("sample_from", po::value<string>()->default_value("kbest"), "where to get translations from")
+ ("filter", po::value<string>()->default_value("unique"), "filter kbest list")
+ ("pair_sampling", po::value<string>()->default_value("all"), "how to sample pairs: all, rand")
+ ("N", po::value<unsigned>()->default_value(3), "N for Ngrams")
+ ("epochs", po::value<unsigned>()->default_value(2), "# of iterations T")
+ ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring metric")
+ ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences")
+ ("print_weights", po::value<string>(), "weights to print on each iteration")
+ ("hstreaming", po::value<bool>()->zero_tokens(), "run in hadoop streaming mode")
+ ("learning_rate", po::value<double>()->default_value(0.0005), "learning rate")
+ ("gamma", po::value<double>()->default_value(0), "gamma for SVM (0 for perceptron)")
+ ("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use")
+ ("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)")
+ ("noup", po::value<bool>()->zero_tokens(), "do not update weights");
po::options_description cl("Command Line Options");
cl.add_options()
("config,c", po::value<string>(), "dtrain config file")
@@ -108,9 +108,9 @@ main(int argc, char** argv)
} else if (scorer_str == "stupid_bleu") {
scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer);
} else if (scorer_str == "smooth_bleu") {
- scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer);
+ scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer);
} else if (scorer_str == "approx_bleu") {
- scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer); // FIXME
+ scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer); // FIXME
} else {
cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl;
exit(1);
@@ -138,8 +138,10 @@ main(int argc, char** argv)
// meta params for perceptron, SVM
double eta = cfg["learning_rate"].as<double>();
double gamma = cfg["gamma"].as<double>();
- lambdas.add_value(FD::Convert("__bias"), 0);
+ WordID __bias = FD::Convert("__bias");
+ lambdas.add_value(__bias, 0);
+ string output_fn = cfg["output"].as<string>();
// input
string input_fn = cfg["input"].as<string>();
ReadFile input(input_fn);
@@ -169,8 +171,8 @@ main(int argc, char** argv)
cerr << setw(25) << "stop_after " << stop_after << endl;
if (cfg.count("input_weights"))
cerr << setw(25) << "weights in" << cfg["input_weights"].as<string>() << endl;
- cerr << setw(25) << "input " << "'" << cfg["input"].as<string>() << "'" << endl;
- cerr << setw(25) << "output " << "'" << cfg["output"].as<string>() << "'" << endl;
+ cerr << setw(25) << "input " << "'" << input_fn << "'" << endl;
+ cerr << setw(25) << "output " << "'" << output_fn << "'" << endl;
if (sample_from == "kbest")
cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl;
cerr << setw(25) << "learning rate " << eta << endl;
@@ -190,9 +192,9 @@ main(int argc, char** argv)
igzstream grammar_buf_in;
if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str());
score_t score_sum = 0., model_sum = 0.;
- unsigned ii = 0;
+ unsigned ii = 0, nup = 0, npairs = 0;
if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl;
-
+
while(true)
{
@@ -294,8 +296,7 @@ main(int argc, char** argv)
score_sum += (*samples)[0].score;
model_sum += (*samples)[0].model;
-//////////////////////////////////////////////////////////
- // UPDATE WEIGHTS
+ // weight updates
if (!noup) {
vector<pair<ScoredHyp,ScoredHyp> > pairs;
if (pair_sampling == "all")
@@ -304,45 +305,35 @@ main(int argc, char** argv)
sample_rand_pairs(samples, pairs, &rng);
if (pair_sampling == "108010")
sample108010(samples, pairs);
+ npairs += pairs.size();
for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
it != pairs.end(); it++) {
-
- SparseVector<double> dv;
- if (it->first.score - it->second.score < 0) {
- dv = it->second.f - it->first.f;
- //} else {
- //dv = it->first - it->second;
- //}
- dv.add_value(FD::Convert("__bias"), -1);
-
- //SparseVector<double> reg;
- //reg = lambdas * (2 * gamma);
- //dv -= reg;
- lambdas += dv * eta;
-
- if (verbose) {
- /*cerr << "{{ f("<< it->first_rank <<") > f(" << it->second_rank << ") but g(i)="<< it->first_score <<" < g(j)="<< it->second_score << " so update" << endl;
- cerr << " i " << TD::GetString(samples->sents[ti->first_rank]) << endl;
- cerr << " " << samples->feats[ti->first_rank] << endl;
- cerr << " j " << TD::GetString(samples->sents[ti->second_rank]) << endl;
- cerr << " " << samples->feats[ti->second_rank] << endl;
- cerr << " diff vec: " << dv << endl;
- cerr << " lambdas after update: " << lambdas << endl;
- cerr << "}}" << endl;*/
+ if (!gamma) {
+ // perceptron
+ if (it->first.score - it->second.score < 0) { // rank error
+ SparseVector<double> dv = it->second.f - it->first.f;
+ dv.add_value(__bias, -1);
+ lambdas.plus_eq_v_times_s(dv, eta);
+ nup++;
}
} else {
- //SparseVector<double> reg;
- //reg = lambdas * (2 * gamma);
- //lambdas += reg * (-eta);
+ // SVM
+ double rank_error = it->second.score - it->first.score;
+ if (rank_error > 0) {
+ SparseVector<double> dv = it->second.f - it->first.f;
+ dv.add_value(__bias, -1);
+ lambdas.plus_eq_v_times_s(dv, eta);
+ }
+ // regularization
+ double margin = it->first.model - it->second.model;
+ if (rank_error || margin < 1) {
+ lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta); // reg /= #EXAMPLES or #UPDATES ?
+ nup++;
+ }
}
-
}
-
- //double l2 = lambdas.l2norm();
- //if (l2) lambdas /= lambdas.l2norm();
}
-//////////////////////////////////////////////////////////
++ii;
@@ -369,15 +360,19 @@ main(int argc, char** argv)
model_diff = model_avg;
}
if (!quiet) {
- cerr << _p5 << _p << "WEIGHTS" << endl;
- for (vector<string>::iterator it = print_weights.begin(); it != print_weights.end(); it++) {
- cerr << setw(16) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl;
- }
- cerr << " ---" << endl;
- cerr << _np << " 1best avg score: " << score_avg;
- cerr << _p << " (" << score_diff << ")" << endl;
- cerr << _np << "1best avg model score: " << model_avg;
- cerr << _p << " (" << model_diff << ")" << endl;
+ cerr << _p5 << _p << "WEIGHTS" << endl;
+ for (vector<string>::iterator it = print_weights.begin(); it != print_weights.end(); it++) {
+ cerr << setw(18) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl;
+ }
+ cerr << " ---" << endl;
+ cerr << _np << " 1best avg score: " << score_avg;
+ cerr << _p << " (" << score_diff << ")" << endl;
+ cerr << _np << "1best avg model score: " << model_avg;
+ cerr << _p << " (" << model_diff << ")" << endl;
+ cerr << " avg #pairs: ";
+ cerr << _np << npairs/(float)in_sz << endl;
+ cerr << " avg #up: ";
+ cerr << nup/(float)in_sz << endl;
}
pair<score_t,score_t> remember;
remember.first = score_avg;
@@ -412,9 +407,9 @@ main(int argc, char** argv)
unlink(grammar_buf_fn.c_str());
if (!noup) {
- if (!quiet) cerr << endl << "writing weights file to '" << cfg["output"].as<string>() << "' ...";
+ if (!quiet) cerr << endl << "writing weights file to '" << output_fn << "' ..." << endl;
if (select_weights == "last") { // last
- WriteFile out(cfg["output"].as<string>());
+ WriteFile out(output_fn);
ostream& o = *out.stream();
o.precision(17);
o << _np;
@@ -423,9 +418,10 @@ main(int argc, char** argv)
o << FD::Convert(it->first) << '\t' << it->second << endl;
}
if (hstreaming) cout << "__SHARD_COUNT__\t1" << endl;
+ } else if (select_weights == "VOID") { // do nothing
} else { // best
- if (cfg["output"].as<string>() != "-") {
- CopyFile(weights_files[best_it], cfg["output"].as<string>());
+ if (output_fn != "-") {
+ CopyFile(weights_files[best_it], output_fn);
} else {
ReadFile(weights_files[best_it]);
string o;
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index 09c876d9..3e5c2cd1 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,15 +1,15 @@
decoder_config=test/example/cdec.ini
k=100
N=3
+gamma=0
+#gamma=0.00001
epochs=4
-#input=test/example/nc-1k.gz
input=test/example/nc-1k-tabs.gz
scorer=stupid_bleu
-output=VOID
-#/tmp/weights.gz
+output=-
stop_after=100
sample_from=kbest
pair_sampling=all
+select_weights=VOID
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
tmp=/tmp
-select_weights=best
diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini
index 3ab4f8d4..5bfa5b2d 100644
--- a/dtrain/test/toy/dtrain.ini
+++ b/dtrain/test/toy/dtrain.ini
@@ -1,8 +1,9 @@
-decoder_config=test/toy_example/cdec.ini
-ksamples=4
-ngrams=3
+decoder_config=test/toy/cdec.ini
+k=4
+N=3
epochs=2
-input=test/toy_example/toy.in
-scorer=bleu
+input=test/toy/in
+scorer=stupid_bleu
+sample_from=forest
output=-
-wprint=logp use_shell use_house PassThrough
+print_weights=logp use_shell use_house PassThrough
diff --git a/dtrain/test/toy/in b/dtrain/test/toy/in
index 63f97158..d7b7d080 100644
--- a/dtrain/test/toy/in
+++ b/dtrain/test/toy/in
@@ -1,2 +1,2 @@
-0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0
-1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0
+0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 [JJ] ||| kleines ||| small ||| logp=0 [JJ] ||| kleines ||| little ||| logp=0 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0
+1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 [JJ] ||| kleines ||| small ||| logp=0 [JJ] ||| kleines ||| little ||| logp=0 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0
diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
index 1301581a..8daaac85 100644
--- a/utils/fast_sparse_vector.h
+++ b/utils/fast_sparse_vector.h
@@ -214,6 +214,14 @@ class FastSparseVector {
}
return *this;
}
+ template <typename O>
+ inline FastSparseVector<O>& plus_eq_v_times_s(const FastSparseVector<O>& other, const O scalar) {
+ const typename FastSparseVector<O>::const_iterator end = other.end();
+ for (typename FastSparseVector<O>::const_iterator it = other.begin(); it != end; ++it) {
+ get_or_create_bin(it->first) += it->second * scalar;
+ }
+ return *this;
+ }
inline FastSparseVector& operator-=(const FastSparseVector& other) {
const typename FastSparseVector::const_iterator end = other.end();
for (typename FastSparseVector::const_iterator it = other.begin(); it != end; ++it) {