diff options
Diffstat (limited to 'dtrain')
-rw-r--r-- | dtrain/README.md | 1 | ||||
-rw-r--r-- | dtrain/dtrain.cc | 127 | ||||
-rw-r--r-- | dtrain/dtrain.h | 2 | ||||
-rw-r--r-- | dtrain/kbestget.h | 2 | ||||
-rw-r--r-- | dtrain/pairsampling.h | 174 | ||||
-rw-r--r-- | dtrain/test/example/cdec.ini | 2 | ||||
-rw-r--r-- | dtrain/test/example/dtrain.ini | 36 | ||||
-rw-r--r-- | dtrain/test/toy/dtrain.ini | 8 |
8 files changed, 147 insertions, 205 deletions
diff --git a/dtrain/README.md b/dtrain/README.md index c50f3cad..d78dc100 100644 --- a/dtrain/README.md +++ b/dtrain/README.md @@ -106,6 +106,7 @@ Todo * mira: 5/10/15, pro: (5)/10/20/30 (on devtest!) * sample pairs like in pro * mira forest sampling +* platform specific (108010!) Data ---- diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index 434ae2d6..581c985a 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -10,25 +10,26 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("output", po::value<string>()->default_value("-"), "output weights file, '-' for STDOUT") ("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)") ("decoder_config", po::value<string>(), "configuration file for cdec") - ("sample_from", po::value<string>()->default_value("kbest"), "where to sample translations from: kbest, forest") + ("print_weights", po::value<string>(), "weights to print on each iteration") + ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences") + ("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use") + ("keep", po::value<bool>()->zero_tokens(), "keep weights files for each iteration") + ("hstreaming", po::value<string>(), "run in hadoop streaming mode, arg is a task id") + ("epochs", po::value<unsigned>()->default_value(10), "# of iterations T (per shard)") ("k", po::value<unsigned>()->default_value(100), "how many translations to sample") - ("filter", po::value<string>()->default_value("uniq"), "filter kbest list: no, uniq") - ("pair_sampling", po::value<string>()->default_value("all"), "how to sample pairs: all, 5050, 108010, PRO") - ("N", po::value<unsigned>()->default_value(3), "N for Ngrams (BLEU)") - ("epochs", po::value<unsigned>()->default_value(2), "# of iterations T (per shard)") - ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring: bleu, stupid_*, smooth_*, approx_*") - ("learning_rate", po::value<weight_t>()->default_value(0.0005), "learning rate") + ("sample_from", po::value<string>()->default_value("kbest"), "where to sample translations from: 'kbest', 'forest'") + ("filter", po::value<string>()->default_value("uniq"), "filter kbest list: 'not', 'uniq'") + ("pair_sampling", po::value<string>()->default_value("108010"), "how to sample pairs: 'all', '108010' or 'PRO'") + ("pair_threshold", po::value<score_t>()->default_value(0), "bleu [0,1] threshold to filter pairs") + ("N", po::value<unsigned>()->default_value(4), "N for Ngrams (BLEU)") + ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring: bleu, stupid_, smooth_, approx_") + ("learning_rate", po::value<weight_t>()->default_value(0.0001), "learning rate") ("gamma", po::value<weight_t>()->default_value(0), "gamma for SVM (0 for perceptron)") ("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)") - ("unit_wv", po::value<bool>()->zero_tokens(), "Rescale weight vector after each input") - ("l1_reg", po::value<string>()->default_value("no"), "apply l1 regularization as in Tsuroka et al 2010") + ("rescale", po::value<bool>()->zero_tokens(), "rescale weight vector after each input") + ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)") ("l1_reg_strength", po::value<weight_t>(), "l1 regularization strength") - ("update_ok", po::value<bool>()->zero_tokens(), "include correctly ranked pairs into updates") - ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences") - ("keep_w", po::value<bool>()->zero_tokens(), "keep weights files for each iteration") - ("print_weights", po::value<string>(), "weights to print on each iteration") - ("hstreaming", po::value<string>(), "run in hadoop streaming mode, arg is a task id") - ("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use") + ("funny", po::value<bool>()->zero_tokens(), "include correctly ranked pairs into updates") #ifdef DTRAIN_LOCAL ("refs,r", po::value<string>(), "references in local mode") #endif @@ -64,18 +65,22 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) cerr << "Wrong 'sample_from' param: '" << (*cfg)["sample_from"].as<string>() << "', use 'kbest' or 'forest'." << endl; return false; } - if ((*cfg)["sample_from"].as<string>() == "kbest" && (*cfg)["filter"].as<string>() != "uniq" - && (*cfg)["filter"].as<string>() != "no") { - cerr << "Wrong 'filter' param: '" << (*cfg)["filter"].as<string>() << "', use 'uniq' or 'no'." << endl; + if ((*cfg)["sample_from"].as<string>() == "kbest" && (*cfg)["filter"].as<string>() != "uniq" && + (*cfg)["filter"].as<string>() != "not") { + cerr << "Wrong 'filter' param: '" << (*cfg)["filter"].as<string>() << "', use 'uniq' or 'not'." << endl; return false; } - string s = (*cfg)["pair_sampling"].as<string>(); - if (s != "all" && s != "5050" && s != "108010" && s != "PRO" && s != "alld" && s != "108010d") { + if ((*cfg)["pair_sampling"].as<string>() != "all" && (*cfg)["pair_sampling"].as<string>() != "108010" && + (*cfg)["pair_sampling"].as<string>() != "PRO") { cerr << "Wrong 'pair_sampling' param: '" << (*cfg)["pair_sampling"].as<string>() << "'." << endl; return false; } - if ((*cfg)["select_weights"].as<string>() != "last" - && (*cfg)["select_weights"].as<string>() != "best" && (*cfg)["select_weights"].as<string>() != "VOID") { + if ((*cfg)["pair_threshold"].as<score_t>() < 0) { + cerr << "The threshold must be >= 0!" << endl; + return false; + } + if ((*cfg)["select_weights"].as<string>() != "last" && (*cfg)["select_weights"].as<string>() != "best" && + (*cfg)["select_weights"].as<string>() != "VOID") { cerr << "Wrong 'select_weights' param: '" << (*cfg)["select_weights"].as<string>() << "', use 'last' or 'best'." << endl; return false; } @@ -102,14 +107,14 @@ main(int argc, char** argv) task_id = cfg["hstreaming"].as<string>(); cerr.precision(17); } - bool unit_wv = false; - if (cfg.count("unit_wv")) unit_wv = true; + bool rescale = false; + if (cfg.count("rescale")) rescale = true; HSReporter rep(task_id); - bool keep_w = false; - if (cfg.count("keep_w")) keep_w = true; - bool update_ok = false; - if (cfg.count("update_ok")) - update_ok = true; + bool keep = false; + if (cfg.count("keep")) keep = true; + bool funny = false; + if (cfg.count("funny")) + funny = true; const unsigned k = cfg["k"].as<unsigned>(); const unsigned N = cfg["N"].as<unsigned>(); @@ -118,6 +123,7 @@ main(int argc, char** argv) const string filter_type = cfg["filter"].as<string>(); const string sample_from = cfg["sample_from"].as<string>(); const string pair_sampling = cfg["pair_sampling"].as<string>(); + const score_t pair_threshold = cfg["pair_threshold"].as<score_t>(); const string select_weights = cfg["select_weights"].as<string>(); vector<string> print_weights; if (cfg.count("print_weights")) @@ -168,12 +174,13 @@ main(int argc, char** argv) // meta params for perceptron, SVM weight_t eta = cfg["learning_rate"].as<weight_t>(); weight_t gamma = cfg["gamma"].as<weight_t>(); + // l1 regularization bool l1naive = false; bool l1clip = false; bool l1cumul = false; weight_t l1_reg = 0; - if (cfg["l1_reg"].as<string>() != "no") { + if (cfg["l1_reg"].as<string>() != "none") { string s = cfg["l1_reg"].as<string>(); if (s == "naive") l1naive = true; else if (s == "clip") l1clip = true; @@ -191,7 +198,7 @@ main(int argc, char** argv) vector<vector<WordID> > ref_ids_buf; // references as WordID vecs // where temp files go string tmp_path = cfg["tmp"].as<string>(); - vector<string> w_tmp_files; // used for keep_w + vector<string> w_tmp_files; // used for keep #ifdef DTRAIN_LOCAL string refs_fn = cfg["refs"].as<string>(); ReadFile refs(refs_fn); @@ -214,28 +221,30 @@ main(int argc, char** argv) cerr << setw(25) << "k " << k << endl; cerr << setw(25) << "N " << N << endl; cerr << setw(25) << "T " << T << endl; - if (cfg.count("stop-after")) - cerr << setw(25) << "stop_after " << stop_after << endl; - if (cfg.count("input_weights")) - cerr << setw(25) << "weights in" << cfg["input_weights"].as<string>() << endl; - cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; -#ifdef DTRAIN_LOCAL - cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl; -#endif - cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; + cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl; if (sample_from == "kbest") cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; cerr << setw(25) << "learning rate " << eta << endl; cerr << setw(25) << "gamma " << gamma << endl; - cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl; cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl; + cerr << setw(25) << "pair threshold " << pair_threshold << endl; cerr << setw(25) << "select weights " << "'" << select_weights << "'" << endl; if (cfg.count("l1_reg")) cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as<string>() << "'" << endl; - if (update_ok) - cerr << setw(25) << "up ok " << update_ok << endl; - if (unit_wv) - cerr << setw(25) << "unit weight vec " << unit_wv << endl; + if (funny) + cerr << setw(25) << "funny " << funny << endl; + if (rescale) + cerr << setw(25) << "rescale " << rescale << endl; + cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl; + cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; +#ifdef DTRAIN_LOCAL + cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl; +#endif + cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; + if (cfg.count("input_weights")) + cerr << setw(25) << "weights in" << cfg["input_weights"].as<string>() << endl; + if (cfg.count("stop-after")) + cerr << setw(25) << "stop_after " << stop_after << endl; if (!verbose) cerr << "(a dot represents " << DTRAIN_DOTS << " lines of input)" << endl; } @@ -382,17 +391,11 @@ main(int argc, char** argv) if (!noup) { vector<pair<ScoredHyp,ScoredHyp> > pairs; if (pair_sampling == "all") - all_pairs(samples, pairs); - if (pair_sampling == "5050") - rand_pairs_5050(samples, pairs, &rng); + all_pairs(samples, pairs, pair_threshold); if (pair_sampling == "108010") - multpart108010(samples, pairs); + part108010(samples, pairs, pair_threshold); if (pair_sampling == "PRO") PROsampling(samples, pairs); - if (pair_sampling == "alld") - all_pairs_discard(samples, pairs); - if (pair_sampling == "108010d") - multpart108010_discard(samples, pairs); npairs += pairs.size(); for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin(); @@ -405,7 +408,7 @@ main(int argc, char** argv) lambdas.plus_eq_v_times_s(diff_vec, eta); rank_errors++; } else { - if (update_ok) { + if (funny) { SparseVector<weight_t> diff_vec = it->first.f - it->second.f; lambdas.plus_eq_v_times_s(diff_vec, eta); } @@ -429,6 +432,7 @@ main(int argc, char** argv) // TEST THIS // reset cumulative_penalties after 1 iter? // do this only once per INPUT (not per pair) +if (false) { if (l1naive) { for (unsigned d = 0; d < lambdas.size(); d++) { weight_t v = lambdas.get(d); @@ -462,9 +466,10 @@ main(int argc, char** argv) } } } +} //////// - if (unit_wv && sample_from == "forest") lambdas /= lambdas.l2norm(); + if (rescale) lambdas /= lambdas.l2norm(); ++ii; @@ -505,6 +510,9 @@ main(int argc, char** argv) score_diff = score_avg; model_diff = model_avg; } + + unsigned nonz; + if (!quiet || hstreaming) nonz = (unsigned)lambdas.size_nonzero(); if (!quiet) { cerr << _p5 << _p << "WEIGHTS" << endl; @@ -522,6 +530,8 @@ main(int argc, char** argv) cerr << rank_errors/(float)in_sz << endl; cerr << " avg #margin viol: "; cerr << margin_violations/float(in_sz) << endl; + cerr << " non0 feature count: "; + cerr << nonz << endl; } if (hstreaming) { @@ -530,7 +540,6 @@ main(int argc, char** argv) rep.update_counter("Pairs avg #"+boost::lexical_cast<string>(t+1), (unsigned)((npairs/(weight_t)in_sz)*DTRAIN_SCALE)); rep.update_counter("Rank errors avg #"+boost::lexical_cast<string>(t+1), (unsigned)((rank_errors/(weight_t)in_sz)*DTRAIN_SCALE)); rep.update_counter("Margin violations avg #"+boost::lexical_cast<string>(t+1), (unsigned)((margin_violations/(weight_t)in_sz)*DTRAIN_SCALE)); - unsigned nonz = (unsigned)lambdas.size_nonzero(); rep.update_counter("Non zero feature count #"+boost::lexical_cast<string>(t+1), nonz); rep.update_gcounter("Non zero feature count #"+boost::lexical_cast<string>(t+1), nonz); } @@ -555,7 +564,7 @@ main(int argc, char** argv) if (noup) break; // write weights to file - if (select_weights == "best" || keep_w) { + if (select_weights == "best" || keep) { lambdas.init_vector(&dense_weights); string w_fn = "weights." + boost::lexical_cast<string>(t) + ".gz"; Weights::WriteToFile(w_fn, dense_weights, true); @@ -589,7 +598,7 @@ main(int argc, char** argv) cout << _np; while(getline(*bestw, o)) cout << o << endl; } - if (!keep_w) { + if (!keep) { for (unsigned i = 0; i < T; i++) { string s = "weights." + boost::lexical_cast<string>(i) + ".gz"; unlink(s.c_str()); @@ -606,7 +615,7 @@ main(int argc, char** argv) cerr << _p2 << "This took " << overall_time/60. << " min." << endl; } - if (keep_w) { + if (keep) { cout << endl << "Weight files per iteration:" << endl; for (unsigned i = 0; i < w_tmp_files.size(); i++) { cout << w_tmp_files[i] << endl; diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h index 14ef410e..3d76bd7f 100644 --- a/dtrain/dtrain.h +++ b/dtrain/dtrain.h @@ -13,7 +13,7 @@ #include "filelib.h" -#define DTRAIN_LOCAL +//#define DTRAIN_LOCAL #define DTRAIN_DOTS 100 // when to display a '.' #define DTRAIN_GRAMMAR_DELIM "########EOS########" diff --git a/dtrain/kbestget.h b/dtrain/kbestget.h index 08104dec..1b96bbf4 100644 --- a/dtrain/kbestget.h +++ b/dtrain/kbestget.h @@ -88,7 +88,7 @@ struct KBestGetter : public HypSampler { if (filter_type_ == "uniq") { KBestUnique(forest); - } else if (filter_type_ == "no") { + } else if (filter_type_ == "not") { KBestNoFilter(forest); } } diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h index 0951f8e9..e866c8a0 100644 --- a/dtrain/pairsampling.h +++ b/dtrain/pairsampling.h @@ -5,72 +5,83 @@ namespace dtrain { -inline void -all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training) +bool +accept_pair(score_t a, score_t b, score_t threshold) { - for (unsigned i = 0; i < s->size()-1; i++) { - for (unsigned j = i+1; j < s->size(); j++) { - pair<ScoredHyp,ScoredHyp> p; - p.first = (*s)[i]; - p.second = (*s)[j]; - training.push_back(p); - } - } + if (fabs(a - b) < threshold) return false; + return true; } inline void -rand_pairs_5050(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, - MT19937* prng) +all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold) { for (unsigned i = 0; i < s->size()-1; i++) { for (unsigned j = i+1; j < s->size(); j++) { - if (prng->next() < .5) { - pair<ScoredHyp,ScoredHyp> p; - p.first = (*s)[i]; - p.second = (*s)[j]; - training.push_back(p); + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { + training.push_back(make_pair((*s)[i], (*s)[j])); + } + } else { + training.push_back(make_pair((*s)[i], (*s)[j])); } } } } +/* + * multipartite ranking + * sort by bleu + * compare top 10% to middle 80% and low 10% + * 80% to low 10% + */ bool -_multpart_cmp_hyp_by_score(ScoredHyp a, ScoredHyp b) +_108010_cmp_hyp_by_score(ScoredHyp a, ScoredHyp b) { return a.score < b.score; } inline void -multpart108010(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training) +part108010(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold) { - sort(s->begin(), s->end(), _multpart_cmp_hyp_by_score); - pair<ScoredHyp,ScoredHyp> p; + sort(s->begin(), s->end(), _108010_cmp_hyp_by_score); unsigned sz = s->size(); unsigned slice = 10; unsigned sep = sz%slice; if (sep == 0) sep = sz/slice; for (unsigned i = 0; i < sep; i++) { for (unsigned j = sep; j < sz; j++) { - p.first = (*s)[i]; - p.second = (*s)[j]; - if (p.first.rank < p.second.rank) training.push_back(p); + if ((*s)[i].rank < (*s)[j].rank) { + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { + training.push_back(make_pair((*s)[i], (*s)[j])); + } + } else { + training.push_back(make_pair((*s)[i], (*s)[j])); + } + } } } for (unsigned i = sep; i < sz-sep; i++) { for (unsigned j = sz-sep; j < sz; j++) { - p.first = (*s)[i]; - p.second = (*s)[j]; - if (p.first.rank < p.second.rank) training.push_back(p); + if ((*s)[i].rank < (*s)[j].rank) { + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { + training.push_back(make_pair((*s)[i], (*s)[j])); + } + } else { + training.push_back(make_pair((*s)[i], (*s)[j])); + } + } } } } - -inline bool -_PRO_accept_pair(pair<ScoredHyp,ScoredHyp> &p) -{ - if (fabs(p.first.score - p.second.score) < 0.05) return false; - return true; -} +/* + * pair sampling as in + * 'Tuning as Ranking' (Hopkins & May, 2011) + * count = 5000 + * threshold = 5% BLEU + * cut = top 50 + */ bool _PRO_cmp_pair_by_diff(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b) { @@ -78,19 +89,15 @@ _PRO_cmp_pair_by_diff(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b) return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); } inline void -PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training) // ugly +PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold=0.05) { unsigned max_count = 5000, count = 0; bool b = false; for (unsigned i = 0; i < s->size()-1; i++) { for (unsigned j = i+1; j < s->size(); j++) { - pair<ScoredHyp,ScoredHyp> p; - p.first = (*s)[i]; - p.second = (*s)[j]; - if (_PRO_accept_pair(p)) { - training.push_back(p); - count++; - if (count == max_count) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { + training.push_back(make_pair((*s)[i], (*s)[j])); + if (++count == max_count) { b = true; break; } @@ -98,88 +105,11 @@ PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training) } if (b) break; } - sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff); - if (training.size() > 50) + if (training.size() > 50) { + sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff); training.erase(training.begin()+50, training.end()); - return; -} - -inline void -all_pairs_discard(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training) -{ - for (unsigned i = 0; i < s->size()-1; i++) { - for (unsigned j = i+1; j < s->size(); j++) { - pair<ScoredHyp,ScoredHyp> p; - p.first = (*s)[i]; - p.second = (*s)[j]; - if(_PRO_accept_pair(p)) - training.push_back(p); - } } -} - -inline void -multpart108010_discard(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training) -{ - sort(s->begin(), s->end(), _multpart_cmp_hyp_by_score); - pair<ScoredHyp,ScoredHyp> p; - unsigned sz = s->size(); - unsigned slice = 10; - unsigned sep = sz%slice; - if (sep == 0) sep = sz/slice; - for (unsigned i = 0; i < sep; i++) { - for (unsigned j = sep; j < sz; j++) { - p.first = (*s)[i]; - p.second = (*s)[j]; - if (p.first.rank < p.second.rank) { - if (_PRO_accept_pair(p)) training.push_back(p); - } - } - } - for (unsigned i = sep; i < sz-sep; i++) { - for (unsigned j = sz-sep; j < sz; j++) { - p.first = (*s)[i]; - p.second = (*s)[j]; - if (p.first.rank < p.second.rank) { - if (_PRO_accept_pair(p)) training.push_back(p); - } - } - } - sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff); - if (training.size() > 50) - training.erase(training.begin()+50, training.end()); -} - -inline void -multpart108010_discard1(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training) -{ - sort(s->begin(), s->end(), _multpart_cmp_hyp_by_score); - pair<ScoredHyp,ScoredHyp> p; - unsigned sz = s->size(); - unsigned slice = 10; - unsigned sep = sz%slice; - if (sep == 0) sep = sz/slice; - for (unsigned i = 0; i < sep; i++) { - for (unsigned j = sep; j < sz; j++) { - p.first = (*s)[i]; - p.second = (*s)[j]; - if (p.first.rank < p.second.rank) { - if (_PRO_accept_pair(p)) training.push_back(p); - } - } - } - for (unsigned i = sep; i < sz-sep; i++) { - for (unsigned j = sz-sep; j < sz; j++) { - p.first = (*s)[i]; - p.second = (*s)[j]; - if (p.first.rank < p.second.rank) { - if (_PRO_accept_pair(p)) training.push_back(p); - } - } - } - sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff); - if (training.size() > 50) - training.erase(training.begin()+50, training.end()); + return; } diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini index 51edab09..14c1199b 100644 --- a/dtrain/test/example/cdec.ini +++ b/dtrain/test/example/cdec.ini @@ -5,8 +5,8 @@ intersection_strategy=cube_pruning cubepruning_pop_limit=30 feature_function=WordPenalty feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz - feature_function=RuleIdentityFeatures +# these also work with scfg translator #feature_function=SpanFeatures #feature_function=SourceWordPenalty #feature_function=SourceSpanSizeFeatures diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index 09b493ad..9b9f45e7 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -1,20 +1,20 @@ -decoder_config=test/example/cdec.ini -k=1500 -N=3 -learning_rate=0.0005 -gamma=0 -epochs=2 -input=test/example/nc-wmt11.1k.gz -output=- +input=test/example/nc-wmt11.1k.gz # use '-' for stdin +output=- # a weights file +decoder_config=test/example/cdec.ini # a ini for cdec +# these will be printed on each iteration +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +tmp=/tmp +#stop_after=10 + +# interesting stuff +epochs=10 +k=100 +N=4 +learning_rate=0.0001 +gamma=0.00001 scorer=stupid_bleu sample_from=kbest -#filter=unique -pair_sampling=PRO -select_weights=last -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough -tmp=/tmp -stop_after=100 -#keep_w= -#update_ok= -#l1_reg=clip -#l1_reg_strength=0.0001 +filter=uniq +pair_sampling=108010 +pair_threshold=0 +select_weights=VOID diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini index 105c07df..3548bbb6 100644 --- a/dtrain/test/toy/dtrain.ini +++ b/dtrain/test/toy/dtrain.ini @@ -1,9 +1,11 @@ decoder_config=test/toy/cdec.ini +input=test/toy/in +output=- +print_weights=logp use_shell use_house PassThrough + k=4 N=3 epochs=2 -input=test/toy/in -output=- scorer=stupid_bleu sample_from=kbest -print_weights=logp use_shell use_house PassThrough +filter=uniq |