diff options
Diffstat (limited to 'dtrain')
 dtrain/dtrain.cc               | 89
 dtrain/dtrain.h                |  9
 dtrain/test/example/dtrain.ini | 12
3 files changed, 48 insertions, 62 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index 05c3728d..27315358 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -7,21 +7,21 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)    po::options_description ini("Configuration File Options");    ini.add_options()      ("input",          po::value<string>()->default_value("-"),                                                "input file") -    ("output",         po::value<string>()->default_value("-"),                                       "output weights file") +    ("output",         po::value<string>()->default_value("-"),                       "output weights file, '-' for STDOUT")      ("input_weights",  po::value<string>(),                             "input weights file (e.g. from previous iteration)")      ("decoder_config", po::value<string>(),                                                   "configuration file for cdec") -    ("k",              po::value<unsigned>()->default_value(100),                     "size of kbest or sample from forest") -    ("sample_from",    po::value<string>()->default_value("kbest"),                        "where to get translations from") -    ("filter",         po::value<string>()->default_value("unique"),                                    "filter kbest list") -    ("pair_sampling",  po::value<string>()->default_value("all"),                          "how to sample pairs: all, rand") -    ("N",              po::value<unsigned>()->default_value(3),                                              "N for Ngrams") -    ("epochs",         po::value<unsigned>()->default_value(2),                                         "# of iterations T")  -    ("scorer",         po::value<string>()->default_value("stupid_bleu"),                                  "scoring metric") +    ("sample_from",    po::value<string>()->default_value("kbest"),      "where to sample translations from: kbest, forest") +    ("k",              po::value<unsigned>()->default_value(100),        
                 "how many translations to sample") +    ("filter",         po::value<string>()->default_value("unique"),                        "filter kbest list: no, unique") +    ("pair_sampling",  po::value<string>()->default_value("all"),                  "how to sample pairs: all, rand, 108010") +    ("N",              po::value<unsigned>()->default_value(3),                                       "N for Ngrams (BLEU)") +    ("epochs",         po::value<unsigned>()->default_value(2),                             "# of iterations T (per shard)")  +    ("scorer",         po::value<string>()->default_value("stupid_bleu"),     "scoring: bleu, stupid_*, smooth_*, approx_*")      ("stop_after",     po::value<unsigned>()->default_value(0),                              "stop after X input sentences")      ("print_weights",  po::value<string>(),                                            "weights to print on each iteration")      ("hstreaming",     po::value<bool>()->zero_tokens(),                                     "run in hadoop streaming mode") -    ("learning_rate",  po::value<weight_t>()->default_value(0.0005),                                          "learning rate") -    ("gamma",          po::value<weight_t>()->default_value(0),                            "gamma for SVM (0 for perceptron)") +    ("learning_rate",  po::value<weight_t>()->default_value(0.0005),                                        "learning rate") +    ("gamma",          po::value<weight_t>()->default_value(0),                          "gamma for SVM (0 for perceptron)")      ("tmp",            po::value<string>()->default_value("/tmp"),                                        "temp dir to use")      ("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)")      ("noup",           po::value<bool>()->zero_tokens(),                                            "do not update weights"); @@ -142,8 +142,6 @@ main(int argc, char** argv)    
// meta params for perceptron, SVM    weight_t eta = cfg["learning_rate"].as<weight_t>();    weight_t gamma = cfg["gamma"].as<weight_t>(); -  WordID __bias = FD::Convert("__bias"); -  lambdas.add_value(__bias, 0);    string output_fn = cfg["output"].as<string>();    // input @@ -158,7 +156,7 @@ main(int argc, char** argv)    ogzstream grammar_buf_out;    grammar_buf_out.open(grammar_buf_fn.c_str()); -  unsigned in_sz = 999999999; // input index, input size +  unsigned in_sz = UINT_MAX; // input index, input size    vector<pair<score_t, score_t> > all_scores;    score_t max_score = 0.;    unsigned best_it = 0; @@ -286,23 +284,18 @@ main(int argc, char** argv)      // get (scored) samples       vector<ScoredHyp>* samples = observer->GetSamples(); -    // FIXME -    /*if (verbose) { -      cout << "[ref: '"; -      if (t > 0) cout << ref_ids_buf[ii]; <--- -      else cout << ref_ids; -      cout << endl; -      cout << _p5 << _np << "1best: " << "'" << (*samples)[0].w << "'" << endl; -      cout << "SCORE=" << (*samples)[0].score << ",model="<< (*samples)[0].model << endl; -      cout << "F{" << (*samples)[0].f << "} ]" << endl << endl; -    }*/ -    /*cout << lambdas.get(FD::Convert("PhraseModel_0")) << endl; -    cout << (*samples)[0].model << endl; -    cout << "1best: "; -    for (unsigned u = 0; u < (*samples)[0].w.size(); u++) cout << TD::Convert((*samples)[0].w[u]) << " "; -    cout << endl; -    cout << (*samples)[0].f << endl; -    cout << "___" << endl;*/ +    if (verbose) { +      cerr << "--- ref for " << ii << " "; +      if (t > 0) printWordIDVec(ref_ids_buf[ii]); +      else printWordIDVec(ref_ids); +      for (unsigned u = 0; u < samples->size(); u++) { +        cerr << _p5 << _np << "[" << u << ". 
'"; +        printWordIDVec((*samples)[u].w); +        cerr << "'" << endl; +        cerr << "SCORE=" << (*samples)[0].score << ",model="<< (*samples)[0].model << endl; +        cerr << "F{" << (*samples)[0].f << "} ]" << endl << endl; +      } +    }      score_sum += (*samples)[0].score;      model_sum += (*samples)[0].model; @@ -320,43 +313,28 @@ main(int argc, char** argv)        for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();             it != pairs.end(); it++) { +        score_t rank_error = it->second.score - it->first.score;          if (!gamma) {            // perceptron -          if (it->first.score - it->second.score < 0) { // rank error -            SparseVector<weight_t> dv = it->second.f - it->first.f; -            dv.add_value(__bias, -1); -            lambdas.plus_eq_v_times_s(dv, eta); +          if (rank_error > 0) { +            SparseVector<weight_t> diff_vec = it->second.f - it->first.f; +            lambdas.plus_eq_v_times_s(diff_vec, eta);              nup++;            }          } else {            // SVM -          score_t rank_error = it->second.score - it->first.score; -          if (rank_error > 0) { -            SparseVector<weight_t> dv = it->second.f - it->first.f; -            dv.add_value(__bias, -1); -            lambdas.plus_eq_v_times_s(dv, eta); -          } -          // regularization            score_t margin = it->first.model - it->second.model; -          if (rank_error || margin < 1) { -            lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta); // reg /= #EXAMPLES or #UPDATES ? 
+          if (rank_error > 0 || margin < 1) { +            SparseVector<weight_t> diff_vec = it->second.f - it->first.f; +            lambdas.plus_eq_v_times_s(diff_vec, eta);              nup++;            } +          // regularization +          lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs));          }        }      } -    // DEBUG -    vector<weight_t> x; -    lambdas.init_vector(&x); -    //cout << "[" << ii << "]" << endl; -    for (int jj = 0; jj < x.size(); jj++) { -      //if (x[jj] != 0) -        //cout << FD::Convert(jj) << " " << x[jj] << endl;  -    } -    //cout << " --- " << endl; -    // /DEBUG -      ++ii;      if (hstreaming) cerr << "reporter:counter:dtrain,sid," << ii << endl; @@ -375,8 +353,7 @@ main(int argc, char** argv)    // print some stats    score_t score_avg = score_sum/(score_t)in_sz;    score_t model_avg = model_sum/(score_t)in_sz; -  score_t score_diff; -  score_t model_diff; +  score_t score_diff, model_diff;    if (t > 0) {      score_diff = score_avg - all_scores[t-1].first;      model_diff = model_avg - all_scores[t-1].second; diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h index 7c1509e4..f4d32ecb 100644 --- a/dtrain/dtrain.h +++ b/dtrain/dtrain.h @@ -3,6 +3,7 @@  #include <iomanip> +#include <climits>  #include <string.h>  #include <boost/algorithm/string.hpp> @@ -58,5 +59,13 @@ inline ostream& _p2(ostream& out) { return out << setprecision(2); }  inline ostream& _p5(ostream& out) { return out << setprecision(5); }  inline ostream& _p9(ostream& out) { return out << setprecision(9); } +inline void printWordIDVec(vector<WordID>& v) +{ +  for (unsigned i = 0; i < v.size(); i++) { +    cerr << TD::Convert(v[i]); +    if (i < v.size()-1) cerr << " "; +  } +} +  #endif diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index 185d6d90..40f8e03f 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -1,14 +1,14 @@  decoder_config=test/example/cdec.ini  k=100  N=3 
-gamma=0 -epochs=5 +gamma=0.001 +epochs=20  input=test/example/nc-1k-tabs.gz  scorer=stupid_bleu -output=- -stop_after=100 -sample_from=kbest -pair_sampling=all +output=weights.gz +#stop_after=100 +sample_from=forest +pair_sampling=108010  select_weights=VOID  print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough  tmp=/tmp  | 
