Diffstat (limited to 'training')
-rw-r--r--  training/Makefile.am             |   8
-rw-r--r--  training/augment_grammar.cc      |   4
-rw-r--r--  training/collapse_weights.cc     |   6
-rw-r--r--  training/compute_cllh.cc         |  23
-rw-r--r--  training/grammar_convert.cc      |   8
-rw-r--r--  training/mpi_batch_optimize.cc   | 127
-rw-r--r--  training/mpi_online_optimize.cc  |  69
-rw-r--r--  training/mr_optimize_reduce.cc   |  19
8 files changed, 72 insertions, 192 deletions
diff --git a/training/Makefile.am b/training/Makefile.am
index e075e417..6e2c06f5 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -12,9 +12,7 @@ bin_PROGRAMS = \
   cllh_filter_grammar \
   mpi_online_optimize \
   mpi_batch_optimize \
-  mpi_em_optimize \
   compute_cllh \
-  feature_expectations \
   augment_grammar
 
 noinst_PROGRAMS = \
@@ -29,12 +27,6 @@ mpi_online_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval
 
 mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc
 mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
 
-feature_expectations_SOURCES = feature_expectations.cc
-feature_expectations_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_em_optimize_SOURCES = mpi_em_optimize.cc optimize.cc
-mpi_em_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
 compute_cllh_SOURCES = compute_cllh.cc
 compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
diff --git a/training/augment_grammar.cc b/training/augment_grammar.cc
index df8d4ee8..e89a92d5 100644
--- a/training/augment_grammar.cc
+++ b/training/augment_grammar.cc
@@ -134,9 +134,7 @@ int main(int argc, char** argv) {
   } else { ngram = NULL; }
   extra_feature = conf.count("extra_lex_feature") > 0;
   if (conf.count("collapse_weights")) {
-    Weights w;
-    w.InitFromFile(conf["collapse_weights"].as<string>());
-    w.InitVector(&col_weights);
+    Weights::InitFromFile(conf["collapse_weights"].as<string>(), &col_weights);
   }
   clear_features = conf.count("clear_features_after_collapse") > 0;
   gather_rules = false;
diff --git a/training/collapse_weights.cc b/training/collapse_weights.cc
index 4fb742fb..dc480f6c 100644
--- a/training/collapse_weights.cc
+++ b/training/collapse_weights.cc
@@ -59,10 +59,8 @@ int main(int argc, char** argv) {
   InitCommandLine(argc, argv, &conf);
   const string wfile = conf["weights"].as<string>();
   const string gfile = conf["grammar"].as<string>();
-  Weights wm;
-  wm.InitFromFile(wfile);
-  vector<double> w;
-  wm.InitVector(&w);
+  vector<weight_t> w;
+  Weights::InitFromFile(wfile, &w);
   MarginalMap e_tots;
   MarginalMap f_tots;
   prob_t tot;
diff --git a/training/compute_cllh.cc b/training/compute_cllh.cc
index 332f6d0c..b496d196 100644
--- a/training/compute_cllh.cc
+++ b/training/compute_cllh.cc
@@ -148,15 +148,6 @@ int main(int argc, char** argv) {
   if (!InitCommandLine(argc, argv, &conf))
     return false;
 
-  // load initial weights
-  Weights weights;
-  if (conf.count("weights"))
-    weights.InitFromFile(conf["weights"].as<string>());
-
-  // freeze feature set
-  //const bool freeze_feature_set = conf.count("freeze_feature_set");
-  //if (freeze_feature_set) FD::Freeze();
-
   // load cdec.ini and set up decoder
   ReadFile ini_rf(conf["decoder_config"].as<string>());
   Decoder decoder(ini_rf.stream());
@@ -165,17 +156,22 @@ int main(int argc, char** argv) {
     abort();
   }
 
+  // load weights
+  vector<weight_t>& weights = decoder.CurrentWeightVector();
+  if (conf.count("weights"))
+    Weights::InitFromFile(conf["weights"].as<string>(), &weights);
+
+  // freeze feature set
+  //const bool freeze_feature_set = conf.count("freeze_feature_set");
+  //if (freeze_feature_set) FD::Freeze();
+
   vector<string> corpus; vector<int> ids;
   ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
   assert(corpus.size() > 0);
   assert(corpus.size() == ids.size());
-  vector<double> wv;
-  weights.InitVector(&wv);
-  decoder.SetWeights(wv);
   TrainingObserver observer;
   double objective = 0;
-  bool converged = false;
 
   observer.Reset();
   if (rank == 0)
@@ -197,3 +193,4 @@ int main(int argc, char** argv) {
   return 0;
 }
+
diff --git a/training/grammar_convert.cc b/training/grammar_convert.cc
index 8d292f8a..bf8abb26 100644
--- a/training/grammar_convert.cc
+++ b/training/grammar_convert.cc
@@ -251,12 +251,10 @@ int main(int argc, char **argv) {
   const bool is_split_input = (conf["format"].as<string>() == "split");
   const bool is_json_input = is_split_input || (conf["format"].as<string>() == "json");
   const bool collapse_weights = conf.count("collapse_weights");
-  Weights wts;
   vector<double> w;
-  if (conf.count("weights")) {
-    wts.InitFromFile(conf["weights"].as<string>());
-    wts.InitVector(&w);
-  }
+  if (conf.count("weights"))
+    Weights::InitFromFile(conf["weights"].as<string>(), &w);
+
   if (collapse_weights && !w.size()) {
     cerr << "--collapse_weights requires a weights file to be specified!\n";
     exit(1);
diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc
index 39a8af7d..cc5953f6 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/mpi_batch_optimize.cc
@@ -31,42 +31,12 @@ using namespace std;
 using boost::shared_ptr;
 namespace po = boost::program_options;
 
-void SanityCheck(const vector<double>& w) {
-  for (int i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
-  }
-}
-
-struct FComp {
-  const vector<double>& w_;
-  FComp(const vector<double>& w) : w_(w) {}
-  bool operator()(int a, int b) const {
-    return fabs(w_[a]) > fabs(w_[b]);
-  }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
-  vector<int> fnums(w.size());
-  for (int i = 0; i < w.size(); ++i)
-    fnums[i] = i;
-  vector<int>::iterator mid = fnums.begin();
-  mid += (w.size() > 10 ? 10 : w.size());
-  partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
-  cerr << "TOP FEATURES:";
-  for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
-    cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
-  }
-  cerr << endl;
-}
-
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
         ("input_weights,w",po::value<string>(),"Input feature weights file")
         ("training_data,t",po::value<string>(),"Training data")
         ("decoder_config,d",po::value<string>(),"Decoder configuration file")
-        ("sharded_input,s",po::value<string>(), "Corpus and grammar files are 'sharded' so each processor loads its own input and grammar file. Argument is the directory containing the shards.")
         ("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file")
         ("optimization_method,m", po::value<string>()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)")
 	("correction_buffers,M", po::value<int>()->default_value(10), "Number of gradients for LBFGS to maintain in memory")
@@ -88,14 +58,10 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   }
   po::notify(*conf);
 
-  if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data") | conf->count("sharded_input")) || !conf->count("decoder_config")) {
+  if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data")) || !conf->count("decoder_config")) {
     cerr << dcmdline_options << endl;
     return false;
   }
-  if (conf->count("training_data") && conf->count("sharded_input")) {
-    cerr << "Cannot specify both --training_data and --sharded_input\n";
-    return false;
-  }
   return true;
 }
 
@@ -236,42 +202,9 @@ int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
 
-  string shard_dir;
-  if (conf.count("sharded_input")) {
-    shard_dir = conf["sharded_input"].as<string>();
-    if (!DirectoryExists(shard_dir)) {
-      if (rank == 0) cerr << "Can't find shard directory: " << shard_dir << endl;
-      return 1;
-    }
-    if (rank == 0)
-      cerr << "Shard directory: " << shard_dir << endl;
-  }
-
-  // load initial weights
-  Weights weights;
-  if (rank == 0) { cerr << "Loading weights...\n"; }
-  weights.InitFromFile(conf["input_weights"].as<string>());
-  if (rank == 0) { cerr << "Done loading weights.\n"; }
-
-  // freeze feature set (should be optional?)
-  const bool freeze_feature_set = true;
-  if (freeze_feature_set) FD::Freeze();
-
   // load cdec.ini and set up decoder
   vector<string> cdec_ini;
   ReadConfig(conf["decoder_config"].as<string>(), &cdec_ini);
-  if (shard_dir.size()) {
-    if (rank == 0) {
-      for (int i = 0; i < cdec_ini.size(); ++i) {
-        if (cdec_ini[i].find("grammar=") == 0) {
-          cerr << "!!! using sharded input and " << conf["decoder_config"].as<string>() << " contains a grammar specification:\n" << cdec_ini[i] << "\n  VERIFY THAT THIS IS CORRECT!\n";
-        }
-      }
-    }
-    ostringstream g;
-    g << "grammar=" << shard_dir << "/grammar." << rank << "_of_" << size << ".gz";
-    cdec_ini.push_back(g.str());
-  }
   istringstream ini;
   StoreConfig(cdec_ini, &ini);
   if (rank == 0) cerr << "Loading grammar...\n";
@@ -282,22 +215,28 @@ int main(int argc, char** argv) {
   }
   if (rank == 0) cerr << "Done loading grammar!\n";
 
+  // load initial weights
+  if (rank == 0) { cerr << "Loading weights...\n"; }
+  vector<weight_t>& lambdas = decoder->CurrentWeightVector();
+  Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas);
+  if (rank == 0) { cerr << "Done loading weights.\n"; }
+
+  // freeze feature set (should be optional?)
+  const bool freeze_feature_set = true;
+  if (freeze_feature_set) FD::Freeze();
+
   const int num_feats = FD::NumFeats();
   if (rank == 0) cerr << "Number of features: " << num_feats << endl;
+  lambdas.resize(num_feats);
+
   const bool gaussian_prior = conf.count("gaussian_prior");
-  vector<double> means(num_feats, 0);
+  vector<weight_t> means(num_feats, 0);
   if (conf.count("means")) {
     if (!gaussian_prior) {
       cerr << "Don't use --means without --gaussian_prior!\n";
       exit(1);
     }
-    Weights wm;
-    wm.InitFromFile(conf["means"].as<string>());
-    if (num_feats != FD::NumFeats()) {
-      cerr << "[ERROR] Means file had unexpected features!\n";
-      exit(1);
-    }
-    wm.InitVector(&means);
+    Weights::InitFromFile(conf["means"].as<string>(), &means);
   }
   shared_ptr<BatchOptimizer> o;
   if (rank == 0) {
@@ -309,26 +248,13 @@ int main(int argc, char** argv) {
     cerr << "Optimizer: " << o->Name() << endl;
   }
   double objective = 0;
-  vector<double> lambdas(num_feats, 0.0);
-  weights.InitVector(&lambdas);
-  if (lambdas.size() != num_feats) {
-    cerr << "Initial weights file did not have all features specified!\n  feats="
-         << num_feats << "\n  weights file=" << lambdas.size() << endl;
-    lambdas.resize(num_feats, 0.0);
-  }
   vector<double> gradient(num_feats, 0.0);
-  vector<double> rcv_grad(num_feats, 0.0);
+  vector<double> rcv_grad;
+  rcv_grad.clear();
   bool converged = false;
 
   vector<string> corpus;
-  if (shard_dir.size()) {
-    ostringstream os; os << shard_dir << "/corpus." << rank << "_of_" << size;
-    ReadTrainingCorpus(os.str(), 0, 1, &corpus);
-    cerr << os.str() << " has " << corpus.size() << " training examples. " << endl;
-    if (corpus.size() > 500) { corpus.resize(500); cerr << "  TRUNCATING\n"; }
-  } else {
-    ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus);
-  }
+  ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus);
   assert(corpus.size() > 0);
 
   TrainingObserver observer;
@@ -341,19 +267,20 @@ int main(int argc, char** argv) {
     if (rank == 0) {
       cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n";
     }
-    decoder->SetWeights(lambdas);
     for (int i = 0; i < corpus.size(); ++i)
      decoder->Decode(corpus[i], &observer);
     cerr << "  process " << rank << '/' << size << " done\n";
     fill(gradient.begin(), gradient.end(), 0);
-    fill(rcv_grad.begin(), rcv_grad.end(), 0);
     observer.SetLocalGradientAndObjective(&gradient, &objective);
 
     double to = 0;
 #ifdef HAVE_MPI
+    rcv_grad.resize(num_feats, 0.0);
     mpi::reduce(world, &gradient[0], gradient.size(), &rcv_grad[0], plus<double>(), 0);
-    mpi::reduce(world, objective, to, plus<double>(), 0);
     swap(gradient, rcv_grad);
+    rcv_grad.clear();
+
+    mpi::reduce(world, objective, to, plus<double>(), 0);
     objective = to;
 #endif
 
@@ -378,7 +305,7 @@ int main(int argc, char** argv) {
      for (int i = 0; i < gradient.size(); ++i)
        gnorm += gradient[i] * gradient[i];
      cerr << "  GNORM=" << sqrt(gnorm) << endl;
-      vector<double> old = lambdas;
+      vector<weight_t> old = lambdas;
      int c = 0;
      while (old == lambdas) {
        ++c;
@@ -387,9 +314,8 @@ int main(int argc, char** argv) {
        assert(c < 5);
      }
      old.clear();
-      SanityCheck(lambdas);
-      ShowLargestFeatures(lambdas);
-      weights.InitFromVector(lambdas);
+      Weights::SanityCheck(lambdas);
+      Weights::ShowLargestFeatures(lambdas);
 
      converged = o->HasConverged();
      if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; }
@@ -399,7 +325,7 @@ int main(int argc, char** argv) {
      ostringstream vv;
      vv << "Objective = " << objective << "  (eval count=" << o->EvaluationCount() << ")";
      const string svv = vv.str();
-      weights.WriteToFile(fname, true, &svv);
+      Weights::WriteToFile(fname, lambdas, true, &svv);
    }  // rank == 0
    int cint = converged;
 #ifdef HAVE_MPI
@@ -411,3 +337,4 @@ int main(int argc, char** argv) {
   }
   return 0;
 }
+
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 32033c19..2ef4a2e7 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -31,35 +31,6 @@ namespace mpi = boost::mpi;
 using namespace std;
 namespace po = boost::program_options;
 
-void SanityCheck(const vector<double>& w) {
-  for (int i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
-  }
-}
-
-struct FComp {
-  const vector<double>& w_;
-  FComp(const vector<double>& w) : w_(w) {}
-  bool operator()(int a, int b) const {
-    return fabs(w_[a]) > fabs(w_[b]);
-  }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
-  vector<int> fnums(w.size());
-  for (int i = 0; i < w.size(); ++i)
-    fnums[i] = i;
-  vector<int>::iterator mid = fnums.begin();
-  mid += (w.size() > 10 ? 10 : w.size());
-  partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
-  cerr << "TOP FEATURES:";
-  for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
-    cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
-  }
-  cerr << endl;
-}
-
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
@@ -250,10 +221,25 @@ int main(int argc, char** argv) {
   if (!InitCommandLine(argc, argv, &conf))
     return 1;
 
+  vector<pair<string, int> > agenda;
+  if (!LoadAgenda(conf["training_agenda"].as<string>(), &agenda))
+    return 1;
+  if (rank == 0)
+    cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n";
+
+  assert(agenda.size() > 0);
+
+  if (1) {  // hack to load the feature hash functions -- TODO this should not be in cdec.ini
+    const string& cur_config = agenda[0].first;
+    const unsigned max_iteration = agenda[0].second;
+    ReadFile ini_rf(cur_config);
+    Decoder decoder(ini_rf.stream());
+  }
+
   // load initial weights
-  Weights weights;
+  vector<weight_t> init_weights;
   if (conf.count("input_weights"))
-    weights.InitFromFile(conf["input_weights"].as<string>());
+    Weights::InitFromFile(conf["input_weights"].as<string>(), &init_weights);
 
   vector<int> frozen_fids;
   if (conf.count("frozen_features")) {
@@ -310,19 +296,12 @@ int main(int argc, char** argv) {
    rng.reset(new MT19937);
 
   SparseVector<double> x;
-  weights.InitSparseVector(&x);
+  Weights::InitSparseVector(init_weights, &x);
   TrainingObserver observer;
 
   int write_weights_every_ith = 100; // TODO configure
 
   int titer = -1;
-  vector<pair<string, int> > agenda;
-  if (!LoadAgenda(conf["training_agenda"].as<string>(), &agenda))
-    return 1;
-  if (rank == 0)
-    cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n";
-
-  vector<double> lambdas;
   for (int ai = 0; ai < agenda.size(); ++ai) {
     const string& cur_config = agenda[ai].first;
     const unsigned max_iteration = agenda[ai].second;
@@ -331,6 +310,8 @@ int main(int argc, char** argv) {
     // load cdec.ini and set up decoder
     ReadFile ini_rf(cur_config);
     Decoder decoder(ini_rf.stream());
+    vector<weight_t>& lambdas = decoder.CurrentWeightVector();
+    if (ai == 0) { lambdas.swap(init_weights); init_weights.clear(); }
 
     if (rank == 0)
       o->ResetEpoch(); // resets the learning rate-- TODO is this good?
@@ -341,15 +322,13 @@
 #ifdef HAVE_MPI
       mpi::timer timer;
 #endif
-      weights.InitFromVector(x);
-      weights.InitVector(&lambdas);
+      x.init_vector(&lambdas);
      ++iter; ++titer;
      observer.Reset();
-      decoder.SetWeights(lambdas);
      if (rank == 0) {
        converged = (iter == max_iteration);
-        SanityCheck(lambdas);
-        ShowLargestFeatures(lambdas);
+        Weights::SanityCheck(lambdas);
+        Weights::ShowLargestFeatures(lambdas);
        string fname = "weights.cur.gz";
        if (iter % write_weights_every_ith == 0) {
          ostringstream o; o << "weights.epoch_" << (ai+1) << '.' << iter << ".gz";
@@ -360,7 +339,7 @@
        vv << "total iter=" << titer << " (of current config iter=" << iter << ")  minibatch=" << size_per_proc << " sentences/proc x " << size << " procs.   num_feats=" << x.size() << '/' << FD::NumFeats() << "   passes_thru_data=" << (titer * size_per_proc / static_cast<double>(corpus.size())) << "   eta=" << lr->eta(titer);
        const string svv = vv.str();
        cerr << svv << endl;
-        weights.WriteToFile(fname, true, &svv);
+        Weights::WriteToFile(fname, lambdas, true, &svv);
      }
 
      for (int i = 0; i < size_per_proc; ++i) {
diff --git a/training/mr_optimize_reduce.cc b/training/mr_optimize_reduce.cc
index b931991d..15e28fa1 100644
--- a/training/mr_optimize_reduce.cc
+++ b/training/mr_optimize_reduce.cc
@@ -88,25 +88,19 @@ int main(int argc, char** argv) {
 
   const bool use_b64 = conf["input_format"].as<string>() == "b64";
 
-  Weights weights;
-  weights.InitFromFile(conf["input_weights"].as<string>());
+  vector<weight_t> lambdas;
+  Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas);
   const string s_obj = "**OBJ**";
   int num_feats = FD::NumFeats();
   cerr << "Number of features: " << num_feats << endl;
 
   const bool gaussian_prior = conf.count("gaussian_prior");
-  vector<double> means(num_feats, 0);
+  vector<weight_t> means(num_feats, 0);
   if (conf.count("means")) {
     if (!gaussian_prior) {
       cerr << "Don't use --means without --gaussian_prior!\n";
       exit(1);
     }
-    Weights wm;
-    wm.InitFromFile(conf["means"].as<string>());
-    if (num_feats != FD::NumFeats()) {
-      cerr << "[ERROR] Means file had unexpected features!\n";
-      exit(1);
-    }
-    wm.InitVector(&means);
+    Weights::InitFromFile(conf["means"].as<string>(), &means);
   }
   shared_ptr<BatchOptimizer> o;
   const string omethod = conf["optimization_method"].as<string>();
@@ -124,8 +118,6 @@ int main(int argc, char** argv) {
      cerr << "No state file found, assuming ITERATION 1\n";
   }
 
-  vector<double> lambdas(num_feats, 0);
-  weights.InitVector(&lambdas);
   double objective = 0;
   vector<double> gradient(num_feats, 0);
   // 0<TAB>**OBJ**=12.2;Feat1=2.3;Feat2=-0.2;
@@ -223,8 +215,7 @@ int main(int argc, char** argv) {
   old.clear();
   SanityCheck(lambdas);
   ShowLargestFeatures(lambdas);
-  weights.InitFromVector(lambdas);
-  weights.WriteToFile(conf["output_weights"].as<string>(), false);
+  Weights::WriteToFile(conf["output_weights"].as<string>(), lambdas, false);
 
   const bool conv = o->HasConverged();
   if (conv) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; }
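Aside from the Makefile cleanup, every hunk above applies the same refactoring: the old instance-based Weights object (InitFromFile / InitVector / InitFromVector plus a copy into the decoder via SetWeights) is replaced by static Weights:: helpers that fill, check, and write the decoder's own weight vector. The following is a minimal sketch of the new usage, not code from this commit; it assumes the usual cdec headers and uses only the calls visible in the diff, with placeholder config and weight file names.

// Sketch only -- header names and file paths are assumptions; the
// Decoder/Weights calls mirror those introduced in the hunks above.
#include <string>
#include <vector>
#include "decoder.h"   // Decoder (assumed header name)
#include "filelib.h"   // ReadFile (assumed header name)
#include "weights.h"   // weight_t, static Weights helpers (assumed header name)

int main() {
  // Build the decoder from its configuration file (placeholder path).
  ReadFile ini_rf("cdec.ini");
  Decoder decoder(ini_rf.stream());

  // New style: load directly into the decoder's own weight vector;
  // no separate Weights object and no decoder.SetWeights() copy.
  std::vector<weight_t>& lambdas = decoder.CurrentWeightVector();
  Weights::InitFromFile("weights.init.gz", &lambdas);   // placeholder file

  // ... decode / optimize, updating lambdas in place ...

  // Write the weights back out; the extra arguments mirror the call in
  // mpi_batch_optimize.cc (a flag plus an optional comment string whose
  // exact meaning is not shown in this diff).
  const std::string comment = "Objective = ...";
  Weights::WriteToFile("weights.out.gz", lambdas, true, &comment);
  return 0;
}

Because lambdas is a reference into the decoder, the optimizers can update it in place, which is presumably why the decoder->SetWeights(lambdas) calls disappear from the decode loops in mpi_batch_optimize.cc and mpi_online_optimize.cc.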
