summary | refs | log | tree | commit | diff
path: root/training
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-09-13 17:36:23 +0100
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-09-13 17:36:23 +0100
commit bb86637332d49f71c485df34576e464eaf053656 (patch)
tree efaa1cb07db897f3443c9dc69712999a530921f3 /training
parent 7fadd06330c015d7ebc51ebd50e30332d187acbb (diff)
get rid of bad Weights class so it no longer keeps a copy of a vector inside it
Diffstat (limited to 'training')
-rw-r--r-- training/Makefile.am 8
-rw-r--r-- training/augment_grammar.cc 4
-rw-r--r-- training/collapse_weights.cc 6
-rw-r--r-- training/compute_cllh.cc 23
-rw-r--r-- training/grammar_convert.cc 8
-rw-r--r-- training/mpi_batch_optimize.cc 127
-rw-r--r-- training/mpi_online_optimize.cc 69
-rw-r--r-- training/mr_optimize_reduce.cc 19
8 files changed, 72 insertions, 192 deletions
diff --git a/training/Makefile.am b/training/Makefile.am
index e075e417..6e2c06f5 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -12,9 +12,7 @@ bin_PROGRAMS = \
cllh_filter_grammar \
mpi_online_optimize \
mpi_batch_optimize \
- mpi_em_optimize \
compute_cllh \
- feature_expectations \
augment_grammar
noinst_PROGRAMS = \
@@ -29,12 +27,6 @@ mpi_online_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval
mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc
mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-feature_expectations_SOURCES = feature_expectations.cc
-feature_expectations_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_em_optimize_SOURCES = mpi_em_optimize.cc optimize.cc
-mpi_em_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
compute_cllh_SOURCES = compute_cllh.cc
compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
diff --git a/training/augment_grammar.cc b/training/augment_grammar.cc
index df8d4ee8..e89a92d5 100644
--- a/training/augment_grammar.cc
+++ b/training/augment_grammar.cc
@@ -134,9 +134,7 @@ int main(int argc, char** argv) {
} else { ngram = NULL; }
extra_feature = conf.count("extra_lex_feature") > 0;
if (conf.count("collapse_weights")) {
- Weights w;
- w.InitFromFile(conf["collapse_weights"].as<string>());
- w.InitVector(&col_weights);
+ Weights::InitFromFile(conf["collapse_weights"].as<string>(), &col_weights);
}
clear_features = conf.count("clear_features_after_collapse") > 0;
gather_rules = false;
diff --git a/training/collapse_weights.cc b/training/collapse_weights.cc
index 4fb742fb..dc480f6c 100644
--- a/training/collapse_weights.cc
+++ b/training/collapse_weights.cc
@@ -59,10 +59,8 @@ int main(int argc, char** argv) {
InitCommandLine(argc, argv, &conf);
const string wfile = conf["weights"].as<string>();
const string gfile = conf["grammar"].as<string>();
- Weights wm;
- wm.InitFromFile(wfile);
- vector<double> w;
- wm.InitVector(&w);
+ vector<weight_t> w;
+ Weights::InitFromFile(wfile, &w);
MarginalMap e_tots;
MarginalMap f_tots;
prob_t tot;
diff --git a/training/compute_cllh.cc b/training/compute_cllh.cc
index 332f6d0c..b496d196 100644
--- a/training/compute_cllh.cc
+++ b/training/compute_cllh.cc
@@ -148,15 +148,6 @@ int main(int argc, char** argv) {
if (!InitCommandLine(argc, argv, &conf))
return false;
- // load initial weights
- Weights weights;
- if (conf.count("weights"))
- weights.InitFromFile(conf["weights"].as<string>());
-
- // freeze feature set
- //const bool freeze_feature_set = conf.count("freeze_feature_set");
- //if (freeze_feature_set) FD::Freeze();
-
// load cdec.ini and set up decoder
ReadFile ini_rf(conf["decoder_config"].as<string>());
Decoder decoder(ini_rf.stream());
@@ -165,17 +156,22 @@ int main(int argc, char** argv) {
abort();
}
+ // load weights
+ vector<weight_t>& weights = decoder.CurrentWeightVector();
+ if (conf.count("weights"))
+ Weights::InitFromFile(conf["weights"].as<string>(), &weights);
+
+ // freeze feature set
+ //const bool freeze_feature_set = conf.count("freeze_feature_set");
+ //if (freeze_feature_set) FD::Freeze();
+
vector<string> corpus; vector<int> ids;
ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
assert(corpus.size() > 0);
assert(corpus.size() == ids.size());
- vector<double> wv;
- weights.InitVector(&wv);
- decoder.SetWeights(wv);
TrainingObserver observer;
double objective = 0;
- bool converged = false;
observer.Reset();
if (rank == 0)
@@ -197,3 +193,4 @@ int main(int argc, char** argv) {
return 0;
}
+
diff --git a/training/grammar_convert.cc b/training/grammar_convert.cc
index 8d292f8a..bf8abb26 100644
--- a/training/grammar_convert.cc
+++ b/training/grammar_convert.cc
@@ -251,12 +251,10 @@ int main(int argc, char **argv) {
const bool is_split_input = (conf["format"].as<string>() == "split");
const bool is_json_input = is_split_input || (conf["format"].as<string>() == "json");
const bool collapse_weights = conf.count("collapse_weights");
- Weights wts;
vector<double> w;
- if (conf.count("weights")) {
- wts.InitFromFile(conf["weights"].as<string>());
- wts.InitVector(&w);
- }
+ if (conf.count("weights"))
+ Weights::InitFromFile(conf["weights"].as<string>(), &w);
+
if (collapse_weights && !w.size()) {
cerr << "--collapse_weights requires a weights file to be specified!\n";
exit(1);
diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc
index 39a8af7d..cc5953f6 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/mpi_batch_optimize.cc
@@ -31,42 +31,12 @@ using namespace std;
using boost::shared_ptr;
namespace po = boost::program_options;
-void SanityCheck(const vector<double>& w) {
- for (int i = 0; i < w.size(); ++i) {
- assert(!isnan(w[i]));
- assert(!isinf(w[i]));
- }
-}
-
-struct FComp {
- const vector<double>& w_;
- FComp(const vector<double>& w) : w_(w) {}
- bool operator()(int a, int b) const {
- return fabs(w_[a]) > fabs(w_[b]);
- }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
- vector<int> fnums(w.size());
- for (int i = 0; i < w.size(); ++i)
- fnums[i] = i;
- vector<int>::iterator mid = fnums.begin();
- mid += (w.size() > 10 ? 10 : w.size());
- partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
- cerr << "TOP FEATURES:";
- for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
- cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
- }
- cerr << endl;
-}
-
bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
("input_weights,w",po::value<string>(),"Input feature weights file")
("training_data,t",po::value<string>(),"Training data")
("decoder_config,d",po::value<string>(),"Decoder configuration file")
- ("sharded_input,s",po::value<string>(), "Corpus and grammar files are 'sharded' so each processor loads its own input and grammar file. Argument is the directory containing the shards.")
("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file")
("optimization_method,m", po::value<string>()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)")
("correction_buffers,M", po::value<int>()->default_value(10), "Number of gradients for LBFGS to maintain in memory")
@@ -88,14 +58,10 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
}
po::notify(*conf);
- if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data") | conf->count("sharded_input")) || !conf->count("decoder_config")) {
+ if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data")) || !conf->count("decoder_config")) {
cerr << dcmdline_options << endl;
return false;
}
- if (conf->count("training_data") && conf->count("sharded_input")) {
- cerr << "Cannot specify both --training_data and --sharded_input\n";
- return false;
- }
return true;
}
@@ -236,42 +202,9 @@ int main(int argc, char** argv) {
po::variables_map conf;
if (!InitCommandLine(argc, argv, &conf)) return 1;
- string shard_dir;
- if (conf.count("sharded_input")) {
- shard_dir = conf["sharded_input"].as<string>();
- if (!DirectoryExists(shard_dir)) {
- if (rank == 0) cerr << "Can't find shard directory: " << shard_dir << endl;
- return 1;
- }
- if (rank == 0)
- cerr << "Shard directory: " << shard_dir << endl;
- }
-
- // load initial weights
- Weights weights;
- if (rank == 0) { cerr << "Loading weights...\n"; }
- weights.InitFromFile(conf["input_weights"].as<string>());
- if (rank == 0) { cerr << "Done loading weights.\n"; }
-
- // freeze feature set (should be optional?)
- const bool freeze_feature_set = true;
- if (freeze_feature_set) FD::Freeze();
-
// load cdec.ini and set up decoder
vector<string> cdec_ini;
ReadConfig(conf["decoder_config"].as<string>(), &cdec_ini);
- if (shard_dir.size()) {
- if (rank == 0) {
- for (int i = 0; i < cdec_ini.size(); ++i) {
- if (cdec_ini[i].find("grammar=") == 0) {
- cerr << "!!! using sharded input and " << conf["decoder_config"].as<string>() << " contains a grammar specification:\n" << cdec_ini[i] << "\n VERIFY THAT THIS IS CORRECT!\n";
- }
- }
- }
- ostringstream g;
- g << "grammar=" << shard_dir << "/grammar." << rank << "_of_" << size << ".gz";
- cdec_ini.push_back(g.str());
- }
istringstream ini;
StoreConfig(cdec_ini, &ini);
if (rank == 0) cerr << "Loading grammar...\n";
@@ -282,22 +215,28 @@ int main(int argc, char** argv) {
}
if (rank == 0) cerr << "Done loading grammar!\n";
+ // load initial weights
+ if (rank == 0) { cerr << "Loading weights...\n"; }
+ vector<weight_t>& lambdas = decoder->CurrentWeightVector();
+ Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas);
+ if (rank == 0) { cerr << "Done loading weights.\n"; }
+
+ // freeze feature set (should be optional?)
+ const bool freeze_feature_set = true;
+ if (freeze_feature_set) FD::Freeze();
+
const int num_feats = FD::NumFeats();
if (rank == 0) cerr << "Number of features: " << num_feats << endl;
+ lambdas.resize(num_feats);
+
const bool gaussian_prior = conf.count("gaussian_prior");
- vector<double> means(num_feats, 0);
+ vector<weight_t> means(num_feats, 0);
if (conf.count("means")) {
if (!gaussian_prior) {
cerr << "Don't use --means without --gaussian_prior!\n";
exit(1);
}
- Weights wm;
- wm.InitFromFile(conf["means"].as<string>());
- if (num_feats != FD::NumFeats()) {
- cerr << "[ERROR] Means file had unexpected features!\n";
- exit(1);
- }
- wm.InitVector(&means);
+ Weights::InitFromFile(conf["means"].as<string>(), &means);
}
shared_ptr<BatchOptimizer> o;
if (rank == 0) {
@@ -309,26 +248,13 @@ int main(int argc, char** argv) {
cerr << "Optimizer: " << o->Name() << endl;
}
double objective = 0;
- vector<double> lambdas(num_feats, 0.0);
- weights.InitVector(&lambdas);
- if (lambdas.size() != num_feats) {
- cerr << "Initial weights file did not have all features specified!\n feats="
- << num_feats << "\n weights file=" << lambdas.size() << endl;
- lambdas.resize(num_feats, 0.0);
- }
vector<double> gradient(num_feats, 0.0);
- vector<double> rcv_grad(num_feats, 0.0);
+ vector<double> rcv_grad;
+ rcv_grad.clear();
bool converged = false;
vector<string> corpus;
- if (shard_dir.size()) {
- ostringstream os; os << shard_dir << "/corpus." << rank << "_of_" << size;
- ReadTrainingCorpus(os.str(), 0, 1, &corpus);
- cerr << os.str() << " has " << corpus.size() << " training examples. " << endl;
- if (corpus.size() > 500) { corpus.resize(500); cerr << " TRUNCATING\n"; }
- } else {
- ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus);
- }
+ ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus);
assert(corpus.size() > 0);
TrainingObserver observer;
@@ -341,19 +267,20 @@ int main(int argc, char** argv) {
if (rank == 0) {
cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n";
}
- decoder->SetWeights(lambdas);
for (int i = 0; i < corpus.size(); ++i)
decoder->Decode(corpus[i], &observer);
cerr << " process " << rank << '/' << size << " done\n";
fill(gradient.begin(), gradient.end(), 0);
- fill(rcv_grad.begin(), rcv_grad.end(), 0);
observer.SetLocalGradientAndObjective(&gradient, &objective);
double to = 0;
#ifdef HAVE_MPI
+ rcv_grad.resize(num_feats, 0.0);
mpi::reduce(world, &gradient[0], gradient.size(), &rcv_grad[0], plus<double>(), 0);
- mpi::reduce(world, objective, to, plus<double>(), 0);
swap(gradient, rcv_grad);
+ rcv_grad.clear();
+
+ mpi::reduce(world, objective, to, plus<double>(), 0);
objective = to;
#endif
@@ -378,7 +305,7 @@ int main(int argc, char** argv) {
for (int i = 0; i < gradient.size(); ++i)
gnorm += gradient[i] * gradient[i];
cerr << " GNORM=" << sqrt(gnorm) << endl;
- vector<double> old = lambdas;
+ vector<weight_t> old = lambdas;
int c = 0;
while (old == lambdas) {
++c;
@@ -387,9 +314,8 @@ int main(int argc, char** argv) {
assert(c < 5);
}
old.clear();
- SanityCheck(lambdas);
- ShowLargestFeatures(lambdas);
- weights.InitFromVector(lambdas);
+ Weights::SanityCheck(lambdas);
+ Weights::ShowLargestFeatures(lambdas);
converged = o->HasConverged();
if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; }
@@ -399,7 +325,7 @@ int main(int argc, char** argv) {
ostringstream vv;
vv << "Objective = " << objective << " (eval count=" << o->EvaluationCount() << ")";
const string svv = vv.str();
- weights.WriteToFile(fname, true, &svv);
+ Weights::WriteToFile(fname, lambdas, true, &svv);
} // rank == 0
int cint = converged;
#ifdef HAVE_MPI
@@ -411,3 +337,4 @@ int main(int argc, char** argv) {
}
return 0;
}
+
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 32033c19..2ef4a2e7 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -31,35 +31,6 @@ namespace mpi = boost::mpi;
using namespace std;
namespace po = boost::program_options;
-void SanityCheck(const vector<double>& w) {
- for (int i = 0; i < w.size(); ++i) {
- assert(!isnan(w[i]));
- assert(!isinf(w[i]));
- }
-}
-
-struct FComp {
- const vector<double>& w_;
- FComp(const vector<double>& w) : w_(w) {}
- bool operator()(int a, int b) const {
- return fabs(w_[a]) > fabs(w_[b]);
- }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
- vector<int> fnums(w.size());
- for (int i = 0; i < w.size(); ++i)
- fnums[i] = i;
- vector<int>::iterator mid = fnums.begin();
- mid += (w.size() > 10 ? 10 : w.size());
- partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
- cerr << "TOP FEATURES:";
- for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
- cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
- }
- cerr << endl;
-}
-
bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
@@ -250,10 +221,25 @@ int main(int argc, char** argv) {
if (!InitCommandLine(argc, argv, &conf))
return 1;
+ vector<pair<string, int> > agenda;
+ if (!LoadAgenda(conf["training_agenda"].as<string>(), &agenda))
+ return 1;
+ if (rank == 0)
+ cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n";
+
+ assert(agenda.size() > 0);
+
+ if (1) { // hack to load the feature hash functions -- TODO this should not be in cdec.ini
+ const string& cur_config = agenda[0].first;
+ const unsigned max_iteration = agenda[0].second;
+ ReadFile ini_rf(cur_config);
+ Decoder decoder(ini_rf.stream());
+ }
+
// load initial weights
- Weights weights;
+ vector<weight_t> init_weights;
if (conf.count("input_weights"))
- weights.InitFromFile(conf["input_weights"].as<string>());
+ Weights::InitFromFile(conf["input_weights"].as<string>(), &init_weights);
vector<int> frozen_fids;
if (conf.count("frozen_features")) {
@@ -310,19 +296,12 @@ int main(int argc, char** argv) {
rng.reset(new MT19937);
SparseVector<double> x;
- weights.InitSparseVector(&x);
+ Weights::InitSparseVector(init_weights, &x);
TrainingObserver observer;
int write_weights_every_ith = 100; // TODO configure
int titer = -1;
- vector<pair<string, int> > agenda;
- if (!LoadAgenda(conf["training_agenda"].as<string>(), &agenda))
- return 1;
- if (rank == 0)
- cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n";
-
- vector<double> lambdas;
for (int ai = 0; ai < agenda.size(); ++ai) {
const string& cur_config = agenda[ai].first;
const unsigned max_iteration = agenda[ai].second;
@@ -331,6 +310,8 @@ int main(int argc, char** argv) {
// load cdec.ini and set up decoder
ReadFile ini_rf(cur_config);
Decoder decoder(ini_rf.stream());
+ vector<weight_t>& lambdas = decoder.CurrentWeightVector();
+ if (ai == 0) { lambdas.swap(init_weights); init_weights.clear(); }
if (rank == 0)
o->ResetEpoch(); // resets the learning rate-- TODO is this good?
@@ -341,15 +322,13 @@ int main(int argc, char** argv) {
#ifdef HAVE_MPI
mpi::timer timer;
#endif
- weights.InitFromVector(x);
- weights.InitVector(&lambdas);
+ x.init_vector(&lambdas);
++iter; ++titer;
observer.Reset();
- decoder.SetWeights(lambdas);
if (rank == 0) {
converged = (iter == max_iteration);
- SanityCheck(lambdas);
- ShowLargestFeatures(lambdas);
+ Weights::SanityCheck(lambdas);
+ Weights::ShowLargestFeatures(lambdas);
string fname = "weights.cur.gz";
if (iter % write_weights_every_ith == 0) {
ostringstream o; o << "weights.epoch_" << (ai+1) << '.' << iter << ".gz";
@@ -360,7 +339,7 @@ int main(int argc, char** argv) {
vv << "total iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast<double>(corpus.size())) << " eta=" << lr->eta(titer);
const string svv = vv.str();
cerr << svv << endl;
- weights.WriteToFile(fname, true, &svv);
+ Weights::WriteToFile(fname, lambdas, true, &svv);
}
for (int i = 0; i < size_per_proc; ++i) {
diff --git a/training/mr_optimize_reduce.cc b/training/mr_optimize_reduce.cc
index b931991d..15e28fa1 100644
--- a/training/mr_optimize_reduce.cc
+++ b/training/mr_optimize_reduce.cc
@@ -88,25 +88,19 @@ int main(int argc, char** argv) {
const bool use_b64 = conf["input_format"].as<string>() == "b64";
- Weights weights;
- weights.InitFromFile(conf["input_weights"].as<string>());
+ vector<weight_t> lambdas;
+ Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas);
const string s_obj = "**OBJ**";
int num_feats = FD::NumFeats();
cerr << "Number of features: " << num_feats << endl;
const bool gaussian_prior = conf.count("gaussian_prior");
- vector<double> means(num_feats, 0);
+ vector<weight_t> means(num_feats, 0);
if (conf.count("means")) {
if (!gaussian_prior) {
cerr << "Don't use --means without --gaussian_prior!\n";
exit(1);
}
- Weights wm;
- wm.InitFromFile(conf["means"].as<string>());
- if (num_feats != FD::NumFeats()) {
- cerr << "[ERROR] Means file had unexpected features!\n";
- exit(1);
- }
- wm.InitVector(&means);
+ Weights::InitFromFile(conf["means"].as<string>(), &means);
}
shared_ptr<BatchOptimizer> o;
const string omethod = conf["optimization_method"].as<string>();
@@ -124,8 +118,6 @@ int main(int argc, char** argv) {
cerr << "No state file found, assuming ITERATION 1\n";
}
- vector<double> lambdas(num_feats, 0);
- weights.InitVector(&lambdas);
double objective = 0;
vector<double> gradient(num_feats, 0);
// 0<TAB>**OBJ**=12.2;Feat1=2.3;Feat2=-0.2;
@@ -223,8 +215,7 @@ int main(int argc, char** argv) {
old.clear();
SanityCheck(lambdas);
ShowLargestFeatures(lambdas);
- weights.InitFromVector(lambdas);
- weights.WriteToFile(conf["output_weights"].as<string>(), false);
+ Weights::WriteToFile(conf["output_weights"].as<string>(), lambdas, false);
const bool conv = o->HasConverged();
if (conv) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; }