From 8aa29810bb77611cc20b7a384897ff6703783ea1 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:35:42 -0500 Subject: major restructure of the training code --- training/mpi_em_optimize.cc | 389 -------------------------------------------- 1 file changed, 389 deletions(-) delete mode 100644 training/mpi_em_optimize.cc (limited to 'training/mpi_em_optimize.cc') diff --git a/training/mpi_em_optimize.cc b/training/mpi_em_optimize.cc deleted file mode 100644 index 48683b15..00000000 --- a/training/mpi_em_optimize.cc +++ /dev/null @@ -1,389 +0,0 @@ -#include -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -#include -#include -#include - -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "optimize.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -using boost::shared_ptr; -namespace po = boost::program_options; - -void SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - -struct FComp { - const vector& w_; - FComp(const vector& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowLargestFeatures(const vector& w) { - vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - for (vector::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,w",po::value(),"Input feature weights file") - ("training_data,t",po::value(),"Training data") - ("decoder_config,c",po::value(),"Decoder configuration file") - ("output_weights,o",po::value()->default_value("-"),"Output feature weights file"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !(conf->count("training_data")) || !conf->count("decoder_config")) { - cerr << dcmdline_options << endl; -#ifdef HAVE_MPI - MPI::Finalize(); -#endif - exit(1); - } -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int lc = 0; - while(in) { - getline(in, line); - if (!in) break; - if (lc % size == rank) c->push_back(line); - ++lc; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct TrainingObserver : public DecoderObserver { - void Reset() { - total_complete = 0; - cur_obj = 0; - tot_obj = 0; - tot.clear(); - } - - void SetLocalGradientAndObjective(SparseVector* g, double* o) const { - *o = tot_obj; - *g = tot; - } - - virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { - cur_obj = 0; - state = 1; - } - - void ExtractExpectedCounts(Hypergraph* hg) { - vector posts; - cur.clear(); - const prob_t z = hg->ComputeEdgePosteriors(1.0, &posts); - cur_obj = log(z); - for (int i = 0; i < posts.size(); ++i) { - const SparseVector& efeats = hg->edges_[i].feature_values_; - const double post = static_cast(posts[i] / z); - for (SparseVector::const_iterator j = efeats.begin(); j != efeats.end(); ++j) - cur.add_value(j->first, post); - } - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 1); - state = 2; - ExtractExpectedCounts(hg); - } - - // replace translation forest, since we're doing EM training (we don't know which) - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 2); - state = 3; - ExtractExpectedCounts(hg); - } - - virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { - ++total_complete; - tot_obj += cur_obj; - tot += cur; - } - - int total_complete; - double cur_obj; - double tot_obj; - SparseVector cur, tot; - int state; -}; - -void ReadConfig(const string& ini, vector* out) { - ReadFile rf(ini); - istream& in = *rf.stream(); - while(in) { - string line; - getline(in, line); - if (!in) continue; - out->push_back(line); - } -} - -void StoreConfig(const vector& cfg, istringstream* o) { - ostringstream os; - for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; } - o->str(os.str()); -} - -struct OptimizableMultinomialFamily { - struct CPD { - CPD() : z() {} - double z; - map c2counts; - }; - map counts; - double Value(WordID conditioning, WordID generated) const { - map::const_iterator it = counts.find(conditioning); - assert(it != counts.end()); - map::const_iterator r = it->second.c2counts.find(generated); - if (r == it->second.c2counts.end()) return 0; - return r->second; - } - void Increment(WordID conditioning, WordID generated, double count) { - CPD& cc = counts[conditioning]; - cc.z += count; - cc.c2counts[generated] += count; - } - void Optimize() { - for (map::iterator i = counts.begin(); i != counts.end(); ++i) { - CPD& cpd = i->second; - for (map::iterator j = cpd.c2counts.begin(); j != cpd.c2counts.end(); ++j) { - j->second /= cpd.z; - // cerr << "P(" << TD::Convert(j->first) << " | " << TD::Convert(i->first) << " ) = " << j->second << endl; - } - } - } - void Clear() { - counts.clear(); - } -}; - -struct CountManager { - CountManager(size_t num_types) : oms_(num_types) {} - virtual ~CountManager(); - virtual void AddCounts(const SparseVector& c) = 0; - void Optimize(SparseVector* weights) { - for (int i = 0; i < oms_.size(); ++i) { - oms_[i].Optimize(); - } - GetOptimalValues(weights); - for (int i = 0; i < oms_.size(); ++i) { - oms_[i].Clear(); - } - } - virtual void GetOptimalValues(SparseVector* wv) const = 0; - vector oms_; -}; -CountManager::~CountManager() {} - -struct TaggerCountManager : public CountManager { - // 0 = transitions, 2 = emissions - TaggerCountManager() : CountManager(2) {} - void AddCounts(const SparseVector& c); - void GetOptimalValues(SparseVector* wv) const { - for (set::const_iterator it = fids_.begin(); it != fids_.end(); ++it) { - int ftype; - WordID cond, gen; - bool is_optimized = TaggerCountManager::GetFeature(*it, &ftype, &cond, &gen); - assert(is_optimized); - wv->set_value(*it, log(oms_[ftype].Value(cond, gen))); - } - } - // Id:0:a=1 Bi:a_b=1 Bi:b_c=1 Bi:c_d=1 Uni:a=1 Uni:b=1 Uni:c=1 Uni:d=1 Id:1:b=1 Bi:BOS_a=1 Id:2:c=1 - static bool GetFeature(const int fid, int* feature_type, WordID* cond, WordID* gen) { - const string& feat = FD::Convert(fid); - if (feat.size() > 5 && feat[0] == 'I' && feat[1] == 'd' && feat[2] == ':') { - // emission - const size_t p = feat.rfind(':'); - assert(p != string::npos); - *cond = TD::Convert(feat.substr(p+1)); - *gen = TD::Convert(feat.substr(3, p - 3)); - *feature_type = 1; - return true; - } else if (feat[0] == 'B' && feat.size() > 5 && feat[2] == ':' && feat[1] == 'i') { - // transition - const size_t p = feat.rfind('_'); - assert(p != string::npos); - *gen = TD::Convert(feat.substr(p+1)); - *cond = TD::Convert(feat.substr(3, p - 3)); - *feature_type = 0; - return true; - } else if (feat[0] == 'U' && feat.size() > 4 && feat[1] == 'n' && feat[2] == 'i' && feat[3] == ':') { - // ignore - return false; - } else { - cerr << "Don't know how to deal with feature of type: " << feat << endl; - abort(); - } - } - set fids_; -}; - -void TaggerCountManager::AddCounts(const SparseVector& c) { - for (SparseVector::const_iterator it = c.begin(); it != c.end(); ++it) { - const double& val = it->second; - int ftype; - WordID cond, gen; - if (GetFeature(it->first, &ftype, &cond, &gen)) { - oms_[ftype].Increment(cond, gen, val); - fids_.insert(it->first); - } - } -} - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - MPI::Init(argc, argv); - const int size = MPI::COMM_WORLD.Get_size(); - const int rank = MPI::COMM_WORLD.Get_rank(); -#else - const int size = 1; - const int rank = 0; -#endif - SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - TaggerCountManager tcm; - - // load cdec.ini and set up decoder - vector cdec_ini; - ReadConfig(conf["decoder_config"].as(), &cdec_ini); - istringstream ini; - StoreConfig(cdec_ini, &ini); - if (rank == 0) cerr << "Loading grammar...\n"; - Decoder* decoder = new Decoder(&ini); - if (decoder->GetConf()["input"].as() != "-") { - cerr << "cdec.ini must not set an input file\n"; -#ifdef HAVE_MPI - MPI::COMM_WORLD.Abort(1); -#endif - } - if (rank == 0) cerr << "Done loading grammar!\n"; - Weights w; - if (conf.count("input_weights")) - w.InitFromFile(conf["input_weights"].as()); - - double objective = 0; - bool converged = false; - - vector lambdas; - w.InitVector(&lambdas); - vector corpus; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); - assert(corpus.size() > 0); - - int iteration = 0; - TrainingObserver observer; - while (!converged) { - ++iteration; - observer.Reset(); - if (rank == 0) { - cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n"; - } - decoder->SetWeights(lambdas); - for (int i = 0; i < corpus.size(); ++i) - decoder->Decode(corpus[i], &observer); - - SparseVector x; - observer.SetLocalGradientAndObjective(&x, &objective); - cerr << "COUNTS = " << x << endl; - cerr << " OBJ = " << objective << endl; - tcm.AddCounts(x); - -#if 0 -#ifdef HAVE_MPI - MPI::COMM_WORLD.Reduce(const_cast(&gradient.data()[0]), &rcv_grad[0], num_feats, MPI::DOUBLE, MPI::SUM, 0); - MPI::COMM_WORLD.Reduce(&objective, &to, 1, MPI::DOUBLE, MPI::SUM, 0); - swap(gradient, rcv_grad); - objective = to; -#endif -#endif - - if (rank == 0) { - SparseVector wsv; - tcm.Optimize(&wsv); - - w.InitFromVector(wsv); - w.InitVector(&lambdas); - - ShowLargestFeatures(lambdas); - - converged = iteration > 100; - if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } - - string fname = "weights.cur.gz"; - if (converged) { fname = "weights.final.gz"; } - ostringstream vv; - vv << "Objective = " << objective << " (ITERATION=" << iteration << ")"; - const string svv = vv.str(); - w.WriteToFile(fname, true, &svv); - } // rank == 0 - int cint = converged; -#ifdef HAVE_MPI - MPI::COMM_WORLD.Bcast(const_cast(&lambdas.data()[0]), num_feats, MPI::DOUBLE, 0); - MPI::COMM_WORLD.Bcast(&cint, 1, MPI::INT, 0); - MPI::COMM_WORLD.Barrier(); -#endif - converged = cint; - } -#ifdef HAVE_MPI - MPI::Finalize(); -#endif - return 0; -} -- cgit v1.2.3