author     Chris Dyer <cdyer@cs.cmu.edu>  2012-03-01 23:09:41 -0500
committer  Chris Dyer <cdyer@cs.cmu.edu>  2012-03-01 23:09:41 -0500
commit     0885db8a3c07927bd1220bb1c639a0fe8687d6f6 (patch)
tree       3e174cf6bfbc97da6f35e7351a0cdbfa800e969e
parent     378ba2373374015c8de4b360a30227f072616c6c (diff)
parent     e1a0c140e9f31461ab45ec7f9533ad98d2b9caa9 (diff)
Merge branch 'master' of github.com:redpony/cdec
-rw-r--r--  decoder/decoder.cc    |  5
-rw-r--r--  training/lbl_model.cc | 54
-rw-r--r--  utils/corpus_tools.cc | 16
3 files changed, 57 insertions, 18 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 3394e0b8..69fbaf85 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -812,6 +812,9 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
     abort();
   }
 
+  if (conf.count("show_target_graph"))
+    HypergraphIO::WriteTarget(forest);
+
   for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
     const RescoringPass& rp = rescoring_passes[pass];
     const vector<weight_t>& cur_weights = *rp.weight_vector;
@@ -1018,8 +1021,6 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
   }
   if (conf.count("show_cfg_search_space"))
     HypergraphIO::WriteAsCFG(forest);
-  if (conf.count("show_target_graph"))
-    HypergraphIO::WriteTarget(forest);
   if (has_ref) {
     if (HG::Intersect(ref, &forest)) {
       // if (crf_uniform_empirical) {
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index def5075a..a46ce33c 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -15,6 +15,7 @@
 #ifdef HAVE_MPI
 #include <boost/mpi/timer.hpp>
 #include <boost/mpi.hpp>
+#include <boost/archive/text_oarchive.hpp>
 namespace mpi = boost::mpi;
 #endif
 #include <boost/math/special_functions/fpclassify.hpp>
@@ -34,12 +35,26 @@ namespace mpi = boost::mpi;
 namespace po = boost::program_options;
 using namespace std;
 
-#define kDIMENSIONS 100
+#define kDIMENSIONS 10
 typedef Eigen::Matrix<double, kDIMENSIONS, 1> RVector;
 typedef Eigen::Matrix<double, 1, kDIMENSIONS> RTVector;
 typedef Eigen::Matrix<double, kDIMENSIONS, kDIMENSIONS> TMatrix;
 vector<RVector> r_src, r_trg;
 
+#if HAVE_MPI
+namespace boost {
+namespace serialization {
+
+template<class Archive>
+void serialize(Archive & ar, RVector & v, const unsigned int version) {
+  for (unsigned i = 0; i < kDIMENSIONS; ++i)
+    ar & v[i];
+}
+
+} // namespace serialization
+} // namespace boost
+#endif
+
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
@@ -224,7 +239,7 @@ int main(int argc, char** argv) {
     srand(seed);
   }
 
-  TMatrix t;
+  TMatrix t = TMatrix::Zero();
   if (rank == 0) {
     t = TMatrix::Random() / 50.0;
     for (unsigned i = 1; i < r_trg.size(); ++i) {
@@ -241,16 +256,18 @@ int main(int argc, char** argv) {
   TMatrix g = TMatrix::Zero();
   vector<TMatrix> exp_src;
   vector<double> z_src;
-  vector<double> flat_g, flat_t;
+  vector<double> flat_g, flat_t, rcv_grad;
   Flatten(t, &flat_t);
   bool converged = false;
-  // TODO broadcast embeddings
-  for (int iter = 0; !converged && iter < ITERATIONS; ++iter) {
-#ifdef HAVE_MPI
-    mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
+#if HAVE_MPI
+  mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
+  mpi::broadcast(world, r_trg, 0);
+  mpi::broadcast(world, r_src, 0);
 #endif
+  cerr << "rank=" << rank << ": " << r_trg[0][4] << endl;
+  for (int iter = 0; !converged && iter < ITERATIONS; ++iter) {
+    if (rank == 0) cerr << "ITERATION " << (iter + 1) << endl;
     Unflatten(flat_t, &t);
-    cerr << "ITERATION " << (iter + 1) << endl;
     double likelihood = 0;
     double denom = 0.0;
     lc = 0;
@@ -350,7 +367,22 @@ int main(int argc, char** argv) {
     if (!SGD) {
       Flatten(g, &flat_g);
       obj = -likelihood;
-      // TODO - reduce gradient
+#if HAVE_MPI
+      rcv_grad.resize(flat_g.size(), 0.0);
+      mpi::reduce(world, &flat_g[0], flat_g.size(), &rcv_grad[0], plus<double>(), 0);
+      swap(flat_g, rcv_grad);
+      rcv_grad.clear();
+
+      double to = 0;
+      mpi::reduce(world, obj, to, plus<double>(), 0);
+      obj = to;
+      double tlh = 0;
+      mpi::reduce(world, likelihood, tlh, plus<double>(), 0);
+      likelihood = tlh;
+      double td = 0;
+      mpi::reduce(world, denom, td, plus<double>(), 0);
+      denom = td;
+#endif
     }
 
     if (rank == 0) {
@@ -376,10 +408,12 @@ int main(int argc, char** argv) {
       }
     }
 #ifdef HAVE_MPI
+    mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
     mpi::broadcast(world, converged, 0);
 #endif
   }
 
-  cerr << "TRANSLATION MATRIX:" << endl << t << endl;
+  if (rank == 0)
+    cerr << "TRANSLATION MATRIX:" << endl << t << endl;
   return 0;
 }
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index a0542b6e..d17785af 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -33,10 +33,12 @@ void CorpusTools::ReadFromFile(const string& filename,
   while(getline(in, line)) {
     const bool skip = (lc % size != rank);
     ++lc;
-    if (skip) continue;
     TD::ConvertSentence(line, &tmp);
-    src->push_back(vector<WordID>());
-    vector<WordID>* d = &src->back();
+    vector<WordID>* d = NULL;
+    if (!skip) {
+      src->push_back(vector<WordID>());
+      d = &src->back();
+    }
     set<WordID>* v = src_vocab;
     int s = 0;
     for (unsigned i = 0; i < tmp.size(); ++i) {
@@ -44,11 +46,13 @@ void CorpusTools::ReadFromFile(const string& filename,
       ++s;
       if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); }
       assert(trg);
-      trg->push_back(vector<WordID>());
-      d = &trg->back();
+      if (!skip) {
+        trg->push_back(vector<WordID>());
+        d = &trg->back();
+      }
       v = trg_vocab;
     } else {
-      d->push_back(tmp[i]);
+      if (d) d->push_back(tmp[i]);
       if (v) v->insert(tmp[i]);
     }
   }
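
Note: the lbl_model.cc hunks above follow a reduce-then-broadcast pattern: each rank computes a partial gradient over its shard, rank 0 sums the partials (and the objective, likelihood, and denominator), and the updated flattened parameters are broadcast before the next iteration. Below is a minimal stand-alone sketch of that pattern with Boost.MPI; the vector size, the plain gradient step, and the eta value are illustrative stand-ins, not the patch's code (the real program hands the reduced gradient to its own optimizer).

#include <boost/mpi.hpp>
#include <functional>
#include <vector>
#include <iostream>

namespace mpi = boost::mpi;
using namespace std;

int main(int argc, char** argv) {
  mpi::environment env(argc, argv);
  mpi::communicator world;
  const int rank = world.rank();

  vector<double> flat_t(100, 0.0);   // flattened parameters (same on all ranks)
  vector<double> flat_g(100, 0.0);   // this rank's partial gradient
  // ... each rank fills flat_g from its shard of the data ...

  // sum the partial gradients onto rank 0
  vector<double> rcv_grad(flat_g.size(), 0.0);
  mpi::reduce(world, &flat_g[0], flat_g.size(), &rcv_grad[0], plus<double>(), 0);

  if (rank == 0) {
    const double eta = 0.1;          // illustrative step size only
    for (size_t i = 0; i < flat_t.size(); ++i)
      flat_t[i] -= eta * rcv_grad[i];
  }

  // every rank gets the updated parameters before the next iteration
  mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);

  if (rank == 0) cerr << "updated " << flat_t.size() << " parameters" << endl;
  return 0;
}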
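Note: the corpus_tools.cc hunks change ReadFromFile so that lines skipped by the modulo sharding still contribute to the vocabulary sets, while only the owning rank stores the sentence itself. A minimal sketch of that behavior; ReadShard and the whitespace tokenizer are simplified stand-ins for CorpusTools::ReadFromFile and TD::ConvertSentence.

#include <cstddef>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <set>

using namespace std;

// Every rank scans every line so the vocabulary set is identical across
// ranks, but a sentence is stored only when (line_number % size == rank).
void ReadShard(const string& filename, int rank, int size,
               vector<vector<string> >* sents,
               set<string>* vocab) {
  ifstream in(filename.c_str());
  string line;
  int lc = 0;
  while (getline(in, line)) {
    const bool skip = (lc % size != rank);
    ++lc;
    vector<string>* d = NULL;
    if (!skip) {                 // keep the sentence only on its owner rank
      sents->push_back(vector<string>());
      d = &sents->back();
    }
    istringstream iss(line);
    string tok;
    while (iss >> tok) {
      if (d) d->push_back(tok);  // sentence tokens: owner rank only
      vocab->insert(tok);        // vocabulary: every rank, every line
    }
  }
}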