author     Chris Dyer <cdyer@cs.cmu.edu>  2012-03-01 23:09:41 -0500
committer  Chris Dyer <cdyer@cs.cmu.edu>  2012-03-01 23:09:41 -0500
commit     0885db8a3c07927bd1220bb1c639a0fe8687d6f6 (patch)
tree       3e174cf6bfbc97da6f35e7351a0cdbfa800e969e
parent     378ba2373374015c8de4b360a30227f072616c6c (diff)
parent     e1a0c140e9f31461ab45ec7f9533ad98d2b9caa9 (diff)
Merge branch 'master' of github.com:redpony/cdec
-rw-r--r--  decoder/decoder.cc       5
-rw-r--r--  training/lbl_model.cc   54
-rw-r--r--  utils/corpus_tools.cc   16
3 files changed, 57 insertions, 18 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 3394e0b8..69fbaf85 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -812,6 +812,9 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
abort();
}
+ if (conf.count("show_target_graph"))
+ HypergraphIO::WriteTarget(forest);
+
for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
const RescoringPass& rp = rescoring_passes[pass];
const vector<weight_t>& cur_weights = *rp.weight_vector;
@@ -1018,8 +1021,6 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
}
if (conf.count("show_cfg_search_space"))
HypergraphIO::WriteAsCFG(forest);
- if (conf.count("show_target_graph"))
- HypergraphIO::WriteTarget(forest);
if (has_ref) {
if (HG::Intersect(ref, &forest)) {
// if (crf_uniform_empirical) {
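
The two hunks above move the show_target_graph dump from after the rescoring passes to immediately after the first-pass forest is built, with the effect that the dump now shows the forest before any rescoring pass runs. Below is a minimal standalone sketch of the conf.count() option-guard pattern with boost::program_options; the option name dump_forest and the dump body are placeholders, not cdec code.

// Sketch (not cdec code): an option-guarded debug dump using the same
// boost::program_options conf.count() pattern as the decoder change above.
#include <iostream>
#include <boost/program_options.hpp>

namespace po = boost::program_options;

int main(int argc, char** argv) {
  po::options_description opts("Configuration options");
  opts.add_options()
      ("dump_forest", "write the forest to stderr as soon as it is built");
  po::variables_map conf;
  po::store(po::parse_command_line(argc, argv, opts), conf);
  po::notify(conf);

  // ... build the translation forest here ...

  if (conf.count("dump_forest")) {
    // In the decoder this guard wraps HypergraphIO::WriteTarget(forest),
    // now placed before the rescoring loop instead of after it.
    std::cerr << "dumping first-pass forest" << std::endl;
  }
  return 0;
}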
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index def5075a..a46ce33c 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -15,6 +15,7 @@
#ifdef HAVE_MPI
#include <boost/mpi/timer.hpp>
#include <boost/mpi.hpp>
+#include <boost/archive/text_oarchive.hpp>
namespace mpi = boost::mpi;
#endif
#include <boost/math/special_functions/fpclassify.hpp>
@@ -34,12 +35,26 @@ namespace mpi = boost::mpi;
namespace po = boost::program_options;
using namespace std;
-#define kDIMENSIONS 100
+#define kDIMENSIONS 10
typedef Eigen::Matrix<double, kDIMENSIONS, 1> RVector;
typedef Eigen::Matrix<double, 1, kDIMENSIONS> RTVector;
typedef Eigen::Matrix<double, kDIMENSIONS, kDIMENSIONS> TMatrix;
vector<RVector> r_src, r_trg;
+#if HAVE_MPI
+namespace boost {
+namespace serialization {
+
+template<class Archive>
+void serialize(Archive & ar, RVector & v, const unsigned int version) {
+ for (unsigned i = 0; i < kDIMENSIONS; ++i)
+ ar & v[i];
+}
+
+} // namespace serialization
+} // namespace boost
+#endif
+
bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
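
boost::mpi falls back to boost::serialization for anything that is not a built-in MPI datatype, so the vector<RVector> broadcasts added further down in this patch (for r_trg and r_src) need a serialize() overload for RVector; the non-intrusive free function added above provides it. Below is a standalone sketch of the same pattern, assuming a fixed-size Eigen column vector; the names RVec, kDim, and RVecList are placeholders, not cdec identifiers.

// Sketch: making a fixed-size Eigen vector usable with boost::mpi collectives
// by supplying a non-intrusive boost::serialization overload, as in the patch.
#include <vector>
#include <boost/mpi.hpp>
#include <boost/serialization/vector.hpp>   // serialization of std::vector<T>
#include <Eigen/Core>
#include <Eigen/StdVector>                  // aligned_allocator for std::vector

const int kDim = 10;
typedef Eigen::Matrix<double, kDim, 1> RVec;
typedef std::vector<RVec, Eigen::aligned_allocator<RVec> > RVecList;

namespace boost { namespace serialization {
template<class Archive>
void serialize(Archive& ar, RVec& v, const unsigned int /*version*/) {
  for (int i = 0; i < kDim; ++i)
    ar & v[i];                              // element-wise, as in the patch above
}
} }  // namespace boost::serialization

int main(int argc, char** argv) {
  boost::mpi::environment env(argc, argv);
  boost::mpi::communicator world;
  RVecList r(8, RVec::Zero());
  if (world.rank() == 0)
    for (size_t i = 0; i < r.size(); ++i) r[i].setRandom();
  boost::mpi::broadcast(world, r, 0);       // each element goes through serialize()
  return 0;
}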
@@ -224,7 +239,7 @@ int main(int argc, char** argv) {
srand(seed);
}
- TMatrix t;
+ TMatrix t = TMatrix::Zero();
if (rank == 0) {
t = TMatrix::Random() / 50.0;
for (unsigned i = 1; i < r_trg.size(); ++i) {
@@ -241,16 +256,18 @@ int main(int argc, char** argv) {
TMatrix g = TMatrix::Zero();
vector<TMatrix> exp_src;
vector<double> z_src;
- vector<double> flat_g, flat_t;
+ vector<double> flat_g, flat_t, rcv_grad;
Flatten(t, &flat_t);
bool converged = false;
- // TODO broadcast embeddings
- for (int iter = 0; !converged && iter < ITERATIONS; ++iter) {
-#ifdef HAVE_MPI
- mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
+#if HAVE_MPI
+ mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
+ mpi::broadcast(world, r_trg, 0);
+ mpi::broadcast(world, r_src, 0);
#endif
+ cerr << "rank=" << rank << ": " << r_trg[0][4] << endl;
+ for (int iter = 0; !converged && iter < ITERATIONS; ++iter) {
+ if (rank == 0) cerr << "ITERATION " << (iter + 1) << endl;
Unflatten(flat_t, &t);
- cerr << "ITERATION " << (iter + 1) << endl;
double likelihood = 0;
double denom = 0.0;
lc = 0;
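
This hunk moves the parameter broadcast ahead of the training loop: the flattened translation matrix and both embedding tables are broadcast from rank 0 before the first iteration (the updated flat_t is re-broadcast at the end of each iteration, see the last hunk), each rank prints r_trg[0][4] as a quick consistency check, and the per-iteration banner is printed on rank 0 only. Below is a standalone sketch of the pointer-based broadcast used for the flattened matrix; the buffer size and values are toy placeholders.

// Sketch: broadcasting a flat parameter buffer from rank 0 with boost::mpi.
// The pointer overload fills the buffer in place, so every rank must size
// the vector identically before the call.
#include <iostream>
#include <vector>
#include <boost/mpi.hpp>

int main(int argc, char** argv) {
  boost::mpi::environment env(argc, argv);
  boost::mpi::communicator world;

  std::vector<double> flat_t(100, 0.0);        // same size on every rank
  if (world.rank() == 0)
    for (size_t i = 0; i < flat_t.size(); ++i)
      flat_t[i] = 0.01 * i;                    // rank 0 owns the initial values

  // Same call shape as the patch: broadcast once before the optimization loop.
  boost::mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);

  // Cheap per-rank sanity check, analogous to printing r_trg[0][4] above.
  std::cerr << "rank=" << world.rank() << ": flat_t[4]=" << flat_t[4] << std::endl;
  return 0;
}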
@@ -350,7 +367,22 @@ int main(int argc, char** argv) {
if (!SGD) {
Flatten(g, &flat_g);
obj = -likelihood;
- // TODO - reduce gradient
+#if HAVE_MPI
+ rcv_grad.resize(flat_g.size(), 0.0);
+ mpi::reduce(world, &flat_g[0], flat_g.size(), &rcv_grad[0], plus<double>(), 0);
+ swap(flat_g, rcv_grad);
+ rcv_grad.clear();
+
+ double to = 0;
+ mpi::reduce(world, obj, to, plus<double>(), 0);
+ obj = to;
+ double tlh = 0;
+ mpi::reduce(world, likelihood, tlh, plus<double>(), 0);
+ likelihood = tlh;
+ double td = 0;
+ mpi::reduce(world, denom, td, plus<double>(), 0);
+ denom = td;
+#endif
}
if (rank == 0) {
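
In the batch (non-SGD) branch each rank now holds only the gradient, objective, likelihood, and denominator computed on its own shard of the data; mpi::reduce sums these onto rank 0, the summed gradient is swapped back into flat_g, and the following if (rank == 0) block works with the totals. Below is a standalone sketch of the two reduce overloads used here, one for a flat buffer and one for scalars; sizes and values are toy placeholders.

// Sketch: summing per-rank partial results onto rank 0 with boost::mpi::reduce,
// mirroring the gradient/objective reduction in the patch above.
#include <functional>   // std::plus
#include <iostream>
#include <vector>
#include <boost/mpi.hpp>

int main(int argc, char** argv) {
  boost::mpi::environment env(argc, argv);
  boost::mpi::communicator world;

  // Toy per-rank partial gradient and partial objective.
  std::vector<double> flat_g(8, 1.0 + world.rank());
  double obj = 0.5 * (world.rank() + 1);

  // Buffer overload: element-wise sums land in rcv_grad; only rank 0 receives
  // the summed result (the patch then swap()s it back into flat_g).
  std::vector<double> rcv_grad(flat_g.size(), 0.0);
  boost::mpi::reduce(world, &flat_g[0], flat_g.size(),
                     &rcv_grad[0], std::plus<double>(), 0);

  // Scalar overload: total objective, meaningful on rank 0 only.
  double total_obj = 0;
  boost::mpi::reduce(world, obj, total_obj, std::plus<double>(), 0);

  if (world.rank() == 0)
    std::cerr << "obj=" << total_obj << " g[0]=" << rcv_grad[0] << std::endl;
  return 0;
}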
@@ -376,10 +408,12 @@ int main(int argc, char** argv) {
}
}
#ifdef HAVE_MPI
+ mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
mpi::broadcast(world, converged, 0);
#endif
}
- cerr << "TRANSLATION MATRIX:" << endl << t << endl;
+ if (rank == 0)
+ cerr << "TRANSLATION MATRIX:" << endl << t << endl;
return 0;
}
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index a0542b6e..d17785af 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -33,10 +33,12 @@ void CorpusTools::ReadFromFile(const string& filename,
while(getline(in, line)) {
const bool skip = (lc % size != rank);
++lc;
- if (skip) continue;
TD::ConvertSentence(line, &tmp);
- src->push_back(vector<WordID>());
- vector<WordID>* d = &src->back();
+ vector<WordID>* d = NULL;
+ if (!skip) {
+ src->push_back(vector<WordID>());
+ d = &src->back();
+ }
set<WordID>* v = src_vocab;
int s = 0;
for (unsigned i = 0; i < tmp.size(); ++i) {
@@ -44,11 +46,13 @@ void CorpusTools::ReadFromFile(const string& filename,
++s;
if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); }
assert(trg);
- trg->push_back(vector<WordID>());
- d = &trg->back();
+ if (!skip) {
+ trg->push_back(vector<WordID>());
+ d = &trg->back();
+ }
v = trg_vocab;
} else {
- d->push_back(tmp[i]);
+ if (d) d->push_back(tmp[i]);
if (v) v->insert(tmp[i]);
}
}
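
The corpus_tools.cc change keeps the round-robin sharding (line lc belongs to rank lc % size) but no longer skips foreign lines entirely: every rank still runs TD::ConvertSentence on every line and inserts its tokens into the source/target vocabulary sets, while only the lines it owns are appended to its src/trg corpus. This keeps the vocabularies identical across MPI ranks (and, since every rank now converts every sentence, presumably the interned word ids as well). Below is a standalone sketch of the same pattern over plain string tokens; the real reader also splits source from target at the ||| delimiter, which is omitted here.

// Sketch: round-robin corpus sharding where every rank reads every line for
// vocabulary building, but only keeps lines with lc % size == rank in its
// local shard (same pattern as the corpus_tools.cc change above).
#include <fstream>
#include <set>
#include <sstream>
#include <string>
#include <vector>

void ReadSharded(const std::string& filename, int rank, int size,
                 std::vector<std::vector<std::string> >* shard,
                 std::set<std::string>* vocab) {
  std::ifstream in(filename.c_str());
  std::string line;
  int lc = 0;
  while (std::getline(in, line)) {
    const bool skip = (lc % size != rank);   // does another rank own this line?
    ++lc;
    std::vector<std::string>* d = NULL;
    if (!skip) {                             // only owned lines join the shard
      shard->push_back(std::vector<std::string>());
      d = &shard->back();
    }
    std::istringstream ss(line);
    std::string tok;
    while (ss >> tok) {
      if (d) d->push_back(tok);              // shard: owned lines only
      if (vocab) vocab->insert(tok);         // vocab: every line, every rank
    }
  }
}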