From d5a2a9c3bf18c1e414f79a757c1662fe422e2f5c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 16 Apr 2012 14:11:02 -0400 Subject: switch to log domain for matrix operations --- rst_parser/arc_factored.h | 2 +- rst_parser/arc_factored_marginals.cc | 24 +++++++++++++----------- rst_parser/mst_train.cc | 29 +++++++++++++++++++++-------- rst_parser/rst_test.cc | 16 +++++++++++++--- utils/logval.h | 13 ++++++++++--- 5 files changed, 58 insertions(+), 26 deletions(-) diff --git a/rst_parser/arc_factored.h b/rst_parser/arc_factored.h index d9a0bb24..4de38b66 100644 --- a/rst_parser/arc_factored.h +++ b/rst_parser/arc_factored.h @@ -56,7 +56,7 @@ class ArcFactoredForest { // Reweight edges so that edge_prob is the edge's marginals // optionally returns log partition - void EdgeMarginals(double* p_log_z = NULL); + void EdgeMarginals(prob_t* p_log_z = NULL); // This may not return a tree void PickBestParentForEachWord(EdgeSubset* st) const; diff --git a/rst_parser/arc_factored_marginals.cc b/rst_parser/arc_factored_marginals.cc index 16360b0d..acb8102a 100644 --- a/rst_parser/arc_factored_marginals.cc +++ b/rst_parser/arc_factored_marginals.cc @@ -9,37 +9,39 @@ using namespace std; #if HAVE_EIGEN #include -typedef Eigen::Matrix ArcMatrix; -typedef Eigen::Matrix RootVector; +typedef Eigen::Matrix ArcMatrix; +typedef Eigen::Matrix RootVector; -void ArcFactoredForest::EdgeMarginals(double *plog_z) { +void ArcFactoredForest::EdgeMarginals(prob_t *plog_z) { ArcMatrix A(num_words_,num_words_); RootVector r(num_words_); for (int h = 0; h < num_words_; ++h) { for (int m = 0; m < num_words_; ++m) { if (h != m) - A(h,m) = edges_(h,m).edge_prob.as_float(); + A(h,m) = edges_(h,m).edge_prob; else - A(h,m) = 0; + A(h,m) = prob_t::Zero(); } - r(h) = root_edges_[h].edge_prob.as_float(); + r(h) = root_edges_[h].edge_prob; } ArcMatrix L = -A; L.diagonal() = A.colwise().sum(); L.row(0) = r; ArcMatrix Linv = L.inverse(); - if (plog_z) *plog_z = log(Linv.determinant()); + if (plog_z) *plog_z = Linv.determinant(); RootVector rootMarginals = r.cwiseProduct(Linv.col(0)); + static const prob_t ZERO(0); + static const prob_t ONE(1); // ArcMatrix T = Linv; for (int h = 0; h < num_words_; ++h) { for (int m = 0; m < num_words_; ++m) { - const double marginal = (m == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,m) - - (h == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,h); - edges_(h,m).edge_prob = prob_t(marginal); + const prob_t marginal = (m == 0 ? ZERO : ONE) * A(h,m) * Linv(m,m) - + (h == 0 ? ZERO : ONE) * A(h,m) * Linv(m,h); + edges_(h,m).edge_prob = marginal; // T(h,m) = marginal; } - root_edges_[h].edge_prob = prob_t(rootMarginals(h)); + root_edges_[h].edge_prob = rootMarginals(h); } // cerr << "ROOT MARGINALS: " << rootMarginals.transpose() << endl; // cerr << "M:\n" << T << endl; diff --git a/rst_parser/mst_train.cc b/rst_parser/mst_train.cc index b5114726..c5cab6ec 100644 --- a/rst_parser/mst_train.cc +++ b/rst_parser/mst_train.cc @@ -23,7 +23,9 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { string cfg_file; opts.add_options() ("training_data,t",po::value()->default_value("-"), "File containing training data (jsent format)") - ("feature_function,F",po::value >()->composing(), "feature function") + ("feature_function,F",po::value >()->composing(), "feature function (multiple permitted)") + ("weights,w",po::value(), "Optional starting weights") + ("output_every_i_iterations,I",po::value()->default_value(1), "Write weights every I iterations") ("regularization_strength,C",po::value()->default_value(1.0), "Regularization strength") ("correction_buffers,m", po::value()->default_value(10), "LBFGS correction buffers"); po::options_description clo("Command line options"); @@ -161,9 +163,13 @@ int main(int argc, char** argv) { if (flag) cerr << endl; //cerr << "EMP: " << empirical << endl; //DE vector weights(FD::NumFeats(), 0.0); + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &weights); vector g(FD::NumFeats(), 0.0); cerr << "features initialized\noptimizing...\n"; boost::shared_ptr o; + int every = corpus.size() / 20; + if (!every) ++every; o.reset(new LBFGSOptimizer(g.size(), conf["correction_buffers"].as())); int iterations = 1000; for (int iter = 0; iter < iterations; ++iter) { @@ -174,11 +180,12 @@ int main(int argc, char** argv) { double obj = -empirical.dot(weights); // SparseVector mfm; //DE for (int i = 0; i < corpus.size(); ++i) { + if ((i + 1) % every == 0) cerr << '.' << flush; const int num_words = corpus[i].ts.words.size(); forests[i].Reweight(weights); - double lz; - forests[i].EdgeMarginals(&lz); - obj -= lz; + prob_t z; + forests[i].EdgeMarginals(&z); + obj -= log(z); //cerr << " O = " << (-corpus[i].features.dot(weights)) << " D=" << -lz << " OO= " << (-corpus[i].features.dot(weights) - lz) << endl; //cerr << " ZZ = " << zz << endl; for (int h = -1; h < num_words; ++h) { @@ -202,14 +209,20 @@ int main(int argc, char** argv) { gnorm += g[i]*g[i]; ostringstream ll; ll << "ITER=" << (iter+1) << "\tOBJ=" << (obj+r) << "\t[F=" << obj << " R=" << r << "]\tGnorm=" << sqrt(gnorm); - cerr << endl << ll.str() << endl; + cerr << ' ' << ll.str().substr(ll.str().find('\t')+1) << endl; obj += r; assert(obj >= 0); o->Optimize(obj, g, &weights); Weights::ShowLargestFeatures(weights); - string sl = ll.str(); - Weights::WriteToFile(o->HasConverged() ? "weights.final.gz" : "weights.cur.gz", weights, true, &sl); - if (o->HasConverged()) { cerr << "CONVERGED\n"; break; } + const bool converged = o->HasConverged(); + const char* ofname = converged ? "weights.final.gz" : "weights.cur.gz"; + if (converged || ((iter+1) % conf["output_every_i_iterations"].as()) == 0) { + cerr << "writing..." << flush; + const string sl = ll.str(); + Weights::WriteToFile(ofname, weights, true, &sl); + cerr << "done" << endl; + } + if (converged) { cerr << "CONVERGED\n"; break; } } forests[0].Reweight(weights); TreeSampler ts(forests[0]); diff --git a/rst_parser/rst_test.cc b/rst_parser/rst_test.cc index 7e6fb2c1..3bb95759 100644 --- a/rst_parser/rst_test.cc +++ b/rst_parser/rst_test.cc @@ -2,6 +2,8 @@ #include +#include + using namespace std; int main(int argc, char** argv) { @@ -28,11 +30,19 @@ int main(int argc, char** argv) { af(-1,2).edge_prob.logeq(9); EdgeSubset tree; // af.MaximumEdgeSubset(&tree); - double lz; - af.EdgeMarginals(&lz); - cerr << "Z = " << lz << endl; + prob_t z; + af.EdgeMarginals(&z); + cerr << "Z = " << abs(z) << endl; af.PickBestParentForEachWord(&tree); cerr << tree << endl; + typedef Eigen::Matrix M3; + M3 A = M3::Zero(); + A(0,0) = prob_t(1); + A(1,0) = prob_t(3); + A(0,1) = prob_t(2); + A(1,1) = prob_t(4); + prob_t det = A.determinant(); + cerr << det.as_float() << endl; return 0; } diff --git a/utils/logval.h b/utils/logval.h index 8a59d0b1..ec1f6acd 100644 --- a/utils/logval.h +++ b/utils/logval.h @@ -30,8 +30,6 @@ class LogVal { LogVal(init_minus_1) : s_(true),v_(0) { } LogVal(init_1) : s_(),v_(0) { } LogVal(init_0) : s_(),v_(LOGVAL_LOG0) { } - explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {} - explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { } LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {} LogVal(double lnx,init_lnx) : s_(),v_(lnx) {} static Self exp(T lnx) { return Self(lnx,false); } @@ -126,7 +124,7 @@ class LogVal { } Self operator-() const { - return Self(v_,-s_); + return Self(v_,!s_); } void negate() { s_ = !s_; } @@ -193,6 +191,15 @@ T log(const LogVal& o) { return o.v_; } +template +LogVal abs(const LogVal& o) { + if (o.s_) { + LogVal res = o; + res.s_ = false; + return res; + } else { return o; } +} + template LogVal pow(const LogVal& b, const T& e) { return b.pow(e); -- cgit v1.2.3