switch to log domain for matrix operations

author: Chris Dyer <cdyer@cs.cmu.edu> 2012-04-16 14:11:02 -0400
committer: Chris Dyer <cdyer@cs.cmu.edu> 2012-04-16 14:11:02 -0400
commit: cb0523471caff98a2ec89a3657c1385b53529c8d (patch)
tree: 8996c2ee7bf32807afe23e38492293fd95248dba
parent: c5ec52ded3f14271e25e97cefc8bac03b176f297 (diff)
5 files changed, 58 insertions, 26 deletions
diff --git a/rst_parser/arc_factored.h b/rst_parser/arc_factored.h
index d9a0bb24..4de38b66 100644
--- a/rst_parser/arc_factored.h
+++ b/rst_parser/arc_factored.h
@@ -56,7 +56,7 @@ class ArcFactoredForest {
 
   // Reweight edges so that edge_prob is the edge's marginals
   // optionally returns log partition
-  void EdgeMarginals(double* p_log_z = NULL);
+  void EdgeMarginals(prob_t* p_log_z = NULL);
 
   // This may not return a tree
   void PickBestParentForEachWord(EdgeSubset* st) const;
diff --git a/rst_parser/arc_factored_marginals.cc b/rst_parser/arc_factored_marginals.cc
index 16360b0d..acb8102a 100644
--- a/rst_parser/arc_factored_marginals.cc
+++ b/rst_parser/arc_factored_marginals.cc
@@ -9,37 +9,39 @@ using namespace std;
 #if HAVE_EIGEN
 
 #include <Eigen/Dense>
-typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> ArcMatrix;
-typedef Eigen::Matrix<double, Eigen::Dynamic, 1> RootVector;
+typedef Eigen::Matrix<prob_t, Eigen::Dynamic, Eigen::Dynamic> ArcMatrix;
+typedef Eigen::Matrix<prob_t, Eigen::Dynamic, 1> RootVector;
 
-void ArcFactoredForest::EdgeMarginals(double *plog_z) {
+void ArcFactoredForest::EdgeMarginals(prob_t *plog_z) {
   ArcMatrix A(num_words_,num_words_);
   RootVector r(num_words_);
   for (int h = 0; h < num_words_; ++h) {
     for (int m = 0; m < num_words_; ++m) {
       if (h != m)
-        A(h,m) = edges_(h,m).edge_prob.as_float();
+        A(h,m) = edges_(h,m).edge_prob;
       else
-        A(h,m) = 0;
+        A(h,m) = prob_t::Zero();
     }
-    r(h) = root_edges_[h].edge_prob.as_float();
+    r(h) = root_edges_[h].edge_prob;
   }
 
   ArcMatrix L = -A;
   L.diagonal() = A.colwise().sum();
   L.row(0) = r;
   ArcMatrix Linv = L.inverse();
-  if (plog_z) *plog_z = log(Linv.determinant());
+  if (plog_z) *plog_z = Linv.determinant();
   RootVector rootMarginals = r.cwiseProduct(Linv.col(0));
+  static const prob_t ZERO(0);
+  static const prob_t ONE(1);
 //  ArcMatrix T = Linv;
   for (int h = 0; h < num_words_; ++h) {
     for (int m = 0; m < num_words_; ++m) {
-      const double marginal = (m == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,m) -
-                              (h == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,h);
-      edges_(h,m).edge_prob = prob_t(marginal);
+      const prob_t marginal = (m == 0 ? ZERO : ONE) * A(h,m) * Linv(m,m) -
+                              (h == 0 ? ZERO : ONE) * A(h,m) * Linv(m,h);
+      edges_(h,m).edge_prob = marginal;
 //      T(h,m) = marginal;
     }
-    root_edges_[h].edge_prob = prob_t(rootMarginals(h));
+    root_edges_[h].edge_prob = rootMarginals(h);
   }
 //   cerr << "ROOT MARGINALS: " << rootMarginals.transpose() << endl;
 //  cerr << "M:\n" << T << endl;
diff --git a/rst_parser/mst_train.cc b/rst_parser/mst_train.cc
index b5114726..c5cab6ec 100644
--- a/rst_parser/mst_train.cc
+++ b/rst_parser/mst_train.cc
@@ -23,7 +23,9 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   string cfg_file;
   opts.add_options()
         ("training_data,t",po::value<string>()->default_value("-"), "File containing training data (jsent format)")
-        ("feature_function,F",po::value<vector<string> >()->composing(), "feature function")
+        ("feature_function,F",po::value<vector<string> >()->composing(), "feature function (multiple permitted)")
+        ("weights,w",po::value<string>(), "Optional starting weights")
+        ("output_every_i_iterations,I",po::value<unsigned>()->default_value(1), "Write weights every I iterations")
         ("regularization_strength,C",po::value<double>()->default_value(1.0), "Regularization strength")
         ("correction_buffers,m", po::value<int>()->default_value(10), "LBFGS correction buffers");
   po::options_description clo("Command line options");
@@ -161,9 +163,13 @@ int main(int argc, char** argv) {
   if (flag) cerr << endl;
   //cerr << "EMP: " << empirical << endl; //DE
   vector<weight_t> weights(FD::NumFeats(), 0.0);
+  if (conf.count("weights"))
+    Weights::InitFromFile(conf["weights"].as<string>(), &weights);
   vector<weight_t> g(FD::NumFeats(), 0.0);
   cerr << "features initialized\noptimizing...\n";
   boost::shared_ptr<BatchOptimizer> o;
+  int every = corpus.size() / 20;
+  if (!every) ++every;
   o.reset(new LBFGSOptimizer(g.size(), conf["correction_buffers"].as<int>()));
   int iterations = 1000;
   for (int iter = 0; iter < iterations; ++iter) {
@@ -174,11 +180,12 @@ int main(int argc, char** argv) {
     double obj = -empirical.dot(weights);
     // SparseVector<double> mfm;  //DE
     for (int i = 0; i < corpus.size(); ++i) {
+      if ((i + 1) % every == 0) cerr << '.' << flush;
       const int num_words = corpus[i].ts.words.size();
       forests[i].Reweight(weights);
-      double lz;
-      forests[i].EdgeMarginals(&lz);
-      obj -= lz;
+      prob_t z;
+      forests[i].EdgeMarginals(&z);
+      obj -= log(z);
       //cerr << " O = " << (-corpus[i].features.dot(weights)) << " D=" << -lz << "  OO= " << (-corpus[i].features.dot(weights) - lz) << endl;
       //cerr << " ZZ = " << zz << endl;
       for (int h = -1; h < num_words; ++h) {
@@ -202,14 +209,20 @@ int main(int argc, char** argv) {
       gnorm += g[i]*g[i];
     ostringstream ll;
     ll << "ITER=" << (iter+1) << "\tOBJ=" << (obj+r) << "\t[F=" << obj << " R=" << r << "]\tGnorm=" << sqrt(gnorm);
-    cerr << endl << ll.str() << endl;
+    cerr << ' ' << ll.str().substr(ll.str().find('\t')+1) << endl;
     obj += r;
     assert(obj >= 0);
     o->Optimize(obj, g, &weights);
     Weights::ShowLargestFeatures(weights);
-    string sl = ll.str();
-    Weights::WriteToFile(o->HasConverged() ? "weights.final.gz" : "weights.cur.gz", weights, true, &sl);
-    if (o->HasConverged()) { cerr << "CONVERGED\n"; break; }
+    const bool converged = o->HasConverged();
+    const char* ofname = converged ? "weights.final.gz" : "weights.cur.gz";
+    if (converged || ((iter+1) % conf["output_every_i_iterations"].as<unsigned>()) == 0) {
+      cerr << "writing..." << flush;
+      const string sl = ll.str();
+      Weights::WriteToFile(ofname, weights, true, &sl);
+      cerr << "done" << endl;
+    }
+    if (converged) { cerr << "CONVERGED\n"; break; }
   }
   forests[0].Reweight(weights);
   TreeSampler ts(forests[0]);
diff --git a/rst_parser/rst_test.cc b/rst_parser/rst_test.cc
index 7e6fb2c1..3bb95759 100644
--- a/rst_parser/rst_test.cc
+++ b/rst_parser/rst_test.cc
@@ -2,6 +2,8 @@
 
 #include <iostream>
 
+#include <Eigen/Dense>
+
 using namespace std;
 
 int main(int argc, char** argv) {
@@ -28,11 +30,19 @@ int main(int argc, char** argv) {
   af(-1,2).edge_prob.logeq(9);
   EdgeSubset tree;
 //  af.MaximumEdgeSubset(&tree);
-  double lz;
-  af.EdgeMarginals(&lz);
-  cerr << "Z = " << lz << endl;
+  prob_t z;
+  af.EdgeMarginals(&z);
+  cerr << "Z = " << abs(z) << endl;
   af.PickBestParentForEachWord(&tree);
   cerr << tree << endl;
+  typedef Eigen::Matrix<prob_t, 2, 2> M3;
+  M3 A = M3::Zero();
+  A(0,0) = prob_t(1);
+  A(1,0) = prob_t(3);
+  A(0,1) = prob_t(2);
+  A(1,1) = prob_t(4);
+  prob_t det = A.determinant();
+  cerr << det.as_float() << endl;
   return 0;
 }
 
diff --git a/utils/logval.h b/utils/logval.h
index 8a59d0b1..ec1f6acd 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -30,8 +30,6 @@ class LogVal {
   LogVal(init_minus_1) : s_(true),v_(0) {  }
   LogVal(init_1) : s_(),v_(0) {  }
   LogVal(init_0) : s_(),v_(LOGVAL_LOG0) {  }
-  explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
-  explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
   LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}
   LogVal(double lnx,init_lnx) : s_(),v_(lnx) {}
   static Self exp(T lnx) { return Self(lnx,false); }
@@ -126,7 +124,7 @@ class LogVal {
   }
 
   Self operator-() const {
-    return Self(v_,-s_);
+    return Self(v_,!s_);
   }
   void negate() { s_ = !s_; }
 
@@ -193,6 +191,15 @@ T log(const LogVal<T>& o) {
   return o.v_;
 }
 
+template<class T>
+LogVal<T> abs(const LogVal<T>& o) {
+  if (o.s_) {
+    LogVal<T> res = o;
+    res.s_ = false;
+    return res;
+  } else { return o; }
+}
+
 template <class T>
 LogVal<T> pow(const LogVal<T>& b, const T& e) {
   return b.pow(e);
author	Chris Dyer <cdyer@cs.cmu.edu>	2012-04-16 14:11:02 -0400
committer	Chris Dyer <cdyer@cs.cmu.edu>	2012-04-16 14:11:02 -0400
commit	cb0523471caff98a2ec89a3657c1385b53529c8d (patch)
tree	8996c2ee7bf32807afe23e38492293fd95248dba
parent	c5ec52ded3f14271e25e97cefc8bac03b176f297 (diff)