summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@cs.cmu.edu> 2012-04-16 14:11:02 -0400
committer: Chris Dyer <cdyer@cs.cmu.edu> 2012-04-16 14:11:02 -0400
commit: cb0523471caff98a2ec89a3657c1385b53529c8d (patch)
tree: 8996c2ee7bf32807afe23e38492293fd95248dba
parent: c5ec52ded3f14271e25e97cefc8bac03b176f297 (diff)
switch to log domain for matrix operations
-rw-r--r-- rst_parser/arc_factored.h | 2
-rw-r--r-- rst_parser/arc_factored_marginals.cc | 24
-rw-r--r-- rst_parser/mst_train.cc | 29
-rw-r--r-- rst_parser/rst_test.cc | 16
-rw-r--r-- utils/logval.h | 13
5 files changed, 58 insertions, 26 deletions
diff --git a/rst_parser/arc_factored.h b/rst_parser/arc_factored.h
index d9a0bb24..4de38b66 100644
--- a/rst_parser/arc_factored.h
+++ b/rst_parser/arc_factored.h
@@ -56,7 +56,7 @@ class ArcFactoredForest {
// Reweight edges so that edge_prob is the edge's marginals
// optionally returns log partition
- void EdgeMarginals(double* p_log_z = NULL);
+ void EdgeMarginals(prob_t* p_log_z = NULL);
// This may not return a tree
void PickBestParentForEachWord(EdgeSubset* st) const;
diff --git a/rst_parser/arc_factored_marginals.cc b/rst_parser/arc_factored_marginals.cc
index 16360b0d..acb8102a 100644
--- a/rst_parser/arc_factored_marginals.cc
+++ b/rst_parser/arc_factored_marginals.cc
@@ -9,37 +9,39 @@ using namespace std;
#if HAVE_EIGEN
#include <Eigen/Dense>
-typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> ArcMatrix;
-typedef Eigen::Matrix<double, Eigen::Dynamic, 1> RootVector;
+typedef Eigen::Matrix<prob_t, Eigen::Dynamic, Eigen::Dynamic> ArcMatrix;
+typedef Eigen::Matrix<prob_t, Eigen::Dynamic, 1> RootVector;
-void ArcFactoredForest::EdgeMarginals(double *plog_z) {
+void ArcFactoredForest::EdgeMarginals(prob_t *plog_z) {
ArcMatrix A(num_words_,num_words_);
RootVector r(num_words_);
for (int h = 0; h < num_words_; ++h) {
for (int m = 0; m < num_words_; ++m) {
if (h != m)
- A(h,m) = edges_(h,m).edge_prob.as_float();
+ A(h,m) = edges_(h,m).edge_prob;
else
- A(h,m) = 0;
+ A(h,m) = prob_t::Zero();
}
- r(h) = root_edges_[h].edge_prob.as_float();
+ r(h) = root_edges_[h].edge_prob;
}
ArcMatrix L = -A;
L.diagonal() = A.colwise().sum();
L.row(0) = r;
ArcMatrix Linv = L.inverse();
- if (plog_z) *plog_z = log(Linv.determinant());
+ if (plog_z) *plog_z = Linv.determinant();
RootVector rootMarginals = r.cwiseProduct(Linv.col(0));
+ static const prob_t ZERO(0);
+ static const prob_t ONE(1);
// ArcMatrix T = Linv;
for (int h = 0; h < num_words_; ++h) {
for (int m = 0; m < num_words_; ++m) {
- const double marginal = (m == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,m) -
- (h == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,h);
- edges_(h,m).edge_prob = prob_t(marginal);
+ const prob_t marginal = (m == 0 ? ZERO : ONE) * A(h,m) * Linv(m,m) -
+ (h == 0 ? ZERO : ONE) * A(h,m) * Linv(m,h);
+ edges_(h,m).edge_prob = marginal;
// T(h,m) = marginal;
}
- root_edges_[h].edge_prob = prob_t(rootMarginals(h));
+ root_edges_[h].edge_prob = rootMarginals(h);
}
// cerr << "ROOT MARGINALS: " << rootMarginals.transpose() << endl;
// cerr << "M:\n" << T << endl;
diff --git a/rst_parser/mst_train.cc b/rst_parser/mst_train.cc
index b5114726..c5cab6ec 100644
--- a/rst_parser/mst_train.cc
+++ b/rst_parser/mst_train.cc
@@ -23,7 +23,9 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
string cfg_file;
opts.add_options()
("training_data,t",po::value<string>()->default_value("-"), "File containing training data (jsent format)")
- ("feature_function,F",po::value<vector<string> >()->composing(), "feature function")
+ ("feature_function,F",po::value<vector<string> >()->composing(), "feature function (multiple permitted)")
+ ("weights,w",po::value<string>(), "Optional starting weights")
+ ("output_every_i_iterations,I",po::value<unsigned>()->default_value(1), "Write weights every I iterations")
("regularization_strength,C",po::value<double>()->default_value(1.0), "Regularization strength")
("correction_buffers,m", po::value<int>()->default_value(10), "LBFGS correction buffers");
po::options_description clo("Command line options");
@@ -161,9 +163,13 @@ int main(int argc, char** argv) {
if (flag) cerr << endl;
//cerr << "EMP: " << empirical << endl; //DE
vector<weight_t> weights(FD::NumFeats(), 0.0);
+ if (conf.count("weights"))
+ Weights::InitFromFile(conf["weights"].as<string>(), &weights);
vector<weight_t> g(FD::NumFeats(), 0.0);
cerr << "features initialized\noptimizing...\n";
boost::shared_ptr<BatchOptimizer> o;
+ int every = corpus.size() / 20;
+ if (!every) ++every;
o.reset(new LBFGSOptimizer(g.size(), conf["correction_buffers"].as<int>()));
int iterations = 1000;
for (int iter = 0; iter < iterations; ++iter) {
@@ -174,11 +180,12 @@ int main(int argc, char** argv) {
double obj = -empirical.dot(weights);
// SparseVector<double> mfm; //DE
for (int i = 0; i < corpus.size(); ++i) {
+ if ((i + 1) % every == 0) cerr << '.' << flush;
const int num_words = corpus[i].ts.words.size();
forests[i].Reweight(weights);
- double lz;
- forests[i].EdgeMarginals(&lz);
- obj -= lz;
+ prob_t z;
+ forests[i].EdgeMarginals(&z);
+ obj -= log(z);
//cerr << " O = " << (-corpus[i].features.dot(weights)) << " D=" << -lz << " OO= " << (-corpus[i].features.dot(weights) - lz) << endl;
//cerr << " ZZ = " << zz << endl;
for (int h = -1; h < num_words; ++h) {
@@ -202,14 +209,20 @@ int main(int argc, char** argv) {
gnorm += g[i]*g[i];
ostringstream ll;
ll << "ITER=" << (iter+1) << "\tOBJ=" << (obj+r) << "\t[F=" << obj << " R=" << r << "]\tGnorm=" << sqrt(gnorm);
- cerr << endl << ll.str() << endl;
+ cerr << ' ' << ll.str().substr(ll.str().find('\t')+1) << endl;
obj += r;
assert(obj >= 0);
o->Optimize(obj, g, &weights);
Weights::ShowLargestFeatures(weights);
- string sl = ll.str();
- Weights::WriteToFile(o->HasConverged() ? "weights.final.gz" : "weights.cur.gz", weights, true, &sl);
- if (o->HasConverged()) { cerr << "CONVERGED\n"; break; }
+ const bool converged = o->HasConverged();
+ const char* ofname = converged ? "weights.final.gz" : "weights.cur.gz";
+ if (converged || ((iter+1) % conf["output_every_i_iterations"].as<unsigned>()) == 0) {
+ cerr << "writing..." << flush;
+ const string sl = ll.str();
+ Weights::WriteToFile(ofname, weights, true, &sl);
+ cerr << "done" << endl;
+ }
+ if (converged) { cerr << "CONVERGED\n"; break; }
}
forests[0].Reweight(weights);
TreeSampler ts(forests[0]);
diff --git a/rst_parser/rst_test.cc b/rst_parser/rst_test.cc
index 7e6fb2c1..3bb95759 100644
--- a/rst_parser/rst_test.cc
+++ b/rst_parser/rst_test.cc
@@ -2,6 +2,8 @@
#include <iostream>
+#include <Eigen/Dense>
+
using namespace std;
int main(int argc, char** argv) {
@@ -28,11 +30,19 @@ int main(int argc, char** argv) {
af(-1,2).edge_prob.logeq(9);
EdgeSubset tree;
// af.MaximumEdgeSubset(&tree);
- double lz;
- af.EdgeMarginals(&lz);
- cerr << "Z = " << lz << endl;
+ prob_t z;
+ af.EdgeMarginals(&z);
+ cerr << "Z = " << abs(z) << endl;
af.PickBestParentForEachWord(&tree);
cerr << tree << endl;
+ typedef Eigen::Matrix<prob_t, 2, 2> M3;
+ M3 A = M3::Zero();
+ A(0,0) = prob_t(1);
+ A(1,0) = prob_t(3);
+ A(0,1) = prob_t(2);
+ A(1,1) = prob_t(4);
+ prob_t det = A.determinant();
+ cerr << det.as_float() << endl;
return 0;
}
diff --git a/utils/logval.h b/utils/logval.h
index 8a59d0b1..ec1f6acd 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -30,8 +30,6 @@ class LogVal {
LogVal(init_minus_1) : s_(true),v_(0) { }
LogVal(init_1) : s_(),v_(0) { }
LogVal(init_0) : s_(),v_(LOGVAL_LOG0) { }
- explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
- explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}
LogVal(double lnx,init_lnx) : s_(),v_(lnx) {}
static Self exp(T lnx) { return Self(lnx,false); }
@@ -126,7 +124,7 @@ class LogVal {
}
Self operator-() const {
- return Self(v_,-s_);
+ return Self(v_,!s_);
}
void negate() { s_ = !s_; }
@@ -193,6 +191,15 @@ T log(const LogVal<T>& o) {
return o.v_;
}
+template<class T>
+LogVal<T> abs(const LogVal<T>& o) {
+ if (o.s_) {
+ LogVal<T> res = o;
+ res.s_ = false;
+ return res;
+ } else { return o; }
+}
+
template <class T>
LogVal<T> pow(const LogVal<T>& b, const T& e) {
return b.pow(e);