From 19147c5f45b40eac1e0ae1bc8bc8ccf90d1ea56c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 14 Apr 2012 01:52:14 -0400 Subject: matrix tree theorem stuff --- rst_parser/arc_factored_marginals.cc | 52 ++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 rst_parser/arc_factored_marginals.cc (limited to 'rst_parser/arc_factored_marginals.cc') diff --git a/rst_parser/arc_factored_marginals.cc b/rst_parser/arc_factored_marginals.cc new file mode 100644 index 00000000..9851b59a --- /dev/null +++ b/rst_parser/arc_factored_marginals.cc @@ -0,0 +1,52 @@ +#include "arc_factored.h" + +#include + +#include "config.h" + +using namespace std; + +#if HAVE_EIGEN + +#include +typedef Eigen::Matrix ArcMatrix; +typedef Eigen::Matrix RootVector; + +void ArcFactoredForest::EdgeMarginals(double *plog_z) { + ArcMatrix A(num_words_,num_words_); + RootVector r(num_words_); + for (int h = 0; h < num_words_; ++h) { + for (int m = 0; m < num_words_; ++m) { + if (h != m) + A(h,m) = edges_(h,m).edge_prob.as_float(); + else + A(h,m) = 0; + } + r(h) = root_edges_[h].edge_prob.as_float(); + } + + ArcMatrix L = -A; + L.diagonal() = A.colwise().sum(); + L.row(0) = r; + ArcMatrix Linv = L.inverse(); + if (plog_z) *plog_z = log(Linv.determinant()); + RootVector rootMarginals = r.cwiseProduct(Linv.col(0)); + for (int h = 0; h < num_words_; ++h) { + for (int m = 0; m < num_words_; ++m) { + edges_(h,m).edge_prob = prob_t((m == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,m) - + (h == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,h)); + } + root_edges_[h].edge_prob = prob_t(rootMarginals(h)); + } + // cerr << "ROOT MARGINALS: " << rootMarginals.transpose() << endl; +} + +#else + +void ArcFactoredForest::EdgeMarginals(double*) { + cerr << "EdgeMarginals() requires --with-eigen!\n"; + abort(); +} + +#endif + -- cgit v1.2.3 From 372835df257ddd084e47cab962e932615f92f09c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 15 Apr 2012 17:28:08 -0400 Subject: crf training of arc-factored dep parser --- rst_parser/Makefile.am | 6 +- rst_parser/arc_factored.cc | 29 +++-- rst_parser/arc_factored.h | 53 ++++++---- rst_parser/arc_factored_marginals.cc | 10 +- rst_parser/arc_ff.cc | 64 +++++++++++ rst_parser/arc_ff.h | 43 ++++++++ rst_parser/mst_train.cc | 200 ++++++++++++++++++++++++++++++++++- rst_parser/rst_test.cc | 18 ++-- 8 files changed, 379 insertions(+), 44 deletions(-) create mode 100644 rst_parser/arc_ff.cc create mode 100644 rst_parser/arc_ff.h (limited to 'rst_parser/arc_factored_marginals.cc') diff --git a/rst_parser/Makefile.am b/rst_parser/Makefile.am index b61a20dd..2b64b43a 100644 --- a/rst_parser/Makefile.am +++ b/rst_parser/Makefile.am @@ -8,12 +8,12 @@ TESTS = rst_test noinst_LIBRARIES = librst.a -librst_a_SOURCES = arc_factored.cc arc_factored_marginals.cc rst.cc +librst_a_SOURCES = arc_factored.cc arc_factored_marginals.cc rst.cc arc_ff.cc mst_train_SOURCES = mst_train.cc -mst_train_LDADD = librst.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +mst_train_LDADD = librst.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../training/optimize.o -lz rst_test_SOURCES = rst_test.cc rst_test_LDADD = librst.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I../klm +AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/training -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I../klm diff --git a/rst_parser/arc_factored.cc b/rst_parser/arc_factored.cc index b2c2c427..44e769b8 100644 --- a/rst_parser/arc_factored.cc +++ b/rst_parser/arc_factored.cc @@ -6,23 +6,38 @@ #include #include +#include "arc_ff.h" + using namespace std; using namespace std::tr1; using namespace boost; +void ArcFactoredForest::ExtractFeatures(const TaggedSentence& sentence, + const std::vector >& ffs) { + for (int i = 0; i < ffs.size(); ++i) { + const ArcFeatureFunction& ff = *ffs[i]; + for (int m = 0; m < num_words_; ++m) { + for (int h = 0; h < num_words_; ++h) { + ff.EgdeFeatures(sentence, h, m, &edges_(h,m).features); + } + ff.EgdeFeatures(sentence, -1, m, &root_edges_[m].features); + } + } +} + void ArcFactoredForest::PickBestParentForEachWord(EdgeSubset* st) const { - for (int m = 1; m <= num_words_; ++m) { - int best_head = -1; + for (int m = 0; m < num_words_; ++m) { + int best_head = -2; prob_t best_score; - for (int h = 0; h <= num_words_; ++h) { + for (int h = -1; h < num_words_; ++h) { const Edge& edge = (*this)(h,m); - if (best_head < 0 || edge.edge_prob > best_score) { + if (best_head < -1 || edge.edge_prob > best_score) { best_score = edge.edge_prob; best_head = h; } } - assert(best_head >= 0); - if (best_head) + assert(best_head >= -1); + if (best_head >= 0) st->h_m_pairs.push_back(make_pair(best_head, m)); else st->roots.push_back(m); @@ -56,7 +71,7 @@ struct PriorityQueue { }; // based on Trajan 1977 -void ArcFactoredForest::MaximumEdgeSubset(EdgeSubset* st) const { +void ArcFactoredForest::MaximumSpanningTree(EdgeSubset* st) const { typedef disjoint_sets_with_storage DisjointSet; DisjointSet strongly(num_words_ + 1); diff --git a/rst_parser/arc_factored.h b/rst_parser/arc_factored.h index 3003a86e..a95f8230 100644 --- a/rst_parser/arc_factored.h +++ b/rst_parser/arc_factored.h @@ -5,37 +5,52 @@ #include #include #include +#include #include "array2d.h" #include "sparse_vector.h" #include "prob.h" #include "weights.h" +#include "wordid.h" + +struct TaggedSentence { + std::vector words; + std::vector pos; +}; struct EdgeSubset { EdgeSubset() {} std::vector roots; // unless multiroot trees are supported, this // will have a single member - std::vector > h_m_pairs; // h,m start at *1* + std::vector > h_m_pairs; // h,m start at 0 }; +struct ArcFeatureFunction; class ArcFactoredForest { public: - explicit ArcFactoredForest(short num_words) : - num_words_(num_words), - root_edges_(num_words), - edges_(num_words, num_words) { + ArcFactoredForest() : num_words_() {} + explicit ArcFactoredForest(short num_words) { + resize(num_words); + } + + void resize(unsigned num_words) { + num_words_ = num_words; + root_edges_.clear(); + edges_.clear(); + root_edges_.resize(num_words); + edges_.resize(num_words, num_words); for (int h = 0; h < num_words; ++h) { for (int m = 0; m < num_words; ++m) { - edges_(h, m).h = h + 1; - edges_(h, m).m = m + 1; + edges_(h, m).h = h; + edges_(h, m).m = m; } - root_edges_[h].h = 0; - root_edges_[h].m = h + 1; + root_edges_[h].h = -1; + root_edges_[h].m = h; } } // compute the maximum spanning tree based on the current weighting // using the O(n^2) CLE algorithm - void MaximumEdgeSubset(EdgeSubset* st) const; + void MaximumSpanningTree(EdgeSubset* st) const; // Reweight edges so that edge_prob is the edge's marginals // optionally returns log partition @@ -52,20 +67,16 @@ class ArcFactoredForest { prob_t edge_prob; }; + // set eges_[*].features + void ExtractFeatures(const TaggedSentence& sentence, + const std::vector >& ffs); + const Edge& operator()(short h, short m) const { - assert(m > 0); - assert(m <= num_words_); - assert(h >= 0); - assert(h <= num_words_); - return h ? edges_(h - 1, m - 1) : root_edges_[m - 1]; + return h >= 0 ? edges_(h, m) : root_edges_[m]; } Edge& operator()(short h, short m) { - assert(m > 0); - assert(m <= num_words_); - assert(h >= 0); - assert(h <= num_words_); - return h ? edges_(h - 1, m - 1) : root_edges_[m - 1]; + return h >= 0 ? edges_(h, m) : root_edges_[m]; } float Weight(short h, short m) const { @@ -87,7 +98,7 @@ class ArcFactoredForest { } private: - unsigned num_words_; + int num_words_; std::vector root_edges_; Array2D edges_; }; diff --git a/rst_parser/arc_factored_marginals.cc b/rst_parser/arc_factored_marginals.cc index 9851b59a..16360b0d 100644 --- a/rst_parser/arc_factored_marginals.cc +++ b/rst_parser/arc_factored_marginals.cc @@ -31,14 +31,18 @@ void ArcFactoredForest::EdgeMarginals(double *plog_z) { ArcMatrix Linv = L.inverse(); if (plog_z) *plog_z = log(Linv.determinant()); RootVector rootMarginals = r.cwiseProduct(Linv.col(0)); +// ArcMatrix T = Linv; for (int h = 0; h < num_words_; ++h) { for (int m = 0; m < num_words_; ++m) { - edges_(h,m).edge_prob = prob_t((m == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,m) - - (h == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,h)); + const double marginal = (m == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,m) - + (h == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,h); + edges_(h,m).edge_prob = prob_t(marginal); +// T(h,m) = marginal; } root_edges_[h].edge_prob = prob_t(rootMarginals(h)); } - // cerr << "ROOT MARGINALS: " << rootMarginals.transpose() << endl; +// cerr << "ROOT MARGINALS: " << rootMarginals.transpose() << endl; +// cerr << "M:\n" << T << endl; } #else diff --git a/rst_parser/arc_ff.cc b/rst_parser/arc_ff.cc new file mode 100644 index 00000000..f9effbda --- /dev/null +++ b/rst_parser/arc_ff.cc @@ -0,0 +1,64 @@ +#include "arc_ff.h" + +#include "tdict.h" +#include "fdict.h" +#include "sentence_metadata.h" + +using namespace std; + +ArcFeatureFunction::~ArcFeatureFunction() {} + +void ArcFeatureFunction::PrepareForInput(const TaggedSentence&) {} + +DistancePenalty::DistancePenalty(const string&) : fidw_(FD::Convert("Distance")), fidr_(FD::Convert("RootDistance")) {} + +void DistancePenalty::EdgeFeaturesImpl(const TaggedSentence& sent, + short h, + short m, + SparseVector* features) const { + const bool dir = m < h; + const bool is_root = (h == -1); + int v = m - h; + if (v < 0) { + v= -1 - int(log(-v) / log(2)); + } else { + v= int(log(v) / log(2)); + } + static map lenmap; + int& lenfid = lenmap[v]; + if (!lenfid) { + ostringstream os; + if (v < 0) os << "LenL" << -v; else os << "LenR" << v; + lenfid = FD::Convert(os.str()); + } + features->set_value(lenfid, 1.0); + const string& lenstr = FD::Convert(lenfid); + if (!is_root) { + static int modl = FD::Convert("ModLeft"); + static int modr = FD::Convert("ModRight"); + if (dir) features->set_value(modl, 1); + else features->set_value(modr, 1); + } + if (is_root) { + ostringstream os; + os << "ROOT:" << TD::Convert(sent.pos[m]); + features->set_value(FD::Convert(os.str()), 1.0); + os << "_" << lenstr; + features->set_value(FD::Convert(os.str()), 1.0); + } else { // not root + ostringstream os; + os << "HM:" << TD::Convert(sent.pos[h]) << '_' << TD::Convert(sent.pos[m]); + features->set_value(FD::Convert(os.str()), 1.0); + os << '_' << dir; + features->set_value(FD::Convert(os.str()), 1.0); + os << '_' << lenstr; + features->set_value(FD::Convert(os.str()), 1.0); + ostringstream os2; + os2 << "LexHM:" << TD::Convert(sent.words[h]) << '_' << TD::Convert(sent.words[m]); + features->set_value(FD::Convert(os2.str()), 1.0); + os2 << '_' << dir; + features->set_value(FD::Convert(os2.str()), 1.0); + os2 << '_' << lenstr; + features->set_value(FD::Convert(os2.str()), 1.0); + } +} diff --git a/rst_parser/arc_ff.h b/rst_parser/arc_ff.h new file mode 100644 index 00000000..bc51fef4 --- /dev/null +++ b/rst_parser/arc_ff.h @@ -0,0 +1,43 @@ +#ifndef _ARC_FF_H_ +#define _ARC_FF_H_ + +#include +#include "sparse_vector.h" +#include "weights.h" +#include "arc_factored.h" + +struct TaggedSentence; +class ArcFeatureFunction { + public: + virtual ~ArcFeatureFunction(); + + // called once, per input, before any calls to EdgeFeatures + // used to initialize sentence-specific data structures + virtual void PrepareForInput(const TaggedSentence& sentence); + + inline void EgdeFeatures(const TaggedSentence& sentence, + short h, + short m, + SparseVector* features) const { + EdgeFeaturesImpl(sentence, h, m, features); + } + protected: + virtual void EdgeFeaturesImpl(const TaggedSentence& sentence, + short h, + short m, + SparseVector* features) const = 0; +}; + +class DistancePenalty : public ArcFeatureFunction { + public: + DistancePenalty(const std::string& param); + protected: + virtual void EdgeFeaturesImpl(const TaggedSentence& sentence, + short h, + short m, + SparseVector* features) const; + private: + const int fidw_, fidr_; +}; + +#endif diff --git a/rst_parser/mst_train.cc b/rst_parser/mst_train.cc index 7b5af4c1..def23edb 100644 --- a/rst_parser/mst_train.cc +++ b/rst_parser/mst_train.cc @@ -1,12 +1,210 @@ #include "arc_factored.h" +#include #include +#include +#include + +#include "arc_ff.h" +#include "arc_ff_factory.h" +#include "stringlib.h" +#include "filelib.h" +#include "tdict.h" +#include "picojson.h" +#include "optimize.h" +#include "weights.h" using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + string cfg_file; + opts.add_options() + ("training_data,t",po::value()->default_value("-"), "File containing training data (jsent format)") + ("feature_function,F",po::value >()->composing(), "feature function") + ("regularization_strength,C",po::value()->default_value(1.0), "Regularization strength") + ("correction_buffers,m", po::value()->default_value(10), "LBFGS correction buffers"); + po::options_description clo("Command line options"); + clo.add_options() + ("config,c", po::value(&cfg_file), "Configuration file") + ("help,?", "Print this help message and exit"); + + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(dconfig_options).add(clo); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (cfg_file.size() > 0) { + ReadFile rf(cfg_file); + po::store(po::parse_config_file(*rf.stream(), dconfig_options), *conf); + } + if (conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +struct TrainingInstance { + TaggedSentence ts; + EdgeSubset tree; + SparseVector features; +}; + +void ReadTraining(const string& fname, vector* corpus, int rank = 0, int size = 1) { + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + string err; + int lc = 0; + bool flag = false; + while(getline(in, line)) { + ++lc; + if ((lc-1) % size != rank) continue; + if (rank == 0 && lc % 10 == 0) { cerr << '.' << flush; flag = true; } + if (rank == 0 && lc % 400 == 0) { cerr << " [" << lc << "]\n"; flag = false; } + size_t pos = line.rfind('\t'); + assert(pos != string::npos); + picojson::value obj; + picojson::parse(obj, line.begin() + pos, line.end(), &err); + if (err.size() > 0) { cerr << "JSON parse error in " << lc << ": " << err << endl; abort(); } + corpus->push_back(TrainingInstance()); + TrainingInstance& cur = corpus->back(); + TaggedSentence& ts = cur.ts; + EdgeSubset& tree = cur.tree; + assert(obj.is()); + const picojson::object& d = obj.get(); + const picojson::array& ta = d.find("tokens")->second.get(); + for (unsigned i = 0; i < ta.size(); ++i) { + ts.words.push_back(TD::Convert(ta[i].get()[0].get())); + ts.pos.push_back(TD::Convert(ta[i].get()[1].get())); + } + const picojson::array& da = d.find("deps")->second.get(); + for (unsigned i = 0; i < da.size(); ++i) { + const picojson::array& thm = da[i].get(); + // get dep type here + short h = thm[2].get(); + short m = thm[1].get(); + if (h < 0) + tree.roots.push_back(m); + else + tree.h_m_pairs.push_back(make_pair(h,m)); + } + //cerr << TD::GetString(ts.words) << endl << TD::GetString(ts.pos) << endl << tree << endl; + } + if (flag) cerr << "\nRead " << lc << " training instances\n"; +} + +void AddFeatures(double prob, const SparseVector& fmap, vector* g) { + for (SparseVector::const_iterator it = fmap.begin(); it != fmap.end(); ++it) + (*g)[it->first] += it->second * prob; +} + +double ApplyRegularizationTerms(const double C, + const vector& weights, + vector* g) { + assert(weights.size() == g->size()); + double reg = 0; + for (size_t i = 0; i < weights.size(); ++i) { +// const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); + const double& w_i = weights[i]; + double& g_i = (*g)[i]; + reg += C * w_i * w_i; + g_i += 2 * C * w_i; + +// reg += T * (w_i - prev_w_i) * (w_i - prev_w_i); +// g_i += 2 * T * (w_i - prev_w_i); + } + return reg; +} int main(int argc, char** argv) { + int rank = 0; + int size = 1; + po::variables_map conf; + InitCommandLine(argc, argv, &conf); ArcFactoredForest af(5); - cerr << af(0,3) << endl; + ArcFFRegistry reg; + reg.Register("DistancePenalty", new ArcFFFactory); + vector corpus; + vector > ffs; + ffs.push_back(boost::shared_ptr(new DistancePenalty(""))); + ReadTraining(conf["training_data"].as(), &corpus, rank, size); + vector forests(corpus.size()); + SparseVector empirical; + bool flag = false; + for (int i = 0; i < corpus.size(); ++i) { + TrainingInstance& cur = corpus[i]; + if (rank == 0 && (i+1) % 10 == 0) { cerr << '.' << flush; flag = true; } + if (rank == 0 && (i+1) % 400 == 0) { cerr << " [" << (i+1) << "]\n"; flag = false; } + for (int fi = 0; fi < ffs.size(); ++fi) { + ArcFeatureFunction& ff = *ffs[fi]; + ff.PrepareForInput(cur.ts); + SparseVector efmap; + for (int j = 0; j < cur.tree.h_m_pairs.size(); ++j) { + efmap.clear(); + ff.EgdeFeatures(cur.ts, cur.tree.h_m_pairs[j].first, + cur.tree.h_m_pairs[j].second, + &efmap); + cur.features += efmap; + } + for (int j = 0; j < cur.tree.roots.size(); ++j) { + efmap.clear(); + ff.EgdeFeatures(cur.ts, -1, cur.tree.roots[j], &efmap); + cur.features += efmap; + } + } + empirical += cur.features; + forests[i].resize(cur.ts.words.size()); + forests[i].ExtractFeatures(cur.ts, ffs); + } + if (flag) cerr << endl; + //cerr << "EMP: " << empirical << endl; //DE + vector weights(FD::NumFeats(), 0.0); + vector g(FD::NumFeats(), 0.0); + cerr << "features initialized\noptimizing...\n"; + boost::shared_ptr o; + o.reset(new LBFGSOptimizer(g.size(), conf["correction_buffers"].as())); + int iterations = 1000; + for (int iter = 0; iter < iterations; ++iter) { + cerr << "ITERATION " << iter << " " << flush; + fill(g.begin(), g.end(), 0.0); + for (SparseVector::const_iterator it = empirical.begin(); it != empirical.end(); ++it) + g[it->first] = -it->second; + double obj = -empirical.dot(weights); + // SparseVector mfm; //DE + for (int i = 0; i < corpus.size(); ++i) { + forests[i].Reweight(weights); + double logz; + forests[i].EdgeMarginals(&logz); + //cerr << " O = " << (-corpus[i].features.dot(weights)) << " D=" << -logz << " OO= " << (-corpus[i].features.dot(weights) - logz) << endl; + obj -= logz; + int num_words = corpus[i].ts.words.size(); + for (int h = -1; h < num_words; ++h) { + for (int m = 0; m < num_words; ++m) { + if (h == m) continue; + const ArcFactoredForest::Edge& edge = forests[i](h,m); + const SparseVector& fmap = edge.features; + double prob = edge.edge_prob.as_float(); + if (prob < -0.000001) { cerr << "Prob < 0: " << prob << endl; prob = 0; } + if (prob > 1.000001) { cerr << "Prob > 1: " << prob << endl; prob = 1; } + AddFeatures(prob, fmap, &g); + //mfm += fmap * prob; // DE + } + } + } + //cerr << endl << "E: " << empirical << endl; // DE + //cerr << "M: " << mfm << endl; // DE + double r = ApplyRegularizationTerms(conf["regularization_strength"].as(), weights, &g); + double gnorm = 0; + for (int i = 0; i < g.size(); ++i) + gnorm += g[i]*g[i]; + cerr << "OBJ=" << (obj+r) << "\t[F=" << obj << " R=" << r << "]\tGnorm=" << sqrt(gnorm) << endl; + obj += r; + assert(obj >= 0); + o->Optimize(obj, g, &weights); + Weights::ShowLargestFeatures(weights); + if (o->HasConverged()) { cerr << "CONVERGED\n"; break; } + } return 0; } diff --git a/rst_parser/rst_test.cc b/rst_parser/rst_test.cc index 8995515f..7e6fb2c1 100644 --- a/rst_parser/rst_test.cc +++ b/rst_parser/rst_test.cc @@ -17,15 +17,15 @@ int main(int argc, char** argv) { // (0, 1) 9 // (0, 3) 9 ArcFactoredForest af(3); - af(1,2).edge_prob.logeq(20); - af(1,3).edge_prob.logeq(3); - af(2,1).edge_prob.logeq(20); - af(2,3).edge_prob.logeq(30); - af(3,2).edge_prob.logeq(0); - af(3,1).edge_prob.logeq(11); - af(0,2).edge_prob.logeq(10); - af(0,1).edge_prob.logeq(9); - af(0,3).edge_prob.logeq(9); + af(0,1).edge_prob.logeq(20); + af(0,2).edge_prob.logeq(3); + af(1,0).edge_prob.logeq(20); + af(1,2).edge_prob.logeq(30); + af(2,1).edge_prob.logeq(0); + af(2,0).edge_prob.logeq(11); + af(-1,1).edge_prob.logeq(10); + af(-1,0).edge_prob.logeq(9); + af(-1,2).edge_prob.logeq(9); EdgeSubset tree; // af.MaximumEdgeSubset(&tree); double lz; -- cgit v1.2.3 From cb0523471caff98a2ec89a3657c1385b53529c8d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 16 Apr 2012 14:11:02 -0400 Subject: switch to log domain for matrix operations --- rst_parser/arc_factored.h | 2 +- rst_parser/arc_factored_marginals.cc | 24 +++++++++++++----------- rst_parser/mst_train.cc | 29 +++++++++++++++++++++-------- rst_parser/rst_test.cc | 16 +++++++++++++--- utils/logval.h | 13 ++++++++++--- 5 files changed, 58 insertions(+), 26 deletions(-) (limited to 'rst_parser/arc_factored_marginals.cc') diff --git a/rst_parser/arc_factored.h b/rst_parser/arc_factored.h index d9a0bb24..4de38b66 100644 --- a/rst_parser/arc_factored.h +++ b/rst_parser/arc_factored.h @@ -56,7 +56,7 @@ class ArcFactoredForest { // Reweight edges so that edge_prob is the edge's marginals // optionally returns log partition - void EdgeMarginals(double* p_log_z = NULL); + void EdgeMarginals(prob_t* p_log_z = NULL); // This may not return a tree void PickBestParentForEachWord(EdgeSubset* st) const; diff --git a/rst_parser/arc_factored_marginals.cc b/rst_parser/arc_factored_marginals.cc index 16360b0d..acb8102a 100644 --- a/rst_parser/arc_factored_marginals.cc +++ b/rst_parser/arc_factored_marginals.cc @@ -9,37 +9,39 @@ using namespace std; #if HAVE_EIGEN #include -typedef Eigen::Matrix ArcMatrix; -typedef Eigen::Matrix RootVector; +typedef Eigen::Matrix ArcMatrix; +typedef Eigen::Matrix RootVector; -void ArcFactoredForest::EdgeMarginals(double *plog_z) { +void ArcFactoredForest::EdgeMarginals(prob_t *plog_z) { ArcMatrix A(num_words_,num_words_); RootVector r(num_words_); for (int h = 0; h < num_words_; ++h) { for (int m = 0; m < num_words_; ++m) { if (h != m) - A(h,m) = edges_(h,m).edge_prob.as_float(); + A(h,m) = edges_(h,m).edge_prob; else - A(h,m) = 0; + A(h,m) = prob_t::Zero(); } - r(h) = root_edges_[h].edge_prob.as_float(); + r(h) = root_edges_[h].edge_prob; } ArcMatrix L = -A; L.diagonal() = A.colwise().sum(); L.row(0) = r; ArcMatrix Linv = L.inverse(); - if (plog_z) *plog_z = log(Linv.determinant()); + if (plog_z) *plog_z = Linv.determinant(); RootVector rootMarginals = r.cwiseProduct(Linv.col(0)); + static const prob_t ZERO(0); + static const prob_t ONE(1); // ArcMatrix T = Linv; for (int h = 0; h < num_words_; ++h) { for (int m = 0; m < num_words_; ++m) { - const double marginal = (m == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,m) - - (h == 0 ? 0.0 : 1.0) * A(h,m) * Linv(m,h); - edges_(h,m).edge_prob = prob_t(marginal); + const prob_t marginal = (m == 0 ? ZERO : ONE) * A(h,m) * Linv(m,m) - + (h == 0 ? ZERO : ONE) * A(h,m) * Linv(m,h); + edges_(h,m).edge_prob = marginal; // T(h,m) = marginal; } - root_edges_[h].edge_prob = prob_t(rootMarginals(h)); + root_edges_[h].edge_prob = rootMarginals(h); } // cerr << "ROOT MARGINALS: " << rootMarginals.transpose() << endl; // cerr << "M:\n" << T << endl; diff --git a/rst_parser/mst_train.cc b/rst_parser/mst_train.cc index b5114726..c5cab6ec 100644 --- a/rst_parser/mst_train.cc +++ b/rst_parser/mst_train.cc @@ -23,7 +23,9 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { string cfg_file; opts.add_options() ("training_data,t",po::value()->default_value("-"), "File containing training data (jsent format)") - ("feature_function,F",po::value >()->composing(), "feature function") + ("feature_function,F",po::value >()->composing(), "feature function (multiple permitted)") + ("weights,w",po::value(), "Optional starting weights") + ("output_every_i_iterations,I",po::value()->default_value(1), "Write weights every I iterations") ("regularization_strength,C",po::value()->default_value(1.0), "Regularization strength") ("correction_buffers,m", po::value()->default_value(10), "LBFGS correction buffers"); po::options_description clo("Command line options"); @@ -161,9 +163,13 @@ int main(int argc, char** argv) { if (flag) cerr << endl; //cerr << "EMP: " << empirical << endl; //DE vector weights(FD::NumFeats(), 0.0); + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &weights); vector g(FD::NumFeats(), 0.0); cerr << "features initialized\noptimizing...\n"; boost::shared_ptr o; + int every = corpus.size() / 20; + if (!every) ++every; o.reset(new LBFGSOptimizer(g.size(), conf["correction_buffers"].as())); int iterations = 1000; for (int iter = 0; iter < iterations; ++iter) { @@ -174,11 +180,12 @@ int main(int argc, char** argv) { double obj = -empirical.dot(weights); // SparseVector mfm; //DE for (int i = 0; i < corpus.size(); ++i) { + if ((i + 1) % every == 0) cerr << '.' << flush; const int num_words = corpus[i].ts.words.size(); forests[i].Reweight(weights); - double lz; - forests[i].EdgeMarginals(&lz); - obj -= lz; + prob_t z; + forests[i].EdgeMarginals(&z); + obj -= log(z); //cerr << " O = " << (-corpus[i].features.dot(weights)) << " D=" << -lz << " OO= " << (-corpus[i].features.dot(weights) - lz) << endl; //cerr << " ZZ = " << zz << endl; for (int h = -1; h < num_words; ++h) { @@ -202,14 +209,20 @@ int main(int argc, char** argv) { gnorm += g[i]*g[i]; ostringstream ll; ll << "ITER=" << (iter+1) << "\tOBJ=" << (obj+r) << "\t[F=" << obj << " R=" << r << "]\tGnorm=" << sqrt(gnorm); - cerr << endl << ll.str() << endl; + cerr << ' ' << ll.str().substr(ll.str().find('\t')+1) << endl; obj += r; assert(obj >= 0); o->Optimize(obj, g, &weights); Weights::ShowLargestFeatures(weights); - string sl = ll.str(); - Weights::WriteToFile(o->HasConverged() ? "weights.final.gz" : "weights.cur.gz", weights, true, &sl); - if (o->HasConverged()) { cerr << "CONVERGED\n"; break; } + const bool converged = o->HasConverged(); + const char* ofname = converged ? "weights.final.gz" : "weights.cur.gz"; + if (converged || ((iter+1) % conf["output_every_i_iterations"].as()) == 0) { + cerr << "writing..." << flush; + const string sl = ll.str(); + Weights::WriteToFile(ofname, weights, true, &sl); + cerr << "done" << endl; + } + if (converged) { cerr << "CONVERGED\n"; break; } } forests[0].Reweight(weights); TreeSampler ts(forests[0]); diff --git a/rst_parser/rst_test.cc b/rst_parser/rst_test.cc index 7e6fb2c1..3bb95759 100644 --- a/rst_parser/rst_test.cc +++ b/rst_parser/rst_test.cc @@ -2,6 +2,8 @@ #include +#include + using namespace std; int main(int argc, char** argv) { @@ -28,11 +30,19 @@ int main(int argc, char** argv) { af(-1,2).edge_prob.logeq(9); EdgeSubset tree; // af.MaximumEdgeSubset(&tree); - double lz; - af.EdgeMarginals(&lz); - cerr << "Z = " << lz << endl; + prob_t z; + af.EdgeMarginals(&z); + cerr << "Z = " << abs(z) << endl; af.PickBestParentForEachWord(&tree); cerr << tree << endl; + typedef Eigen::Matrix M3; + M3 A = M3::Zero(); + A(0,0) = prob_t(1); + A(1,0) = prob_t(3); + A(0,1) = prob_t(2); + A(1,1) = prob_t(4); + prob_t det = A.determinant(); + cerr << det.as_float() << endl; return 0; } diff --git a/utils/logval.h b/utils/logval.h index 8a59d0b1..ec1f6acd 100644 --- a/utils/logval.h +++ b/utils/logval.h @@ -30,8 +30,6 @@ class LogVal { LogVal(init_minus_1) : s_(true),v_(0) { } LogVal(init_1) : s_(),v_(0) { } LogVal(init_0) : s_(),v_(LOGVAL_LOG0) { } - explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {} - explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { } LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {} LogVal(double lnx,init_lnx) : s_(),v_(lnx) {} static Self exp(T lnx) { return Self(lnx,false); } @@ -126,7 +124,7 @@ class LogVal { } Self operator-() const { - return Self(v_,-s_); + return Self(v_,!s_); } void negate() { s_ = !s_; } @@ -193,6 +191,15 @@ T log(const LogVal& o) { return o.v_; } +template +LogVal abs(const LogVal& o) { + if (o.s_) { + LogVal res = o; + res.s_ = false; + return res; + } else { return o; } +} + template LogVal pow(const LogVal& b, const T& e) { return b.pow(e); -- cgit v1.2.3 From aa96fc48f43c08e1cafff57661a3b7d51e50264e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 19 Apr 2012 13:30:23 -0400 Subject: fix for marginals --- rst_parser/arc_factored_marginals.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'rst_parser/arc_factored_marginals.cc') diff --git a/rst_parser/arc_factored_marginals.cc b/rst_parser/arc_factored_marginals.cc index acb8102a..3e8c9f86 100644 --- a/rst_parser/arc_factored_marginals.cc +++ b/rst_parser/arc_factored_marginals.cc @@ -49,7 +49,7 @@ void ArcFactoredForest::EdgeMarginals(prob_t *plog_z) { #else -void ArcFactoredForest::EdgeMarginals(double*) { +void ArcFactoredForest::EdgeMarginals(prob_t *) { cerr << "EdgeMarginals() requires --with-eigen!\n"; abort(); } -- cgit v1.2.3