From 4ecae3b2e34a45dfdf22f4f244fbbcd66c8635b0 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Tue, 8 May 2012 15:19:40 -0400
Subject: add liblbfgs, which is much less crappy than the current one

---
 training/liblbfgs/lbfgs++.h | 129 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 training/liblbfgs/lbfgs++.h

(limited to 'training/liblbfgs/lbfgs++.h')

diff --git a/training/liblbfgs/lbfgs++.h b/training/liblbfgs/lbfgs++.h
new file mode 100644
index 00000000..119511e5
--- /dev/null
+++ b/training/liblbfgs/lbfgs++.h
@@ -0,0 +1,129 @@
+// THIS IS CDEC'S C++ WRAPPER AROUND LIBLBFGS
+// liblbfgs is
+//   Copyright (c) 1990, Jorge Nocedal
+//   Copyright (c) 2007-2010, Naoaki Okazaki
+//
+// see https://github.com/chokkan/liblbfgs for more details
+//
+#ifndef __LBFGSPP_H__
+#define __LBFGSPP_H__
+
+#include <vector>
+#include "liblbfgs/lbfgs.h"
+
+// Function must be lbfgsfloatval_t f(const double* x_start, double* g_start)
+template <typename Function>
+class LBFGS {
+ public:
+  LBFGS(size_t n,            // number of variables
+        const Function& f,   // function to optimize
+        double l1_c = 0.0,   // l1 penalty strength
+        size_t m = 10        // number of memory buffers
+        // TODO should use custom allocator here:
+       ) : p_x(new std::vector<lbfgsfloatval_t>(n, 0.0)),
+           owned(true),
+           m_x(*p_x),
+           func(f) {
+    Init(m, l1_c);
+  }
+
+  // constructor where external vector storage for variables is used
+  LBFGS(std::vector<lbfgsfloatval_t>* px,
+        const Function& f,
+        double l1_c = 0.0,   // l1 penalty strength
+        size_t m = 10
+       ) : p_x(px),
+           owned(false),
+           m_x(*p_x),
+           func(f) {
+    Init(m, l1_c);
+  }
+
+  ~LBFGS() {
+    if (owned) delete p_x;
+  }
+  const lbfgsfloatval_t& operator[](size_t i) const { return m_x[i]; }
+  lbfgsfloatval_t& operator[](size_t i) { return m_x[i]; }
+  size_t size() const { return m_x.size(); }
+
+  int Optimize() {
+    lbfgsfloatval_t fx;
+    int ret = lbfgs(m_x.size(), &m_x[0], &fx, _evaluate, _progress, this, &param);
+    std::cerr << "L-BFGS optimization terminated with status code = " << ret << std::endl;
+    std::cerr << "  fx = " << fx << std::endl;
+    return ret;
+  }

+ private:
+  void Init(size_t m, double l1_c) {
+    lbfgs_parameter_init(&param);
+    param.m = m;
+    if (l1_c > 0.0) {
+      param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
+      param.orthantwise_c = 1.0;
+    }
+  }
+
+  static lbfgsfloatval_t _evaluate(
+      void *instance,
+      const lbfgsfloatval_t *x,
+      lbfgsfloatval_t *g,
+      const int n,
+      const lbfgsfloatval_t step) {
+    return reinterpret_cast<LBFGS<Function>*>(instance)->evaluate(x, g, n, step);
+  }
+
+  lbfgsfloatval_t evaluate(const lbfgsfloatval_t *x,
+                           lbfgsfloatval_t *g,
+                           const int n,
+                           const lbfgsfloatval_t step) {
+    (void) n;
+    (void) step;
+    return func(x, g);
+  }
+
+  static int _progress(
+      void *instance,
+      const lbfgsfloatval_t *x,
+      const lbfgsfloatval_t *g,
+      const lbfgsfloatval_t fx,
+      const lbfgsfloatval_t xnorm,
+      const lbfgsfloatval_t gnorm,
+      const lbfgsfloatval_t step,
+      int n,
+      int k,
+      int ls
+      )
+      {
+    return reinterpret_cast<LBFGS<Function>*>(instance)
+        ->progress(x, g, fx, xnorm, gnorm, step, n, k, ls);
+  }
+
+  int progress(
+      const lbfgsfloatval_t *x,
+      const lbfgsfloatval_t *g,
+      const lbfgsfloatval_t fx,
+      const lbfgsfloatval_t xnorm,
+      const lbfgsfloatval_t gnorm,
+      const lbfgsfloatval_t step,
+      int n,
+      int k,
+      int ls
+      )
+      {
+    (void) n;
+    (void) k;
+    std::cerr << "Iteration " << k << ':' << std::endl;
+    std::cerr << "  fx = " << fx << ", x[0] = " << x[0] << ", x[1] = " << x[1] << std::endl;
+    std::cerr << "  xnorm = " << xnorm << ", gnorm = " << gnorm << ", step = " << step << std::endl << std::endl;
+    return 0;
+  }
+  std::vector<lbfgsfloatval_t>* p_x;
+  const bool owned;
+  std::vector<lbfgsfloatval_t>& m_x;
+  const Function& func;
+  lbfgs_parameter_t param;
+
+};
+
+#endif
--
cgit v1.2.3


From 33d4601da5e2e715260619a38f5899645d157952 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Tue, 8 May 2012 19:45:10 -0400
Subject: switch to liblbfgs for pro

---
 pro-train/Makefile.am        |  2 +-
 pro-train/mr_pro_reduce.cc   | 89 ++++++++++++++++++++------------------
 training/liblbfgs/lbfgs++.h  | 36 +++++++++++-------
 training/liblbfgs/ll_test.cc |  9 ++---
 4 files changed, 67 insertions(+), 69 deletions(-)

(limited to 'training/liblbfgs/lbfgs++.h')

diff --git a/pro-train/Makefile.am b/pro-train/Makefile.am
index fdaf43e2..11d26211 100644
--- a/pro-train/Makefile.am
+++ b/pro-train/Makefile.am
@@ -8,6 +8,6 @@ mr_pro_map_SOURCES = mr_pro_map.cc
 mr_pro_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz

 mr_pro_reduce_SOURCES = mr_pro_reduce.cc
-mr_pro_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/training/optimize.o $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+mr_pro_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/training/optimize.o $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz

 AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training
diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc
index 6362ce47..d3fb8026 100644
--- a/pro-train/mr_pro_reduce.cc
+++ b/pro-train/mr_pro_reduce.cc
@@ -11,6 +11,7 @@
 #include "weights.h"
 #include "sparse_vector.h"
 #include "optimize.h"
+#include "liblbfgs/lbfgs++.h"

 using namespace std;
 namespace po = boost::program_options;
@@ -89,10 +90,10 @@ void ReadCorpus(istream* pin, vector<pair<bool, SparseVector<weight_t> > >* corp
   if (flag) cerr << endl;
 }

-void GradAdd(const SparseVector<weight_t>& v, const double scale, vector<weight_t>* acc) {
+void GradAdd(const SparseVector<weight_t>& v, const double scale, weight_t* acc) {
   for (SparseVector<weight_t>::const_iterator it = v.begin();
        it != v.end(); ++it) {
-    (*acc)[it->first] += it->second * scale;
+    acc[it->first] += it->second * scale;
   }
 }

@@ -100,26 +101,24 @@ double ApplyRegularizationTerms(const double C,
                                 const double T,
                                 const vector<weight_t>& weights,
                                 const vector<weight_t>& prev_weights,
-                                vector<weight_t>* g) {
-  assert(weights.size() == g->size());
+                                weight_t* g) {
   double reg = 0;
   for (size_t i = 0; i < weights.size(); ++i) {
     const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0);
     const double& w_i = weights[i];
-    double& g_i = (*g)[i];
     reg += C * w_i * w_i;
-    g_i += 2 * C * w_i;
+    g[i] += 2 * C * w_i;
     const double diff_i = w_i - prev_w_i;
     reg += T * diff_i * diff_i;
-    g_i += 2 * T * diff_i;
+    g[i] += 2 * T * diff_i;
   }
   return reg;
 }

 double TrainingInference(const vector<weight_t>& x,
                          const vector<pair<bool, SparseVector<weight_t> > >& corpus,
-                         vector<weight_t>* g = NULL) {
+                         weight_t* g = NULL) {
   double cll = 0;
   for (int i = 0; i < corpus.size(); ++i) {
     const double dotprod = corpus[i].second.dot(x) + (x.size() ?
                            x[0] : weight_t()); // x[0] is bias
@@ -139,20 +138,44 @@ double TrainingInference(const vector<weight_t>& x,
       if (g) {
         // g -= corpus[i].second * exp(lp_false);
         GradAdd(corpus[i].second, -exp(lp_false), g);
-        (*g)[0] -= exp(lp_false); // bias
+        g[0] -= exp(lp_false); // bias
       }
     } else {                  // false label
       cll -= lp_false;
       if (g) {
         // g += corpus[i].second * exp(lp_true);
         GradAdd(corpus[i].second, exp(lp_true), g);
-        (*g)[0] += exp(lp_true); // bias
+        g[0] += exp(lp_true); // bias
       }
     }
   }
   return cll;
 }

+struct ProLoss {
+  ProLoss(const vector<pair<bool, SparseVector<weight_t> > >& tr,
+          const vector<pair<bool, SparseVector<weight_t> > >& te,
+          const double c,
+          const double t,
+          const vector<weight_t>& px) : training(tr), testing(te), C(c), T(t), prev_x(px){}
+  double operator()(const vector<double>& x, double* g) const {
+    fill(g, g + x.size(), 0.0);
+    double cll = TrainingInference(x, training, g);
+    tppl = 0;
+    if (testing.size())
+      tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size()));
+    double ppl = cll / log(2);
+    ppl /= training.size();
+    ppl = pow(2.0, ppl);
+    double reg = ApplyRegularizationTerms(C, T, x, prev_x, g);
+    return cll + reg;
+  }
+  const vector<pair<bool, SparseVector<weight_t> > >& training, testing;
+  const double C, T;
+  const vector<weight_t>& prev_x;
+  mutable double tppl;
+};
+
 // return held-out log likelihood
 double LearnParameters(const vector<pair<bool, SparseVector<weight_t> > >& training,
                        const vector<pair<bool, SparseVector<weight_t> > >& testing,
@@ -161,42 +184,10 @@ double LearnParameters(const vector<pair<bool, SparseVector<weight_t> > >& train
                        const unsigned memory_buffers,
                        const vector<weight_t>& prev_x,
                        vector<weight_t>* px) {
-  vector<weight_t>& x = *px;
-  vector<weight_t> vg(FD::NumFeats(), 0.0);
-  bool converged = false;
-  LBFGSOptimizer opt(FD::NumFeats(), memory_buffers);
-  double tppl = 0.0;
-  while(!converged) {
-    fill(vg.begin(), vg.end(), 0.0);
-    double cll = TrainingInference(x, training, &vg);
-    double ppl = cll / log(2);
-    ppl /= training.size();
-    ppl = pow(2.0, ppl);
-
-    // evaluate optional held-out test set
-    if (testing.size()) {
-      tppl = TrainingInference(x, testing) / log(2);
-      tppl /= testing.size();
-      tppl = pow(2.0, tppl);
-    }
-
-    // handle regularizer
-    double reg = ApplyRegularizationTerms(C, T, x, prev_x, &vg);
-    cll += reg;
-    cerr << cll << " (REG=" << reg << ")\tPPL=" << ppl << "\t TEST_PPL=" << tppl << "\t" << endl;
-    try {
-      opt.Optimize(cll, vg, &x);
-      converged = opt.HasConverged();
-    } catch (...) {
-      cerr << "Exception caught, assuming convergence is close enough...\n";
-      converged = true;
-    }
-    if (fabs(x[0]) > MAX_BIAS) {
-      cerr << "Biased model learned. Are your training instances wrong?\n";
-      cerr << "  BIAS: " << x[0] << endl;
-    }
-  }
-  return tppl;
+  ProLoss loss(training, testing, C, T, prev_x);
+  LBFGS<ProLoss> lbfgs(px, loss, 0.0, memory_buffers);
+  lbfgs.MinimizeFunction();
+  return loss.tppl;
 }

 int main(int argc, char** argv) {
@@ -213,9 +204,9 @@ int main(int argc, char** argv) {
   const double max_reg = conf["max_reg"].as<double>();
   double C = conf["regularization_strength"].as<double>(); // will be overridden if parameter is tuned
   const double T = conf["regularize_to_weights"].as<double>();
-  assert(C > 0.0);
-  assert(min_reg > 0.0);
-  assert(max_reg > 0.0);
+  assert(C >= 0.0);
+  assert(min_reg >= 0.0);
+  assert(max_reg >= 0.0);
   assert(max_reg > min_reg);
   const double psi = conf["interpolate_with_weights"].as<double>();
   if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; }
diff --git a/training/liblbfgs/lbfgs++.h b/training/liblbfgs/lbfgs++.h
index 119511e5..6c4d1807 100644
--- a/training/liblbfgs/lbfgs++.h
+++ b/training/liblbfgs/lbfgs++.h
@@ -9,9 +9,10 @@
 #define __LBFGSPP_H__

 #include <vector>
+#include <cassert>
 #include "liblbfgs/lbfgs.h"

-// Function must be lbfgsfloatval_t f(const double* x_start, double* g_start)
+// Function must be double f(const vector<double>& x_start, double* g_start)
 template <typename Function>
 class LBFGS {
  public:
@@ -46,11 +47,14 @@ class LBFGS {
   lbfgsfloatval_t& operator[](size_t i) { return m_x[i]; }
   size_t size() const { return m_x.size(); }

-  int Optimize() {
+  int MinimizeFunction(bool s = false) {
+    silence = s;
     lbfgsfloatval_t fx;
     int ret = lbfgs(m_x.size(), &m_x[0], &fx, _evaluate, _progress, this, &param);
-    std::cerr << "L-BFGS optimization terminated with status code = " << ret << std::endl;
-    std::cerr << "  fx = " << fx << std::endl;
+    if (!silence) {
+      std::cerr << "L-BFGS optimization terminated with status code = " << ret << std::endl;
+      std::cerr << "  fx = " << fx << std::endl;
+    }
     return ret;
   }

@@ -62,6 +66,7 @@ class LBFGS {
       param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
       param.orthantwise_c = 1.0;
     }
+    silence = false;
   }

   static lbfgsfloatval_t _evaluate(
@@ -79,7 +84,8 @@ class LBFGS {
                            const lbfgsfloatval_t step) {
     (void) n;
     (void) step;
-    return func(x, g);
+    assert(x == &m_x[0]); // sanity check, ensures pass m_x is okay
+    return func(m_x, g);
   }

   static int _progress(
@@ -109,21 +115,23 @@ class LBFGS {
       int n,
       int k,
       int ls
-      )
-      {
-    (void) n;
-    (void) k;
-    std::cerr << "Iteration " << k << ':' << std::endl;
-    std::cerr << "  fx = " << fx << ", x[0] = " << x[0] << ", x[1] = " << x[1] << std::endl;
-    std::cerr << "  xnorm = " << xnorm << ", gnorm = " << gnorm << ", step = " << step << std::endl << std::endl;
-    return 0;
+      ) {
+    (void) x;
+    (void) g;
+    (void) n;
+    (void) ls;
+    if (!silence) {
+      std::cerr << "Iteration " << k << ':' << "\tfx = " << fx << "\t"
+                << " xnorm = " << xnorm << ", gnorm = " << gnorm << ", step = " << step << std::endl; }
+    return 0;
+  }
   std::vector<lbfgsfloatval_t>* p_x;
   const bool owned;
   std::vector<lbfgsfloatval_t>& m_x;
   const Function& func;
   lbfgs_parameter_t param;
-
+  bool silence;
 };

 #endif
diff --git a/training/liblbfgs/ll_test.cc b/training/liblbfgs/ll_test.cc
index 058db716..43c0f214 100644
--- a/training/liblbfgs/ll_test.cc
+++ b/training/liblbfgs/ll_test.cc
@@ -4,12 +4,11 @@
 using namespace std;

 // Function must be lbfgsfloatval_t f(x.begin, x.end, g.begin)
-lbfgsfloatval_t func(const lbfgsfloatval_t* x, lbfgsfloatval_t* g) {
+lbfgsfloatval_t func(const vector<lbfgsfloatval_t>& x, lbfgsfloatval_t* g) {
   int i;
   lbfgsfloatval_t fx = 0.0;

-  int n = 4;
-  for (i = 0;i < n;i += 2) {
+  for (i = 0;i < x.size();i += 2) {
     lbfgsfloatval_t t1 = 1.0 - x[i];
     lbfgsfloatval_t t2 = 10.0 * (x[i+1] - x[i] * x[i]);
     g[i+1] = 20.0 * t2;
@@ -21,8 +20,8 @@ lbfgsfloatval_t func(const lbfgsfloatval_t* x, lbfgsfloatval_t* g) {

 template <typename F>
 void Opt(F& f) {
-  LBFGS<F> lbfgs(4, f, 1.0);
-  lbfgs.Optimize();
+  LBFGS<F> lbfgs(4, f);
+  lbfgs.MinimizeFunction();
 }

 int main(int argc, char** argv) {
--
cgit v1.2.3


From ad16eda6dedf3475e3ab952418dde8decf0dceca Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Tue, 8 May 2012 23:10:13 -0400
Subject: better logging

---
 training/liblbfgs/lbfgs++.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'training/liblbfgs/lbfgs++.h')

diff --git a/training/liblbfgs/lbfgs++.h b/training/liblbfgs/lbfgs++.h
index 6c4d1807..342f9b0e 100644
--- a/training/liblbfgs/lbfgs++.h
+++ b/training/liblbfgs/lbfgs++.h
@@ -49,6 +49,7 @@ class LBFGS {

   int MinimizeFunction(bool s = false) {
     silence = s;
+    ec = 0;
     lbfgsfloatval_t fx;
     int ret = lbfgs(m_x.size(), &m_x[0], &fx, _evaluate, _progress, this, &param);
     if (!silence) {
@@ -84,6 +85,7 @@ class LBFGS {
                            const lbfgsfloatval_t step) {
     (void) n;
     (void) step;
+    if (!silence) { ec++; std::cerr << '.'; }
     assert(x == &m_x[0]); // sanity check, ensures pass m_x is okay
     return func(m_x, g);
   }
@@ -121,6 +123,9 @@ class LBFGS {
     (void) n;
     (void) ls;
     if (!silence) {
+      if (ec < 8) std::cerr << '\t';
+      if (ec < 16) std::cerr << '\t';
+      ec = 0;
       std::cerr << "Iteration " << k << ':' << "\tfx = " << fx << "\t"
                 << " xnorm = " << xnorm << ", gnorm = " << gnorm << ", step = " << step << std::endl; }
     return 0;
   }
   std::vector<lbfgsfloatval_t>* p_x;
   const bool owned;
   std::vector<lbfgsfloatval_t>& m_x;
   const Function& func;
   lbfgs_parameter_t param;
   bool silence;
+  int ec;
 };

 #endif
--
cgit v1.2.3
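For reference, here is a minimal standalone sketch of how the LBFGS<Function> wrapper introduced by these patches can be driven after the "switch to liblbfgs for pro" commit. The QuadLoss functor, its objective, and main() below are illustrative only and are not part of the patches (the real callers are ll_test.cc and the ProLoss functor in mr_pro_reduce.cc); the sketch also assumes liblbfgs is built with its default lbfgsfloatval_t == double.

// Hypothetical example, not from the cdec repository: a convex quadratic
// objective minimized with the LBFGS<Function> wrapper from lbfgs++.h.
#include <cstddef>
#include <iostream>
#include <vector>
#include "liblbfgs/lbfgs++.h"

// Matches the wrapper's contract: double f(const std::vector<double>& x, double* g).
struct QuadLoss {
  // f(x) = sum_i (x_i - 3)^2, with gradient g_i = 2 * (x_i - 3).
  double operator()(const std::vector<double>& x, double* g) const {
    double fx = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i) {
      const double d = x[i] - 3.0;
      fx += d * d;
      g[i] = 2.0 * d;
    }
    return fx;
  }
};

int main() {
  QuadLoss loss;
  LBFGS<QuadLoss> opt(10, loss);  // 10 variables, internally owned storage, no L1 penalty
  opt.MinimizeFunction();         // pass true to silence the per-iteration logging
  std::cout << "x[0] = " << opt[0] << std::endl;  // should end up close to 3.0
  return 0;
}

The two-constructor design in the patches matters here: passing a size (as above) lets the wrapper own the parameter vector, while mr_pro_reduce.cc uses the pointer constructor so the optimizer writes directly into the caller's weight vector.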