summaryrefslogtreecommitdiff
path: root/training/utils/lbfgs.h
diff options
context:
space:
mode:
authorMichael Denkowski <michael.j.denkowski@gmail.com>2012-12-22 16:01:23 -0500
committerMichael Denkowski <michael.j.denkowski@gmail.com>2012-12-22 16:01:23 -0500
commit597d89c11db53e91bc011eab70fd613bbe6453e8 (patch)
tree83c87c07d1ff6d3ee4e3b1626f7eddd49c61095b /training/utils/lbfgs.h
parent65e958ff2678a41c22be7171456a63f002ef370b (diff)
parent201af2acd394415a05072fbd53d42584875aa4b4 (diff)
Merge branch 'master' of git://github.com/redpony/cdec
Diffstat (limited to 'training/utils/lbfgs.h')
-rw-r--r--training/utils/lbfgs.h1459
1 files changed, 1459 insertions, 0 deletions
diff --git a/training/utils/lbfgs.h b/training/utils/lbfgs.h
new file mode 100644
index 00000000..e8baecab
--- /dev/null
+++ b/training/utils/lbfgs.h
@@ -0,0 +1,1459 @@
+#ifndef SCITBX_LBFGS_H
+#define SCITBX_LBFGS_H
+
+#include <cstdio>
+#include <cstddef>
+#include <cmath>
+#include <stdexcept>
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <iostream>
+#include <sstream>
+
+namespace scitbx {
+
+//! Limited-memory Broyden-Fletcher-Goldfarb-Shanno (LBFGS) %minimizer.
+/*! Implementation of the
+ Limited-memory Broyden-Fletcher-Goldfarb-Shanno (LBFGS)
+ algorithm for large-scale multidimensional minimization
+ problems.
+
+ This code was manually derived from Java code which was
+ in turn derived from the Fortran program
+ <code>lbfgs.f</code>. The Java translation was
+ effected mostly mechanically, with some manual
+ clean-up; in particular, array indices start at 0
+ instead of 1. Most of the comments from the Fortran
+ code have been pasted in.
+
+ Information on the original LBFGS Fortran source code is
+ available at
+ http://www.netlib.org/opt/lbfgs_um.shar . The following
+ information is taken verbatim from the Netlib documentation
+ for the Fortran source.
+
+ <pre>
+ file opt/lbfgs_um.shar
+ for unconstrained optimization problems
+ alg limited memory BFGS method
+ by J. Nocedal
+ contact nocedal@eecs.nwu.edu
+ ref D. C. Liu and J. Nocedal, ``On the limited memory BFGS method for
+ , large scale optimization methods'' Mathematical Programming 45
+ , (1989), pp. 503-528.
+ , (Postscript file of this paper is available via anonymous ftp
+ , to eecs.nwu.edu in the directory pub/%lbfgs/lbfgs_um.)
+ </pre>
+
+ @author Jorge Nocedal: original Fortran version, including comments
+ (July 1990).<br>
+ Robert Dodier: Java translation, August 1997.<br>
+ Ralf W. Grosse-Kunstleve: C++ port, March 2002.<br>
+ Chris Dyer: serialize/deserialize functionality
+ */
+namespace lbfgs {
+
+ //! Generic exception class for %lbfgs %error messages.
+ /*! All exceptions thrown by the minimizer are derived from this class.
+ */
+ class error : public std::exception {
+ public:
+ //! Constructor.
+ error(std::string const& msg) throw()
+ : msg_("lbfgs error: " + msg)
+ {}
+ //! Access to error message.
+ virtual const char* what() const throw() { return msg_.c_str(); }
+ protected:
+ virtual ~error() throw() {}
+ std::string msg_;
+ public:
+ static std::string itoa(unsigned long i) {
+ std::ostringstream os;
+ os << i;
+ return os.str();
+ }
+ };
+
+ //! Specific exception class.
+ class error_internal_error : public error {
+ public:
+ //! Constructor.
+ error_internal_error(const char* file, unsigned long line) throw()
+ : error(
+ "Internal Error: " + std::string(file) + "(" + itoa(line) + ")")
+ {}
+ };
+
+ //! Specific exception class.
+ class error_improper_input_parameter : public error {
+ public:
+ //! Constructor.
+ error_improper_input_parameter(std::string const& msg) throw()
+ : error("Improper input parameter: " + msg)
+ {}
+ };
+
+ //! Specific exception class.
+ class error_improper_input_data : public error {
+ public:
+ //! Constructor.
+ error_improper_input_data(std::string const& msg) throw()
+ : error("Improper input data: " + msg)
+ {}
+ };
+
+ //! Specific exception class.
+ class error_search_direction_not_descent : public error {
+ public:
+ //! Constructor.
+ error_search_direction_not_descent() throw()
+ : error("The search direction is not a descent direction.")
+ {}
+ };
+
+ //! Specific exception class.
+ class error_line_search_failed : public error {
+ public:
+ //! Constructor.
+ error_line_search_failed(std::string const& msg) throw()
+ : error("Line search failed: " + msg)
+ {}
+ };
+
+ //! Specific exception class.
+ class error_line_search_failed_rounding_errors
+ : public error_line_search_failed {
+ public:
+ //! Constructor.
+ error_line_search_failed_rounding_errors(std::string const& msg) throw()
+ : error_line_search_failed(msg)
+ {}
+ };
+
+ namespace detail {
+
+ template <typename NumType>
+ inline
+ NumType
+ pow2(NumType const& x) { return x * x; }
+
+ template <typename NumType>
+ inline
+ NumType
+ abs(NumType const& x) {
+ if (x < NumType(0)) return -x;
+ return x;
+ }
+
+ // This class implements an algorithm for multi-dimensional line search.
+ template <typename FloatType, typename SizeType = std::size_t>
+ class mcsrch
+ {
+ protected:
+ int infoc;
+ FloatType dginit;
+ bool brackt;
+ bool stage1;
+ FloatType finit;
+ FloatType dgtest;
+ FloatType width;
+ FloatType width1;
+ FloatType stx;
+ FloatType fx;
+ FloatType dgx;
+ FloatType sty;
+ FloatType fy;
+ FloatType dgy;
+ FloatType stmin;
+ FloatType stmax;
+
+ static FloatType const& max3(
+ FloatType const& x,
+ FloatType const& y,
+ FloatType const& z)
+ {
+ return x < y ? (y < z ? z : y ) : (x < z ? z : x );
+ }
+
+ public:
+ /* Minimize a function along a search direction. This code is
+ a Java translation of the function <code>MCSRCH</code> from
+ <code>lbfgs.f</code>, which in turn is a slight modification
+ of the subroutine <code>CSRCH</code> of More' and Thuente.
+ The changes are to allow reverse communication, and do not
+ affect the performance of the routine. This function, in turn,
+ calls <code>mcstep</code>.<p>
+
+ The Java translation was effected mostly mechanically, with
+ some manual clean-up; in particular, array indices start at 0
+ instead of 1. Most of the comments from the Fortran code have
+ been pasted in here as well.<p>
+
+ The purpose of <code>mcsrch</code> is to find a step which
+ satisfies a sufficient decrease condition and a curvature
+ condition.<p>
+
+ At each stage this function updates an interval of uncertainty
+ with endpoints <code>stx</code> and <code>sty</code>. The
+ interval of uncertainty is initially chosen so that it
+ contains a minimizer of the modified function
+ <pre>
+ f(x+stp*s) - f(x) - ftol*stp*(gradf(x)'s).
+ </pre>
+ If a step is obtained for which the modified function has a
+ nonpositive function value and nonnegative derivative, then
+ the interval of uncertainty is chosen so that it contains a
+ minimizer of <code>f(x+stp*s)</code>.<p>
+
+ The algorithm is designed to find a step which satisfies
+ the sufficient decrease condition
+ <pre>
+ f(x+stp*s) &lt;= f(X) + ftol*stp*(gradf(x)'s),
+ </pre>
+ and the curvature condition
+ <pre>
+ abs(gradf(x+stp*s)'s)) &lt;= gtol*abs(gradf(x)'s).
+ </pre>
+ If <code>ftol</code> is less than <code>gtol</code> and if,
+ for example, the function is bounded below, then there is
+ always a step which satisfies both conditions. If no step can
+ be found which satisfies both conditions, then the algorithm
+ usually stops when rounding errors prevent further progress.
+ In this case <code>stp</code> only satisfies the sufficient
+ decrease condition.<p>
+
+ @author Original Fortran version by Jorge J. More' and
+ David J. Thuente as part of the Minpack project, June 1983,
+ Argonne National Laboratory. Java translation by Robert
+ Dodier, August 1997.
+
+ @param n The number of variables.
+
+ @param x On entry this contains the base point for the line
+ search. On exit it contains <code>x + stp*s</code>.
+
+ @param f On entry this contains the value of the objective
+ function at <code>x</code>. On exit it contains the value
+ of the objective function at <code>x + stp*s</code>.
+
+ @param g On entry this contains the gradient of the objective
+ function at <code>x</code>. On exit it contains the gradient
+ at <code>x + stp*s</code>.
+
+ @param s The search direction.
+
+ @param stp On entry this contains an initial estimate of a
+ satifactory step length. On exit <code>stp</code> contains
+ the final estimate.
+
+ @param ftol Tolerance for the sufficient decrease condition.
+
+ @param xtol Termination occurs when the relative width of the
+ interval of uncertainty is at most <code>xtol</code>.
+
+ @param maxfev Termination occurs when the number of evaluations
+ of the objective function is at least <code>maxfev</code> by
+ the end of an iteration.
+
+ @param info This is an output variable, which can have these
+ values:
+ <ul>
+ <li><code>info = -1</code> A return is made to compute
+ the function and gradient.
+ <li><code>info = 1</code> The sufficient decrease condition
+ and the directional derivative condition hold.
+ </ul>
+
+ @param nfev On exit, this is set to the number of function
+ evaluations.
+
+ @param wa Temporary storage array, of length <code>n</code>.
+ */
+ void run(
+ FloatType const& gtol,
+ FloatType const& stpmin,
+ FloatType const& stpmax,
+ SizeType n,
+ FloatType* x,
+ FloatType f,
+ const FloatType* g,
+ FloatType* s,
+ SizeType is0,
+ FloatType& stp,
+ FloatType ftol,
+ FloatType xtol,
+ SizeType maxfev,
+ int& info,
+ SizeType& nfev,
+ FloatType* wa);
+
+ /* The purpose of this function is to compute a safeguarded step
+ for a linesearch and to update an interval of uncertainty for
+ a minimizer of the function.<p>
+
+ The parameter <code>stx</code> contains the step with the
+ least function value. The parameter <code>stp</code> contains
+ the current step. It is assumed that the derivative at
+ <code>stx</code> is negative in the direction of the step. If
+ <code>brackt</code> is <code>true</code> when
+ <code>mcstep</code> returns then a minimizer has been
+ bracketed in an interval of uncertainty with endpoints
+ <code>stx</code> and <code>sty</code>.<p>
+
+ Variables that must be modified by <code>mcstep</code> are
+ implemented as 1-element arrays.
+
+ @param stx Step at the best step obtained so far.
+ This variable is modified by <code>mcstep</code>.
+ @param fx Function value at the best step obtained so far.
+ This variable is modified by <code>mcstep</code>.
+ @param dx Derivative at the best step obtained so far.
+ The derivative must be negative in the direction of the
+ step, that is, <code>dx</code> and <code>stp-stx</code> must
+ have opposite signs. This variable is modified by
+ <code>mcstep</code>.
+
+ @param sty Step at the other endpoint of the interval of
+ uncertainty. This variable is modified by <code>mcstep</code>.
+ @param fy Function value at the other endpoint of the interval
+ of uncertainty. This variable is modified by
+ <code>mcstep</code>.
+
+ @param dy Derivative at the other endpoint of the interval of
+ uncertainty. This variable is modified by <code>mcstep</code>.
+
+ @param stp Step at the current step. If <code>brackt</code> is set
+ then on input <code>stp</code> must be between <code>stx</code>
+ and <code>sty</code>. On output <code>stp</code> is set to the
+ new step.
+ @param fp Function value at the current step.
+ @param dp Derivative at the current step.
+
+ @param brackt Tells whether a minimizer has been bracketed.
+ If the minimizer has not been bracketed, then on input this
+ variable must be set <code>false</code>. If the minimizer has
+ been bracketed, then on output this variable is
+ <code>true</code>.
+
+ @param stpmin Lower bound for the step.
+ @param stpmax Upper bound for the step.
+
+ If the return value is 1, 2, 3, or 4, then the step has
+ been computed successfully. A return value of 0 indicates
+ improper input parameters.
+
+ @author Jorge J. More, David J. Thuente: original Fortran version,
+ as part of Minpack project. Argonne Nat'l Laboratory, June 1983.
+ Robert Dodier: Java translation, August 1997.
+ */
+ static int mcstep(
+ FloatType& stx,
+ FloatType& fx,
+ FloatType& dx,
+ FloatType& sty,
+ FloatType& fy,
+ FloatType& dy,
+ FloatType& stp,
+ FloatType fp,
+ FloatType dp,
+ bool& brackt,
+ FloatType stpmin,
+ FloatType stpmax);
+
+ void serialize(std::ostream* out) const {
+ out->write((const char*)&infoc,sizeof(infoc));
+ out->write((const char*)&dginit,sizeof(dginit));
+ out->write((const char*)&brackt,sizeof(brackt));
+ out->write((const char*)&stage1,sizeof(stage1));
+ out->write((const char*)&finit,sizeof(finit));
+ out->write((const char*)&dgtest,sizeof(dgtest));
+ out->write((const char*)&width,sizeof(width));
+ out->write((const char*)&width1,sizeof(width1));
+ out->write((const char*)&stx,sizeof(stx));
+ out->write((const char*)&fx,sizeof(fx));
+ out->write((const char*)&dgx,sizeof(dgx));
+ out->write((const char*)&sty,sizeof(sty));
+ out->write((const char*)&fy,sizeof(fy));
+ out->write((const char*)&dgy,sizeof(dgy));
+ out->write((const char*)&stmin,sizeof(stmin));
+ out->write((const char*)&stmax,sizeof(stmax));
+ }
+
+ void deserialize(std::istream* in) const {
+ in->read((char*)&infoc, sizeof(infoc));
+ in->read((char*)&dginit, sizeof(dginit));
+ in->read((char*)&brackt, sizeof(brackt));
+ in->read((char*)&stage1, sizeof(stage1));
+ in->read((char*)&finit, sizeof(finit));
+ in->read((char*)&dgtest, sizeof(dgtest));
+ in->read((char*)&width, sizeof(width));
+ in->read((char*)&width1, sizeof(width1));
+ in->read((char*)&stx, sizeof(stx));
+ in->read((char*)&fx, sizeof(fx));
+ in->read((char*)&dgx, sizeof(dgx));
+ in->read((char*)&sty, sizeof(sty));
+ in->read((char*)&fy, sizeof(fy));
+ in->read((char*)&dgy, sizeof(dgy));
+ in->read((char*)&stmin, sizeof(stmin));
+ in->read((char*)&stmax, sizeof(stmax));
+ }
+ };
+
+ template <typename FloatType, typename SizeType>
+ void mcsrch<FloatType, SizeType>::run(
+ FloatType const& gtol,
+ FloatType const& stpmin,
+ FloatType const& stpmax,
+ SizeType n,
+ FloatType* x,
+ FloatType f,
+ const FloatType* g,
+ FloatType* s,
+ SizeType is0,
+ FloatType& stp,
+ FloatType ftol,
+ FloatType xtol,
+ SizeType maxfev,
+ int& info,
+ SizeType& nfev,
+ FloatType* wa)
+ {
+ if (info != -1) {
+ infoc = 1;
+ if ( n == 0
+ || maxfev == 0
+ || gtol < FloatType(0)
+ || xtol < FloatType(0)
+ || stpmin < FloatType(0)
+ || stpmax < stpmin) {
+ throw error_internal_error(__FILE__, __LINE__);
+ }
+ if (stp <= FloatType(0) || ftol < FloatType(0)) {
+ throw error_internal_error(__FILE__, __LINE__);
+ }
+ // Compute the initial gradient in the search direction
+ // and check that s is a descent direction.
+ dginit = FloatType(0);
+ for (SizeType j = 0; j < n; j++) {
+ dginit += g[j] * s[is0+j];
+ }
+ if (dginit >= FloatType(0)) {
+ throw error_search_direction_not_descent();
+ }
+ brackt = false;
+ stage1 = true;
+ nfev = 0;
+ finit = f;
+ dgtest = ftol*dginit;
+ width = stpmax - stpmin;
+ width1 = FloatType(2) * width;
+ std::copy(x, x+n, wa);
+ // The variables stx, fx, dgx contain the values of the step,
+ // function, and directional derivative at the best step.
+ // The variables sty, fy, dgy contain the value of the step,
+ // function, and derivative at the other endpoint of
+ // the interval of uncertainty.
+ // The variables stp, f, dg contain the values of the step,
+ // function, and derivative at the current step.
+ stx = FloatType(0);
+ fx = finit;
+ dgx = dginit;
+ sty = FloatType(0);
+ fy = finit;
+ dgy = dginit;
+ }
+ for (;;) {
+ if (info != -1) {
+ // Set the minimum and maximum steps to correspond
+ // to the present interval of uncertainty.
+ if (brackt) {
+ stmin = std::min(stx, sty);
+ stmax = std::max(stx, sty);
+ }
+ else {
+ stmin = stx;
+ stmax = stp + FloatType(4) * (stp - stx);
+ }
+ // Force the step to be within the bounds stpmax and stpmin.
+ stp = std::max(stp, stpmin);
+ stp = std::min(stp, stpmax);
+ // If an unusual termination is to occur then let
+ // stp be the lowest point obtained so far.
+ if ( (brackt && (stp <= stmin || stp >= stmax))
+ || nfev >= maxfev - 1 || infoc == 0
+ || (brackt && stmax - stmin <= xtol * stmax)) {
+ stp = stx;
+ }
+ // Evaluate the function and gradient at stp
+ // and compute the directional derivative.
+ // We return to main program to obtain F and G.
+ for (SizeType j = 0; j < n; j++) {
+ x[j] = wa[j] + stp * s[is0+j];
+ }
+ info=-1;
+ break;
+ }
+ info = 0;
+ nfev++;
+ FloatType dg(0);
+ for (SizeType j = 0; j < n; j++) {
+ dg += g[j] * s[is0+j];
+ }
+ FloatType ftest1 = finit + stp*dgtest;
+ // Test for convergence.
+ if ((brackt && (stp <= stmin || stp >= stmax)) || infoc == 0) {
+ throw error_line_search_failed_rounding_errors(
+ "Rounding errors prevent further progress."
+ " There may not be a step which satisfies the"
+ " sufficient decrease and curvature conditions."
+ " Tolerances may be too small.");
+ }
+ if (stp == stpmax && f <= ftest1 && dg <= dgtest) {
+ throw error_line_search_failed(
+ "The step is at the upper bound stpmax().");
+ }
+ if (stp == stpmin && (f > ftest1 || dg >= dgtest)) {
+ throw error_line_search_failed(
+ "The step is at the lower bound stpmin().");
+ }
+ if (nfev >= maxfev) {
+ throw error_line_search_failed(
+ "Number of function evaluations has reached maxfev().");
+ }
+ if (brackt && stmax - stmin <= xtol * stmax) {
+ throw error_line_search_failed(
+ "Relative width of the interval of uncertainty"
+ " is at most xtol().");
+ }
+ // Check for termination.
+ if (f <= ftest1 && abs(dg) <= gtol * (-dginit)) {
+ info = 1;
+ break;
+ }
+ // In the first stage we seek a step for which the modified
+ // function has a nonpositive value and nonnegative derivative.
+ if ( stage1 && f <= ftest1
+ && dg >= std::min(ftol, gtol) * dginit) {
+ stage1 = false;
+ }
+ // A modified function is used to predict the step only if
+ // we have not obtained a step for which the modified
+ // function has a nonpositive function value and nonnegative
+ // derivative, and if a lower function value has been
+ // obtained but the decrease is not sufficient.
+ if (stage1 && f <= fx && f > ftest1) {
+ // Define the modified function and derivative values.
+ FloatType fm = f - stp*dgtest;
+ FloatType fxm = fx - stx*dgtest;
+ FloatType fym = fy - sty*dgtest;
+ FloatType dgm = dg - dgtest;
+ FloatType dgxm = dgx - dgtest;
+ FloatType dgym = dgy - dgtest;
+ // Call cstep to update the interval of uncertainty
+ // and to compute the new step.
+ infoc = mcstep(stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm,
+ brackt, stmin, stmax);
+ // Reset the function and gradient values for f.
+ fx = fxm + stx*dgtest;
+ fy = fym + sty*dgtest;
+ dgx = dgxm + dgtest;
+ dgy = dgym + dgtest;
+ }
+ else {
+ // Call mcstep to update the interval of uncertainty
+ // and to compute the new step.
+ infoc = mcstep(stx, fx, dgx, sty, fy, dgy, stp, f, dg,
+ brackt, stmin, stmax);
+ }
+ // Force a sufficient decrease in the size of the
+ // interval of uncertainty.
+ if (brackt) {
+ if (abs(sty - stx) >= FloatType(0.66) * width1) {
+ stp = stx + FloatType(0.5) * (sty - stx);
+ }
+ width1 = width;
+ width = abs(sty - stx);
+ }
+ }
+ }
+
+ template <typename FloatType, typename SizeType>
+ int mcsrch<FloatType, SizeType>::mcstep(
+ FloatType& stx,
+ FloatType& fx,
+ FloatType& dx,
+ FloatType& sty,
+ FloatType& fy,
+ FloatType& dy,
+ FloatType& stp,
+ FloatType fp,
+ FloatType dp,
+ bool& brackt,
+ FloatType stpmin,
+ FloatType stpmax)
+ {
+ bool bound;
+ FloatType gamma, p, q, r, s, sgnd, stpc, stpf, stpq, theta;
+ int info = 0;
+ if ( ( brackt && (stp <= std::min(stx, sty)
+ || stp >= std::max(stx, sty)))
+ || dx * (stp - stx) >= FloatType(0) || stpmax < stpmin) {
+ return 0;
+ }
+ // Determine if the derivatives have opposite sign.
+ sgnd = dp * (dx / abs(dx));
+ if (fp > fx) {
+ // First case. A higher function value.
+ // The minimum is bracketed. If the cubic step is closer
+ // to stx than the quadratic step, the cubic step is taken,
+ // else the average of the cubic and quadratic steps is taken.
+ info = 1;
+ bound = true;
+ theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp;
+ s = max3(abs(theta), abs(dx), abs(dp));
+ gamma = s * std::sqrt(pow2(theta / s) - (dx / s) * (dp / s));
+ if (stp < stx) gamma = - gamma;
+ p = (gamma - dx) + theta;
+ q = ((gamma - dx) + gamma) + dp;
+ r = p/q;
+ stpc = stx + r * (stp - stx);
+ stpq = stx
+ + ((dx / ((fx - fp) / (stp - stx) + dx)) / FloatType(2))
+ * (stp - stx);
+ if (abs(stpc - stx) < abs(stpq - stx)) {
+ stpf = stpc;
+ }
+ else {
+ stpf = stpc + (stpq - stpc) / FloatType(2);
+ }
+ brackt = true;
+ }
+ else if (sgnd < FloatType(0)) {
+ // Second case. A lower function value and derivatives of
+ // opposite sign. The minimum is bracketed. If the cubic
+ // step is closer to stx than the quadratic (secant) step,
+ // the cubic step is taken, else the quadratic step is taken.
+ info = 2;
+ bound = false;
+ theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp;
+ s = max3(abs(theta), abs(dx), abs(dp));
+ gamma = s * std::sqrt(pow2(theta / s) - (dx / s) * (dp / s));
+ if (stp > stx) gamma = - gamma;
+ p = (gamma - dp) + theta;
+ q = ((gamma - dp) + gamma) + dx;
+ r = p/q;
+ stpc = stp + r * (stx - stp);
+ stpq = stp + (dp / (dp - dx)) * (stx - stp);
+ if (abs(stpc - stp) > abs(stpq - stp)) {
+ stpf = stpc;
+ }
+ else {
+ stpf = stpq;
+ }
+ brackt = true;
+ }
+ else if (abs(dp) < abs(dx)) {
+ // Third case. A lower function value, derivatives of the
+ // same sign, and the magnitude of the derivative decreases.
+ // The cubic step is only used if the cubic tends to infinity
+ // in the direction of the step or if the minimum of the cubic
+ // is beyond stp. Otherwise the cubic step is defined to be
+ // either stpmin or stpmax. The quadratic (secant) step is also
+ // computed and if the minimum is bracketed then the the step
+ // closest to stx is taken, else the step farthest away is taken.
+ info = 3;
+ bound = true;
+ theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp;
+ s = max3(abs(theta), abs(dx), abs(dp));
+ gamma = s * std::sqrt(
+ std::max(FloatType(0), pow2(theta / s) - (dx / s) * (dp / s)));
+ if (stp > stx) gamma = -gamma;
+ p = (gamma - dp) + theta;
+ q = (gamma + (dx - dp)) + gamma;
+ r = p/q;
+ if (r < FloatType(0) && gamma != FloatType(0)) {
+ stpc = stp + r * (stx - stp);
+ }
+ else if (stp > stx) {
+ stpc = stpmax;
+ }
+ else {
+ stpc = stpmin;
+ }
+ stpq = stp + (dp / (dp - dx)) * (stx - stp);
+ if (brackt) {
+ if (abs(stp - stpc) < abs(stp - stpq)) {
+ stpf = stpc;
+ }
+ else {
+ stpf = stpq;
+ }
+ }
+ else {
+ if (abs(stp - stpc) > abs(stp - stpq)) {
+ stpf = stpc;
+ }
+ else {
+ stpf = stpq;
+ }
+ }
+ }
+ else {
+ // Fourth case. A lower function value, derivatives of the
+ // same sign, and the magnitude of the derivative does
+ // not decrease. If the minimum is not bracketed, the step
+ // is either stpmin or stpmax, else the cubic step is taken.
+ info = 4;
+ bound = false;
+ if (brackt) {
+ theta = FloatType(3) * (fp - fy) / (sty - stp) + dy + dp;
+ s = max3(abs(theta), abs(dy), abs(dp));
+ gamma = s * std::sqrt(pow2(theta / s) - (dy / s) * (dp / s));
+ if (stp > sty) gamma = -gamma;
+ p = (gamma - dp) + theta;
+ q = ((gamma - dp) + gamma) + dy;
+ r = p/q;
+ stpc = stp + r * (sty - stp);
+ stpf = stpc;
+ }
+ else if (stp > stx) {
+ stpf = stpmax;
+ }
+ else {
+ stpf = stpmin;
+ }
+ }
+ // Update the interval of uncertainty. This update does not
+ // depend on the new step or the case analysis above.
+ if (fp > fx) {
+ sty = stp;
+ fy = fp;
+ dy = dp;
+ }
+ else {
+ if (sgnd < FloatType(0)) {
+ sty = stx;
+ fy = fx;
+ dy = dx;
+ }
+ stx = stp;
+ fx = fp;
+ dx = dp;
+ }
+ // Compute the new step and safeguard it.
+ stpf = std::min(stpmax, stpf);
+ stpf = std::max(stpmin, stpf);
+ stp = stpf;
+ if (brackt && bound) {
+ if (sty > stx) {
+ stp = std::min(stx + FloatType(0.66) * (sty - stx), stp);
+ }
+ else {
+ stp = std::max(stx + FloatType(0.66) * (sty - stx), stp);
+ }
+ }
+ return info;
+ }
+
+ /* Compute the sum of a vector times a scalar plus another vector.
+ Adapted from the subroutine <code>daxpy</code> in
+ <code>lbfgs.f</code>.
+ */
+ template <typename FloatType, typename SizeType>
+ void daxpy(
+ SizeType n,
+ FloatType da,
+ const FloatType* dx,
+ SizeType ix0,
+ SizeType incx,
+ FloatType* dy,
+ SizeType iy0,
+ SizeType incy)
+ {
+ SizeType i, ix, iy, m;
+ if (n == 0) return;
+ if (da == FloatType(0)) return;
+ if (!(incx == 1 && incy == 1)) {
+ ix = 0;
+ iy = 0;
+ for (i = 0; i < n; i++) {
+ dy[iy0+iy] += da * dx[ix0+ix];
+ ix += incx;
+ iy += incy;
+ }
+ return;
+ }
+ m = n % 4;
+ for (i = 0; i < m; i++) {
+ dy[iy0+i] += da * dx[ix0+i];
+ }
+ for (; i < n;) {
+ dy[iy0+i] += da * dx[ix0+i]; i++;
+ dy[iy0+i] += da * dx[ix0+i]; i++;
+ dy[iy0+i] += da * dx[ix0+i]; i++;
+ dy[iy0+i] += da * dx[ix0+i]; i++;
+ }
+ }
+
+ template <typename FloatType, typename SizeType>
+ inline
+ void daxpy(
+ SizeType n,
+ FloatType da,
+ const FloatType* dx,
+ SizeType ix0,
+ FloatType* dy)
+ {
+ daxpy(n, da, dx, ix0, SizeType(1), dy, SizeType(0), SizeType(1));
+ }
+
+ /* Compute the dot product of two vectors.
+ Adapted from the subroutine <code>ddot</code>
+ in <code>lbfgs.f</code>.
+ */
+ template <typename FloatType, typename SizeType>
+ FloatType ddot(
+ SizeType n,
+ const FloatType* dx,
+ SizeType ix0,
+ SizeType incx,
+ const FloatType* dy,
+ SizeType iy0,
+ SizeType incy)
+ {
+ SizeType i, ix, iy, m;
+ FloatType dtemp(0);
+ if (n == 0) return FloatType(0);
+ if (!(incx == 1 && incy == 1)) {
+ ix = 0;
+ iy = 0;
+ for (i = 0; i < n; i++) {
+ dtemp += dx[ix0+ix] * dy[iy0+iy];
+ ix += incx;
+ iy += incy;
+ }
+ return dtemp;
+ }
+ m = n % 5;
+ for (i = 0; i < m; i++) {
+ dtemp += dx[ix0+i] * dy[iy0+i];
+ }
+ for (; i < n;) {
+ dtemp += dx[ix0+i] * dy[iy0+i]; i++;
+ dtemp += dx[ix0+i] * dy[iy0+i]; i++;
+ dtemp += dx[ix0+i] * dy[iy0+i]; i++;
+ dtemp += dx[ix0+i] * dy[iy0+i]; i++;
+ dtemp += dx[ix0+i] * dy[iy0+i]; i++;
+ }
+ return dtemp;
+ }
+
+ template <typename FloatType, typename SizeType>
+ inline
+ FloatType ddot(
+ SizeType n,
+ const FloatType* dx,
+ const FloatType* dy)
+ {
+ return ddot(
+ n, dx, SizeType(0), SizeType(1), dy, SizeType(0), SizeType(1));
+ }
+
+ } // namespace detail
+
+ //! Interface to the LBFGS %minimizer.
+ /*! This class solves the unconstrained minimization problem
+ <pre>
+ min f(x), x = (x1,x2,...,x_n),
+ </pre>
+ using the limited-memory BFGS method. The routine is
+ especially effective on problems involving a large number of
+ variables. In a typical iteration of this method an
+ approximation Hk to the inverse of the Hessian
+ is obtained by applying <code>m</code> BFGS updates to a
+ diagonal matrix Hk0, using information from the
+ previous <code>m</code> steps. The user specifies the number
+ <code>m</code>, which determines the amount of storage
+ required by the routine. The user may also provide the
+ diagonal matrices Hk0 (parameter <code>diag</code> in the run()
+ function) if not satisfied with the default choice. The
+ algorithm is described in "On the limited memory BFGS method for
+ large scale optimization", by D. Liu and J. Nocedal, Mathematical
+ Programming B 45 (1989) 503-528.
+
+ The user is required to calculate the function value
+ <code>f</code> and its gradient <code>g</code>. In order to
+ allow the user complete control over these computations,
+ reverse communication is used. The routine must be called
+ repeatedly under the control of the member functions
+ <code>requests_f_and_g()</code>,
+ <code>requests_diag()</code>.
+ If neither requests_f_and_g() nor requests_diag() is
+ <code>true</code> the user should check for convergence
+ (using class traditional_convergence_test or any
+ other custom test). If the convergence test is negative,
+ the minimizer may be called again for the next iteration.
+
+ The steplength (stp()) is determined at each iteration
+ by means of the line search routine <code>mcsrch</code>, which is
+ a slight modification of the routine <code>CSRCH</code> written
+ by More' and Thuente.
+
+ The only variables that are machine-dependent are
+ <code>xtol</code>,
+ <code>stpmin</code> and
+ <code>stpmax</code>.
+
+ Fatal errors cause <code>error</code> exceptions to be thrown.
+ The generic class <code>error</code> is sub-classed (e.g.
+ class <code>error_line_search_failed</code>) to facilitate
+ granular %error handling.
+
+ A note on performance: Using Compaq Fortran V5.4 and
+ Compaq C++ V6.5, the C++ implementation is about 15% slower
+ than the Fortran implementation.
+ */
+ template <typename FloatType, typename SizeType = std::size_t>
+ class minimizer
+ {
+ public:
+ //! Default constructor. Some members are not initialized!
+ minimizer()
+ : n_(0), m_(0), maxfev_(0),
+ gtol_(0), xtol_(0),
+ stpmin_(0), stpmax_(0),
+ ispt(0), iypt(0)
+ {}
+
+ //! Constructor.
+ /*! @param n The number of variables in the minimization problem.
+ Restriction: <code>n &gt; 0</code>.
+
+ @param m The number of corrections used in the BFGS update.
+ Values of <code>m</code> less than 3 are not recommended;
+ large values of <code>m</code> will result in excessive
+ computing time. <code>3 &lt;= m &lt;= 7</code> is
+ recommended.
+ Restriction: <code>m &gt; 0</code>.
+
+ @param maxfev Maximum number of function evaluations
+ <b>per line search</b>.
+ Termination occurs when the number of evaluations
+ of the objective function is at least <code>maxfev</code> by
+ the end of an iteration.
+
+ @param gtol Controls the accuracy of the line search.
+ If the function and gradient evaluations are inexpensive with
+ respect to the cost of the iteration (which is sometimes the
+ case when solving very large problems) it may be advantageous
+ to set <code>gtol</code> to a small value. A typical small
+ value is 0.1.
+ Restriction: <code>gtol</code> should be greater than 1e-4.
+
+ @param xtol An estimate of the machine precision (e.g. 10e-16
+ on a SUN station 3/60). The line search routine will
+ terminate if the relative width of the interval of
+ uncertainty is less than <code>xtol</code>.
+
+ @param stpmin Specifies the lower bound for the step
+ in the line search.
+ The default value is 1e-20. This value need not be modified
+ unless the exponent is too large for the machine being used,
+ or unless the problem is extremely badly scaled (in which
+ case the exponent should be increased).
+
+ @param stpmax specifies the upper bound for the step
+ in the line search.
+ The default value is 1e20. This value need not be modified
+ unless the exponent is too large for the machine being used,
+ or unless the problem is extremely badly scaled (in which
+ case the exponent should be increased).
+ */
+ explicit
+ minimizer(
+ SizeType n,
+ SizeType m = 5,
+ SizeType maxfev = 20,
+ FloatType gtol = FloatType(0.9),
+ FloatType xtol = FloatType(1.e-16),
+ FloatType stpmin = FloatType(1.e-20),
+ FloatType stpmax = FloatType(1.e20))
+ : n_(n), m_(m), maxfev_(maxfev),
+ gtol_(gtol), xtol_(xtol),
+ stpmin_(stpmin), stpmax_(stpmax),
+ iflag_(0), requests_f_and_g_(false), requests_diag_(false),
+ iter_(0), nfun_(0), stp_(0),
+ stp1(0), ftol(0.0001), ys(0), point(0), npt(0),
+ ispt(n+2*m), iypt((n+2*m)+n*m),
+ info(0), bound(0), nfev(0)
+ {
+ if (n_ == 0) {
+ throw error_improper_input_parameter("n = 0.");
+ }
+ if (m_ == 0) {
+ throw error_improper_input_parameter("m = 0.");
+ }
+ if (maxfev_ == 0) {
+ throw error_improper_input_parameter("maxfev = 0.");
+ }
+ if (gtol_ <= FloatType(1.e-4)) {
+ throw error_improper_input_parameter("gtol <= 1.e-4.");
+ }
+ if (xtol_ < FloatType(0)) {
+ throw error_improper_input_parameter("xtol < 0.");
+ }
+ if (stpmin_ < FloatType(0)) {
+ throw error_improper_input_parameter("stpmin < 0.");
+ }
+ if (stpmax_ < stpmin) {
+ throw error_improper_input_parameter("stpmax < stpmin");
+ }
+ w_.resize(n_*(2*m_+1)+2*m_);
+ scratch_array_.resize(n_);
+ }
+
+ //! Number of free parameters (as passed to the constructor).
+ SizeType n() const { return n_; }
+
+ //! Number of corrections kept (as passed to the constructor).
+ SizeType m() const { return m_; }
+
+ /*! \brief Maximum number of evaluations of the objective function
+ per line search (as passed to the constructor).
+ */
+ SizeType maxfev() const { return maxfev_; }
+
+ /*! \brief Control of the accuracy of the line search.
+ (as passed to the constructor).
+ */
+ FloatType gtol() const { return gtol_; }
+
+ //! Estimate of the machine precision (as passed to the constructor).
+ FloatType xtol() const { return xtol_; }
+
+ /*! \brief Lower bound for the step in the line search.
+ (as passed to the constructor).
+ */
+ FloatType stpmin() const { return stpmin_; }
+
+ /*! \brief Upper bound for the step in the line search.
+ (as passed to the constructor).
+ */
+ FloatType stpmax() const { return stpmax_; }
+
+ //! Status indicator for reverse communication.
+ /*! <code>true</code> if the run() function returns to request
+ evaluation of the objective function (<code>f</code>) and
+ gradients (<code>g</code>) for the current point
+ (<code>x</code>). To continue the minimization the
+ run() function is called again with the updated values for
+ <code>f</code> and <code>g</code>.
+ <p>
+ See also: requests_diag()
+ */
+ bool requests_f_and_g() const { return requests_f_and_g_; }
+
+ //! Status indicator for reverse communication.
+ /*! <code>true</code> if the run() function returns to request
+ evaluation of the diagonal matrix (<code>diag</code>)
+ for the current point (<code>x</code>).
+ To continue the minimization the run() function is called
+ again with the updated values for <code>diag</code>.
+ <p>
+ See also: requests_f_and_g()
+ */
+ bool requests_diag() const { return requests_diag_; }
+
+ //! Number of iterations so far.
+ /*! Note that one iteration may involve multiple evaluations
+ of the objective function.
+ <p>
+ See also: nfun()
+ */
+ SizeType iter() const { return iter_; }
+
+ //! Total number of evaluations of the objective function so far.
+ /*! The total number of function evaluations increases by the
+ number of evaluations required for the line search. The total
+ is only increased after a successful line search.
+ <p>
+ See also: iter()
+ */
+ SizeType nfun() const { return nfun_; }
+
+ //! Norm of gradient given gradient array of length n().
+ FloatType euclidean_norm(const FloatType* a) const {
+ return std::sqrt(detail::ddot(n_, a, a));
+ }
+
+ //! Current stepsize.
+ FloatType stp() const { return stp_; }
+
+ //! Execution of one step of the minimization.
+ /*! @param x On initial entry this must be set by the user to
+ the values of the initial estimate of the solution vector.
+
+ @param f Before initial entry or on re-entry under the
+ control of requests_f_and_g(), <code>f</code> must be set
+ by the user to contain the value of the objective function
+ at the current point <code>x</code>.
+
+ @param g Before initial entry or on re-entry under the
+ control of requests_f_and_g(), <code>g</code> must be set
+ by the user to contain the components of the gradient at
+ the current point <code>x</code>.
+
+ The return value is <code>true</code> if either
+ requests_f_and_g() or requests_diag() is <code>true</code>.
+ Otherwise the user should check for convergence
+ (e.g. using class traditional_convergence_test) and
+ call the run() function again to continue the minimization.
+ If the return value is <code>false</code> the user
+ should <b>not</b> update <code>f</code>, <code>g</code> or
+ <code>diag</code> (other overload) before calling
+ the run() function again.
+
+ Note that <code>x</code> is always modified by the run()
+ function. Depending on the situation it can therefore be
+ necessary to evaluate the objective function one more time
+ after the minimization is terminated.
+ */
+ bool run(
+ FloatType* x,
+ FloatType f,
+ const FloatType* g)
+ {
+ return generic_run(x, f, g, false, 0);
+ }
+
+ //! Execution of one step of the minimization.
+ /*! @param x See other overload.
+
+ @param f See other overload.
+
+ @param g See other overload.
+
+ @param diag On initial entry or on re-entry under the
+ control of requests_diag(), <code>diag</code> must be set by
+ the user to contain the values of the diagonal matrix Hk0.
+ The routine will return at each iteration of the algorithm
+ with requests_diag() set to <code>true</code>.
+ <p>
+ Restriction: all elements of <code>diag</code> must be
+ positive.
+ */
+ bool run(
+ FloatType* x,
+ FloatType f,
+ const FloatType* g,
+ const FloatType* diag)
+ {
+ return generic_run(x, f, g, true, diag);
+ }
+
+ void serialize(std::ostream* out) const {
+ out->write((const char*)&n_, sizeof(n_)); // sanity check
+ out->write((const char*)&m_, sizeof(m_)); // sanity check
+ SizeType fs = sizeof(FloatType);
+ out->write((const char*)&fs, sizeof(fs)); // sanity check
+
+ mcsrch_instance.serialize(out);
+ out->write((const char*)&iflag_, sizeof(iflag_));
+ out->write((const char*)&requests_f_and_g_, sizeof(requests_f_and_g_));
+ out->write((const char*)&requests_diag_, sizeof(requests_diag_));
+ out->write((const char*)&iter_, sizeof(iter_));
+ out->write((const char*)&nfun_, sizeof(nfun_));
+ out->write((const char*)&stp_, sizeof(stp_));
+ out->write((const char*)&stp1, sizeof(stp1));
+ out->write((const char*)&ftol, sizeof(ftol));
+ out->write((const char*)&ys, sizeof(ys));
+ out->write((const char*)&point, sizeof(point));
+ out->write((const char*)&npt, sizeof(npt));
+ out->write((const char*)&info, sizeof(info));
+ out->write((const char*)&bound, sizeof(bound));
+ out->write((const char*)&nfev, sizeof(nfev));
+ out->write((const char*)&w_[0], sizeof(FloatType) * w_.size());
+ out->write((const char*)&scratch_array_[0], sizeof(FloatType) * scratch_array_.size());
+ }
+
+ void deserialize(std::istream* in) {
+ SizeType n, m, fs;
+ in->read((char*)&n, sizeof(n));
+ in->read((char*)&m, sizeof(m));
+ in->read((char*)&fs, sizeof(fs));
+ assert(n == n_);
+ assert(m == m_);
+ assert(fs == sizeof(FloatType));
+
+ mcsrch_instance.deserialize(in);
+ in->read((char*)&iflag_, sizeof(iflag_));
+ in->read((char*)&requests_f_and_g_, sizeof(requests_f_and_g_));
+ in->read((char*)&requests_diag_, sizeof(requests_diag_));
+ in->read((char*)&iter_, sizeof(iter_));
+ in->read((char*)&nfun_, sizeof(nfun_));
+ in->read((char*)&stp_, sizeof(stp_));
+ in->read((char*)&stp1, sizeof(stp1));
+ in->read((char*)&ftol, sizeof(ftol));
+ in->read((char*)&ys, sizeof(ys));
+ in->read((char*)&point, sizeof(point));
+ in->read((char*)&npt, sizeof(npt));
+ in->read((char*)&info, sizeof(info));
+ in->read((char*)&bound, sizeof(bound));
+ in->read((char*)&nfev, sizeof(nfev));
+ in->read((char*)&w_[0], sizeof(FloatType) * w_.size());
+ in->read((char*)&scratch_array_[0], sizeof(FloatType) * scratch_array_.size());
+ }
+
+ protected:
+ static void throw_diagonal_element_not_positive(SizeType i) {
+ throw error_improper_input_data(
+ "The " + error::itoa(i) + ". diagonal element of the"
+ " inverse Hessian approximation is not positive.");
+ }
+
+ bool generic_run(
+ FloatType* x,
+ FloatType f,
+ const FloatType* g,
+ bool diagco,
+ const FloatType* diag);
+
+ detail::mcsrch<FloatType, SizeType> mcsrch_instance;
+ const SizeType n_;
+ const SizeType m_;
+ const SizeType maxfev_;
+ const FloatType gtol_;
+ const FloatType xtol_;
+ const FloatType stpmin_;
+ const FloatType stpmax_;
+ int iflag_;
+ bool requests_f_and_g_;
+ bool requests_diag_;
+ SizeType iter_;
+ SizeType nfun_;
+ FloatType stp_;
+ FloatType stp1;
+ FloatType ftol;
+ FloatType ys;
+ SizeType point;
+ SizeType npt;
+ const SizeType ispt;
+ const SizeType iypt;
+ int info;
+ SizeType bound;
+ SizeType nfev;
+ std::vector<FloatType> w_;
+ std::vector<FloatType> scratch_array_;
+ };
+
+ template <typename FloatType, typename SizeType>
+ bool minimizer<FloatType, SizeType>::generic_run(
+ FloatType* x,
+ FloatType f,
+ const FloatType* g,
+ bool diagco,
+ const FloatType* diag)
+ {
+ bool execute_entire_while_loop = false;
+ if (!(requests_f_and_g_ || requests_diag_)) {
+ execute_entire_while_loop = true;
+ }
+ requests_f_and_g_ = false;
+ requests_diag_ = false;
+ FloatType* w = &(*(w_.begin()));
+ if (iflag_ == 0) { // Initialize.
+ nfun_ = 1;
+ if (diagco) {
+ for (SizeType i = 0; i < n_; i++) {
+ if (diag[i] <= FloatType(0)) {
+ throw_diagonal_element_not_positive(i);
+ }
+ }
+ }
+ else {
+ std::fill_n(scratch_array_.begin(), n_, FloatType(1));
+ diag = &(*(scratch_array_.begin()));
+ }
+ for (SizeType i = 0; i < n_; i++) {
+ w[ispt + i] = -g[i] * diag[i];
+ }
+ FloatType gnorm = std::sqrt(detail::ddot(n_, g, g));
+ if (gnorm == FloatType(0)) return false;
+ stp1 = FloatType(1) / gnorm;
+ execute_entire_while_loop = true;
+ }
+ if (execute_entire_while_loop) {
+ bound = iter_;
+ iter_++;
+ info = 0;
+ if (iter_ != 1) {
+ if (iter_ > m_) bound = m_;
+ ys = detail::ddot(
+ n_, w, iypt + npt, SizeType(1), w, ispt + npt, SizeType(1));
+ if (!diagco) {
+ FloatType yy = detail::ddot(
+ n_, w, iypt + npt, SizeType(1), w, iypt + npt, SizeType(1));
+ std::fill_n(scratch_array_.begin(), n_, ys / yy);
+ diag = &(*(scratch_array_.begin()));
+ }
+ else {
+ iflag_ = 2;
+ requests_diag_ = true;
+ return true;
+ }
+ }
+ }
+ if (execute_entire_while_loop || iflag_ == 2) {
+ if (iter_ != 1) {
+ if (diag == 0) {
+ throw error_internal_error(__FILE__, __LINE__);
+ }
+ if (diagco) {
+ for (SizeType i = 0; i < n_; i++) {
+ if (diag[i] <= FloatType(0)) {
+ throw_diagonal_element_not_positive(i);
+ }
+ }
+ }
+ SizeType cp = point;
+ if (point == 0) cp = m_;
+ w[n_ + cp -1] = 1 / ys;
+ SizeType i;
+ for (i = 0; i < n_; i++) {
+ w[i] = -g[i];
+ }
+ cp = point;
+ for (i = 0; i < bound; i++) {
+ if (cp == 0) cp = m_;
+ cp--;
+ FloatType sq = detail::ddot(
+ n_, w, ispt + cp * n_, SizeType(1), w, SizeType(0), SizeType(1));
+ SizeType inmc=n_+m_+cp;
+ SizeType iycn=iypt+cp*n_;
+ w[inmc] = w[n_ + cp] * sq;
+ detail::daxpy(n_, -w[inmc], w, iycn, w);
+ }
+ for (i = 0; i < n_; i++) {
+ w[i] *= diag[i];
+ }
+ for (i = 0; i < bound; i++) {
+ FloatType yr = detail::ddot(
+ n_, w, iypt + cp * n_, SizeType(1), w, SizeType(0), SizeType(1));
+ FloatType beta = w[n_ + cp] * yr;
+ SizeType inmc=n_+m_+cp;
+ beta = w[inmc] - beta;
+ SizeType iscn=ispt+cp*n_;
+ detail::daxpy(n_, beta, w, iscn, w);
+ cp++;
+ if (cp == m_) cp = 0;
+ }
+ std::copy(w, w+n_, w+(ispt + point * n_));
+ }
+ stp_ = FloatType(1);
+ if (iter_ == 1) stp_ = stp1;
+ std::copy(g, g+n_, w);
+ }
+ mcsrch_instance.run(
+ gtol_, stpmin_, stpmax_, n_, x, f, g, w, ispt + point * n_,
+ stp_, ftol, xtol_, maxfev_, info, nfev, &(*(scratch_array_.begin())));
+ if (info == -1) {
+ iflag_ = 1;
+ requests_f_and_g_ = true;
+ return true;
+ }
+ if (info != 1) {
+ throw error_internal_error(__FILE__, __LINE__);
+ }
+ nfun_ += nfev;
+ npt = point*n_;
+ for (SizeType i = 0; i < n_; i++) {
+ w[ispt + npt + i] = stp_ * w[ispt + npt + i];
+ w[iypt + npt + i] = g[i] - w[i];
+ }
+ point++;
+ if (point == m_) point = 0;
+ return false;
+ }
+
+ //! Traditional LBFGS convergence test.
+ /*! This convergence test is equivalent to the test embedded
+ in the <code>lbfgs.f</code> Fortran code. The test assumes that
+ there is a meaningful relation between the Euclidean norm of the
+ parameter vector <code>x</code> and the norm of the gradient
+ vector <code>g</code>. Therefore this test should not be used if
+ this assumption is not correct for a given problem.
+ */
+ template <typename FloatType, typename SizeType = std::size_t>
+ class traditional_convergence_test
+ {
+ public:
+ //! Default constructor.
+ traditional_convergence_test()
+ : n_(0), eps_(0)
+ {}
+
+ //! Constructor.
+ /*! @param n The number of variables in the minimization problem.
+ Restriction: <code>n &gt; 0</code>.
+
+ @param eps Determines the accuracy with which the solution
+ is to be found.
+ */
+ explicit
+ traditional_convergence_test(
+ SizeType n,
+ FloatType eps = FloatType(1.e-5))
+ : n_(n), eps_(eps)
+ {
+ if (n_ == 0) {
+ throw error_improper_input_parameter("n = 0.");
+ }
+ if (eps_ < FloatType(0)) {
+ throw error_improper_input_parameter("eps < 0.");
+ }
+ }
+
+ //! Number of free parameters (as passed to the constructor).
+ SizeType n() const { return n_; }
+
+ /*! \brief Accuracy with which the solution is to be found
+ (as passed to the constructor).
+ */
+ FloatType eps() const { return eps_; }
+
+ //! Execution of the convergence test for the given parameters.
+ /*! Returns <code>true</code> if
+ <pre>
+ ||g|| &lt; eps * max(1,||x||),
+ </pre>
+ where <code>||.||</code> denotes the Euclidean norm.
+
+ @param x Current solution vector.
+
+ @param g Components of the gradient at the current
+ point <code>x</code>.
+ */
+ bool
+ operator()(const FloatType* x, const FloatType* g) const
+ {
+ FloatType xnorm = std::sqrt(detail::ddot(n_, x, x));
+ FloatType gnorm = std::sqrt(detail::ddot(n_, g, g));
+ if (gnorm <= eps_ * std::max(FloatType(1), xnorm)) return true;
+ return false;
+ }
+ protected:
+ const SizeType n_;
+ const FloatType eps_;
+ };
+
+}} // namespace scitbx::lbfgs
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const scitbx::lbfgs::minimizer<T>& min) {
+ return os << "ITER=" << min.iter() << "\tNFUN=" << min.nfun() << "\tSTP=" << min.stp() << "\tDIAG=" << min.requests_diag() << "\tF&G=" << min.requests_f_and_g();
+}
+
+
+#endif // SCITBX_LBFGS_H