1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
#include <vector>
#include <iostream>
#include <cmath>
#include <stdio.h>
#include "mathvec.h"
#include "lbfgs.h"
#include "maxent.h"
using namespace std;
// Number of (s, y, z) correction triples kept by the optimiser; it sizes the
// history arrays and is the modulus used to index them as a ring buffer.
const static int M = LBFGS_M;
// Coefficient applied to t * dot_product(dx, grad0) in the line-search
// acceptance test.
const static double LINE_SEARCH_ALPHA = 0.1;
// Factor by which the trial step size t is multiplied before each line-search
// evaluation.
const static double LINE_SEARCH_BETA = 0.5;
// stopping criteria
// Upper bound on the number of iterations of the main optimisation loop.
// Declared non-const and without `static`, unlike the other constants here.
int LBFGS_MAX_ITER = 300;
// The main loop stops once sqrt(dot_product(grad, grad)) drops below this.
const static double MIN_GRAD_NORM = 0.0001;
// Backtracking line search along direction `dx`, starting from `x0`.
//
// Trial step sizes t shrink by a factor of LINE_SEARCH_BETA per attempt; the
// first t that satisfies the sufficient-decrease (Armijo) test
//     f(x0 + t * dx) <= f0 + LINE_SEARCH_ALPHA * t * dot_product(dx, grad0)
// is accepted.
//
//   x0     point the search starts from
//   grad0  gradient supplied by the caller for x0
//   f0     objective value supplied by the caller for x0
//   dx     search direction
//   x      [out] last trial point, i.e. x0 + t * dx for the accepted t
//   grad1  [out] gradient written by FunctionGradient at that trial point
//
// Returns the objective value reported by FunctionGradient at `x`.
double ME_Model::backtracking_line_search(const Vec& x0, const Vec& grad0,
                                          const double f0, const Vec& dx,
                                          Vec& x, Vec& grad1) {
  // The directional-derivative term does not depend on t, so compute it once
  // instead of repeating the dot product for every trial step.
  const double slope = dot_product(dx, grad0);

  // Pre-divide by BETA so that the multiplication at the top of the loop makes
  // the first trial step t = 1.
  double t = 1.0 / LINE_SEARCH_BETA;
  double f;
  do {
    t *= LINE_SEARCH_BETA;
    x = x0 + t * dx;
    f = FunctionGradient(x.STLVec(), grad1.STLVec());
  } while (f > f0 + LINE_SEARCH_ALPHA * t * slope);
  return f;
}
//
// Jorge Nocedal, "Updating Quasi-Newton Matrices With Limited Storage",
// Mathematics of Computation, Vol. 35, No. 151, pp. 773-782, 1980.
//
// Applies the limited-memory inverse-Hessian approximation to `grad` using the
// two-loop recursion over the stored correction history.
//
//   iter  number of correction triples produced so far
//   grad  vector to be multiplied
//   s, y  history arrays of length M, indexed as a ring buffer (index % M)
//   z     per-slot scalars matching s and y (the caller stores
//         1 / dot_product(y, s) there)
//
// Returns the resulting vector. With iter == 0 no history is used and the
// result is a copy of `grad`.
Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
                   const Vec y[], const double z[]) {
  // Window of usable history: the most recent `bound` entries, the oldest of
  // which has logical index `offset`.
  const bool history_full = !(iter <= M);
  const int offset = history_full ? iter - M : 0;
  const int bound = history_full ? M : iter;

  Vec q = grad;
  double alpha[M];

  // First loop: walk the history from newest to oldest.
  for (int i = bound; i-- > 0;) {
    const int slot = (i + offset) % M;
    alpha[i] = z[slot] * dot_product(s[slot], q);
    q += -alpha[i] * y[slot];
  }

  // Scale by a factor derived from the most recent correction pair.
  if (iter > 0) {
    const int newest = (iter - 1) % M;
    const double gamma =
        ((1.0 / z[newest]) / dot_product(y[newest], y[newest]));
    q *= gamma;
  }

  // Second loop: walk the history from oldest to newest.
  for (int i = 0; i < bound; ++i) {
    const int slot = (i + offset) % M;
    const double beta = z[slot] * dot_product(y[slot], q);
    q += s[slot] * (alpha[i] - beta);
  }
  return q;
}
// Runs the L-BFGS optimisation loop starting from `x0` and returns the final
// parameter vector.
//
// Each iteration logs progress to stderr, builds a search direction from the
// stored correction history, runs a backtracking line search, and records the
// new correction triple in ring-buffer slot (iter % M). The loop ends after
// LBFGS_MAX_ITER iterations or as soon as the Euclidean norm of the gradient
// falls below MIN_GRAD_NORM.
vector<double> ME_Model::perform_LBFGS(const vector<double>& x0) {
  const size_t dim = x0.size();
  Vec x = x0;
  Vec grad(dim), dx(dim);
  double f = FunctionGradient(x.STLVec(), grad.STLVec());

  // Correction history: s = step taken, y = gradient change,
  // z = 1 / dot_product(y, s).
  Vec s[M], y[M];
  double z[M];

  for (int iter = 0; iter < LBFGS_MAX_ITER; ++iter) {
    // Progress report; the objective is printed with its sign flipped.
    fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
    if (_nheldout > 0) {
      // Keep this call sequenced before _heldout_error is read
      // (presumably heldout_likelihood() refreshes it -- verify).
      const double heldout_logl = heldout_likelihood();
      fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
              _heldout_error);
    }
    fprintf(stderr, "\n");

    const double grad_norm = sqrt(dot_product(grad, grad));
    if (grad_norm < MIN_GRAD_NORM) break;

    dx = -1 * approximate_Hg(iter, grad, s, y, z);

    Vec x1(dim);
    Vec grad1(dim);
    f = backtracking_line_search(x, grad, f, dx, x1, grad1);

    // Overwrite the oldest history slot with this iteration's correction.
    const int slot = iter % M;
    s[slot] = x1 - x;
    y[slot] = grad1 - grad;
    z[slot] = 1.0 / dot_product(y[slot], s[slot]);

    x = x1;
    grad = grad1;
  }

  return x.STLVec();
}
|