1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
#include <vector>
#include <iostream>
#include <cmath>
#include <stdio.h>
#include "mathvec.h"
#include "lbfgs.h"
#include "maxent.h"
using namespace std;
// Number of (s, y, z) history slots kept by perform_OWLQN; slots are reused
// cyclically via `iter % M`. LBFGS_M is not defined in this file
// (presumably it comes from lbfgs.h -- confirm).
const static int M = LBFGS_M;
// Coefficient on dot_product(x - x0, grad0) in the acceptance test of
// constrained_line_search: a trial point is rejected while
// f > f0 + LINE_SEARCH_ALPHA * dot_product(x - x0, grad0).
const static double LINE_SEARCH_ALPHA = 0.1;
// Factor by which the trial step length is multiplied on every pass of the
// line-search loop (the first trial uses a step of 1).
const static double LINE_SEARCH_BETA = 0.5;
// stopping criteria
// Upper bound on the number of iterations of the main loop in perform_OWLQN.
// Unlike the other settings it is neither const nor static, so it can be
// modified at run time and is visible to other translation units.
int OWLQN_MAX_ITER = 300;
// perform_OWLQN leaves its loop once the Euclidean norm of the
// pseudo-gradient is below this value.
const static double MIN_GRAD_NORM = 0.0001;
// Forward declaration; the definition is not in this file.
// perform_OWLQN calls it with the current iteration index, the
// pseudo-gradient and the s / y / z history arrays, and negates the result
// to obtain its search direction.
// NOTE(review): presumably the L-BFGS approximation of (inverse Hessian *
// gradient) implemented alongside lbfgs.h -- confirm against the definition.
Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
const Vec y[], const double z[]);
// Signum of x: 1 when x is positive, -1 when x is negative, 0 otherwise.
// Both comparisons are false for +0.0, -0.0 and NaN, so those yield 0.
inline int sign(double x) {
  const int is_positive = (x > 0) ? 1 : 0;
  const int is_negative = (x < 0) ? 1 : 0;
  return is_positive - is_negative;
}
// Builds a per-component "pseudo-gradient" from the point x, the gradient
// grad0 and the weight C, and returns it as a new Vec.
//
//   x[k] != 0 : grad0[k] + C * sign(x[k])
//   x[k] == 0 : grad0[k] - C   if that value is > 0,
//               grad0[k] + C   if that value is < 0,
//               0              otherwise.
//
// In this file it is called with the gradient produced by
// regularized_func_grad and the same C that weights the |x[k]| penalty there.
static Vec pseudo_gradient(const Vec& x, const Vec& grad0, const double C) {
  Vec result = grad0;
  const size_t n = x.Size();
  for (size_t k = 0; k < n; ++k) {
    if (x[k] != 0) {
      // Away from zero: shift the copied gradient entry by +/- C.
      result[k] += C * sign(x[k]);
    } else {
      // At zero: the lower candidate is tried first, then the upper one.
      const double lower = grad0[k] - C;
      const double upper = grad0[k] + C;
      if (lower > 0) {
        result[k] = lower;
      } else if (upper < 0) {
        result[k] = upper;
      } else {
        result[k] = 0;
      }
    }
  }
  return result;
}
// Returns FunctionGradient(x, grad) plus C * |x[k]| summed over every
// component of x.
//
// grad is handed to FunctionGradient as a mutable std::vector and is not
// touched afterwards, so on return it holds whatever FunctionGradient wrote;
// the penalty term contributes nothing to it here.
// NOTE(review): FunctionGradient is defined elsewhere -- presumably it returns
// the unregularized objective and fills in its gradient; confirm.
double ME_Model::regularized_func_grad(const double C, const Vec& x,
                                       Vec& grad) {
  double objective = FunctionGradient(x.STLVec(), grad.STLVec());
  const size_t n = x.Size();
  // Penalty terms are added one at a time, in index order.
  for (size_t k = 0; k != n; ++k) {
    const double penalty = C * fabs(x[k]);
    objective += penalty;
  }
  return objective;
}
// Backtracking line search from x0 along dx.
//
// C      weight forwarded to regularized_func_grad.
// x0     starting point.
// grad0  direction used both to pick the reference vector for zero
//        components and in the acceptance test (the caller in this file
//        passes the pseudo-gradient).
// f0     objective value at x0.
// dx     search direction.
// x      [out] the accepted trial point.
// grad1  [out] whatever regularized_func_grad wrote for the accepted point.
//
// Returns the objective value at the accepted point.
//
// Trial points are x0 + t * dx for t = 1, BETA, BETA^2, ..., each passed
// through Vec::Project against a reference vector before evaluation. A trial
// is accepted as soon as
//   f <= f0 + LINE_SEARCH_ALPHA * dot_product(x - x0, grad0)
// (more precisely: as soon as "f > bound" is false).
// NOTE(review): Vec::Project is defined elsewhere -- presumably it zeroes the
// components whose sign disagrees with the reference vector; confirm.
double ME_Model::constrained_line_search(double C, const Vec& x0,
                                         const Vec& grad0, const double f0,
                                         const Vec& dx, Vec& x, Vec& grad1) {
  // Reference vector: x0 itself, with every zero entry replaced by the
  // negated entry of grad0.
  Vec orthant = x0;
  const size_t n = orthant.Size();
  for (size_t k = 0; k < n; ++k) {
    if (orthant[k] == 0) {
      orthant[k] = -grad0[k];
    }
  }

  // Start one shrink step above 1 so the multiplication at the top of the
  // loop makes the first trial use t == 1.
  double t = 1.0 / LINE_SEARCH_BETA;
  while (true) {
    t *= LINE_SEARCH_BETA;

    x = x0 + t * dx;
    x.Project(orthant);

    const double f = regularized_func_grad(C, x, grad1);
    const bool too_high =
        f > f0 + LINE_SEARCH_ALPHA * dot_product(x - x0, grad0);
    if (!too_high) {
      return f;
    }
  }
}
// Iteratively minimizes regularized_func_grad(C, .) starting from x0 and
// returns the final point as a std::vector<double>.
//
// Each iteration:
//   1. forms the pseudo-gradient at the current point;
//   2. writes a progress line to stderr (negated objective and _train_error,
//      plus held-out figures when _nheldout > 0);
//   3. stops if the pseudo-gradient norm is below MIN_GRAD_NORM;
//   4. takes the negated result of approximate_Hg as search direction, and
//      projects it against the negated pseudo-gradient when
//      dot_product(dx, pg) >= 0;
//   5. runs constrained_line_search and stores the step, the gradient
//      difference and 1 / (y . s) in history slot iter % M.
// The loop also ends after OWLQN_MAX_ITER iterations.
vector<double> ME_Model::perform_OWLQN(const vector<double>& x0,
                                       const double C) {
  const size_t dim = x0.size();

  Vec x = x0;
  Vec grad(dim), dx(dim);
  double f = regularized_func_grad(C, x, grad);

  // Cyclic history: s = step, y = gradient difference, z = 1 / (y . s).
  Vec s[M], y[M];
  double z[M];

  int iter = 0;
  while (iter < OWLQN_MAX_ITER) {
    Vec pg = pseudo_gradient(x, grad, C);

    fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
    if (_nheldout > 0) {
      // heldout_likelihood() is called before _heldout_error is read for the
      // log line, exactly as in the statement order below.
      const double heldout_logl = heldout_likelihood();
      fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
              _heldout_error);
    }
    fprintf(stderr, "\n");

    const double pg_norm = sqrt(dot_product(pg, pg));
    if (pg_norm < MIN_GRAD_NORM) {
      break;
    }

    dx = -1 * approximate_Hg(iter, pg, s, y, z);
    if (dot_product(dx, pg) >= 0) {
      dx.Project(-1 * pg);
    }

    Vec x1(dim), grad1(dim);
    f = constrained_line_search(C, x, pg, f, dx, x1, grad1);

    const int slot = iter % M;
    s[slot] = x1 - x;
    y[slot] = grad1 - grad;
    z[slot] = 1.0 / dot_product(y[slot], s[slot]);

    x = x1;
    grad = grad1;
    ++iter;
  }

  return x.STLVec();
}
|