summary | refs | log | tree | commit | diff
path: root/utils/synutils/maxent-3.0/sgd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'utils/synutils/maxent-3.0/sgd.cpp')
-rw-r--r-- utils/synutils/maxent-3.0/sgd.cpp 147
1 files changed, 80 insertions, 67 deletions
diff --git a/utils/synutils/maxent-3.0/sgd.cpp b/utils/synutils/maxent-3.0/sgd.cpp
index 6d28c23f..8613edca 100644
--- a/utils/synutils/maxent-3.0/sgd.cpp
+++ b/utils/synutils/maxent-3.0/sgd.cpp
@@ -4,23 +4,20 @@
using namespace std;
-//const double SGD_ETA0 = 1;
-//const double SGD_ITER = 30;
-//const double SGD_ALPHA = 0.85;
-
+// const double SGD_ETA0 = 1;
+// const double SGD_ITER = 30;
+// const double SGD_ALPHA = 0.85;
//#define FOLOS_NAIVE
//#define FOLOS_LAZY
#define SGD_CP
-inline void
-apply_l1_penalty(const int i, const double u,
- vector<double> & _vl, vector<double> & q)
-{
- double & w = _vl[i];
+inline void apply_l1_penalty(const int i, const double u, vector<double>& _vl,
+ vector<double>& q) {
+ double& w = _vl[i];
const double z = w;
- double & qi = q[i];
- if (w > 0) {
+ double& qi = q[i];
+ if (w > 0) {
w = max(0.0, w - (u + qi));
} else if (w < 0) {
w = min(0.0, w + (u - qi));
@@ -28,31 +25,29 @@ apply_l1_penalty(const int i, const double u,
qi += w - z;
}
-static double
-l1norm(const vector<double>& v)
-{
+static double l1norm(const vector<double>& v) {
double sum = 0;
for (size_t i = 0; i < v.size(); i++) sum += abs(v[i]);
return sum;
}
-inline void
-update_folos_lazy(const int iter_sample,
- const int k, vector<double> & _vl, const vector<double> & sum_eta,
- vector<int> & last_updated)
-{
+inline void update_folos_lazy(const int iter_sample, const int k,
+ vector<double>& _vl,
+ const vector<double>& sum_eta,
+ vector<int>& last_updated) {
const double penalty = sum_eta[iter_sample] - sum_eta[last_updated[k]];
- double & x = _vl[k];
- if (x > 0) x = max(0.0, x - penalty);
- else x = min(0.0, x + penalty);
+ double& x = _vl[k];
+ if (x > 0)
+ x = max(0.0, x - penalty);
+ else
+ x = min(0.0, x + penalty);
last_updated[k] = iter_sample;
}
-int
-ME_Model::perform_SGD()
-{
+int ME_Model::perform_SGD() {
if (_l2reg > 0) {
- cerr << "error: L2 regularization is currently not supported in SGD mode." << endl;
+ cerr << "error: L2 regularization is currently not supported in SGD mode."
+ << endl;
exit(1);
}
@@ -85,95 +80,113 @@ ME_Model::perform_SGD()
double logl = 0;
int ncorrect = 0, ntotal = 0;
for (size_t i = 0; i < _vs.size(); i++, ntotal++, iter_sample++) {
- const Sample & s = _vs[ri[i]];
+ const Sample& s = _vs[ri[i]];
#ifdef FOLOS_LAZY
- for (vector<int>::const_iterator j = s.positive_features.begin(); j != s.positive_features.end(); j++){
- for (vector<int>::const_iterator k = _feature2mef[*j].begin(); k != _feature2mef[*j].end(); k++) {
- update_folos_lazy(iter_sample, *k, _vl, sum_eta, last_updated);
- }
+ for (vector<int>::const_iterator j = s.positive_features.begin();
+ j != s.positive_features.end(); j++) {
+ for (vector<int>::const_iterator k = _feature2mef[*j].begin();
+ k != _feature2mef[*j].end(); k++) {
+ update_folos_lazy(iter_sample, *k, _vl, sum_eta, last_updated);
+ }
}
#endif
vector<double> membp(_num_classes);
const int max_label = conditional_probability(s, membp);
- const double eta = eta0 * pow(SGD_ALPHA, (double)iter_sample / _vs.size()); // exponential decay
- // const double eta = eta0 / (1.0 + (double)iter_sample / _vs.size());
+ const double eta =
+ eta0 * pow(SGD_ALPHA,
+ (double)iter_sample / _vs.size()); // exponential decay
+ // const double eta = eta0 / (1.0 + (double)iter_sample /
+ // _vs.size());
- // if (iter_sample % _vs.size() == 0) cerr << "eta = " << eta << endl;
+ // if (iter_sample % _vs.size() == 0) cerr << "eta = " << eta <<
+ // endl;
u += eta * l1param;
sum_eta.push_back(sum_eta.back() + eta * l1param);
-
+
logl += log(membp[s.label]);
if (max_label == s.label) ncorrect++;
// binary features
- for (vector<int>::const_iterator j = s.positive_features.begin(); j != s.positive_features.end(); j++){
- for (vector<int>::const_iterator k = _feature2mef[*j].begin(); k != _feature2mef[*j].end(); k++) {
- const double me = membp[_fb.Feature(*k).label()];
- const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
- const double grad = (me - ee);
- _vl[*k] -= eta * grad;
+ for (vector<int>::const_iterator j = s.positive_features.begin();
+ j != s.positive_features.end(); j++) {
+ for (vector<int>::const_iterator k = _feature2mef[*j].begin();
+ k != _feature2mef[*j].end(); k++) {
+ const double me = membp[_fb.Feature(*k).label()];
+ const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
+ const double grad = (me - ee);
+ _vl[*k] -= eta * grad;
#ifdef SGD_CP
- apply_l1_penalty(*k, u, _vl, q);
+ apply_l1_penalty(*k, u, _vl, q);
#endif
- }
+ }
}
// real-valued features
- for (vector<pair<int, double> >::const_iterator j = s.rvfeatures.begin(); j != s.rvfeatures.end(); j++) {
- for (vector<int>::const_iterator k = _feature2mef[j->first].begin(); k != _feature2mef[j->first].end(); k++) {
- const double me = membp[_fb.Feature(*k).label()];
- const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
- const double grad = (me - ee) * j->second;
- _vl[*k] -= eta * grad;
+ for (vector<pair<int, double> >::const_iterator j = s.rvfeatures.begin();
+ j != s.rvfeatures.end(); j++) {
+ for (vector<int>::const_iterator k = _feature2mef[j->first].begin();
+ k != _feature2mef[j->first].end(); k++) {
+ const double me = membp[_fb.Feature(*k).label()];
+ const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
+ const double grad = (me - ee) * j->second;
+ _vl[*k] -= eta * grad;
#ifdef SGD_CP
- apply_l1_penalty(*k, u, _vl, q);
+ apply_l1_penalty(*k, u, _vl, q);
#endif
- }
+ }
}
#ifdef FOLOS_NAIVE
for (size_t j = 0; j < d; j++) {
- double & x = _vl[j];
- if (x > 0) x = max(0.0, x - eta * l1param);
- else x = min(0.0, x + eta * l1param);
+ double& x = _vl[j];
+ if (x > 0)
+ x = max(0.0, x - eta * l1param);
+ else
+ x = min(0.0, x + eta * l1param);
}
#endif
-
}
logl /= _vs.size();
- // fprintf(stderr, "%4d logl = %8.3f acc = %6.4f ", iter, logl, (double)ncorrect / ntotal);
+// fprintf(stderr, "%4d logl = %8.3f acc = %6.4f ", iter, logl,
+// (double)ncorrect / ntotal);
#ifdef FOLOS_LAZY
if (l1param > 0) {
for (size_t j = 0; j < d; j++)
- update_folos_lazy(iter_sample, j, _vl, sum_eta, last_updated);
+ update_folos_lazy(iter_sample, j, _vl, sum_eta, last_updated);
}
#endif
double f = logl;
if (l1param > 0) {
- const double l1 = l1norm(_vl); // this is not accurate when lazy update is used
- // cerr << "f0 = " << update_model_expectation() - l1param * l1 << " ";
+ const double l1 =
+ l1norm(_vl); // this is not accurate when lazy update is used
+ // cerr << "f0 = " << update_model_expectation() - l1param * l1 << "
+ // ";
f -= l1param * l1;
int nonzero = 0;
- for (int j = 0; j < d; j++) if (_vl[j] != 0) nonzero++;
- // cerr << " f = " << f << " l1 = " << l1 << " nonzero_features = " << nonzero << endl;
+ for (int j = 0; j < d; j++)
+ if (_vl[j] != 0) nonzero++;
+ // cerr << " f = " << f << " l1 = " << l1 << " nonzero_features = "
+ // << nonzero << endl;
}
- // fprintf(stderr, "%4d obj = %7.3f acc = %6.4f", iter+1, f, (double)ncorrect/ntotal);
+ // fprintf(stderr, "%4d obj = %7.3f acc = %6.4f", iter+1, f,
+ // (double)ncorrect/ntotal);
// fprintf(stderr, "%4d obj = %f", iter+1, f);
- fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter+1, f, 1 - (double)ncorrect/ntotal);
+ fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, f,
+ 1 - (double)ncorrect / ntotal);
if (_nheldout > 0) {
double heldout_logl = heldout_likelihood();
- // fprintf(stderr, " heldout_logl = %f acc = %6.4f\n", heldout_logl, 1 - _heldout_error);
- fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl, _heldout_error);
+ // fprintf(stderr, " heldout_logl = %f acc = %6.4f\n",
+ // heldout_logl, 1 - _heldout_error);
+ fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
+ _heldout_error);
}
fprintf(stderr, "\n");
-
-
}
return 0;