summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorWu, Ke <wuke@cs.umd.edu>2014-12-17 16:15:13 -0500
committerWu, Ke <wuke@cs.umd.edu>2014-12-17 16:15:13 -0500
commit17dbb7d5ab1544899b1b9e867d2246a0a93e3aa8 (patch)
tree7fa2a51763a1b67fb325e86b0e3f764dd119cd70 /utils
parent1983c75c35b7f5dc3f356a2f9a9345d632b87650 (diff)
parent1613f1fc44ca67820afd7e7b21eb54b316c8ce55 (diff)
Merge branch 'const_reorder_2' into softsyn_2
Diffstat (limited to 'utils')
-rw-r--r--utils/Makefile.am7
-rw-r--r--utils/alias_sampler.h4
-rw-r--r--utils/alignment_io.h4
-rw-r--r--utils/b64featvector.cc55
-rw-r--r--utils/b64featvector.h12
-rw-r--r--utils/b64tools.h4
-rw-r--r--utils/corpus_tools.h4
-rw-r--r--utils/exp_semiring.h4
-rw-r--r--utils/fast_sparse_vector.h4
-rw-r--r--utils/fdict.h4
-rw-r--r--utils/feature_vector.h4
-rw-r--r--utils/filelib.h4
-rw-r--r--utils/kernel_string_subseq.h4
-rw-r--r--utils/lbfgs.cpp108
-rw-r--r--utils/lbfgs.h20
-rw-r--r--utils/m.h4
-rw-r--r--utils/mathvec.h87
-rw-r--r--utils/maxent.cpp427
-rw-r--r--utils/maxent.h95
-rw-r--r--utils/murmur_hash3.h4
-rw-r--r--utils/owlqn.cpp127
-rw-r--r--utils/perfect_hash.h4
-rw-r--r--utils/prob.h4
-rw-r--r--utils/sgd.cpp193
-rw-r--r--utils/small_vector.h20
-rw-r--r--utils/small_vector_test.cc30
-rw-r--r--utils/sparse_vector.h4
-rw-r--r--utils/star.h4
-rw-r--r--utils/sv_test.cc31
-rw-r--r--utils/tdict.h4
-rw-r--r--utils/timing_stats.h4
-rw-r--r--utils/verbose.h4
-rw-r--r--utils/weights.h4
-rw-r--r--utils/wordid.h4
34 files changed, 701 insertions, 595 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am
index fabb4454..dd74ddc0 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -22,6 +22,7 @@ libutils_a_SOURCES = \
alias_sampler.h \
alignment_io.h \
array2d.h \
+ b64featvector.h \
b64tools.h \
batched_append.h \
city.h \
@@ -38,11 +39,8 @@ libutils_a_SOURCES = \
have_64_bits.h \
indices_after.h \
kernel_string_subseq.h \
- lbfgs.h \
- lbfgs.cpp \
logval.h \
m.h \
- mathvec.h \
maxent.h \
maxent.cpp \
murmur_hash3.h \
@@ -50,8 +48,6 @@ libutils_a_SOURCES = \
named_enum.h \
null_deleter.h \
null_traits.h \
- owlqn.cpp \
- sgd.cpp \
perfect_hash.h \
prob.h \
sampler.h \
@@ -77,6 +73,7 @@ libutils_a_SOURCES = \
fast_lexical_cast.hpp \
intrusive_refcount.hpp \
alignment_io.cc \
+ b64featvector.cc \
b64tools.cc \
corpus_tools.cc \
dict.cc \
diff --git a/utils/alias_sampler.h b/utils/alias_sampler.h
index 81541f7a..0f9d3f6d 100644
--- a/utils/alias_sampler.h
+++ b/utils/alias_sampler.h
@@ -1,5 +1,5 @@
-#ifndef _ALIAS_SAMPLER_H_
-#define _ALIAS_SAMPLER_H_
+#ifndef ALIAS_SAMPLER_H_
+#define ALIAS_SAMPLER_H_
#include <vector>
#include <limits>
diff --git a/utils/alignment_io.h b/utils/alignment_io.h
index 63fb916b..ec70688e 100644
--- a/utils/alignment_io.h
+++ b/utils/alignment_io.h
@@ -1,5 +1,5 @@
-#ifndef _ALIGNMENT_IO_H_
-#define _ALIGNMENT_IO_H_
+#ifndef ALIGNMENT_IO_H_
+#define ALIGNMENT_IO_H_
#include <string>
#include <iostream>
diff --git a/utils/b64featvector.cc b/utils/b64featvector.cc
new file mode 100644
index 00000000..c7d08b29
--- /dev/null
+++ b/utils/b64featvector.cc
@@ -0,0 +1,55 @@
+#include "b64featvector.h"
+
+#include <sstream>
+#include <boost/scoped_array.hpp>
+#include "b64tools.h"
+#include "fdict.h"
+
+using namespace std;
+
+static inline void EncodeFeatureWeight(const string &featname, weight_t weight,
+ ostream *output) {
+ output->write(featname.data(), featname.size() + 1);
+ output->write(reinterpret_cast<char *>(&weight), sizeof(weight_t));
+}
+
+string EncodeFeatureVector(const SparseVector<weight_t> &vec) {
+ string b64;
+ {
+ ostringstream base64_strm;
+ {
+ ostringstream strm;
+ for (SparseVector<weight_t>::const_iterator it = vec.begin();
+ it != vec.end(); ++it)
+ if (it->second != 0)
+ EncodeFeatureWeight(FD::Convert(it->first), it->second, &strm);
+ string data(strm.str());
+ B64::b64encode(data.data(), data.size(), &base64_strm);
+ }
+ b64 = base64_strm.str();
+ }
+ return b64;
+}
+
+void DecodeFeatureVector(const string &data, SparseVector<weight_t> *vec) {
+ vec->clear();
+ if (data.empty()) return;
+ // Decode data
+ size_t b64_len = data.size(), len = b64_len / 4 * 3;
+ boost::scoped_array<char> buf(new char[len]);
+ bool res =
+ B64::b64decode(reinterpret_cast<const unsigned char *>(data.data()),
+ b64_len, buf.get(), len);
+ assert(res);
+ // Apply updates
+ size_t cur = 0;
+ while (cur < len) {
+ string feat_name(buf.get() + cur);
+ if (feat_name.empty()) break; // Encountered trailing \0
+ int feat_id = FD::Convert(feat_name);
+ weight_t feat_delta =
+ *reinterpret_cast<weight_t *>(buf.get() + cur + feat_name.size() + 1);
+ (*vec)[feat_id] = feat_delta;
+ cur += feat_name.size() + 1 + sizeof(weight_t);
+ }
+}
diff --git a/utils/b64featvector.h b/utils/b64featvector.h
new file mode 100644
index 00000000..6ac04d44
--- /dev/null
+++ b/utils/b64featvector.h
@@ -0,0 +1,12 @@
+#ifndef _B64FEATVECTOR_H_
+#define _B64FEATVECTOR_H_
+
+#include <string>
+
+#include "sparse_vector.h"
+#include "weights.h"
+
+std::string EncodeFeatureVector(const SparseVector<weight_t> &);
+void DecodeFeatureVector(const std::string &, SparseVector<weight_t> *);
+
+#endif // _B64FEATVECTOR_H_
diff --git a/utils/b64tools.h b/utils/b64tools.h
index c821fc8f..130a9102 100644
--- a/utils/b64tools.h
+++ b/utils/b64tools.h
@@ -1,5 +1,5 @@
-#ifndef _B64_TOOLS_H_
-#define _B64_TOOLS_H_
+#ifndef B64_TOOLS_H_
+#define B64_TOOLS_H_
namespace B64 {
bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize);
diff --git a/utils/corpus_tools.h b/utils/corpus_tools.h
index f6699d87..3ccaf6ef 100644
--- a/utils/corpus_tools.h
+++ b/utils/corpus_tools.h
@@ -1,5 +1,5 @@
-#ifndef _CORPUS_TOOLS_H_
-#define _CORPUS_TOOLS_H_
+#ifndef CORPUS_TOOLS_H_
+#define CORPUS_TOOLS_H_
#include <string>
#include <set>
diff --git a/utils/exp_semiring.h b/utils/exp_semiring.h
index 26a22071..164286e3 100644
--- a/utils/exp_semiring.h
+++ b/utils/exp_semiring.h
@@ -1,5 +1,5 @@
-#ifndef _EXP_SEMIRING_H_
-#define _EXP_SEMIRING_H_
+#ifndef EXP_SEMIRING_H_
+#define EXP_SEMIRING_H_
#include <iostream>
#include "star.h"
diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
index 6e2a77cd..1e0ab428 100644
--- a/utils/fast_sparse_vector.h
+++ b/utils/fast_sparse_vector.h
@@ -1,5 +1,5 @@
-#ifndef _FAST_SPARSE_VECTOR_H_
-#define _FAST_SPARSE_VECTOR_H_
+#ifndef FAST_SPARSE_VECTOR_H_
+#define FAST_SPARSE_VECTOR_H_
// FastSparseVector<T> is a integer indexed unordered map that supports very fast
// (mathematical) vector operations when the sizes are very small, and reasonably
diff --git a/utils/fdict.h b/utils/fdict.h
index eb853fb2..94763890 100644
--- a/utils/fdict.h
+++ b/utils/fdict.h
@@ -1,5 +1,5 @@
-#ifndef _FDICT_H_
-#define _FDICT_H_
+#ifndef FDICT_H_
+#define FDICT_H_
#ifdef HAVE_CONFIG_H
#include "config.h"
diff --git a/utils/feature_vector.h b/utils/feature_vector.h
index a7b61a66..bf77b5ac 100644
--- a/utils/feature_vector.h
+++ b/utils/feature_vector.h
@@ -1,5 +1,5 @@
-#ifndef _FEATURE_VECTOR_H_
-#define _FEATURE_VECTOR_H_
+#ifndef FEATURE_VECTOR_H_
+#define FEATURE_VECTOR_H_
#include <vector>
#include "sparse_vector.h"
diff --git a/utils/filelib.h b/utils/filelib.h
index 4fa69760..90620d05 100644
--- a/utils/filelib.h
+++ b/utils/filelib.h
@@ -1,5 +1,5 @@
-#ifndef _FILELIB_H_
-#define _FILELIB_H_
+#ifndef FILELIB_H_
+#define FILELIB_H_
#include <cassert>
#include <string>
diff --git a/utils/kernel_string_subseq.h b/utils/kernel_string_subseq.h
index 516e8b89..00ee7da7 100644
--- a/utils/kernel_string_subseq.h
+++ b/utils/kernel_string_subseq.h
@@ -1,5 +1,5 @@
-#ifndef _KERNEL_STRING_SUBSEQ_H_
-#define _KERNEL_STRING_SUBSEQ_H_
+#ifndef KERNEL_STRING_SUBSEQ_H_
+#define KERNEL_STRING_SUBSEQ_H_
#include <vector>
#include <cmath>
diff --git a/utils/lbfgs.cpp b/utils/lbfgs.cpp
deleted file mode 100644
index bd26f048..00000000
--- a/utils/lbfgs.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#include <vector>
-#include <iostream>
-#include <cmath>
-#include <stdio.h>
-#include "mathvec.h"
-#include "lbfgs.h"
-#include "maxent.h"
-
-using namespace std;
-
-const static int M = LBFGS_M;
-const static double LINE_SEARCH_ALPHA = 0.1;
-const static double LINE_SEARCH_BETA = 0.5;
-
-// stopping criteria
-int LBFGS_MAX_ITER = 300;
-const static double MIN_GRAD_NORM = 0.0001;
-
-double ME_Model::backtracking_line_search(const Vec& x0, const Vec& grad0,
- const double f0, const Vec& dx,
- Vec& x, Vec& grad1) {
- double t = 1.0 / LINE_SEARCH_BETA;
-
- double f;
- do {
- t *= LINE_SEARCH_BETA;
- x = x0 + t * dx;
- f = FunctionGradient(x.STLVec(), grad1.STLVec());
- // cout << "*";
- } while (f > f0 + LINE_SEARCH_ALPHA * t * dot_product(dx, grad0));
-
- return f;
-}
-
-//
-// Jorge Nocedal, "Updating Quasi-Newton Matrices With Limited Storage",
-// Mathematics of Computation, Vol. 35, No. 151, pp. 773-782, 1980.
-//
-Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
- const Vec y[], const double z[]) {
- int offset, bound;
- if (iter <= M) {
- offset = 0;
- bound = iter;
- } else {
- offset = iter - M;
- bound = M;
- }
-
- Vec q = grad;
- double alpha[M], beta[M];
- for (int i = bound - 1; i >= 0; i--) {
- const int j = (i + offset) % M;
- alpha[i] = z[j] * dot_product(s[j], q);
- q += -alpha[i] * y[j];
- }
- if (iter > 0) {
- const int j = (iter - 1) % M;
- const double gamma = ((1.0 / z[j]) / dot_product(y[j], y[j]));
- // static double gamma;
- // if (gamma == 0) gamma = ((1.0 / z[j]) / dot_product(y[j], y[j]));
- q *= gamma;
- }
- for (int i = 0; i <= bound - 1; i++) {
- const int j = (i + offset) % M;
- beta[i] = z[j] * dot_product(y[j], q);
- q += s[j] * (alpha[i] - beta[i]);
- }
-
- return q;
-}
-
-vector<double> ME_Model::perform_LBFGS(const vector<double>& x0) {
- const size_t dim = x0.size();
- Vec x = x0;
-
- Vec grad(dim), dx(dim);
- double f = FunctionGradient(x.STLVec(), grad.STLVec());
-
- Vec s[M], y[M];
- double z[M]; // rho
-
- for (int iter = 0; iter < LBFGS_MAX_ITER; iter++) {
-
- fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
- if (_nheldout > 0) {
- const double heldout_logl = heldout_likelihood();
- fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
- _heldout_error);
- }
- fprintf(stderr, "\n");
-
- if (sqrt(dot_product(grad, grad)) < MIN_GRAD_NORM) break;
-
- dx = -1 * approximate_Hg(iter, grad, s, y, z);
-
- Vec x1(dim), grad1(dim);
- f = backtracking_line_search(x, grad, f, dx, x1, grad1);
-
- s[iter % M] = x1 - x;
- y[iter % M] = grad1 - grad;
- z[iter % M] = 1.0 / dot_product(y[iter % M], s[iter % M]);
- x = x1;
- grad = grad1;
- }
-
- return x.STLVec();
-}
diff --git a/utils/lbfgs.h b/utils/lbfgs.h
deleted file mode 100644
index 4d706f7a..00000000
--- a/utils/lbfgs.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _LBFGS_H_
-#define _LBFGS_H_
-
-#include <vector>
-
-// template<class FuncGrad>
-// std::vector<double>
-// perform_LBFGS(FuncGrad func_grad, const std::vector<double> & x0);
-
-std::vector<double> perform_LBFGS(
- double (*func_grad)(const std::vector<double> &, std::vector<double> &),
- const std::vector<double> &x0);
-
-std::vector<double> perform_OWLQN(
- double (*func_grad)(const std::vector<double> &, std::vector<double> &),
- const std::vector<double> &x0, const double C);
-
-const int LBFGS_M = 10;
-
-#endif
diff --git a/utils/m.h b/utils/m.h
index dc881b36..bd82c305 100644
--- a/utils/m.h
+++ b/utils/m.h
@@ -1,5 +1,5 @@
-#ifndef _M_H_
-#define _M_H_
+#ifndef M_H_HEADER_
+#define M_H_HEADER_
#include <cassert>
#include <cmath>
diff --git a/utils/mathvec.h b/utils/mathvec.h
deleted file mode 100644
index f8c60e5d..00000000
--- a/utils/mathvec.h
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef _MATH_VECTOR_H_
-#define _MATH_VECTOR_H_
-
-#include <vector>
-#include <iostream>
-#include <cassert>
-
-class Vec {
- private:
- std::vector<double> _v;
-
- public:
- Vec(const size_t n = 0, const double val = 0) { _v.resize(n, val); }
- Vec(const std::vector<double>& v) : _v(v) {}
- const std::vector<double>& STLVec() const { return _v; }
- std::vector<double>& STLVec() { return _v; }
- size_t Size() const { return _v.size(); }
- double& operator[](int i) { return _v[i]; }
- const double& operator[](int i) const { return _v[i]; }
- Vec& operator+=(const Vec& b) {
- assert(b.Size() == _v.size());
- for (size_t i = 0; i < _v.size(); i++) {
- _v[i] += b[i];
- }
- return *this;
- }
- Vec& operator*=(const double c) {
- for (size_t i = 0; i < _v.size(); i++) {
- _v[i] *= c;
- }
- return *this;
- }
- void Project(const Vec& y) {
- for (size_t i = 0; i < _v.size(); i++) {
- // if (sign(_v[i]) != sign(y[i])) _v[i] = 0;
- if (_v[i] * y[i] <= 0) _v[i] = 0;
- }
- }
-};
-
-inline double dot_product(const Vec& a, const Vec& b) {
- double sum = 0;
- for (size_t i = 0; i < a.Size(); i++) {
- sum += a[i] * b[i];
- }
- return sum;
-}
-
-inline std::ostream& operator<<(std::ostream& s, const Vec& a) {
- s << "(";
- for (size_t i = 0; i < a.Size(); i++) {
- if (i != 0) s << ", ";
- s << a[i];
- }
- s << ")";
- return s;
-}
-
-inline const Vec operator+(const Vec& a, const Vec& b) {
- Vec v(a.Size());
- assert(a.Size() == b.Size());
- for (size_t i = 0; i < a.Size(); i++) {
- v[i] = a[i] + b[i];
- }
- return v;
-}
-
-inline const Vec operator-(const Vec& a, const Vec& b) {
- Vec v(a.Size());
- assert(a.Size() == b.Size());
- for (size_t i = 0; i < a.Size(); i++) {
- v[i] = a[i] - b[i];
- }
- return v;
-}
-
-inline const Vec operator*(const Vec& a, const double c) {
- Vec v(a.Size());
- for (size_t i = 0; i < a.Size(); i++) {
- v[i] = a[i] * c;
- }
- return v;
-}
-
-inline const Vec operator*(const double c, const Vec& a) { return a * c; }
-
-#endif
diff --git a/utils/maxent.cpp b/utils/maxent.cpp
index 0f49ee9d..fd772e08 100644
--- a/utils/maxent.cpp
+++ b/utils/maxent.cpp
@@ -3,12 +3,15 @@
*/
#include "maxent.h"
+
+#include <vector>
+#include <iostream>
#include <cmath>
#include <cstdio>
-#include "lbfgs.h"
using namespace std;
+namespace maxent {
double ME_Model::FunctionGradient(const vector<double>& x,
vector<double>& grad) {
assert((int)_fb.Size() == x.size());
@@ -601,6 +604,428 @@ vector<double> ME_Model::classify(ME_Sample& mes) const {
return vp;
}
+// template<class FuncGrad>
+// std::vector<double>
+// perform_LBFGS(FuncGrad func_grad, const std::vector<double> & x0);
+
+std::vector<double> perform_LBFGS(
+ double (*func_grad)(const std::vector<double> &, std::vector<double> &),
+ const std::vector<double> &x0);
+
+std::vector<double> perform_OWLQN(
+ double (*func_grad)(const std::vector<double> &, std::vector<double> &),
+ const std::vector<double> &x0, const double C);
+
+const int LBFGS_M = 10;
+
+const static int M = LBFGS_M;
+const static double LINE_SEARCH_ALPHA = 0.1;
+const static double LINE_SEARCH_BETA = 0.5;
+
+// stopping criteria
+int LBFGS_MAX_ITER = 300;
+const static double MIN_GRAD_NORM = 0.0001;
+
+// LBFGS
+
+double ME_Model::backtracking_line_search(const Vec& x0, const Vec& grad0,
+ const double f0, const Vec& dx,
+ Vec& x, Vec& grad1) {
+ double t = 1.0 / LINE_SEARCH_BETA;
+
+ double f;
+ do {
+ t *= LINE_SEARCH_BETA;
+ x = x0 + t * dx;
+ f = FunctionGradient(x.STLVec(), grad1.STLVec());
+ // cout << "*";
+ } while (f > f0 + LINE_SEARCH_ALPHA * t * dot_product(dx, grad0));
+
+ return f;
+}
+
+//
+// Jorge Nocedal, "Updating Quasi-Newton Matrices With Limited Storage",
+// Mathematics of Computation, Vol. 35, No. 151, pp. 773-782, 1980.
+//
+Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
+ const Vec y[], const double z[]) {
+ int offset, bound;
+ if (iter <= M) {
+ offset = 0;
+ bound = iter;
+ } else {
+ offset = iter - M;
+ bound = M;
+ }
+
+ Vec q = grad;
+ double alpha[M], beta[M];
+ for (int i = bound - 1; i >= 0; i--) {
+ const int j = (i + offset) % M;
+ alpha[i] = z[j] * dot_product(s[j], q);
+ q += -alpha[i] * y[j];
+ }
+ if (iter > 0) {
+ const int j = (iter - 1) % M;
+ const double gamma = ((1.0 / z[j]) / dot_product(y[j], y[j]));
+ // static double gamma;
+ // if (gamma == 0) gamma = ((1.0 / z[j]) / dot_product(y[j], y[j]));
+ q *= gamma;
+ }
+ for (int i = 0; i <= bound - 1; i++) {
+ const int j = (i + offset) % M;
+ beta[i] = z[j] * dot_product(y[j], q);
+ q += s[j] * (alpha[i] - beta[i]);
+ }
+
+ return q;
+}
+
+vector<double> ME_Model::perform_LBFGS(const vector<double>& x0) {
+ const size_t dim = x0.size();
+ Vec x = x0;
+
+ Vec grad(dim), dx(dim);
+ double f = FunctionGradient(x.STLVec(), grad.STLVec());
+
+ Vec s[M], y[M];
+ double z[M]; // rho
+
+ for (int iter = 0; iter < LBFGS_MAX_ITER; iter++) {
+
+ fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
+ if (_nheldout > 0) {
+ const double heldout_logl = heldout_likelihood();
+ fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
+ _heldout_error);
+ }
+ fprintf(stderr, "\n");
+
+ if (sqrt(dot_product(grad, grad)) < MIN_GRAD_NORM) break;
+
+ dx = -1 * approximate_Hg(iter, grad, s, y, z);
+
+ Vec x1(dim), grad1(dim);
+ f = backtracking_line_search(x, grad, f, dx, x1, grad1);
+
+ s[iter % M] = x1 - x;
+ y[iter % M] = grad1 - grad;
+ z[iter % M] = 1.0 / dot_product(y[iter % M], s[iter % M]);
+ x = x1;
+ grad = grad1;
+ }
+
+ return x.STLVec();
+}
+
+// OWLQN
+
+// stopping criteria
+int OWLQN_MAX_ITER = 300;
+
+Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
+ const Vec y[], const double z[]);
+
+inline int sign(double x) {
+ if (x > 0) return 1;
+ if (x < 0) return -1;
+ return 0;
+};
+
+static Vec pseudo_gradient(const Vec& x, const Vec& grad0, const double C) {
+ Vec grad = grad0;
+ for (size_t i = 0; i < x.Size(); i++) {
+ if (x[i] != 0) {
+ grad[i] += C * sign(x[i]);
+ continue;
+ }
+ const double gm = grad0[i] - C;
+ if (gm > 0) {
+ grad[i] = gm;
+ continue;
+ }
+ const double gp = grad0[i] + C;
+ if (gp < 0) {
+ grad[i] = gp;
+ continue;
+ }
+ grad[i] = 0;
+ }
+
+ return grad;
+}
+
+double ME_Model::regularized_func_grad(const double C, const Vec& x,
+ Vec& grad) {
+ double f = FunctionGradient(x.STLVec(), grad.STLVec());
+ for (size_t i = 0; i < x.Size(); i++) {
+ f += C * fabs(x[i]);
+ }
+
+ return f;
+}
+
+double ME_Model::constrained_line_search(double C, const Vec& x0,
+ const Vec& grad0, const double f0,
+ const Vec& dx, Vec& x, Vec& grad1) {
+ // compute the orthant to explore
+ Vec orthant = x0;
+ for (size_t i = 0; i < orthant.Size(); i++) {
+ if (orthant[i] == 0) orthant[i] = -grad0[i];
+ }
+
+ double t = 1.0 / LINE_SEARCH_BETA;
+
+ double f;
+ do {
+ t *= LINE_SEARCH_BETA;
+ x = x0 + t * dx;
+ x.Project(orthant);
+ // for (size_t i = 0; i < x.Size(); i++) {
+ // if (x0[i] != 0 && sign(x[i]) != sign(x0[i])) x[i] = 0;
+ // }
+
+ f = regularized_func_grad(C, x, grad1);
+ // cout << "*";
+ } while (f > f0 + LINE_SEARCH_ALPHA * dot_product(x - x0, grad0));
+
+ return f;
+}
+
+vector<double> ME_Model::perform_OWLQN(const vector<double>& x0,
+ const double C) {
+ const size_t dim = x0.size();
+ Vec x = x0;
+
+ Vec grad(dim), dx(dim);
+ double f = regularized_func_grad(C, x, grad);
+
+ Vec s[M], y[M];
+ double z[M]; // rho
+
+ for (int iter = 0; iter < OWLQN_MAX_ITER; iter++) {
+ Vec pg = pseudo_gradient(x, grad, C);
+
+ fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
+ if (_nheldout > 0) {
+ const double heldout_logl = heldout_likelihood();
+ fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
+ _heldout_error);
+ }
+ fprintf(stderr, "\n");
+
+ if (sqrt(dot_product(pg, pg)) < MIN_GRAD_NORM) break;
+
+ dx = -1 * approximate_Hg(iter, pg, s, y, z);
+ if (dot_product(dx, pg) >= 0) dx.Project(-1 * pg);
+
+ Vec x1(dim), grad1(dim);
+ f = constrained_line_search(C, x, pg, f, dx, x1, grad1);
+
+ s[iter % M] = x1 - x;
+ y[iter % M] = grad1 - grad;
+ z[iter % M] = 1.0 / dot_product(y[iter % M], s[iter % M]);
+
+ x = x1;
+ grad = grad1;
+ }
+
+ return x.STLVec();
+}
+
+// SGD
+
+// const double SGD_ETA0 = 1;
+// const double SGD_ITER = 30;
+// const double SGD_ALPHA = 0.85;
+
+//#define FOLOS_NAIVE
+//#define FOLOS_LAZY
+#define SGD_CP
+
+inline void apply_l1_penalty(const int i, const double u, vector<double>& _vl,
+ vector<double>& q) {
+ double& w = _vl[i];
+ const double z = w;
+ double& qi = q[i];
+ if (w > 0) {
+ w = max(0.0, w - (u + qi));
+ } else if (w < 0) {
+ w = min(0.0, w + (u - qi));
+ }
+ qi += w - z;
+}
+
+static double l1norm(const vector<double>& v) {
+ double sum = 0;
+ for (size_t i = 0; i < v.size(); i++) sum += abs(v[i]);
+ return sum;
+}
+
+inline void update_folos_lazy(const int iter_sample, const int k,
+ vector<double>& _vl,
+ const vector<double>& sum_eta,
+ vector<int>& last_updated) {
+ const double penalty = sum_eta[iter_sample] - sum_eta[last_updated[k]];
+ double& x = _vl[k];
+ if (x > 0)
+ x = max(0.0, x - penalty);
+ else
+ x = min(0.0, x + penalty);
+ last_updated[k] = iter_sample;
+}
+
+int ME_Model::perform_SGD() {
+ if (_l2reg > 0) {
+ cerr << "error: L2 regularization is currently not supported in SGD mode."
+ << endl;
+ exit(1);
+ }
+
+ cerr << "performing SGD" << endl;
+
+ const double l1param = _l1reg;
+
+ const int d = _fb.Size();
+
+ vector<int> ri(_vs.size());
+ for (size_t i = 0; i < ri.size(); i++) ri[i] = i;
+
+ vector<double> grad(d);
+ int iter_sample = 0;
+ const double eta0 = SGD_ETA0;
+
+ // cerr << "l1param = " << l1param << endl;
+ cerr << "eta0 = " << eta0 << " alpha = " << SGD_ALPHA << endl;
+
+ double u = 0;
+ vector<double> q(d, 0);
+ vector<int> last_updated(d, 0);
+ vector<double> sum_eta;
+ sum_eta.push_back(0);
+
+ for (int iter = 0; iter < SGD_ITER; iter++) {
+
+ random_shuffle(ri.begin(), ri.end());
+
+ double logl = 0;
+ int ncorrect = 0, ntotal = 0;
+ for (size_t i = 0; i < _vs.size(); i++, ntotal++, iter_sample++) {
+ const Sample& s = _vs[ri[i]];
+
+#ifdef FOLOS_LAZY
+ for (vector<int>::const_iterator j = s.positive_features.begin();
+ j != s.positive_features.end(); j++) {
+ for (vector<int>::const_iterator k = _feature2mef[*j].begin();
+ k != _feature2mef[*j].end(); k++) {
+ update_folos_lazy(iter_sample, *k, _vl, sum_eta, last_updated);
+ }
+ }
+#endif
+
+ vector<double> membp(_num_classes);
+ const int max_label = conditional_probability(s, membp);
+
+ const double eta =
+ eta0 * pow(SGD_ALPHA,
+ (double)iter_sample / _vs.size()); // exponential decay
+ // const double eta = eta0 / (1.0 + (double)iter_sample /
+ // _vs.size());
+
+ // if (iter_sample % _vs.size() == 0) cerr << "eta = " << eta <<
+ // endl;
+ u += eta * l1param;
+
+ sum_eta.push_back(sum_eta.back() + eta * l1param);
+
+ logl += log(membp[s.label]);
+ if (max_label == s.label) ncorrect++;
+
+ // binary features
+ for (vector<int>::const_iterator j = s.positive_features.begin();
+ j != s.positive_features.end(); j++) {
+ for (vector<int>::const_iterator k = _feature2mef[*j].begin();
+ k != _feature2mef[*j].end(); k++) {
+ const double me = membp[_fb.Feature(*k).label()];
+ const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
+ const double grad = (me - ee);
+ _vl[*k] -= eta * grad;
+#ifdef SGD_CP
+ apply_l1_penalty(*k, u, _vl, q);
+#endif
+ }
+ }
+ // real-valued features
+ for (vector<pair<int, double> >::const_iterator j = s.rvfeatures.begin();
+ j != s.rvfeatures.end(); j++) {
+ for (vector<int>::const_iterator k = _feature2mef[j->first].begin();
+ k != _feature2mef[j->first].end(); k++) {
+ const double me = membp[_fb.Feature(*k).label()];
+ const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
+ const double grad = (me - ee) * j->second;
+ _vl[*k] -= eta * grad;
+#ifdef SGD_CP
+ apply_l1_penalty(*k, u, _vl, q);
+#endif
+ }
+ }
+
+#ifdef FOLOS_NAIVE
+ for (size_t j = 0; j < d; j++) {
+ double& x = _vl[j];
+ if (x > 0)
+ x = max(0.0, x - eta * l1param);
+ else
+ x = min(0.0, x + eta * l1param);
+ }
+#endif
+ }
+ logl /= _vs.size();
+// fprintf(stderr, "%4d logl = %8.3f acc = %6.4f ", iter, logl,
+// (double)ncorrect / ntotal);
+
+#ifdef FOLOS_LAZY
+ if (l1param > 0) {
+ for (size_t j = 0; j < d; j++)
+ update_folos_lazy(iter_sample, j, _vl, sum_eta, last_updated);
+ }
+#endif
+
+ double f = logl;
+ if (l1param > 0) {
+ const double l1 =
+ l1norm(_vl); // this is not accurate when lazy update is used
+ // cerr << "f0 = " << update_model_expectation() - l1param * l1 << "
+ // ";
+ f -= l1param * l1;
+ int nonzero = 0;
+ for (int j = 0; j < d; j++)
+ if (_vl[j] != 0) nonzero++;
+ // cerr << " f = " << f << " l1 = " << l1 << " nonzero_features = "
+ // << nonzero << endl;
+ }
+ // fprintf(stderr, "%4d obj = %7.3f acc = %6.4f", iter+1, f,
+ // (double)ncorrect/ntotal);
+ // fprintf(stderr, "%4d obj = %f", iter+1, f);
+ fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, f,
+ 1 - (double)ncorrect / ntotal);
+
+ if (_nheldout > 0) {
+ double heldout_logl = heldout_likelihood();
+ // fprintf(stderr, " heldout_logl = %f acc = %6.4f\n",
+ // heldout_logl, 1 - _heldout_error);
+ fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
+ _heldout_error);
+ }
+ fprintf(stderr, "\n");
+ }
+
+ return 0;
+}
+
+} // namespace maxent
+
/*
* $Log: maxent.cpp,v $
* Revision 1.1.1.1 2007/05/15 08:30:35 kyoshida
diff --git a/utils/maxent.h b/utils/maxent.h
index b1efd88e..74d13a6f 100644
--- a/utils/maxent.h
+++ b/utils/maxent.h
@@ -5,21 +5,95 @@
#ifndef __MAXENT_H_
#define __MAXENT_H_
-#include <string>
-#include <vector>
-#include <list>
-#include <map>
#include <algorithm>
#include <iostream>
+#include <list>
+#include <map>
#include <string>
+#include <unordered_map>
+#include <vector>
+
#include <cassert>
-#include "mathvec.h"
-#define USE_HASH_MAP // if you encounter errors with hash, try commenting out
- // this line. (the program will be a bit slower, though)
-#ifdef USE_HASH_MAP
-#include <unordered_map>
-#endif
+namespace maxent {
+class Vec {
+ private:
+ std::vector<double> _v;
+
+ public:
+ Vec(const size_t n = 0, const double val = 0) { _v.resize(n, val); }
+ Vec(const std::vector<double>& v) : _v(v) {}
+ const std::vector<double>& STLVec() const { return _v; }
+ std::vector<double>& STLVec() { return _v; }
+ size_t Size() const { return _v.size(); }
+ double& operator[](int i) { return _v[i]; }
+ const double& operator[](int i) const { return _v[i]; }
+ Vec& operator+=(const Vec& b) {
+ assert(b.Size() == _v.size());
+ for (size_t i = 0; i < _v.size(); i++) {
+ _v[i] += b[i];
+ }
+ return *this;
+ }
+ Vec& operator*=(const double c) {
+ for (size_t i = 0; i < _v.size(); i++) {
+ _v[i] *= c;
+ }
+ return *this;
+ }
+ void Project(const Vec& y) {
+ for (size_t i = 0; i < _v.size(); i++) {
+ // if (sign(_v[i]) != sign(y[i])) _v[i] = 0;
+ if (_v[i] * y[i] <= 0) _v[i] = 0;
+ }
+ }
+};
+
+inline double dot_product(const Vec& a, const Vec& b) {
+ double sum = 0;
+ for (size_t i = 0; i < a.Size(); i++) {
+ sum += a[i] * b[i];
+ }
+ return sum;
+}
+
+inline std::ostream& operator<<(std::ostream& s, const Vec& a) {
+ s << "(";
+ for (size_t i = 0; i < a.Size(); i++) {
+ if (i != 0) s << ", ";
+ s << a[i];
+ }
+ s << ")";
+ return s;
+}
+
+inline const Vec operator+(const Vec& a, const Vec& b) {
+ Vec v(a.Size());
+ assert(a.Size() == b.Size());
+ for (size_t i = 0; i < a.Size(); i++) {
+ v[i] = a[i] + b[i];
+ }
+ return v;
+}
+
+inline const Vec operator-(const Vec& a, const Vec& b) {
+ Vec v(a.Size());
+ assert(a.Size() == b.Size());
+ for (size_t i = 0; i < a.Size(); i++) {
+ v[i] = a[i] - b[i];
+ }
+ return v;
+}
+
+inline const Vec operator*(const Vec& a, const double c) {
+ Vec v(a.Size());
+ for (size_t i = 0; i < a.Size(); i++) {
+ v[i] = a[i] * c;
+ }
+ return v;
+}
+
+inline const Vec operator*(const double c, const Vec& a) { return a * c; }
//
// data format for each sample for training/testing
@@ -309,6 +383,7 @@ class ME_Model {
static double FunctionGradientWrapper(const std::vector<double>& x,
std::vector<double>& grad);
};
+} // namespace maxent
#endif
diff --git a/utils/murmur_hash3.h b/utils/murmur_hash3.h
index a125d775..e8a8b10b 100644
--- a/utils/murmur_hash3.h
+++ b/utils/murmur_hash3.h
@@ -2,8 +2,8 @@
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
-#ifndef _MURMURHASH3_H_
-#define _MURMURHASH3_H_
+#ifndef MURMURHASH3_H_
+#define MURMURHASH3_H_
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
diff --git a/utils/owlqn.cpp b/utils/owlqn.cpp
deleted file mode 100644
index c3a0f0da..00000000
--- a/utils/owlqn.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-#include <vector>
-#include <iostream>
-#include <cmath>
-#include <stdio.h>
-#include "mathvec.h"
-#include "lbfgs.h"
-#include "maxent.h"
-
-using namespace std;
-
-const static int M = LBFGS_M;
-const static double LINE_SEARCH_ALPHA = 0.1;
-const static double LINE_SEARCH_BETA = 0.5;
-
-// stopping criteria
-int OWLQN_MAX_ITER = 300;
-const static double MIN_GRAD_NORM = 0.0001;
-
-Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
- const Vec y[], const double z[]);
-
-inline int sign(double x) {
- if (x > 0) return 1;
- if (x < 0) return -1;
- return 0;
-};
-
-static Vec pseudo_gradient(const Vec& x, const Vec& grad0, const double C) {
- Vec grad = grad0;
- for (size_t i = 0; i < x.Size(); i++) {
- if (x[i] != 0) {
- grad[i] += C * sign(x[i]);
- continue;
- }
- const double gm = grad0[i] - C;
- if (gm > 0) {
- grad[i] = gm;
- continue;
- }
- const double gp = grad0[i] + C;
- if (gp < 0) {
- grad[i] = gp;
- continue;
- }
- grad[i] = 0;
- }
-
- return grad;
-}
-
-double ME_Model::regularized_func_grad(const double C, const Vec& x,
- Vec& grad) {
- double f = FunctionGradient(x.STLVec(), grad.STLVec());
- for (size_t i = 0; i < x.Size(); i++) {
- f += C * fabs(x[i]);
- }
-
- return f;
-}
-
-double ME_Model::constrained_line_search(double C, const Vec& x0,
- const Vec& grad0, const double f0,
- const Vec& dx, Vec& x, Vec& grad1) {
- // compute the orthant to explore
- Vec orthant = x0;
- for (size_t i = 0; i < orthant.Size(); i++) {
- if (orthant[i] == 0) orthant[i] = -grad0[i];
- }
-
- double t = 1.0 / LINE_SEARCH_BETA;
-
- double f;
- do {
- t *= LINE_SEARCH_BETA;
- x = x0 + t * dx;
- x.Project(orthant);
- // for (size_t i = 0; i < x.Size(); i++) {
- // if (x0[i] != 0 && sign(x[i]) != sign(x0[i])) x[i] = 0;
- // }
-
- f = regularized_func_grad(C, x, grad1);
- // cout << "*";
- } while (f > f0 + LINE_SEARCH_ALPHA * dot_product(x - x0, grad0));
-
- return f;
-}
-
-vector<double> ME_Model::perform_OWLQN(const vector<double>& x0,
- const double C) {
- const size_t dim = x0.size();
- Vec x = x0;
-
- Vec grad(dim), dx(dim);
- double f = regularized_func_grad(C, x, grad);
-
- Vec s[M], y[M];
- double z[M]; // rho
-
- for (int iter = 0; iter < OWLQN_MAX_ITER; iter++) {
- Vec pg = pseudo_gradient(x, grad, C);
-
- fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
- if (_nheldout > 0) {
- const double heldout_logl = heldout_likelihood();
- fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
- _heldout_error);
- }
- fprintf(stderr, "\n");
-
- if (sqrt(dot_product(pg, pg)) < MIN_GRAD_NORM) break;
-
- dx = -1 * approximate_Hg(iter, pg, s, y, z);
- if (dot_product(dx, pg) >= 0) dx.Project(-1 * pg);
-
- Vec x1(dim), grad1(dim);
- f = constrained_line_search(C, x, pg, f, dx, x1, grad1);
-
- s[iter % M] = x1 - x;
- y[iter % M] = grad1 - grad;
- z[iter % M] = 1.0 / dot_product(y[iter % M], s[iter % M]);
-
- x = x1;
- grad = grad1;
- }
-
- return x.STLVec();
-}
diff --git a/utils/perfect_hash.h b/utils/perfect_hash.h
index 29ea48a9..8c12c9f0 100644
--- a/utils/perfect_hash.h
+++ b/utils/perfect_hash.h
@@ -1,5 +1,5 @@
-#ifndef _PERFECT_HASH_MAP_H_
-#define _PERFECT_HASH_MAP_H_
+#ifndef PERFECT_HASH_MAP_H_
+#define PERFECT_HASH_MAP_H_
#include <vector>
#include <boost/utility.hpp>
diff --git a/utils/prob.h b/utils/prob.h
index bc297870..32ba9a86 100644
--- a/utils/prob.h
+++ b/utils/prob.h
@@ -1,5 +1,5 @@
-#ifndef _PROB_H_
-#define _PROB_H_
+#ifndef PROB_H_
+#define PROB_H_
#include "logval.h"
diff --git a/utils/sgd.cpp b/utils/sgd.cpp
deleted file mode 100644
index 8613edca..00000000
--- a/utils/sgd.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-#include "maxent.h"
-#include <cmath>
-#include <stdio.h>
-
-using namespace std;
-
-// const double SGD_ETA0 = 1;
-// const double SGD_ITER = 30;
-// const double SGD_ALPHA = 0.85;
-
-//#define FOLOS_NAIVE
-//#define FOLOS_LAZY
-#define SGD_CP
-
-inline void apply_l1_penalty(const int i, const double u, vector<double>& _vl,
- vector<double>& q) {
- double& w = _vl[i];
- const double z = w;
- double& qi = q[i];
- if (w > 0) {
- w = max(0.0, w - (u + qi));
- } else if (w < 0) {
- w = min(0.0, w + (u - qi));
- }
- qi += w - z;
-}
-
-static double l1norm(const vector<double>& v) {
- double sum = 0;
- for (size_t i = 0; i < v.size(); i++) sum += abs(v[i]);
- return sum;
-}
-
-inline void update_folos_lazy(const int iter_sample, const int k,
- vector<double>& _vl,
- const vector<double>& sum_eta,
- vector<int>& last_updated) {
- const double penalty = sum_eta[iter_sample] - sum_eta[last_updated[k]];
- double& x = _vl[k];
- if (x > 0)
- x = max(0.0, x - penalty);
- else
- x = min(0.0, x + penalty);
- last_updated[k] = iter_sample;
-}
-
-int ME_Model::perform_SGD() {
- if (_l2reg > 0) {
- cerr << "error: L2 regularization is currently not supported in SGD mode."
- << endl;
- exit(1);
- }
-
- cerr << "performing SGD" << endl;
-
- const double l1param = _l1reg;
-
- const int d = _fb.Size();
-
- vector<int> ri(_vs.size());
- for (size_t i = 0; i < ri.size(); i++) ri[i] = i;
-
- vector<double> grad(d);
- int iter_sample = 0;
- const double eta0 = SGD_ETA0;
-
- // cerr << "l1param = " << l1param << endl;
- cerr << "eta0 = " << eta0 << " alpha = " << SGD_ALPHA << endl;
-
- double u = 0;
- vector<double> q(d, 0);
- vector<int> last_updated(d, 0);
- vector<double> sum_eta;
- sum_eta.push_back(0);
-
- for (int iter = 0; iter < SGD_ITER; iter++) {
-
- random_shuffle(ri.begin(), ri.end());
-
- double logl = 0;
- int ncorrect = 0, ntotal = 0;
- for (size_t i = 0; i < _vs.size(); i++, ntotal++, iter_sample++) {
- const Sample& s = _vs[ri[i]];
-
-#ifdef FOLOS_LAZY
- for (vector<int>::const_iterator j = s.positive_features.begin();
- j != s.positive_features.end(); j++) {
- for (vector<int>::const_iterator k = _feature2mef[*j].begin();
- k != _feature2mef[*j].end(); k++) {
- update_folos_lazy(iter_sample, *k, _vl, sum_eta, last_updated);
- }
- }
-#endif
-
- vector<double> membp(_num_classes);
- const int max_label = conditional_probability(s, membp);
-
- const double eta =
- eta0 * pow(SGD_ALPHA,
- (double)iter_sample / _vs.size()); // exponential decay
- // const double eta = eta0 / (1.0 + (double)iter_sample /
- // _vs.size());
-
- // if (iter_sample % _vs.size() == 0) cerr << "eta = " << eta <<
- // endl;
- u += eta * l1param;
-
- sum_eta.push_back(sum_eta.back() + eta * l1param);
-
- logl += log(membp[s.label]);
- if (max_label == s.label) ncorrect++;
-
- // binary features
- for (vector<int>::const_iterator j = s.positive_features.begin();
- j != s.positive_features.end(); j++) {
- for (vector<int>::const_iterator k = _feature2mef[*j].begin();
- k != _feature2mef[*j].end(); k++) {
- const double me = membp[_fb.Feature(*k).label()];
- const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
- const double grad = (me - ee);
- _vl[*k] -= eta * grad;
-#ifdef SGD_CP
- apply_l1_penalty(*k, u, _vl, q);
-#endif
- }
- }
- // real-valued features
- for (vector<pair<int, double> >::const_iterator j = s.rvfeatures.begin();
- j != s.rvfeatures.end(); j++) {
- for (vector<int>::const_iterator k = _feature2mef[j->first].begin();
- k != _feature2mef[j->first].end(); k++) {
- const double me = membp[_fb.Feature(*k).label()];
- const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
- const double grad = (me - ee) * j->second;
- _vl[*k] -= eta * grad;
-#ifdef SGD_CP
- apply_l1_penalty(*k, u, _vl, q);
-#endif
- }
- }
-
-#ifdef FOLOS_NAIVE
- for (size_t j = 0; j < d; j++) {
- double& x = _vl[j];
- if (x > 0)
- x = max(0.0, x - eta * l1param);
- else
- x = min(0.0, x + eta * l1param);
- }
-#endif
- }
- logl /= _vs.size();
-// fprintf(stderr, "%4d logl = %8.3f acc = %6.4f ", iter, logl,
-// (double)ncorrect / ntotal);
-
-#ifdef FOLOS_LAZY
- if (l1param > 0) {
- for (size_t j = 0; j < d; j++)
- update_folos_lazy(iter_sample, j, _vl, sum_eta, last_updated);
- }
-#endif
-
- double f = logl;
- if (l1param > 0) {
- const double l1 =
- l1norm(_vl); // this is not accurate when lazy update is used
- // cerr << "f0 = " << update_model_expectation() - l1param * l1 << "
- // ";
- f -= l1param * l1;
- int nonzero = 0;
- for (int j = 0; j < d; j++)
- if (_vl[j] != 0) nonzero++;
- // cerr << " f = " << f << " l1 = " << l1 << " nonzero_features = "
- // << nonzero << endl;
- }
- // fprintf(stderr, "%4d obj = %7.3f acc = %6.4f", iter+1, f,
- // (double)ncorrect/ntotal);
- // fprintf(stderr, "%4d obj = %f", iter+1, f);
- fprintf(stderr, "%3d obj(err) = %f (%6.4f)", iter + 1, f,
- 1 - (double)ncorrect / ntotal);
-
- if (_nheldout > 0) {
- double heldout_logl = heldout_likelihood();
- // fprintf(stderr, " heldout_logl = %f acc = %6.4f\n",
- // heldout_logl, 1 - _heldout_error);
- fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", heldout_logl,
- _heldout_error);
- }
- fprintf(stderr, "\n");
- }
-
- return 0;
-}
diff --git a/utils/small_vector.h b/utils/small_vector.h
index 280ab72c..f16bc898 100644
--- a/utils/small_vector.h
+++ b/utils/small_vector.h
@@ -1,5 +1,5 @@
-#ifndef _SMALL_VECTOR_H_
-#define _SMALL_VECTOR_H_
+#ifndef SMALL_VECTOR_H_
+#define SMALL_VECTOR_H_
/* REQUIRES that T is POD (can be memcpy). won't work (yet) due to union with SMALL_VECTOR_POD==0 - may be possible to handle movable types that have ctor/dtor, by using explicit allocation, ctor/dtor calls. but for now JUST USE THIS FOR no-meaningful ctor/dtor POD types.
@@ -15,6 +15,7 @@
#include <new>
#include <stdint.h>
#include <boost/functional/hash.hpp>
+#include <boost/serialization/map.hpp>
//sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1
@@ -297,6 +298,21 @@ public:
return hash_range(data_.ptr,data_.ptr+size_);
}
+ template<class Archive>
+ void save(Archive & ar, const unsigned int) const {
+ ar & size_;
+ for (unsigned i = 0; i < size_; ++i)
+ ar & (*this)[i];
+ }
+ template<class Archive>
+ void load(Archive & ar, const unsigned int) {
+ uint16_t s;
+ ar & s;
+ this->resize(s);
+ for (unsigned i = 0; i < size_; ++i)
+ ar & (*this)[i];
+ }
+ BOOST_SERIALIZATION_SPLIT_MEMBER()
private:
union StorageType {
T vals[SV_MAX];
diff --git a/utils/small_vector_test.cc b/utils/small_vector_test.cc
index a4eb89ae..9e1a148d 100644
--- a/utils/small_vector_test.cc
+++ b/utils/small_vector_test.cc
@@ -3,6 +3,10 @@
#define BOOST_TEST_MODULE svTest
#include <boost/test/unit_test.hpp>
#include <boost/test/floating_point_comparison.hpp>
+#include <boost/archive/text_oarchive.hpp>
+#include <boost/archive/text_iarchive.hpp>
+#include <string>
+#include <sstream>
#include <iostream>
#include <vector>
@@ -128,3 +132,29 @@ BOOST_AUTO_TEST_CASE(Small) {
cerr << sizeof(SmallVectorInt) << endl;
cerr << sizeof(vector<int>) << endl;
}
+
+BOOST_AUTO_TEST_CASE(Serialize) {
+ std::string in;
+ {
+ SmallVectorInt v;
+ v.push_back(0);
+ v.push_back(1);
+ v.push_back(-2);
+ ostringstream os;
+ boost::archive::text_oarchive oa(os);
+ oa << v;
+ in = os.str();
+ cerr << in;
+ }
+ {
+ istringstream is(in);
+ boost::archive::text_iarchive ia(is);
+ SmallVectorInt v;
+ ia >> v;
+ BOOST_CHECK_EQUAL(v.size(), 3);
+ BOOST_CHECK_EQUAL(v[0], 0);
+ BOOST_CHECK_EQUAL(v[1], 1);
+ BOOST_CHECK_EQUAL(v[2], -2);
+ }
+}
+
diff --git a/utils/sparse_vector.h b/utils/sparse_vector.h
index 049151f7..13601376 100644
--- a/utils/sparse_vector.h
+++ b/utils/sparse_vector.h
@@ -1,5 +1,5 @@
-#ifndef _SPARSE_VECTOR_H_
-#define _SPARSE_VECTOR_H_
+#ifndef SPARSE_VECTOR_H_
+#define SPARSE_VECTOR_H_
#include "fast_sparse_vector.h"
#define SparseVector FastSparseVector
diff --git a/utils/star.h b/utils/star.h
index 21977dc9..01433d12 100644
--- a/utils/star.h
+++ b/utils/star.h
@@ -1,5 +1,5 @@
-#ifndef _STAR_H_
-#define _STAR_H_
+#ifndef STAR_H_
+#define STAR_H_
// star(x) computes the infinite sum x^0 + x^1 + x^2 + ...
diff --git a/utils/sv_test.cc b/utils/sv_test.cc
index 67df8c57..b006e66d 100644
--- a/utils/sv_test.cc
+++ b/utils/sv_test.cc
@@ -1,7 +1,12 @@
#define BOOST_TEST_MODULE WeightsTest
#include <boost/test/unit_test.hpp>
#include <boost/test/floating_point_comparison.hpp>
+#include <boost/archive/text_oarchive.hpp>
+#include <boost/archive/text_iarchive.hpp>
+#include <sstream>
+#include <string>
#include "sparse_vector.h"
+#include "fdict.h"
using namespace std;
@@ -33,3 +38,29 @@ BOOST_AUTO_TEST_CASE(Division) {
x /= -1;
BOOST_CHECK(x == y);
}
+
+BOOST_AUTO_TEST_CASE(Serialization) {
+ string arc;
+ FD::dict_.clear();
+ {
+ SparseVector<double> x;
+ x.set_value(FD::Convert("Feature1"), 1.0);
+ x.set_value(FD::Convert("Pi"), 3.14);
+ ostringstream os;
+ boost::archive::text_oarchive oa(os);
+ oa << x;
+ arc = os.str();
+ }
+ FD::dict_.clear();
+ FD::Convert("SomeNewString");
+ {
+ SparseVector<double> x;
+ istringstream is(arc);
+ boost::archive::text_iarchive ia(is);
+ ia >> x;
+ cerr << x << endl;
+ BOOST_CHECK_CLOSE(x.get(FD::Convert("Pi")), 3.14, 1e-9);
+ BOOST_CHECK_CLOSE(x.get(FD::Convert("Feature1")), 1.0, 1e-9);
+ }
+}
+
diff --git a/utils/tdict.h b/utils/tdict.h
index bb19ecd5..eed33c3a 100644
--- a/utils/tdict.h
+++ b/utils/tdict.h
@@ -1,5 +1,5 @@
-#ifndef _TDICT_H_
-#define _TDICT_H_
+#ifndef TDICT_H_
+#define TDICT_H_
#include <string>
#include <vector>
diff --git a/utils/timing_stats.h b/utils/timing_stats.h
index 0a9f7656..69a1cf4b 100644
--- a/utils/timing_stats.h
+++ b/utils/timing_stats.h
@@ -1,5 +1,5 @@
-#ifndef _TIMING_STATS_H_
-#define _TIMING_STATS_H_
+#ifndef TIMING_STATS_H_
+#define TIMING_STATS_H_
#include <string>
#include <map>
diff --git a/utils/verbose.h b/utils/verbose.h
index 73476383..e39e23cb 100644
--- a/utils/verbose.h
+++ b/utils/verbose.h
@@ -1,5 +1,5 @@
-#ifndef _VERBOSE_H_
-#define _VERBOSE_H_
+#ifndef VERBOSE_H_
+#define VERBOSE_H_
extern bool SILENT;
diff --git a/utils/weights.h b/utils/weights.h
index 920fdd75..0bd4c2d9 100644
--- a/utils/weights.h
+++ b/utils/weights.h
@@ -1,5 +1,5 @@
-#ifndef _WEIGHTS_H_
-#define _WEIGHTS_H_
+#ifndef WEIGHTS_H_
+#define WEIGHTS_H_
#include <string>
#include <vector>
diff --git a/utils/wordid.h b/utils/wordid.h
index 714dcd0b..3aa6cc23 100644
--- a/utils/wordid.h
+++ b/utils/wordid.h
@@ -1,5 +1,5 @@
-#ifndef _WORD_ID_H_
-#define _WORD_ID_H_
+#ifndef WORD_ID_H_
+#define WORD_ID_H_
#include <limits>