author    Chris Dyer <cdyer@cs.cmu.edu>  2011-07-11 20:39:45 -0400
committer Chris Dyer <cdyer@cs.cmu.edu>  2011-07-11 20:39:45 -0400
commit    a8a8aeba08d5c0f6841394087bb4ec0b6ade0694 (patch)
tree      34e795068efa76fda33965caee0c785455dc2865 /pro-train/mr_pro_reduce.cc
parent    7ff60b69cc21c90695ca20829375e6bf9b5f452d (diff)
sort of working Hopkins & May optimizer
Diffstat (limited to 'pro-train/mr_pro_reduce.cc')
-rw-r--r--  pro-train/mr_pro_reduce.cc  167
1 file changed, 125 insertions(+), 42 deletions(-)
diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc
index 3df52020..2b9c5ce7 100644
--- a/pro-train/mr_pro_reduce.cc
+++ b/pro-train/mr_pro_reduce.cc
@@ -1,3 +1,4 @@
+#include <cstdlib>
#include <sstream>
#include <iostream>
#include <fstream>
@@ -6,24 +7,29 @@
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>
+#include "weights.h"
#include "sparse_vector.h"
-#include "error_surface.h"
-#include "line_optimizer.h"
-#include "b64tools.h"
+#include "optimize.h"
using namespace std;
namespace po = boost::program_options;
+// Since this is a ranking model, there should be equal numbers of
+// positive and negative examples, so the bias should be 0.
+static const double MAX_BIAS = 1e-10;
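+// A sketch of why (assuming the mapper emits each sampled pair in both
+// orders, i.e. features a-b with label 1 and b-a with label 0): the
+// gradient of the log-loss w.r.t. the bias is sum_i (y_i - p(1|x_i)),
+// which under that symmetry vanishes exactly at bias = 0, so MAX_BIAS
+// acts only as a numerical tolerance.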
+
void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
- ("loss_function,l",po::value<string>(), "Loss function being optimized")
+ ("weights,w", po::value<string>(), "Weights from previous iteration (used as initialization and interpolation")
+ ("interpolation,p",po::value<double>()->default_value(0.9), "Output weights are p*w + (1-p)*w_prev")
+ ("memory_buffers,m",po::value<unsigned>()->default_value(200), "Number of memory buffers (LBFGS)")
+ ("sigma_squared,s",po::value<double>()->default_value(0.5), "Sigma squared for Gaussian prior")
("help,h", "Help");
po::options_description dcmdline_options;
dcmdline_options.add(opts);
po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- bool flag = conf->count("loss_function") == 0;
- if (flag || conf->count("help")) {
+ if (conf->count("help")) {
cerr << dcmdline_options << endl;
exit(1);
}
@@ -32,50 +38,127 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
int main(int argc, char** argv) {
po::variables_map conf;
InitCommandLine(argc, argv, &conf);
- const string loss_function = conf["loss_function"].as<string>();
- ScoreType type = ScoreTypeFromString(loss_function);
- LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
- if (type == TER || type == AER) {
- opt_type = LineOptimizer::MINIMIZE_SCORE;
+ string line;
+ vector<pair<bool, SparseVector<double> > > training;
+ int lc = 0;
+ bool flag = false;
+ SparseVector<double> old_weights;
+ const double psi = conf["interpolation"].as<double>();
+  if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; }
+ if (conf.count("weights")) {
+ Weights w;
+ w.InitFromFile(conf["weights"].as<string>());
+ w.InitSparseVector(&old_weights);
}
- string last_key;
- vector<ErrorSurface> esv;
- while(cin) {
- string line;
- getline(cin, line);
+ while(getline(cin, line)) {
+ ++lc;
+ if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+ if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
if (line.empty()) continue;
- size_t ks = line.find("\t");
+ const size_t ks = line.find("\t");
assert(string::npos != ks);
- assert(ks > 2);
- string key = line.substr(2, ks - 2);
- string val = line.substr(ks + 1);
- if (key != last_key) {
- if (!last_key.empty()) {
- float score;
- double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
- cout << last_key << "|" << x << "|" << score << endl;
+ assert(ks == 1);
+ const bool y = line[0] == '1';
+ SparseVector<double> x;
+ size_t last_start = ks + 1;
+ size_t last_comma = string::npos;
+ size_t cur = last_start;
+ while(cur <= line.size()) {
+      if (cur == line.size() || line[cur] == ' ') {
+ if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) {
+ cerr << "[ERROR] " << line << endl << " position = " << cur << endl;
+ exit(1);
+ }
+ const int fid = FD::Convert(line.substr(last_start, last_comma - last_start));
+ if (cur < line.size()) line[cur] = 0;
+ const double val = strtod(&line[last_comma + 1], NULL);
+ x.set_value(fid, val);
+
+ last_comma = string::npos;
+ last_start = cur+1;
+ } else {
+ if (line[cur] == '=')
+ last_comma = cur;
+ }
+ ++cur;
+ }
+ training.push_back(make_pair(y, x));
+ }
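+  // The loop above parses lines of the form "1<TAB>name=value name=value ..."
+  // (a 0/1 label, a tab, then space-separated features; the names here are
+  // illustrative). last_comma tracks the last '=' seen in the current token,
+  // splitting the feature name from its value, and the token is terminated
+  // in place with a NUL so strtod can read the value directly.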
+ if (flag) cerr << endl;
+
+ cerr << "Number of features: " << FD::NumFeats() << endl;
+ vector<double> x(FD::NumFeats(), 0.0); // x[0] is bias
+ for (SparseVector<double>::const_iterator it = old_weights.begin();
+ it != old_weights.end(); ++it)
+ x[it->first] = it->second;
+ vector<double> vg(FD::NumFeats(), 0.0);
+ SparseVector<double> g;
+ bool converged = false;
+ LBFGSOptimizer opt(FD::NumFeats(), conf["memory_buffers"].as<unsigned>());
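+  // Batch L-BFGS loop: each pass below accumulates the negative conditional
+  // log-likelihood `cll` and its gradient over the full training set, then
+  // calls opt.Optimize() to advance x by one quasi-Newton step in place;
+  // memory_buffers is the history size m of the limited-memory Hessian
+  // approximation.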
+ while(!converged) {
+ double cll = 0;
+ double dbias = 0;
+ g.clear();
+ for (int i = 0; i < training.size(); ++i) {
+ const double dotprod = training[i].second.dot(x) + x[0]; // x[0] is bias
+ double lp_false = dotprod;
+ double lp_true = -dotprod;
+ if (0 < lp_true) {
+ lp_true += log1p(exp(-lp_true));
+ lp_false = log1p(exp(lp_false));
+ } else {
+ lp_true = log1p(exp(lp_true));
+ lp_false += log1p(exp(-lp_false));
+ }
+      lp_true *= -1;
+      lp_false *= -1;
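+      // The branch above is a numerically stable log-sigmoid:
+      //   lp_true  = log(p(y=1)) = -log(1 + exp(-dotprod))
+      //   lp_false = log(p(y=0)) = -log(1 + exp(dotprod))
+      // log1p is always applied to exp() of a non-positive argument, so
+      // nothing can overflow.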
+ if (training[i].first) { // true label
+ cll -= lp_true;
+ g -= training[i].second * exp(lp_false);
+ dbias -= exp(lp_false);
+ } else { // false label
+ cll -= lp_false;
+ g += training[i].second * exp(lp_true);
+ dbias += exp(lp_true);
}
- last_key = key;
- esv.clear();
}
- if (val.size() % 4 != 0) {
- cerr << "B64 encoding error 1! Skipping.\n";
- continue;
+ vg.clear();
+ g.init_vector(&vg);
+ vg[0] = dbias;
+#if 1
+ const double sigsq = conf["sigma_squared"].as<double>();
+ double norm = 0;
+ for (int i = 1; i < x.size(); ++i) {
+ const double mean_i = 0.0;
+ const double param = (x[i] - mean_i);
+ norm += param * param;
+ vg[i] += param / sigsq;
+ }
+ const double reg = norm / (2.0 * sigsq);
+#else
+ double reg = 0;
+#endif
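+    // Gaussian prior = L2 regularization: adds ||w||^2 / (2*sigma^2) to the
+    // objective and w_i / sigma^2 to the gradient (a zero-mean prior, hence
+    // mean_i = 0). The bias x[0] is left unregularized since the loop starts
+    // at i = 1; smaller sigma_squared means a stronger pull toward 0.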
+ cll += reg;
+ cerr << cll << " (REG=" << reg << ")\t";
+ bool failed = false;
+ try {
+ opt.Optimize(cll, vg, &x);
+ } catch (...) {
+ cerr << "Exception caught, assuming convergence is close enough...\n";
+ failed = true;
}
- string encoded(val.size() / 4 * 3, '\0');
- if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) {
- cerr << "B64 encoding error 2! Skipping.\n";
- continue;
+ if (fabs(x[0]) > MAX_BIAS) {
+ cerr << "Biased model learned. Are your training instances wrong?\n";
+ cerr << " BIAS: " << x[0] << endl;
}
- esv.push_back(ErrorSurface());
- esv.back().Deserialize(type, encoded);
+ converged = failed || opt.HasConverged();
}
- if (!esv.empty()) {
- // cerr << "ESV=" << esv.size() << endl;
- // for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; }
- float score;
- double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
- cout << last_key << "|" << x << "|" << score << endl;
+ Weights w;
+ if (conf.count("weights")) {
+ for (int i = 1; i < x.size(); ++i)
+ x[i] = (x[i] * psi) + old_weights.get(i) * (1.0 - psi);
}
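+  // When --weights is given, the written model is the convex blend
+  // psi * w_new + (1 - psi) * w_prev from the --interpolation flag
+  // (default 0.9); the previous weights also served as the L-BFGS
+  // starting point above.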
+ w.InitFromVector(x);
+ w.WriteToFile("-");
return 0;
}