Diffstat (limited to 'pro-train/mr_pro_reduce.cc')
-rw-r--r--  pro-train/mr_pro_reduce.cc  167
1 file changed, 125 insertions(+), 42 deletions(-)
diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc
index 3df52020..2b9c5ce7 100644
--- a/pro-train/mr_pro_reduce.cc
+++ b/pro-train/mr_pro_reduce.cc
@@ -1,3 +1,4 @@
+#include <cstdlib>
 #include <sstream>
 #include <iostream>
 #include <fstream>
@@ -6,24 +7,29 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "weights.h"
 #include "sparse_vector.h"
-#include "error_surface.h"
-#include "line_optimizer.h"
-#include "b64tools.h"
+#include "optimize.h"
 
 using namespace std;
 namespace po = boost::program_options;
 
+// since this is a ranking model, there should be equal numbers of
+// positive and negative examples so the bias should be 0
+static const double MAX_BIAS = 1e-10;
+
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
-        ("loss_function,l",po::value<string>(), "Loss function being optimized")
+        ("weights,w", po::value<string>(), "Weights from previous iteration (used as initialization and interpolation)")
+        ("interpolation,p",po::value<double>()->default_value(0.9), "Output weights are p*w + (1-p)*w_prev")
+        ("memory_buffers,m",po::value<unsigned>()->default_value(200), "Number of memory buffers (LBFGS)")
+        ("sigma_squared,s",po::value<double>()->default_value(0.5), "Sigma squared for Gaussian prior")
         ("help,h", "Help");
   po::options_description dcmdline_options;
   dcmdline_options.add(opts);
   po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = conf->count("loss_function") == 0;
-  if (flag || conf->count("help")) {
+  if (conf->count("help")) {
     cerr << dcmdline_options << endl;
     exit(1);
   }
@@ -32,50 +38,127 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
-  LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
-  if (type == TER || type == AER) {
-    opt_type = LineOptimizer::MINIMIZE_SCORE;
+  string line;
+  vector<pair<bool, SparseVector<double> > > training;
+  int lc = 0;
+  bool flag = false;
+  SparseVector<double> old_weights;
+  const double psi = conf["interpolation"].as<double>();
+  if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; }
+  if (conf.count("weights")) {
+    Weights w;
+    w.InitFromFile(conf["weights"].as<string>());
+    w.InitSparseVector(&old_weights);
   }
-  string last_key;
-  vector<ErrorSurface> esv;
-  while(cin) {
-    string line;
-    getline(cin, line);
+  while(getline(cin, line)) {
+    ++lc;
+    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
     if (line.empty()) continue;
-    size_t ks = line.find("\t");
+    const size_t ks = line.find("\t");
     assert(string::npos != ks);
-    assert(ks > 2);
-    string key = line.substr(2, ks - 2);
-    string val = line.substr(ks + 1);
-    if (key != last_key) {
-      if (!last_key.empty()) {
-	float score;
-        double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
-	cout << last_key << "|" << x << "|" << score << endl;
+    assert(ks == 1);
+    const bool y = line[0] == '1';
+    SparseVector<double> x;
+    size_t last_start = ks + 1;
+    size_t last_comma = string::npos;
+    size_t cur = last_start;
+    while(cur <= line.size()) {
+      if (line[cur] == ' ' || cur == line.size()) {
+        if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) {
+          cerr << "[ERROR] " << line << endl << "  position = " << cur << endl;
+          exit(1);
+        }
+        const int fid = FD::Convert(line.substr(last_start, last_comma - last_start));
+        if (cur < line.size()) line[cur] = 0;
+        const double val = strtod(&line[last_comma + 1], NULL);
+        x.set_value(fid, val);
+
+        last_comma = string::npos;
+        last_start = cur+1;
+      } else {
+        if (line[cur] == '=')
+          last_comma = cur;
+      }
+      ++cur;
+    }
+    training.push_back(make_pair(y, x));
+  }
+  if (flag) cerr << endl;
+
+  cerr << "Number of features: " << FD::NumFeats() << endl;
+  vector<double> x(FD::NumFeats(), 0.0);  // x[0] is bias
+  for (SparseVector<double>::const_iterator it = old_weights.begin();
+       it != old_weights.end(); ++it)
+    x[it->first] = it->second;
+  vector<double> vg(FD::NumFeats(), 0.0);
+  SparseVector<double> g;
+  bool converged = false;
+  LBFGSOptimizer opt(FD::NumFeats(), conf["memory_buffers"].as<unsigned>());
+  while(!converged) {
+    double cll = 0;
+    double dbias = 0;
+    g.clear();
+    for (int i = 0; i < training.size(); ++i) {
+      const double dotprod = training[i].second.dot(x) + x[0]; // x[0] is bias
+      double lp_false = dotprod;
+      double lp_true = -dotprod;
+      if (0 < lp_true) {
+        lp_true += log1p(exp(-lp_true));
+        lp_false = log1p(exp(lp_false));
+      } else {
+        lp_true = log1p(exp(lp_true));
+        lp_false += log1p(exp(-lp_false));
+      }
+      lp_true*=-1;
+      lp_false*=-1;
+      if (training[i].first) {  // true label
+        cll -= lp_true;
+        g -= training[i].second * exp(lp_false);
+        dbias -= exp(lp_false);
+      } else {                  // false label
+        cll -= lp_false;
+        g += training[i].second * exp(lp_true);
+        dbias += exp(lp_true);
       }
-      last_key = key;
-      esv.clear();
     }
-    if (val.size() % 4 != 0) {
-      cerr << "B64 encoding error 1! Skipping.\n";
-      continue;
+    vg.clear();
+    g.init_vector(&vg);
+    vg[0] = dbias;
+#if 1
+    const double sigsq = conf["sigma_squared"].as<double>();
+    double norm = 0;
+    for (int i = 1; i < x.size(); ++i) {
+      const double mean_i = 0.0;
+      const double param = (x[i] - mean_i);
+      norm += param * param;
+      vg[i] += param / sigsq;
+    }
+    const double reg = norm / (2.0 * sigsq);
+#else
+    double reg = 0;
+#endif
+    cll += reg;
+    cerr << cll << " (REG=" << reg << ")\t";
+    bool failed = false;
+    try {
+      opt.Optimize(cll, vg, &x);
+    } catch (...) {
+      cerr << "Exception caught, assuming convergence is close enough...\n";
+      failed = true;
     }
-    string encoded(val.size() / 4 * 3, '\0');
-    if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) {
-      cerr << "B64 encoding error 2! Skipping.\n";
-      continue;
+    if (fabs(x[0]) > MAX_BIAS) {
+      cerr << "Biased model learned. Are your training instances wrong?\n";
+      cerr << "  BIAS: " << x[0] << endl;
     }
-    esv.push_back(ErrorSurface());
-    esv.back().Deserialize(type, encoded);
+    converged = failed || opt.HasConverged();
   }
-  if (!esv.empty()) {
-    // cerr << "ESV=" << esv.size() << endl;
-    // for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; }
-    float score;
-    double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
-    cout << last_key << "|" << x << "|" << score << endl;
+  Weights w;
+  if (conf.count("weights")) {
+    for (int i = 1; i < x.size(); ++i)
+      x[i] = (x[i] * psi) + old_weights.get(i) * (1.0 - psi);
   }
+  w.InitFromVector(x);
+  w.WriteToFile("-");
   return 0;
 }
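
Note on the training loop in this diff: lp_true and lp_false are the log-probabilities of a logistic regression, computed with log1p and a branch on the sign of the score so that exp() is never called on a large positive argument. A minimal standalone sketch of the same trick (the LogSigmoid helper and the sample score are illustrative, not part of the commit):

#include <cmath>
#include <cstdio>

// log(1 / (1 + exp(-z))): branch on the sign of z so exp() only ever
// sees a non-positive argument and cannot overflow
double LogSigmoid(double z) {
  if (z > 0) return -log1p(exp(-z));
  return z - log1p(exp(z));
}

int main() {
  const double dotprod = -1000.0;                // hypothetical w.x + bias
  const double lp_true  = LogSigmoid(dotprod);   // log p(y = 1 | x) = -1000
  const double lp_false = LogSigmoid(-dotprod);  // log p(y = 0 | x) ~= 0
  // the naive form log(1.0 / (1.0 + exp(1000.0))) fails here: exp(1000)
  // overflows to +inf, the quotient becomes 0, and the log returns -inf
  printf("lp_true = %g, lp_false = %g\n", lp_true, lp_false);
  return 0;
}

After the branch and the final *= -1 in the diff, lp_true holds log sigmoid(dotprod) and lp_false holds log sigmoid(-dotprod), which is exactly what LogSigmoid returns here.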
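
The "#if 1" block is a zero-mean Gaussian prior on the weights (the --sigma_squared option): it adds norm / (2 * sigsq) to the objective and x[i] / sigsq to each gradient component, starting at i = 1 so the bias stays unregularized; MAX_BIAS then only warns if the learned bias drifts away from zero. The final loop blends the new weights with the previous iteration's. A toy sketch of that interpolation step, with hypothetical weight values (psi is the --interpolation option, default 0.9):

#include <cstdio>
#include <vector>

int main() {
  const double psi = 0.9;                        // --interpolation default
  std::vector<double> x      = {0.0, 1.2, -0.4}; // freshly learned weights
  std::vector<double> w_prev = {0.0, 1.0, -0.5}; // weights from the -w file
  for (size_t i = 1; i < x.size(); ++i)          // i = 0 (bias) is skipped
    x[i] = (x[i] * psi) + w_prev[i] * (1.0 - psi);
  printf("x[1] = %g, x[2] = %g\n", x[1], x[2]);  // 1.18 and -0.41
  return 0;
}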
