major restructure of the training code

author: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2012-11-18 13:35:42 -0500
committer: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2012-11-18 13:35:42 -0500
commit: 8aa29810bb77611cc20b7a384897ff6703783ea1 (patch)
tree: 8635daa8fffb3f2cd90e30b41e27f4f9e0909447 /pro/mr_pro_reduce.cc
parent: fbdacabc85bea65d735f2cb7f92b98e08ce72d04 (diff)
1 files changed, 0 insertions, 286 deletions
diff --git a/pro/mr_pro_reduce.cc b/pro/mr_pro_reduce.cc
deleted file mode 100644
index 5ef9b470..00000000
--- a/pro/mr_pro_reduce.cc
+++ /dev/null
@@ -1,286 +0,0 @@
-#include <cstdlib>
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "filelib.h"
-#include "weights.h"
-#include "sparse_vector.h"
-#include "optimize.h"
-#include "liblbfgs/lbfgs++.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-// since this is a ranking model, there should be equal numbers of
-// positive and negative examples, so the bias should be 0
-static const double MAX_BIAS = 1e-10;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("weights,w", po::value<string>(), "Weights from previous iteration (used as initialization and interpolation")
-        ("regularization_strength,C",po::value<double>()->default_value(500.0), "l2 regularization strength")
-        ("l1",po::value<double>()->default_value(0.0), "l1 regularization strength")
-        ("regularize_to_weights,y",po::value<double>()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect")
-        ("memory_buffers,m",po::value<unsigned>()->default_value(100), "Number of memory buffers (LBFGS)")
-        ("min_reg,r",po::value<double>()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght")
-        ("max_reg,R",po::value<double>()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght")
-        ("testset,t",po::value<string>(), "Optional held-out test set")
-        ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength")
-        ("interpolate_with_weights,p",po::value<double>()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-void ParseSparseVector(string& line, size_t cur, SparseVector<weight_t>* out) {
-  SparseVector<weight_t>& x = *out;
-  size_t last_start = cur;
-  size_t last_comma = string::npos;
-  while(cur <= line.size()) {
-    if (line[cur] == ' ' || cur == line.size()) {
-      if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) {
-        cerr << "[ERROR] " << line << endl << "  position = " << cur << endl;
-        exit(1);
-      }
-      const int fid = FD::Convert(line.substr(last_start, last_comma - last_start));
-      if (cur < line.size()) line[cur] = 0;
-      const weight_t val = strtod(&line[last_comma + 1], NULL);
-      x.set_value(fid, val);
-
-      last_comma = string::npos;
-      last_start = cur+1;
-    } else {
-      if (line[cur] == '=')
-        last_comma = cur;
-    }
-    ++cur;
-  }
-}
-
-void ReadCorpus(istream* pin, vector<pair<bool, SparseVector<weight_t> > >* corpus) {
-  istream& in = *pin;
-  corpus->clear();
-  bool flag = false;
-  int lc = 0;
-  string line;
-  SparseVector<weight_t> x;
-  while(getline(in, line)) {
-    ++lc;
-    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
-    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
-    if (line.empty()) continue;
-    const size_t ks = line.find("\t");
-    assert(string::npos != ks);
-    assert(ks == 1);
-    const bool y = line[0] == '1';
-    x.clear();
-    ParseSparseVector(line, ks + 1, &x);
-    corpus->push_back(make_pair(y, x));
-  }
-  if (flag) cerr << endl;
-}
-
-void GradAdd(const SparseVector<weight_t>& v, const double scale, weight_t* acc) {
-  for (SparseVector<weight_t>::const_iterator it = v.begin();
-       it != v.end(); ++it) {
-    acc[it->first] += it->second * scale;
-  }
-}
-
-double ApplyRegularizationTerms(const double C,
-                                const double T,
-                                const vector<weight_t>& weights,
-                                const vector<weight_t>& prev_weights,
-                                weight_t* g) {
-  double reg = 0;
-  for (size_t i = 0; i < weights.size(); ++i) {
-    const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0);
-    const double& w_i = weights[i];
-    reg += C * w_i * w_i;
-    g[i] += 2 * C * w_i;
-
-    const double diff_i = w_i - prev_w_i;
-    reg += T * diff_i * diff_i;
-    g[i] += 2 * T * diff_i;
-  }
-  return reg;
-}
-
-double TrainingInference(const vector<weight_t>& x,
-                         const vector<pair<bool, SparseVector<weight_t> > >& corpus,
-                         weight_t* g = NULL) {
-  double cll = 0;
-  for (int i = 0; i < corpus.size(); ++i) {
-    const double dotprod = corpus[i].second.dot(x) + (x.size() ? x[0] : weight_t()); // x[0] is bias
-    double lp_false = dotprod;
-    double lp_true = -dotprod;
-    if (0 < lp_true) {
-      lp_true += log1p(exp(-lp_true));
-      lp_false = log1p(exp(lp_false));
-    } else {
-      lp_true = log1p(exp(lp_true));
-      lp_false += log1p(exp(-lp_false));
-    }
-    lp_true*=-1;
-    lp_false*=-1;
-    if (corpus[i].first) {  // true label
-      cll -= lp_true;
-      if (g) {
-        // g -= corpus[i].second * exp(lp_false);
-        GradAdd(corpus[i].second, -exp(lp_false), g);
-        g[0] -= exp(lp_false); // bias
-      }
-    } else {                  // false label
-      cll -= lp_false;
-      if (g) {
-        // g += corpus[i].second * exp(lp_true);
-        GradAdd(corpus[i].second, exp(lp_true), g);
-        g[0] += exp(lp_true); // bias
-      }
-    }
-  }
-  return cll;
-}
-
-struct ProLoss {
-  ProLoss(const vector<pair<bool, SparseVector<weight_t> > >& tr,
-          const vector<pair<bool, SparseVector<weight_t> > >& te,
-          const double c,
-          const double t,
-          const vector<weight_t>& px) : training(tr), testing(te), C(c), T(t), prev_x(px){}
-  double operator()(const vector<double>& x, double* g) const {
-    fill(g, g + x.size(), 0.0);
-    double cll = TrainingInference(x, training, g);
-    tppl = 0;
-    if (testing.size())
-      tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size()));
-    double ppl = cll / log(2);
-    ppl /= training.size();
-    ppl = pow(2.0, ppl);
-    double reg = ApplyRegularizationTerms(C, T, x, prev_x, g);
-    return cll + reg;
-  }
-  const vector<pair<bool, SparseVector<weight_t> > >& training, testing;
-  const double C, T;
-  const vector<double>& prev_x;
-  mutable double tppl;
-};
-
-// return held-out log likelihood
-double LearnParameters(const vector<pair<bool, SparseVector<weight_t> > >& training,
-                       const vector<pair<bool, SparseVector<weight_t> > >& testing,
-                       const double C,
-                       const double C1,
-                       const double T,
-                       const unsigned memory_buffers,
-                       const vector<weight_t>& prev_x,
-                       vector<weight_t>* px) {
-  assert(px->size() == prev_x.size());
-  ProLoss loss(training, testing, C, T, prev_x);
-  LBFGS<ProLoss> lbfgs(px, loss, memory_buffers, C1);
-  lbfgs.MinimizeFunction();
-  return loss.tppl;
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  string line;
-  vector<pair<bool, SparseVector<weight_t> > > training, testing;
-  const bool tune_regularizer = conf.count("tune_regularizer");
-  if (tune_regularizer && !conf.count("testset")) {
-    cerr << "--tune_regularizer requires --testset to be set\n";
-    return 1;
-  }
-  const double min_reg = conf["min_reg"].as<double>();
-  const double max_reg = conf["max_reg"].as<double>();
-  double C = conf["regularization_strength"].as<double>(); // will be overridden if parameter is tuned
-  double C1 = conf["l1"].as<double>(); // will be overridden if parameter is tuned
-  const double T = conf["regularize_to_weights"].as<double>();
-  assert(C >= 0.0);
-  assert(min_reg >= 0.0);
-  assert(max_reg >= 0.0);
-  assert(max_reg > min_reg);
-  const double psi = conf["interpolate_with_weights"].as<double>();
-  if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; }
-  ReadCorpus(&cin, &training);
-  if (conf.count("testset")) {
-    ReadFile rf(conf["testset"].as<string>());
-    ReadCorpus(rf.stream(), &testing);
-  }
-  cerr << "Number of features: " << FD::NumFeats() << endl;
-
-  vector<weight_t> x, prev_x;  // x[0] is bias
-  if (conf.count("weights")) {
-    Weights::InitFromFile(conf["weights"].as<string>(), &x);
-    x.resize(FD::NumFeats());
-    prev_x = x;
-  } else {
-    x.resize(FD::NumFeats());
-    prev_x = x;
-  }
-  cerr << "         Number of features: " << x.size() << endl;
-  cerr << "Number of training examples: " << training.size() << endl;
-  cerr << "Number of  testing examples: " << testing.size() << endl;
-  double tppl = 0.0;
-  vector<pair<double,double> > sp;
-  vector<double> smoothed;
-  if (tune_regularizer) {
-    C = min_reg;
-    const double steps = 18;
-    double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps);
-    cerr << "SWEEP FACTOR: " << sweep_factor << endl;
-    while(C < max_reg) {
-      cerr << "C=" << C << "\tT=" <<T << endl;
-      tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x);
-      sp.push_back(make_pair(C, tppl));
-      C *= sweep_factor;
-    }
-    smoothed.resize(sp.size(), 0);
-    smoothed[0] = sp[0].second;
-    smoothed.back() = sp.back().second; 
-    for (int i = 1; i < sp.size()-1; ++i) {
-      double prev = sp[i-1].second;
-      double next = sp[i+1].second;
-      double cur = sp[i].second;
-      smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next);
-    }
-    double best_ppl = 9999999;
-    unsigned best_i = 0;
-    for (unsigned i = 0; i < sp.size(); ++i) {
-      if (smoothed[i] < best_ppl) {
-        best_ppl = smoothed[i];
-        best_i = i;
-      }
-    }
-    C = sp[best_i].first;
-  }  // tune regularizer
-  tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x);
-  if (conf.count("weights")) {
-    for (int i = 1; i < x.size(); ++i) {
-      x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi);
-    }
-  }
-  cout.precision(15);
-  cout << "# C=" << C << "\theld out perplexity=";
-  if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; }
-  if (sp.size()) {
-    cout << "# Parameter sweep:\n";
-    for (int i = 0; i < sp.size(); ++i) {
-      cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl;
-    }
-  }
-  Weights::WriteToFile("-", x);
-  return 0;
-}
author	Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>	2012-11-18 13:35:42 -0500
committer	Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>	2012-11-18 13:35:42 -0500
commit	8aa29810bb77611cc20b7a384897ff6703783ea1 (patch)
tree	8635daa8fffb3f2cd90e30b41e27f4f9e0909447 /pro/mr_pro_reduce.cc
parent	fbdacabc85bea65d735f2cb7f92b98e08ce72d04 (diff)