author    | Chris Dyer <cdyer@cs.cmu.edu> | 2011-07-11 20:39:45 -0400
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-07-11 20:39:45 -0400
commit    | a8a8aeba08d5c0f6841394087bb4ec0b6ade0694 (patch)
tree      | 34e795068efa76fda33965caee0c785455dc2865 /pro-train/mr_pro_reduce.cc
parent    | 7ff60b69cc21c90695ca20829375e6bf9b5f452d (diff)
sort of working hopkins&may optimizer
Diffstat (limited to 'pro-train/mr_pro_reduce.cc')
-rw-r--r-- | pro-train/mr_pro_reduce.cc | 167
1 file changed, 125 insertions, 42 deletions
diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc
index 3df52020..2b9c5ce7 100644
--- a/pro-train/mr_pro_reduce.cc
+++ b/pro-train/mr_pro_reduce.cc
@@ -1,3 +1,4 @@
+#include <cstdlib>
 #include <sstream>
 #include <iostream>
 #include <fstream>
@@ -6,24 +7,29 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "weights.h"
 #include "sparse_vector.h"
-#include "error_surface.h"
-#include "line_optimizer.h"
-#include "b64tools.h"
+#include "optimize.h"
 
 using namespace std;
 namespace po = boost::program_options;
 
+// since this is a ranking model, there should be equal numbers of
+// positive and negative examples so the bias should be 0
+static const double MAX_BIAS = 1e-10;
+
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
-        ("loss_function,l",po::value<string>(), "Loss function being optimized")
+        ("weights,w", po::value<string>(), "Weights from previous iteration (used as initialization and interpolation")
+        ("interpolation,p",po::value<double>()->default_value(0.9), "Output weights are p*w + (1-p)*w_prev")
+        ("memory_buffers,m",po::value<unsigned>()->default_value(200), "Number of memory buffers (LBFGS)")
+        ("sigma_squared,s",po::value<double>()->default_value(0.5), "Sigma squared for Gaussian prior")
         ("help,h", "Help");
   po::options_description dcmdline_options;
   dcmdline_options.add(opts);
   po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = conf->count("loss_function") == 0;
-  if (flag || conf->count("help")) {
+  if (conf->count("help")) {
     cerr << dcmdline_options << endl;
     exit(1);
   }
@@ -32,50 +38,127 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
-  LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
-  if (type == TER || type == AER) {
-    opt_type = LineOptimizer::MINIMIZE_SCORE;
+  string line;
+  vector<pair<bool, SparseVector<double> > > training;
+  int lc = 0;
+  bool flag = false;
+  SparseVector<double> old_weights;
+  const double psi = conf["interpolation"].as<double>();
+  if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; }
+  if (conf.count("weights")) {
+    Weights w;
+    w.InitFromFile(conf["weights"].as<string>());
+    w.InitSparseVector(&old_weights);
   }
-  string last_key;
-  vector<ErrorSurface> esv;
-  while(cin) {
-    string line;
-    getline(cin, line);
+  while(getline(cin, line)) {
+    ++lc;
+    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
     if (line.empty()) continue;
-    size_t ks = line.find("\t");
+    const size_t ks = line.find("\t");
     assert(string::npos != ks);
-    assert(ks > 2);
-    string key = line.substr(2, ks - 2);
-    string val = line.substr(ks + 1);
-    if (key != last_key) {
-      if (!last_key.empty()) {
-        float score;
-        double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
-        cout << last_key << "|" << x << "|" << score << endl;
+    assert(ks == 1);
+    const bool y = line[0] == '1';
+    SparseVector<double> x;
+    size_t last_start = ks + 1;
+    size_t last_comma = string::npos;
+    size_t cur = last_start;
+    while(cur <= line.size()) {
+      if (line[cur] == ' ' || cur == line.size()) {
+        if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) {
+          cerr << "[ERROR] " << line << endl << " position = " << cur << endl;
+          exit(1);
+        }
+        const int fid = FD::Convert(line.substr(last_start, last_comma - last_start));
+        if (cur < line.size()) line[cur] = 0;
+        const double val = strtod(&line[last_comma + 1], NULL);
+        x.set_value(fid, val);
+
+        last_comma = string::npos;
+        last_start = cur+1;
+      } else {
+        if (line[cur] == '=')
+          last_comma = cur;
+      }
+      ++cur;
+    }
+    training.push_back(make_pair(y, x));
+  }
+  if (flag) cerr << endl;
+
+  cerr << "Number of features: " << FD::NumFeats() << endl;
+  vector<double> x(FD::NumFeats(), 0.0); // x[0] is bias
+  for (SparseVector<double>::const_iterator it = old_weights.begin();
+       it != old_weights.end(); ++it)
+    x[it->first] = it->second;
+  vector<double> vg(FD::NumFeats(), 0.0);
+  SparseVector<double> g;
+  bool converged = false;
+  LBFGSOptimizer opt(FD::NumFeats(), conf["memory_buffers"].as<unsigned>());
+  while(!converged) {
+    double cll = 0;
+    double dbias = 0;
+    g.clear();
+    for (int i = 0; i < training.size(); ++i) {
+      const double dotprod = training[i].second.dot(x) + x[0]; // x[0] is bias
+      double lp_false = dotprod;
+      double lp_true = -dotprod;
+      if (0 < lp_true) {
+        lp_true += log1p(exp(-lp_true));
+        lp_false = log1p(exp(lp_false));
+      } else {
+        lp_true = log1p(exp(lp_true));
+        lp_false += log1p(exp(-lp_false));
+      }
+      lp_true*=-1;
+      lp_false*=-1;
+      if (training[i].first) { // true label
+        cll -= lp_true;
+        g -= training[i].second * exp(lp_false);
+        dbias -= exp(lp_false);
+      } else { // false label
+        cll -= lp_false;
+        g += training[i].second * exp(lp_true);
+        dbias += exp(lp_true);
       }
-      last_key = key;
-      esv.clear();
     }
-    if (val.size() % 4 != 0) {
-      cerr << "B64 encoding error 1! Skipping.\n";
-      continue;
+    vg.clear();
+    g.init_vector(&vg);
+    vg[0] = dbias;
+#if 1
+    const double sigsq = conf["sigma_squared"].as<double>();
+    double norm = 0;
+    for (int i = 1; i < x.size(); ++i) {
+      const double mean_i = 0.0;
+      const double param = (x[i] - mean_i);
+      norm += param * param;
+      vg[i] += param / sigsq;
+    }
+    const double reg = norm / (2.0 * sigsq);
+#else
+    double reg = 0;
+#endif
+    cll += reg;
+    cerr << cll << " (REG=" << reg << ")\t";
+    bool failed = false;
+    try {
+      opt.Optimize(cll, vg, &x);
+    } catch (...) {
+      cerr << "Exception caught, assuming convergence is close enough...\n";
+      failed = true;
    }
-    string encoded(val.size() / 4 * 3, '\0');
-    if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) {
-      cerr << "B64 encoding error 2! Skipping.\n";
-      continue;
+    if (fabs(x[0]) > MAX_BIAS) {
+      cerr << "Biased model learned. Are your training instances wrong?\n";
+      cerr << " BIAS: " << x[0] << endl;
    }
-    esv.push_back(ErrorSurface());
-    esv.back().Deserialize(type, encoded);
+    converged = failed || opt.HasConverged();
  }
-  if (!esv.empty()) {
-    // cerr << "ESV=" << esv.size() << endl;
-    // for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; }
-    float score;
-    double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
-    cout << last_key << "|" << x << "|" << score << endl;
+  Weights w;
+  if (conf.count("weights")) {
+    for (int i = 1; i < x.size(); ++i)
+      x[i] = (x[i] * psi) + old_weights.get(i) * (1.0 - psi);
  }
+  w.InitFromVector(x);
+  w.WriteToFile("-");
   return 0;
 }
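
Note on the loss computation in the inner loop: lp_true and lp_false are the binary logistic log-probabilities, evaluated with the standard log1p trick. The branch on 0 < lp_true ensures exp() is only ever called with a non-positive argument, so it cannot overflow for large |dotprod|. A minimal standalone sketch of the same computation (the helper name log_sigmoid and the test value are illustrative, not from the commit):

#include <cmath>
#include <cstdio>

// Stable log sigma(s) = -log(1 + exp(-s)): branch on the sign of s so the
// argument to exp() is never positive, mirroring the 0 < lp_true branch.
double log_sigmoid(double s) {
  if (s > 0.0) return -std::log1p(std::exp(-s));
  return s - std::log1p(std::exp(s));  // -log(1+e^{-s}) = s - log(1+e^{s}) for s <= 0
}

int main() {
  const double dotprod = 3.5;                    // w . f + bias, as in the loop
  const double lp_true = log_sigmoid(dotprod);   // log p(y=1 | dotprod)
  const double lp_false = log_sigmoid(-dotprod); // log p(y=0 | dotprod)
  std::printf("lp_true=%g lp_false=%g p_true+p_false=%g\n",
              lp_true, lp_false, std::exp(lp_true) + std::exp(lp_false)); // sum ~= 1
  return 0;
}

For s <= 0 this uses the identity -log(1 + exp(-s)) = s - log(1 + exp(s)), which is exactly what the diff computes before the final sign flips (lp_true*=-1; lp_false*=-1;).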
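Note on the #if 1 block: it implements a zero-mean Gaussian prior (L2 regularization) on every weight except the bias, adding ||w||^2 / (2*sigma^2) to the objective and w_i / sigma^2 to each gradient entry; the loop starts at i = 1 so x[0] stays unregularized. A small sketch of that step (the helper name add_gaussian_prior is illustrative, not from the commit):

#include <cstdio>
#include <vector>

// Gaussian prior as in the #if 1 block: returns ||w||^2 / (2*sigma^2), the
// amount to add to the objective (cll), and adds w_i / sigma^2 to each
// gradient entry. Index 0 (the bias) is deliberately skipped.
double add_gaussian_prior(const std::vector<double>& w,
                          std::vector<double>* grad,
                          double sigma_squared) {
  double norm = 0.0;
  for (size_t i = 1; i < w.size(); ++i) {
    norm += w[i] * w[i];
    (*grad)[i] += w[i] / sigma_squared;
  }
  return norm / (2.0 * sigma_squared);
}

int main() {
  std::vector<double> w(3, 0.0);
  w[1] = 1.0; w[2] = -2.0;
  std::vector<double> g(w.size(), 0.0);
  // with the option's default sigma_squared = 0.5: reg = (1 + 4) / 1 = 5
  const double reg = add_gaussian_prior(w, &g, 0.5);
  std::printf("reg=%g dg1=%g dg2=%g\n", reg, g[1], g[2]); // dg1=2, dg2=-4
  return 0;
}

Since the minimized objective is the negative log-likelihood, adding this term is equivalent to MAP estimation under a Normal(0, sigma^2) prior on each weight.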