From a8a8aeba08d5c0f6841394087bb4ec0b6ade0694 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 11 Jul 2011 20:39:45 -0400 Subject: sort of working hopkins&may optimizer --- pro-train/mr_pro_map.cc | 118 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 100 insertions(+), 18 deletions(-) (limited to 'pro-train/mr_pro_map.cc') diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc index b046cdea..128d93ce 100644 --- a/pro-train/mr_pro_map.cc +++ b/pro-train/mr_pro_map.cc @@ -10,6 +10,7 @@ #include "sampler.h" #include "filelib.h" #include "stringlib.h" +#include "weights.h" #include "scorer.h" #include "inside_outside.h" #include "hg_io.h" @@ -27,10 +28,10 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") - ("source,s",po::value(), "Source file (ignored, except for AER)") + ("source,s",po::value()->default_value(""), "Source file (ignored, except for AER)") ("loss_function,l",po::value()->default_value("ibm_bleu"), "Loss function being optimized") ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") - ("weights,w",po::value(), "[REQD] Current weights file") + ("weights,w",po::value >(), "[REQD] Weights files from previous and current iterations") ("kbest_size,k",po::value()->default_value(1500u), "Top k-hypotheses to extract") ("candidate_pairs,G", po::value()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)") ("best_pairs,X", po::value()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)") @@ -44,6 +45,10 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { cerr << "Please specify one or more references using -r \n"; flag = true; } + if (!conf->count("weights")) { + cerr << "Please specify one or more weights using -w \n"; + flag = true; + } if (flag || conf->count("help")) { cerr << dcmdline_options << endl; exit(1); @@ -51,18 +56,78 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } struct HypInfo { - HypInfo(const vector& h, const SparseVector& feats) : hyp(h), g_(-1), x(feats) {} - double g() { + HypInfo(const vector& h, const SparseVector& feats) : hyp(h), g_(-100.0), x(feats) {} + + // lazy evaluation + double g(const SentenceScorer& scorer) const { + if (g_ == -100.0) + g_ = scorer.ScoreCandidate(hyp)->ComputeScore(); return g_; } - private: - int sent_id; vector hyp; - double g_; + mutable double g_; public: SparseVector x; }; +struct ThresholdAlpha { + explicit ThresholdAlpha(double t = 0.05) : threshold(t) {} + double operator()(double mag) const { + if (mag < threshold) return 0.0; else return 1.0; + } + const double threshold; +}; + +struct TrainingInstance { + TrainingInstance(const SparseVector& feats, bool positive, double diff) : x(feats), y(positive), gdiff(diff) {} + SparseVector x; +#ifdef DEBUGGING_PRO + vector a; + vector b; +#endif + bool y; + double gdiff; +}; + +struct DiffOrder { + bool operator()(const TrainingInstance& a, const TrainingInstance& b) const { + return a.gdiff > b.gdiff; + } +}; + +template +void Sample(const unsigned gamma, const unsigned xi, const vector& J_i, const SentenceScorer& scorer, const Alpha& alpha_i, bool invert_score, vector* pv) { + vector v; + for (unsigned i = 0; i < gamma; ++i) { + size_t a = rng->inclusive(0, J_i.size() - 1)(); + size_t b = rng->inclusive(0, J_i.size() - 1)(); + if (a == b) continue; + double ga = J_i[a].g(scorer); + double gb = J_i[b].g(scorer); + bool positive = ga < gb; + if (invert_score) positive = !positive; + double gdiff = fabs(ga - gb); + if (!gdiff) continue; + if (rng->next() < alpha_i(gdiff)) { + v.push_back(TrainingInstance((J_i[a].x - J_i[b].x).erase_zeros(), positive, gdiff)); +#ifdef DEBUGGING_PRO + v.back().a = J_i[a].hyp; + v.back().b = J_i[b].hyp; +#endif + } + } + vector::iterator mid = v.begin() + xi; + if (xi > v.size()) mid = v.end(); + partial_sort(v.begin(), mid, v.end(), DiffOrder()); + copy(v.begin(), mid, back_inserter(*pv)); +#ifdef DEBUGGING_PRO + if (v.size() >= 5) + for (int i =0; i < 5; ++i) { + cerr << v[i].gdiff << " y=" << v[i].y << "\tA:" << TD::GetString(v[i].a) << "\n\tB: " << TD::GetString(v[i].b) << endl; + } +#endif +} + int main(int argc, char** argv) { po::variables_map conf; InitCommandLine(argc, argv, &conf); @@ -81,7 +146,15 @@ int main(int argc, char** argv) { const unsigned kbest_size = conf["kbest_size"].as(); const unsigned gamma = conf["candidate_pairs"].as(); const unsigned xi = conf["best_pairs"].as(); + vector weights_files = conf["weights"].as >(); + vector > weights(weights_files.size()); + for (int i = 0; i < weights.size(); ++i) { + Weights w; + w.InitFromFile(weights_files[i]); + w.InitVector(&weights[i]); + } while(in) { + vector v; string line; getline(in, line); if (line.empty()) continue; @@ -92,18 +165,27 @@ int main(int argc, char** argv) { is >> file >> sent_id; ReadFile rf(file); HypergraphIO::ReadFromJSON(rf.stream(), &hg); - KBest::KBestDerivations, ESentenceTraversal> kbest(hg, kbest_size); - vector J_i; - for (int i = 0; i < kbest_size; ++i) { - const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(hg.nodes_.size() - 1, i); - if (!d) break; - float sentscore = ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore(); - // if (invert_score) sentscore *= -1.0; - // cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl; - d->feature_values; - sentscore; + int start = weights.size(); + start -= 4; + if (start < 0) start = 0; + for (int i = start; i < weights.size(); ++i) { + hg.Reweight(weights[i]); + KBest::KBestDerivations, ESentenceTraversal> kbest(hg, kbest_size); + + for (int i = 0; i < kbest_size; ++i) { + const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(hg.nodes_.size() - 1, i); + if (!d) break; + J_i.push_back(HypInfo(d->yield, d->feature_values)); + } + } + + Sample(gamma, xi, J_i, *ds[sent_id], ThresholdAlpha(0.05), (type == TER), &v); + for (unsigned i = 0; i < v.size(); ++i) { + const TrainingInstance& vi = v[i]; + cout << vi.y << "\t" << vi.x << endl; + cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl; } } return 0; -- cgit v1.2.3