From 6c8309c58dc4a6015dfb2f478a2cef5f65f92961 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 14 Sep 2011 12:17:04 +0100 Subject: weight_t refactoring --- pro-train/mr_pro_map.cc | 42 +++++++++++++++++++++--------------------- pro-train/mr_pro_reduce.cc | 34 +++++++++++++++++----------------- 2 files changed, 38 insertions(+), 38 deletions(-) (limited to 'pro-train') diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc index bc59285b..0a9b75d7 100644 --- a/pro-train/mr_pro_map.cc +++ b/pro-train/mr_pro_map.cc @@ -27,7 +27,7 @@ namespace po = boost::program_options; struct ApproxVectorHasher { static const size_t MASK = 0xFFFFFFFFull; union UType { - double f; + double f; // leave as double size_t i; }; static inline double round(const double x) { @@ -40,9 +40,9 @@ struct ApproxVectorHasher { t.i &= (1ull - MASK); return t.f; } - size_t operator()(const SparseVector& x) const { + size_t operator()(const SparseVector& x) const { size_t h = 0x573915839; - for (SparseVector::const_iterator it = x.begin(); it != x.end(); ++it) { + for (SparseVector::const_iterator it = x.begin(); it != x.end(); ++it) { UType t; t.f = it->second; if (t.f) { @@ -56,9 +56,9 @@ struct ApproxVectorHasher { }; struct ApproxVectorEquals { - bool operator()(const SparseVector& a, const SparseVector& b) const { - SparseVector::const_iterator bit = b.begin(); - for (SparseVector::const_iterator ait = a.begin(); ait != a.end(); ++ait) { + bool operator()(const SparseVector& a, const SparseVector& b) const { + SparseVector::const_iterator bit = b.begin(); + for (SparseVector::const_iterator ait = a.begin(); ait != a.end(); ++ait) { if (bit == b.end() || ait->first != bit->first || ApproxVectorHasher::round(ait->second) != ApproxVectorHasher::round(bit->second)) @@ -105,18 +105,18 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } struct HypInfo { - HypInfo() : g_(-100.0) {} - HypInfo(const vector& h, const SparseVector& feats) : hyp(h), g_(-100.0), x(feats) {} + HypInfo() : g_(-100.0f) {} + HypInfo(const vector& h, const SparseVector& feats) : hyp(h), g_(-100.0f), x(feats) {} // lazy evaluation double g(const SentenceScorer& scorer) const { - if (g_ == -100.0) + if (g_ == -100.0f) g_ = scorer.ScoreCandidate(hyp)->ComputeScore(); return g_; } vector hyp; - mutable double g_; - SparseVector x; + mutable float g_; + SparseVector x; }; struct HypInfoCompare { @@ -146,8 +146,8 @@ void WriteKBest(const string& file, const vector& kbest) { } } -void ParseSparseVector(string& line, size_t cur, SparseVector* out) { - SparseVector& x = *out; +void ParseSparseVector(string& line, size_t cur, SparseVector* out) { + SparseVector& x = *out; size_t last_start = cur; size_t last_comma = string::npos; while(cur <= line.size()) { @@ -211,15 +211,15 @@ struct ThresholdAlpha { }; struct TrainingInstance { - TrainingInstance(const SparseVector& feats, bool positive, double diff) : x(feats), y(positive), gdiff(diff) {} - SparseVector x; + TrainingInstance(const SparseVector& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {} + SparseVector x; #undef DEBUGGING_PRO #ifdef DEBUGGING_PRO vector a; vector b; #endif bool y; - double gdiff; + float gdiff; }; #ifdef DEBUGGING_PRO ostream& operator<<(ostream& os, const TrainingInstance& d) { @@ -235,19 +235,19 @@ struct DiffOrder { void Sample(const unsigned gamma, const unsigned xi, const vector& J_i, const SentenceScorer& scorer, const bool invert_score, vector* pv) { vector v1, v2; - double avg_diff = 0; + float avg_diff = 0; for (unsigned i = 0; i < gamma; ++i) { const size_t a = rng->inclusive(0, J_i.size() - 1)(); const size_t b = rng->inclusive(0, J_i.size() - 1)(); if (a == b) continue; - double ga = J_i[a].g(scorer); - double gb = J_i[b].g(scorer); + float ga = J_i[a].g(scorer); + float gb = J_i[b].g(scorer); bool positive = gb < ga; if (invert_score) positive = !positive; - const double gdiff = fabs(ga - gb); + const float gdiff = fabs(ga - gb); if (!gdiff) continue; avg_diff += gdiff; - SparseVector xdiff = (J_i[a].x - J_i[b].x).erase_zeros(); + SparseVector xdiff = (J_i[a].x - J_i[b].x).erase_zeros(); if (xdiff.empty()) { cerr << "Empty diff:\n " << TD::GetString(J_i[a].hyp) << endl << "x=" << J_i[a].x << endl; cerr << " " << TD::GetString(J_i[b].hyp) << endl << "x=" << J_i[b].x << endl; diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc index 9caaa1d1..239649c1 100644 --- a/pro-train/mr_pro_reduce.cc +++ b/pro-train/mr_pro_reduce.cc @@ -40,8 +40,8 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } } -void ParseSparseVector(string& line, size_t cur, SparseVector* out) { - SparseVector& x = *out; +void ParseSparseVector(string& line, size_t cur, SparseVector* out) { + SparseVector& x = *out; size_t last_start = cur; size_t last_comma = string::npos; while(cur <= line.size()) { @@ -52,7 +52,7 @@ void ParseSparseVector(string& line, size_t cur, SparseVector* out) { } const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); if (cur < line.size()) line[cur] = 0; - const double val = strtod(&line[last_comma + 1], NULL); + const weight_t val = strtod(&line[last_comma + 1], NULL); x.set_value(fid, val); last_comma = string::npos; @@ -65,13 +65,13 @@ void ParseSparseVector(string& line, size_t cur, SparseVector* out) { } } -void ReadCorpus(istream* pin, vector > >* corpus) { +void ReadCorpus(istream* pin, vector > >* corpus) { istream& in = *pin; corpus->clear(); bool flag = false; int lc = 0; string line; - SparseVector x; + SparseVector x; while(getline(in, line)) { ++lc; if (lc % 1000 == 0) { cerr << '.'; flag = true; } @@ -88,16 +88,16 @@ void ReadCorpus(istream* pin, vector > >* corpus if (flag) cerr << endl; } -void GradAdd(const SparseVector& v, const double scale, vector* acc) { - for (SparseVector::const_iterator it = v.begin(); +void GradAdd(const SparseVector& v, const double scale, vector* acc) { + for (SparseVector::const_iterator it = v.begin(); it != v.end(); ++it) { (*acc)[it->first] += it->second * scale; } } -double TrainingInference(const vector& x, - const vector > >& corpus, - vector* g = NULL) { +double TrainingInference(const vector& x, + const vector > >& corpus, + vector* g = NULL) { double cll = 0; for (int i = 0; i < corpus.size(); ++i) { const double dotprod = corpus[i].second.dot(x) + x[0]; // x[0] is bias @@ -132,13 +132,13 @@ double TrainingInference(const vector& x, } // return held-out log likelihood -double LearnParameters(const vector > >& training, - const vector > >& testing, +double LearnParameters(const vector > >& training, + const vector > >& testing, const double sigsq, const unsigned memory_buffers, - vector* px) { - vector& x = *px; - vector vg(FD::NumFeats(), 0.0); + vector* px) { + vector& x = *px; + vector vg(FD::NumFeats(), 0.0); bool converged = false; LBFGSOptimizer opt(FD::NumFeats(), memory_buffers); double tppl = 0.0; @@ -172,7 +172,7 @@ double LearnParameters(const vector > >& trainin cll += reg; cerr << cll << " (REG=" << reg << ")\tPPL=" << ppl << "\t TEST_PPL=" << tppl << "\t"; try { - vector old_x = x; + vector old_x = x; do { opt.Optimize(cll, vg, &x); converged = opt.HasConverged(); @@ -193,7 +193,7 @@ int main(int argc, char** argv) { po::variables_map conf; InitCommandLine(argc, argv, &conf); string line; - vector > > training, testing; + vector > > training, testing; SparseVector old_weights; const bool tune_regularizer = conf.count("tune_regularizer"); if (tune_regularizer && !conf.count("testset")) { -- cgit v1.2.3