diff options
author | Patrick Simianer <p@simianer.de> | 2015-02-01 22:09:57 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-02-01 22:09:57 +0100 |
commit | 139c07aa6ed318184873b895251a5e76c9b593a1 (patch) | |
tree | 29b8759de3231fe6ede0c699822d5a54e9e089ca /training/dtrain | |
parent | 5e2c122e9311c03a80ead15f8a3d7d8d3a6d0568 (diff) |
dtrain: simplified scoring interface, removed obsolete scorers, simplified sampler interface
Diffstat (limited to 'training/dtrain')
-rw-r--r-- | training/dtrain/Makefile.am | 2 | ||||
-rw-r--r-- | training/dtrain/dtrain.cc | 61 | ||||
-rw-r--r-- | training/dtrain/dtrain.h | 100 | ||||
-rw-r--r-- | training/dtrain/sample.h | 62 | ||||
-rw-r--r-- | training/dtrain/score.cc | 292 | ||||
-rw-r--r-- | training/dtrain/score.h | 136 |
6 files changed, 101 insertions, 552 deletions
diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am index 3c072ffc..7717ec86 100644 --- a/training/dtrain/Makefile.am +++ b/training/dtrain/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = dtrain -dtrain_SOURCES = dtrain.cc score.cc dtrain.h sample.h pairs.h score.h +dtrain_SOURCES = dtrain.cc dtrain.h sample.h pairs.h score.h dtrain_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index 67e16d23..18addcb0 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -30,10 +30,9 @@ dtrain_init(int argc, char** argv, po::variables_map* conf) ("gamma", po::value<weight_t>()->default_value(0.), "gamma for SVM (0 for perceptron)") ("select_weights", po::value<string>()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)") ("rescale", po::value<bool>()->zero_tokens(), "(re)scale data and weight vector to unit length") - ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)") + ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization with clipping as in 'Tsuroka et al' (2010)") ("l1_reg_strength", po::value<weight_t>(), "l1 regularization strength") ("fselect", po::value<weight_t>()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO - ("approx_bleu_d", po::value<score_t>()->default_value(0.9), "discount for approx. BLEU") ("loss_margin", po::value<weight_t>()->default_value(0.), "update if no error in pref pair but model scores this near") ("max_pairs", po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.") ("pclr", po::value<string>()->default_value("no"), "use a (simple|adagrad) per-coordinate learning rate") @@ -107,13 +106,11 @@ main(int argc, char** argv) const unsigned N = conf["N"].as<unsigned>(); const unsigned T = conf["epochs"].as<unsigned>(); const unsigned stop_after = conf["stop_after"].as<unsigned>(); - const string filter_type = conf["filter"].as<string>(); const string pair_sampling = conf["pair_sampling"].as<string>(); const score_t pair_threshold = conf["pair_threshold"].as<score_t>(); const string select_weights = conf["select_weights"].as<string>(); const string output_ranking = conf["output_ranking"].as<string>(); const float hi_lo = conf["hi_lo"].as<float>(); - const score_t approx_bleu_d = conf["approx_bleu_d"].as<score_t>(); const unsigned max_pairs = conf["max_pairs"].as<unsigned>(); int repeat = conf["repeat"].as<unsigned>(); weight_t loss_margin = conf["loss_margin"].as<weight_t>(); @@ -136,39 +133,8 @@ main(int argc, char** argv) cerr << setw(25) << "cdec conf " << "'" << conf["decoder_config"].as<string>() << "'" << endl; Decoder decoder(ini_rf.stream()); - // scoring metric/scorer - string scorer_str = conf["scorer"].as<string>(); - LocalScorer* scorer; - if (scorer_str == "bleu") { - scorer = static_cast<BleuScorer*>(new BleuScorer); - } else if (scorer_str == "stupid_bleu") { - scorer = static_cast<StupidBleuScorer*>(new StupidBleuScorer); - } else if (scorer_str == "fixed_stupid_bleu") { - scorer = static_cast<FixedStupidBleuScorer*>(new FixedStupidBleuScorer); - } else if (scorer_str == "smooth_bleu") { - scorer = static_cast<SmoothBleuScorer*>(new SmoothBleuScorer); - } else if (scorer_str == "sum_bleu") { - scorer = static_cast<SumBleuScorer*>(new SumBleuScorer); - } else if (scorer_str == "sumexp_bleu") { - scorer = static_cast<SumExpBleuScorer*>(new SumExpBleuScorer); - } else if (scorer_str == "sumwhatever_bleu") { - scorer = static_cast<SumWhateverBleuScorer*>(new SumWhateverBleuScorer); - } else if (scorer_str == "approx_bleu") { - scorer = static_cast<ApproxBleuScorer*>(new ApproxBleuScorer(N, approx_bleu_d)); - } else if (scorer_str == "lc_bleu") { - scorer = static_cast<LinearBleuScorer*>(new LinearBleuScorer(N)); - } else { - cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl; - exit(1); - } - vector<score_t> bleu_weights; - scorer->Init(N, bleu_weights); - // setup decoder observer - MT19937 rng; // random number generator, only for forest sampling - HypSampler* observer; - observer = static_cast<KBestGetter*>(new KBestGetter(k, filter_type)); - observer->SetScorer(scorer); + ScoredKbest* observer = new ScoredKbest(k, new PerSentenceBleuScorer(N)); // init weights vector<weight_t>& decoder_weights = decoder.CurrentWeightVector(); @@ -222,10 +188,6 @@ main(int argc, char** argv) cerr << setw(25) << "N " << N << endl; cerr << setw(25) << "T " << T << endl; cerr << setw(25) << "batch " << batch << endl; - cerr << setw(26) << "scorer '" << scorer_str << "'" << endl; - if (scorer_str == "approx_bleu") - cerr << setw(25) << "approx. B discount " << approx_bleu_d << endl; - cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; cerr << setw(25) << "learning rate " << eta << endl; cerr << setw(25) << "gamma " << gamma << endl; cerr << setw(25) << "loss margin " << loss_margin << endl; @@ -242,7 +204,6 @@ main(int argc, char** argv) cerr << setw(25) << "pclr " << pclr << endl; cerr << setw(25) << "max pairs " << max_pairs << endl; cerr << setw(25) << "repeat " << repeat << endl; - //cerr << setw(25) << "test k-best " << test_k_best << endl; cerr << setw(25) << "cdec conf " << "'" << conf["decoder_config"].as<string>() << "'" << endl; cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; @@ -321,13 +282,13 @@ main(int argc, char** argv) vector<WordID> cur_ref; vector<string> tok; boost::split(tok, r, boost::is_any_of(" ")); - register_and_convert(tok, cur_ref); + RegisterAndConvert(tok, cur_ref); cur_refs.push_back(cur_ref); } refs_as_ids_buf.push_back(cur_refs); src_str_buf.push_back(in); } - observer->SetRef(refs_as_ids_buf[ii]); + observer->SetReference(refs_as_ids_buf[ii]); if (t == 0) decoder.Decode(in, observer); else @@ -341,7 +302,7 @@ main(int argc, char** argv) stringstream ss; for (auto s: *samples) { ss << ii << " ||| "; - printWordIDVec(s.w, ss); + PrintWordIDVec(s.w, ss); ss << " ||| " << s.model << " ||| " << s.score << endl; } of.get() << ss.str(); @@ -350,12 +311,12 @@ main(int argc, char** argv) if (verbose) { cerr << "--- refs for " << ii << ": "; for (auto r: refs_as_ids_buf[ii]) { - printWordIDVec(r); + PrintWordIDVec(r); cerr << endl; } for (unsigned u = 0; u < samples->size(); u++) { cerr << _p2 << _np << "[" << u << ". '"; - printWordIDVec((*samples)[u].w); + PrintWordIDVec((*samples)[u].w); cerr << "'" << endl; cerr << "SCORE=" << (*samples)[u].score << ",model="<< (*samples)[u].model << endl; cerr << "F{" << (*samples)[u].f << "} ]" << endl << endl; @@ -367,8 +328,8 @@ main(int argc, char** argv) model_sum += (*samples)[0].model; } - f_count += observer->get_f_count(); - list_sz += observer->get_sz(); + f_count += observer->GetFeatureCount(); + list_sz += observer->GetSize(); // weight updates if (!noup) { @@ -552,8 +513,6 @@ main(int argc, char** argv) if (average) w_average += lambdas; - if (scorer_str == "approx_bleu" || scorer_str == "lc_bleu") scorer->Reset(); - // print some stats score_t score_avg = score_sum/(score_t)in_sz; score_t model_avg = model_sum/(score_t)in_sz; @@ -665,7 +624,7 @@ main(int argc, char** argv) if (!quiet) { cerr << _p5 << _np << endl << "---" << endl << "Best iteration: "; - cerr << best_it+1 << " [SCORE '" << scorer_str << "'=" << max_score << "]." << endl; + cerr << best_it+1 << " [SCORE = " << max_score << "]." << endl; cerr << "This took " << overall_time/60. << " min." << endl; } } diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h index e25c6f24..2b466930 100644 --- a/training/dtrain/dtrain.h +++ b/training/dtrain/dtrain.h @@ -15,7 +15,6 @@ #include "decoder.h" #include "ff_register.h" -#include "sampler.h" #include "sentence_metadata.h" #include "verbose.h" #include "viterbi.h" @@ -26,113 +25,46 @@ namespace po = boost::program_options; namespace dtrain { - -inline void register_and_convert(const vector<string>& strs, vector<WordID>& ids) -{ - vector<string>::const_iterator it; - for (it = strs.begin(); it < strs.end(); it++) - ids.push_back(TD::Convert(*it)); -} - -inline string gettmpf(const string path, const string infix) -{ - char fn[path.size() + infix.size() + 8]; - strcpy(fn, path.c_str()); - strcat(fn, "/"); - strcat(fn, infix.c_str()); - strcat(fn, "-XXXXXX"); - if (!mkstemp(fn)) { - cerr << "Cannot make temp file in" << path << " , exiting." << endl; - exit(1); - } - return string(fn); -} - typedef double score_t; struct ScoredHyp { vector<WordID> w; - SparseVector<double> f; - score_t model; - score_t score; + SparseVector<weight_t> f; + score_t model, score; unsigned rank; }; -struct LocalScorer +inline void +RegisterAndConvert(const vector<string>& strs, vector<WordID>& ids) { - unsigned N_; - vector<score_t> w_; - - virtual score_t - Score(const vector<WordID>& hyp, const vector<vector<WordID> >& ref, const unsigned rank, const unsigned src_len)=0; - - virtual void Reset() {} // only for ApproxBleuScorer, LinearBleuScorer - - inline void - Init(unsigned N, vector<score_t> weights) - { - assert(N > 0); - N_ = N; - if (weights.empty()) for (unsigned i = 0; i < N_; i++) w_.push_back(1./N_); - else w_ = weights; - } - - inline score_t - brevity_penalty(const unsigned hyp_len, const unsigned ref_len) - { - if (hyp_len > ref_len) return 1; - return exp(1 - (score_t)ref_len/hyp_len); - } -}; - -struct HypSampler : public DecoderObserver -{ - LocalScorer* scorer_; - vector<vector<WordID> >* refs_; - unsigned f_count_, sz_; - virtual vector<ScoredHyp>* GetSamples()=0; - inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; } - inline void SetRef(vector<vector<WordID> >& refs) { refs_ = &refs; } - inline unsigned get_f_count() { return f_count_; } - inline unsigned get_sz() { return sz_; } -}; + vector<string>::const_iterator it; + for (auto s: strs) + ids.push_back(TD::Convert(s)); +} -struct HSReporter +inline void +PrintWordIDVec(vector<WordID>& v, ostream& os=cerr) { - string task_id_; - - HSReporter(string task_id) : task_id_(task_id) {} - - inline void update_counter(string name, unsigned amount) { - cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl; - } - inline void update_gcounter(string name, unsigned amount) { - cerr << "reporter:counter:Global," << name << "," << amount << endl; + for (unsigned i = 0; i < v.size(); i++) { + os << TD::Convert(v[i]); + if (i < v.size()-1) os << " "; } -}; +} inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); } inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } inline ostream& _p2(ostream& out) { return out << setprecision(2); } inline ostream& _p5(ostream& out) { return out << setprecision(5); } -inline void printWordIDVec(vector<WordID>& v, ostream& os=cerr) -{ - for (unsigned i = 0; i < v.size(); i++) { - os << TD::Convert(v[i]); - if (i < v.size()-1) os << " "; - } -} - template<typename T> -inline T sign(T z) +inline T +sign(T z) { if (z == 0) return 0; return z < 0 ? -1 : +1; } - } // namespace #endif diff --git a/training/dtrain/sample.h b/training/dtrain/sample.h index 25f02273..64d93cb0 100644 --- a/training/dtrain/sample.h +++ b/training/dtrain/sample.h @@ -1,5 +1,5 @@ -#ifndef _DTRAIN_KBESTGET_H_ -#define _DTRAIN_KBESTGET_H_ +#ifndef _DTRAIN_SAMPLE_H_ +#define _DTRAIN_SAMPLE_H_ #include "kbest.h" @@ -7,78 +7,46 @@ namespace dtrain { -struct KBestGetter : public HypSampler +struct ScoredKbest : public DecoderObserver { const unsigned k_; - const string filter_type_; vector<ScoredHyp> s_; unsigned src_len_; + PerSentenceBleuScorer* scorer_; + vector<vector<WordID> >* refs_; + unsigned f_count_, sz_; - KBestGetter(const unsigned k, const string filter_type) : - k_(k), filter_type_(filter_type) {} + ScoredKbest(const unsigned k, PerSentenceBleuScorer* scorer) : + k_(k), scorer_(scorer) {} virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { src_len_ = smeta.GetSourceLength(); - KBestScored(*hg); - } - - vector<ScoredHyp>* GetSamples() { return &s_; } - - void - KBestScored(const Hypergraph& forest) - { - if (filter_type_ == "uniq") { - KBestUnique(forest); - } else if (filter_type_ == "not") { - KBestNoFilter(forest); - } - } - - void - KBestUnique(const Hypergraph& forest) - { s_.clear(); sz_ = f_count_ = 0; KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, - KBest::FilterUnique, prob_t, EdgeProb> kbest(forest, k_); + KBest::FilterUnique, prob_t, EdgeProb> kbest(*hg, k_); for (unsigned i = 0; i < k_; ++i) { const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, KBest::FilterUnique, prob_t, EdgeProb>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); + kbest.LazyKthBest(hg->nodes_.size() - 1, i); if (!d) break; ScoredHyp h; h.w = d->yield; h.f = d->feature_values; h.model = log(d->score); h.rank = i; - h.score = scorer_->Score(h.w, *refs_, i, src_len_); + h.score = scorer_->Score(h.w, *refs_); s_.push_back(h); sz_++; f_count_ += h.f.size(); } } - void - KBestNoFilter(const Hypergraph& forest) - { - s_.clear(); sz_ = f_count_ = 0; - KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, k_); - for (unsigned i = 0; i < k_; ++i) { - const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - ScoredHyp h; - h.w = d->yield; - h.f = d->feature_values; - h.model = log(d->score); - h.rank = i; - h.score = scorer_->Score(h.w, *refs_, i, src_len_); - s_.push_back(h); - sz_++; - f_count_ += h.f.size(); - } - } + vector<ScoredHyp>* GetSamples() { return &s_; } + inline void SetReference(vector<vector<WordID> >& refs) { refs_ = &refs; } + inline unsigned GetFeatureCount() { return f_count_; } + inline unsigned GetSize() { return sz_; } }; diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc deleted file mode 100644 index 8a28771f..00000000 --- a/training/dtrain/score.cc +++ /dev/null @@ -1,292 +0,0 @@ -#include "score.h" - -namespace dtrain -{ - - -/* - * bleu - * - * as in "BLEU: a Method for Automatic Evaluation - * of Machine Translation" - * (Papineni et al. '02) - * - * NOTE: 0 if for one n \in {1..N} count is 0 - */ -score_t -BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len) -{ - if (hyp_len == 0 || ref_len == 0) return 0.; - unsigned M = N_; - vector<score_t> v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.; - sum += v[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]); - } - return brevity_penalty(hyp_len, ref_len) * exp(sum); -} - -size_t -RefLen(vector<vector<WordID> > refs) -{ - size_t ref_len = 0; - for (auto r: refs) - ref_len = max(ref_len, r.size()); // FIXME - return ref_len; -} - -score_t -BleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, refs, N_); - return Bleu(counts, hyp_len, ref_len); -} - -/* - * 'stupid' bleu - * - * as in "ORANGE: a Method for Evaluating - * Automatic Evaluation Metrics - * for Machine Translation" - * (Lin & Och '04) - * - * NOTE: 0 iff no 1gram match ('grounded') - */ -score_t -StupidBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, refs, N_); - unsigned M = N_; - vector<score_t> v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0, add = 0; - for (unsigned i = 0; i < M; i++) { - if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; - if (i == 1) add = 1; - sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); - } - return brevity_penalty(hyp_len, ref_len) * exp(sum); -} - -/* - * fixed 'stupid' bleu - * - * as in "Optimizing for Sentence-Level BLEU+1 - * Yields Short Translations" - * (Nakov et al. '12) - */ -score_t -FixedStupidBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, refs, N_); - unsigned M = N_; - vector<score_t> v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0, add = 0; - for (unsigned i = 0; i < M; i++) { - if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; - if (i == 1) add = 1; - sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); - } - return brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix -} - -/* - * smooth bleu - * - * as in "An End-to-End Discriminative Approach - * to Machine Translation" - * (Liang et al. '06) - * - * NOTE: max is 0.9375 (with N=4) - */ -score_t -SmoothBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, refs, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - vector<score_t> i_bleu; - for (unsigned i = 0; i < M; i++) i_bleu.push_back(0.); - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) { - break; - } else { - score_t i_ng = log((score_t)counts.clipped_[i]/counts.sum_[i]); - for (unsigned j = i; j < M; j++) { - i_bleu[j] += (1/((score_t)j+1)) * i_ng; - } - } - sum += exp(i_bleu[i])/pow(2.0, (double)(N_-i)); - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' bleu - * - * sum up Ngram precisions - */ -score_t -SumBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, refs, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += ((score_t)counts.clipped_[i]/counts.sum_[i])/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' (exp) bleu - * - * sum up exp(Ngram precisions) - */ -score_t -SumExpBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, refs, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += exp(((score_t)counts.clipped_[i]/counts.sum_[i]))/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' (whatever) bleu - * - * sum up exp(weight * log(Ngram precisions)) - */ -score_t -SumWhateverBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, refs, N_); - unsigned M = N_; - vector<score_t> v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += exp(v[i] * log(((score_t)counts.clipped_[i]/counts.sum_[i])))/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * approx. bleu - * - * as in "Online Large-Margin Training of Syntactic - * and Structural Translation Features" - * (Chiang et al. '08) - * - * NOTE: Needs some more code in dtrain.cc . - * No scaling by src len. - */ -score_t -ApproxBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned rank, const unsigned src_len) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (ref_len == 0) return 0.; - score_t score = 0.; - NgramCounts counts(N_); - if (hyp_len > 0) { - counts = make_ngram_counts(hyp, refs, N_); - NgramCounts tmp = glob_onebest_counts_ + counts; - score = Bleu(tmp, hyp_len, ref_len); - } - if (rank == 0) { // 'context of 1best translations' - glob_onebest_counts_ += counts; - glob_onebest_counts_ *= discount_; - glob_hyp_len_ = discount_ * (glob_hyp_len_ + hyp_len); - glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len); - glob_src_len_ = discount_ * (glob_src_len_ + src_len); - } - return score; -} - -/* - * Linear (Corpus) Bleu - * - * as in "Lattice Minimum Bayes-Risk Decoding - * for Statistical Machine Translation" - * (Tromble et al. '08) - * - */ -score_t -LinearBleuScorer::Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, - const unsigned rank, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = RefLen(refs); - if (ref_len == 0) return 0.; - unsigned M = N_; - if (ref_len < N_) M = ref_len; - NgramCounts counts(M); - if (hyp_len > 0) - counts = make_ngram_counts(hyp, refs, M); - score_t ret = 0.; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || onebest_counts_.sum_[i] == 0) break; - ret += counts.sum_[i]/onebest_counts_.sum_[i]; - } - ret = -(hyp_len/(score_t)onebest_len_) + (1./M) * ret; - if (rank == 0) { - onebest_len_ += hyp_len; - onebest_counts_ += counts; - } - return ret; -} - - -} // namespace - diff --git a/training/dtrain/score.h b/training/dtrain/score.h index 62d8f587..c727dd30 100644 --- a/training/dtrain/score.h +++ b/training/dtrain/score.h @@ -6,7 +6,6 @@ namespace dtrain { - struct NgramCounts { unsigned N_; @@ -30,6 +29,7 @@ struct NgramCounts { NgramCounts result = *this; result += other; + return result; } @@ -102,7 +102,7 @@ struct NgramCounts typedef map<vector<WordID>, unsigned> Ngrams; inline Ngrams -make_ngrams(const vector<WordID>& s, const unsigned N) +MakeNgrams(const vector<WordID>& s, const unsigned N) { Ngrams ngrams; vector<WordID> ng; @@ -113,21 +113,21 @@ make_ngrams(const vector<WordID>& s, const unsigned N) ngrams[ng]++; } } + return ngrams; } inline NgramCounts -make_ngram_counts(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned N) +MakeNgramCounts(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned N) { - Ngrams hyp_ngrams = make_ngrams(hyp, N); + Ngrams hyp_ngrams = MakeNgrams(hyp, N); vector<Ngrams> refs_ngrams; for (auto r: refs) { - Ngrams r_ng = make_ngrams(r, N); + Ngrams r_ng = MakeNgrams(r, N); refs_ngrams.push_back(r_ng); } NgramCounts counts(N); - Ngrams::iterator it; - Ngrams::iterator ti; + Ngrams::iterator it, ti; for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) { unsigned max_ref_count = 0; for (auto ref_ngrams: refs_ngrams) { @@ -137,90 +137,72 @@ make_ngram_counts(const vector<WordID>& hyp, const vector<vector<WordID> >& refs } counts.Add(it->second, min(it->second, max_ref_count), it->first.size() - 1); } + return counts; } -struct BleuScorer : public LocalScorer -{ - score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len); - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} -}; - -struct StupidBleuScorer : public LocalScorer -{ - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} -}; - -struct FixedStupidBleuScorer : public LocalScorer +/* + * per-sentence BLEU + * as in "Optimizing for Sentence-Level BLEU+1 + * Yields Short Translations" + * (Nakov et al. '12) + * + * [simply add 1 to reference length for calculation of BP] + * + */ + +struct PerSentenceBleuScorer { - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} -}; - -struct SmoothBleuScorer : public LocalScorer -{ - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} -}; - -struct SumBleuScorer : public LocalScorer -{ - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} -}; - -struct SumExpBleuScorer : public LocalScorer -{ - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} -}; + const unsigned N_; + vector<score_t> w_; -struct SumWhateverBleuScorer : public LocalScorer -{ - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {}; -}; - -struct ApproxBleuScorer : public BleuScorer -{ - NgramCounts glob_onebest_counts_; - unsigned glob_hyp_len_, glob_ref_len_, glob_src_len_; - score_t discount_; - - ApproxBleuScorer(unsigned N, score_t d) : glob_onebest_counts_(NgramCounts(N)), discount_(d) + PerSentenceBleuScorer(unsigned n) : N_(n) { - glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0; + for (auto i = 1; i <= N_; i++) + w_.push_back(1.0/N_); } - inline void Reset() { - glob_onebest_counts_.Zero(); - glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.; - } - - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned rank, const unsigned src_len); -}; - -struct LinearBleuScorer : public BleuScorer -{ - unsigned onebest_len_; - NgramCounts onebest_counts_; - - LinearBleuScorer(unsigned N) : onebest_len_(1), onebest_counts_(N) + inline score_t + BrevityPenalty(const unsigned hyp_len, const unsigned ref_len) { - onebest_counts_.One(); + if (hyp_len > ref_len) return 1; + return exp(1 - (score_t)ref_len/hyp_len); } - score_t Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs, const unsigned rank, const unsigned /*src_len*/); - - inline void Reset() { - onebest_len_ = 1; - onebest_counts_.One(); + score_t + Score(const vector<WordID>& hyp, const vector<vector<WordID> >& refs) + { + unsigned hyp_len = hyp.size(), ref_len = 0; + // best match reference length + if (refs.size() == 1) { + ref_len = refs[0].size(); + } else { + unsigned i = 0, best_idx = 0; + unsigned best = std::numeric_limits<unsigned>::max(); + for (auto r: refs) { + unsigned d = abs(hyp_len-r.size()); + if (best > d) best_idx = i; + } + ref_len = refs[best_idx].size(); + } + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = MakeNgramCounts(hyp, refs, N_); + unsigned M = N_; + vector<score_t> v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0, add = 0; + for (unsigned i = 0; i < M; i++) { + if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; + if (i == 1) add = 1; + sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); + } + return BrevityPenalty(hyp_len, ref_len+1) * exp(sum); } }; - } // namespace #endif |