From 0ac66e310d57f9aea5ddeea900c84df08abfe8c2 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 27 Apr 2012 01:54:47 +0200 Subject: fix approx. BLEU of (Chiang et al. '08) --- dtrain/Makefile.am | 2 +- dtrain/README.md | 3 ++- dtrain/dtrain.cc | 11 +++++--- dtrain/hstreaming/dtrain.ini | 2 +- dtrain/kbestget.h | 12 ++++++--- dtrain/ksampler.h | 6 +++-- dtrain/pairsampling.h | 2 +- dtrain/score.cc | 58 +++++++++++++++++++++--------------------- dtrain/score.h | 52 ++++++++++++++++++++++--------------- dtrain/test/example/dtrain.ini | 6 ++--- 10 files changed, 87 insertions(+), 67 deletions(-) diff --git a/dtrain/Makefile.am b/dtrain/Makefile.am index 64fef489..f39d161e 100644 --- a/dtrain/Makefile.am +++ b/dtrain/Makefile.am @@ -3,5 +3,5 @@ bin_PROGRAMS = dtrain dtrain_SOURCES = dtrain.cc score.cc dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval +AM_CPPFLAGS = -O3 -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/dtrain/README.md b/dtrain/README.md index 2a24ec22..92d6ba0d 100644 --- a/dtrain/README.md +++ b/dtrain/README.md @@ -3,7 +3,8 @@ which is able to train the weights of very many (sparse) features. It was used here: "Joint Feature Selection in Distributed Stochastic Learning for Large-Scale Discriminative Training in - SMT" Simianer, Riezler, Dyer; ACL 2012 + SMT" +(Simianer, Riezler, Dyer; ACL 2012) Building diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index ea5b8835..3dee10f2 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -32,7 +32,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("l1_reg_strength", po::value(), "l1 regularization strength") ("inc_correct", po::value()->zero_tokens(), "include correctly ranked pairs into updates") ("fselect", po::value()->default_value(-1), "TODO select top x percent of features after each epoch") - ("approx_bleu_scale", po::value()->default_value(0.9), "scaling for approx. BLEU") + ("approx_bleu_d", po::value()->default_value(0.9), "discount for approx. BLEU") #ifdef DTRAIN_LOCAL ("refs,r", po::value(), "references in local mode") #endif @@ -136,6 +136,7 @@ main(int argc, char** argv) const score_t pair_threshold = cfg["pair_threshold"].as(); const string select_weights = cfg["select_weights"].as(); const float hi_lo = cfg["hi_lo"].as(); + const score_t approx_bleu_d = cfg["approx_bleu_d"].as(); bool average = false; if (select_weights == "avg") average = true; @@ -161,7 +162,7 @@ main(int argc, char** argv) } else if (scorer_str == "smooth_bleu") { scorer = dynamic_cast(new SmoothBleuScorer); } else if (scorer_str == "approx_bleu") { - scorer = dynamic_cast(new ApproxBleuScorer(N)); + scorer = dynamic_cast(new ApproxBleuScorer(N, approx_bleu_d)); } else { cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl; exit(1); @@ -235,6 +236,8 @@ main(int argc, char** argv) cerr << setw(25) << "N " << N << endl; cerr << setw(25) << "T " << T << endl; cerr << setw(25) << "scorer '" << scorer_str << "'" << endl; + if (scorer_str == "approx_bleu") + cerr << setw(25) << "approx. B discount " << approx_bleu_d << endl; cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl; if (sample_from == "kbest") cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; @@ -242,7 +245,7 @@ main(int argc, char** argv) cerr << setw(25) << "gamma " << gamma << endl; cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl; if (pair_sampling == "XYX") - cerr << setw(25) << "hi lo " << "'" << hi_lo << "'" << endl; + cerr << setw(25) << "hi lo " << hi_lo << endl; cerr << setw(25) << "pair threshold " << pair_threshold << endl; cerr << setw(25) << "select weights " << "'" << select_weights << "'" << endl; if (cfg.count("l1_reg")) @@ -261,7 +264,7 @@ main(int argc, char** argv) cerr << setw(25) << "weights in " << "'" << cfg["input_weights"].as() << "'" << endl; if (cfg.count("stop-after")) cerr << setw(25) << "stop_after " << stop_after << endl; - if (!verbose) cerr << "(a dot represents " << DTRAIN_DOTS << " lines of input)" << endl; + if (!verbose) cerr << "(a dot represents " << DTRAIN_DOTS << " inputs)" << endl; } diff --git a/dtrain/hstreaming/dtrain.ini b/dtrain/hstreaming/dtrain.ini index 05535299..a2c219a1 100644 --- a/dtrain/hstreaming/dtrain.ini +++ b/dtrain/hstreaming/dtrain.ini @@ -10,6 +10,6 @@ gamma=0 scorer=stupid_bleu sample_from=kbest filter=uniq -pair_sampling=108010 +pair_sampling=XYX pair_threshold=0 select_weights=last diff --git a/dtrain/kbestget.h b/dtrain/kbestget.h index bcd82610..77d4a139 100644 --- a/dtrain/kbestget.h +++ b/dtrain/kbestget.h @@ -2,6 +2,8 @@ #define _DTRAIN_KBESTGET_H_ #include "kbest.h" // cdec +#include "sentence_metadata.h" + #include "verbose.h" #include "viterbi.h" #include "ff_register.h" @@ -32,7 +34,7 @@ struct LocalScorer vector w_; virtual score_t - Score(vector& hyp, vector& ref, const unsigned rank)=0; + Score(vector& hyp, vector& ref, const unsigned rank, const unsigned src_len)=0; void Reset() {} // only for approx bleu @@ -71,13 +73,15 @@ struct KBestGetter : public HypSampler const unsigned k_; const string filter_type_; vector s_; + unsigned src_len_; KBestGetter(const unsigned k, const string filter_type) : k_(k), filter_type_(filter_type) {} virtual void - NotifyTranslationForest(const SentenceMetadata& /*smeta*/, Hypergraph* hg) + NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { + src_len_ = smeta.GetSourceLength(); KBestScored(*hg); } @@ -109,7 +113,7 @@ struct KBestGetter : public HypSampler h.f = d->feature_values; h.model = log(d->score); h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i); + h.score = scorer_->Score(h.w, *ref_, i, src_len_); s_.push_back(h); } } @@ -128,7 +132,7 @@ struct KBestGetter : public HypSampler h.f = d->feature_values; h.model = log(d->score); h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i); + h.score = scorer_->Score(h.w, *ref_, i, src_len_); s_.push_back(h); } } diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h index eb4813ab..0783f98b 100644 --- a/dtrain/ksampler.h +++ b/dtrain/ksampler.h @@ -15,13 +15,15 @@ struct KSampler : public HypSampler vector s_; MT19937* prng_; score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector); + unsigned src_len_; explicit KSampler(const unsigned k, MT19937* prng) : k_(k), prng_(prng) {} virtual void - NotifyTranslationForest(const SentenceMetadata& /*smeta*/, Hypergraph* hg) + NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { + src_len_ = smeta.GetSourceLength(); ScoredSamples(*hg); } @@ -37,7 +39,7 @@ struct KSampler : public HypSampler h.f = samples[i].fmap; h.model = log(samples[i].model_score); h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i); + h.score = scorer_->Score(h.w, *ref_, i, src_len_); s_.push_back(h); } } diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h index 66ca1706..56702b86 100644 --- a/dtrain/pairsampling.h +++ b/dtrain/pairsampling.h @@ -44,7 +44,7 @@ partXYX(vector* s, vector >& training, scor { sort(s->begin(), s->end(), _XYX_cmp_hyp_by_score); unsigned sz = s->size(); - unsigned sep = sz * hi_lo; + unsigned sep = round(sz*hi_lo); for (unsigned i = 0; i < sep; i++) { for (unsigned j = sep; j < sz; j++) { if ((*s)[i].rank < (*s)[j].rank) { diff --git a/dtrain/score.cc b/dtrain/score.cc index d964b4da..d0f9e8a0 100644 --- a/dtrain/score.cc +++ b/dtrain/score.cc @@ -16,23 +16,23 @@ namespace dtrain score_t BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len) { - if (hyp_len == 0 || ref_len == 0) return 0; + if (hyp_len == 0 || ref_len == 0) return 0.; unsigned M = N_; if (ref_len < N_) M = ref_len; score_t sum = 0; for (unsigned i = 0; i < M; i++) { - if (counts.clipped[i] == 0 || counts.sum[i] == 0) return 0; - sum += w_[i] * log((score_t)counts.clipped[i]/counts.sum[i]); + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.; + sum += w_[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]); } return brevity_penalty(hyp_len, ref_len) * exp(sum); } score_t BleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/) + const unsigned /*rank*/, const unsigned /*src_len*/) { unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0; + if (hyp_len == 0 || ref_len == 0) return 0.; NgramCounts counts = make_ngram_counts(hyp, ref, N_); return Bleu(counts, hyp_len, ref_len); } @@ -49,18 +49,18 @@ BleuScorer::Score(vector& hyp, vector& ref, */ score_t StupidBleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/) + const unsigned /*rank*/, const unsigned /*src_len*/) { unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0; + if (hyp_len == 0 || ref_len == 0) return 0.; NgramCounts counts = make_ngram_counts(hyp, ref, N_); unsigned M = N_; if (ref_len < N_) M = ref_len; score_t sum = 0, add = 0; for (unsigned i = 0; i < M; i++) { - if (i == 0 && (counts.clipped[i] == 0 || counts.sum[i] == 0)) return 0; + if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; if (i == 1) add = 1; - sum += w_[i] * log(((score_t)counts.clipped[i] + add)/((counts.sum[i] + add))); + sum += w_[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); } return brevity_penalty(hyp_len, ref_len) * exp(sum); } @@ -76,10 +76,10 @@ StupidBleuScorer::Score(vector& hyp, vector& ref, */ score_t SmoothBleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/) + const unsigned /*rank*/, const unsigned /*src_len*/) { unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0; + if (hyp_len == 0 || ref_len == 0) return 0.; NgramCounts counts = make_ngram_counts(hyp, ref, N_); unsigned M = N_; if (ref_len < N_) M = ref_len; @@ -87,10 +87,10 @@ SmoothBleuScorer::Score(vector& hyp, vector& ref, vector i_bleu; for (unsigned i = 0; i < M; i++) i_bleu.push_back(0.); for (unsigned i = 0; i < M; i++) { - if (counts.clipped[i] == 0 || counts.sum[i] == 0) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) { break; } else { - score_t i_ng = log((score_t)counts.clipped[i]/counts.sum[i]); + score_t i_ng = log((score_t)counts.clipped_[i]/counts.sum_[i]); for (unsigned j = i; j < M; j++) { i_bleu[j] += (1/((score_t)j+1)) * i_ng; } @@ -107,29 +107,29 @@ SmoothBleuScorer::Score(vector& hyp, vector& ref, * and Structural Translation Features" * (Chiang et al. '08) * - * NOTE: needs some code in dtrain.cc + * NOTE: needs some more code in dtrain.cc */ score_t ApproxBleuScorer::Score(vector& hyp, vector& ref, - const unsigned rank) + const unsigned rank, const unsigned src_len) { unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - NgramCounts tmp(N_); + if (ref_len == 0) return 0.; + score_t score = 0.; + NgramCounts counts(N_); + if (hyp_len > 0) { + counts = make_ngram_counts(hyp, ref, N_); + NgramCounts tmp = glob_onebest_counts_ + counts; + score = Bleu(tmp, hyp_len, ref_len); + } if (rank == 0) { // 'context of 1best translations' - glob_onebest_counts += counts; - glob_hyp_len += hyp_len; - glob_ref_len += ref_len; - hyp_len = glob_hyp_len; - ref_len = glob_ref_len; - tmp = glob_onebest_counts; - } else { - hyp_len = hyp.size(); - ref_len = ref.size(); - tmp = glob_onebest_counts + counts; + glob_onebest_counts_ += counts; + glob_onebest_counts_ *= discount_; + glob_hyp_len_ = discount_ * (glob_hyp_len_ + hyp_len); + glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len); + glob_src_len_ = discount_ * (glob_src_len_ + src_len); } - return 0.9 * Bleu(tmp, hyp_len, ref_len); // TODO param + return (score_t)glob_src_len_ * score; } diff --git a/dtrain/score.h b/dtrain/score.h index 5aceb81f..d0e79f65 100644 --- a/dtrain/score.h +++ b/dtrain/score.h @@ -12,8 +12,8 @@ namespace dtrain struct NgramCounts { unsigned N_; - map clipped; - map sum; + map clipped_; + map sum_; NgramCounts(const unsigned N) : N_(N) { Zero(); } @@ -22,8 +22,8 @@ struct NgramCounts { assert(N_ == rhs.N_); for (unsigned i = 0; i < N_; i++) { - this->clipped[i] += rhs.clipped.find(i)->second; - this->sum[i] += rhs.sum.find(i)->second; + this->clipped_[i] += rhs.clipped_.find(i)->second; + this->sum_[i] += rhs.sum_.find(i)->second; } } @@ -35,16 +35,25 @@ struct NgramCounts return result; } + inline void + operator*=(const score_t rhs) + { + for (unsigned i = 0; i < N_; i++) { + this->clipped_[i] *= rhs; + this->sum_[i] *= rhs; + } + } + inline void Add(const unsigned count, const unsigned ref_count, const unsigned i) { assert(i < N_); if (count > ref_count) { - clipped[i] += ref_count; + clipped_[i] += ref_count; } else { - clipped[i] += count; + clipped_[i] += count; } - sum[i] += count; + sum_[i] += count; } inline void @@ -52,8 +61,8 @@ struct NgramCounts { unsigned i; for (i = 0; i < N_; i++) { - clipped[i] = 0; - sum[i] = 0; + clipped_[i] = 0; + sum_[i] = 0; } } @@ -61,8 +70,8 @@ struct NgramCounts Print() { for (unsigned i = 0; i < N_; i++) { - cout << i+1 << "grams (clipped):\t" << clipped[i] << endl; - cout << i+1 << "grams:\t\t\t" << sum[i] << endl; + cout << i+1 << "grams (clipped):\t" << clipped_[i] << endl; + cout << i+1 << "grams:\t\t\t" << sum_[i] << endl; } } }; @@ -106,35 +115,36 @@ make_ngram_counts(const vector& hyp, const vector& ref, const un struct BleuScorer : public LocalScorer { score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len); - score_t Score(vector& hyp, vector& ref, const unsigned rank); + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); }; struct StupidBleuScorer : public LocalScorer { - score_t Score(vector& hyp, vector& ref, const unsigned rank); + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); }; struct SmoothBleuScorer : public LocalScorer { - score_t Score(vector& hyp, vector& ref, const unsigned rank); + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); }; struct ApproxBleuScorer : public BleuScorer { - NgramCounts glob_onebest_counts; - unsigned glob_hyp_len, glob_ref_len; + NgramCounts glob_onebest_counts_; + unsigned glob_hyp_len_, glob_ref_len_, glob_src_len_; + score_t discount_; - ApproxBleuScorer(unsigned N) : glob_onebest_counts(NgramCounts(N)) + ApproxBleuScorer(unsigned N, score_t d) : glob_onebest_counts_(NgramCounts(N)), discount_(d) { - glob_hyp_len = glob_ref_len = 0; + glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0; } inline void Reset() { - glob_onebest_counts.Zero(); - glob_hyp_len = glob_ref_len = 0; + glob_onebest_counts_.Zero(); + glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.; } - score_t Score(vector& hyp, vector& ref, const unsigned rank); + score_t Score(vector& hyp, vector& ref, const unsigned rank, const unsigned src_len); }; diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index cd2c75e7..2ad44688 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -4,18 +4,18 @@ decoder_config=test/example/cdec.ini # config for cdec # weights for these features will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp -stop_after=100 # stop epoch after 100 inputs +stop_after=20 # stop epoch after 20 inputs # interesting stuff epochs=3 # run over input 3 times k=100 # use 100best lists N=4 # optimize (approx) BLEU4 -scorer=approx_bleu # use 'stupid' BLEU+1 +scorer=stupid_bleu # use 'stupid' BLEU+1 learning_rate=0.0001 # learning rate gamma=0 # use SVM reg sample_from=kbest # use kbest lists (as opposed to forest) filter=uniq # only unique entries in kbest (surface form) pair_sampling=XYX -hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 +hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0) select_weights=VOID # don't output weights -- cgit v1.2.3