From 1deb94cada467ac0fe171ae4182c37c3d217c17a Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 24 Mar 2015 19:54:55 +0100
Subject: dtrain: resurrected scores
---
training/dtrain/dtrain.cc | 35 ++-
training/dtrain/dtrain.h | 9 +-
training/dtrain/example/standard/dtrain.ini | 1 +
training/dtrain/sample.h | 6 +-
training/dtrain/score.h | 356 ++++++++++++++++++++++------
5 files changed, 327 insertions(+), 80 deletions(-)
(limited to 'training')
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index e5cfd50a..97df530b 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -10,9 +10,9 @@ main(int argc, char** argv)
{
// get configuration
po::variables_map conf;
- if (!dtrain_init(argc, argv, &conf))
- exit(1); // something is wrong
+ dtrain_init(argc, argv, &conf);
const size_t k = conf["k"].as();
+ const string score_name = conf["score"].as();
const size_t N = conf["N"].as();
const size_t T = conf["iterations"].as();
const weight_t eta = conf["learning_rate"].as();
@@ -25,12 +25,28 @@ main(int argc, char** argv)
boost::split(print_weights, conf["print_weights"].as(),
boost::is_any_of(" "));
- // setup decoder
+ // setup decoder and scorer
register_feature_functions();
SetSilent(true);
ReadFile f(conf["decoder_conf"].as());
Decoder decoder(f.stream());
- ScoredKbest* observer = new ScoredKbest(k, new PerSentenceBleuScorer(N));
+ Scorer* scorer;
+ if (score_name == "nakov") {
+ scorer = static_cast(new PerSentenceBleuScorer(N));
+ } else if (score_name == "papineni") {
+ scorer = static_cast(new BleuScorer(N));
+ } else if (score_name == "lin") {
+ scorer = static_cast\
+ (new OriginalPerSentenceBleuScorer(N));
+ } else if (score_name == "liang") {
+ scorer = static_cast\
+ (new SmoothPerSentenceBleuScorer(N));
+ } else if (score_name == "chiang") {
+ scorer = static_cast(new ApproxBleuScorer(N));
+ } else {
+ assert(false);
+ }
+ ScoredKbest* observer = new ScoredKbest(k, scorer);
// weights
vector& decoder_weights = decoder.CurrentWeightVector();
@@ -52,6 +68,7 @@ main(int argc, char** argv)
// output configuration
cerr << "dtrain" << endl << "Parameters:" << endl;
cerr << setw(25) << "k " << k << endl;
+ cerr << setw(25) << "score " << "'" << score_name << "'" << endl;
cerr << setw(25) << "N " << N << endl;
cerr << setw(25) << "T " << T << endl;
cerr << setw(25) << "learning rate " << eta << endl;
@@ -149,6 +166,16 @@ main(int argc, char** argv)
lambdas_copy = lambdas;
lambdas.plus_eq_v_times_s(updates, eta);
+ // update context for approx. BLEU
+ if (score_name == "chiang") {
+ for (auto it: *samples) {
+ if (it.rank == 0) {
+ scorer->UpdateContext(it.w, buf_ngs[i], buf_ls[i], 0.9);
+ break;
+ }
+ }
+ }
+
// l1 regularization
// NB: regularization is done after each sentence,
// not after every single pair!
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index b0ee348c..2636fa89 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -43,7 +43,7 @@ inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); }
inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); }
inline ostream& _p4(ostream& out) { return out << setprecision(4); }
-bool
+void
dtrain_init(int argc, char** argv, po::variables_map* conf)
{
po::options_description ini("Configuration File Options");
@@ -55,6 +55,7 @@ dtrain_init(int argc, char** argv, po::variables_map* conf)
("learning_rate,l", po::value()->default_value(1.0), "learning rate")
("l1_reg,r", po::value()->default_value(0.), "l1 regularization strength")
("margin,m", po::value()->default_value(0.), "margin for margin perceptron")
+ ("score,s", po::value()->default_value("nakov"), "per-sentence BLEU approx.")
("N", po::value()->default_value(4), "N for BLEU approximation")
("input_weights,w", po::value(), "input weights file")
("average,a", po::value()->default_value(false), "output average weights")
@@ -74,14 +75,12 @@ dtrain_init(int argc, char** argv, po::variables_map* conf)
po::notify(*conf);
if (!conf->count("decoder_conf")) {
cerr << "Missing decoder configuration." << endl;
- return false;
+ assert(false);
}
if (!conf->count("bitext")) {
cerr << "No input given." << endl;
- return false;
+ assert(false);
}
-
- return true;
}
} // namespace
diff --git a/training/dtrain/example/standard/dtrain.ini b/training/dtrain/example/standard/dtrain.ini
index a0d64fa0..c52bef4a 100644
--- a/training/dtrain/example/standard/dtrain.ini
+++ b/training/dtrain/example/standard/dtrain.ini
@@ -7,3 +7,4 @@ N=4 # optimize (approx.) BLEU4
learning_rate=0.1 # learning rate
margin=1.0 # margin for margin perceptron
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+score=nakov
diff --git a/training/dtrain/sample.h b/training/dtrain/sample.h
index 03cc82c3..1249e372 100644
--- a/training/dtrain/sample.h
+++ b/training/dtrain/sample.h
@@ -13,15 +13,15 @@ struct ScoredKbest : public DecoderObserver
const size_t k_;
size_t feature_count_, effective_sz_;
vector samples_;
- PerSentenceBleuScorer* scorer_;
+ Scorer* scorer_;
vector* ref_ngs_;
vector* ref_ls_;
- ScoredKbest(const size_t k, PerSentenceBleuScorer* scorer) :
+ ScoredKbest(const size_t k, Scorer* scorer) :
k_(k), scorer_(scorer) {}
virtual void
- NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
+ NotifyTranslationForest(const SentenceMetadata& /*smeta*/, Hypergraph* hg)
{
samples_.clear(); effective_sz_ = feature_count_ = 0;
KBest::KBestDerivations, ESentenceTraversal,
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index 06dbc5a4..ca3da39b 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -12,6 +12,8 @@ struct NgramCounts
map clipped_;
map sum_;
+ NgramCounts() {}
+
NgramCounts(const size_t N) : N_(N) { Zero(); }
inline void
@@ -24,13 +26,13 @@ struct NgramCounts
}
}
- inline const NgramCounts
- operator+(const NgramCounts &other) const
+ inline void
+ operator*=(const weight_t rhs)
{
- NgramCounts result = *this;
- result += other;
-
- return result;
+ for (unsigned i = 0; i < N_; i++) {
+ this->clipped_[i] *= rhs;
+ this->sum_[i] *= rhs;
+ }
}
inline void
@@ -112,85 +114,303 @@ MakeNgramCounts(const vector& hyp,
return counts;
}
+class Scorer
+{
+ protected:
+ const size_t N_;
+ vector w_;
+
+ public:
+ Scorer(size_t n): N_(n)
+ {
+ for (size_t i = 1; i <= N_; i++)
+ w_.push_back(1.0/N_);
+ }
+
+ inline bool
+ Init(const vector& hyp,
+ const vector& ref_ngs,
+ const vector& ref_ls,
+ size_t& hl,
+ size_t& rl,
+ size_t& M,
+ vector& v,
+ NgramCounts& counts)
+ {
+ hl = hyp.size();
+ if (hl == 0) return false;
+ rl = BestMatchLength(hl, ref_ls);
+ if (rl == 0) return false;
+ counts = MakeNgramCounts(hyp, ref_ngs, N_);
+ if (rl < N_) {
+ M = rl;
+ for (size_t i = 0; i < M; i++) v.push_back(1/((weight_t)M));
+ } else {
+ M = N_;
+ v = w_;
+ }
+
+ return true;
+ }
+
+ inline weight_t
+ BrevityPenalty(const size_t hl, const size_t rl)
+ {
+ if (hl > rl)
+ return 1;
+
+ return exp(1 - (weight_t)rl/hl);
+ }
+
+ inline size_t
+ BestMatchLength(const size_t hl,
+ const vector& ref_ls)
+ {
+ size_t m;
+ if (ref_ls.size() == 1) {
+ m = ref_ls.front();
+ } else {
+ size_t i = 0, best_idx = 0;
+ size_t best = numeric_limits::max();
+ for (auto l: ref_ls) {
+ size_t d = abs(hl-l);
+ if (d < best) {
+ best_idx = i;
+ best = d;
+ }
+ i += 1;
+ }
+ m = ref_ls[best_idx];
+ }
+
+ return m;
+ }
+
+ virtual weight_t
+ Score(const vector&,
+ const vector&,
+ const vector&) = 0;
+
+ void
+ UpdateContext(const vector& /*hyp*/,
+ const vector& /*ref_ngs*/,
+ const vector& /*ref_ls*/,
+ weight_t /*decay*/) {}
+};
+
/*
- * per-sentence BLEU
+ * 'fixed' per-sentence BLEU
+ * simply add 1 to reference length for calculation of BP
+ *
* as in "Optimizing for Sentence-Level BLEU+1
* Yields Short Translations"
* (Nakov et al. '12)
*
- * [simply add 1 to reference length for calculation of BP]
+ */
+class PerSentenceBleuScorer : public Scorer
+{
+ public:
+ PerSentenceBleuScorer(size_t n) : Scorer(n) {}
+
+ weight_t
+ Score(const vector& hyp,
+ const vector& ref_ngs,
+ const vector& ref_ls)
+ {
+ size_t hl, rl, M;
+ vector v;
+ NgramCounts counts;
+ if (!Init(hyp, ref_ngs, ref_ls, hl, rl, M, v, counts))
+ return 0.;
+ weight_t sum=0, add=0;
+ for (size_t i = 0; i < M; i++) {
+ if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.;
+ if (i > 0) add = 1;
+ sum += v[i] * log(((weight_t)counts.clipped_[i] + add)
+ / ((counts.sum_[i] + add)));
+ }
+
+ return BrevityPenalty(hl, rl+1) * exp(sum);
+ }
+};
+
+
+/*
+ * BLEU
+ * 0 if for one n \in {1..N} count is 0
+ *
+ * as in "BLEU: a Method for Automatic Evaluation
+ * of Machine Translation"
+ * (Papineni et al. '02)
*
*/
-struct PerSentenceBleuScorer
+
+class BleuScorer : public Scorer
{
- const size_t N_;
- vector w_;
+ public:
+ BleuScorer(size_t n) : Scorer(n) {}
- PerSentenceBleuScorer(size_t n) : N_(n)
- {
- for (size_t i = 1; i <= N_; i++)
- w_.push_back(1.0/N_);
- }
+ weight_t
+ Score(const vector& hyp,
+ const vector& ref_ngs,
+ const vector& ref_ls)
+ {
+ size_t hl, rl, M;
+ vector v;
+ NgramCounts counts;
+ if (!Init(hyp, ref_ngs, ref_ls, hl, rl, M, v, counts))
+ return 0.;
+ weight_t sum = 0;
+ for (size_t i = 0; i < M; i++) {
+ if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.;
+ sum += v[i] * log((weight_t)counts.clipped_[i]/counts.sum_[i]);
+ }
- inline weight_t
- BrevityPenalty(const size_t hl, const size_t rl)
- {
- if (hl > rl)
- return 1;
+ return BrevityPenalty(hl, rl) * exp(sum);
+ }
+};
- return exp(1 - (weight_t)rl/hl);
- }
+/*
+ * original BLEU+1
+ * 0 iff no 1gram match ('grounded')
+ *
+ * as in "ORANGE: a Method for Evaluating
+ * Automatic Evaluation Metrics
+ * for Machine Translation"
+ * (Lin & Och '04)
+ *
+ */
+class OriginalPerSentenceBleuScorer : public Scorer
+{
+ public:
+ OriginalPerSentenceBleuScorer(size_t n) : Scorer(n) {}
- inline size_t
- BestMatchLength(const size_t hl,
- const vector& ref_ls)
- {
- size_t m;
- if (ref_ls.size() == 1) {
- m = ref_ls.front();
- } else {
- size_t i = 0, best_idx = 0;
- size_t best = numeric_limits::max();
- for (auto l: ref_ls) {
- size_t d = abs(hl-l);
- if (d < best) {
- best_idx = i;
- best = d;
+ weight_t
+ Score(const vector& hyp,
+ const vector& ref_ngs,
+ const vector& ref_ls)
+ {
+ size_t hl, rl, M;
+ vector v;
+ NgramCounts counts;
+ if (!Init(hyp, ref_ngs, ref_ls, hl, rl, M, v, counts))
+ return 0.;
+ weight_t sum=0, add=0;
+ for (size_t i = 0; i < M; i++) {
+ if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.;
+ if (i == 1) add = 1;
+ sum += v[i] * log(((weight_t)counts.clipped_[i] + add)/((counts.sum_[i] + add)));
+ }
+
+ return BrevityPenalty(hl, rl) * exp(sum);
+ }
+};
+
+/*
+ * smooth BLEU
+ * max is 0.9375 (with N=4)
+ *
+ * as in "An End-to-End Discriminative Approach
+ * to Machine Translation"
+ * (Liang et al. '06)
+ *
+ */
+class SmoothPerSentenceBleuScorer : public Scorer
+{
+ public:
+ SmoothPerSentenceBleuScorer(size_t n) : Scorer(n) {}
+
+ weight_t
+ Score(const vector& hyp,
+ const vector& ref_ngs,
+ const vector& ref_ls)
+ {
+ size_t hl=hyp.size(), rl=BestMatchLength(hl, ref_ls);
+ if (hl == 0 || rl == 0) return 0.;
+ NgramCounts counts = MakeNgramCounts(hyp, ref_ngs, N_);
+ size_t M = N_;
+ if (rl < N_) M = rl;
+ weight_t sum = 0.;
+ vector i_bleu;
+ for (size_t i=0; i < M; i++)
+ i_bleu.push_back(0.);
+ for (size_t i=0; i < M; i++) {
+ if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) {
+ break;
+ } else {
+ weight_t i_score = log((weight_t)counts.clipped_[i]/counts.sum_[i]);
+ for (size_t j=i; j < M; j++) {
+ i_bleu[j] += (1/((weight_t)j+1)) * i_score;
+ }
}
- i += 1;
+ sum += exp(i_bleu[i])/pow(2.0, (double)(N_-i+2));
}
- m = ref_ls[best_idx];
+
+ return BrevityPenalty(hl, hl) * sum;
+ }
+};
+
+/*
+ * approx. bleu
+ * Needs some more code in dtrain.cc .
+ * We do not scaling by source lengths, as hypotheses are compared only
+ * within an kbest list, not globally.
+ *
+ * as in "Online Large-Margin Training of Syntactic
+ * and Structural Translation Features"
+ * (Chiang et al. '08)
+ *
+
+ */
+class ApproxBleuScorer : public Scorer
+{
+ private:
+ NgramCounts context;
+ weight_t hyp_sz_sum;
+ weight_t ref_sz_sum;
+
+ public:
+ ApproxBleuScorer(size_t n) :
+ Scorer(n), context(n), hyp_sz_sum(0), ref_sz_sum(0) {}
+
+ weight_t
+ Score(const vector& hyp,
+ const vector& ref_ngs,
+ const vector& ref_ls)
+ {
+ size_t hl, rl, M;
+ vector v;
+ NgramCounts counts;
+ if (!Init(hyp, ref_ngs, ref_ls, hl, rl, M, v, counts))
+ return 0.;
+ counts += context;
+ weight_t sum = 0;
+ for (size_t i = 0; i < M; i++) {
+ if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.;
+ sum += v[i] * log((weight_t)counts.clipped_[i]/counts.sum_[i]);
+ }
+
+ return BrevityPenalty(hyp_sz_sum+hl, ref_sz_sum+rl) * exp(sum);
}
- return m;
- }
+ void
+ UpdateContext(const vector& hyp,
+ const vector& ref_ngs,
+ const vector& ref_ls,
+ weight_t decay=0.9)
+ {
+ size_t hl, rl, M;
+ vector v;
+ NgramCounts counts;
+ Init(hyp, ref_ngs, ref_ls, hl, rl, M, v, counts);
- weight_t
- Score(const vector& hyp,
- const vector& ref_ngs,
- const vector& ref_ls)
- {
- size_t hl = hyp.size(), rl = 0;
- if (hl == 0) return 0.;
- rl = BestMatchLength(hl, ref_ls);
- if (rl == 0) return 0.;
- NgramCounts counts = MakeNgramCounts(hyp, ref_ngs, N_);
- size_t M = N_;
- vector v = w_;
- if (rl < N_) {
- M = rl;
- for (size_t i = 0; i < M; i++) v[i] = 1/((weight_t)M);
- }
- weight_t sum = 0, add = 0;
- for (size_t i = 0; i < M; i++) {
- if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.;
- if (i > 0) add = 1;
- sum += v[i] * log(((weight_t)counts.clipped_[i] + add)
- / ((counts.sum_[i] + add)));
- }
-
- return BrevityPenalty(hl, rl+1) * exp(sum);
- }
+ context += counts;
+ context *= decay;
+ hyp_sz_sum += hl;
+ hyp_sz_sum *= decay;
+ ref_sz_sum += rl;
+ ref_sz_sum *= decay;
+ }
};
} // namespace
--
cgit v1.2.3