From 72b07dfc1534862aea06c102b4382513183ce253 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 15 Mar 2013 09:56:26 +0100
Subject: added fixed BLEU+1
---
training/dtrain/dtrain.cc | 2 ++
training/dtrain/score.cc | 31 ++++++++++++++++++++++++++++++-
training/dtrain/score.h | 5 +++++
3 files changed, 37 insertions(+), 1 deletion(-)
(limited to 'training/dtrain')
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index b317c365..53487d34 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -163,6 +163,8 @@ main(int argc, char** argv)
scorer = dynamic_cast(new BleuScorer);
} else if (scorer_str == "stupid_bleu") {
scorer = dynamic_cast(new StupidBleuScorer);
+ } else if (scorer_str == "fixed_stupid_bleu") {
+ scorer = dynamic_cast(new FixedStupidBleuScorer);
} else if (scorer_str == "smooth_bleu") {
scorer = dynamic_cast(new SmoothBleuScorer);
} else if (scorer_str == "sum_bleu") {
diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc
index 34fc86a9..96d6e10a 100644
--- a/training/dtrain/score.cc
+++ b/training/dtrain/score.cc
@@ -49,7 +49,7 @@ BleuScorer::Score(vector& hyp, vector& ref,
* for Machine Translation"
* (Lin & Och '04)
*
- * NOTE: 0 iff no 1gram match
+ * NOTE: 0 iff no 1gram match ('grounded')
*/
score_t
StupidBleuScorer::Score(vector& hyp, vector& ref,
@@ -73,6 +73,35 @@ StupidBleuScorer::Score(vector& hyp, vector& ref,
return brevity_penalty(hyp_len, ref_len) * exp(sum);
}
+/*
+ * fixed 'stupid' bleu, as in "Optimizing for Sentence-Level BLEU+1
+ * Yields Short Translations" (Nakov et al. '12)
+ *
+ * NOTE: 0 if hyp or ref is empty, or if the counts at index 0 (presumably
+ * 1grams) are zero; 1 is added to both counts from index 1 on; brevity_penalty() gets ref_len+1
+ */
+score_t
+FixedStupidBleuScorer::Score(vector& hyp, vector& ref, // NOTE(review): 'vector' shows no template arguments here -- possibly lost when this patch was rendered; confirm against the repository
+ const unsigned /*rank*/, const unsigned /*src_len*/) // rank and src_len are unnamed, i.e. not used by this scorer
+{
+ unsigned hyp_len = hyp.size(), ref_len = ref.size(); // lengths of hypothesis and reference
+ if (hyp_len == 0 || ref_len == 0) return 0.; // empty hypothesis or reference scores 0
+ NgramCounts counts = make_ngram_counts(hyp, ref, N_); // counts.clipped_[i] and counts.sum_[i] are read per index i below
+ unsigned M = N_; // number of indices (presumably ngram orders) that enter the score
+ vector v = w_; // local copy of the weights w_, so they can be overwritten below
+ if (ref_len < N_) { // reference has fewer than N_ elements:
+ M = ref_len; // only score ref_len indices ...
+ for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); // ... each with uniform weight 1/M
+ }
+ score_t sum = 0, add = 0; // sum: weighted sum of logs; add: constant added to both counts, 0 for index 0
+ for (unsigned i = 0; i < M; i++) {
+ if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; // zero counts at index 0: score 0, and the unsmoothed log() below never sees a zero numerator or denominator
+ if (i == 1) add = 1; // from index 1 on, 1 is added to numerator and denominator
+ sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); // weighted log of the (smoothed) clipped/sum ratio
+ }
+ return brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix: brevity_penalty() is given ref_len+1 rather than ref_len
+}
+
/*
* smooth bleu
*
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index f317c903..bddaa071 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -148,6 +148,11 @@ struct StupidBleuScorer : public LocalScorer
score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
};
+struct FixedStupidBleuScorer : public LocalScorer // scorer that dtrain.cc selects for "fixed_stupid_bleu"; Score() is defined in score.cc
+{
+ score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); // scores hyp against ref; rank and src_len are unnamed in the definition, i.e. not used
+};
+
struct SmoothBleuScorer : public LocalScorer
{
score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
--
cgit v1.2.3