summaryrefslogtreecommitdiff
path: root/training
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2013-03-15 09:56:26 +0100
committer Patrick Simianer <p@simianer.de> 2013-03-15 09:56:26 +0100
commit 08d5de939f85075fc1569ddfa545b5d815231c3f (patch)
tree 6f567e3ebc8e74e0b9b60d4c1fddb5a615622dc3 /training
parent 6ffdc1024331eef71aba5ac1c3e670c8393e07dc (diff)
added fixed BLEU+1
Diffstat (limited to 'training')
-rw-r--r-- training/dtrain/dtrain.cc 2
-rw-r--r-- training/dtrain/score.cc 31
-rw-r--r-- training/dtrain/score.h 5
3 files changed, 37 insertions, 1 deletions
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index b317c365..53487d34 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -163,6 +163,8 @@ main(int argc, char** argv)
scorer = dynamic_cast<BleuScorer*>(new BleuScorer);
} else if (scorer_str == "stupid_bleu") {
scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer);
+ } else if (scorer_str == "fixed_stupid_bleu") {
+ scorer = dynamic_cast<FixedStupidBleuScorer*>(new FixedStupidBleuScorer);
} else if (scorer_str == "smooth_bleu") {
scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer);
} else if (scorer_str == "sum_bleu") {
diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc
index 34fc86a9..96d6e10a 100644
--- a/training/dtrain/score.cc
+++ b/training/dtrain/score.cc
@@ -49,7 +49,7 @@ BleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* for Machine Translation"
* (Lin & Och '04)
*
- * NOTE: 0 iff no 1gram match
+ * NOTE: 0 iff no 1gram match ('grounded')
*/
score_t
StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
@@ -74,6 +74,35 @@ StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
}
/*
+ * fixed 'stupid' bleu: add-one smoothing from the second n-gram index on,
+ * and ref_len+1 (not ref_len) handed to brevity_penalty()
+ * as in "Optimizing for Sentence-Level BLEU+1
+ * Yields Short Translations"
+ * (Nakov et al. '12)
+ */
+score_t
+FixedStupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+ const unsigned /*rank*/, const unsigned /*src_len*/)
+{
+ unsigned hyp_len = hyp.size(), ref_len = ref.size();
+ if (hyp_len == 0 || ref_len == 0) return 0.; // empty hypothesis or empty reference scores 0
+ NgramCounts counts = make_ngram_counts(hyp, ref, N_); // presumably per-order match counts up to order N_ -- confirm against make_ngram_counts
+ unsigned M = N_; // number of count entries folded into the score
+ vector<score_t> v = w_; // local copy of the weights w_, so the override below does not touch the member
+ if (ref_len < N_) { // reference shorter than N_: only the first ref_len entries are used
+ M = ref_len;
+ for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); // uniform weights 1/M over those entries
+ }
+ score_t sum = 0, add = 0; // sum: weighted log-ratio accumulator; add: smoothing term, 0 while i == 0
+ for (unsigned i = 0; i < M; i++) {
+ if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; // nothing counted or matched at index 0: score is 0
+ if (i == 1) add = 1; // from index 1 on, 1 is added to both numerator and denominator
+ sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); // weighted log of the (smoothed) clipped/total ratio
+ }
+ return brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix: the reference length is incremented by 1 for the brevity penalty
+}
+
+/*
* smooth bleu
*
* as in "An End-to-End Discriminative Approach
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index f317c903..bddaa071 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -148,6 +148,11 @@ struct StupidBleuScorer : public LocalScorer
score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
};
+struct FixedStupidBleuScorer : public LocalScorer // scorer selected by "fixed_stupid_bleu" in dtrain.cc; Score() is defined in score.cc
+{
+ score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+};
+
struct SmoothBleuScorer : public LocalScorer
{
score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);