diff options
author | Patrick Simianer <p@simianer.de> | 2013-03-15 09:56:26 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-03-15 09:56:26 +0100 |
commit | 08d5de939f85075fc1569ddfa545b5d815231c3f (patch) | |
tree | 6f567e3ebc8e74e0b9b60d4c1fddb5a615622dc3 /training | |
parent | 6ffdc1024331eef71aba5ac1c3e670c8393e07dc (diff) |
added fixed BLEU+1
Diffstat (limited to 'training')
-rw-r--r-- | training/dtrain/dtrain.cc | 2 | ||||
-rw-r--r-- | training/dtrain/score.cc | 31 | ||||
-rw-r--r-- | training/dtrain/score.h | 5 |
3 files changed, 37 insertions, 1 deletion
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index b317c365..53487d34 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -163,6 +163,8 @@ main(int argc, char** argv)
     scorer = dynamic_cast<BleuScorer*>(new BleuScorer);
   } else if (scorer_str == "stupid_bleu") {
     scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer);
+  } else if (scorer_str == "fixed_stupid_bleu") {
+    scorer = dynamic_cast<FixedStupidBleuScorer*>(new FixedStupidBleuScorer);
   } else if (scorer_str == "smooth_bleu") {
     scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer);
   } else if (scorer_str == "sum_bleu") {
diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc
index 34fc86a9..96d6e10a 100644
--- a/training/dtrain/score.cc
+++ b/training/dtrain/score.cc
@@ -49,7 +49,7 @@ BleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
  * for Machine Translation"
  * (Lin & Och '04)
  *
- * NOTE: 0 iff no 1gram match
+ * NOTE: 0 iff no 1gram match ('grounded')
  */
 score_t
 StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
@@ -74,6 +74,35 @@ StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
 }
 
 /*
+ * fixed 'stupid' bleu
+ *
+ * as in "Optimizing for Sentence-Level BLEU+1
+ *        Yields Short Translations"
+ * (Nakov et al. '12)
+ */
+score_t
+FixedStupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+                             const unsigned /*rank*/, const unsigned /*src_len*/)
+{
+  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  if (hyp_len == 0 || ref_len == 0) return 0.;
+  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  unsigned M = N_;
+  vector<score_t> v = w_;
+  if (ref_len < N_) {
+    M = ref_len;
+    for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M);
+  }
+  score_t sum = 0, add = 0;
+  for (unsigned i = 0; i < M; i++) {
+    if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.;
+    if (i == 1) add = 1;
+    sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add)));
+  }
+  return brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix
+}
+
+/*
  * smooth bleu
  *
  * as in "An End-to-End Discriminative Approach
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index f317c903..bddaa071 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -148,6 +148,11 @@ struct StupidBleuScorer : public LocalScorer
   score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
 };
 
+struct FixedStupidBleuScorer : public LocalScorer
+{
+  score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+};
+
 struct SmoothBleuScorer : public LocalScorer
 {
   score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);