diff options
Diffstat (limited to 'dtrain/score.cc')
-rw-r--r-- | dtrain/score.cc | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/dtrain/score.cc b/dtrain/score.cc
index 7b1f6be4..5c356c0f 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -103,11 +103,32 @@ SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
         i_bleu[j] += (1/((score_t)j+1)) * i_ng;
       }
     }
-    sum += exp(i_bleu[i])/(pow(2.0, static_cast<double>(N_-i)));
+    sum += exp(i_bleu[i])/(pow(2.0, N_-i));
   }
   return brevity_penalty(hyp_len, ref_len) * sum;
 }
 
+// variant of smooth_bleu; i-Bleu scores only single 'i'
+score_t
+SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+                              const unsigned /*rank*/, const unsigned /*src_len*/)
+{
+  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  if (hyp_len == 0 || ref_len == 0) return 0.;
+  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  unsigned M = N_;
+  if (ref_len < N_) M = ref_len;
+  score_t sum = 0.;
+  unsigned j = 1;
+  for (unsigned i = 0; i < M; i++) {
+    if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break;
+    sum += ((score_t)counts.clipped_[i]/counts.sum_[i])/pow(2., N_-j+1);
+    j++;
+  }
+  return brevity_penalty(hyp_len, ref_len) * sum;
+}
+
+
 /*
  * approx. bleu
  *
@@ -140,6 +161,38 @@ ApproxBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
   return (score_t)glob_src_len_ * score;
 }
 
+/*
+ * Linear (Corpus) Bleu
+ *
+ * as in "Lattice Minimum Bayes-Risk Decoding
+ * for Statistical Machine Translation"
+ * (Tromble et al. '08)
+ *
+ */
+score_t
+LinearBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+                        const unsigned rank, const unsigned /*src_len*/)
+{
+  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  if (ref_len == 0) return 0.;
+  unsigned M = N_;
+  if (ref_len < N_) M = ref_len;
+  NgramCounts counts(M);
+  if (hyp_len > 0)
+    counts = make_ngram_counts(hyp, ref, M);
+  score_t ret = 0.;
+  for (unsigned i = 0; i < M; i++) {
+    if (counts.sum_[i] == 0 || onebest_counts_.sum_[i] == 0) break;
+    ret += counts.sum_[i]/onebest_counts_.sum_[i];
+  }
+  ret = -(hyp_len/(score_t)onebest_len_) + (1./M) * ret;
+  if (rank == 0) {
+    onebest_len_ += hyp_len;
+    onebest_counts_ += counts;
+  }
+  return ret;
+}
+
 
 } // namespace
 