diff options
-rw-r--r-- | dtrain/README.md | 2 | ||||
-rw-r--r-- | dtrain/dtrain.cc | 8 | ||||
-rw-r--r-- | dtrain/dtrain.h | 2 | ||||
-rw-r--r-- | dtrain/score.cc | 62 | ||||
-rw-r--r-- | dtrain/score.h | 12 |
5 files changed, 78 insertions, 8 deletions
diff --git a/dtrain/README.md b/dtrain/README.md index 7aefcc55..843874fa 100644 --- a/dtrain/README.md +++ b/dtrain/README.md @@ -58,6 +58,8 @@ Next + bigger LM, feats (target side Ng., word alignments etc.) + merge kbest lists + proper eval, pairwise ranking, forced transl ++ smooth bleu variants X ++ MMERT exp Legal ----- diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index 7f44d4cf..eea58393 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -165,8 +165,12 @@ main(int argc, char** argv) scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer); } else if (scorer_str == "smooth_bleu") { scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer); - } else if (scorer_str == "smooth_single_bleu") { - scorer = dynamic_cast<SmoothSingleBleuScorer*>(new SmoothSingleBleuScorer); + } else if (scorer_str == "sum_bleu") { + scorer = dynamic_cast<SumBleuScorer*>(new SumBleuScorer); + } else if (scorer_str == "sumexp_bleu") { + scorer = dynamic_cast<SumExpBleuScorer*>(new SumExpBleuScorer); + } else if (scorer_str == "sumwhatever_bleu") { + scorer = dynamic_cast<SumWhateverBleuScorer*>(new SumWhateverBleuScorer); } else if (scorer_str == "approx_bleu") { scorer = dynamic_cast<ApproxBleuScorer*>(new ApproxBleuScorer(N, approx_bleu_d)); } else if (scorer_str == "lc_bleu") { diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h index d8dc14b6..7e084a79 100644 --- a/dtrain/dtrain.h +++ b/dtrain/dtrain.h @@ -3,7 +3,7 @@ #undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs // DO NOT USE WITH SVM! -#undef DTRAIN_LOCAL +#define DTRAIN_LOCAL #define DTRAIN_DOTS 10 // after how many inputs to display a '.' #define DTRAIN_GRAMMAR_DELIM "########EOS########" #define DTRAIN_SCALE 100000 diff --git a/dtrain/score.cc b/dtrain/score.cc index 5bb0bcaa..4a7cac6e 100644 --- a/dtrain/score.cc +++ b/dtrain/score.cc @@ -80,7 +80,7 @@ StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, * to Machine Translation" * (Liang et al. 
'06) * - * NOTE: max is 0.9375 + * NOTE: max is 0.9375 (with N=4) */ score_t SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, @@ -108,9 +108,13 @@ SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, return brevity_penalty(hyp_len, ref_len) * sum; } -// variant of smooth_bleu; i-Bleu scores only single 'i' +/* + * 'sum' bleu + * + * sum up Ngram precisions + */ score_t -SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, +SumBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/) { unsigned hyp_len = hyp.size(), ref_len = ref.size(); @@ -128,7 +132,57 @@ SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, return brevity_penalty(hyp_len, ref_len) * sum; } -// TODO single variants! +/* + * 'sum' (exp) bleu + * + * sum up exp(Ngram precisions) + */ +score_t +SumExpBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + if (ref_len < N_) M = ref_len; + score_t sum = 0.; + unsigned j = 1; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; + sum += exp(((score_t)counts.clipped_[i]/counts.sum_[i]))/pow(2., N_-j+1); + j++; + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * 'sum' (whatever) bleu + * + * sum up exp(weight * log(Ngram precisions)) + */ +score_t +SumWhateverBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + vector<score_t> v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; 
i++) v[i] = 1/((score_t)M); + } + score_t sum = 0.; + unsigned j = 1; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; + sum += exp(v[i] * log(((score_t)counts.clipped_[i]/counts.sum_[i])))/pow(2., N_-j+1); + j++; + } + return brevity_penalty(hyp_len, ref_len) * sum; +} /* * approx. bleu diff --git a/dtrain/score.h b/dtrain/score.h index c5be2829..f317c903 100644 --- a/dtrain/score.h +++ b/dtrain/score.h @@ -153,7 +153,17 @@ struct SmoothBleuScorer : public LocalScorer score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); }; -struct SmoothSingleBleuScorer : public LocalScorer +struct SumBleuScorer : public LocalScorer +{ + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SumExpBleuScorer : public LocalScorer +{ + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SumWhateverBleuScorer : public LocalScorer { score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); }; |