diff options
Diffstat (limited to 'lib/nlp_ruby/bleu.rb')
-rw-r--r-- | lib/nlp_ruby/bleu.rb | 25 |
1 file changed, 22 insertions, 3 deletions
diff --git a/lib/nlp_ruby/bleu.rb b/lib/nlp_ruby/bleu.rb
index 42be45e..ee91985 100644
--- a/lib/nlp_ruby/bleu.rb
+++ b/lib/nlp_ruby/bleu.rb
@@ -79,12 +79,12 @@ def BLEU::get_counts hypothesis, reference, n, times=1
   return p
 end
 
-def BLEU::brevity_penalty(c, r)
-  if c > r then return 1.0 end
+def BLEU::brevity_penalty c, r
+  return 1.0 if c>r
   return Math.exp(1-r/c)
 end
 
-def BLEU::bleu(counts, n, debug=false)
+def BLEU::bleu counts, n, debug=false
   corpus_stats = NgramCounts.new n
   counts.each { |i| corpus_stats.plus_eq i }
   sum = 0.0
@@ -105,6 +105,25 @@ def BLEU::hbleu counts, n, debug=false
   (100*bleu(counts, n, debug)).round(3)
 end
 
+def BLEU::per_sentence_bleu hypothesis, reference, n=4
+  h_ng = {}; r_ng = {}
+  (1).upto(n) {|i| h_ng[i] = []; r_ng[i] = []}
+  ngrams(hypothesis, n) {|i| h_ng[i.size] << i}
+  ngrams(reference, n) {|i| r_ng[i.size] << i}
+  m = [n, reference.split.size].min
+  weight = 1.0/m
+  add = 0.0
+  sum = 0
+  (1).upto(m) { |i|
+    counts_clipped = 0
+    counts_sum = h_ng[i].size
+    h_ng[i].uniq.each {|j| counts_clipped += r_ng[i].count(j)}
+    add = 1.0 if i >= 2
+    sum += weight * Math.log((counts_clipped + add)/(counts_sum + add));
+  }
+  return brevity_penalty(hypothesis.strip.split.size, reference.strip.split.size) * Math.exp(sum)
+end
+
 end
 
 