summary | refs | log | tree | commit | diff
path: root/lib/nlp_ruby/bleu.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/nlp_ruby/bleu.rb')
-rw-r--r-- lib/nlp_ruby/bleu.rb 25
1 files changed, 22 insertions, 3 deletions
diff --git a/lib/nlp_ruby/bleu.rb b/lib/nlp_ruby/bleu.rb
index 42be45e..ee91985 100644
--- a/lib/nlp_ruby/bleu.rb
+++ b/lib/nlp_ruby/bleu.rb
@@ -79,12 +79,12 @@ def BLEU::get_counts hypothesis, reference, n, times=1
return p
end
-def BLEU::brevity_penalty(c, r)
- if c > r then return 1.0 end
- return Math.exp(1-r/c)
+# BLEU brevity penalty for a hypothesis of length c scored against a
+# reference of length r: 1.0 when the hypothesis is longer than the
+# reference, exp(1 - r/c) otherwise.
+#
+# c:: hypothesis length (token count)
+# r:: reference length (token count)
+#
+# Returns a Float; 0.0 for an empty hypothesis (c == 0).
+def BLEU::brevity_penalty c, r
+  return 1.0 if c > r
+  # Nothing was produced: the penalty is maximal and there is no length
+  # to divide by.
+  return 0.0 if c == 0
+  # Force float division: per_sentence_bleu passes Integer token counts,
+  # and Integer r/c truncates the ratio (e.g. 4/3 == 1), which would
+  # silently cancel the penalty.
+  return Math.exp(1 - r.to_f/c)
end
-def BLEU::bleu(counts, n, debug=false)
+def BLEU::bleu counts, n, debug=false
corpus_stats = NgramCounts.new n
counts.each { |i| corpus_stats.plus_eq i }
sum = 0.0
@@ -105,6 +105,25 @@ def BLEU::hbleu counts, n, debug=false
(100*bleu(counts, n, debug)).round(3)
end
+# Sentence-level BLEU of a hypothesis string against a single reference
+# string, both tokenized on whitespace.
+#
+# The score is the brevity penalty times the geometric mean of the
+# n-gram precisions for orders 1..m, where m = min(n, reference length).
+# Orders >= 2 get add-one smoothing on numerator and denominator, so a
+# missing higher-order match does not zero the whole score.
+#
+# hypothesis:: candidate sentence (String)
+# reference::  reference sentence (String)
+# n::          maximum n-gram order (default 4)
+#
+# Returns a Float; 0.0 for a hypothesis without tokens.
+def BLEU::per_sentence_bleu hypothesis, reference, n=4
+  hyp_len = hypothesis.strip.split.size
+  ref_len = reference.strip.split.size
+  # An empty hypothesis matches nothing; answering here also keeps
+  # brevity_penalty from dividing by a zero length.
+  return 0.0 if hyp_len == 0
+  h_ng = {}; r_ng = {}
+  (1).upto(n) { |i| h_ng[i] = []; r_ng[i] = [] }
+  # NOTE(review): assumes ngrams yields every n-gram of order 1..n as an
+  # Array of tokens (bucketed below by its size) -- confirm against ngrams.
+  ngrams(hypothesis, n) { |g| h_ng[g.size] << g }
+  ngrams(reference, n) { |g| r_ng[g.size] << g }
+  m = [n, ref_len].min
+  weight = 1.0/m
+  add = 0.0
+  sum = 0
+  (1).upto(m) { |i|
+    hyp_counts = Hash.new(0)
+    h_ng[i].each { |g| hyp_counts[g] += 1 }
+    ref_counts = Hash.new(0)
+    r_ng[i].each { |g| ref_counts[g] += 1 }
+    # Clip each hypothesis n-gram count at its reference count; summing
+    # the raw reference counts instead can push the precision above 1.
+    counts_clipped = 0
+    hyp_counts.each { |g, cnt| counts_clipped += [cnt, ref_counts[g]].min }
+    counts_sum = h_ng[i].size
+    # Smoothing switches on at order 2 and stays on for higher orders.
+    add = 1.0 if i >= 2
+    sum += weight * Math.log((counts_clipped + add)/(counts_sum + add))
+  }
+  return brevity_penalty(hyp_len, ref_len) * Math.exp(sum)
+end
+
end