summary refs log tree commit diff
path: root/per_sentence_bleu
diff options
context:
space:
mode:
Diffstat (limited to 'per_sentence_bleu')
-rwxr-xr-x per_sentence_bleu 67
1 files changed, 67 insertions, 0 deletions
diff --git a/per_sentence_bleu b/per_sentence_bleu
new file mode 100755
index 0000000..f024e74
--- /dev/null
+++ b/per_sentence_bleu
@@ -0,0 +1,67 @@
+#!/usr/bin/env ruby
+
+require 'trollop'
+
+
# Yields the n-grams of a whitespace-tokenized string.
#
# For every start position the n-grams are yielded shortest first
# (unigram, bigram, ...), up to order +n+ or the end of the string,
# whichever comes first.
#
# s   - String to split into tokens on whitespace.
# n   - Integer maximum n-gram order.
# fix - when true, only n-grams of exactly +n+ tokens are yielded;
#       when false (default), all orders 1..n are yielded.
#
# Yields each n-gram as an Array of String tokens.
# Returns the Array of tokens.
def ngrams_it(s, n, fix=false)
  tokens = s.split
  tokens.each_index do |start|
    # Longest n-gram that still fits between +start+ and the last token.
    longest = [n, tokens.size - start].min
    1.upto(longest) do |len|
      # In fixed mode skip every order below n (previously this skipped
      # everything, so fix=true never yielded at all).
      next if fix && len != n
      yield tokens[start, len]
    end
  end
end
+
# Computes the brevity penalty for a hypothesis against a reference.
#
# Lengths are counted in whitespace-separated tokens.
#
# hypothesis - String candidate sentence.
# reference  - String reference sentence.
#
# Returns 1.0 when the hypothesis is longer than the reference,
# exp(1 - reference_length / hypothesis_length) otherwise, and 0.0 for an
# empty hypothesis.
def brevity_penalty(hypothesis, reference)
  hyp_len = hypothesis.split.size
  ref_len = reference.split.size
  # Guard the division below: with no hypothesis tokens the ratio would be
  # Infinity (or NaN when the reference is empty as well).
  return 0.0 if hyp_len.zero?
  return 1.0 if hyp_len > ref_len

  Math.exp(1.0 - ref_len.to_f / hyp_len)
end
+
# Computes a smoothed sentence-level BLEU score.
#
# The score is the brevity penalty times the geometric mean of the modified
# (clipped) n-gram precisions for orders 1..m, where m is +n+ capped at the
# reference length in tokens. Orders >= 2 use add-one smoothing on both the
# clipped count and the total count; unigrams are not smoothed, so a
# hypothesis without any matching unigram scores 0.0.
#
# hypothesis - String candidate sentence (whitespace-tokenized).
# reference  - String reference sentence (whitespace-tokenized).
# n          - Integer maximum n-gram order (default: 4).
#
# Returns the score as a Float; 0.0 when either sentence has no tokens.
def per_sentence_bleu(hypothesis, reference, n=4)
  ref_len = reference.split.size
  # With no tokens on one side nothing can match; this also avoids the
  # 1.0/0 weight and log(0/0) the computation below would run into.
  return 0.0 if hypothesis.split.empty? || ref_len.zero?

  # n-grams grouped by their order (length in tokens).
  hyp_ngrams = Hash.new { |h, order| h[order] = [] }
  ref_ngrams = Hash.new { |h, order| h[order] = [] }
  ngrams_it(hypothesis, n) { |gram| hyp_ngrams[gram.size] << gram }
  ngrams_it(reference, n) { |gram| ref_ngrams[gram.size] << gram }

  max_order = [n, ref_len].min
  weight = 1.0 / max_order
  log_precision = 0.0
  1.upto(max_order) do |order|
    hyp_counts = Hash.new(0)
    hyp_ngrams[order].each { |gram| hyp_counts[gram] += 1 }
    ref_counts = Hash.new(0)
    ref_ngrams[order].each { |gram| ref_counts[gram] += 1 }

    # Clip each hypothesis n-gram count by its count in the reference, so a
    # repeated reference n-gram cannot push the precision above 1.
    clipped = 0
    hyp_counts.each { |gram, count| clipped += [count, ref_counts[gram]].min }
    total = hyp_ngrams[order].size

    smoothing = order >= 2 ? 1.0 : 0.0
    log_precision += weight * Math.log((clipped + smoothing) / (total + smoothing))
  end

  brevity_penalty(hypothesis, reference) * Math.exp(log_precision)
end
+
# Command-line entry point: prints one score per input line.
#
# Options (parsed by Trollop):
#   --input       path of the hypotheses file, or '-' (default) for STDIN
#   --references  path of the references file (required)
#
# Input line k is scored against line k of the references file. A blank
# input line prints 0.0 without looking at its reference. The program aborts
# with a message when a non-blank input line has no reference line.
def main
  opts = Trollop::options do
    opt :input, "input", :type => :string, :default => '-'
    opt :references, "references", :type => :string, :required => true
  end

  # File.readlines opens and closes the file itself, so no handle is leaked.
  refs = File.readlines(opts[:references]).map { |ref| ref.strip }
  from_stdin = opts[:input] == '-'
  input = from_stdin ? STDIN : File.new(opts[:input], 'r')
  begin
    input.each_line.with_index do |line, idx|
      hypothesis = line.strip
      if hypothesis.empty?
        puts 0.0
        next
      end
      reference = refs[idx]
      # Fail loudly instead of crashing on nil further down when the two
      # files are not line-aligned.
      if reference.nil?
        abort "no reference for input line #{idx + 1} (only #{refs.size} lines in #{opts[:references]})"
      end
      puts per_sentence_bleu(hypothesis, reference)
    end
  ensure
    # Close the file even when scoring fails; STDIN is left open.
    input.close unless from_stdin
  end
end

# Only run when executed as a script, so the methods can be loaded on their own.
main if __FILE__ == $PROGRAM_NAME
+