diff options
Diffstat (limited to 'per_sentence_bleu')
-rwxr-xr-x | per_sentence_bleu | 67 |
1 files changed, 67 insertions, 0 deletions
#!/usr/bin/env ruby

# Prints a smoothed sentence-level BLEU score for every line read from the
# input (a file or STDIN), scored against the line with the same index in the
# references file. One score is written to STDOUT per input line.

require 'trollop'

# Yields the n-grams of the whitespace-tokenized string +s+ as Arrays of
# tokens, position by position, shortest n-gram first.
#
# s   - String to tokenize on whitespace.
# n   - Integer maximum n-gram order.
# fix - false (default): yield every order from 1 up to +n+;
#       true: yield only the n-grams of exactly +n+ tokens.
#
# Returns an Enumerator when called without a block.
def ngrams_it(s, n, fix=false)
  return enum_for(:ngrams_it, s, n, fix) unless block_given?

  tokens = s.strip.split
  tokens.each_index do |start|
    # Near the end of the sentence fewer than n tokens remain.
    longest = [n, tokens.size - start].min
    1.upto(longest) do |len|
      next if fix && len != n

      yield tokens[start, len]
    end
  end
end

# Counts the n-grams of +s+ up to order +n+.
#
# Returns a Hash mapping order (Integer) => { n-gram (Array) => count };
# unseen orders and unseen n-grams read as empty / 0.
def ngram_counts(s, n)
  counts = Hash.new { |by_order, order| by_order[order] = Hash.new(0) }
  ngrams_it(s, n) { |gram| counts[gram.size][gram] += 1 }
  counts
end

# BLEU brevity penalty computed on whitespace token counts.
#
# Returns 1.0 when the hypothesis is at least as long as the reference,
# exp(1 - |reference| / |hypothesis|) when it is shorter, and 0.0 for an
# empty hypothesis (the limit of that expression, and it avoids 0/0 => NaN
# when the reference is empty too).
def brevity_penalty(hypothesis, reference)
  hyp_len = hypothesis.split.size
  ref_len = reference.split.size
  return 0.0 if hyp_len.zero?
  return 1.0 if hyp_len >= ref_len

  Math.exp(1.0 - ref_len.to_f / hyp_len)
end

# Smoothed per-sentence BLEU of +hypothesis+ against a single +reference+.
#
# hypothesis - String, whitespace-separated tokens.
# reference  - String, whitespace-separated tokens.
# n          - Integer maximum n-gram order (default 4). The order actually
#              used is capped at the reference length, with uniform weights.
#
# Precisions for orders >= 2 get add-one smoothing on numerator and
# denominator; the unigram precision is unsmoothed, so a hypothesis sharing
# no token with the reference scores 0.0.
#
# Returns a Float; 0.0 when the hypothesis or the reference has no tokens.
def per_sentence_bleu(hypothesis, reference, n=4)
  max_order = [n, reference.split.size].min
  return 0.0 if max_order < 1 || hypothesis.split.empty?

  hyp_counts = ngram_counts(hypothesis, n)
  ref_counts = ngram_counts(reference, n)
  weight = 1.0 / max_order
  log_precision = 0.0
  1.upto(max_order) do |order|
    add = order >= 2 ? 1.0 : 0.0
    total = hyp_counts[order].values.inject(0, :+)
    # Clip each hypothesis n-gram count by its count in the reference so that
    # a precision can never exceed 1.
    clipped = hyp_counts[order].inject(0) do |acc, (gram, count)|
      acc + [count, ref_counts[order][gram]].min
    end
    # Math.log(0.0) is -Infinity, which turns the final score into 0.0.
    log_precision += weight * Math.log((clipped + add) / (total + add))
  end
  brevity_penalty(hypothesis, reference) * Math.exp(log_precision)
end

# Command-line entry point: parses the options, then prints one score per
# input line. Blank input lines score 0.0 without consulting the references.
# Aborts with a message when a non-blank input line has no reference.
def main
  opts = Trollop::options do
    opt :input, "input", :type => :string, :default => '-'
    opt :references, "references", :type => :string, :required => true
  end

  refs = File.readlines(opts[:references]).map(&:strip)
  from_stdin = opts[:input] == '-'
  input = from_stdin ? STDIN : File.open(opts[:input], 'r')
  begin
    input.each_line.with_index do |line, idx|
      hypothesis = line.strip
      if hypothesis.empty?
        puts 0.0
        next
      end
      reference = refs[idx]
      if reference.nil?
        abort "no reference for input line #{idx + 1} " \
              "(#{opts[:references]} has #{refs.size} lines)"
      end
      puts per_sentence_bleu(hypothesis, reference)
    end
  ensure
    input.close unless from_stdin
  end
end

# Run the CLI only when executed as a script, not when loaded as a library.
main if __FILE__ == $PROGRAM_NAME