From 4c08c31d6759bfc1cd9d31a051fe182827345e17 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 25 Jan 2015 13:28:05 +0100 Subject: BLEU: support for multiple references, unit test --- lib/zipf/bleu.rb | 75 ++++++++++++++++++++++++++++++++++++++++--------------- test/bleu/h | 2 ++ test/bleu/r | 2 ++ test/test_bleu.rb | 30 ++++++++++++++++++++++ 4 files changed, 89 insertions(+), 20 deletions(-) create mode 100644 test/bleu/h create mode 100644 test/bleu/r create mode 100755 test/test_bleu.rb diff --git a/lib/zipf/bleu.rb b/lib/zipf/bleu.rb index c07315e..3997c67 100644 --- a/lib/zipf/bleu.rb +++ b/lib/zipf/bleu.rb @@ -62,19 +62,34 @@ class BLEU::Ngrams end end -def BLEU::get_counts hypothesis, reference, n, times=1 +def BLEU::best_match_length hypothesis, references + hyp_len = hypothesis.strip.split.size + ref_lens = references.map { |r| r.strip.split.size } + min = Integer::MAX + min_idx = -1 + ref_lens.each_with_index { |l,i| + min_idx = i if (hyp_len-l).abs < min + } + return hyp_len, ref_lens[min_idx] +end + +def BLEU::get_counts hypothesis, references, n, times=1 p = NgramCounts.new n - r = Ngrams.new - ngrams(reference, n) { |ng| r.add ng } + r = [] + references.each { |reference| + r << Ngrams.new + ngrams(reference, n) { |ng| r.last.add ng } + } h = Ngrams.new ngrams(hypothesis, n) { |ng| h.add ng } h.each { |ng,count| sz = ng.size-1 p.sum[sz] += count * times - p.clipped[sz] += [r.get_count(ng), count].min * times + p.clipped[sz] += [r.map { |i| i.get_count(ng)}.max, count].min * times } - p.ref_len = tokenize(reference.strip).size * times - p.hyp_len = tokenize(hypothesis.strip).size * times + p.hyp_len, p.ref_len = best_match_length hypothesis, references + p.hyp_len *= times + p.ref_len *= times return p end @@ -82,45 +97,65 @@ def BLEU::brevity_penalty c, r, smooth=0.0 return [0.0, 1.0-((r+smooth)/c)].min end -def BLEU::bleu counts, n, debug=false +def BLEU::bleu_ counts, n, debug=false corpus_stats = NgramCounts.new n counts.each { |i| corpus_stats.plus_eq i } logbleu = 0.0 0.upto(n-1) { |m| - STDERR.write "#{m+1} #{corpus_stats.clipped[m]} / #{corpus_stats.sum[m]}\n" if debug + STDERR.write "#{m+1} #{corpus_stats.clipped[m]} / #{corpus_stats.sum[m]} = #{(corpus_stats.clipped[m]/corpus_stats.sum[m]).round 2}\n" if debug return 0.0 if corpus_stats.clipped[m] == 0 or corpus_stats.sum == 0 logbleu += Math.log(corpus_stats.clipped[m]) - Math.log(corpus_stats.sum[m]) } logbleu /= n - if debug - STDERR.write "BP #{brevity_penalty(corpus_stats.hyp_len, corpus_stats.ref_len)}\n" - STDERR.write "sum #{Math.exp(sum)}\n" - end + STDERR.write "BP #{brevity_penalty(corpus_stats.hyp_len, corpus_stats.ref_len).round 2}\n" if debug logbleu += brevity_penalty corpus_stats.hyp_len, corpus_stats.ref_len return Math.exp logbleu end -def BLEU::hbleu counts, n, debug=false +def BLEU::bleu hyp_file, ref_file, n, debug=false + hypotheses = ReadFile.readlines_strip(hyp_file) + references = ReadFile.readlines_strip(ref_file).map { |l| + splitpipe(l,3) + } + counts = [] + hypotheses.each_with_index { |h,i| + counts << BLEU::get_counts(h, references[i], 4) + } + bleu_ counts, n, debug +end + +def BLEU::hbleu_ counts, n, debug=false (100*bleu(counts, n, debug)).round(3) end -def BLEU::per_sentence_bleu hypothesis, reference, n=4, smooth=0.0 - h_ng = {}; r_ng = {} - (1).upto(n) { |i| h_ng[i] = []; r_ng[i] = [] } +def BLEU::hbleu hypotheses, references, n, debug=false +end + +def BLEU::per_sentence_bleu hypothesis, references, n=4, smooth=0.0 + h_ng = {}; r_ng = [] + num_ref = references.size + num_ref.times { r_ng << {} } + (1).upto(n) { |i| h_ng[i] = []; num_ref.times { |j| r_ng[j][i] = [] } } ngrams(hypothesis, n) { |i| h_ng[i.size] << i } - ngrams(reference, n) { |i| r_ng[i.size] << i } - m = [n, reference.split.size].min + references.each_with_index { |reference,j| + ngrams(reference, n) { |i| r_ng[j][i.size] << i } + } + m = [n, references.map { |i| i.split.size }.max].min add = 0.0 logbleu = 0.0 (1).upto(m) { |i| counts_clipped = 0 counts_sum = h_ng[i].size - h_ng[i].uniq.each { |j| counts_clipped += r_ng[i].count(j) } + h_ng[i].uniq.each { |j| + max_count = [h_ng[i].count(j), r_ng.map { |r| r[i].count(j) }.max].min + counts_clipped += max_count + } add = 1.0 if i >= 2 logbleu += Math.log(counts_clipped+add) - Math.log(counts_sum+add); } logbleu /= m - logbleu += brevity_penalty hypothesis.strip.split.size, reference.strip.split.size, smooth + hyp_len, best_ref_len = BLEU::best_match_length hypothesis, references + logbleu += brevity_penalty hyp_len, best_ref_len, smooth return Math.exp logbleu end diff --git a/test/bleu/h b/test/bleu/h new file mode 100644 index 0000000..e9901ad --- /dev/null +++ b/test/bleu/h @@ -0,0 +1,2 @@ +a s d f x +a s f d diff --git a/test/bleu/r b/test/bleu/r new file mode 100644 index 0000000..6b5f216 --- /dev/null +++ b/test/bleu/r @@ -0,0 +1,2 @@ +a s d f ||| a s d f a ||| a s d f x +a s d f ||| a s d f a ||| a s d f x diff --git a/test/test_bleu.rb b/test/test_bleu.rb new file mode 100755 index 0000000..4b9addc --- /dev/null +++ b/test/test_bleu.rb @@ -0,0 +1,30 @@ +#!/usr/bin/env ruby + +require_relative '../lib/zipf/bleu' +require_relative '../lib/zipf/stringutil' +require_relative '../lib/zipf/fileutil' +require_relative '../lib/zipf/misc' +require 'test/unit' + +class TestBLEU < Test::Unit::TestCase + + def test_raw + h = ["a s d f x", "a s f d"] + r = [["a s d f", "a s d f a", "a s d f x"], ["a s d f", "a s d f a", "a s d f x"]] + counts = [] + h.each_with_index { |h,i| + counts << BLEU::get_counts(h, r[i], 4) + } + BLEU::bleu_ counts, 4, true + end + + def test + BLEU::bleu 'test/bleu/h', 'test/bleu/r', 4, true + end +end + + + + + + -- cgit v1.2.3