summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-01-25 13:28:05 +0100
committer Patrick Simianer <p@simianer.de> 2015-01-25 13:28:05 +0100
commit 4c08c31d6759bfc1cd9d31a051fe182827345e17 (patch)
tree 6b11555daf845b2fdcf6e252e96be99bee2af229
parent 51e19d21496ec977843f1c10c229c5370cd381b4 (diff)
BLEU: support for multiple references, unit test
-rw-r--r-- lib/zipf/bleu.rb 75
-rw-r--r-- test/bleu/h 2
-rw-r--r-- test/bleu/r 2
-rwxr-xr-x test/test_bleu.rb 30
4 files changed, 89 insertions, 20 deletions
diff --git a/lib/zipf/bleu.rb b/lib/zipf/bleu.rb
index c07315e..3997c67 100644
--- a/lib/zipf/bleu.rb
+++ b/lib/zipf/bleu.rb
@@ -62,19 +62,34 @@ class BLEU::Ngrams
end
end
-def BLEU::get_counts hypothesis, reference, n, times=1
+def BLEU::best_match_length hypothesis, references
+ hyp_len = hypothesis.strip.split.size
+ ref_lens = references.map { |r| r.strip.split.size }
+ min = Integer::MAX
+ min_idx = -1
+ ref_lens.each_with_index { |l,i|
+ min_idx = i if (hyp_len-l).abs < min
+ }
+ return hyp_len, ref_lens[min_idx]
+end
+
+def BLEU::get_counts hypothesis, references, n, times=1
p = NgramCounts.new n
- r = Ngrams.new
- ngrams(reference, n) { |ng| r.add ng }
+ r = []
+ references.each { |reference|
+ r << Ngrams.new
+ ngrams(reference, n) { |ng| r.last.add ng }
+ }
h = Ngrams.new
ngrams(hypothesis, n) { |ng| h.add ng }
h.each { |ng,count|
sz = ng.size-1
p.sum[sz] += count * times
- p.clipped[sz] += [r.get_count(ng), count].min * times
+ p.clipped[sz] += [r.map { |i| i.get_count(ng)}.max, count].min * times
}
- p.ref_len = tokenize(reference.strip).size * times
- p.hyp_len = tokenize(hypothesis.strip).size * times
+ p.hyp_len, p.ref_len = best_match_length hypothesis, references
+ p.hyp_len *= times
+ p.ref_len *= times
return p
end
@@ -82,45 +97,65 @@ def BLEU::brevity_penalty c, r, smooth=0.0
return [0.0, 1.0-((r+smooth)/c)].min
end
-def BLEU::bleu counts, n, debug=false
+def BLEU::bleu_ counts, n, debug=false
corpus_stats = NgramCounts.new n
counts.each { |i| corpus_stats.plus_eq i }
logbleu = 0.0
0.upto(n-1) { |m|
- STDERR.write "#{m+1} #{corpus_stats.clipped[m]} / #{corpus_stats.sum[m]}\n" if debug
+ STDERR.write "#{m+1} #{corpus_stats.clipped[m]} / #{corpus_stats.sum[m]} = #{(corpus_stats.clipped[m]/corpus_stats.sum[m]).round 2}\n" if debug
return 0.0 if corpus_stats.clipped[m] == 0 or corpus_stats.sum == 0
logbleu += Math.log(corpus_stats.clipped[m]) - Math.log(corpus_stats.sum[m])
}
logbleu /= n
- if debug
- STDERR.write "BP #{brevity_penalty(corpus_stats.hyp_len, corpus_stats.ref_len)}\n"
- STDERR.write "sum #{Math.exp(sum)}\n"
- end
+ STDERR.write "BP #{brevity_penalty(corpus_stats.hyp_len, corpus_stats.ref_len).round 2}\n" if debug
logbleu += brevity_penalty corpus_stats.hyp_len, corpus_stats.ref_len
return Math.exp logbleu
end
-def BLEU::hbleu counts, n, debug=false
+def BLEU::bleu hyp_file, ref_file, n, debug=false
+ hypotheses = ReadFile.readlines_strip(hyp_file)
+ references = ReadFile.readlines_strip(ref_file).map { |l|
+ splitpipe(l,3)
+ }
+ counts = []
+ hypotheses.each_with_index { |h,i|
+ counts << BLEU::get_counts(h, references[i], 4)
+ }
+ bleu_ counts, n, debug
+end
+
+def BLEU::hbleu_ counts, n, debug=false
(100*bleu(counts, n, debug)).round(3)
end
-def BLEU::per_sentence_bleu hypothesis, reference, n=4, smooth=0.0
- h_ng = {}; r_ng = {}
- (1).upto(n) { |i| h_ng[i] = []; r_ng[i] = [] }
+def BLEU::hbleu hypotheses, references, n, debug=false
+end
+
+def BLEU::per_sentence_bleu hypothesis, references, n=4, smooth=0.0
+ h_ng = {}; r_ng = []
+ num_ref = references.size
+ num_ref.times { r_ng << {} }
+ (1).upto(n) { |i| h_ng[i] = []; num_ref.times { |j| r_ng[j][i] = [] } }
ngrams(hypothesis, n) { |i| h_ng[i.size] << i }
- ngrams(reference, n) { |i| r_ng[i.size] << i }
- m = [n, reference.split.size].min
+ references.each_with_index { |reference,j|
+ ngrams(reference, n) { |i| r_ng[j][i.size] << i }
+ }
+ m = [n, references.map { |i| i.split.size }.max].min
add = 0.0
logbleu = 0.0
(1).upto(m) { |i|
counts_clipped = 0
counts_sum = h_ng[i].size
- h_ng[i].uniq.each { |j| counts_clipped += r_ng[i].count(j) }
+ h_ng[i].uniq.each { |j|
+ max_count = [h_ng[i].count(j), r_ng.map { |r| r[i].count(j) }.max].min
+ counts_clipped += max_count
+ }
add = 1.0 if i >= 2
logbleu += Math.log(counts_clipped+add) - Math.log(counts_sum+add);
}
logbleu /= m
- logbleu += brevity_penalty hypothesis.strip.split.size, reference.strip.split.size, smooth
+ hyp_len, best_ref_len = BLEU::best_match_length hypothesis, references
+ logbleu += brevity_penalty hyp_len, best_ref_len, smooth
return Math.exp logbleu
end
diff --git a/test/bleu/h b/test/bleu/h
new file mode 100644
index 0000000..e9901ad
--- /dev/null
+++ b/test/bleu/h
@@ -0,0 +1,2 @@
+a s d f x
+a s f d
diff --git a/test/bleu/r b/test/bleu/r
new file mode 100644
index 0000000..6b5f216
--- /dev/null
+++ b/test/bleu/r
@@ -0,0 +1,2 @@
+a s d f ||| a s d f a ||| a s d f x
+a s d f ||| a s d f a ||| a s d f x
diff --git a/test/test_bleu.rb b/test/test_bleu.rb
new file mode 100755
index 0000000..4b9addc
--- /dev/null
+++ b/test/test_bleu.rb
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+
+require_relative '../lib/zipf/bleu'
+require_relative '../lib/zipf/stringutil'
+require_relative '../lib/zipf/fileutil'
+require_relative '../lib/zipf/misc'
+require 'test/unit'
+
+class TestBLEU < Test::Unit::TestCase
+
+ def test_raw
+ h = ["a s d f x", "a s f d"]
+ r = [["a s d f", "a s d f a", "a s d f x"], ["a s d f", "a s d f a", "a s d f x"]]
+ counts = []
+ h.each_with_index { |h,i|
+ counts << BLEU::get_counts(h, r[i], 4)
+ }
+ BLEU::bleu_ counts, 4, true
+ end
+
+ def test
+ BLEU::bleu 'test/bleu/h', 'test/bleu/r', 4, true
+ end
+end
+
+
+
+
+
+