BLEU: support for multiple references, unit test

author: Patrick Simianer <p@simianer.de> 2015-01-25 13:28:05 +0100
committer: Patrick Simianer <p@simianer.de> 2015-01-25 13:28:05 +0100
commit: 4c08c31d6759bfc1cd9d31a051fe182827345e17 (patch)
tree: 6b11555daf845b2fdcf6e252e96be99bee2af229
parent: 51e19d21496ec977843f1c10c229c5370cd381b4 (diff)
4 files changed, 89 insertions, 20 deletions
diff --git a/lib/zipf/bleu.rb b/lib/zipf/bleu.rb
index c07315e..3997c67 100644
--- a/lib/zipf/bleu.rb
+++ b/lib/zipf/bleu.rb
@@ -62,19 +62,34 @@ class BLEU::Ngrams
   end
 end
 
-def BLEU::get_counts hypothesis, reference, n, times=1
+def BLEU::best_match_length hypothesis, references
+  hyp_len = hypothesis.strip.split.size 
+  ref_lens = references.map { |r| r.strip.split.size }
+  min = Integer::MAX
+  min_idx = -1
+  ref_lens.each_with_index { |l,i|
+    min_idx = i if (hyp_len-l).abs < min
+  }
+  return hyp_len, ref_lens[min_idx]
+end
+
+def BLEU::get_counts hypothesis, references, n, times=1 # n-gram statistics of one hypothesis against several references
   p = NgramCounts.new n
-  r = Ngrams.new
-  ngrams(reference, n) { |ng| r.add ng }
+  r = [] # one Ngrams per reference, in input order
+  references.each { |reference|
+    r << Ngrams.new
+    ngrams(reference, n) { |ng| r.last.add ng }
+  }
   h = Ngrams.new
   ngrams(hypothesis, n) { |ng| h.add ng }
   h.each { |ng,count|
     sz = ng.size-1
     p.sum[sz] += count * times
-    p.clipped[sz] += [r.get_count(ng), count].min * times
+    p.clipped[sz] += [r.map { |i| i.get_count(ng)}.max, count].min * times # clip by the highest count among the references
   }
-  p.ref_len = tokenize(reference.strip).size * times
-  p.hyp_len = tokenize(hypothesis.strip).size * times
+  p.hyp_len, p.ref_len = best_match_length hypothesis, references # hypothesis length and the reference length chosen by best_match_length
+  p.hyp_len *= times # both lengths are scaled by times, like the counts above
+  p.ref_len *= times
   return p
 end
 
@@ -82,45 +97,65 @@ def BLEU::brevity_penalty c, r, smooth=0.0
   return [0.0, 1.0-((r+smooth)/c)].min
 end
 
-def BLEU::bleu counts, n, debug=false
+def BLEU::bleu_ counts, n, debug=false # corpus-level BLEU from a list of per-segment counts
   corpus_stats = NgramCounts.new n
   counts.each { |i| corpus_stats.plus_eq i }
   logbleu = 0.0
   0.upto(n-1) { |m|
-    STDERR.write "#{m+1} #{corpus_stats.clipped[m]} / #{corpus_stats.sum[m]}\n" if debug
+    STDERR.write "#{m+1} #{corpus_stats.clipped[m]} / #{corpus_stats.sum[m]} = #{(corpus_stats.clipped[m]/corpus_stats.sum[m]).round 2}\n" if debug # NOTE(review): this ratio is computed before the zero guard on the next line — confirm it is safe when sum[m] is 0
     return 0.0 if corpus_stats.clipped[m] == 0 or corpus_stats.sum == 0
     logbleu += Math.log(corpus_stats.clipped[m]) - Math.log(corpus_stats.sum[m])
   }
   logbleu /= n
-  if debug
-    STDERR.write "BP #{brevity_penalty(corpus_stats.hyp_len, corpus_stats.ref_len)}\n"
-    STDERR.write "sum #{Math.exp(sum)}\n"
-  end
+  STDERR.write "BP #{brevity_penalty(corpus_stats.hyp_len, corpus_stats.ref_len).round 2}\n" if debug # the penalty is added to logbleu below, before exponentiation
   logbleu += brevity_penalty corpus_stats.hyp_len, corpus_stats.ref_len
   return Math.exp logbleu
 end
 
-def BLEU::hbleu counts, n, debug=false
+def BLEU::bleu hyp_file, ref_file, n, debug=false
+  hypotheses = ReadFile.readlines_strip(hyp_file)
+  references = ReadFile.readlines_strip(ref_file).map { |l|
+    splitpipe(l,3)
+  }
+  counts = []
+  hypotheses.each_with_index { |h,i|
+    counts << BLEU::get_counts(h, references[i], 4)
+  }
+  bleu_ counts, n, debug
+end
+
+def BLEU::hbleu_ counts, n, debug=false # bleu_ as a percentage, rounded to 3 decimals
-  (100*bleu(counts, n, debug)).round(3)
+  (100*bleu_(counts, n, debug)).round(3) # bleu now takes file names; bleu_ is the scorer that accepts counts
 end
 
-def BLEU::per_sentence_bleu hypothesis, reference, n=4, smooth=0.0
-  h_ng = {}; r_ng = {}
-  (1).upto(n) { |i| h_ng[i] = []; r_ng[i] = [] }
+def BLEU::hbleu hypotheses, references, n, debug=false # NOTE(review): empty stub, always returns nil — presumably meant to wrap bleu the way hbleu_ wraps the counts-based scorer; confirm
+end
+
+def BLEU::per_sentence_bleu hypothesis, references, n=4, smooth=0.0 # sentence-level BLEU against several references; orders >= 2 get add-one smoothing
+  h_ng = {}; r_ng = [] # h_ng: order => hypothesis n-grams; r_ng: one such hash per reference
+  num_ref = references.size
+  num_ref.times { r_ng << {} }
+  (1).upto(n) { |i| h_ng[i] = []; num_ref.times { |j| r_ng[j][i] = [] } }
   ngrams(hypothesis, n) { |i| h_ng[i.size] << i }
-  ngrams(reference, n) { |i| r_ng[i.size] << i }
-  m = [n, reference.split.size].min
+  references.each_with_index { |reference,j|
+    ngrams(reference, n) { |i| r_ng[j][i.size] << i }
+  }
+  m = [n, references.map { |i| i.split.size }.max].min # highest order scored: n, capped by the longest reference's token count
   add = 0.0
   logbleu = 0.0
   (1).upto(m) { |i|
     counts_clipped = 0
     counts_sum = h_ng[i].size
-    h_ng[i].uniq.each { |j| counts_clipped += r_ng[i].count(j) }
+    h_ng[i].uniq.each { |j|
+      max_count = [h_ng[i].count(j), r_ng.map { |r| r[i].count(j) }.max].min # hypothesis count clipped by the highest count in any single reference
+      counts_clipped += max_count # accumulate clipped matches for order i
+    }
     add = 1.0 if i >= 2
     logbleu += Math.log(counts_clipped+add) - Math.log(counts_sum+add);
   }
   logbleu /= m
-  logbleu += brevity_penalty hypothesis.strip.split.size, reference.strip.split.size, smooth
+  hyp_len, best_ref_len = BLEU::best_match_length hypothesis, references # lengths for the brevity penalty come from best_match_length
+  logbleu += brevity_penalty hyp_len, best_ref_len, smooth
   return Math.exp logbleu
 end
 
diff --git a/test/bleu/h b/test/bleu/h
new file mode 100644
index 0000000..e9901ad
--- /dev/null
+++ b/test/bleu/h
@@ -0,0 +1,2 @@
+a s d f x
+a s f d
diff --git a/test/bleu/r b/test/bleu/r
new file mode 100644
index 0000000..6b5f216
--- /dev/null
+++ b/test/bleu/r
@@ -0,0 +1,2 @@
+a s d f ||| a s d f a ||| a s d f x
+a s d f ||| a s d f a ||| a s d f x
diff --git a/test/test_bleu.rb b/test/test_bleu.rb
new file mode 100755
index 0000000..4b9addc
--- /dev/null
+++ b/test/test_bleu.rb
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+
+require_relative '../lib/zipf/bleu'
+require_relative '../lib/zipf/stringutil'
+require_relative '../lib/zipf/fileutil'
+require_relative '../lib/zipf/misc'
+require 'test/unit'
+
+class TestBLEU <  Test::Unit::TestCase
+
+  def test_raw
+    h = ["a s d f x", "a s f d"]
+    r = [["a s d f", "a s d f a", "a s d f x"], ["a s d f", "a s d f a", "a s d f x"]]
+    counts = []
+    h.each_with_index { |h,i|
+      counts << BLEU::get_counts(h, r[i], 4)
+    }
+    BLEU::bleu_ counts, 4, true
+  end
+
+  def test
+    BLEU::bleu 'test/bleu/h', 'test/bleu/r', 4, true
+  end
+end
+
+
+
+
+
+
author	Patrick Simianer <p@simianer.de>	2015-01-25 13:28:05 +0100
committer	Patrick Simianer <p@simianer.de>	2015-01-25 13:28:05 +0100
commit	4c08c31d6759bfc1cd9d31a051fe182827345e17 (patch)
tree	6b11555daf845b2fdcf6e252e96be99bee2af229
parent	51e19d21496ec977843f1c10c229c5370cd381b4 (diff)