diff options
author | Patrick Simianer <p@simianer.de> | 2014-02-16 00:12:32 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-02-16 00:12:32 +0100 |
commit | 81a637ae52d2a1d0bc751b44c193765cdc1091f1 (patch) | |
tree | 19708fb523ef32cbeccc4d87133f115650e13280 /per_sentence_bleu | |
parent | 99ae15932eae7e727b74f723107cf42aad80ba3f (diff) |
nlp_ruby 0.3
Diffstat (limited to 'per_sentence_bleu')
-rwxr-xr-x | per_sentence_bleu | 30 |
1 files changed, 2 insertions, 28 deletions
```diff
diff --git a/per_sentence_bleu b/per_sentence_bleu
index 724b1e1..b7243df 100755
--- a/per_sentence_bleu
+++ b/per_sentence_bleu
@@ -4,32 +4,6 @@ require 'nlp_ruby'
 require 'trollop'
 
 
-# reference-length hack as in (Nakov et al., 2012)
-def brevity_penalty hypothesis, reference, hack=0
-  a = tokenize hypothesis; b = tokenize reference
-  return 1.0 if a.size>=b.size
-  return Math.exp(1.0 - ((b.size.to_f+hack)/a.size));
-end
-
-def per_sentence_bleu hypothesis, reference, n=4, hack=0
-  h_ng = {}; r_ng = {}
-  (1).upto(n) {|i| h_ng[i] = []; r_ng[i] = []}
-  ngrams(hypothesis, n) {|i| h_ng[i.size] << i}
-  ngrams(reference, n) {|i| r_ng[i.size] << i}
-  m = [n, reference.split.size].min
-  weight = 1.0/m
-  add = 0.0
-  sum = 0
-  (1).upto(m) { |i|
-    counts_clipped = 0
-    counts_sum = h_ng[i].size
-    h_ng[i].uniq.each {|j| counts_clipped += r_ng[i].count(j)}
-    add = 1.0 if i >= 2
-    sum += weight * Math.log((counts_clipped + add)/(counts_sum + add));
-  }
-  return brevity_penalty(hypothesis, reference, hack) * Math.exp(sum)
-end
-
 def main
   cfg = Trollop::options do
     opt :input, "input", :type => :string, :default => '-'
@@ -38,7 +12,7 @@ def main
     opt :n, "N", :default => 4
   end
 
-  refs = ReadFile.new(cfg[:references]).readlines_strip
+  refs = ReadFile.readlines_strip cfg[:references]
   i = -1
   input = ReadFile.new cfg[:input]
   while line = input.gets
@@ -47,7 +21,7 @@ def main
       puts 0.0
       next
     end
-    puts per_sentence_bleu line.strip, refs[i], cfg[:n], cfg[:len_hack]
+    puts BLEU::per_sentence_bleu line.strip, refs[i], cfg[:n], cfg[:len_hack]
   end
   input.close
 end
```