diff options
Diffstat (limited to 'multi-bleu.perl')
-rwxr-xr-x | multi-bleu.perl | 174 |
1 files changed, 0 insertions, 174 deletions
diff --git a/multi-bleu.perl b/multi-bleu.perl deleted file mode 100755 index a25e347..0000000 --- a/multi-bleu.perl +++ /dev/null @@ -1,174 +0,0 @@ -#!/usr/bin/env perl -# -# This file is part of moses. Its use is licensed under the GNU Lesser General -# Public License version 2.1 or, at your option, any later version. - -# $Id$ -use warnings; -use strict; - -my $lowercase = 0; -if ($ARGV[0] eq "-lc") { - $lowercase = 1; - shift; -} - -my $stem = $ARGV[0]; -if (!defined $stem) { - print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n"; - print STDERR "Reads the references from reference or reference0, reference1, ...\n"; - exit(1); -} - -$stem .= ".ref" if !-e $stem && !-e $stem."0" && -e $stem.".ref0"; - -my @REF; -my $ref=0; -while(-e "$stem$ref") { - &add_to_ref("$stem$ref",\@REF); - $ref++; -} -&add_to_ref($stem,\@REF) if -e $stem; -die("ERROR: could not find reference file $stem") unless scalar @REF; - -# add additional references explicitly specified on the command line -shift; -foreach my $stem (@ARGV) { - &add_to_ref($stem,\@REF) if -e $stem; -} - - - -sub add_to_ref { - my ($file,$REF) = @_; - my $s=0; - if ($file =~ /.gz$/) { - open(REF,"gzip -dc $file|") or die "Can't read $file"; - } else { - open(REF,$file) or die "Can't read $file"; - } - while(<REF>) { - chop; - push @{$$REF[$s++]}, $_; - } - close(REF); -} - -my(@CORRECT,@TOTAL,$length_translation,$length_reference); -my $s=0; -while(<STDIN>) { - chop; - $_ = lc if $lowercase; - my @WORD = split; - my %REF_NGRAM = (); - my $length_translation_this_sentence = scalar(@WORD); - my ($closest_diff,$closest_length) = (9999,9999); - foreach my $reference (@{$REF[$s]}) { -# print "$s $_ <=> $reference\n"; - $reference = lc($reference) if $lowercase; - my @WORD = split(' ',$reference); - my $length = scalar(@WORD); - my $diff = abs($length_translation_this_sentence-$length); - if ($diff < $closest_diff) { - $closest_diff = $diff; - $closest_length = $length; - # print STDERR "$s: closest diff ".abs($length_translation_this_sentence-$length)." = abs($length_translation_this_sentence-$length), setting len: $closest_length\n"; - } elsif ($diff == $closest_diff) { - $closest_length = $length if $length < $closest_length; - # from two references with the same closeness to me - # take the *shorter* into account, not the "first" one. - } - for(my $n=1;$n<=4;$n++) { - my %REF_NGRAM_N = (); - for(my $start=0;$start<=$#WORD-($n-1);$start++) { - my $ngram = "$n"; - for(my $w=0;$w<$n;$w++) { - $ngram .= " ".$WORD[$start+$w]; - } - $REF_NGRAM_N{$ngram}++; - } - foreach my $ngram (keys %REF_NGRAM_N) { - if (!defined($REF_NGRAM{$ngram}) || - $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) { - $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram}; -# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n"; - } - } - } - } - $length_translation += $length_translation_this_sentence; - $length_reference += $closest_length; - for(my $n=1;$n<=4;$n++) { - my %T_NGRAM = (); - for(my $start=0;$start<=$#WORD-($n-1);$start++) { - my $ngram = "$n"; - for(my $w=0;$w<$n;$w++) { - $ngram .= " ".$WORD[$start+$w]; - } - $T_NGRAM{$ngram}++; - } - foreach my $ngram (keys %T_NGRAM) { - $ngram =~ /^(\d+) /; - my $n = $1; - # my $corr = 0; -# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n"; - $TOTAL[$n] += $T_NGRAM{$ngram}; - if (defined($REF_NGRAM{$ngram})) { - if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) { - $CORRECT[$n] += $T_NGRAM{$ngram}; - # $corr = $T_NGRAM{$ngram}; -# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n"; - } - else { - $CORRECT[$n] += $REF_NGRAM{$ngram}; - # $corr = $REF_NGRAM{$ngram}; -# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n"; - } - } - # $REF_NGRAM{$ngram} = 0 if !defined $REF_NGRAM{$ngram}; - # print STDERR "$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\n" - } - } - $s++; -} -my $brevity_penalty = 1; -my $bleu = 0; - -my @bleu=(); - -for(my $n=1;$n<=4;$n++) { - if (defined ($TOTAL[$n])){ - $bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0; - # print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n"; - }else{ - $bleu[$n]=0; - } -} - -if ($length_reference==0){ - printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n"; - exit(1); -} - -if ($length_translation<$length_reference) { - $brevity_penalty = exp(1-$length_reference/$length_translation); -} -$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) + - my_log( $bleu[2] ) + - my_log( $bleu[3] ) + - my_log( $bleu[4] ) ) / 4) ; -printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n", - 100*$bleu, - 100*$bleu[1], - 100*$bleu[2], - 100*$bleu[3], - 100*$bleu[4], - $brevity_penalty, - $length_translation / $length_reference, - $length_translation, - $length_reference; - -sub my_log { - return -9999999999 unless $_[0]; - return log($_[0]); -} |