#!/usr/bin/perl -w
#
# rerank.pl -- for every sentence id in a hypothesis file, print the
# hypothesis whose weighted feature sum is largest.
#
# Provenance (header of the patch this script was recovered from):
#   From c0afd1924cf0c228a85352b3584c64a5e00b88c7 Mon Sep 17 00:00:00 2001
#   From: redpony
#   Date: Mon, 15 Nov 2010 19:44:29 +0000
#   Subject: reranker
#   git-svn-id: https://ws10smt.googlecode.com/svn/trunk@725 ec762483-ff6d-05da-a07a-a48fb63a330f
#   rescore/rerank.pl | 77 insertions(+), 1 file changed
#   create mode 100755 rescore/rerank.pl
#   diff --git a/rescore/rerank.pl b/rescore/rerank.pl
#   new file mode 100755, index 00000000..ce7a67bd
#
# Options (both files are required; no positional arguments are accepted):
#   --weights_file|-w FILE      lines of "<feature-name> <weight>"; lines
#                               starting with '#' and blank lines are ignored
#   --hypothesis_file|-h FILE   lines of "<id> ||| <hypothesis> ||| <features>"
#                               where <features> is a whitespace-separated list
#                               of name=value pairs
#   --help                      print usage and exit (status 1)
#
# Lines that share an id must be adjacent: a change of id closes the current
# group and emits its best hypothesis on STDOUT.

use strict;
use utf8;
use Getopt::Long;

my $weights_file;
my $hyp_file;
my $help;

# Abbreviations are disabled, so "-h" always means the hypothesis file and
# never "--help".
Getopt::Long::Configure("no_auto_abbrev");
if (GetOptions(
    "weights_file|w=s"    => \$weights_file,
    "hypothesis_file|h=s" => \$hyp_file,
    "help"                => \$help,
) == 0 || @ARGV != 0 || $help || !$weights_file || !$hyp_file) {
    usage();
    exit(1);
}

# --- Load the feature weights -------------------------------------------------
# Three-argument open with a lexical handle: the filename can no longer be
# interpreted as a mode or pipe specification.
open my $weights_fh, '<', $weights_file
    or die "Can't read $weights_file: $!";
my %weights;
while (my $line = <$weights_fh>) {
    chomp $line;
    next if $line =~ /^#/;
    next if $line =~ /^\s*$/;
    # split ' ' skips leading whitespace, so an indented entry does not yield
    # an empty feature name.
    my ($fname, $w) = split ' ', $line;
    die "Malformed weight line $. in $weights_file: '$line'"
        unless defined $w;
    $weights{$fname} = $w;
}
close $weights_fh;

# --- Score hypotheses, one group of identical ids at a time -------------------
my $cur;          # id of the group currently being accumulated
my %hyps;         # hypothesis text => total score, for the current group
open my $hyp_fh, '<', $hyp_file or die "Can't read $hyp_file: $!";
while (my $line = <$hyp_fh>) {
    chomp $line;
    # A blank line carries no id; skip it instead of treating it as a new,
    # undefined sentence id.
    next if $line =~ /^\s*$/;
    my ($id, $hyp, $feats) = split / \|\|\| /, $line;
    die "Malformed hypothesis line $. in $hyp_file: '$line'"
        unless defined $feats;

    $cur = $id unless defined $cur;
    if ($cur ne $id) {
        # The id changed: the previous group is complete.
        extract_1best(\%hyps);
        $cur  = $id;
        %hyps = ();
    }

    my $tot = 0;
    for my $featpair (split ' ', $feats) {
        my ($fname, $fval) = split /=/, $featpair;
        die "Malformed feature '$featpair' on line $. of $hyp_file"
            unless defined $fname && defined $fval;
        my $weight = $weights{$fname};
        die "Unweighted feature '$fname'" unless defined $weight;
        $tot += ($weight * $fval);
    }
    # A hypothesis string repeated within a group keeps its latest score.
    $hyps{$hyp} = $tot;
}
# Flush the final group (nothing to do if the file had no hypotheses).
extract_1best(\%hyps) if defined $cur;
close $hyp_fh;

# extract_1best(\%scores)
#   Prints, followed by a newline, the key of %scores with the largest value.
#   Keys are visited in sorted order and only a strictly greater score
#   replaces the current best, so ties are resolved deterministically rather
#   than by hash iteration order. An empty hash prints an empty line.
sub extract_1best {
    my $rh = shift;
    my $best_score;
    my $best_hyp;
    for my $hyp (sort keys %$rh) {
        my $score = $rh->{$hyp};
        if (!defined $best_score || $score > $best_score) {
            $best_score = $score;
            $best_hyp   = $hyp;
        }
    }
    $best_hyp = '' unless defined $best_hyp;
    print "$best_hyp\n";
}

+sub usage { + print <