diff options
author | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 |
commit | c3a9ea64251605532c7954959662643a6a927bb7 (patch) | |
tree | fed6048a5acdaf3834740107771c2bc48f26fd4d /rescore/rerank.pl | |
parent | 867bca3e5fa0cdd63bf032e5859fb5092d9a4ca1 (diff) | |
parent | a45af4a3704531a8382cd231f6445b3a33b598a3 (diff) |
merge with upstream
Diffstat (limited to 'rescore/rerank.pl')
-rwxr-xr-x | rescore/rerank.pl | 86 |
1 files changed, 0 insertions, 86 deletions
diff --git a/rescore/rerank.pl b/rescore/rerank.pl deleted file mode 100755 index 4a0c5750..00000000 --- a/rescore/rerank.pl +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use utf8; -use Getopt::Long; - -my $weights_file; -my $hyp_file; -my $help; -my $kbest; # flag to extract reranked list - -Getopt::Long::Configure("no_auto_abbrev"); -if (GetOptions( - "weights_file|w=s" => \$weights_file, - "hypothesis_file|h=s" => \$hyp_file, - "kbest" => \$kbest, - "help" => \$help, -) == 0 || @ARGV!=0 || $help || !$weights_file || !$hyp_file) { - usage(); - exit(1); -} - -open W, "<$weights_file" or die "Can't read $weights_file: $!"; -my %weights; -while(<W>) { - chomp; - next if /^#/; - next if /^\s*$/; - my ($fname, $w) = split /\s+/; - $weights{$fname} = $w; -} -close W; - -my $cur = undef; -my %hyps = (); -open HYP, "<$hyp_file" or die "Can't read $hyp_file: $!"; -while(<HYP>) { - chomp; - my ($id, $hyp, $feats) = split / \|\|\| /; - unless (defined $cur) { $cur = $id; } - if ($cur ne $id) { - extract_1best($cur, \%hyps); - $cur = $id; - %hyps = (); - } - my @afeats = split /\s+/, $feats; - my $tot = 0; - for my $featpair (@afeats) { - my ($fname,$fval) = split /=/, $featpair; - my $weight = $weights{$fname}; - die "Unweighted feature '$fname'" unless defined $weight; - $tot += ($weight * $fval); - } - $hyps{"$hyp ||| $feats"} = $tot; -} -extract_1best($cur, \%hyps) if defined $cur; -close HYP; - -sub extract_1best { - my ($id, $rh) = @_; - my %hyps = %$rh; - if ($kbest) { - for my $hyp (sort { $hyps{$b} <=> $hyps{$a} } keys %hyps) { - print "$id ||| $hyp\n"; - } - } else { - my $best_score = undef; - my $best_hyp = undef; - for my $hyp (keys %hyps) { - if (!defined $best_score || $hyps{$hyp} > $best_score) { - $best_score = $hyps{$hyp}; - $best_hyp = $hyp; - } - } - $best_hyp =~ s/ \|\|\|.*$//; - print "$best_hyp\n"; - } -} - -sub usage { - print <<EOT; -Usage: $0 -w weights.txt -h hyp.nbest.txt [--kbest] - Reranks n-best lists with new weights, extracting the new 1/k-best entries. -EOT -} - |