summaryrefslogtreecommitdiff
path: root/rescore/rerank.pl
blob: 4a0c5750035149c4505265fd20092f04aea0ed92 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl -w

use strict;
use utf8;
use Getopt::Long;

my $weights_file;
my $hyp_file;
my $help;
my $kbest; # flag to extract reranked list

Getopt::Long::Configure("no_auto_abbrev");
if (GetOptions(
    "weights_file|w=s" => \$weights_file,
    "hypothesis_file|h=s" => \$hyp_file,
    "kbest" => \$kbest,
    "help" => \$help,
) == 0 || @ARGV!=0 || $help || !$weights_file || !$hyp_file) {
  usage();
  exit(1);
}

open W, "<$weights_file" or die "Can't read $weights_file: $!";
my %weights;
while(<W>) {
  chomp;
  next if /^#/;
  next if /^\s*$/;
  my ($fname, $w) = split /\s+/;
  $weights{$fname} = $w;
}
close W;

my $cur = undef;
my %hyps = ();
open HYP, "<$hyp_file" or die "Can't read $hyp_file: $!";
while(<HYP>) {
  chomp;
  my ($id, $hyp, $feats) = split / \|\|\| /;
  unless (defined $cur) { $cur = $id; }
  if ($cur ne $id) {
    extract_1best($cur, \%hyps);
    $cur = $id;
    %hyps = ();
  }
  my @afeats = split /\s+/, $feats;
  my $tot = 0;
  for my $featpair (@afeats) {
    my ($fname,$fval) = split /=/, $featpair;
    my $weight = $weights{$fname};
    die "Unweighted feature '$fname'" unless defined $weight;
    $tot += ($weight * $fval);
  }
  $hyps{"$hyp ||| $feats"} = $tot;
}
extract_1best($cur, \%hyps) if defined $cur;
close HYP;

sub extract_1best {
  my ($id, $rh) = @_;
  my %hyps = %$rh;
  if ($kbest) {
    for my $hyp (sort { $hyps{$b} <=> $hyps{$a} } keys %hyps) {
      print "$id ||| $hyp\n";
    }
  } else {
    my $best_score = undef;
    my $best_hyp = undef;
    for my $hyp (keys %hyps) {
      if (!defined $best_score || $hyps{$hyp} > $best_score) {
        $best_score = $hyps{$hyp};
        $best_hyp = $hyp;
      }
    }
    $best_hyp =~ s/ \|\|\|.*$//;
    print "$best_hyp\n";
  }
}

sub usage {
  print <<EOT;
Usage: $0 -w weights.txt -h hyp.nbest.txt [--kbest]
  Reranks n-best lists with new weights, extracting the new 1/k-best entries.
EOT
}