summaryrefslogtreecommitdiff
path: root/training/add-model1-features-to-scfg.pl
blob: a0074317930fe24ef5c37dab0a6d5b2ecf41f283 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/perl -w

# [X] ||| so [X,1] die [X,2] der ||| as [X,1] existing [X,2] the ||| 2.47712135315 2.53182387352 5.07100057602 ||| 0-0 2-2 4-4
# [X] ||| so [X,1] die [X,2] der ||| this [X,1] the [X,2] of ||| 2.47712135315 3.19828724861 2.38270020485 ||| 0-0 2-2 4-4
# [X] ||| so [X,1] die [X,2] der ||| as [X,1] the [X,2] the ||| 2.47712135315 2.53182387352 1.48463630676 ||| 0-0 2-2 4-4
# [X] ||| so [X,1] die [X,2] der ||| is [X,1] the [X,2] of the ||| 2.47712135315 3.45197868347 2.64251494408 ||| 0-0 2-2 4-4 4-5

die "Usage: $0 model1.f-e model1.e-f < grammar.scfg\n  (use trianing/model1 to extract the model files)\n" unless scalar @ARGV == 2;

my $fm1 = shift @ARGV;
die unless $fm1;
my $frm1 = shift @ARGV;
die unless $frm1;
open M1,"<$fm1" or die;
open RM1,"<$frm1" or die;
print STDERR "Loading Model 1 probs from $fm1...\n";
my %m1;
while(<M1>) {
  chomp;
  my ($f, $e, $lp) = split /\s+/;
  $m1{$e}->{$f} = exp($lp);
}
close M1;

print STDERR "Loading Inverse Model 1 probs from $frm1...\n";
my %rm1;
while(<RM1>) {
  chomp;
  my ($e, $f, $lp) = split /\s+/;
  $rm1{$f}->{$e} = exp($lp);
}
close RM1;

my @label = qw( EGivenF LexFGivenE LexEGivenF );
while(<>) {
  chomp;
  my ($l, $f, $e, $sscores, $al) = split / \|\|\| /;
  my @scores = split /\s+/, $sscores;
  unless ($sscores =~ /=/) {
    for (my $i=0; $i<3; $i++) { $scores[$i] = "$label[$i]=$scores[$i]"; }
  }
  push @scores, "RuleCount=1";
  my @fs = split /\s+/, $f;
  my @es = split /\s+/, $e;
  my $flen = scalar @fs;
  my $elen = scalar @es;
  my $pgen = 0;
  my $nongen = 0;
  for (my $i =0; $i < $flen; $i++) {
    my $ftot = 0;
    next if ($fs[$i] =~ /\[X/);
    my $cr = $rm1{$fs[$i]};
    for (my $j=0; $j <= $elen; $j++) {
      my $ej = '<eps>';
      if ($j < $elen) { $ej = $es[$j]; }
      my $p = $cr->{$ej};
      if (defined $p) { $ftot += $p; }
    }
    if ($ftot == 0) { $nongen = 1; last; }
    $pgen += log($ftot) - log($elen);
  }
  my $bad = 0;
  my $good = 0;
  unless ($nongen) { push @scores, "RGood=1"; $good++; } else { push @scores, "RBad=1"; $bad++; }

  $nongen = 0;
  $pgen = 0;
  for (my $i =0; $i < $elen; $i++) {
    my $etot = 0;
    next if ($es[$i] =~ /\[X/);
    my $cr = $m1{$es[$i]};
#    print STDERR "$es[$i]\n";
    for (my $j=0; $j <= $flen; $j++) {
      my $fj = '<eps>';
      if ($j < $flen) { $fj = $fs[$j]; }
      my $p = $cr->{$fj};
#      print STDERR "  $fs[$j] : $p\n";
      if (defined $p) { $etot += $p; }
    }
    if ($etot == 0) { $nongen = 1; last; }
    $pgen += log($etot) - log($flen);
  }
  unless ($nongen) {
    push @scores, "FGood=1";
    if ($good) { push @scores, "BothGood=1"; } else { push @scores, "SusDel=1"; }
  } else {
    push @scores, "FBad=1";
    if ($bad) { push @scores, "BothBad=1"; } else { push @scores, "SusHall=1"; }
  }
  print "$l ||| $f ||| $e ||| @scores";
  if (defined $al) { print " ||| $al\n"; } else { print "\n"; }
}