summaryrefslogtreecommitdiff
path: root/gi/pipeline/scripts/xfeats.pl
blob: bdb9224c4f926d38f54b7d312ae41065120f61ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/perl -w
use strict;

# The X-grammar path is a mandatory first argument. The second grammar is read
# later through <>, i.e. from STDIN when invoked as shown in the usage line.
die "Usage: $0 x-grammar.scfg < cat-grammar.scfg\n" unless scalar @ARGV > 0;

my $xgrammar = shift @ARGV;
# Three-argument open with a lexical handle: the file name is taken literally
# (a leading '>' / '<' or trailing '|' is never parsed as a mode or pipe), and
# the handle is not a package-global bareword.
open my $xgrammar_fh, '<', $xgrammar or die "Can't read $xgrammar: $!";
print STDERR "Reading X-feats from $xgrammar...\n";

# Maps "LHS ||| F ||| E" of every rule in the X-grammar to the string
# "X_EGivenF=<value> X_FGivenE=<value>" built from that rule's feature field.
# If the same key occurs more than once, the last occurrence wins.
my %dict;
while (defined(my $rule = <$xgrammar_fh>)) {
  chomp $rule;
  my ($lhs, $f, $e, $feats) = split / \|\|\| /, $rule;
  # A line with fewer than four fields has no feature field at all. Treat it
  # as empty so it fails the check below with the regular error message
  # instead of first emitting "uninitialized value" warnings.
  $feats = '' unless defined $feats;

  # Collect the two features in a fixed order. The name must start the field
  # or follow a space, so a longer feature name that merely ends in
  # "EGivenF"/"FGivenE" (e.g. "X_EGivenF") cannot be picked up by mistake.
  my @xfeats;
  for my $name (qw(EGivenF FGivenE)) {
    push @xfeats, "X_$1" if $feats =~ /(?:^| )(${name}=[^ ]+)(?: |$)/;
  }
  die "EGivenF and FGivenE features not found: $rule" unless @xfeats == 2;
  #print "$lhs ||| $f ||| $e ||| @xfeats\n";
  $dict{"$lhs ||| $f ||| $e"} = join ' ', @xfeats;
}
close $xgrammar_fh;

# Second pass: read rules from <> (STDIN or any remaining file arguments) and
# re-emit each one with the matching entry from %dict appended.
print STDERR "Add features...\n";
while (defined(my $rule = <>)) {
  chomp $rule;
  # Only the second and third fields are used; the rule's own left-hand side
  # is discarded because the lookup key below always uses "[X]".
  my (undef, $src, $tgt) = split / \|\|\| /, $rule;
  # Rewrite source-side non-terminals of the form [LABEL,1] / [LABEL,2] to
  # [X,1] / [X,2] so the key has the same shape as the keys stored in %dict.
  $src =~ s/\[[^]]+,([12])\]/\[X,$1\]/g;
  my $key = "[X] ||| $src ||| $tgt";
  my $xfeats = $dict{$key};
  # Every rule must have a counterpart in %dict; abort on the first miss.
  die "Can't find x features for: $rule\n" unless $xfeats;
  print "$rule $xfeats\n";
}