blob: dc5785135fe5dfea2b528f866b86bd68923015a1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
#!/usr/bin/perl -w
#
# Append "X_"-prefixed features from an X grammar to the rules of a second
# grammar read from standard input.
#
# Usage: script x-grammar.scfg[.gz] < cat-grammar.scfg
#
# Both inputs are line-oriented with fields separated by " ||| ":
#     LHS ||| source ||| target ||| features
#
# Pass 1 loads the X grammar (optionally gzip-compressed) and remembers, for
# every "LHS ||| source ||| target" triple, the selected features renamed to
# X_<name>=<value>.
# Pass 2 reads rules from STDIN, rewrites bracketed source symbols of the form
# [..,1] / [..,2] to [X,1] / [X,2], looks the rule up under LHS "[X]" and
# prints the original line followed by the stored X features.
# The script dies if a rule has no (non-empty) X feature entry.
use strict;
use warnings;

die "Usage: $0 x-grammar.scfg[.gz] < cat-grammar.scfg\n" unless scalar @ARGV > 0;
my $xgrammar = shift @ARGV;
die "Can't find $xgrammar" unless -f $xgrammar;

my $fh;
my $is_gzipped = ($xgrammar =~ /\.gz$/) ? 1 : 0;
if ($is_gzipped) {
    # List-form pipe open: the file name is handed to gunzip as a single
    # argument and never passes through a shell, so spaces or shell
    # metacharacters in the name are harmless.
    open $fh, '-|', 'gunzip', '-c', '--', $xgrammar or die "Can't fork: $!";
} else {
    # Three-argument open: the file name cannot alter the open mode.
    open $fh, '<', $xgrammar or die "Can't read $xgrammar: $!";
}

print STDERR "Reading X-feats from $xgrammar...\n";

# Features copied from the X grammar. The pattern is applied to one whole
# space-separated token at a time, so a longer feature name that merely ends
# in one of these names is not mistaken for it.
my $wanted_feat = qr/^(EGivenF|FGivenE|LogRuleCount|LogECount|LogFCount|SingletonRule|SingletonE|SingletonF)=([^ ]+)$/;

# "LHS ||| source ||| target" => "X_name=value X_name=value ..."
my %dict;
while (my $line = <$fh>) {
    chomp $line;
    my ($lhs, $f, $e, $feats) = split / \|\|\| /, $line;
    # A line without a feature field cannot contribute any X features.
    next unless defined $feats;
    my @xfeats;
    for my $token (split / /, $feats) {
        push @xfeats, "X_$1=$2" if $token =~ $wanted_feat;
    }
    $dict{"$lhs ||| $f ||| $e"} = "@xfeats";
}

# For a pipe, close() also reaps gunzip and returns false when it failed;
# without this check a corrupt archive would yield a silently truncated
# dictionary.
unless (close $fh) {
    die "gunzip -c $xgrammar failed (wait status $?)\n" if $is_gzipped && !$!;
    die "Can't close $xgrammar: $!\n";
}

print STDERR "Add features...\n";
while (my $line = <>) {
    chomp $line;
    my (undef, $f, $e) = split / \|\|\| /, $line;
    # Treat missing fields as empty so that a malformed rule ends in the
    # lookup failure below rather than in "uninitialized value" warnings.
    $f = '' unless defined $f;
    $e = '' unless defined $e;
    # Replace the label of indexed bracketed symbols by X, keeping the index.
    $f =~ s/\[[^]]+,([12])\]/\[X,$1\]/g;
    my $xfeats = $dict{"[X] ||| $f ||| $e"};
    die "Can't find x features for: $line\n" unless $xfeats;
    print "$line $xfeats\n";
}
|