summary refs log tree commit diff
path: root/gi/pipeline
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pipeline')
-rwxr-xr-x gi/pipeline/scripts/xfeats.pl 27
1 files changed, 15 insertions, 12 deletions
diff --git a/gi/pipeline/scripts/xfeats.pl b/gi/pipeline/scripts/xfeats.pl
index bdb9224c..dc578513 100755
--- a/gi/pipeline/scripts/xfeats.pl
+++ b/gi/pipeline/scripts/xfeats.pl
@@ -1,28 +1,31 @@
#!/usr/bin/perl -w
use strict;
-die "Usage: $0 x-grammar.scfg < cat-grammar.scfg\n" unless scalar @ARGV > 0;
+die "Usage: $0 x-grammar.scfg[.gz] < cat-grammar.scfg\n" unless scalar @ARGV > 0;
my $xgrammar = shift @ARGV;
-open F, "<$xgrammar" or die "Can't read $xgrammar: $!";
+die "Can't find $xgrammar" unless -f $xgrammar;
+my $fh;
+if ($xgrammar =~ /\.gz$/) {
+ open $fh, "gunzip -c $xgrammar|" or die "Can't fork: $!";
+} else {
+ open $fh, "<$xgrammar" or die "Can't read $xgrammar: $!";
+}
print STDERR "Reading X-feats from $xgrammar...\n";
my %dict;
-while(<F>) {
+while(<$fh>) {
chomp;
my ($lhs, $f, $e, $feats) = split / \|\|\| /;
my $xfeats;
my $cc = 0;
- if ($feats =~ /(EGivenF=[^ ]+)( |$)/) {
- $xfeats = "X_$1"; $cc++;
- }
- if ($feats =~ /(FGivenE=[^ ]+)( |$)/) {
- $xfeats = "$xfeats X_$1"; $cc++;
+ my @xfeats = ();
+ while ($feats =~ /(EGivenF|FGivenE|LogRuleCount|LogECount|LogFCount|SingletonRule|SingletonE|SingletonF)=([^ ]+)( |$)/og) {
+ push @xfeats, "X_$1=$2";
}
- die "EGivenF and FGivenE features not found: $_" unless $cc == 2;
- #print "$lhs ||| $f ||| $e ||| $xfeats\n";
- $dict{"$lhs ||| $f ||| $e"} = $xfeats;
+ #print "$lhs ||| $f ||| $e ||| @xfeats\n";
+ $dict{"$lhs ||| $f ||| $e"} = "@xfeats";
}
-close F;
+close $fh;
print STDERR "Add features...\n";
while(<>) {