summary | refs | log | tree | commit | diff
path: root/gi/pipeline
diff options
context:
space:
mode:
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-06 13:42:42 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-06 13:42:42 +0000
commit: 5d74d37d10eabf50981ed7c0550ee66e0ef63308 (patch)
tree: a48af01e068275636ac112a75995edcd470dd957 /gi/pipeline
parent: 2cbb1d7f6c991c583a551236e688b588cfc2501d (diff)
handle more features
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@148 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pipeline')
-rwxr-xr-x gi/pipeline/scripts/xfeats.pl 27
1 files changed, 15 insertions, 12 deletions
diff --git a/gi/pipeline/scripts/xfeats.pl b/gi/pipeline/scripts/xfeats.pl
index bdb9224c..dc578513 100755
--- a/gi/pipeline/scripts/xfeats.pl
+++ b/gi/pipeline/scripts/xfeats.pl
@@ -1,28 +1,31 @@
#!/usr/bin/perl -w
use strict;
-die "Usage: $0 x-grammar.scfg < cat-grammar.scfg\n" unless scalar @ARGV > 0;
+die "Usage: $0 x-grammar.scfg[.gz] < cat-grammar.scfg\n" unless scalar @ARGV > 0;
my $xgrammar = shift @ARGV;
-open F, "<$xgrammar" or die "Can't read $xgrammar: $!";
+die "Can't find $xgrammar" unless -f $xgrammar;
+my $fh;
+if ($xgrammar =~ /\.gz$/) {
+ open $fh, "gunzip -c $xgrammar|" or die "Can't fork: $!";
+} else {
+ open $fh, "<$xgrammar" or die "Can't read $xgrammar: $!";
+}
print STDERR "Reading X-feats from $xgrammar...\n";
my %dict;
-while(<F>) {
+while(<$fh>) {
chomp;
my ($lhs, $f, $e, $feats) = split / \|\|\| /;
my $xfeats;
my $cc = 0;
- if ($feats =~ /(EGivenF=[^ ]+)( |$)/) {
- $xfeats = "X_$1"; $cc++;
- }
- if ($feats =~ /(FGivenE=[^ ]+)( |$)/) {
- $xfeats = "$xfeats X_$1"; $cc++;
+ my @xfeats = ();
+ while ($feats =~ /(EGivenF|FGivenE|LogRuleCount|LogECount|LogFCount|SingletonRule|SingletonE|SingletonF)=([^ ]+)( |$)/og) {
+ push @xfeats, "X_$1=$2";
}
- die "EGivenF and FGivenE features not found: $_" unless $cc == 2;
- #print "$lhs ||| $f ||| $e ||| $xfeats\n";
- $dict{"$lhs ||| $f ||| $e"} = $xfeats;
+ #print "$lhs ||| $f ||| $e ||| @xfeats\n";
+ $dict{"$lhs ||| $f ||| $e"} = "@xfeats";
}
-close F;
+close $fh;
print STDERR "Add features...\n";
while(<>) {