diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-06 13:42:42 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-06 13:42:42 +0000 |
commit | 5d74d37d10eabf50981ed7c0550ee66e0ef63308 (patch) | |
tree | a48af01e068275636ac112a75995edcd470dd957 | |
parent | 2cbb1d7f6c991c583a551236e688b588cfc2501d (diff) |
handle more features
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@148 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x | gi/pipeline/scripts/xfeats.pl | 27 |
1 file changed, 15 insertions, 12 deletions
diff --git a/gi/pipeline/scripts/xfeats.pl b/gi/pipeline/scripts/xfeats.pl index bdb9224c..dc578513 100755 --- a/gi/pipeline/scripts/xfeats.pl +++ b/gi/pipeline/scripts/xfeats.pl @@ -1,28 +1,31 @@ #!/usr/bin/perl -w use strict; -die "Usage: $0 x-grammar.scfg < cat-grammar.scfg\n" unless scalar @ARGV > 0; +die "Usage: $0 x-grammar.scfg[.gz] < cat-grammar.scfg\n" unless scalar @ARGV > 0; my $xgrammar = shift @ARGV; -open F, "<$xgrammar" or die "Can't read $xgrammar: $!"; +die "Can't find $xgrammar" unless -f $xgrammar; +my $fh; +if ($xgrammar =~ /\.gz$/) { + open $fh, "gunzip -c $xgrammar|" or die "Can't fork: $!"; +} else { + open $fh, "<$xgrammar" or die "Can't read $xgrammar: $!"; +} print STDERR "Reading X-feats from $xgrammar...\n"; my %dict; -while(<F>) { +while(<$fh>) { chomp; my ($lhs, $f, $e, $feats) = split / \|\|\| /; my $xfeats; my $cc = 0; - if ($feats =~ /(EGivenF=[^ ]+)( |$)/) { - $xfeats = "X_$1"; $cc++; - } - if ($feats =~ /(FGivenE=[^ ]+)( |$)/) { - $xfeats = "$xfeats X_$1"; $cc++; + my @xfeats = (); + while ($feats =~ /(EGivenF|FGivenE|LogRuleCount|LogECount|LogFCount|SingletonRule|SingletonE|SingletonF)=([^ ]+)( |$)/og) { + push @xfeats, "X_$1=$2"; } - die "EGivenF and FGivenE features not found: $_" unless $cc == 2; - #print "$lhs ||| $f ||| $e ||| $xfeats\n"; - $dict{"$lhs ||| $f ||| $e"} = $xfeats; + #print "$lhs ||| $f ||| $e ||| @xfeats\n"; + $dict{"$lhs ||| $f ||| $e"} = "@xfeats"; } -close F; +close $fh; print STDERR "Add features...\n"; while(<>) { |