summaryrefslogtreecommitdiff
path: root/word-aligner
diff options
context:
space:
mode:
Diffstat (limited to 'word-aligner')
-rwxr-xr-x[-rw-r--r--]word-aligner/support/generate_per_sentence_grammars.pl (renamed from word-aligner/support/generate-per-sentence-grammars.pl)22
1 files changed, 19 insertions, 3 deletions
diff --git a/word-aligner/support/generate-per-sentence-grammars.pl b/word-aligner/support/generate_per_sentence_grammars.pl
index d621213e..c644ec6d 100644..100755
--- a/word-aligner/support/generate-per-sentence-grammars.pl
+++ b/word-aligner/support/generate_per_sentence_grammars.pl
@@ -38,11 +38,27 @@ while(<G>) {
$memrc++;
} else {
$loadrc++;
- $grammar{$f}="$e ||| $feats";
+ my $r = $grammar{$f};
+ if (!defined $r) {
+ $r = [];
+ $grammar{$f} = $r;
+ }
+ push @$r, "$e ||| $feats";
}
}
-
+close G;
print STDERR " mem rc: $memrc\n";
print STDERR " load rc: $loadrc\n";
-
+while(<C>) {    # for each line read from C, count the grammar rules available for its source side
+  my ($src) = split / \|\|\| /;    # only the field before the first ' ||| ' is needed here
+  my @fwords = split ' ', $src;    # split ' ' skips leading whitespace, so no empty "word" gets looked up
+  my $tot = 0;
+  for my $word (@fwords) {    # distinct name: does not shadow the sentence-level variable
+    my $r = $grammar{$word};    # array ref of "target ||| feats" strings collected while reading G
+    die "No translations for: $word" unless $r;    # every source word must have at least one rule
+    my $num = scalar @$r;
+    $tot += $num;
+  }
+  print "RULES: $tot\n";    # one summary line on STDOUT per input line
+}