diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-28 17:58:34 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-28 17:58:34 +0000 |
commit | 549df389bbc4fcee4f9641efea35ca7a5857eedf (patch) | |
tree | 643523dc1d4821aed678d2fd1da299d71e3b5b35 /word-aligner | |
parent | 6548dbd9e6421b79899384a748bd356ff126cff3 (diff) |
more
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@695 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'word-aligner')
-rw-r--r-- | word-aligner/support/generate-per-sentence-grammars.pl | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/word-aligner/support/generate-per-sentence-grammars.pl b/word-aligner/support/generate-per-sentence-grammars.pl
index 695cfc17..d621213e 100644
--- a/word-aligner/support/generate-per-sentence-grammars.pl
+++ b/word-aligner/support/generate-per-sentence-grammars.pl
@@ -4,7 +4,7 @@ use utf8;
 die "Usage: $0 f.voc corpus.f-e grammar.f-e.gz\n" unless scalar @ARGV == 3;
-my $MAX_INMEM = 1000;
+my $MAX_INMEM = 3000;
 open FV,"<$ARGV[0]" or die "Can't read $ARGV[0]: $!";
 open C,"<$ARGV[1]" or die "Can't read $ARGV[1]: $!";
@@ -19,7 +19,7 @@ my %most_freq;
 $most_freq{"<eps>"} = 1;
 while(my $f = <FV>) {
 chomp $f;
- %most_freq{$f}=1;
+ $most_freq{$f}=1;
 $vc++;
 last if $vc == $MAX_INMEM;
 }
@@ -27,6 +27,7 @@ close FV;
 print STDERR "Loaded $vc vocabulary items for permanent translation cache\n";
+my %grammar;
 my $memrc = 0;
 my $loadrc = 0;
 while(<G>) {