diff options
| author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-28 22:19:32 +0000 | 
|---|---|---|
| committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-28 22:19:32 +0000 | 
| commit | 0c6c6e1e72b13ab0bf6ea2da3ac83ba5a74e5cff (patch) | |
| tree | 59d9ff0c8fef2b062e76ddf24d4ff0dc8b503b5e | |
| parent | 9e73380e46ada871ad114f4fd4238b8f2263ef27 (diff) | |
small fixes
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@700 ec762483-ff6d-05da-a07a-a48fb63a330f
| -rwxr-xr-x | word-aligner/support/extract_vocab.pl | 5 | ||||
| -rwxr-xr-x | word-aligner/support/generate_per_sentence_grammars.pl | 1 | ||||
| -rwxr-xr-x | word-aligner/support/make_lex_grammar.pl | 9 | 
3 files changed, 11 insertions, 4 deletions
```diff
diff --git a/word-aligner/support/extract_vocab.pl b/word-aligner/support/extract_vocab.pl
index 070d4202..ef94b7d6 100755
--- a/word-aligner/support/extract_vocab.pl
+++ b/word-aligner/support/extract_vocab.pl
@@ -7,7 +7,10 @@ my $wc = 0;
 while(<>) {
   chomp;
   my @words = split /\s+/;
-  for my $word (@words) { $wc++; $dict{$word}++; }
+  for my $word (@words) {
+    die if $word eq '';
+    $wc++; $dict{$word}++;
+  }
 }
 my $tc = 0;
diff --git a/word-aligner/support/generate_per_sentence_grammars.pl b/word-aligner/support/generate_per_sentence_grammars.pl
index 8779ac9c..6a02bb0a 100755
--- a/word-aligner/support/generate_per_sentence_grammars.pl
+++ b/word-aligner/support/generate_per_sentence_grammars.pl
@@ -14,6 +14,7 @@ open FILT,"|gzip -c > $ARGV[3]" or die "Can't write $ARGV[3]: $!";
 open PSG,">$ARGV[4]" or die "Can't write $ARGV[4]: $!";
 open OTRAIN,">$ARGV[5]" or die "Can't write $ARGV[5]: $!";
+binmode OTRAIN, ":utf8";
 binmode FILT, ":utf8";
 binmode PSG, ":utf8";
 binmode STDOUT, ":utf8";
diff --git a/word-aligner/support/make_lex_grammar.pl b/word-aligner/support/make_lex_grammar.pl
index 3e243125..e4cbf7ba 100755
--- a/word-aligner/support/make_lex_grammar.pl
+++ b/word-aligner/support/make_lex_grammar.pl
@@ -35,7 +35,7 @@ my %sizes = ();
 while(<M1>) {
   chomp;
   my ($f, $e, $lp) = split /\s+/;
-  $model1{$f}->{$e} = 1;
+  $model1{$f}->{$e} = 1e-12 + exp($lp);
   $sizes{$f}++;
 }
 close M1;
@@ -50,7 +50,7 @@ while(<IM1>) {
   $invm1{$e}->{$f} = 1;
   $esizes{$e}++;
   if (($sizes{$f} or 0) < $LIMIT_SIZE && !(defined $model1{$f}->{$e})) {
-    $model1{$f}->{$e} = 1;
+    $model1{$f}->{$e} = 1e-12;
     $sizes{$f}++;
     $inv_add++;
   }
@@ -66,7 +66,7 @@ while(<M1>) {
   chomp;
   my ($f, $e, $lp) = split /\s+/;
   if (($esizes{$e} or 0) < $LIMIT_SIZE && !(defined $invm1{$e}->{$f})) {
-    $invm1{$e}->{$f} = 1;
+    $invm1{$e}->{$f} = 1e-12;
     $esizes{$e}++;
     $dir_add++;
   }
@@ -106,6 +106,7 @@ my $ADD_111 = 1;
 my $ADD_ID = 1;
 my $ADD_PUNC = 1;
 my $ADD_NULL = 1;
+my $ADD_MODEL1 = 1;
 my $ADD_STEM_ID = 0;
 my $ADD_SYM = 0;
 my $BEAM_RATIO = 50;
@@ -184,6 +185,8 @@ for my $f (sort keys %fdict) {
     my $total_eandf = $ecounts{$e} + $fcounts{$f};
     my $dice = 2 * $efcount / $total_eandf;
     my @feats;
+    if (defined $m1 && $ADD_MODEL1) { push @feats, "Model1=$m1"; my $m1d = $m1 * $dice; push @feats, "M1Dice=$m1d"; }
+    if ($ADD_MODEL1 && !defined $m1) { push @feats, "NoModel1=1"; }
     if ($ADD_FIDENT && $efcount > $MIN_FEATURE_COUNT) {
       $fc++;
       push @feats, "F$fc=1";
```
