diff options
Diffstat (limited to 'word-aligner')
-rwxr-xr-x | word-aligner/support/make_lex_grammar.pl | 21 |
1 files changed, 18 insertions, 3 deletions
diff --git a/word-aligner/support/make_lex_grammar.pl b/word-aligner/support/make_lex_grammar.pl
index 0d034abc..c96071bf 100755
--- a/word-aligner/support/make_lex_grammar.pl
+++ b/word-aligner/support/make_lex_grammar.pl
@@ -47,7 +47,7 @@ my %esizes=();
 while(<IM1>) {
   chomp;
   my ($e, $f, $lp) = split /\s+/;
-  $invm1{$e}->{$f} = 1;
+  $invm1{$e}->{$f} = sprintf("%.5g", 1e-12 + exp($lp));
   $esizes{$e}++;
   if (($sizes{$f} or 0) < $LIMIT_SIZE && !(defined $model1{$f}->{$e})) {
     $model1{$f}->{$e} = 1e-12;
@@ -196,10 +196,16 @@ for my $f (sort keys %fdict) {
     next unless $is_good_pair;
     if (defined $m1 && $ADD_MODEL1) {
       push @feats, "Model1=$m1";
-      my $m1d = sprintf("%.5g", $m1 * $dice);
-      push @feats, "M1Dice=$m1d";
+      my $m1d = sprintf("%.5g", sqrt($m1 * $dice));
+      push @feats, "Model1Dice=$m1d";
     }
     if ($ADD_MODEL1 && !defined $m1) { push @feats, "NoModel1=1"; }
+    if (defined $im1 && $ADD_MODEL1) {
+      push @feats, "InvModel1=$im1";
+    }
+    if (!defined $im1 && $ADD_MODEL1) {
+      push @feats, "NoInvModel1=1";
+    }
     if ($ADD_FIDENT && $efcount > $MIN_FEATURE_COUNT) {
       $fc++;
       push @feats, "F$fc=1";
@@ -235,6 +241,15 @@ for my $f (sort keys %fdict) {
     my $f_num = ($of =~ /^-?\d[0-9\.\,]+%?$/ && (length($of) > 3));
     my $e_num = ($oe =~ /^-?\d[0-9\.\,]+%?$/ && (length($oe) > 3));
     my $both_non_numeric = (!$e_num && !$f_num);
+    unless ($total_eandf > 20) {
+      if ($f_num && $e_num) {
+        my $xf = $of;
+        $xf =~ s/[.,]//g;
+        my $xe = $oe;
+        $xe =~ s/[.,]//g;
+        if (($of ne $oe) && ($xe eq $xf)) { push @feats, "NumNearIdent=1"; }
+      }
+    }
     if ($ADD_STEM_ID) {
       my $el = 4;
       my $fl = 4;