diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-12-01 02:32:20 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-12-01 02:32:20 +0000 |
commit | 94dae3d1c5bb3a4cbde53e55790c90c819e324a9 (patch) | |
tree | eedcdb20a2bec1ecc9243e42135b9371a841e4aa /word-aligner/support/make_lex_grammar.pl | |
parent | fd4259da347b371a1a399b9130f62938e3db462b (diff) |
new feats
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@735 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'word-aligner/support/make_lex_grammar.pl')
-rwxr-xr-x | word-aligner/support/make_lex_grammar.pl | 21 |
1 files changed, 18 insertions, 3 deletions
```diff
diff --git a/word-aligner/support/make_lex_grammar.pl b/word-aligner/support/make_lex_grammar.pl
index 0d034abc..c96071bf 100755
--- a/word-aligner/support/make_lex_grammar.pl
+++ b/word-aligner/support/make_lex_grammar.pl
@@ -47,7 +47,7 @@ my %esizes=();
 while(<IM1>) {
 chomp;
 my ($e, $f, $lp) = split /\s+/;
-$invm1{$e}->{$f} = 1;
+$invm1{$e}->{$f} = sprintf("%.5g", 1e-12 + exp($lp));
 $esizes{$e}++;
 if (($sizes{$f} or 0) < $LIMIT_SIZE && !(defined $model1{$f}->{$e})) {
 $model1{$f}->{$e} = 1e-12;
@@ -196,10 +196,16 @@ for my $f (sort keys %fdict) {
 next unless $is_good_pair;
 if (defined $m1 && $ADD_MODEL1) {
 push @feats, "Model1=$m1";
-my $m1d = sprintf("%.5g", $m1 * $dice);
-push @feats, "M1Dice=$m1d";
+my $m1d = sprintf("%.5g", sqrt($m1 * $dice));
+push @feats, "Model1Dice=$m1d";
 }
 if ($ADD_MODEL1 && !defined $m1) { push @feats, "NoModel1=1"; }
+if (defined $im1 && $ADD_MODEL1) {
+push @feats, "InvModel1=$im1";
+}
+if (!defined $im1 && $ADD_MODEL1) {
+push @feats, "NoInvModel1=1";
+}
 if ($ADD_FIDENT && $efcount > $MIN_FEATURE_COUNT) {
 $fc++;
 push @feats, "F$fc=1";
@@ -235,6 +241,15 @@ for my $f (sort keys %fdict) {
 my $f_num = ($of =~ /^-?\d[0-9\.\,]+%?$/ && (length($of) > 3));
 my $e_num = ($oe =~ /^-?\d[0-9\.\,]+%?$/ && (length($oe) > 3));
 my $both_non_numeric = (!$e_num && !$f_num);
+unless ($total_eandf > 20) {
+if ($f_num && $e_num) {
+my $xf = $of;
+$xf =~ s/[.,]//g;
+my $xe = $oe;
+$xe =~ s/[.,]//g;
+if (($of ne $oe) && ($xe eq $xf)) { push @feats, "NumNearIdent=1"; }
+}
+}
 if ($ADD_STEM_ID) {
 my $el = 4;
 my $fl = 4;
```