diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-29 14:07:45 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-29 14:07:45 +0000 |
commit | 78f50e0c3c63de2149045c5afb307e9a3cacff82 (patch) | |
tree | 5aa0a441e06b36397070281d383f1dc2d4bf9e91 /word-aligner | |
parent | 0c6c6e1e72b13ab0bf6ea2da3ac83ba5a74e5cff (diff) |
more wa
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@701 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'word-aligner')
-rw-r--r-- | word-aligner/makefiles/makefile.grammars | 14 | ||||
-rwxr-xr-x | word-aligner/support/make_lex_grammar.pl | 8 |
2 files changed, 16 insertions, 6 deletions
diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars index 8a10cb19..c113688c 100644 --- a/word-aligner/makefiles/makefile.grammars +++ b/word-aligner/makefiles/makefile.grammars @@ -1,14 +1,14 @@ -all: corpus.f-e.lex-grammar.gz corpus.e-f.lex-grammar.gz corpus.class.e corpus.class.f corpus.stemmed.f fstem.map corpus.stemmed.e estem.map +all: corpus.f-e.lex-grammar.gz corpus.e-f.lex-grammar.gz corpus.class.e corpus.class.f corpus.stemmed.f fstem.map corpus.stemmed.e estem.map corpus.f-e.sgml corpus.e-f.sgml clean: - $(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar* *stem* + $(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar* *stem* corpus.f-e.sgml freq* psg* + SUPPORT_DIR = $(SCRIPT_DIR)/support GZIP = /usr/bin/gzip ZCAT = zcat -EXTRACT_WEIGHTS = $(SUPPORT_DIR)/extract_weights.pl EXTRACT_GRAMMAR = $(SUPPORT_DIR)/extract_grammar.pl -SUPPLEMENT_WEIGHTS = $(SUPPORT_DIR)/supplement_weights_file.pl EXTRACT_VOCAB = $(SUPPORT_DIR)/extract_vocab.pl +GENERATE_PSG = $(SUPPORT_DIR)/generate_per_sentence_grammars.pl ORTHONORM_E = $(SCRIPT_DIR)/ortho-norm/$(E_LANG).pl ORTHONORM_F = $(SCRIPT_DIR)/ortho-norm/$(F_LANG).pl STEM_F = $(SCRIPT_DIR)/stemmers/$(F_LANG).pl @@ -81,3 +81,9 @@ corpus.f-e.lex-grammar.gz: bidir.grammars corpus.e-f.lex-grammar.gz: bidir.grammars $(EXTRACT_GRAMMAR) 2 bidir.grammars | $(GZIP) -9 > corpus.e-f.lex-grammar.gz +corpus.f-e.sgml: f.voc corpus.f-e.lex-grammar.gz corpus.f-e + $(GENERATE_PSG) f.voc corpus.f-e corpus.f-e.lex-grammar.gz freq_grammar.f-e.gz psg.f-e $@ + +corpus.e-f.sgml: e.voc corpus.e-f.lex-grammar.gz corpus.e-f + $(GENERATE_PSG) e.voc corpus.e-f corpus.e-f.lex-grammar.gz freq_grammar.e-f.gz psg.e-f $@ + diff --git a/word-aligner/support/make_lex_grammar.pl b/word-aligner/support/make_lex_grammar.pl index e4cbf7ba..8d38abda 100755 --- a/word-aligner/support/make_lex_grammar.pl +++ b/word-aligner/support/make_lex_grammar.pl @@ -35,7 +35,7 @@ my %sizes = (); while(<M1>) { chomp; my ($f, $e, $lp) = split /\s+/; - $model1{$f}->{$e} = 1e-12 + exp($lp); + $model1{$f}->{$e} = sprintf("%.5g", 1e-12 + exp($lp)); $sizes{$f}++; } close M1; @@ -185,7 +185,11 @@ for my $f (sort keys %fdict) { my $total_eandf = $ecounts{$e} + $fcounts{$f}; my $dice = 2 * $efcount / $total_eandf; my @feats; - if (defined $m1 && $ADD_MODEL1) { push @feats, "Model1=$m1"; my $m1d = $m1 * $dice; push @feats, "M1Dice=$m1d"; } + if (defined $m1 && $ADD_MODEL1) { + push @feats, "Model1=$m1"; + my $m1d = sprintf("%.5g", $m1 * $dice); + push @feats, "M1Dice=$m1d"; + } if ($ADD_MODEL1 && !defined $m1) { push @feats, "NoModel1=1"; } if ($ADD_FIDENT && $efcount > $MIN_FEATURE_COUNT) { $fc++; |