From 945adc48b08d8cbc1084b0e13a68876db707b816 Mon Sep 17 00:00:00 2001 From: redpony Date: Fri, 29 Oct 2010 14:07:45 +0000 Subject: more wa git-svn-id: https://ws10smt.googlecode.com/svn/trunk@701 ec762483-ff6d-05da-a07a-a48fb63a330f --- word-aligner/makefiles/makefile.grammars | 14 ++++++++++---- word-aligner/support/make_lex_grammar.pl | 8 ++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'word-aligner') diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars index 8a10cb19..c113688c 100644 --- a/word-aligner/makefiles/makefile.grammars +++ b/word-aligner/makefiles/makefile.grammars @@ -1,14 +1,14 @@ -all: corpus.f-e.lex-grammar.gz corpus.e-f.lex-grammar.gz corpus.class.e corpus.class.f corpus.stemmed.f fstem.map corpus.stemmed.e estem.map +all: corpus.f-e.lex-grammar.gz corpus.e-f.lex-grammar.gz corpus.class.e corpus.class.f corpus.stemmed.f fstem.map corpus.stemmed.e estem.map corpus.f-e.sgml corpus.e-f.sgml clean: - $(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar* *stem* + $(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar* *stem* corpus.f-e.sgml freq* psg* + SUPPORT_DIR = $(SCRIPT_DIR)/support GZIP = /usr/bin/gzip ZCAT = zcat -EXTRACT_WEIGHTS = $(SUPPORT_DIR)/extract_weights.pl EXTRACT_GRAMMAR = $(SUPPORT_DIR)/extract_grammar.pl -SUPPLEMENT_WEIGHTS = $(SUPPORT_DIR)/supplement_weights_file.pl EXTRACT_VOCAB = $(SUPPORT_DIR)/extract_vocab.pl +GENERATE_PSG = $(SUPPORT_DIR)/generate_per_sentence_grammars.pl ORTHONORM_E = $(SCRIPT_DIR)/ortho-norm/$(E_LANG).pl ORTHONORM_F = $(SCRIPT_DIR)/ortho-norm/$(F_LANG).pl STEM_F = $(SCRIPT_DIR)/stemmers/$(F_LANG).pl @@ -81,3 +81,9 @@ corpus.f-e.lex-grammar.gz: bidir.grammars corpus.e-f.lex-grammar.gz: bidir.grammars $(EXTRACT_GRAMMAR) 2 bidir.grammars | $(GZIP) -9 > corpus.e-f.lex-grammar.gz 
+corpus.f-e.sgml: f.voc corpus.f-e.lex-grammar.gz corpus.f-e + $(GENERATE_PSG) f.voc corpus.f-e corpus.f-e.lex-grammar.gz freq_grammar.f-e.gz psg.f-e $@ + +corpus.e-f.sgml: e.voc corpus.e-f.lex-grammar.gz corpus.e-f + $(GENERATE_PSG) e.voc corpus.e-f corpus.e-f.lex-grammar.gz freq_grammar.e-f.gz psg.e-f $@ + diff --git a/word-aligner/support/make_lex_grammar.pl b/word-aligner/support/make_lex_grammar.pl index e4cbf7ba..8d38abda 100755 --- a/word-aligner/support/make_lex_grammar.pl +++ b/word-aligner/support/make_lex_grammar.pl @@ -35,7 +35,7 @@ my %sizes = (); while(<M1>) { chomp; my ($f, $e, $lp) = split /\s+/; - $model1{$f}->{$e} = 1e-12 + exp($lp); + $model1{$f}->{$e} = sprintf("%.5g", 1e-12 + exp($lp)); $sizes{$f}++; } close M1; @@ -185,7 +185,11 @@ for my $f (sort keys %fdict) { my $total_eandf = $ecounts{$e} + $fcounts{$f}; my $dice = 2 * $efcount / $total_eandf; my @feats; - if (defined $m1 && $ADD_MODEL1) { push @feats, "Model1=$m1"; my $m1d = $m1 * $dice; push @feats, "M1Dice=$m1d"; } + if (defined $m1 && $ADD_MODEL1) { + push @feats, "Model1=$m1"; + my $m1d = sprintf("%.5g", $m1 * $dice); + push @feats, "M1Dice=$m1d"; + } if ($ADD_MODEL1 && !defined $m1) { push @feats, "NoModel1=1"; } if ($ADD_FIDENT && $efcount > $MIN_FEATURE_COUNT) { $fc++; -- cgit v1.2.3