summary | refs | log | tree | commit | diff
path: root/word-aligner
diff options
context:
space:
mode:
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-10-29 14:07:45 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-10-29 14:07:45 +0000
commit: 78f50e0c3c63de2149045c5afb307e9a3cacff82 (patch)
tree: 5aa0a441e06b36397070281d383f1dc2d4bf9e91 /word-aligner
parent: 0c6c6e1e72b13ab0bf6ea2da3ac83ba5a74e5cff (diff)
more wa
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@701 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'word-aligner')
-rw-r--r-- word-aligner/makefiles/makefile.grammars 14
-rwxr-xr-x word-aligner/support/make_lex_grammar.pl 8
2 files changed, 16 insertions, 6 deletions
diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars
index 8a10cb19..c113688c 100644
--- a/word-aligner/makefiles/makefile.grammars
+++ b/word-aligner/makefiles/makefile.grammars
@@ -1,14 +1,14 @@
-all: corpus.f-e.lex-grammar.gz corpus.e-f.lex-grammar.gz corpus.class.e corpus.class.f corpus.stemmed.f fstem.map corpus.stemmed.e estem.map
+all: corpus.f-e.lex-grammar.gz corpus.e-f.lex-grammar.gz corpus.class.e corpus.class.f corpus.stemmed.f fstem.map corpus.stemmed.e estem.map corpus.f-e.sgml corpus.e-f.sgml
clean:
- $(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar* *stem*
+ $(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar* *stem* corpus.f-e.sgml freq* psg*
+
SUPPORT_DIR = $(SCRIPT_DIR)/support
GZIP = /usr/bin/gzip
ZCAT = zcat
-EXTRACT_WEIGHTS = $(SUPPORT_DIR)/extract_weights.pl
EXTRACT_GRAMMAR = $(SUPPORT_DIR)/extract_grammar.pl
-SUPPLEMENT_WEIGHTS = $(SUPPORT_DIR)/supplement_weights_file.pl
EXTRACT_VOCAB = $(SUPPORT_DIR)/extract_vocab.pl
+GENERATE_PSG = $(SUPPORT_DIR)/generate_per_sentence_grammars.pl
ORTHONORM_E = $(SCRIPT_DIR)/ortho-norm/$(E_LANG).pl
ORTHONORM_F = $(SCRIPT_DIR)/ortho-norm/$(F_LANG).pl
STEM_F = $(SCRIPT_DIR)/stemmers/$(F_LANG).pl
@@ -81,3 +81,9 @@ corpus.f-e.lex-grammar.gz: bidir.grammars
corpus.e-f.lex-grammar.gz: bidir.grammars
$(EXTRACT_GRAMMAR) 2 bidir.grammars | $(GZIP) -9 > corpus.e-f.lex-grammar.gz
+corpus.f-e.sgml: f.voc corpus.f-e.lex-grammar.gz corpus.f-e
+ $(GENERATE_PSG) f.voc corpus.f-e corpus.f-e.lex-grammar.gz freq_grammar.f-e.gz psg.f-e $@
+
+corpus.e-f.sgml: e.voc corpus.e-f.lex-grammar.gz corpus.e-f
+ $(GENERATE_PSG) e.voc corpus.e-f corpus.e-f.lex-grammar.gz freq_grammar.e-f.gz psg.e-f $@
+
diff --git a/word-aligner/support/make_lex_grammar.pl b/word-aligner/support/make_lex_grammar.pl
index e4cbf7ba..8d38abda 100755
--- a/word-aligner/support/make_lex_grammar.pl
+++ b/word-aligner/support/make_lex_grammar.pl
@@ -35,7 +35,7 @@ my %sizes = ();
while(<M1>) {
chomp;
my ($f, $e, $lp) = split /\s+/;
- $model1{$f}->{$e} = 1e-12 + exp($lp);
+ $model1{$f}->{$e} = sprintf("%.5g", 1e-12 + exp($lp));
$sizes{$f}++;
}
close M1;
@@ -185,7 +185,11 @@ for my $f (sort keys %fdict) {
my $total_eandf = $ecounts{$e} + $fcounts{$f};
my $dice = 2 * $efcount / $total_eandf;
my @feats;
- if (defined $m1 && $ADD_MODEL1) { push @feats, "Model1=$m1"; my $m1d = $m1 * $dice; push @feats, "M1Dice=$m1d"; }
+ if (defined $m1 && $ADD_MODEL1) {
+ push @feats, "Model1=$m1";
+ my $m1d = sprintf("%.5g", $m1 * $dice);
+ push @feats, "M1Dice=$m1d";
+ }
if ($ADD_MODEL1 && !defined $m1) { push @feats, "NoModel1=1"; }
if ($ADD_FIDENT && $efcount > $MIN_FEATURE_COUNT) {
$fc++;