summary refs log tree commit diff
path: root/word-aligner
diff options
context:
space:
mode:
Diffstat (limited to 'word-aligner')
-rw-r--r-- word-aligner/makefiles/makefile.grammars 18
-rwxr-xr-x word-aligner/support/generate_word_pair_features.pl 11
2 files changed, 14 insertions, 15 deletions
diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars
index 60417ec5..be0644df 100644
--- a/word-aligner/makefiles/makefile.grammars
+++ b/word-aligner/makefiles/makefile.grammars
@@ -54,28 +54,28 @@ voc2class.e: corpus.e $(MKCLS)
voc2class.f: corpus.f $(MKCLS)
$(MKCLS) -c$(NCLASSES) -n10 -pcorpus.f -Vvoc2class.f opt
-corpus.class.e: corpus.e voc2class.e $(CLASSIFY)
+corpus.class.e: corpus.e voc2class.e
$(CLASSIFY) voc2class.e corpus.e > $@
-corpus.class.f: corpus.f voc2class.f $(CLASSIFY)
+corpus.class.f: corpus.f voc2class.f
$(CLASSIFY) voc2class.f corpus.f > $@
-corpus.f-e: corpus.f corpus.e $(MERGE_CORPUS)
+corpus.f-e: corpus.f corpus.e
$(MERGE_CORPUS) corpus.f corpus.e > $@
-corpus.e-f: corpus.f corpus.e $(MERGE_CORPUS)
+corpus.e-f: corpus.f corpus.e
$(MERGE_CORPUS) corpus.e corpus.f > $@
-corpus.f-e.model1: corpus.f-e $(MODEL1)
- $(MODEL1) -v -V corpus.f-e > $@
+corpus.f-e.model1: corpus.f-e
+ $(MODEL1) -v corpus.f-e > $@
-corpus.e-f.model1: corpus.e-f $(MODEL1)
+corpus.e-f.model1: corpus.e-f
$(MODEL1) -v -V corpus.e-f > $@
-corpus.f-e.full-model1: corpus.f-e $(MODEL1)
+corpus.f-e.full-model1: corpus.f-e
$(MODEL1) -t -999999 -v -V corpus.f-e > $@
-corpus.e-f.full-model1: corpus.e-f $(MODEL1)
+corpus.e-f.full-model1: corpus.e-f
$(MODEL1) -t -999999 -v -V corpus.e-f > $@
corpus.f-e.lex-grammar.gz: corpus.f-e corpus.f-e.model1 corpus.e-f.model1
diff --git a/word-aligner/support/generate_word_pair_features.pl b/word-aligner/support/generate_word_pair_features.pl
index b722ee49..b28f6feb 100755
--- a/word-aligner/support/generate_word_pair_features.pl
+++ b/word-aligner/support/generate_word_pair_features.pl
@@ -10,9 +10,7 @@ my %fclass = ();
load_classes($class_e, \%eclass);
load_classes($class_f, \%fclass);
-our @IDENT_BINS = qw (Ident0 Ident1 Ident2 Ident3 Ident4 Ident5 Ident6 Ident7 Ident8_9 Ident8_9 Ident10_11 Ident10_11 Ident12_14 Ident12_14 Ident12_14);
-die unless scalar @IDENT_BINS == 15;
-our $MAX_IDENT_BIN = 'IdentGT' . scalar @IDENT_BINS;
+our @IDENT_BINS = qw (Ident0 Ident1 Ident2 Ident3 Ident4 Ident5 Ident6 Ident7 Ident8 Ident9);
my $MIN_MAGNITUDE = 0.001; # minimum value of a feature
@@ -203,8 +201,8 @@ for my $f (sort keys %fdict) {
}
}
}
- my $f_num = ($of =~ /^-?\d[0-9\.\,]+%?$/ && (length($of) > 3));
- my $e_num = ($oe =~ /^-?\d[0-9\.\,]+%?$/ && (length($oe) > 3));
+ my $f_num = ($of =~ /^-?\d[0-9\.\,]+%?$/ && (length($of) > 2));
+ my $e_num = ($oe =~ /^-?\d[0-9\.\,]+%?$/ && (length($oe) > 2));
my $both_non_numeric = (!$e_num && !$f_num);
unless ($total_eandf > 20) {
@@ -425,7 +423,8 @@ sub dlenbin {
sub identbin {
my $x = shift;
if ($x == 0) { die; }
- if ($x > scalar @IDENT_BINS) { return $MAX_IDENT_BIN; }
+ $x = int(log($x + 1) / log(1.3));
+ if ($x >= scalar @IDENT_BINS) { return $IDENT_BINS[-1]; }
return $IDENT_BINS[$x];
}