summaryrefslogtreecommitdiff
path: root/word-aligner/aligner.pl
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2010-12-09 17:04:29 -0500
committerChris Dyer <cdyer@cs.cmu.edu>2010-12-09 17:04:29 -0500
commit9a8cbe4db88e63378b6d3c4ec96438819f1f1131 (patch)
treeabf1a23739a033eaabd62f61e39ac249d9cf7717 /word-aligner/aligner.pl
parent61bfaf15c02a0555d8ffa5dd4e6ae32f09354610 (diff)
major refactor of markov features for word alignment
Diffstat (limited to 'word-aligner/aligner.pl')
-rwxr-xr-xword-aligner/aligner.pl14
1 files changed, 8 insertions, 6 deletions
diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl
index 81ac4198..f5ee5d3f 100755
--- a/word-aligner/aligner.pl
+++ b/word-aligner/aligner.pl
@@ -120,17 +120,19 @@ grammar=$align_dir/grammars/corpus.$direction.lex-grammar.gz
feature_function=WordPairFeatures $align_dir/grammars/wordpairs.$direction.features.gz
feature_function=LexicalPairIdentity
-feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second
+# stem translation
feature_function=LexicalPairIdentity S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map
+# POS translation
+feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second
feature_function=InputIdentity
feature_function=OutputIdentity
feature_function=RelativeSentencePosition $align_dir/grammars/corpus.class.$first
-# the following two are deprecated
-feature_function=MarkovJump +b
-feature_function=MarkovJumpFClass $align_dir/grammars/corpus.class.$first
+feature_function=NewJump
+feature_function=NewJump use_binned_log_lengths flen
+# jump distance and src and destination class type
+feature_function=NewJump use_binned_log_lengths f0 fprev f:$align_dir/grammars/corpus.class.$first
feature_function=SourceBigram
-# following is deprecated- should reuse SourceBigram the way LexicalPairIdentity does
-feature_function=SourcePOSBigram $align_dir/grammars/corpus.class.$first
+feature_function=SourceBigram SC $align_dir/grammars/corpus.class.$first
EOT
close CDEC;
open AGENDA, ">$stage_dir/agenda.txt" or die "Can't write $stage_dir/agenda.txt: $!";