summary refs log tree commit diff
path: root/word-aligner/aligner.pl
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2010-12-09 17:04:29 -0500
committer Chris Dyer <cdyer@cs.cmu.edu> 2010-12-09 17:04:29 -0500
commit 35142ef52f15d610ca08fa622b83594cf111ce4a (patch)
tree c2196761993353bca47c7073e6cb5d996c4dad8f /word-aligner/aligner.pl
parent a80c69d266886d9911eb91833811d7f8393ac64d (diff)
major refactor of markov features for word alignment
Diffstat (limited to 'word-aligner/aligner.pl')
-rwxr-xr-x word-aligner/aligner.pl 14
1 files changed, 8 insertions, 6 deletions
diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl
index 81ac4198..f5ee5d3f 100755
--- a/word-aligner/aligner.pl
+++ b/word-aligner/aligner.pl
@@ -120,17 +120,19 @@ grammar=$align_dir/grammars/corpus.$direction.lex-grammar.gz
feature_function=WordPairFeatures $align_dir/grammars/wordpairs.$direction.features.gz
feature_function=LexicalPairIdentity
-feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second
+# stem translation
feature_function=LexicalPairIdentity S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map
+# POS translation
+feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second
feature_function=InputIdentity
feature_function=OutputIdentity
feature_function=RelativeSentencePosition $align_dir/grammars/corpus.class.$first
-# the following two are deprecated
-feature_function=MarkovJump +b
-feature_function=MarkovJumpFClass $align_dir/grammars/corpus.class.$first
+feature_function=NewJump
+feature_function=NewJump use_binned_log_lengths flen
+# jump distance and src and destination class type
+feature_function=NewJump use_binned_log_lengths f0 fprev f:$align_dir/grammars/corpus.class.$first
feature_function=SourceBigram
-# following is deprecated- should reuse SourceBigram the way LexicalPairIdentity does
-feature_function=SourcePOSBigram $align_dir/grammars/corpus.class.$first
+feature_function=SourceBigram SC $align_dir/grammars/corpus.class.$first
EOT
close CDEC;
open AGENDA, ">$stage_dir/agenda.txt" or die "Can't write $stage_dir/agenda.txt: $!";