diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2010-12-09 17:04:29 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2010-12-09 17:04:29 -0500 |
commit | 9a8cbe4db88e63378b6d3c4ec96438819f1f1131 (patch) | |
tree | abf1a23739a033eaabd62f61e39ac249d9cf7717 /word-aligner/aligner.pl | |
parent | 61bfaf15c02a0555d8ffa5dd4e6ae32f09354610 (diff) |
major refactor of markov features for word alignment
Diffstat (limited to 'word-aligner/aligner.pl')
-rwxr-xr-x | word-aligner/aligner.pl | 14 |
1 files changed, 8 insertions, 6 deletions
```diff
diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl
index 81ac4198..f5ee5d3f 100755
--- a/word-aligner/aligner.pl
+++ b/word-aligner/aligner.pl
@@ -120,17 +120,19 @@
 grammar=$align_dir/grammars/corpus.$direction.lex-grammar.gz
 feature_function=WordPairFeatures $align_dir/grammars/wordpairs.$direction.features.gz
 feature_function=LexicalPairIdentity
-feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second
+# stem translation
 feature_function=LexicalPairIdentity S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map
+# POS translation
+feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second
 feature_function=InputIdentity
 feature_function=OutputIdentity
 feature_function=RelativeSentencePosition $align_dir/grammars/corpus.class.$first
-# the following two are deprecated
-feature_function=MarkovJump +b
-feature_function=MarkovJumpFClass $align_dir/grammars/corpus.class.$first
+feature_function=NewJump
+feature_function=NewJump use_binned_log_lengths flen
+# jump distance and src and destination class type
+feature_function=NewJump use_binned_log_lengths f0 fprev f:$align_dir/grammars/corpus.class.$first
 feature_function=SourceBigram
-# following is deprecated- should reuse SourceBigram the way LexicalPairIdentity does
-feature_function=SourcePOSBigram $align_dir/grammars/corpus.class.$first
+feature_function=SourceBigram SC $align_dir/grammars/corpus.class.$first
 EOT
 close CDEC;
 open AGENDA, ">$stage_dir/agenda.txt" or die "Can't write $stage_dir/agenda.txt: $!";
```