From f4b4aade473f9463dda6fac4baf9c0502d004deb Mon Sep 17 00:00:00 2001
From: graehl
Date: Mon, 26 Jul 2010 04:53:15 +0000
Subject: LanguageModelFsa works. TODO: sri context shortening?

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@414 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 decoder/ff_sample_fsa.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'decoder/ff_sample_fsa.h')

diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 24f12560..6e6ad30e 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -27,6 +27,23 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase {
 
 typedef FeatureFunctionFromFsa WordPenaltyFromFsa;
 
+struct SameFirstLetter : public FsaFeatureFunctionBase { // sample FSA feature: scores 1 whenever a word starts with the same character as the word before it. NOTE(review): template arguments look stripped from this export - confirm against the repository
+  SameFirstLetter(std::string const& param) : FsaFeatureFunctionBase(1,singleton_sentence("END")) { start[0]='a';h_start[0]=0; } // 1 byte of state, scan final (single) symbol "END" to get final state cost. start[0] is set to 'a' and h_start[0] to 0; param is not used. NOTE(review): a first word beginning with 'a' would presumably score 1 against this start state - confirm intended
+  int markov_order() const { return 1; } // always 1; presumably "state depends on one previous word", consistent with the 1-byte state - confirm against the base class
+  Featval Scan1(WordID w,void const* old_state,void *new_state) const { // scores word w against old_state and writes the successor state into new_state
+    char cw=TD::Convert(w)[0]; // first character of whatever TD::Convert returns for w
+    char co=*(char const*)old_state; // the old state, read as a single char
+    *(char *)new_state = cw; // successor state is this word's first character
+    return cw==co?1:0; // 1 if it equals the stored character, otherwise 0
+  }
+  void print_state(std::ostream &o,void const* st) const { // writes the one-character state to o
+    o<<*(char const*)st;
+  }
+  static std::string usage(bool param,bool verbose) { // builds the usage text by forwarding name, argument summary and description to FeatureFunction::usage_helper
+    return FeatureFunction::usage_helper("SameFirstLetter","[no args]","1 each time 2 consecutive words start with the same letter",param,verbose);
+  }
+};
+
 // appears to be buggy right now: give it a bonus weight (+)
 struct LongerThanPrev : public FsaFeatureFunctionBase {
-- 
cgit v1.2.3