summary | refs | log | tree | commit | diff
path: root/decoder/ff_sample_fsa.h
diff options
context:
space:
mode:
author graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-26 04:53:15 +0000
committer graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-26 04:53:15 +0000
commit f4b4aade473f9463dda6fac4baf9c0502d004deb (patch)
tree 7b6641f2733b4d64a9f1e273c0f6f2b8fd757d5f /decoder/ff_sample_fsa.h
parent b2ad842245f1645e4e9f3c60a80a07e13151a560 (diff)
LanguageModelFsa works. TODO: sri context shortening?
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@414 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_sample_fsa.h')
-rwxr-xr-x decoder/ff_sample_fsa.h 17
1 files changed, 17 insertions, 0 deletions
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 24f12560..6e6ad30e 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -27,6 +27,23 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa;
+struct SameFirstLetter : public FsaFeatureFunctionBase<SameFirstLetter> { // sample FSA feature whose state is a single char: the first letter of the most recently scanned word
+ SameFirstLetter(std::string const& param) : FsaFeatureFunctionBase<SameFirstLetter>(1,singleton_sentence("END")) { start[0]='a';h_start[0]=0; } // 1 byte of state, scan final (single) symbol "END" to get final state cost. `param` is not used. NOTE(review): start[0]='a' presumably seeds the initial state, in which case a first word beginning with 'a' would also score 1 -- confirm this is intended
+ int markov_order() const { return 1; } // presumably: the state depends on only the one preceding word -- confirm against FsaFeatureFunctionBase
+ Featval Scan1(WordID w,void const* old_state,void *new_state) const { // scores word w against old_state and writes the successor state into new_state
+ char cw=TD::Convert(w)[0]; // first character of whatever TD::Convert yields for w (presumably the word's spelling)
+ char co=*(char const*)old_state; // letter stored in the incoming state
+ *(char *)new_state = cw; // outgoing state remembers this word's first letter
+ return cw==co?1:0; // 1 when the two letters match, otherwise 0
+ }
+ void print_state(std::ostream &o,void const* st) const { // writes the state's single char to o
+ o<<*(char const*)st;
+ }
+ static std::string usage(bool param,bool verbose) { // forwards this feature's name, argument summary and description to FeatureFunction::usage_helper
+ return FeatureFunction::usage_helper("SameFirstLetter","[no args]","1 each time 2 consecutive words start with the same letter",param,verbose);
+ }
+};
+
// appears to be buggy right now: give it a bonus weight (+)
struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {