diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-23 04:21:51 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-23 04:21:51 +0000 |
commit | 90d6674878bfc231012bb8eb2a3eaa183eee5220 (patch) | |
tree | bf67f2bb74a58088b8a77a4ab37c7b46e8d95da1 /decoder/ff_sample_fsa.h | |
parent | bcf7ad8a05799172ccd5d6ce73ddcd2f4c4a174e (diff) |
fsa: stateless works, debug sample bigram {Longer,Shorter}ThanPrev
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@375 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_sample_fsa.h')
-rwxr-xr-x | decoder/ff_sample_fsa.h | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h new file mode 100755 index 00000000..13a25387 --- /dev/null +++ b/decoder/ff_sample_fsa.h @@ -0,0 +1,111 @@ +#ifndef FF_SAMPLE_FSA_H +#define FF_SAMPLE_FSA_H + +#include "ff_from_fsa.h" + +// example: feature val = -1 * # of target words +struct WordPenaltyFsa : public FsaFeatureFunctionBase { + static std::string usage(bool param,bool verbose) { + return FeatureFunction::usage_helper( + "WordPenaltyFsa","","-1 per target word" + ,param,verbose); + } + + WordPenaltyFsa(std::string const& param) { + init_fid(usage(false,false)); + return; + //below are all defaults: + set_state_bytes(0); + start.clear(); + h_start.clear(); + } + static const float val_per_target_word=-1; + // move from state to next_state after seeing word x, while emitting features->add_value(fid,val) possibly with duplicates. state and next_state may be same memory. + void Scan(SentenceMetadata const& smeta,WordID w,void const* state,void *next_state,FeatureVector *features) const { + features->add_value(fid_,val_per_target_word); + } +}; + +typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa; + + +// +struct LongerThanPrev : public FsaFeatureFunctionBase { + static std::string usage(bool param,bool verbose) { + return FeatureFunction::usage_helper( + "LongerThanPrev", + "", + "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (always fires for first word of sentence)", + param,verbose); + } + + static inline int &wordlen(void *state) { + return *(int*)state; + } + static inline int wordlen(void const* state) { + return *(int const*)state; + } + static inline int wordlen(WordID w) { + return std::strlen(TD::Convert(w)); + } + int markov_order() const { return 1; } + LongerThanPrev(std::string const& param) { + init_fid(usage(false,false)); + set_state_bytes(sizeof(int)); +// start.resize(state_bytes()); // this is done by set_state_bytes already. +// h_start.resize(state_bytes()); +// int ss=-1; +// wordcpy((WordID*)start.begin(),&ss,&ss+1); + //to_state(start.begin(),&ss,1); + wordlen(start.begin())=-1; // same as above. + wordlen(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous + } + + static const float val_per_target_word=-1; + void Scan(SentenceMetadata const& smeta,WordID w,void const* state,void *next_state,FeatureVector *features) const { + int prevlen=wordlen(state); + int len=wordlen(w); + wordlen(next_state)=len; + if (len>prevlen) + features->add_value(fid_,val_per_target_word); + } + +}; + +// similar example feature; base type exposes stateful type, defines markov_order 1, state size = sizeof(State) +struct ShorterThanPrev : public FsaTypedBase<int>,FsaTypedScan<ShorterThanPrev> { + typedef int State; // defines # of bytes in state and return type of state(void *) + static std::string usage(bool param,bool verbose) { + return FeatureFunction::usage_helper( + "ShorterThanPrev", + "", + "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (always fires for end of sentence)", + param,verbose); + } + + static inline int wordlen(WordID w) { + return std::strlen(TD::Convert(w)); + } + ShorterThanPrev(std::string const& param) { + init_fid(usage(false,false)); + end_phrase_.push_back(TD::Convert("")); // this triggers end of sentence firing + set_starts(-1,4); // estimate: anything <4 chars is usually shorter than previous + } + + static const float val_per_target_word=-1; + // evil anti-google int & len out-param: + void Scan(SentenceMetadata const& smeta,WordID w,int prevlen,int &len,FeatureVector *features) const { + len=wordlen(w); + if (len<prevlen) + features->add_value(fid_,val_per_target_word); + } + + // already provided by FsaTypedScan<ShorterThanPrev> + void Scan(SentenceMetadata const& smeta,WordID w,void const* st,void *next_state,FeatureVector *features) const { + Scan(smeta,w,state(st),state(next_state),features); + } + +}; + + +#endif |