// Overview (added in review).
//
// FeatureFunctionFromFsa derives from FeatureFunction and forwards to an FSA
// feature object `ff`, which the constructor builds from its `param` string.
//   - The constructor sets debug_=true and then calls Init().
//   - usage() returns Impl::usage(args,verbose); features() returns ff.features().
//   - TraversalFeaturesLog() reads the rule's target side (rule.e()). When ssz
//     (bytes of fsa state) is 0 it takes a stateless path: if
//     Impl::simple_phrase_score is set, items are passed one at a time to
//     ff.ScanAccum, accumulating into `accum` (apparently only items that are
//     tokens -- the filter condition is not fully visible here). A second loop,
//     apparently the alternative branch, advances j to the next position where
//     RHS_WORD(j) holds and jumps to rhs_done once j>=ee.
//
// Antecedent-state helpers at the end of the class:
//   - fsa_state(ant) is `ant` advanced by state_offset bytes (const and
//     non-const overloads).
//   - left_end(left,e) steps e backwards while e[-1]==TD::none, never past
//     `left`. The result points one past the last non-TD::none word, so the
//     seen left words are the half-open range [left, result).
//   - left_end(ant) applies that to the words lying between ant and
//     fsa_state(ant); left_end_full(ant) is fsa_state(ant) as a WordID pointer.
//   - The no-argument constructor is declared but has no body here ("not allowed").
//
// When TEST_FSA is defined, tdict.cc and ff_sample_fsa.h are included and a
// main() is started.
//
// NOTE(review): this copy looks damaged -- text between a '<' and the next '>'
// appears to be missing. Examples: "template class" uses Impl without any
// template parameter list, "std::vector&" has no element type, and several
// loop conditions and FSAFFDBG arguments are cut short. main() is also cut off
// at the end. Restore those spans from a pristine copy before compiling or
// modifying this file; nothing below has been altered.
#ifndef FF_FROM_FSA_H #define FF_FROM_FSA_H #include "ff_fsa.h" #ifndef FSA_FF_DEBUG # define FSA_FF_DEBUG 0 #endif #if FSA_FF_DEBUG # define FSAFFDBG(e,x) FSADBGif(debug(),e,x) # define FSAFFDBGnl(e) FSADBGif_nl(debug(),e) #else # define FSAFFDBG(e,x) # define FSAFFDBGnl(e) #endif /* regular bottom up scorer from Fsa feature uses guarantee about markov order=N to score ASAP encoding of state: if less than N-1 (ctxlen) words either: struct FF : public FsaImpl,FeatureFunctionFromFsa (more efficient) or: struct FF : public FsaFeatureFunctionDynamic,FeatureFunctionFromFsa (code sharing, but double dynamic dispatch) */ template class FeatureFunctionFromFsa : public FeatureFunction { typedef void const* SP; typedef WordID *W; typedef WordID const* WP; public: FeatureFunctionFromFsa(std::string const& param) : ff(param) { debug_=true; // because factory won't set until after we construct. Init(); } static std::string usage(bool args,bool verbose) { return Impl::usage(args,verbose); } Features features() const { return ff.features(); } // Log because it potentially stores info in edge. otherwise the same as regular TraversalFeatures. void TraversalFeaturesLog(const SentenceMetadata& smeta, Hypergraph::Edge& edge, const std::vector& ant_contexts, FeatureVector* features, FeatureVector* estimated_features, void* out_state) const { TRule const& rule=*edge.rule_; Sentence const& e = rule.e(); // items in target side of rule typename Impl::Accum accum,h_accum; if (!ssz) { // special case for no state - but still build up longer phrases to score in case FSA overrides ScanPhraseAccum if (Impl::simple_phrase_score) { // save the effort of building up the contiguous rule phrases - probably can just use the else branch, now that phrases aren't copied but are scanned off e directly. 
for (int j=0,ee=e.size();j=1) // token ff.ScanAccum(smeta,edge,(WordID)e[j],NULL,NULL,&accum); FSAFFDBG(edge," "<=1) for (int j=0,ee=e.size();;++j) { // items in target side of rule for(;;++j) { if (j>=ee) goto rhs_done; // j may go 1 past ee due to k possibly getting to end if (RHS_WORD(j)) break; } // word @j int k=j; while(k{"<") FSAFFDBG(edge," end="<{"< markov order 0 } int M; // markov order (ctx len) FeatureFunctionFromFsa(); // not allowed. int state_offset; // NOTE: in bytes (add to char* only). store left-words first, then fsa state int ssz; // bytes in fsa state /* state layout: left WordIds, followed by fsa state left words have never been scored. last ones remaining will be scored on FinalTraversalFeatures only. right state is unknown until we have all M left words (less than M means TD::none will pad out right end). unk right state will be zeroed out for proper hash/equal recombination. */ static inline WordID const* left_end(WordID const* left, WordID const* e) { for (;e>left;--e) if (e[-1]!=TD::none) break; //post: [left,e] are the seen left words return e; } inline WP left_end(SP ant) const { return left_end((WP)ant,(WP)fsa_state(ant)); } inline WP left_end_full(SP ant) const { return (WP)fsa_state(ant); } inline SP fsa_state(SP ant) const { return ((char const*)ant+state_offset); } inline void *fsa_state(void * ant) const { return ((char *)ant+state_offset); } }; #ifdef TEST_FSA # include "tdict.cc" # include "ff_sample_fsa.h" int main() { std::cerr<<"Testing left_end...\n"; std::cerr<<"sizeof(FeatureVector)="<