/*
 * FeatureFunctionFromFsa: include guard, debug macros and the first part of the class.
 *
 * What the visible text shows:
 *  - FSAFFDBG / FSAFFDBGnl forward to FSADBGif / FSADBGif_nl when FSA_FF_DEBUG is nonzero and expand to
 *    nothing otherwise (FSA_FF_DEBUG is defined as 0 here).
 *  - The constructor builds the member ff from param, sets debug=true and calls Init().
 *  - usage() forwards to Impl::usage(); features() forwards to ff.features().
 *  - TraversalFeaturesLog() starts with ff.init_features(features) and, when ssz is zero, walks the
 *    target side of the edge's rule.
 *
 * NOTE(review): this whole section sits on one physical line, so the leading #ifndef directive and the
 * first // comment each run to the end of that line; the original line breaks need restoring.
 * NOTE(review): text following several '<' characters looks lost (no parameter list after `template`,
 * no element type on `std::vector`, truncated FSAFFDBG arguments), and Impl is used without a visible
 * declaration - presumably it is the template parameter. TODO: restore from the upstream copy of this header.
 */
#ifndef FF_FROM_FSA_H #define FF_FROM_FSA_H #include "ff_fsa.h" #define FSA_FF_DEBUG 0 #if FSA_FF_DEBUG # define FSAFFDBG(e,x) FSADBGif(debug,e,x) # define FSAFFDBGnl(e) FSADBGif_nl(debug,e) #else # define FSAFFDBG(e,x) # define FSAFFDBGnl(e) #endif /* regular bottom up scorer from Fsa feature uses guarantee about markov order=N to score ASAP encoding of state: if less than N-1 (ctxlen) words either: struct FF : public FsaImpl,FeatureFunctionFromFsa (more efficient) or: struct FF : public FsaFeatureFunctionDynamic,FeatureFunctionFromFsa (code sharing, but double dynamic dispatch) */ template class FeatureFunctionFromFsa : public FeatureFunction { typedef void const* SP; typedef WordID *W; typedef WordID const* WP; public: FeatureFunctionFromFsa(std::string const& param) : ff(param) { debug=true; // because factory won't set until after we construct. Init(); } static std::string usage(bool args,bool verbose) { return Impl::usage(args,verbose); } Features features() const { return ff.features(); } //TODO: add source span to Fsa FF interface, pass along //TODO: read/debug VERY CAREFULLY void TraversalFeaturesLog(const SentenceMetadata& smeta, Hypergraph::Edge& edge, const std::vector& ant_contexts, FeatureVector* features, FeatureVector* estimated_features, void* out_state) const { ff.init_features(features); // estimated_features is fresh if (!ssz) { TRule const& rule=*edge.rule_; Sentence const& e = rule.e(); for (int j = 0; j < e.size(); ++j) { // items in target side of rule if (e[j] < 1) { // variable } else { WordID ew=e[j]; FSAFFDBG(edge,' '< fsa(ff,smeta,edge); TRule const& rule=*edge.rule_; Sentence const& e = rule.e(); for (int j = 0; j < e.size(); ++j) { // items in target side of rule if (e[j] < 1) { // variable SP a = ant_contexts[-e[j]]; FSAFFDBG(edge,' '<{"<") FSAFFDBG(edge," end="<{"< markov order 0 } int M; // markov order (ctx len) FeatureFunctionFromFsa(); // not allowed. int state_offset; // NOTE: in bytes (add to char* only). 
store left-words first, then fsa state int ssz; // bytes in fsa state /* state layout: left WordIds, followed by fsa state left words have never been scored. last ones remaining will be scored on FinalTraversalFeatures only. right state is unknown until we have all M left words (less than M means TD::none will pad out right end). unk right state will be zeroed out for proper hash/equal recombination. */ static inline WordID const* left_end(WordID const* left, WordID const* e) { for (;e>left;--e) if (e[-1]!=TD::none) break; //post: [left,e] are the seen left words return e; } inline WP left_end(SP ant) const { return left_end((WP)ant,(WP)fsa_state(ant)); } inline WP left_end_full(SP ant) const { return (WP)fsa_state(ant); } inline SP fsa_state(SP ant) const { return ((char const*)ant+state_offset); } inline void *fsa_state(void * ant) const { return ((char *)ant+state_offset); } void clear_fsa_state(void *ant) const { // when state is unknown std::memset(fsa_state(ant),0,ssz); } inline void fstatecpy(void *ant,void const* src) const { std::memcpy(fsa_state(ant),src,ssz); } }; #ifdef TEST_FSA # include "tdict.cc" # include "ff_sample_fsa.h" int main() { std::cerr<<"Testing left_end...\n"; std::cerr<<"sizeof(FeatureVector)="<