// FeatureFunctionFromFsa: a FeatureFunction subclass that holds an `Impl ff`
// member (constructed from the constructor's param string) and forwards
// usage() and features() to it.
//
// NOTE(review): this copy of the file looks damaged and is unlikely to compile
// as-is -- confirm against version control before changing any code:
//   * Line breaks appear to have been collapsed, so every `//` comment now
//     runs to the end of a very long physical line and the preprocessor
//     directives share a line with other text.
//   * Text between a `<` and the following `>` appears to be missing:
//     `template class ...` has no parameter list although `Impl` is used as a
//     type, `std::vector` and `FsaScanner` carry no template arguments, and
//     `if (left_out+nwM)` / `if (left_out score(...` read like two statements
//     fused across a gap.
//   * `final_features`, `ends`, `l`, `lend`, `rst` and `ss` are used after the
//     second gap without any visible declaration, so the tail of
//     TraversalFeaturesImpl and the head of another member function
//     (presumably FinalTraversalFeatures, which the state-layout comment
//     names) seem to have been lost there too.
//
// What the visible code establishes:
//   * Init() sets M = ff.markov_order(), ssz = ff.state_bytes(),
//     state_offset = sizeof(WordID)*M, and calls
//     SetStateSize(ff.state_bytes()+state_offset).
//   * fsa_state(p) is p advanced by state_offset bytes, i.e. a state buffer is
//     M WordID slots followed by ssz bytes of Impl state. left_end_full(p) is
//     that same address viewed as a WordID pointer.
//   * left_end(left,e) steps e backwards while e[-1]==TD::none and returns it.
//     test() checks that the result is one past the last non-none word, so the
//     seen words form the half-open range [left,e).
//   * clear_fsa_state() zero-fills the ssz bytes at fsa_state(ant);
//     fstatecpy() copies ssz bytes from src to dest.
//   * TraversalFeaturesImpl(), when ssz==0, calls
//     ff.Scan(smeta,ew,0,0,features) for every target-side item e[j] >= 1 of
//     edge.rule_ (items < 1 are treated as variables and skipped), then
//     returns.
//   * rule_feature() is true exactly when StateSize()==0.
//   * With TEST_FSA defined, the file also includes tdict.cc and
//     ff_sample_fsa.h and defines a main() that prints a banner to std::cerr
//     and runs WordPenaltyFromFsa::test().
//
// NOTE(review): std::memset/std::memcpy, assert and std::cerr are used but the
// only include visible outside TEST_FSA is "ff_fsa.h" -- presumably <cstring>,
// <cassert> and <iostream> arrive through it; verify.
#ifndef FF_FROM_FSA_H #define FF_FROM_FSA_H #include "ff_fsa.h" /* regular bottom up scorer from Fsa feature uses guarantee about markov order=N to score ASAP encoding of state: if less than N-1 (ctxlen) words either: struct FF : public FsaImpl,FeatureFunctionFromFsa (more efficient) or: struct FF : public FsaFeatureFunctionDynamic,FeatureFunctionFromFsa (code sharing, but double dynamic dispatch) */ template class FeatureFunctionFromFsa : public FeatureFunction { typedef void const* SP; typedef WordID *W; typedef WordID const* WP; public: FeatureFunctionFromFsa(std::string const& param) : ff(param) { Init(); } static std::string usage(bool args,bool verbose) { return Impl::usage(args,verbose); } Features features() const { return ff.features(); } //TODO: add source span to Fsa FF interface, pass along //TODO: read/debug VERY CAREFULLY void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const std::vector& ant_contexts, FeatureVector* features, FeatureVector* estimated_features, void* out_state) const { ff.init_features(features); // estimated_features is fresh if (!ssz) { TRule const& rule=*edge.rule_; Sentence const& e = rule.e(); for (int j = 0; j < e.size(); ++j) { // items in target side of rule if (e[j] < 1) { // variable } else { WordID ew=e[j]; ff.Scan(smeta,ew,0,0,features); } } return; } SP h_start=ff.heuristic_start_state(); W left_begin=(W)out_state; W left_out=left_begin; // [left,fsa_state) = left ctx words. if left words aren't full, then null wordid WP left_full=left_end_full(out_state); FsaScanner fsa(ff,smeta); TRule const& rule=*edge.rule_; Sentence const& e = rule.e(); for (int j = 0; j < e.size(); ++j) { // items in target side of rule if (e[j] < 1) { // variable SP a = ant_contexts[-e[j]]; WP al=(WP)a; WP ale=left_end(a); // scan(al,le) these - the same as below else. macro for now; pull into closure object later? 
int nw=ale-al; if (left_out+nwM) // child had full state already (had a "gap"); if nw==M then we already reached the same state via left word heuristic scan above fsa.reset(fsa_state(a)); } else { // single word WordID ew=e[j]; // some redundancy: non-vectorized version of above handling of left words of child item if (left_out score(full left unscored phrase) AccumFeatures(ff,smeta,begin(ends),end(ends),final_features,rst); // e.g. [ctx for last M words] score("") } else { // all we have is a single short phrase < M words before adding ends int nl=lend-l; Sentence whole(ends.size()+nl); WordID *w=begin(whole); wordcpy(w,l,nl); wordcpy(w+nl,begin(ends),ends.size()); // whole = left-words + end-phrase AccumFeatures(ff,smeta,w,end(whole),final_features,ss); } } bool rule_feature() const { return StateSize()==0; // Fsa features don't get info about span } static void test() { WordID w1[1],w1b[1],w2[2]; w1[0]=w2[0]=TD::Convert("hi"); w2[1]=w1b[0]=TD::none; assert(left_end(w1,w1+1)==w1+1); assert(left_end(w1b,w1b+1)==w1b); assert(left_end(w2,w2+2)==w2+1); } private: Impl ff; void Init() { // FeatureFunction::name=Impl::usage(false,false); // already achieved by ff_factory.cc M=ff.markov_order(); ssz=ff.state_bytes(); state_offset=sizeof(WordID)*M; SetStateSize(ff.state_bytes()+state_offset); } int M; // markov order (ctx len) FeatureFunctionFromFsa(); // not allowed. int state_offset; // store left-words first, then fsa state int ssz; // bytes in fsa state /* state layout: left WordIds, followed by fsa state left words have never been scored. last ones remaining will be scored on FinalTraversalFeatures only. right state is unknown until we have all M left words (less than M means TD::none will pad out right end). unk right state will be zeroed out for proper hash/equal recombination. 
*/ static inline WordID const* left_end(WordID const* left, WordID const* e) { for (;e>left;--e) if (e[-1]!=TD::none) break; //post: [left,e] are the seen left words return e; } inline WP left_end(SP ant) const { return left_end((WP)ant,(WP)fsa_state(ant)); } inline WP left_end_full(SP ant) const { return (WP)fsa_state(ant); } inline SP fsa_state(SP ant) const { return ((char const*)ant+state_offset); } inline void *fsa_state(void * ant) const { return ((char *)ant+state_offset); } void clear_fsa_state(void *ant) const { // when state is unknown std::memset(fsa_state(ant),0,ssz); } inline void fstatecpy(void *dest,void const* src) const { std::memcpy(dest,src,ssz); } }; #ifdef TEST_FSA # include "tdict.cc" # include "ff_sample_fsa.h" int main() { std::cerr<<"Testing left_end...\n"; WordPenaltyFromFsa::test(); return 0; } #endif #endif