// NOTE(review): this artifact is a cgit-rendered git format-patch (svn r373,
// new file decoder/ff_from_fsa.h) whose newlines were collapsed and whose
// HTML was tag-stripped: every span between an un-spaced '<' and the next '>'
// was deleted.  Visible casualties: the template header (originally
// "template <class Impl>"), "const std::vector<const void*>& ant_contexts",
// and multi-statement stretches of TraversalFeaturesImpl and
// FinalTraversalFeatures (e.g. "left_out+nwM" and
// "if (left_out score(full left unscored phrase)" are fused remnants).
// Those stretches cannot be reconstructed from this artifact alone —
// recover the original from the upstream repository before editing logic.
//
// What the surviving code does establish:
//  * FeatureFunctionFromFsa<Impl> adapts an FSA feature to the bottom-up
//    FeatureFunction interface; Impl supplies usage(), features(), Scan(),
//    markov_order(), state_bytes(), heuristic_start_state().
//  * Per-item state layout (see Init() and the /* state layout */ comment):
//    M = ff.markov_order() unscored left-context WordIDs first
//    (TD::none-padded on the right), then ff.state_bytes() of fsa state at
//    byte offset state_offset = sizeof(WordID)*M.
//  * When ssz == 0 (stateless fsa feature) TraversalFeaturesImpl scans each
//    terminal of the rule's target side immediately and returns — the
//    "if (!ssz)" block below is fully intact.
//  * Left words are never scored bottom-up; the remaining ones are scored in
//    FinalTraversalFeatures only (per the state-layout comment).
From 8c6536c56c728213b9e1190f0c9f76f7b4948140 Mon Sep 17 00:00:00 2001 From: graehl Date: Fri, 23 Jul 2010 02:27:28 +0000 Subject: bottom-up FF from fsa FF - WordPenaltyFsa - needs debugging git-svn-id: https://ws10smt.googlecode.com/svn/trunk@373 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/ff_from_fsa.h | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100755 decoder/ff_from_fsa.h (limited to 'decoder/ff_from_fsa.h') diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h new file mode 100755 index 00000000..3bd3f070 --- /dev/null +++ b/decoder/ff_from_fsa.h @@ -0,0 +1,189 @@ +#ifndef FF_FROM_FSA_H +#define FF_FROM_FSA_H + +#include "ff_fsa.h" + +/* regular bottom up scorer from Fsa feature + uses guarantee about markov order=N to score ASAP + encoding of state: if less than N-1 (ctxlen) words + + either: + struct FF : public FsaImpl,FeatureFunctionFromFsa (more efficient) + + or: + struct FF : public FsaFeatureFunctionDynamic,FeatureFunctionFromFsa (code sharing, but double dynamic dispatch) +*/ + +template +class FeatureFunctionFromFsa : public FeatureFunction { + typedef void const* SP; + typedef WordID *W; + typedef WordID const* WP; +public: + FeatureFunctionFromFsa(std::string const& param) : ff(param) { + Init(); + } + + static std::string usage(bool args,bool verbose) { + return Impl::usage(args,verbose); + } + + Features features() const { return ff.features(); } + + //TODO: add source span to Fsa FF interface, pass along + //TODO: read/debug VERY CAREFULLY + void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + FeatureVector* features, + FeatureVector* estimated_features, + void* out_state) const + { + if (!ssz) { + TRule const& rule=*edge.rule_; + Sentence const& e = rule.e(); + for (int j = 0; j < e.size(); ++j) { // items in target side of rule + if (e[j] < 1) { // variable + } else { + WordID ew=e[j]; + 
ff.Scan(smeta,ew,0,0,features); + } + } + return; + } + + SP h_start=ff.heuristic_start_state(); + W left_begin=(W)out_state; + W left_out=left_begin; // [left,fsa_state) = left ctx words. if left words aren't full, then null wordid + WP left_full=left_end_full(out_state); + FsaScanner fsa(ff,smeta); + TRule const& rule=*edge.rule_; + Sentence const& e = rule.e(); + for (int j = 0; j < e.size(); ++j) { // items in target side of rule + if (e[j] < 1) { // variable + SP a = ant_contexts[-e[j]]; + WP al=(WP)a; + WP ale=left_end(a); + // scan(al,le) these - the same as below else. macro for now; pull into closure object later? + int nw=ale-al; + if (left_out+nwM) // child had full state already (had a "gap"); if nw==M then we already reached the same state via left word heuristic scan above + fsa.reset(fsa_state(a)); + } else { // single word + WordID ew=e[j]; + // some redundancy: non-vectorized version of above handling of left words of child item + if (left_out score(full left unscored phrase) + AccumFeatures(ff,smeta,begin(ends),end(ends),final_features,rst); // e.g. 
[ctx for last M words] score("") + } else { // all we have is a single short phrase < M words before adding ends + int nl=lend-l; + Sentence whole(ends.size()+nl); + WordID *w=begin(whole); + wordcpy(w,l,nl); + wordcpy(w+nl,begin(ends),ends.size()); + // whole = left-words + end-phrase + AccumFeatures(ff,smeta,w,end(whole),final_features,ss); + } + } + + bool rule_feature() const { + return StateSize()==0; // Fsa features don't get info about span + } + +private: + Impl ff; + void Init() { +// FeatureFunction::name=Impl::usage(false,false); // already achieved by ff_factory.cc + M=ff.markov_order(); + ssz=ff.state_bytes(); + state_offset=sizeof(WordID)*M; + SetStateSize(ff.state_bytes()+state_offset); + } + int M; // markov order (ctx len) + FeatureFunctionFromFsa() { } + // call this explicitly in constructor body: + int state_offset; // store left-words first, then fsa state + int ssz; // bytes in fsa state + /* + state layout: left WordIds, followed by fsa state + left words have never been scored. last ones remaining will be scored on FinalTraversalFeatures only. + right state is unknown until we have all M left words (less than M means TD::none will pad out right end). unk right state will be zeroed out for proper hash/equal recombination. 
+ */ + + static inline WordID const* left_end(WordID const* left, WordID const* e) { + while (e>left) + if (*--e!=TD::none) break; + //post: [left,e] are the seen left words + return e+1; + } + inline WP left_end(SP ant) const { + return left_end((WP)ant,(WP)fsa_state(ant)); + } + inline WP left_end_full(SP ant) const { + return (WP)fsa_state(ant); + } + inline SP fsa_state(SP ant) const { + return ((char const*)ant+state_offset); + } + inline void *fsa_state(void * ant) const { + return ((char *)ant+state_offset); + } + + void clear_fsa_state(void *ant) const { // when state is unknown + std::memset(fsa_state(ant),0,ssz); + } + + inline void fstatecpy(void *dest,void const* src) const { + std::memcpy(dest,src,ssz); + } + + +}; + +typedef FeatureFunctionFromFsa WordPenaltyFromFsa; + + +#endif -- cgit v1.2.3