author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-26 01:07:36 +0000
---|---|---
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-26 01:07:36 +0000
commit | b2ad842245f1645e4e9f3c60a80a07e13151a560 (patch) |
tree | 7cb1d788b29b64964d5eec1f099193f8ed5d8a8f |
parent | dc78bb0d8590033475404c1950fa49e984e2108a (diff) |
stateful ff_from_fsa works - fixed a bug where the right state from the rightmost variable was never moved up to the result
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@413 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x | decoder/ff_from_fsa.h | 21 |
-rwxr-xr-x | decoder/ff_fsa.h | 35 |
-rwxr-xr-x | decoder/ff_sample_fsa.h | 20 |
-rw-r--r-- | decoder/hg.h | 4 |
4 files changed, 48 insertions, 32 deletions
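The fix named in the commit message shows up in the ff_from_fsa.h hunks below: when a child item already carries a full FSA right state (`nw==M`), the scanner is now reset to that child state (`fsa.reset(fsa_state(a))`), so the state reaches the result via `fstatecpy(out_state,fsa.cs)`; the old `nw>M` test could never be true, so the reset never ran. A minimal standalone sketch of that idea, using a toy scanner with hypothetical names (`ToyState`, `scan`, `apply_child`) rather than the real ff_from_fsa.h machinery:

```cpp
// Sketch only: illustrates the right-state propagation the commit message
// describes, with a toy "last M words" context state (markov order M=1).
// ToyState, scan, and apply_child are made-up names, not the cdec API.
#include <cassert>
#include <iostream>
#include <string>
#include <vector>

typedef std::string Word;
static const int M = 1; // markov order: state = rightmost M words

struct ToyState {
  std::vector<Word> ctx;          // rightmost <= M words seen so far
  bool full() const { return (int)ctx.size() == M; }
};

// Scan one terminal: extend the context, dropping words beyond M.
void scan(ToyState &st, const Word &w) {
  st.ctx.push_back(w);
  if ((int)st.ctx.size() > M) st.ctx.erase(st.ctx.begin());
}

// Apply a child item's state.  The bug being fixed: if the child already has
// a full state (nw == M), the parent must RESET its running state to the
// child's right state; otherwise that state never reaches the result.
void apply_child(ToyState &running, const ToyState &child) {
  int nw = (int)child.ctx.size();
  assert(nw <= M);                // analogous to the new assert(nw<=M)
  if (nw == M)
    running = child;              // analogous to fsa.reset(fsa_state(a))
  else
    for (const Word &w : child.ctx) scan(running, w);
}

int main() {
  ToyState child;
  scan(child, "world");           // rightmost variable's right state = {"world"}
  ToyState result;
  scan(result, "hello");          // terminal to the left of the variable
  apply_child(result, child);     // without the reset, result would stay {"hello"}
  std::cout << result.ctx.back() << "\n";  // prints "world"
}
```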
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index d32e90df..237e5d0d 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -3,8 +3,8 @@
 #include "ff_fsa.h"
 
-#define FSA_FF_DEBUG
-#ifdef FSA_FF_DEBUG
+#define FSA_FF_DEBUG 0
+#if FSA_FF_DEBUG
 # define FSAFFDBG(e,x) FSADBGif(debug,e,x)
 # define FSAFFDBGnl(e) FSADBGif_nl(debug,e)
 #else
@@ -49,7 +49,6 @@ public:
                         FeatureVector* estimated_features,
                         void* out_state) const
   {
-    FSAFFDBG(edge,"(FromFsa) "<<name);
     ff.init_features(features); // estimated_features is fresh
     if (!ssz) {
       TRule const& rule=*edge.rule_;
@@ -97,8 +96,9 @@ public:
        } else { // more to score / state to update (left already full)
          fsa.scan(al,ale,features);
        }
-        if (nw>M) // child had full state already (had a "gap"); if nw==M then we already reached the same state via left word heuristic scan above
+        if (nw==M) // child had full state already
          fsa.reset(fsa_state(a));
+        assert(nw<=M);
      } else { // single word
        WordID ew=e[j];
        FSAFFDBG(edge,' '<<TD::Convert(ew));
@@ -121,7 +121,7 @@ public:
      do { *left_out++=TD::none; } while(left_out<left_full); // none-terminate so left_end(out_state) will know how many words
    } else // or else store final right-state. heuristic was already assigned
      fstatecpy(out_state,fsa.cs);
-    FSAFFDBG(edge," = " << describe_state(out_state)<<" "<<(*features)[ff.fid()]<<" h="<<(*estimated_features)[ff.fid()]);
+    FSAFFDBG(edge," = " << describe_state(out_state)<<" "<<name<<"="<<ff.describe_features(*features)<<" h="<<ff.describe_features(*estimated_features)<<")");
    FSAFFDBGnl(edge);
  }
@@ -156,23 +156,24 @@ public:
    SP ss=ff.start_state();
    WP l=(WP)residual_state,lend=left_end(residual_state);
    SP rst=fsa_state(residual_state);
-    FSAFFDBG(edge,"(FromFsa) Final "<<name<< " before="<<*final_features);
+    FSAFFDBG(edge," (final");// "<<name);//<< " before="<<*final_features);
+
    if (lend==rst) { // implying we have an fsa state
      AccumFeatures(ff,smeta,edge,l,lend,final_features,ss); // e.g. <s> score(full left unscored phrase)
-      FSAFFDBG(edge," left: "<<ff.describe_state(ss)<<" -> "<<Sentence(l,lend));
+      FSAFFDBG(edge," start="<<ff.describe_state(ss)<<"->{"<<Sentence(l,lend)<<"}");
      AccumFeatures(ff,smeta,edge,begin(ends),end(ends),final_features,rst); // e.g. [ctx for last M words] score("</s>")
-      FSAFFDBG(edge," right: "<<ff.describe_state(rst)<<" -> "<<ends);
+      FSAFFDBG(edge," end="<<ff.describe_state(rst)<<"->{"<<ends<<"}");
    } else { // all we have is a single short phrase < M words before adding ends
      int nl=lend-l;
      Sentence whole(ends.size()+nl);
      WordID *w=begin(whole);
      wordcpy(w,l,nl);
      wordcpy(w+nl,begin(ends),ends.size());
-      FSAFFDBG(edge," score whole sentence: "<<whole);
+      FSAFFDBG(edge," whole={"<<whole<<"}"); // whole = left-words + end-phrase
      AccumFeatures(ff,smeta,edge,w,end(whole),final_features,ss);
    }
-    FSAFFDBG(edge," = "<<*final_features);
+    FSAFFDBG(edge,' '<<name<<"="<<ff.describe_features(*final_features));
    FSAFFDBGnl(edge);
  }
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 1be773b9..e21cbf6f 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -6,16 +6,18 @@
   state is some fixed width byte array. could actually be a void *, WordID sequence, whatever.
 
-  TODO: fsa feature aggregator that presents itself as a single fsa; benefit: when wrapped in ff_from_fsa, only one set of left words is stored.
+  TODO: fsa feature aggregator that presents itself as a single fsa; benefit: when wrapped in ff_from_fsa, only one set of left words is stored.
   downside: compared to separate ff, the inside portion of lower-order models is incorporated later. however, the full heuristic is already available and exact for those words. so don't sweat it.
+
+  TODO: state (+ possibly span-specific) custom heuristic, e.g. in "longer than previous word" model, you can expect a higher outside if your state is a word of 2 letters. this is on top of the nice heuristic for the unscored words, of course. in ngrams, the avg prob will be about the same, but if the words possible for a source span are summarized, maybe it's possible to predict. probably not worht the time.
 */
 //SEE ALSO: ff_fsa_dynamic.h, ff_from_fsa.h
 //TODO: decide whether to use init_features / add_value vs. summing elsewhere + set_value once (or inefficient for from_fsa: sum distinct feature_vectors. but L->R if we only scan 1 word at a time, that's fine
 
-//#define FSA_DEBUG
+#define FSA_DEBUG 0
 
-#ifdef USE_INFO_EDGE
+#if USE_INFO_EDGE
 #define FSA_DEBUG_CERR 0
 #else
 #define FSA_DEBUG_CERR 1
@@ -24,7 +26,7 @@
 #define FSA_DEBUG_DEBUG 0
 # define FSADBGif(i,e,x) do { if (i) { if (FSA_DEBUG_CERR){std::cerr<<x;} INFO_EDGE(e,x); if (FSA_DEBUG_DEBUG){std::cerr<<"FSADBGif edge.info "<<&e<<" = "<<e.info()<<std::endl;}} } while(0)
 # define FSADBGif_nl(i,e) do { if (i) { if (FSA_DEBUG_CERR) std::cerr<<std::endl; INFO_EDGE(e,"; "); } } while(0)
-#ifdef FSA_DEBUG
+#if FSA_DEBUG
 # include <iostream>
 # define FSADBG(e,x) FSADBGif(d().debug(),e,x)
 # define FSADBGnl(e) FSADBGif_nl(d().debug(),e,x)
@@ -93,6 +95,11 @@ protected:
  }
 public:
+  // can override to different return type, e.g. just return feats:
+  Featval describe_features(FeatureVector const& feats) const {
+    return feats.get(fid_);
+  }
+
  bool debug() const { return true; }
  int fid() const { return fid_; } // return the one most important feature (for debugging)
  std::string name() const {
@@ -240,6 +247,8 @@ protected:
    Base::start.resize(sizeof(State));
    Base::h_start.resize(sizeof(State));
  }
+  assert(Base::start.size()==sizeof(State));
+  assert(Base::h_start.size()==sizeof(State));
  state(Base::start.begin())=s;
  state(Base::h_start.begin())=heuristic_s;
 }
@@ -254,28 +263,24 @@ public:
    o<<state(st);
  }
  int markov_order() const { return 1; }
-  Featval ScanT1(WordID w,int prevlen,int &len) const { return 0; }
-  inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,int prevlen,int &len,FeatureVector *features) const {
-    features->maybe_add(d().fid_,d().ScanT1(w,prevlen,len));
+  Featval ScanT1(WordID w,St const&,St &) const { return 0; }
+  inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,St const& prev_st,St &new_st,FeatureVector *features) const {
+    features->maybe_add(d().fid_,d().ScanT1(w,prev_st,new_st));
  }
-
  inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,FeatureVector *features) const {
    Impl const& im=d();
-    FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<(*features)[im.fid_]<<" "<<im.state(st)<<" ->"<<TD::Convert(w)<<" ");
-    im.ScanT(smeta,edge,w,im.state(st),im.state(next_state),features);
-    FSADBG(edge,im.state(next_state)<<" = "<<(*features)[im.fid_]);
+    FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<im.describe_features(*features)<<" "<<im.state(st)<<"->"<<TD::Convert(w)<<" ");
+    im.ScanT(smeta,edge,w,state(st),state(next_state),features);
+    FSADBG(edge,state(next_state)<<" = "<<im.describe_features(*features));
    FSADBGnl(edge);
  }
 };
-
-
-
-// do not use if state size is 0, please.
 const bool optimize_FsaScanner_zerostate=false;
+// do not use if state size is 0. should crash (maybe won't if you set optimize_FsaScanner_zerostate true)
 template <class FF>
 struct FsaScanner {
   // enum {ALIGN=8};
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 2aeaa6de..24f12560 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -28,7 +28,7 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
 
 typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa;
 
-// appears to be buggy right now: give it a bonus weight (-) and it overstates how many
+// appears to be buggy right now: give it a bonus weight (+)
 struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
  typedef FsaFeatureFunctionBase<LongerThanPrev> Base;
  static std::string usage(bool param,bool verbose) {
@@ -45,6 +45,15 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
  static inline int state(void const* st) {
    return *(int const*)st;
  }
+/* int describe_state(void const* st) const {
+    return state(st);
+  }
+*/
+  // only need 1 of the 2
+  void print_state(std::ostream &o,void const* st) const {
+    o<<state(st);
+  }
+
  static inline int wordlen(WordID w) {
    return std::strlen(TD::Convert(w));
  }
@@ -53,14 +62,15 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
    Init();
    if (0) { // all this is done in constructor already
      set_state_bytes(sizeof(int));
-      start.resize(state_bytes()); // this is done by set_state_bytes already.
-      h_start.resize(state_bytes());
+      //start.resize(state_bytes());h_start.resize(state_bytes()); // this is done by set_state_bytes already.
      int ss=3;
      to_state(start.begin(),&ss,1);
      ss=4;
      to_state(h_start.begin(),&ss,1);
    }
-
+    assert(state_bytes()==sizeof(int));
+    assert(start.size()==sizeof(int));
+    assert(h_start.size()==sizeof(int));
    state(start.begin())=999999;
    state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
@@ -75,7 +85,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
 };
 
 // similar example feature; base type exposes stateful type, defines markov_order 1, state size = sizeof(State)
-// also buggy right now: give it a bonus weight (-) and it overstates how many
+// also buggy right now: give it a bonus weight
 struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
  typedef FsaTypedBase<int,ShorterThanPrev> Base;
  static std::string usage(bool param,bool verbose) {
diff --git a/decoder/hg.h b/decoder/hg.h
index 6dd10584..ea0e8aa1 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -98,7 +98,7 @@ public:
  void copy_info(Edge const& o) {
 #if USE_INFO_EDGE
-    set_info(o.info_.str());
+    set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space). it's empty if nobody put any info there.
 #endif
  }
  void copy_pod(Edge const& o) {
@@ -157,7 +157,7 @@ public:
    o<<rule_->AsString(mask&RULE_LHS);
    if (USE_INFO_EDGE) {
      std::string const& i=info();
-      if (mask&&!i.empty()) o << " ||| "<<i;
+      if (mask&&!i.empty()) o << " |||"<<i; // remember, the initial space is expected as part of i
    }
    o<<'}';
  }
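One change worth calling out in the ff_fsa.h diff above: the typed-state hook changed from `ScanT1(WordID w,int prevlen,int &len)` to `ScanT1(WordID w,St const&,St &)`, so a typed feature now reads its previous state and writes the next one directly, and the new `describe_features(feats)` helper is what the FSADBG/FSAFFDBG lines print. A small self-contained sketch of a feature written against that ScanT1 shape — `ToyShorterThanPrev` and the driver loop are made-up stand-ins, not the cdec FsaTypedBase/FsaScanner API:

```cpp
// Sketch only: the shape of a typed-state scan after this change.  ScanT1
// receives the previous typed state and fills in the next one, instead of
// the old (int prevlen, int &len) pair.
#include <iostream>
#include <string>
#include <vector>

typedef double Featval;
typedef std::string Word; // stand-in; cdec's WordID is an interned int

struct ToyShorterThanPrev {
  typedef int St; // typed state: length of the previous word

  static int wordlen(const Word &w) { return (int)w.size(); }

  // Scan one word: fire when it is shorter than the previous word, and
  // record its length as the next state (mirrors ScanT1(w, prev, next)).
  Featval ScanT1(const Word &w, St const &prev_len, St &next_len) const {
    next_len = wordlen(w);
    return next_len < prev_len ? 1.0 : 0.0;
  }
};

int main() {
  ToyShorterThanPrev ff;
  ToyShorterThanPrev::St st = 0; // start state: empty "previous word", so the first word never fires
  Featval total = 0;
  std::vector<Word> sent = {"shorter", "than", "previous"};
  for (const Word &w : sent) {
    ToyShorterThanPrev::St next;
    total += ff.ScanT1(w, st, next);
    st = next; // thread the typed state left to right, as the scanner loop does
  }
  std::cout << "fired " << total << " time(s)\n"; // fires once, on "than"
}
```

In the real code the typed base's Scan wrapper converts the `void*` state blocks to the typed state (the `state(st)` / `state(next_state)` calls in the diff) before dispatching to ScanT/ScanT1, so a feature like ShorterThanPrev in ff_sample_fsa.h only implements the typed hook.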