From b2cbd45b1f8b1d2194624d43ffee12bf53d56a4d Mon Sep 17 00:00:00 2001 From: graehl Date: Wed, 28 Jul 2010 05:25:56 +0000 Subject: fsa lm phrase mystery remains, but bool fsa::simple_phrase_score indicates whether stateless features should copy phrases from rules (e.g. unigram lm) git-svn-id: https://ws10smt.googlecode.com/svn/trunk@444 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/ff_from_fsa.h | 72 ++++++++++++++++++++++++++++++--------------------- decoder/ff_fsa.h | 55 ++++++++++++++++++++++----------------- decoder/ff_lm.cc | 17 ++++++++++++ decoder/ff_lm_fsa.h | 11 +++++--- decoder/sentences.h | 3 ++- 5 files changed, 99 insertions(+), 59 deletions(-) diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h index 7fa6be67..c517ec64 100755 --- a/decoder/ff_from_fsa.h +++ b/decoder/ff_from_fsa.h @@ -40,7 +40,7 @@ public: Features features() const { return ff.features(); } - // Log because it + // Log because it potentially stores info in edge. otherwise the same as regular TraversalFeatures. void TraversalFeaturesLog(const SentenceMetadata& smeta, Hypergraph::Edge& edge, const std::vector& ant_contexts, @@ -51,62 +51,74 @@ public: TRule const& rule=*edge.rule_; Sentence const& e = rule.e(); typename Impl::Accum accum,h_accum; - if (!ssz) { - Sentence phrase; - phrase.reserve(e.size()); - for (int j=0,je=e.size();;++j) { // items in target side of rule - if (je==j || e[j]<1) { // end or variable - if (phrase.size()) { - FSAFFDBG(edge," ["<=1) // token + ff.ScanAccum(smeta,edge,(WordID)e[j],NULL,NULL,&accum); + FSAFFDBG(edge," "< - inline void ScanAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge, + inline void ScanAccum(SentenceMetadata const& smeta,Hypergraph::Edge const& edge, WordID w,void const* state,void *next_state,Accum *a) const { Add(d().Scan1Meta(smeta,edge,w,state,next_state),a); } // bounce back and forth between two state vars starting at cs, returning end state location. if we required src=dest addr safe state updating, this concept wouldn't need to exist. - // recommend you override this if you score phrases differently than word-by-word. + // required that you override this if you score phrases differently than word-by-word, however, you can just use the SCAN_PHRASE_ACCUM_OVERRIDE macro to do that in terms of ScanPhraseAccum template - void *ScanPhraseAccumBounce(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i, WordID const* end,void *cs,void *ns,Accum *accum) const { + void *ScanPhraseAccumBounce(SentenceMetadata const& smeta,Hypergraph::Edge const& edge,WordID const* i, WordID const* end,void *cs,void *ns,Accum *accum) const { // extra code - IT'S FOR EFFICIENCY, MAN! IT'S OK! definitely no bugs here. if (!ssz) { for (;i M. so, set this false if you provide ScanPhraseAccum (SCAN_PHRASE_ACCUM_OVERRIDE macro does this) + + // override this (and use SCAN_PHRASE_ACCUM_OVERRIDE ) if you want e.g. maximum possible order ngram scores with markov_order < n-1. in the future SparseFeatureAccumulator will probably be the only option for type-erased FSA ffs. // note you'll still have to override ScanAccum - // override this (and SCAN_PHRASE_ACCUM_OVERRIDE ) if you want e.g. maximum possible order ngram scores with markov_order < n-1. in the future SparseFeatureAccumulator will probably be the only option for type-erased FSA ffs. you will be adding to accum, not setting template - inline void ScanPhraseAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge, - WordID const* i, WordID const* end,void const* state,void *next_state,Accum *accum) const { + void ScanPhraseAccum(SentenceMetadata const& smeta,Hypergraph::Edge const & edge, + WordID const* i, WordID const* end, + void const* state,void *next_state,Accum *accum) const { if (!ssz) { for (;i \ - void *ScanPhraseAccumBounce(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i, WordID const* end,void *cs,void *ns,Accum *accum) const { \ - ScanPhraseAccum(smeta,edge,i,end,cs,ns,accum); \ + void *ScanPhraseAccumBounce(SentenceMetadata const& smeta,Hypergraph::Edge const& edge,WordID const* i, WordID const* end,void *cs,void *ns,Accum *accum) const { \ + ScanPhraseAccum(smeta,edge,i,end,cs,ns,accum); \ return ns; \ } \ template \ - inline void ScanPhraseAccumOnly(SentenceMetadata const& smeta,const Hypergraph::Edge& edge, \ - WordID const* i, WordID const* end,void const* state,Accum *accum) const { \ - char s2[ssz]; ScanPhraseAccum(smeta,edge,i,end,state,(void*)s2,accum); \ + void ScanPhraseAccumOnly(SentenceMetadata const& smeta,Hypergraph::Edge const& edge, \ + WordID const* i, WordID const* end, \ + void const* state,Accum *accum) const { \ + char s2[ssz]; ScanPhraseAccum(smeta,edge,i,end,state,(void*)s2,accum); \ } // override this or bounce along with above. note: you can just call ScanPhraseAccum // doesn't set state (for heuristic in ff_from_fsa) template - inline void ScanPhraseAccumOnly(SentenceMetadata const& smeta,const Hypergraph::Edge& edge, - WordID const* i, WordID const* end,void const* state,Accum *accum) const { + void ScanPhraseAccumOnly(SentenceMetadata const& smeta,Hypergraph::Edge const& edge, + WordID const* i, WordID const* end, + void const* state,Accum *accum) const { char s1[ssz]; char s2[ssz]; state_copy(s1,state); @@ -354,20 +361,20 @@ public: } // or this - Featval ScanT1(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,St const& from ,St & to) const { + Featval ScanT1(SentenceMetadata const& /* smeta */,Hypergraph::Edge const& /* edge */,WordID w,St const& from ,St & to) const { return d().ScanT1S(w,from,to); } // or this (most general) template - inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,St const& prev_st,St &new_st,Accum *a) const { + inline void ScanT(SentenceMetadata const& smeta,Hypergraph::Edge const& edge,WordID w,St const& prev_st,St &new_st,Accum *a) const { Add(d().ScanT1(smeta,edge,w,prev_st,new_st),a); } // note: you're on your own when it comes to Phrase overrides. see FsaFeatureFunctionBase. sorry. template - inline void ScanAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,Accum *a) const { + inline void ScanAccum(SentenceMetadata const& smeta,Hypergraph::Edge const& edge,WordID w,void const* st,void *next_state,Accum *a) const { Impl const& im=d(); FSADBG(edge,"Scan "<describe(im)<<" "<"<