From f9859ad4116733e145d7b8eb31c3cc9318ff7564 Mon Sep 17 00:00:00 2001 From: "graehl@gmail.com" Date: Mon, 2 Aug 2010 07:57:23 +0000 Subject: fake tdict names for non-ids, push viterbi cost to root in hg, store as feature. type erased fsa feature via virtual interface. made lexical_cast assume C locale for speed. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@465 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/ff_fsa.h | 89 ++++++-------------------------------------------------- 1 file changed, 9 insertions(+), 80 deletions(-) (limited to 'decoder/ff_fsa.h') diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h index de777fd5..6c1294f8 100755 --- a/decoder/ff_fsa.h +++ b/decoder/ff_fsa.h @@ -47,19 +47,14 @@ # define FSADBGnl(e) #endif -#include +#include "fast_lexical_cast.hpp" #include -#include //C99 #include #include "ff.h" #include "sparse_vector.h" -#include "value_array.h" // used to hold state #include "tdict.h" #include "hg.h" -#include "sentences.h" -#include "feature_accum.h" - -typedef ValueArray Bytes; +#include "ff_fsa_data.h" /* usage: see ff_sample_fsa.h or ff_lm_fsa.h @@ -68,8 +63,9 @@ usage: see ff_sample_fsa.h or ff_lm_fsa.h */ + template -struct FsaFeatureFunctionBase { +struct FsaFeatureFunctionBase : public FsaFeatureFunctionData { // CALL 1 of these MANUALLY (because feature name(s) may depend on param, it's not done in ctor) void Init(std::string const& fname="") { fid_=FD::Convert(fname.empty()?name():fname); @@ -82,54 +78,9 @@ struct FsaFeatureFunctionBase { Impl const& d() const { return static_cast(*this); } Impl & d() { return static_cast(*this); } -protected: - int ssz; // don't forget to set this. default 0 (it may depend on params of course) - Bytes start,h_start; // start state and estimated-features (heuristic) start state. set these. default empty. - Sentence end_phrase_; // words appended for final traversal (final state cost is assessed using Scan) e.g. "" for lm. - void set_state_bytes(int sb=0) { - if (start.size()!=sb) start.resize(sb); - if (h_start.size()!=sb) h_start.resize(sb); - ssz=sb; - } - void set_end_phrase(WordID single) { - end_phrase_=singleton_sentence(single); - } - - inline void static to_state(void *state,char const* begin,char const* end) { - std::memcpy(state,begin,end-begin); - } - inline void static to_state(void *state,char const* begin,int n) { - std::memcpy(state,begin,n); - } - template - inline void static to_state(void *state,T const* begin,int n=1) { - to_state(state,(char const*)begin,n*sizeof(T)); - } - template - inline void static to_state(void *state,T const* begin,T const* end) { - to_state(state,(char const*)begin,(char const*)end); - } - - inline static char hexdigit(int i) { - int j=i-10; - return j>=0?'a'+j:'0'+i; - } - inline static void print_hex_byte(std::ostream &o,unsigned c) { - o<>4); - o<set_value(fid,val) possibly with duplicates. state and next_state will never be the same memory. //TODO: decide if we want to require you to support dest same as src, since that's how we use it most often in ff_from_fsa bottom-up wrapper (in l->r scoring, however, distinct copies will be the rule), and it probably wouldn't be too hard for most people to support. however, it's good to hide the complexity here, once (see overly clever FsaScan loop that swaps src/dest addresses repeatedly to scan a sequence by effectively swapping) @@ -206,10 +143,6 @@ protected: return d().Scan1(w,state,next_state); } public: - template - static inline T* state_as(void *p) { return (T*)p; } - template - static inline T const* state_as(void const* p) { return (T*)p; } // must override this or Scan1Meta or Scan1 template @@ -307,21 +240,18 @@ public: d().ScanPhraseAccumBounce(smeta,edge,i,end,(void*)s1,(void*)s2,accum); } + // for single-feat only. but will work for different accums template - inline void Add(Featval v,Accum *a) const { // for single-feat only. but will work for different accums + inline void Add(Featval v,Accum *a) const { a->Add(fid_,v); } - inline void Add(Featval v,SingleFeatureAccumulator *a) const { - a->Add(v); - } - - inline void set_feat(FeatureVector *features,Featval v) const { features->set_value(fid_,v); } // don't set state-bytes etc. in ctor because it may depend on parsing param string - FsaFeatureFunctionBase(int statesz=0,Sentence const& end_sentence_phrase=Sentence()) : ssz(statesz),start(statesz),h_start(statesz),end_phrase_(end_sentence_phrase) {} + FsaFeatureFunctionBase(int statesz=0,Sentence const& end_sentence_phrase=Sentence()) : + FsaFeatureFunctionData(statesz,end_sentence_phrase) { } }; @@ -398,7 +328,6 @@ public: FSADBG(edge,state(next_state)<<" = "<describe(im)); FSADBGnl(edge); } - }; -- cgit v1.2.3