From f9859ad4116733e145d7b8eb31c3cc9318ff7564 Mon Sep 17 00:00:00 2001 From: "graehl@gmail.com" Date: Mon, 2 Aug 2010 07:57:23 +0000 Subject: fake tdict names for non-ids, push viterbi cost to root in hg, store as feature. type erased fsa feature via virtual interface. made lexical_cast assume C locale for speed. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@465 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/ff_fsa_data.h | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100755 decoder/ff_fsa_data.h (limited to 'decoder/ff_fsa_data.h') diff --git a/decoder/ff_fsa_data.h b/decoder/ff_fsa_data.h new file mode 100755 index 00000000..66d2cca8 --- /dev/null +++ b/decoder/ff_fsa_data.h @@ -0,0 +1,105 @@ +#ifndef FF_FSA_DATA_H +#define FF_FSA_DATA_H + +#include //C99 +#include +#include "sentences.h" +#include "feature_accum.h" +#include "value_array.h" + +typedef ValueArray Bytes; + +// stuff I see no reason to have virtual. +struct FsaFeatureFunctionData +{ + FsaFeatureFunctionData(int statesz=0,Sentence const& end_sentence_phrase=Sentence()) : ssz(statesz),start(statesz),h_start(statesz),end_phrase_(end_sentence_phrase) { + debug_=true; + } + + std::string name_; + std::string name() const { + return name_; + } + typedef SparseFeatureAccumulator Accum; + bool debug_; + bool debug() const { return debug_; } + void state_copy(void *to,void const*from) const { + if (ssz) + std::memcpy(to,from,ssz); + } + void state_zero(void *st) const { // you should call this if you don't know the state yet and want it to be hashed/compared properly + std::memset(st,0,ssz); + } + Features features() const { + return features_; + } + int n_features() const { + return features_.size(); + } + int state_bytes() const { return ssz; } // or override this + void const* start_state() const { + return start.begin(); + } + void const * heuristic_start_state() const { + return h_start.begin(); + } + Sentence const& end_phrase() const { return end_phrase_; } + template + static inline T* state_as(void *p) { return (T*)p; } + template + static inline T const* state_as(void const* p) { return (T*)p; } + std::string describe_features(FeatureVector const& feats) { + std::ostringstream o; + o<" for lm. + void set_state_bytes(int sb=0) { + if (start.size()!=sb) start.resize(sb); + if (h_start.size()!=sb) h_start.resize(sb); + ssz=sb; + } + void set_end_phrase(WordID single) { + end_phrase_=singleton_sentence(single); + } + + inline void static to_state(void *state,char const* begin,char const* end) { + std::memcpy(state,begin,end-begin); + } + inline void static to_state(void *state,char const* begin,int n) { + std::memcpy(state,begin,n); + } + template + inline void static to_state(void *state,T const* begin,int n=1) { + to_state(state,(char const*)begin,n*sizeof(T)); + } + template + inline void static to_state(void *state,T const* begin,T const* end) { + to_state(state,(char const*)begin,(char const*)end); + } + inline static char hexdigit(int i) { + int j=i-10; + return j>=0?'a'+j:'0'+i; + } + inline static void print_hex_byte(std::ostream &o,unsigned c) { + o<>4); + o<Add(v); + } + +}; + +#endif -- cgit v1.2.3