diff options
| author | graehl@gmail.com <graehl@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-02 07:57:23 +0000 | 
|---|---|---|
| committer | graehl@gmail.com <graehl@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-02 07:57:23 +0000 | 
| commit | 1fc4b6a45c18b52ab2c27eb9a825ad14f31d803b (patch) | |
| tree | 33eddab63ea60d0b252dc842ebd0c2a8408af40d /decoder/ff_fsa_data.h | |
| parent | 526efd6515eb9efdcc1fe756c3cec4981ca10186 (diff) | |
fake tdict names for non-ids, push viterbi cost to root in hg, store as feature.  type erased fsa feature via virtual interface.  made lexical_cast assume C locale for speed.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@465 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_fsa_data.h')
| -rwxr-xr-x | decoder/ff_fsa_data.h | 105 | 
1 files changed, 105 insertions, 0 deletions
| diff --git a/decoder/ff_fsa_data.h b/decoder/ff_fsa_data.h new file mode 100755 index 00000000..66d2cca8 --- /dev/null +++ b/decoder/ff_fsa_data.h @@ -0,0 +1,105 @@ +#ifndef FF_FSA_DATA_H +#define FF_FSA_DATA_H + +#include <stdint.h> //C99 +#include <sstream> +#include "sentences.h" +#include "feature_accum.h" +#include "value_array.h" + +typedef ValueArray<uint8_t> Bytes; + +// stuff I see no reason to have virtual. +struct FsaFeatureFunctionData +{ +  FsaFeatureFunctionData(int statesz=0,Sentence const& end_sentence_phrase=Sentence()) : ssz(statesz),start(statesz),h_start(statesz),end_phrase_(end_sentence_phrase) { +    debug_=true; +  } + +  std::string name_; +  std::string name() const { +    return name_; +  } +  typedef SparseFeatureAccumulator Accum; +  bool debug_; +  bool debug() const { return debug_; } +  void state_copy(void *to,void const*from) const { +    if (ssz) +      std::memcpy(to,from,ssz); +  } +  void state_zero(void *st) const { // you should call this if you don't know the state yet and want it to be hashed/compared properly +    std::memset(st,0,ssz); +  } +  Features features() const { +    return features_; +  } +  int n_features() const { +    return features_.size(); +  } +  int state_bytes() const { return ssz; } // or override this +  void const* start_state() const { +    return start.begin(); +  } +  void const * heuristic_start_state() const { +    return h_start.begin(); +  } +  Sentence const& end_phrase() const { return end_phrase_; } +  template <class T> +  static inline T* state_as(void *p) { return (T*)p; } +  template <class T> +  static inline T const* state_as(void const* p) { return (T*)p; } +  std::string describe_features(FeatureVector const& feats) { +    std::ostringstream o; +    o<<feats; +    return o.str(); +  } +  void print_state(std::ostream &o,void const*state) const { +    char const* i=(char const*)state; +    char const* e=i+ssz; +    for (;i!=e;++i) +      print_hex_byte(o,*i); +  } + +protected: +  Features features_; +  int ssz; // don't forget to set this. default 0 (it may depend on params of course) +  Bytes start,h_start; // start state and estimated-features (heuristic) start state.  set these.  default empty. +  Sentence end_phrase_; // words appended for final traversal (final state cost is assessed using Scan) e.g. "</s>" for lm. +  void set_state_bytes(int sb=0) { +    if (start.size()!=sb) start.resize(sb); +    if (h_start.size()!=sb) h_start.resize(sb); +    ssz=sb; +  } +  void set_end_phrase(WordID single) { +    end_phrase_=singleton_sentence(single); +  } + +  inline void static to_state(void *state,char const* begin,char const* end) { +    std::memcpy(state,begin,end-begin); +  } +  inline void static to_state(void *state,char const* begin,int n) { +    std::memcpy(state,begin,n); +  } +  template <class T> +  inline void static to_state(void *state,T const* begin,int n=1) { +    to_state(state,(char const*)begin,n*sizeof(T)); +  } +  template <class T> +  inline void static to_state(void *state,T const* begin,T const* end) { +    to_state(state,(char const*)begin,(char const*)end); +  } +  inline static char hexdigit(int i) { +    int j=i-10; +    return j>=0?'a'+j:'0'+i; +  } +  inline static void print_hex_byte(std::ostream &o,unsigned c) { +    o<<hexdigit(c>>4); +    o<<hexdigit(c&0x0f); +  } +  inline static void Add(Featval v,SingleFeatureAccumulator *a) { +    a->Add(v); +  } + +}; + +#endif | 
