summaryrefslogtreecommitdiff
path: root/decoder/ff_fsa_data.h
blob: e60bce45080aaaf3d3316654b5190746281e45b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#ifndef FF_FSA_DATA_H
#define FF_FSA_DATA_H

#include <stdint.h> //C99
#include <sstream>
#include "sentences.h"
#include "feature_accum.h"
#include "value_array.h"
#include "ff.h" //debug
// Array of raw bytes (uint8_t elements held in a ValueArray).
typedef ValueArray<uint8_t> Bytes;

// stuff I see no reason to have virtual.  but because it's impossible (w/o virtual inheritance to have dynamic fsa ff know where the impl's data starts, implemented a sync (copy) method that needs to be called.  init_name_debug was already necessary to keep state in sync between ff and ff_from_fsa, so no sync should be needed after it.  supposing all modifications were through setters, then no explicit sync call would ever be needed; updates could be mirrored.
struct FsaFeatureFunctionData
{
  void init_name_debug(std::string const& n,bool debug) {
    name_=n;
    debug_=debug;
  }
  //HACK for diamond inheritance (w/o costing performance)
  FsaFeatureFunctionData *sync_to_;

  void sync() const { // call this if you modify any fields after your constructor is done
    if (sync_to_) {
      DBGINIT("sync to "<<*sync_to_);
      *sync_to_=*this;
      DBGINIT("synced result="<<*sync_to_<< " from this="<<*this);
    } else {
      DBGINIT("nobody to sync to - from FeatureFunctionData this="<<*this);
    }
  }

  friend std::ostream &operator<<(std::ostream &o,FsaFeatureFunctionData const& d) {
    o << "[FSA "<<d.name_<<" features="<<FD::Convert(d.features_)<<" state_bytes="<<d.state_bytes()<<" end='"<<d.end_phrase()<<"' start=";
    d.print_state(o,d.start_state());
    o<<"]";
    return o;
  }

  FsaFeatureFunctionData(int statesz=0,Sentence const& end_sentence_phrase=Sentence()) : ssz(statesz),start(statesz),h_start(statesz),end_phrase_(end_sentence_phrase) {
    debug_=true;
    sync_to_=0;
  }

  std::string name_;
  std::string name() const {
    return name_;
  }
  typedef SparseFeatureAccumulator Accum;
  bool debug_;
  bool debug() const { return debug_; }
  void state_copy(void *to,void const*from) const {
    if (ssz)
      std::memcpy(to,from,ssz);
  }
  void state_zero(void *st) const { // you should call this if you don't know the state yet and want it to be hashed/compared properly
    std::memset(st,0,ssz);
  }
  Features features() const {
    return features_;
  }
  int n_features() const {
    return features_.size();
  }
  int state_bytes() const { return ssz; }
  void const* start_state() const {
    return start.begin();
  }
  void const * heuristic_start_state() const {
    return h_start.begin();
  }
  Sentence const& end_phrase() const { return end_phrase_; }
  template <class T>
  static inline T* state_as(void *p) { return (T*)p; }
  template <class T>
  static inline T const* state_as(void const* p) { return (T*)p; }
  std::string describe_features(FeatureVector const& feats) {
    std::ostringstream o;
    o<<feats;
    return o.str();
  }
  void print_state(std::ostream &o,void const*state) const {
    char const* i=(char const*)state;
    char const* e=i+ssz;
    for (;i!=e;++i)
      print_hex_byte(o,*i);
  }

  Features features_;
protected:
  int ssz; // don't forget to set this. default 0 (it may depend on params of course)
  Bytes start,h_start; // start state and estimated-features (heuristic) start state.  set these.  default empty.
  Sentence end_phrase_; // words appended for final traversal (final state cost is assessed using Scan) e.g. "</s>" for lm.
  // this can be called instead or after constructor (also set bytes and end_phrase_)
  void set_state_bytes(int sb=0) {
    if (start.size()!=sb) start.resize(sb);
    if (h_start.size()!=sb) h_start.resize(sb);
    ssz=sb;
  }
  void set_end_phrase(WordID single) {
    end_phrase_=singleton_sentence(single);
  }

  inline void static to_state(void *state,char const* begin,char const* end) {
    std::memcpy(state,begin,end-begin);
  }
  inline void static to_state(void *state,char const* begin,int n) {
    std::memcpy(state,begin,n);
  }
  template <class T>
  inline void static to_state(void *state,T const* begin,int n=1) {
    to_state(state,(char const*)begin,n*sizeof(T));
  }
  template <class T>
  inline void static to_state(void *state,T const* begin,T const* end) {
    to_state(state,(char const*)begin,(char const*)end);
  }
  inline static char hexdigit(int i) {
    int j=i-10;
    return j>=0?'a'+j:'0'+i;
  }
  inline static void print_hex_byte(std::ostream &o,unsigned c) {
    o<<hexdigit(c>>4);
    o<<hexdigit(c&0x0f);
  }
  inline static void Add(Featval v,SingleFeatureAccumulator *a) {
    a->Add(v);
  }

};

#endif