fsa: stateless works, debug sample bigram {Longer,Shorter}ThanPrev

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@375 ec762483-ff6d-05da-a07a-a48fb63a330f
author: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-23 04:21:51 +0000
committer: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-23 04:21:51 +0000
commit: 90d6674878bfc231012bb8eb2a3eaa183eee5220 (patch)
tree: bf67f2bb74a58088b8a77a4ab37c7b46e8d95da1 /decoder/ff_fsa.h
parent: bcf7ad8a05799172ccd5d6ce73ddcd2f4c4a174e (diff)
1 files changed, 55 insertions, 22 deletions
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 3096f049..3a9478e2 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -30,6 +30,8 @@ protected:
   Sentence end_phrase_; // words appended for final traversal (final state cost is assessed using Scan) e.g. "</s>" for lm.
   int state_bytes_; // don't forget to set this. default 0 (it may depend on params of course)
   void set_state_bytes(int sb=0) {
+    if (start.size()!=sb) start.resize(sb);
+    if (h_start.size()!=sb) h_start.resize(sb);
     state_bytes_=sb;
   }
 
@@ -37,8 +39,21 @@ protected:
   void init_fid(std::string const& name) { // call this, though, if you have a single feature
     fid_=FD::Convert(name);
   }
+  inline void static to_state(void *state,char const* begin,char const* end) {
+    std::memcpy(state,begin,end-begin);
+  }
+  inline void static to_state(void *state,char const* begin,int n) {
+    std::memcpy(state,begin,n);
+  }
+  template <class T>
+  inline void static to_state(void *state,T const* begin,int n) {
+    to_state(state,(char const*)begin,n);
+  }
+  template <class T>
+  inline void static to_state(void *state,T const* begin,T const* end) {
+    to_state(state,(char const*)begin,(char const*)end);
+  }
 public:
-
   // return m: all strings x with the same final m+1 letters must end in this state
   /* markov chain of order m: P(xn|xn-1...x1)=P(xn|xn-1...xn-m) */
   int markov_order() const { return 0; } // override if you use state.  order 0 implies state_bytes()==0 as well, as far as scoring/splitting is concerned (you can still track state, though)
@@ -64,12 +79,49 @@ public:
   //TODO: decide if we want to require you to support dest same as src, since that's how we use it most often in ff_from_fsa bottom-up wrapper (in l->r scoring, however, distinct copies will be the rule), and it probably wouldn't be too hard for most people to support.  however, it's good to hide the complexity here, once (see overly clever FsaScan loop that swaps src/dest addresses repeatedly to scan a sequence by effectively swapping)
 
   // NOTE: if you want to e.g. track statistics, cache, whatever, cast const away or use mutable members
-  void Scan(SentenceMetadata const& smeta,WordID x,void const* state,void *next_state,FeatureVector *features) const {
+  void Scan(SentenceMetadata const& smeta,WordID w,void const* state,void *next_state,FeatureVector *features) const {
   }
 
   // don't set state-bytes etc. in ctor because it may depend on parsing param string
-  FsaFeatureFunctionBase() : start(0),h_start(0),state_bytes_(0) {  }
+  FsaFeatureFunctionBase(int statesz=0) : start(statesz),h_start(statesz),state_bytes_(statesz) {  }
+
+};
+
+// if State is pod.  sets state size and allocs start, h_start
+template <class St>
+struct FsaTypedBase : public FsaFeatureFunctionBase {
+protected:
+  typedef St State;
+  static inline State & state(void *state) {
+    return *(State*)state;
+  }
+  static inline State const& state(void const* state) {
+    return *(State const*)state;
+  }
+  void set_starts(State const& s,State const& heuristic_s) {
+    if (0) { // already in ctor
+      start.resize(sizeof(State));
+      h_start.resize(sizeof(State));
+    }
+    state(start.begin())=s;
+    state(h_start.begin())=heuristic_s;
+  }
+  void set_h_start(State const& s) {
+  }
+public:
+  int markov_order() const { return 1; }
+  FsaTypedBase() : FsaFeatureFunctionBase(sizeof(State)) {
+  }
+};
 
+// usage (if you're lazy):
+// struct ShorterThanPrev : public FsaTypedBase<int>,FsaTypedScan<ShorterThanPrev>
+template <class Impl>
+struct FsaTypedScan  {
+  void Scan(SentenceMetadata const& smeta,WordID w,void const* st,void *next_state,FeatureVector *features) const {
+    Impl const* impl=static_cast<Impl const*>(this);
+    impl->Scan(smeta,w,impl->state(st),impl->state(next_state),features);
+  }
 };
 
 
@@ -169,26 +221,7 @@ void AccumFeatures(FF const& ff,SentenceMetadata const& smeta,WordID const* i, W
 
 //TODO: combine 2 FsaFeatures typelist style (can recurse for more)
 
-// example: feature val = -1 * # of target words
-struct WordPenaltyFsa : public FsaFeatureFunctionBase {
-  WordPenaltyFsa(std::string const& param) {
-    init_fid(usage(false,false));
-    return;
-    //below are all defaults:
-    set_state_bytes(0);
-    start.clear();
-    h_start.clear();
-  }
-  static const float val_per_target_word=-1;
-  // move from state to next_state after seeing word x, while emitting features->add_value(fid,val) possibly with duplicates.  state and next_state may be same memory.
-  void Scan(SentenceMetadata const& smeta,WordID x,void const* state,void *next_state,FeatureVector *features) const {
-    features->add_value(fid_,val_per_target_word);
-  }
-  static std::string usage(bool param,bool verbose) {
-    return FeatureFunction::usage_helper("WordPenaltyFsa","","-1 per target word",param,verbose);
-  }
 
-};
 
 
 #endif
author	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-23 04:21:51 +0000
committer	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-23 04:21:51 +0000
commit	90d6674878bfc231012bb8eb2a3eaa183eee5220 (patch)
tree	bf67f2bb74a58088b8a77a4ab37c7b46e8d95da1 /decoder/ff_fsa.h
parent	bcf7ad8a05799172ccd5d6ce73ddcd2f4c4a174e (diff)