stateful ff_from_fsa works - fixed bug that never moved right state from rightmost variable up to result

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@413 ec762483-ff6d-05da-a07a-a48fb63a330f
author: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-26 01:07:36 +0000
committer: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-26 01:07:36 +0000
commit: 5dc4b5c2d796b339dd6b9f7616c5765ee28728f0 (patch)
tree: c3624b236eb7e88314fc439552af501e697214d5 /decoder
parent: 17623986ba1c9c9a3f91da4f215f25b9d9c0ea96 (diff)
4 files changed, 48 insertions, 32 deletions
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index d32e90df..237e5d0d 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -3,8 +3,8 @@
 
 #include "ff_fsa.h"
 
-#define FSA_FF_DEBUG
-#ifdef FSA_FF_DEBUG
+#define FSA_FF_DEBUG 0
+#if FSA_FF_DEBUG
 # define FSAFFDBG(e,x) FSADBGif(debug,e,x)
 # define FSAFFDBGnl(e) FSADBGif_nl(debug,e)
 #else
@@ -49,7 +49,6 @@ public:
                              FeatureVector* estimated_features,
                              void* out_state) const
   {
-    FSAFFDBG(edge,"(FromFsa) "<<name);
     ff.init_features(features); // estimated_features is fresh
     if (!ssz) {
       TRule const& rule=*edge.rule_;
@@ -97,8 +96,9 @@ public:
         } else { // more to score / state to update (left already full)
           fsa.scan(al,ale,features);
         }
-        if (nw>M) // child had full state already (had a "gap"); if nw==M then we already reached the same state via left word heuristic scan above
+        if (nw==M) // child had full state already
           fsa.reset(fsa_state(a));
+        assert(nw<=M);
       } else { // single word
         WordID ew=e[j];
         FSAFFDBG(edge,' '<<TD::Convert(ew));
@@ -121,7 +121,7 @@ public:
       do { *left_out++=TD::none; } while(left_out<left_full); // none-terminate so left_end(out_state) will know how many words
     } else // or else store final right-state.  heuristic was already assigned
       fstatecpy(out_state,fsa.cs);
-    FSAFFDBG(edge," = " << describe_state(out_state)<<" "<<(*features)[ff.fid()]<<" h="<<(*estimated_features)[ff.fid()]);
+    FSAFFDBG(edge," = " << describe_state(out_state)<<" "<<name<<"="<<ff.describe_features(*features)<<" h="<<ff.describe_features(*estimated_features)<<")");
     FSAFFDBGnl(edge);
   }
 
@@ -156,23 +156,24 @@ public:
     SP ss=ff.start_state();
     WP l=(WP)residual_state,lend=left_end(residual_state);
     SP rst=fsa_state(residual_state);
-    FSAFFDBG(edge,"(FromFsa) Final "<<name<< " before="<<*final_features);
+    FSAFFDBG(edge," (final");// "<<name);//<< " before="<<*final_features);
+
     if (lend==rst) { // implying we have an fsa state
       AccumFeatures(ff,smeta,edge,l,lend,final_features,ss); // e.g. <s> score(full left unscored phrase)
-      FSAFFDBG(edge," left: "<<ff.describe_state(ss)<<" -> "<<Sentence(l,lend));
+      FSAFFDBG(edge," start="<<ff.describe_state(ss)<<"->{"<<Sentence(l,lend)<<"}");
       AccumFeatures(ff,smeta,edge,begin(ends),end(ends),final_features,rst); // e.g. [ctx for last M words] score("</s>")
-      FSAFFDBG(edge," right: "<<ff.describe_state(rst)<<" -> "<<ends);
+      FSAFFDBG(edge," end="<<ff.describe_state(rst)<<"->{"<<ends<<"}");
     } else { // all we have is a single short phrase < M words before adding ends
       int nl=lend-l;
       Sentence whole(ends.size()+nl);
       WordID *w=begin(whole);
       wordcpy(w,l,nl);
       wordcpy(w+nl,begin(ends),ends.size());
-      FSAFFDBG(edge," score whole sentence: "<<whole);
+      FSAFFDBG(edge," whole={"<<whole<<"}");
       // whole = left-words + end-phrase
       AccumFeatures(ff,smeta,edge,w,end(whole),final_features,ss);
     }
-    FSAFFDBG(edge," = "<<*final_features);
+    FSAFFDBG(edge,' '<<name<<"="<<ff.describe_features(*final_features));
     FSAFFDBGnl(edge);
   }
 
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 1be773b9..e21cbf6f 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -6,16 +6,18 @@
 
   state is some fixed width byte array.  could actually be a void *, WordID sequence, whatever.
 
-  TODO: fsa feature aggregator that presents itself as a single fsa; benefit: when wrapped in ff_from_fsa, only one set of left words is stored.
+  TODO: fsa feature aggregator that presents itself as a single fsa; benefit: when wrapped in ff_from_fsa, only one set of left words is stored.  downside: compared to separate ff, the inside portion of lower-order models is incorporated later.  however, the full heuristic is already available and exact for those words.  so don't sweat it.
+
+  TODO: state (+ possibly span-specific) custom heuristic, e.g. in "longer than previous word" model, you can expect a higher outside if your state is a word of 2 letters.  this is on top of the nice heuristic for the unscored words, of course.  in ngrams, the avg prob will be about the same, but if the words possible for a source span are summarized, maybe it's possible to predict.  probably not worth the time.
 */
 
 //SEE ALSO: ff_fsa_dynamic.h, ff_from_fsa.h
 
 //TODO: decide whether to use init_features / add_value vs. summing elsewhere + set_value once (or inefficient for from_fsa: sum distinct feature_vectors.  but L->R if we only scan 1 word at a time, that's fine
 
-//#define FSA_DEBUG
+#define FSA_DEBUG 0
 
-#ifdef USE_INFO_EDGE
+#if USE_INFO_EDGE
 #define FSA_DEBUG_CERR 0
 #else
 #define FSA_DEBUG_CERR 1
@@ -24,7 +26,7 @@
 #define FSA_DEBUG_DEBUG 0
 # define FSADBGif(i,e,x) do { if (i) { if (FSA_DEBUG_CERR){std::cerr<<x;}  INFO_EDGE(e,x); if (FSA_DEBUG_DEBUG){std::cerr<<"FSADBGif edge.info "<<&e<<" = "<<e.info()<<std::endl;}} } while(0)
 # define FSADBGif_nl(i,e) do { if (i) { if (FSA_DEBUG_CERR) std::cerr<<std::endl; INFO_EDGE(e,"; "); } } while(0)
-#ifdef FSA_DEBUG
+#if FSA_DEBUG
 # include <iostream>
 # define FSADBG(e,x) FSADBGif(d().debug(),e,x)
 # define FSADBGnl(e) FSADBGif_nl(d().debug(),e,x)
@@ -93,6 +95,11 @@ protected:
   }
 
 public:
+  // can override to different return type, e.g. just return feats:
+  Featval describe_features(FeatureVector const& feats) const {
+    return feats.get(fid_);
+  }
+
   bool debug() const { return true; }
   int fid() const { return fid_; } // return the one most important feature (for debugging)
   std::string name() const {
@@ -240,6 +247,8 @@ protected:
       Base::start.resize(sizeof(State));
       Base::h_start.resize(sizeof(State));
     }
+    assert(Base::start.size()==sizeof(State));
+    assert(Base::h_start.size()==sizeof(State));
     state(Base::start.begin())=s;
     state(Base::h_start.begin())=heuristic_s;
   }
@@ -254,28 +263,24 @@ public:
     o<<state(st);
   }
   int markov_order() const { return 1; }
-  Featval ScanT1(WordID w,int prevlen,int &len) const { return 0; }
-  inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,int prevlen,int &len,FeatureVector *features) const {
-    features->maybe_add(d().fid_,d().ScanT1(w,prevlen,len));
+  Featval ScanT1(WordID w,St const&,St &) const { return 0; }
+  inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,St const& prev_st,St &new_st,FeatureVector *features) const {
+    features->maybe_add(d().fid_,d().ScanT1(w,prev_st,new_st));
   }
-
   inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,FeatureVector *features) const {
     Impl const& im=d();
-    FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<(*features)[im.fid_]<<" "<<im.state(st)<<" ->"<<TD::Convert(w)<<" ");
-    im.ScanT(smeta,edge,w,im.state(st),im.state(next_state),features);
-    FSADBG(edge,im.state(next_state)<<" = "<<(*features)[im.fid_]);
+    FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<im.describe_features(*features)<<" "<<im.state(st)<<"->"<<TD::Convert(w)<<" ");
+    im.ScanT(smeta,edge,w,state(st),state(next_state),features);
+    FSADBG(edge,state(next_state)<<" = "<<im.describe_features(*features));
     FSADBGnl(edge);
   }
 
 };
 
 
-
-
-
-// do not use if state size is 0, please.
 const bool optimize_FsaScanner_zerostate=false;
 
+// do not use if state size is 0.  should crash (maybe won't if you set optimize_FsaScanner_zerostate true)
 template <class FF>
 struct FsaScanner {
 //  enum {ALIGN=8};
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 2aeaa6de..24f12560 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -28,7 +28,7 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
 typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa;
 
 
-// appears to be buggy right now: give it a bonus weight (-) and it overstates how many
+// appears to be buggy right now: give it a bonus weight (+)
 struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
   typedef FsaFeatureFunctionBase<LongerThanPrev> Base;
   static std::string usage(bool param,bool verbose) {
@@ -45,6 +45,15 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
   static inline int state(void const* st) {
     return *(int const*)st;
   }
+/*  int describe_state(void const* st) const {
+    return state(st);
+  }
+*/
+  // only need 1 of the 2
+  void print_state(std::ostream &o,void const* st) const {
+    o<<state(st);
+  }
+
   static inline int wordlen(WordID w) {
     return std::strlen(TD::Convert(w));
   }
@@ -53,14 +62,15 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
     Init();
     if (0) { // all this is done in constructor already
       set_state_bytes(sizeof(int));
-      start.resize(state_bytes()); // this is done by set_state_bytes already.
-      h_start.resize(state_bytes());
+      //start.resize(state_bytes());h_start.resize(state_bytes()); // this is done by set_state_bytes already.
       int ss=3;
       to_state(start.begin(),&ss,1);
       ss=4;
       to_state(h_start.begin(),&ss,1);
     }
-
+    assert(state_bytes()==sizeof(int));
+    assert(start.size()==sizeof(int));
+    assert(h_start.size()==sizeof(int));
     state(start.begin())=999999;
     state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
 
@@ -75,7 +85,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
 };
 
 // similar example feature; base type exposes stateful type, defines markov_order 1, state size = sizeof(State)
-// also buggy right now: give it a bonus weight (-) and it overstates how many
+// also buggy right now: give it a bonus weight
 struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
   typedef FsaTypedBase<int,ShorterThanPrev> Base;
   static std::string usage(bool param,bool verbose) {
diff --git a/decoder/hg.h b/decoder/hg.h
index 6dd10584..ea0e8aa1 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -98,7 +98,7 @@ public:
 
     void copy_info(Edge const& o) {
 #if USE_INFO_EDGE
-      set_info(o.info_.str());
+      set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space).  it's empty if nobody put any info there.
 #endif
     }
     void copy_pod(Edge const& o) {
@@ -157,7 +157,7 @@ public:
         o<<rule_->AsString(mask&RULE_LHS);
       if (USE_INFO_EDGE) {
         std::string const& i=info();
-        if (mask&&!i.empty()) o << " ||| "<<i;
+        if (mask&&!i.empty()) o << " |||"<<i; // remember, the initial space is expected as part of i
       }
       o<<'}';
     }
author	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-26 01:07:36 +0000
committer	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-26 01:07:36 +0000
commit	5dc4b5c2d796b339dd6b9f7616c5765ee28728f0 (patch)
tree	c3624b236eb7e88314fc439552af501e697214d5 /decoder
parent	17623986ba1c9c9a3f91da4f215f25b9d9c0ea96 (diff)