strip debug param from feature spec; debug info on from_fsa Init

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@393 ec762483-ff6d-05da-a07a-a48fb63a330f
author: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-23 23:10:34 +0000
committer: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-23 23:10:34 +0000
commit: 5f9c85a6072d64b10879feafc040374e274027eb (patch)
tree: cebeb55f05e770bd8b1983821312c1856caec0a8
parent: a4187c028432c9f9ec3693747abc1c52ef310deb (diff)
5 files changed, 21 insertions, 17 deletions
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 28d6f732..4f1a3d32 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -1,6 +1,6 @@
-//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly?  need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once
+//TODO: non-sparse vector for all feature functions?  modelset applymodels keeps track of who has what features?  it's nice having FF that could generate a handful out of 10000 possible feats, though.
 
-//TODO: 0 size state != rule-local feature, i.e. still may depend on source span loc/context.  identify truly rule-local features so if we want they can be added to grammar rules (minor speedup)
+//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly?  need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once
 
 #include <boost/lexical_cast.hpp>
 #include "ff.h"
diff --git a/decoder/ff_factory.cc b/decoder/ff_factory.cc
index a6d834e0..cc07b2f2 100644
--- a/decoder/ff_factory.cc
+++ b/decoder/ff_factory.cc
@@ -35,7 +35,7 @@ shared_ptr<FeatureFunction> FFRegistry::Create(const string& ffname, const strin
     int pl=debug_pre.size();
     bool space=false;
     std::string p=param;
-    bool debug=match_begin(p,debug_pre)&&(p.size()==pl||(space=p[pl]==' '));
+    bool debug=match_begin(p,debug_pre)&&(p.size()==pl||(space=(p[pl]==' '))); // space is set only when a ' ' follows the prefix, so the erase below drops it too
     if (debug) {
       p.erase(0,debug_pre.size()+space);
       cerr<<"debug enabled for "<<ffname<< " - rest of param='"<<p<<"'\n";
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index 04a30578..51d89376 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -28,6 +28,7 @@ class FeatureFunctionFromFsa : public FeatureFunction {
   typedef WordID const* WP;
 public:
   FeatureFunctionFromFsa(std::string const& param) : ff(param) {
+    FSAFFDBG(ff.name()<<" params="<<param<<" calling Init: ");
     Init();
   }
 
@@ -137,6 +138,7 @@ public:
     return o.str();
   }
 
+  //FIXME: it's assumed that the final rule is just a unary no-target-terminal rewrite (same as ff_lm)
   virtual void FinalTraversalFeatures(const SentenceMetadata& smeta,
                                       const void* residual_state,
                                       FeatureVector* final_features) const
@@ -189,12 +191,14 @@ private:
     M=ff.markov_order();
     ssz=ff.state_bytes();
     state_offset=sizeof(WordID)*M;
-    SetStateSize(ff.state_bytes()+state_offset);
+    SetStateSize(ssz+state_offset);
+    assert(!ssz == !M); // no fsa state <=> markov order 0
+    FSAFFDBG("order="<<M<<" fsa_state_offset="<<state_offset<<" fsa_state_bytes="<<ssz<<" ff_state_bytes="<<StateSize()<<'\n');
   }
   int M; // markov order (ctx len)
   FeatureFunctionFromFsa(); // not allowed.
 
-  int state_offset; // store left-words first, then fsa state
+  int state_offset; // NOTE: in bytes (add to char* only). store left-words first, then fsa state
   int ssz; // bytes in fsa state
   /*
     state layout: left WordIds, followed by fsa state
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index d8aa7830..6e42b83b 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -29,7 +29,7 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
 typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa;
 
 
-//
+// appears to be buggy right now: give it a bonus weight (-) and it overstates how many words are longer than the previous one (presumably; original note was cut off)
 struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
   typedef FsaFeatureFunctionBase<LongerThanPrev> Base;
   static std::string usage(bool param,bool verbose) {
@@ -40,11 +40,11 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
       param,verbose);
   }
 
-  static inline int &wordlen(void *state) {
-    return *(int*)state;
+  static inline int &state(void *st) {
+    return *(int*)st;
   }
-  static inline int wordlen(void const* state) {
-    return *(int const*)state;
+  static inline int state(void const* st) {
+    return *(int const*)st;
   }
   static inline int wordlen(WordID w) {
     return std::strlen(TD::Convert(w));
@@ -62,23 +62,23 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
       to_state(h_start.begin(),&ss,1);
     }
 
-    wordlen(start.begin())=3;
-    wordlen(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
+    state(start.begin())=3;
+    state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
 
   }
 
   static const float val_per_target_word=-1;
-  void Scan(SentenceMetadata const& smeta,WordID w,void const* state,void *next_state,FeatureVector *features) const {
-    int prevlen=wordlen(state);
+  void Scan(SentenceMetadata const& smeta,WordID w,void const* from,void *next_state,FeatureVector *features) const {
+    int prevlen=state(from);
     int len=wordlen(w);
-    wordlen(next_state)=len;
     if (len>prevlen)
       features->add_value(fid_,val_per_target_word);
+    state(next_state)=len;
   }
-
 };
 
 // similar example feature; base type exposes stateful type, defines markov_order 1, state size = sizeof(State)
+// also buggy right now: give it a bonus weight (-) and it overstates how many words are shorter than the previous one (presumably; original note was cut off)
 struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
   typedef FsaTypedBase<int,ShorterThanPrev> Base;
   static std::string usage(bool param,bool verbose) {
diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h
index f8310fc1..1733a4bd 100644
--- a/decoder/sparse_vector.h
+++ b/decoder/sparse_vector.h
@@ -1,7 +1,7 @@
 #ifndef _SPARSE_VECTOR_H_
 #define _SPARSE_VECTOR_H_
 
-#define SPARSE_VECTOR_HASH
+//#define SPARSE_VECTOR_HASH
 
 #ifdef SPARSE_VECTOR_HASH
 #include "hash.h"
author	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-23 23:10:34 +0000
committer	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-23 23:10:34 +0000
commit	5f9c85a6072d64b10879feafc040374e274027eb (patch)
tree	cebeb55f05e770bd8b1983821312c1856caec0a8
parent	a4187c028432c9f9ec3693747abc1c52ef310deb (diff)