diff options
-rw-r--r-- | decoder/ff.cc | 4 | ||||
-rw-r--r-- | decoder/ff_factory.cc | 2 | ||||
-rwxr-xr-x | decoder/ff_from_fsa.h | 8 | ||||
-rwxr-xr-x | decoder/ff_sample_fsa.h | 22 | ||||
-rw-r--r-- | decoder/sparse_vector.h | 2 |
5 files changed, 21 insertions, 17 deletions
diff --git a/decoder/ff.cc b/decoder/ff.cc index 28d6f732..4f1a3d32 100644 --- a/decoder/ff.cc +++ b/decoder/ff.cc @@ -1,6 +1,6 @@ -//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly? need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once +//TODO: non-sparse vector for all feature functions? modelset applymodels keeps track of who has what features? it's nice having FF that could generate a handful out of 10000 possible feats, though. -//TODO: 0 size state != rule-local feature, i.e. still may depend on source span loc/context. identify truly rule-local features so if we want they can be added to grammar rules (minor speedup) +//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly? need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once #include <boost/lexical_cast.hpp> #include "ff.h" diff --git a/decoder/ff_factory.cc b/decoder/ff_factory.cc index a6d834e0..cc07b2f2 100644 --- a/decoder/ff_factory.cc +++ b/decoder/ff_factory.cc @@ -35,7 +35,7 @@ shared_ptr<FeatureFunction> FFRegistry::Create(const string& ffname, const strin int pl=debug_pre.size(); bool space=false; std::string p=param; - bool debug=match_begin(p,debug_pre)&&(p.size()==pl||(space=p[pl]==' ')); + bool debug=match_begin(p,debug_pre)&&(p.size()==pl||(space=(p[pl]==' ')); if (debug) { p.erase(0,debug_pre.size()+space); cerr<<"debug enabled for "<<ffname<< " - rest of param='"<<p<<"'\n"; diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h index 04a30578..51d89376 100755 --- a/decoder/ff_from_fsa.h +++ b/decoder/ff_from_fsa.h @@ -28,6 +28,7 @@ class FeatureFunctionFromFsa : public FeatureFunction { typedef WordID const* WP; public: FeatureFunctionFromFsa(std::string const& param) : ff(param) { + FSAFFDBG(ff.name()<<" params="<<param<<" calling Init: "); Init(); } @@ -137,6 +138,7 @@ public: return o.str(); } + //FIXME: it's assumed that the final rule is just a unary no-target-terminal rewrite (same as ff_lm) virtual void FinalTraversalFeatures(const SentenceMetadata& smeta, const void* residual_state, FeatureVector* final_features) const @@ -189,12 +191,14 @@ private: M=ff.markov_order(); ssz=ff.state_bytes(); state_offset=sizeof(WordID)*M; - SetStateSize(ff.state_bytes()+state_offset); + SetStateSize(ssz+state_offset); + assert(!ssz == !M); // no fsa state <=> markov order 0 + FSAFFDBG("order="<<M<<" fsa_state_offset="<<state_offset<<" fsa_state_bytes="<<ssz<<" ff_state_bytes="<<StateSize()<<'\n'); } int M; // markov order (ctx len) FeatureFunctionFromFsa(); // not allowed. - int state_offset; // store left-words first, then fsa state + int state_offset; // NOTE: in bytes (add to char* only). store left-words first, then fsa state int ssz; // bytes in fsa state /* state layout: left WordIds, followed by fsa state diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h index d8aa7830..6e42b83b 100755 --- a/decoder/ff_sample_fsa.h +++ b/decoder/ff_sample_fsa.h @@ -29,7 +29,7 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> { typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa; -// +// appears to be buggy right now: give it a bonus weight (-) and it overstates how many struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { typedef FsaFeatureFunctionBase<LongerThanPrev> Base; static std::string usage(bool param,bool verbose) { @@ -40,11 +40,11 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { param,verbose); } - static inline int &wordlen(void *state) { - return *(int*)state; + static inline int &state(void *st) { + return *(int*)st; } - static inline int wordlen(void const* state) { - return *(int const*)state; + static inline int state(void const* st) { + return *(int const*)st; } static inline int wordlen(WordID w) { return std::strlen(TD::Convert(w)); @@ -62,23 +62,23 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { to_state(h_start.begin(),&ss,1); } - wordlen(start.begin())=3; - wordlen(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous + state(start.begin())=3; + state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous } static const float val_per_target_word=-1; - void Scan(SentenceMetadata const& smeta,WordID w,void const* state,void *next_state,FeatureVector *features) const { - int prevlen=wordlen(state); + void Scan(SentenceMetadata const& smeta,WordID w,void const* from,void *next_state,FeatureVector *features) const { + int prevlen=state(from); int len=wordlen(w); - wordlen(next_state)=len; if (len>prevlen) features->add_value(fid_,val_per_target_word); + state(next_state)=len; } - }; // similar example feature; base type exposes stateful type, defines markov_order 1, state size = sizeof(State) +// also buggy right now: give it a bonus weight (-) and it overstates how many struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> { typedef FsaTypedBase<int,ShorterThanPrev> Base; static std::string usage(bool param,bool verbose) { diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h index f8310fc1..1733a4bd 100644 --- a/decoder/sparse_vector.h +++ b/decoder/sparse_vector.h @@ -1,7 +1,7 @@ #ifndef _SPARSE_VECTOR_H_ #define _SPARSE_VECTOR_H_ -#define SPARSE_VECTOR_HASH +//#define SPARSE_VECTOR_HASH #ifdef SPARSE_VECTOR_HASH #include "hash.h" |