diff options
-rw-r--r-- | decoder/cdec.cc | 11 | ||||
-rw-r--r-- | decoder/cdec_ff.cc | 5 | ||||
-rwxr-xr-x | decoder/ff_fsa.h | 40 | ||||
-rwxr-xr-x | decoder/ff_lm_fsa.h | 6 | ||||
-rwxr-xr-x | decoder/ff_sample_fsa.h | 5 |
5 files changed, 43 insertions, 24 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 76551cfa..460e9f15 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -388,6 +388,12 @@ int main(int argc, char** argv) { exit(1); } + const string input = str("input",conf); + cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl; + ReadFile in_read(input); + istream *in = in_read.stream(); + assert(*in); + // load feature weights (and possibly freeze feature set) vector<double> feature_weights,prelm_feature_weights; Weights w,prelm_w; @@ -503,11 +509,6 @@ int main(int argc, char** argv) { int combine_size = conf["combine_size"].as<int>(); if (combine_size < 1) combine_size = 1; - const string input = str("input",conf); - cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl; - ReadFile in_read(input); - istream *in = in_read.stream(); - assert(*in); SparseVector<prob_t> acc_vec; // accumulate gradient double acc_obj = 0; // accumulate objective diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index c8cd8a66..fc64e5c4 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -8,19 +8,18 @@ #include "ff_factory.h" #include "ff_ruleshape.h" #include "ff_bleu.h" -//#include "ff_sample_fsa.h" #include "ff_lm_fsa.h" +#include "ff_sample_fsa.h" + boost::shared_ptr<FFRegistry> global_ff_registry; void register_feature_functions() { global_ff_registry->Register(new FFFactory<LanguageModel>); global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<LanguageModelFsa> >); // same as LM but using fsa wrapper - /* // sample_fsa: global_ff_registry->Register(new FFFactory<WordPenaltyFromFsa>); // same as WordPenalty, but implemented using ff_fsa global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<LongerThanPrev> >); global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<ShorterThanPrev> >); - */ //TODO: use for all features the new Register which requires usage(...) #ifdef HAVE_RANDLM diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h index 85d184ee..45837f2c 100755 --- a/decoder/ff_fsa.h +++ b/decoder/ff_fsa.h @@ -201,7 +201,7 @@ public: template <class Accum> inline void ScanAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge, WordID w,void const* state,void *next_state,Accum *a) const { - Add(d().Scan1Meta(smeta,edge,smeta,edge,w,state,next_state),a); + Add(d().Scan1Meta(smeta,edge,w,state,next_state),a); } // bounce back and forth between two state vars starting at cs, returning end state location. if we required src=dest addr safe state updating, this concept wouldn't need to exist. @@ -233,7 +233,7 @@ public: } - + // note you'll still have to override ScanAccum // override this (and SCAN_PHRASE_ACCUM_OVERRIDE ) if you want e.g. maximum possible order ngram scores with markov_order < n-1. in the future SparseFeatureAccumulator will probably be the only option for type-erased FSA ffs. you will be adding to accum, not setting template <class Accum> inline void ScanPhraseAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge, @@ -284,10 +284,14 @@ public: } template <class Accum> - inline void Add(Featval v,Accum *a) const { // for single-feat only + inline void Add(Featval v,Accum *a) const { // for single-feat only. but will work for different accums + a->Add(fid_,v); + } + inline void Add(Featval v,SingleFeatureAccumulator *a) const { a->Add(v); } + inline void set_feat(FeatureVector *features,Featval v) const { features->set_value(fid_,v); } @@ -343,15 +347,31 @@ public: o<<state(st); } int markov_order() const { return 1; } - Featval ScanT1(WordID w,St const&,St &) const { return 0; } - inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,St const& prev_st,St &new_st,FeatureVector *features) const { - features->maybe_add(d().fid_,d().ScanT1(w,prev_st,new_st)); + + // override this + Featval ScanT1S(WordID w,St const& /* from */ ,St & /* to */) const { + return 0; + } + + // or this + Featval ScanT1(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,St const& from ,St & to) const { + return d().ScanT1S(w,from,to); } - inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,FeatureVector *features) const { + + // or this (most general) + template <class Accum> + inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,St const& prev_st,St &new_st,Accum *a) const { + Add(d().ScanT1(smeta,edge,w,prev_st,new_st),a); + } + + // note: you're on your own when it comes to Phrase overrides. see FsaFeatureFunctionBase. sorry. + + template <class Accum> + inline void ScanAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,Accum *a) const { Impl const& im=d(); - FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<im.describe_features(*features)<<" "<<im.state(st)<<"->"<<TD::Convert(w)<<" "); - im.ScanT(smeta,edge,w,state(st),state(next_state),features); - FSADBG(edge,state(next_state)<<" = "<<im.describe_features(*features)); + FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<a->describe(im)<<" "<<im.state(st)<<"->"<<TD::Convert(w)<<" "); + im.ScanT(smeta,edge,w,state(st),state(next_state),a); + FSADBG(edge,state(next_state)<<" = "<<a->describe(im)); FSADBGnl(edge); } diff --git a/decoder/ff_lm_fsa.h b/decoder/ff_lm_fsa.h index 3547d75b..c2c0972e 100755 --- a/decoder/ff_lm_fsa.h +++ b/decoder/ff_lm_fsa.h @@ -1,7 +1,7 @@ #ifndef FF_LM_FSA_H #define FF_LM_FSA_H -//FIXME: 3gram has differences in 4th decimal digit, compared to regular ff_lm. this is USUALLY a bug (there's way more actual precision in there). this was with #define LM_FSA_SHORTEN_CONTEXT 1 +//FIXME: 3gram has differences in 4th decimal digit, compared to regular ff_lm. this is USUALLY a bug (there's way more actual precision in there). this was with #define LM_FSA_SHORTEN_CONTEXT 1 and 0 (so it's not that) #define FSA_LM_DEBUG 0 @@ -84,7 +84,7 @@ struct LanguageModelFsa : public FsaFeatureFunctionBase<LanguageModelFsa> { WordID ctx[ngram_order_]; state_copy(ctx,old_st); ctx[ctxlen_]=TD::none; // make this part of state? wastes space but saves copies. - Featval p=pimpl_->WordProb(w,ctx); + Featval p=floored(pimpl_->WordProb(w,ctx)); // states are sri contexts so are in reverse order (most recent word is first, then 1-back comes next, etc.). WordID *nst=(WordID *)new_st; nst[0]=w; // new most recent word @@ -92,7 +92,7 @@ struct LanguageModelFsa : public FsaFeatureFunctionBase<LanguageModelFsa> { #if LM_FSA_SHORTEN_CONTEXT p+=pimpl_->ShortenContext(nst,ctxlen_); #endif - Add(floored(p),a); + Add(p,a); } } diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h index fb44dade..a129806d 100755 --- a/decoder/ff_sample_fsa.h +++ b/decoder/ff_sample_fsa.h @@ -132,10 +132,9 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> { */ // evil anti-google int & len out-param: - void ScanT(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len,FeatureVector *features) const { + Featval ScanT1(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len) const { len=wordlen(w); - if (len<prevlen) - features->add_value(fid_,1); + return (len<prevlen) ? 1 : 0; } }; |