| author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-27 04:59:37 +0000 |
|---|---|---|
| committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-27 04:59:37 +0000 |
| commit | cc7ee5e3f6ac08750b80e468b77e8ad732e48b77 (patch) | |
| tree | 2bd8688735602402918b02747ed7557a1aed97d4 /decoder/ff_lm.cc | |
| parent | d038d9d39793f140c97bf71163fa0556ad015014 (diff) | |
fsa feature: templated Accum interface; phrase interface allows exceeding the markov order, e.g. unigram state with a 3gram lm. Use Accum / set_value rather than clear / add_value. Warning: the 3gram fsa lm disagrees with bottom-up in the 4th decimal place.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@431 ec762483-ff6d-05da-a07a-a48fb63a330f
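The commit message refers to cdec's FSA feature scaffolding (the templated Accum interface), which lives outside this file and is not shown in the diff below. As a rough illustration only — with `FeatureVector`, `SingleFeatureAccum`, and `ScanWord` invented names rather than the project's actual API — the "Accum / set_value rather than clear / add_value" idea amounts to accumulating per-word log-probs locally and writing the feature value once:

```cpp
#include <map>

// Hypothetical sparse feature vector; stands in for whatever the decoder uses.
typedef std::map<int, double> FeatureVector;

// A minimal single-feature accumulator in the spirit of a templated "Accum"
// interface: it sums one (fid, value) pair locally and writes it into the
// feature vector exactly once (a set_value-style store), instead of the
// older clear()-then-add_value() pattern.
struct SingleFeatureAccum {
  int fid;
  double val;
  explicit SingleFeatureAccum(int id) : fid(id), val(0.0) {}
  void Add(double logp) { val += logp; }                    // accumulate per-word log-probs
  void Store(FeatureVector* f) const { (*f)[fid] = val; }   // one write, analogous to set_value
};

// Sketch of a Scan-style hook templated on the accumulator type, so the same
// scoring code can feed a single dense feature or a richer accumulator.
template <class Accum>
void ScanWord(double word_logp, Accum* accum) {
  accum->Add(word_logp);
}

int main() {
  FeatureVector feats;
  SingleFeatureAccum accum(/*fid=*/1);
  ScanWord(-1.7, &accum);   // e.g. log P(w1 | context)
  ScanWord(-0.4, &accum);   // e.g. log P(w2 | context)
  accum.Store(&feats);      // single store instead of clear + add_value
  return 0;
}
```

Presumably the point of templating on the accumulator is to let the common single-feature case avoid clearing and re-populating a sparse vector on every edge; the real interface in the repository may differ in names and details.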
Diffstat (limited to 'decoder/ff_lm.cc')
| -rw-r--r-- | decoder/ff_lm.cc | 62 |

1 file changed, 11 insertions, 51 deletions
```diff
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 0f44f8d3..3d81a599 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -1,5 +1,5 @@
 #define LM_FSA_SHORTEN_CONTEXT 1
-// seems to work great - just not sure if it actually speeds anything up
+// seems to work great - just not sure if it actually speeds anything up
 // virtual LogP contextBOW(const VocabIndex *context, unsigned length); /* backoff weight for truncating context */
 // does that need to be used? i think so.
@@ -188,7 +188,7 @@ struct LMClient {
   char request_buffer[16000];
 };
 
-class LanguageModelImpl {
+class LanguageModelImpl : public LanguageModelInterface {
   void init(int order) {
     //all these used to be const members, but that has no performance implication, and now there's less duplication.
     order_=order;
@@ -251,21 +251,6 @@ class LanguageModelImpl {
     return ngram_.contextBOW((VocabIndex*)context,shortened_len);
   }
 
-  double ShortenContext(WordID * context,int len) {
-    int slen=ContextSize(context,len);
-    double p=ContextBOW(context,slen);
-    while (len>slen) {
-      --len;
-      context[len]=TD::none;
-    }
-    return p;
-  }
-
-  /// NOT a negative logp, i.e. should be worse prob = more negative. that's what SRI wordProb returns, fortunately.
-  inline double clamp(double logp) const {
-    return logp < floor_ ? floor_ : logp;
-  }
-
   inline double LookupProbForBufferContents(int i) {
 //    int k = i; cerr << "P("; while(buffer_[k] > 0) { std::cerr << TD::Convert(buffer_[k++]) << " "; }
     double p = WordProb(buffer_[i], &buffer_[i+1]);
@@ -457,7 +442,6 @@ public:
   int order_;
   int state_size_;
  public:
-  double floor_;
   WordID kSTART;
   WordID kSTOP;
   WordID kUNKNOWN;
@@ -606,9 +590,6 @@ void LanguageModelFsa::set_ngram_order(int i) {
     }
   }
 }
-namespace {
-WordID empty_context=TD::none;
-}
 
 LanguageModelFsa::LanguageModelFsa(string const& param) {
   int lmorder;
@@ -617,29 +598,8 @@ LanguageModelFsa::LanguageModelFsa(string const& param) {
   set_ngram_order(lmorder);
 }
 
-void LanguageModelFsa::Scan(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,void const* old_st,void *new_st,FeatureVector *features) const {
-  //variable length array is in C99, msvc++, if it doesn't support it, #ifdef it or use a stackalloc call (forget the name)
-  Featval p;
-  if (ctxlen_) {
-    WordID ctx[ngram_order_];
-    state_copy(ctx,old_st);
-    ctx[ctxlen_]=TD::none; // make this part of state? wastes space but saves copies.
-    p=pimpl_->WordProb(w,ctx);
-// states are sri contexts so are in reverse order (most recent word is first, then 1-back comes next, etc.).
-    WordID *nst=(WordID *)new_st;
-    nst[0]=w; // new most recent word
-    to_state(nst+1,ctx,ctxlen_-1); // rotate old words right
-#if LM_FSA_SHORTEN_CONTEXT
-    pimpl_->ShortenContext(nst,ctxlen_);
-#endif
-  } else {
-    p=pimpl_->WordProb(w,&empty_context);
-  }
-  add_feat(features,(p<floor_)?floor_:p);
-}
-
-void LanguageModelFsa::print_state(ostream &o,void *st) const {
-  WordID *wst=(WordID *)st;
+void LanguageModelFsa::print_state(ostream &o,void const* st) const {
+  WordID const *wst=(WordID const*)st;
   o<<'[';
   for (int i=ctxlen_;i>0;) {
     --i;
@@ -660,7 +620,7 @@ LanguageModel::~LanguageModel() {
 }
 
 string LanguageModel::DebugStateToString(const void* state) const{
-  return pimpl_->DebugStateToString(state);
+  return imp().DebugStateToString(state);
 }
 
 void LanguageModel::TraversalFeaturesImpl(const SentenceMetadata& /* smeta */,
@@ -669,13 +629,13 @@ void LanguageModel::TraversalFeaturesImpl(const SentenceMetadata& /* smeta */,
                                           SparseVector<double>* features,
                                           SparseVector<double>* estimated_features,
                                           void* state) const {
-  features->set_value(fid_, pimpl_->LookupWords(*edge.rule_, ant_states, state));
-  estimated_features->set_value(fid_, pimpl_->EstimateProb(state));
+  features->set_value(fid_, imp().LookupWords(*edge.rule_, ant_states, state));
+  estimated_features->set_value(fid_, imp().EstimateProb(state));
 }
 
 void LanguageModel::FinalTraversalFeatures(const void* ant_state,
                                            SparseVector<double>* features) const {
-  features->set_value(fid_, pimpl_->FinalTraversalCost(ant_state));
+  features->set_value(fid_, imp().FinalTraversalCost(ant_state));
 }
 
 #ifdef HAVE_RANDLM
@@ -763,13 +723,13 @@ void LanguageModelRandLM::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                                 SparseVector<double>* estimated_features,
                                                 void* state) const {
   (void) smeta;
-  features->set_value(fid_, pimpl_->LookupWords(*edge.rule_, ant_states, state));
-  estimated_features->set_value(fid_, pimpl_->EstimateProb(state));
+  features->set_value(fid_, imp().LookupWords(*edge.rule_, ant_states, state));
+  estimated_features->set_value(fid_, imp().EstimateProb(state));
 }
 
 void LanguageModelRandLM::FinalTraversalFeatures(const void* ant_state,
                                                  SparseVector<double>* features) const {
-  features->set_value(fid_, pimpl_->FinalTraversalCost(ant_state));
+  features->set_value(fid_, imp().FinalTraversalCost(ant_state));
 }
 #endif
```
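A recurring change in the hunks above is replacing direct `pimpl_->` calls with `imp().`; the accessor itself is defined elsewhere (presumably the header) and is not part of this diff, so the following is only a guessed sketch of the pattern, with `Impl` and `LanguageModelLike` as placeholder names:

```cpp
#include <memory>
#include <string>

// Hypothetical stand-in for the real implementation class; the actual
// LanguageModelImpl / LanguageModelInterface live in ff_lm.cc / ff_lm.h.
struct Impl {
  std::string DebugStateToString(const void* /*state*/) const { return "[...]"; }
};

class LanguageModelLike {
 public:
  explicit LanguageModelLike(Impl* impl) : pimpl_(impl) {}
  std::string DebugStateToString(const void* state) const {
    // Call sites go through imp() instead of touching pimpl_ directly.
    return imp().DebugStateToString(state);
  }
 private:
  // Single point of indirection: how the impl is stored can change
  // without editing every caller.
  Impl& imp() const { return *pimpl_; }
  std::shared_ptr<Impl> pimpl_;
};

int main() {
  LanguageModelLike lm(new Impl);
  lm.DebugStateToString(nullptr);
  return 0;
}
```

Routing every call through one inline accessor means the impl's storage (raw pointer, smart pointer, or a more derived type) can change without touching the call sites, which is presumably the motivation for the mechanical `pimpl_` to `imp()` substitutions throughout this patch.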