diff options
Diffstat (limited to 'decoder/ff_lm.h')
-rw-r--r-- | decoder/ff_lm.h | 37 |
1 files changed, 35 insertions, 2 deletions
diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h index 935e283c..e682481d 100644 --- a/decoder/ff_lm.h +++ b/decoder/ff_lm.h @@ -8,7 +8,38 @@ #include "ff.h" #include "config.h" -class LanguageModelImpl; +class LanguageModelInterface { + public: + double floor_; + LanguageModelInterface() : floor_(-100) { } + virtual ~LanguageModelInterface() { } + + // not clamped to floor. log10prob + virtual double WordProb(WordID word, WordID const* context) = 0; + inline double WordProbFloored(WordID word, WordID const* context) { + return clamp(WordProb(word,context)); + } + // may be shorter than actual null-terminated length. context must be null terminated. len is just to save effort for subclasses that don't support contextID + virtual int ContextSize(WordID const* context,int len) = 0; + // use this as additional logprob when shortening the context as above + virtual double ContextBOW(WordID const* context,int shortened_len) = 0; // unlikely that you'll ever need to floor a backoff cost. i'd say impossible. + + inline double ShortenContext(WordID * context,int len) { + int slen=ContextSize(context,len); + double p=ContextBOW(context,slen); + while (len>slen) { + --len; + context[len]=TD::none; + } + return p; + } + /// should be worse prob = more negative. that's what SRI wordProb returns: log10(prob) + inline double clamp(double logp) const { + return logp < floor_ ? floor_ : logp; + } +}; + +struct LanguageModelImpl; class LanguageModel : public FeatureFunction { public: @@ -29,7 +60,9 @@ class LanguageModel : public FeatureFunction { void* out_context) const; private: int fid_; // conceptually const; mutable only to simplify constructor - mutable LanguageModelImpl* pimpl_; + //LanguageModelImpl &imp() { return *(LanguageModelImpl*)pimpl_; } + LanguageModelImpl & imp() const { return *(LanguageModelImpl*)pimpl_; } + /* mutable */ LanguageModelInterface* pimpl_; }; #ifdef HAVE_RANDLM |