summaryrefslogtreecommitdiff
path: root/decoder/ff_lm.h
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-27 04:59:37 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-27 04:59:37 +0000
commit9f34384f610e512488df4bb0f08e962ad95d6d13 (patch)
tree6a91597be780d1460dc2f92e58452a56e745dd18 /decoder/ff_lm.h
parentcd24c0b69bbfe3218fc8ad50f0729f388b23cc2a (diff)
FSA feature: templated Accum interface. The phrase interface allows exceeding the Markov order (e.g. unigram state with a 3-gram LM). Use Accum/set_value rather than clear/add_value. Warning: the 3-gram FSA LM disagrees with bottom-up scoring in the 4th decimal place.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@431 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_lm.h')
-rw-r--r--decoder/ff_lm.h37
1 files changed, 35 insertions, 2 deletions
diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h
index 935e283c..e682481d 100644
--- a/decoder/ff_lm.h
+++ b/decoder/ff_lm.h
@@ -8,7 +8,38 @@
#include "ff.h"
#include "config.h"
-class LanguageModelImpl;
+class LanguageModelInterface { // abstract language-model query interface: word log-probs, context shortening, and flooring
+ public:
+ double floor_; // lower bound applied by clamp(); the constructor sets it to -100
+ LanguageModelInterface() : floor_(-100) { }
+ virtual ~LanguageModelInterface() { }
+
+ // Log10 probability of word given context. The result is NOT clamped to floor_.
+ virtual double WordProb(WordID word, WordID const* context) = 0;
+ inline double WordProbFloored(WordID word, WordID const* context) { // WordProb() passed through clamp()
+ return clamp(WordProb(word,context));
+ }
+ // Returns a context length that may be shorter than the actual null-terminated length. context must be null terminated; len is passed only to save effort for subclasses that don't support contextID.
+ virtual int ContextSize(WordID const* context,int len) = 0;
+ // Additional logprob to apply when the context is shortened to shortened_len (the length ContextSize() reports).
+ virtual double ContextBOW(WordID const* context,int shortened_len) = 0; // unlikely that you'll ever need to floor a backoff cost. i'd say impossible.
+
+ inline double ShortenContext(WordID * context,int len) { // truncates context in place to ContextSize(context,len) and returns the matching ContextBOW()
+ int slen=ContextSize(context,len);
+ double p=ContextBOW(context,slen); // queried before the tail is overwritten below
+ while (len>slen) { // overwrite positions [slen, len) with TD::none, from the back
+ --len;
+ context[len]=TD::none;
+ }
+ return p;
+ }
+ /// Lower-bounds logp at floor_. Worse prob = more negative; that's what SRI wordProb returns: log10(prob).
+ inline double clamp(double logp) const {
+ return logp < floor_ ? floor_ : logp;
+ }
+};
+
+struct LanguageModelImpl;
class LanguageModel : public FeatureFunction {
public:
@@ -29,7 +60,9 @@ class LanguageModel : public FeatureFunction {
void* out_context) const;
private:
int fid_; // conceptually const; mutable only to simplify constructor
- mutable LanguageModelImpl* pimpl_;
+ //LanguageModelImpl &imp() { return *(LanguageModelImpl*)pimpl_; }
+ LanguageModelImpl & imp() const { return *(LanguageModelImpl*)pimpl_; } // const accessor handing out a mutable reference to the object behind pimpl_, viewed as LanguageModelImpl. NOTE(review): presumably LanguageModelImpl derives from LanguageModelInterface - confirm; only a forward declaration is visible in this hunk, and a C-style cast involving an incomplete class type may be treated as reinterpret_cast rather than static_cast
+ /* mutable */ LanguageModelInterface* pimpl_; // held as the abstract interface type; imp() casts it to LanguageModelImpl
};
#ifdef HAVE_RANDLM