From 94b7112d6acf69194a4e431ee551ebfdc690cdd3 Mon Sep 17 00:00:00 2001
From: graehl
Date: Mon, 26 Jul 2010 05:27:47 +0000
Subject: include contextBOW prob when shortening context

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@418 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 decoder/ff_lm.cc | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'decoder/ff_lm.cc')

diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index e9c172da..12f760bf 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -1,5 +1,8 @@
 #define LM_FSA_SHORTEN_CONTEXT 1
 // seems to work great - just not sure if it actually speeds anything up
+// virtual LogP contextBOW(const VocabIndex *context, unsigned length);
+  /* backoff weight for truncating context */
+// does that need to be used? i think so.
 
 namespace {
 char const* usage_name="LanguageModel";
@@ -244,13 +247,18 @@ class LanguageModelImpl {
     ngram_.contextID((VocabIndex*)context,ret);
     return ret;
   }
+  virtual double ContextBOW(WordID const* context,int shortened_len) {
+    return ngram_.contextBOW((VocabIndex*)context,shortened_len);
+  }
 
-  void ShortenContext(WordID * context,int len) {
+  double ShortenContext(WordID * context,int len) {
     int slen=ContextSize(context,len);
+    double p=ContextBOW(context,slen);
     while (len>slen) {
       --len;
       context[len]=TD::none;
     }
+    return p;
   }
 
   /// NOT a negative logp, i.e. should be worse prob = more negative. that's what SRI wordProb returns, fortunately.
@@ -469,6 +477,10 @@ struct ClientLMI : public LanguageModelImpl
   virtual int ContextSize(WordID const* const, int len) {
     return len;
   }
+  virtual double ContextBOW(WordID const* context,int shortened_len) {
+    return 0;
+  }
+
 protected:
   LMClient client_;
 };
@@ -485,6 +497,9 @@ struct ReuseLMI : public LanguageModelImpl
     ng->contextID((VocabIndex*)context,ret);
     return ret;
   }
+  virtual double ContextBOW(WordID const* context,int shortened_len) {
+    return ng->contextBOW((VocabIndex*)context,shortened_len);
+  }
 protected:
   Ngram *ng;
 };
--
cgit v1.2.3