summary refs log tree commit diff
path: root/decoder/ff_lm.cc
diff options
context:
space:
mode:
author graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-26 05:27:47 +0000
committer graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-26 05:27:47 +0000
commit 44abbdc8ea557481db0b24097cdad9c5136a5a72 (patch)
tree 5e158e7cc156b5c126ee7f35e1a5bab1779ddb66 /decoder/ff_lm.cc
parent ec734c5fda0397f47d664b2118b348dd9d8f6070 (diff)
include contextBOW prob when shortening context
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@418 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_lm.cc')
-rw-r--r-- decoder/ff_lm.cc 17
1 files changed, 16 insertions, 1 deletions
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index e9c172da..12f760bf 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -1,5 +1,8 @@
#define LM_FSA_SHORTEN_CONTEXT 1
// seems to work great - just not sure if it actually speeds anything up
+// virtual LogP contextBOW(const VocabIndex *context, unsigned length);
+ /* backoff weight for truncating context */
+// does that need to be used? i think so.
namespace {
char const* usage_name="LanguageModel";
@@ -244,13 +247,18 @@ class LanguageModelImpl {
ngram_.contextID((VocabIndex*)context,ret);
return ret;
}
+ virtual double ContextBOW(WordID const* context,int shortened_len) { // presumably the backoff weight for truncating context to shortened_len words (see the contextBOW note at the top of this diff); virtual, and overridden by ClientLMI and ReuseLMI
+ return ngram_.contextBOW((VocabIndex*)context,shortened_len); // forwards to ngram_.contextBOW, casting the WordID pointer to VocabIndex*
+ }
- void ShortenContext(WordID * context,int len) {
+ double ShortenContext(WordID * context,int len) { // return type changes from void to double: the ContextBOW value for the kept prefix is now returned
int slen=ContextSize(context,len); // number of leading entries to keep, as reported by ContextSize
+ double p=ContextBOW(context,slen); // looked up before the tail of context is overwritten below
while (len>slen) { // overwrite entries slen..len-1 with TD::none, working backwards from the end
--len;
context[len]=TD::none;
}
+ return p;
}
/// NOT a negative logp, i.e. should be worse prob = more negative. that's what SRI wordProb returns, fortunately.
@@ -469,6 +477,10 @@ struct ClientLMI : public LanguageModelImpl
virtual int ContextSize(WordID const* const, int len) { // ClientLMI version: the context pointer is unnamed and ignored
return len; // reports the full length unchanged
}
+ virtual double ContextBOW(WordID const* context,int shortened_len) { // ClientLMI override: neither argument is used
+ return 0; // always 0; ContextSize in this struct returns len unchanged, so presumably no context is ever dropped here - TODO confirm
+ }
+
protected:
LMClient client_;
};
@@ -485,6 +497,9 @@ struct ReuseLMI : public LanguageModelImpl
ng->contextID((VocabIndex*)context,ret);
return ret;
}
+ virtual double ContextBOW(WordID const* context,int shortened_len) { // ReuseLMI override: same contextBOW call as LanguageModelImpl, but made through ng
+ return ng->contextBOW((VocabIndex*)context,shortened_len); // ng is the Ngram* member declared just below in this struct
+ }
protected:
Ngram *ng;
};