diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-26 05:27:47 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-26 05:27:47 +0000 |
commit | 94b7112d6acf69194a4e431ee551ebfdc690cdd3 (patch) | |
tree | 0a8406d67055bb8c1c0529a6ad96656a55e0701c | |
parent | cf24bee42d28321d690f4651d74a6be9bfbd7a97 (diff) |
Include the contextBOW probability (the backoff weight for truncating context) when shortening the LM context
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@418 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- | decoder/ff_lm.cc | 17 | ||||
-rwxr-xr-x | vest/dist-vest.pl | 1 |
2 files changed, 17 insertions, 1 deletions
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index e9c172da..12f760bf 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -1,5 +1,8 @@ #define LM_FSA_SHORTEN_CONTEXT 1 // seems to work great - just not sure if it actually speeds anything up +// virtual LogP contextBOW(const VocabIndex *context, unsigned length); + /* backoff weight for truncating context */ +// does that need to be used? i think so. namespace { char const* usage_name="LanguageModel"; @@ -244,13 +247,18 @@ class LanguageModelImpl { ngram_.contextID((VocabIndex*)context,ret); return ret; } + virtual double ContextBOW(WordID const* context,int shortened_len) { + return ngram_.contextBOW((VocabIndex*)context,shortened_len); + } - void ShortenContext(WordID * context,int len) { + double ShortenContext(WordID * context,int len) { int slen=ContextSize(context,len); + double p=ContextBOW(context,slen); while (len>slen) { --len; context[len]=TD::none; } + return p; } /// NOT a negative logp, i.e. should be worse prob = more negative. that's what SRI wordProb returns, fortunately. @@ -469,6 +477,10 @@ struct ClientLMI : public LanguageModelImpl virtual int ContextSize(WordID const* const, int len) { return len; } + virtual double ContextBOW(WordID const* context,int shortened_len) { + return 0; + } + protected: LMClient client_; }; @@ -485,6 +497,9 @@ struct ReuseLMI : public LanguageModelImpl ng->contextID((VocabIndex*)context,ret); return ret; } + virtual double ContextBOW(WordID const* context,int shortened_len) { + return ng->contextBOW((VocabIndex*)context,shortened_len); + } protected: Ngram *ng; }; diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 2789b6fd..e445b4a0 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -76,6 +76,7 @@ if (GetOptions( "max-iterations=i" => \$max_iterations, "normalize=s" => \$normalize, "pmem=s" => \$pmem, + "cpbin!" => \$cpbin, "rand-directions=i" => \$rand_directions, "random_directions=i" => \$rand_directions, "no-primary!" => \$noprimary, |