summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-26 05:27:47 +0000
committer graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-26 05:27:47 +0000
commit 94b7112d6acf69194a4e431ee551ebfdc690cdd3 (patch)
tree 0a8406d67055bb8c1c0529a6ad96656a55e0701c
parent cf24bee42d28321d690f4651d74a6be9bfbd7a97 (diff)
include contextBOW prob when shortening context
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@418 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- decoder/ff_lm.cc 17
-rwxr-xr-x vest/dist-vest.pl 1
2 files changed, 17 insertions, 1 deletions
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index e9c172da..12f760bf 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -1,5 +1,8 @@
#define LM_FSA_SHORTEN_CONTEXT 1
// seems to work great - just not sure if it actually speeds anything up
+// virtual LogP contextBOW(const VocabIndex *context, unsigned length);
+ /* backoff weight for truncating context */
+// does that need to be used? i think so.
namespace {
char const* usage_name="LanguageModel";
@@ -244,13 +247,18 @@ class LanguageModelImpl {
ngram_.contextID((VocabIndex*)context,ret);
return ret;
}
+ virtual double ContextBOW(WordID const* context,int shortened_len) {
+ return ngram_.contextBOW((VocabIndex*)context,shortened_len);
+ }
- void ShortenContext(WordID * context,int len) {
+ double ShortenContext(WordID * context,int len) {
int slen=ContextSize(context,len);
+ double p=ContextBOW(context,slen);
while (len>slen) {
--len;
context[len]=TD::none;
}
+ return p;
}
/// NOT a negative logp, i.e. should be worse prob = more negative. that's what SRI wordProb returns, fortunately.
@@ -469,6 +477,10 @@ struct ClientLMI : public LanguageModelImpl
virtual int ContextSize(WordID const* const, int len) {
return len;
}
+ virtual double ContextBOW(WordID const* context,int shortened_len) {
+ return 0;
+ }
+
protected:
LMClient client_;
};
@@ -485,6 +497,9 @@ struct ReuseLMI : public LanguageModelImpl
ng->contextID((VocabIndex*)context,ret);
return ret;
}
+ virtual double ContextBOW(WordID const* context,int shortened_len) {
+ return ng->contextBOW((VocabIndex*)context,shortened_len);
+ }
protected:
Ngram *ng;
};
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 2789b6fd..e445b4a0 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -76,6 +76,7 @@ if (GetOptions(
"max-iterations=i" => \$max_iterations,
"normalize=s" => \$normalize,
"pmem=s" => \$pmem,
+ "cpbin!" => \$cpbin,
"rand-directions=i" => \$rand_directions,
"random_directions=i" => \$rand_directions,
"no-primary!" => \$noprimary,