disable fsa lm phrase due to slight (.05%) diff in scores - solve it later

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@434 ec762483-ff6d-05da-a07a-a48fb63a330f
author: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-27 05:12:40 +0000
committer: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-27 05:12:40 +0000
commit: 4c5df460c9da5c935438850ef7993463a9113286 (patch)
tree: dc9c94ae0f87b7ce4337c2a30b9c48f363e97d48
parent: 684db46e977bca456e02e677d22ba5e4a33ae6ce (diff)
1 files changed, 28 insertions, 25 deletions
diff --git a/decoder/ff_lm_fsa.h b/decoder/ff_lm_fsa.h
index c1d875eb..4b0682d1 100755
--- a/decoder/ff_lm_fsa.h
+++ b/decoder/ff_lm_fsa.h
@@ -1,8 +1,9 @@
 #ifndef FF_LM_FSA_H
 #define FF_LM_FSA_H
 
-//FIXME: 3gram has differences in 4th decimal digit, compared to regular ff_lm.  this is USUALLY a bug (there's way more actual precision in there).  this was with #define LM_FSA_SHORTEN_CONTEXT 1 and 0 (so it's not that)
+//FIXME: when FSA_LM_PHRASE 1, 3gram has differences in 4th decimal digit, compared to regular ff_lm.  this is USUALLY a bug (there's way more actual precision in there).  this was with #define LM_FSA_SHORTEN_CONTEXT 1 and 0 (so it's not that).  also, LM_FSA_SHORTEN_CONTEXT gives identical scores with FSA_LM_PHRASE 0
 
+#define FSA_LM_PHRASE 0
 
 #define FSA_LM_DEBUG 0
 #if FSA_LM_DEBUG
@@ -41,6 +42,31 @@ struct LanguageModelFsa : public FsaFeatureFunctionBase<LanguageModelFsa> {
   }
 
   template <class Accum>
+  void ScanAccum(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,void const* old_st,void *new_st,Accum *a) const {
+    if (!ctxlen_) {
+      Add(floored(pimpl_->WordProb(w,&empty_context)),a);
+      return;
+    }
+    //variable length array is in C99, msvc++, if it doesn't support it, #ifdef it or use a stackalloc call (forget the name)
+    if (ctxlen_) {
+      WordID ctx[ngram_order_];
+      state_copy(ctx,old_st);
+      ctx[ctxlen_]=TD::none; // make this part of state?  wastes space but saves copies.
+      Featval p=floored(pimpl_->WordProb(w,ctx));
+// states are sri contexts so are in reverse order (most recent word is first, then 1-back comes next, etc.).
+      WordID *nst=(WordID *)new_st;
+      nst[0]=w; // new most recent word
+      to_state(nst+1,ctx,ctxlen_-1); // rotate old words right
+#if LM_FSA_SHORTEN_CONTEXT
+      p+=pimpl_->ShortenContext(nst,ctxlen_);
+#endif
+      Add(p,a);
+    }
+  }
+
+#if FSA_LM_PHRASE
+  //FIXME: there is a bug in here somewhere, or else the 3gram LM we use gives different scores for phrases (impossible? BOW nonzero when shortening context past what LM has?)
+  template <class Accum>
   void ScanPhraseAccum(SentenceMetadata const& /* smeta */,const Hypergraph::Edge&edge,WordID const* begin,WordID const* end,void const* old_st,void *new_st,Accum *a) const {
     if (begin==end) return; // otherwise w/ shortening it's possible to end up with no words at all.
     /* // this is forcing unigram prob always.  we will instead build the phrase
@@ -75,29 +101,8 @@ struct LanguageModelFsa : public FsaFeatureFunctionBase<LanguageModelFsa> {
     Add(p,a);
   }
 
-  template <class Accum>
-  void ScanAccum(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,void const* old_st,void *new_st,Accum *a) const {
-    if (!ctxlen_) {
-      Add(floored(pimpl_->WordProb(w,&empty_context)),a);
-      return;
-    }
-    //variable length array is in C99, msvc++, if it doesn't support it, #ifdef it or use a stackalloc call (forget the name)
-    if (ctxlen_) {
-      WordID ctx[ngram_order_];
-      state_copy(ctx,old_st);
-      ctx[ctxlen_]=TD::none; // make this part of state?  wastes space but saves copies.
-      Featval p=floored(pimpl_->WordProb(w,ctx));
-// states are sri contexts so are in reverse order (most recent word is first, then 1-back comes next, etc.).
-      WordID *nst=(WordID *)new_st;
-      nst[0]=w; // new most recent word
-      to_state(nst+1,ctx,ctxlen_-1); // rotate old words right
-#if LM_FSA_SHORTEN_CONTEXT
-      p+=pimpl_->ShortenContext(nst,ctxlen_);
+  SCAN_PHRASE_ACCUM_OVERRIDE
 #endif
-      Add(p,a);
-    }
-  }
-
   // impl details:
   void set_ngram_order(int i); // if you build ff_from_fsa first, then increase this, you will get memory overflows.  otherwise, it's the same as a "-o i" argument to constructor
   double floor_; // log10prob minimum used (e.g. unk words)
@@ -106,8 +111,6 @@ private:
   int ngram_order_;
   int ctxlen_; // 1 less than above
   LanguageModelInterface *pimpl_;
-public:
-  SCAN_PHRASE_ACCUM_OVERRIDE
 
 };
author	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-27 05:12:40 +0000
committer	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-27 05:12:40 +0000
commit	4c5df460c9da5c935438850ef7993463a9113286 (patch)
tree	dc9c94ae0f87b7ce4337c2a30b9c48f363e97d48
parent	684db46e977bca456e02e677d22ba5e4a33ae6ce (diff)