safe hg pruning without needing additional inside reachability pass (max margin tightness is less at bottom of derivation tree)

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@181 ec762483-ff6d-05da-a07a-a48fb63a330f
author: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-07 21:26:51 +0000
committer: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-07 21:26:51 +0000
commit: 6b19aa3fa80b6ce0c6b9e6e26ca4a8fcfc41c4fb (patch)
tree: 3e3b5048ae2f850f52fe7123e4032a7b5b928c6f /decoder/ff_lm.cc
parent: 40bc789dae572c3aa73171c3083326963fe41ffc (diff)
1 files changed, 3 insertions, 1 deletions
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 03dc2054..e6f7912e 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -1,3 +1,5 @@
+//TODO: backoff wordclasses for named entity translations, esp. numbers.  e.g. digits -> @.  ideally, rule features would specify replacement lm tokens/classes
+
 //TODO: extra int in state to hold "GAP" token is not needed.  if there are less than (N-1) words, then null terminate the e.g. left words.  however, this would mean treating gapless items differently.  not worth the potential bugs right now.
 
 //TODO: allow features to reorder by heuristic*weight the rules' terminal phrases (or of hyperedges').  if first pass has pruning, then compute over whole ruleset as part of heuristic
@@ -311,7 +313,7 @@ class LanguageModelImpl {
     double sum=0;
     for (;rend>rbegin;--rend) {
       sum+=clamp(WordProb(rend[-1],rend));
-      UNIDBG(","<<TD::Convert(rend[-1]));
+      UNIDBG(" "<<TD::Convert(rend[-1]));
     }
     UNIDBG(")="<<sum<<endl);
     return sum;
author	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-07 21:26:51 +0000
committer	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-07 21:26:51 +0000
commit	6b19aa3fa80b6ce0c6b9e6e26ca4a8fcfc41c4fb (patch)
tree	3e3b5048ae2f850f52fe7123e4032a7b5b928c6f /decoder/ff_lm.cc
parent	40bc789dae572c3aa73171c3083326963fe41ffc (diff)