From c85ba4a9147d180277c49752d6fbefbfe9c2f392 Mon Sep 17 00:00:00 2001
From: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Mon, 26 Jul 2010 17:25:57 +0000
Subject: comment cleanup

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@421 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 decoder/ff.h          |  4 ++++
 decoder/ff_from_fsa.h | 40 +++++++++++++++++-----------------------
 decoder/ff_fsa.h      |  8 ++++++--
 decoder/ff_lm.cc      |  3 +--
 decoder/hg.h          |  3 ---
 5 files changed, 28 insertions(+), 30 deletions(-)
diff --git a/decoder/ff.h b/decoder/ff.h
index 5c1f214f..08715766 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -2,6 +2,7 @@
 #define _FF_H_
 
 #include <vector>
+#include <cstring>
 #include "fdict.h"
 #include "hg.h"
 
@@ -29,6 +30,7 @@ class FeatureFunction {
   static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details);
   static Features single_feature(int feat);
 public:
+
   // stateless feature that doesn't depend on source span: override and return true.  then your feature can be precomputed over rules.
   virtual bool rule_feature() const { return false; }
 
@@ -81,6 +83,8 @@ public:
   // of the particular FeatureFunction class.  There is one exception:
   // equality of the contents (i.e., memcmp) is required to determine whether
   // two states can be combined.
+
+  // "Log" in the name means the edge is non-const only so that you can log to it with INFO_EDGE(edge,msg<<"etc.").  Most features don't use this, so they implement the method below instead.  The two have different names so that a default implementation is possible without name hiding when a subclass inherits and overrides just one of them.
   virtual void TraversalFeaturesLog(const SentenceMetadata& smeta,
                                     Hypergraph::Edge& edge, // this is writable only so you can use log()
                                      const std::vector<const void*>& ant_contexts,
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index 237e5d0d..f50e0fdc 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -40,8 +40,7 @@ public:
 
   Features features() const { return ff.features(); }
 
-  //TODO: add source span to Fsa FF interface, pass along
-  //TODO: read/debug VERY CAREFULLY
+  // "Log" because it takes a non-const edge, which is writable only so that we can log to it.
   void TraversalFeaturesLog(const SentenceMetadata& smeta,
                              Hypergraph::Edge& edge,
                              const std::vector<const void*>& ant_contexts,
@@ -64,25 +63,27 @@ public:
       FSAFFDBGnl(edge);
       return;
     }
-
+//why do we compute heuristic in so many places?  well, because that's how we know what state we should score words in once we're full on our left context (because of markov order bound, we know the score will be the same no matter what came before that left context)
     SP h_start=ff.heuristic_start_state();
+    // these left_* refer to our output (out_state):
     W left_begin=(W)out_state;
     W left_out=left_begin; // [left,fsa_state) = left ctx words.  if left words aren't full, then null wordid
     WP left_full=left_end_full(out_state);
-    FsaScanner<Impl> fsa(ff,smeta,edge);
+    FsaScanner<Impl> fsa(ff,smeta,edge); // this holds our current state and eventually becomes our right state if we saw enough words
     TRule const& rule=*edge.rule_;
     Sentence const& e = rule.e();
     for (int j = 0; j < e.size(); ++j) { // items in target side of rule
       if (e[j] < 1) { // variable
-        SP a = ant_contexts[-e[j]];
+        SP a = ant_contexts[-e[j]]; // variables a* are referring to this child derivation state.
         FSAFFDBG(edge,' '<<describe_state(a));
         WP al=(WP)a;
         WP ale=left_end(a);
         // scan(al,le) these - the same as below else.  macro for now; pull into closure object later?
-        int nw=ale-al; // this many new words
-        if (left_out+nw<left_full) { // nothing to score after adding
-          wordcpy(left_out,al,nw);
-          left_out+=nw;
+        int anw=ale-al;
+// anw = number of left words in child (full if == M).  we will use them to fill our left words, and then score the rest fully, knowing what state we're in based on h_start -> our left words -> any number of interior words which are scored then hidden
+        if (left_out+anw<left_full) { // nothing to score after adding
+          wordcpy(left_out,al,anw);
+          left_out+=anw;
         } else if (left_out<left_full) { // something to score AND newly full left context to fill
           int ntofill=left_full-left_out;
           assert(ntofill==M-(left_out-left_begin));
@@ -91,14 +92,14 @@ public:
           // heuristic known now
           fsa.reset(h_start);
           fsa.scan(left_begin,left_full,estimated_features); // save heuristic (happens once only)
-          fsa.scan(al+ntofill,ale,features);
+          fsa.scan(al+ntofill,ale,features); // because of markov order, fully filled left words scored starting at h_start put us in the right state to score the extra words (which are forgotten)
           al+=ntofill; // we used up the first ntofill words of al to end up in some known state via exactly M words total (M-ntofill were there beforehand).  now we can scan the remaining al words of this child
         } else { // more to score / state to update (left already full)
           fsa.scan(al,ale,features);
         }
-        if (nw==M) // child had full state already
+        if (anw==M) // child had full state already
           fsa.reset(fsa_state(a));
-        assert(nw<=M);
+        assert(anw<=M);
       } else { // single word
         WordID ew=e[j];
         FSAFFDBG(edge,' '<<TD::Convert(ew));
@@ -114,13 +115,14 @@ public:
       }
     }
 
-    if (left_out<left_full) { // finally: partial heuristic foru nfilled items
+    void *out_fsa_state=fsa_state(out_state);
+    if (left_out<left_full) { // finally: partial heuristic for unfilled items
       fsa.reset(h_start);
       fsa.scan(left_begin,left_out,estimated_features);
-      clear_fsa_state(out_state); // 0 bytes so we compare / hash correctly. don't know state yet
       do { *left_out++=TD::none; } while(left_out<left_full); // none-terminate so left_end(out_state) will know how many words
+      ff.state_zero(out_fsa_state); // so we compare / hash correctly. don't know state yet because left context isn't full
     } else // or else store final right-state.  heuristic was already assigned
-      fstatecpy(out_state,fsa.cs);
+      ff.state_copy(out_fsa_state,fsa.cs);
     FSAFFDBG(edge," = " << describe_state(out_state)<<" "<<name<<"="<<ff.describe_features(*features)<<" h="<<ff.describe_features(*estimated_features)<<")");
     FSAFFDBGnl(edge);
   }
@@ -229,14 +231,6 @@ private:
   inline void *fsa_state(void * ant) const {
     return ((char *)ant+state_offset);
   }
-
-  void clear_fsa_state(void *ant) const { // when state is unknown
-    std::memset(fsa_state(ant),0,ssz);
-  }
-
-  inline void fstatecpy(void *ant,void const* src) const {
-    std::memcpy(fsa_state(ant),src,ssz);
-  }
 };
 
 
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 4575b648..459d80ba 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -8,6 +8,7 @@
 
   state is some fixed width byte array.  could actually be a void *, WordID sequence, whatever.
 
+  TODO: fsa ff scores phrases not just words
   TODO: fsa feature aggregator that presents itself as a single fsa; benefit: when wrapped in ff_from_fsa, only one set of left words is stored.  downside: compared to separate ff, the inside portion of lower-order models is incorporated later.  however, the full heuristic is already available and exact for those words.  so don't sweat it.
 
   TODO: state (+ possibly span-specific) custom heuristic, e.g. in "longer than previous word" model, you can expect a higher outside if your state is a word of 2 letters.  this is on top of the nice heuristic for the unscored words, of course.  in ngrams, the avg prob will be about the same, but if the words possible for a source span are summarized, maybe it's possible to predict.  probably not worth the effort.
@@ -118,9 +119,12 @@ protected:
   }
 
 public:
-  void state_cpy(void *to,void const*from) const {
+  void state_copy(void *to,void const*from) const {
     std::memcpy(to,from,state_bytes_);
   }
+  void state_zero(void *st) const { // you should call this if you don't know the state yet and want it to be hashed/compared properly
+    std::memset(st,0,state_bytes_);
+  }
 
   // can override to different return type, e.g. just return feats:
   Featval describe_features(FeatureVector const& feats) const {
@@ -325,7 +329,7 @@ struct FsaScanner {
   Bytes states; // first is at begin, second is at (char*)begin+stride
   void *st0; // states
   void *st1; // states+stride
-  void *cs;
+  void *cs; // initially st0, alternates between st0 and st1
   inline void *nexts() const {
     return (cs==st0)?st1:st0;
   }
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 12f760bf..0f44f8d3 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -617,13 +617,12 @@ LanguageModelFsa::LanguageModelFsa(string const& param) {
   set_ngram_order(lmorder);
 }
 
-//TODO: use sri equivalent states (expose in lm impl?)
 void LanguageModelFsa::Scan(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,void const* old_st,void *new_st,FeatureVector *features) const {
   //variable length array is in C99, msvc++, if it doesn't support it, #ifdef it or use a stackalloc call (forget the name)
   Featval p;
   if (ctxlen_) {
     WordID ctx[ngram_order_];
-    state_cpy(ctx,old_st);
+    state_copy(ctx,old_st);
     ctx[ctxlen_]=TD::none; // make this part of state?  wastes space but saves copies.
     p=pimpl_->WordProb(w,ctx);
 // states are sri contexts so are in reverse order (most recent word is first, then 1-back comes next, etc.).
diff --git a/decoder/hg.h b/decoder/hg.h
index ea0e8aa1..b0785d33 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -1,9 +1,6 @@
 #ifndef _HG_H_
 #define _HG_H_
 
-
-//FIXME: is the edge given to ffs the coarse (previous forest) edge?  if so, then INFO_EDGE is effectively not working.  supposed to have logging associated with each edge and see how it fits together in kbest afterwards.
-
 // define USE_INFO_EDGE 1 if you want lots of debug info shown with --show_derivations - otherwise it adds quite a bit of overhead if ffs have their logging enabled (e.g. ff_from_fsa)
 #define USE_INFO_EDGE 0
 #if USE_INFO_EDGE
-- 
cgit v1.2.3