From 9f34384f610e512488df4bb0f08e962ad95d6d13 Mon Sep 17 00:00:00 2001
From: graehl
Date: Tue, 27 Jul 2010 04:59:37 +0000
Subject: fsa feature: templated Accum interface; phrase interface allows
 exceeding markov order (e.g. unigram state with a 3gram lm). use
 Accum/set_value rather than clear/add_value. warning: the 3gram fsa lm
 disagrees with bottom-up in the 4th decimal place

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@431 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 decoder/cdec.cc         |   1 -
 decoder/cdec_ff.cc      |   6 +-
 decoder/feature_accum.h | 129 ++++++++++++++++++++++
 decoder/ff.h            |   3 +-
 decoder/ff_factory.cc   |   2 +-
 decoder/ff_from_fsa.h   | 100 +++++++++--------
 decoder/ff_fsa.h        | 286 ++++++++++++++++++++++++++----------------------
 decoder/ff_lm.cc        |  62 ++---------
 decoder/ff_lm.h         |  37 ++++++-
 decoder/ff_lm_fsa.h     |  95 +++++++++++++++-
 decoder/ff_sample_fsa.h |   2 +
 decoder/sentences.h     |   5 +-
 decoder/sparse_vector.h |  16 ++-
 decoder/tdict.cc        |  11 ++
 decoder/tdict.h         |   1 +
 decoder/value_array.h   |  53 +++++++++
 16 files changed, 571 insertions(+), 238 deletions(-)
 create mode 100755 decoder/feature_accum.h

diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index bc1c348d..76551cfa 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -190,7 +190,6 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
     exit(1);
   }
-
   if (conf.count("usage")) {
     cout<<global_ff_registry->usage(str("usage",conf),true,true)<<endl;
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ ... @@
 boost::shared_ptr<FFRegistry> global_ff_registry;
 void register_feature_functions() {
   global_ff_registry->Register(new FFFactory<LanguageModel>);
   global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<LanguageModelFsa> >); // same as LM but using fsa wrapper
+
+  /* // sample_fsa:
   global_ff_registry->Register(new FFFactory<WordPenaltyFromFsa>); // same as WordPenalty, but implemented using ff_fsa
   global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<LongerThanPrev> >);
   global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<ShorterThanPrev> >);
+  */
+
   //TODO: use for all features the new Register which requires usage(...)
 #ifdef HAVE_RANDLM
   global_ff_registry->Register("RandLM", new FFFactory<LanguageModelRandLM>);
 #endif
diff --git a/decoder/feature_accum.h b/decoder/feature_accum.h
new file mode 100755
index 00000000..851b29db
--- /dev/null
+++ b/decoder/feature_accum.h
@@ -0,0 +1,129 @@
+#ifndef FEATURE_ACCUM_H
+#define FEATURE_ACCUM_H
+
+#include "ff.h"
+#include "sparse_vector.h"
+#include "value_array.h"
+
+struct SparseFeatureAccumulator : public FeatureVector {
+  typedef FeatureVector State;
+  SparseFeatureAccumulator() {  }
+  template <class FF>
+  FeatureVector const& describe(FF const& ) { return *this; }
+  void Store(FeatureVector *fv) const {
+    fv->set_from(*this);
+  }
+  template <class FF>
+  void Store(FF const& /* ff */,FeatureVector *fv) const {
+    fv->set_from(*this);
+  }
+  template <class FF>
+  void Add(FF const& /* ff */,FeatureVector const& fv) {
+    (*this)+=fv;
+  }
+  void Add(FeatureVector const& fv) {
+    (*this)+=fv;
+  }
+  /*
+  SparseFeatureAccumulator(FeatureVector const& fv) : State(fv) {}
+  FeatureAccumulator(Features const& fids) {}
+  FeatureAccumulator(Features const& fids,FeatureVector const& fv) : State(fv) {}
+  void Add(Features const& fids,FeatureVector const& fv) {
+    *this += fv;
+  }
+  */
+  void Add(int i,Featval v) {
+    (*this)[i]+=v;
+  }
+  void Add(Features const& fids,int i,Featval v) {
+    (*this)[i]+=v;
+  }
+};
+
+struct SingleFeatureAccumulator {
+  typedef Featval State;
+  typedef SingleFeatureAccumulator Self;
+  State v;
+  /*
+  void operator +=(State const& o) {
+    v+=o;
+  }
+  */
+  void operator +=(Self const& s) {
+    v+=s.v;
+  }
+  SingleFeatureAccumulator() : v() {}
+  template <class FF>
+  State const& describe(FF const& ) const { return v; }
+
+  template <class FF>
+  void Store(FF const& ff,FeatureVector *fv) const {
+    fv->set_value(ff.fid_,v);
+  }
+  void Store(Features const& fids,FeatureVector *fv) const {
+    assert(fids.size()==1);
+    fv->set_value(fids[0],v);
+  }
+  /*
+  SingleFeatureAccumulator(Features const& fids) { assert(fids.size()==1); }
+  SingleFeatureAccumulator(Features const& fids,FeatureVector const& fv)
+  {
+    assert(fids.size()==1);
+    v=fv.get_singleton();
+  }
+  */
+
+  template <class FF>
+  void Add(FF const& ff,FeatureVector const& fv) {
+    v+=fv.get(ff.fid_);
+  }
+  void Add(FeatureVector const& fv) {
+    v+=fv.get_singleton();
+  }
+
+  void Add(Features const& fids,FeatureVector const& fv) {
+    v += fv.get(fids[0]);
+  }
+  void Add(Featval dv) {
+    v+=dv;
+  }
+  void Add(int,Featval dv) {
+    v+=dv;
+  }
+  void Add(Features const& fids,int i,Featval dv) {
+    assert(fids.size()==1 && i==0);
+    v+=dv;
+  }
+};
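Since feature_accum.h is new in this commit, a runnable toy of the accumulate-then-Store contract these classes implement may help. The types and the feature id here are illustrative stand-ins, not the real cdec classes:

    #include <cassert>
    #include <map>

    typedef int FeatId;
    typedef double Featval;
    typedef std::map<FeatId,Featval> FeatureVector; // stand-in for cdec's sparse vector

    struct SingleAccum {                 // cf. SingleFeatureAccumulator
      Featval v;
      SingleAccum() : v() {}
      void Add(Featval d) { v+=d; }                               // one Add per scanned word
      void Store(FeatId fid,FeatureVector *fv) const { (*fv)[fid]=v; } // single set_value at the end
    };

    int main() {
      FeatureVector fv;
      fv[7]=99;                          // stale value left on the edge by an earlier pass
      SingleAccum a;
      a.Add(-0.5); a.Add(-1.25);
      a.Store(7,&fv);
      assert(fv[7]==-1.75);              // set_value semantics: replaced, not summed
    }

This is exactly the shift the commit message describes: no up-front clear of the edge's feature value; the accumulator owns the running sum and writes once.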
+
+#if 0
+// omitting this so we can default construct an accum. might be worth resurrecting in the future
+struct ArrayFeatureAccumulator : public ValueArray<Featval> {
+  typedef ValueArray<Featval> State;
+  template <class Fsa>
+  ArrayFeatureAccumulator(Fsa const& fsa) : State(fsa.features_.size()) {  }
+  ArrayFeatureAccumulator(Features const& fids) : State(fids.size()) {  }
+  ArrayFeatureAccumulator(Features const& fids,FeatureVector const& fv) : State(fids.size()) {
+    for (int i=0,e=fids.size();i<e;++i)
+      (*this)[i]=fv.get(fids[i]);
+  }
+  void Store(Features const& fids,FeatureVector *fv) const {
+    for (int i=0,e=fids.size();i<e;++i)
+      fv->set_value(fids[i],(*this)[i]);
+  }
+  void Add(Features const& fids,FeatureVector const& fv) {
+    for (int i=0,e=fids.size();i<e;++i)
+      (*this)[i]+=fv.get(fids[i]);
+  }
+};
+#endif
+
+#endif
diff --git a/decoder/ff.h b/decoder/ff.h
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ ... @@
 typedef std::vector<WordID> Features; // set of feature ids
 class FeatureFunction {
  public:
   std::string name; // set by FF factory using usage()
-  bool debug; // also set by FF factory checking param for immediate initial "debug"
+  bool debug_; // also set by FF factory checking param for immediate initial "debug"
+  bool debug() const { return debug_; }
   FeatureFunction() : state_size_() {}
   explicit FeatureFunction(int state_size) : state_size_(state_size) {}
   virtual ~FeatureFunction();
diff --git a/decoder/ff_factory.cc b/decoder/ff_factory.cc
index aec82d38..88991fbf 100644
--- a/decoder/ff_factory.cc
+++ b/decoder/ff_factory.cc
@@ -43,7 +43,7 @@ shared_ptr<FeatureFunction> FFRegistry::Create(const string& ffname, const strin
     }
     res = it->second->Create(p);
     res->name=ffname;
-    res->debug=debug;
+    res->debug_=debug;
   }
   return res;
 }
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index f50e0fdc..7fa6be67 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -5,8 +5,8 @@
 #define FSA_FF_DEBUG 0
 #if FSA_FF_DEBUG
-# define FSAFFDBG(e,x) FSADBGif(debug,e,x)
-# define FSAFFDBGnl(e) FSADBGif_nl(debug,e)
+# define FSAFFDBG(e,x) FSADBGif(debug(),e,x)
+# define FSAFFDBGnl(e) FSADBGif_nl(debug(),e)
 #else
 # define FSAFFDBG(e,x)
 # define FSAFFDBGnl(e)
@@ -30,7 +30,7 @@ class FeatureFunctionFromFsa : public FeatureFunction {
   typedef WordID const* WP;
 public:
   FeatureFunctionFromFsa(std::string const& param) : ff(param) {
-    debug=true; // because factory won't set until after we construct.
+    debug_=true; // because factory won't set until after we construct.
     Init();
   }
@@ -48,18 +48,27 @@ public:
                          FeatureVector* estimated_features,
                          void* out_state) const
   {
-    ff.init_features(features); // estimated_features is fresh
+    TRule const& rule=*edge.rule_;
+    Sentence const& e = rule.e();
+    typename Impl::Accum accum,h_accum;
     if (!ssz) {
-      TRule const& rule=*edge.rule_;
-      Sentence const& e = rule.e();
-      for (int j = 0; j < e.size(); ++j) { // items in target side of rule
-        if (e[j] < 1) { // variable
-        } else {
+      Sentence phrase;
+      phrase.reserve(e.size());
+      for (int j=0,je=e.size();;++j) { // items in target side of rule
+        if (je==j || e[j]<1) { // end or variable
+          if (phrase.size()) {
+            FSAFFDBG(edge," ["<<TD::GetString(phrase)<<']');
+            ff.ScanPhraseAccumOnly(smeta,edge,&phrase[0],&phrase[0]+phrase.size(),0,&accum);
+            phrase.clear();
+          }
+          if (je==j) break;
+        } else
+          phrase.push_back(e[j]);
+      }
+      accum.Store(ff,features);
+      return;
+    }
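The rewritten no-state branch above batches maximal runs of terminals between variables (e[j]<1) and hands each run to ScanPhraseAccumOnly; this is what lets a model score whole phrases beyond its declared markov order. A self-contained toy of just that segmentation loop, with ints standing in for WordIDs and a print where the real code scans:

    #include <iostream>
    #include <vector>
    typedef std::vector<int> Sentence;

    int main() {
      int e[]={3,4,-1,5,-2,6,7};   // rule target side; negatives = variables
      Sentence phrase;
      for (int j=0,je=7;;++j) {
        if (je==j || e[j]<1) {     // end of rule or a variable
          if (phrase.size()) {     // flush one maximal terminal phrase
            for (unsigned k=0;k<phrase.size();++k) std::cout<<phrase[k]<<' ';
            std::cout<<'\n';       // would be ScanPhraseAccumOnly(...) here
            phrase.clear();
          }
          if (je==j) break;
        } else
          phrase.push_back(e[j]);
      }                            // prints "3 4", "5", "6 7"
    }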
     FsaScanner<Impl> fsa(ff,smeta,edge); // this holds our current state and eventually becomes our right state if we saw enough words
-    TRule const& rule=*edge.rule_;
-    Sentence const& e = rule.e();
     for (int j = 0; j < e.size(); ++j) { // items in target side of rule
       if (e[j] < 1) { // variable
         SP a = ant_contexts[-e[j]]; // variables a* refer to this child derivation's state.
@@ -91,11 +98,11 @@ public:
           left_out=(W)left_full; // heuristic known now
           fsa.reset(h_start);
-          fsa.scan(left_begin,left_full,estimated_features); // save heuristic (happens once only)
-          fsa.scan(al+ntofill,ale,features); // because of markov order, fully filled left words scored starting at h_start put us in the right state to score the extra words (which are forgotten)
+          fsa.scan(left_begin,left_full,&h_accum); // save heuristic (happens once only)
+          fsa.scan(al+ntofill,ale,&accum); // because of markov order, fully filled left words scored starting at h_start put us in the right state to score the extra words (which are forgotten)
           al+=ntofill; // we used up the first ntofill words of al to end up in some known state via exactly M words total (M-ntofill were there beforehand). now we can scan the remaining al words of this child
         } else { // more to score / state to update (left already full)
-          fsa.scan(al,ale,features);
+          fsa.scan(al,ale,&accum);
         }
         if (anw==M) // child had full state already
           fsa.reset(fsa_state(a));
@@ -108,22 +115,24 @@ public:
         *left_out++=ew;
         if (left_out==left_full) { // handle heuristic, once only, establish state
           fsa.reset(h_start);
-          fsa.scan(left_begin,left_full,estimated_features); // save heuristic (happens only once)
+          fsa.scan(left_begin,left_full,&h_accum); // save heuristic (happens only once)
         }
       } else
-        fsa.scan(ew,features);
+        fsa.scan(ew,&accum);
     }
   }
   void *out_fsa_state=fsa_state(out_state);
   if (left_out<left_full) { // left context never filled, so the heuristic state was never established
     do { *left_out++=TD::none; } while (left_out<left_full); // none-terminate so left_end(out_state) knows how many words there are
     ff.state_zero(out_fsa_state); // zero so we hash/compare deterministically even though the state is unknown
   } else
     ff.state_copy(out_fsa_state,fsa.cs); // store final right state
   FSAFFDBG(edge," end={"<<describe_state(out_state)<<'}');
+  accum.Store(ff,features);
+  h_accum.Store(ff,estimated_features);
   FSAFFDBGnl(edge);
 }
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ ... @@
+// ... L->R if we only scan 1 word at a time, that's fine
+#define FSA_SCORE_PHRASE 1
+// if true, special effort is made to give entire phrases to fsa models, so they can understate their true markov_order but sometimes have more specific probs.

 #define FSA_DEBUG 0
@@ -47,29 +53,15 @@
 #include "tdict.h"
 #include "hg.h"
 #include "sentences.h"
+#include "feature_accum.h"

 typedef ValueArray<char> Bytes;

 /*
-usage:
-struct SameFirstLetter : public FsaFeatureFunctionBase<SameFirstLetter> {
-  SameFirstLetter(string const& param) : FsaFeatureFunctionBase<SameFirstLetter>(1,singleton_sentence("END")) { start[0]='a';h_start[0]=0; } // 1 byte of state, scan final (single) symbol "END" to get final state cost
-  int markov_order() const { return 1; }
-  Featval Scan1(WordID w,void const* old_state,void *new_state) const {
-    char cw=TD::Convert(w)[0];
-    char co=*(char const*)old_state;
-    *(char *)new_state = cw;
-    return cw==co?1:0;
-  }
-  void print_state(std::ostream &o,void const* st) const {
-    o<<*(char const*)st;
-  }
-  static std::string usage(bool param,bool verbose) {
-    return FeatureFunction::usage_helper("SameFirstLetter","[no args]","1 each time 2 consecutive words start with the same letter",param,verbose);
-  }
-};
+usage: see ff_sample_fsa.h or ff_lm_fsa.h
+
+then, to decode, see ff_from_fsa.h (or TODO: left->right target-earley style rescoring)

-// then, to decode, see ff_from_fsa.h
 */

 template <class Impl>
@@ -77,21 +69,26 @@ struct FsaFeatureFunctionBase {
   Impl const& d() const { return static_cast<Impl const&>(*this); }
   Impl       & d()       { return static_cast<Impl &>(*this); }
 protected:
-  int state_bytes_; // don't forget to set this. default 0 (it may depend on params of course)
+  int ssz; // don't forget to set this. default 0 (it may depend on params of course)
   Bytes start,h_start; // start state and estimated-features (heuristic) start state. set these. default empty.
   Sentence end_phrase_; // words appended for final traversal (final state cost is assessed using Scan) e.g. "</s>" for lm.
   void set_state_bytes(int sb=0) {
     if (start.size()!=sb) start.resize(sb);
     if (h_start.size()!=sb) h_start.resize(sb);
-    state_bytes_=sb;
+    ssz=sb;
   }
   void set_end_phrase(WordID single) {
     end_phrase_=singleton_sentence(single);
   }

-  int fid_; // you can have more than 1 feature of course.
-  void Init() { // CALL THIS MANUALLY (because feature name(s) may depend on param
-    fid_=FD::Convert(d().name());
+  // CALL 1 of these MANUALLY (because feature name(s) may depend on param; it's not done in the ctor)
+  void InitFidNamed(std::string const& fname="") {
+    fid_=FD::Convert(fname.empty()?name():fname);
+    Init();
+  }
+  Features features_;
+  void Init() {
+    features_=FeatureFunction::single_feature(fid_);
   }

   inline void static to_state(void *state,char const* begin,char const* end) {
@@ -119,11 +116,14 @@ protected:
   }
 public:
+  int fid_; // you can have more than 1 feature of course.
+
   void state_copy(void *to,void const*from) const {
-    std::memcpy(to,from,state_bytes_);
+    if (ssz)
+      std::memcpy(to,from,ssz);
   }
   void state_zero(void *st) const { // you should call this if you don't know the state yet and want it to be hashed/compared properly
-    std::memset(st,0,state_bytes_);
+    std::memset(st,0,ssz);
   }

   // can override to different return type, e.g. just return feats:
@@ -139,7 +139,7 @@ public:
   void print_state(std::ostream &o,void const*state) const {
     char const* i=(char const*)state;
-    char const* e=i+state_bytes_;
+    char const* e=i+ssz;
     for (;i!=e;++i)
       print_hex_byte(o,*i);
   }
@@ -149,26 +149,26 @@ public:
     d().print_state(o,state);
     return o.str();
   }
+  typedef SingleFeatureAccumulator Accum;

-  //edges may have old features on them. override if you have more than 1 fid. we need to call this explicitly because edges may have old feature values already, and I chose to use add_value (+=) to simplify scanning a phrase, rather than set_value (=) for fsa ffs. could revisit this and use set_value and therefore sum
-  void init_features(FeatureVector *fv) const {
-    fv->set_value(fid_,0);
-    //erase(fid_)
-  }
   // return m: all strings x with the same final m+1 letters must end in this state
   /* markov chain of order m: P(xn|xn-1...x1)=P(xn|xn-1...xn-m) */
   int markov_order() const { return 0; } // override if you use state. order 0 implies state_bytes()==0 as well, as far as scoring/splitting is concerned (you can still track state, though)
   //TODO: if we wanted, we could mark certain states as maximal-context, but this would lose our fixed amount of left context in ff_from_fsa, and also lose our vector operations (we'd have to scan left words 1 at a time, always checking to see where you change from h to inside - BUT, we could detect equivalent LM states, which would be nice).

-  Features features() const { // override this if >1 fid
-    return FeatureFunction::single_feature(fid_);
+  Features features() const {
+    return features_;
+  }
+
+  int n_features() const {
+    return features_.size();
   }

   // override this (static)
   static std::string usage(bool param,bool verbose) {
     return FeatureFunction::usage_helper("unnamed_fsa_feature","","",param,verbose);
   }

-  int state_bytes() const { return state_bytes_; } // or override this
+  int state_bytes() const { return ssz; } // or override this
   void const* start_state() const {
     return start.begin();
   }
@@ -176,92 +176,134 @@ public:
   void const* heuristic_start_state() const {
     return h_start.begin();
   }
   Sentence const& end_phrase() const { return end_phrase_; }
-  // move from state to next_state after seeing word x, while emitting features->add_value(fid,val) possibly with duplicates. state and next_state will never be the same memory.
+  // move from state to next_state after seeing word x, while emitting features->set_value(fid,val) possibly with duplicates. state and next_state will never be the same memory.
   //TODO: decide if we want to require you to support dest same as src, since that's how we use it most often in the ff_from_fsa bottom-up wrapper (in l->r scoring, however, distinct copies will be the rule), and it probably wouldn't be too hard for most people to support. however, it's good to hide the complexity here, once (see the overly clever FsaScan loop that swaps src/dest addresses repeatedly to scan a sequence)
-  // different name because of inheritance method hiding; simple/common case; 1 fid
+protected:
+  // overrides have different names because of inheritance method hiding;
+
+  // simple/common case; 1 fid. these need not be overridden if you have multiple feature ids
   Featval Scan1(WordID w,void const* state,void *next_state) const {
+    assert(0);
     return 0;
   }
+  Featval Scan1Meta(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,
+                    WordID w,void const* state,void *next_state) const {
+    return d().Scan1(w,state,next_state);
+  }
+public:
+  template <class T>
+  static inline T* state_as(void *p) { return (T*)p; }
+  template <class T>
+  static inline T const* state_as(void const* p) { return (T const*)p; }
+
+  // must override this or Scan1Meta or Scan1
+  template <class Accum>
+  inline void ScanAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,
+                        WordID w,void const* state,void *next_state,Accum *a) const {
+    Add(d().Scan1Meta(smeta,edge,w,state,next_state),a);
+  }
+
+  // bounce back and forth between two state vars starting at cs, returning the end-state location. if we required src=dest addr-safe state updating, this concept wouldn't need to exist.
+  // recommend you override this if you score phrases differently than word-by-word.
+  template <class Accum>
+  void *ScanPhraseAccumBounce(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i, WordID const* end,void *cs,void *ns,Accum *accum) const {
+    // extra code - IT'S FOR EFFICIENCY, MAN!  IT'S OK!  definitely no bugs here.
+    if (!ssz) {
+      for (;i<end;++i)
+        d().ScanAccum(smeta,edge,*i,0,0,accum);
+      return 0;
+    }
+    void *os,*es;
+    if ((end-i)&1) { // odd # of words
+      os=cs;
+      es=ns;
+      goto odd;
+    } else {
+      i+=1;
+      es=cs;
+      os=ns;
+    }
+    for (;i<end;i+=2) {
+      d().ScanAccum(smeta,edge,i[-1],es,os,accum); // e->o
+    odd:
+      d().ScanAccum(smeta,edge,i[0],os,es,accum); // o->e
+    }
+    return es;
+  }
+
+  // override this (and SCAN_PHRASE_ACCUM_OVERRIDE) if you want e.g. maximum possible order ngram scores with markov_order < n-1. in the future SparseFeatureAccumulator will probably be the only option for type-erased FSA ffs. you will be adding to accum, not setting
+  template <class Accum>
+  inline void ScanPhraseAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,
+                              WordID const* i, WordID const* end,void const* state,void *next_state,Accum *accum) const {
+    if (!ssz) {
+      for (;i<end;++i)
+        d().ScanAccum(smeta,edge,*i,0,0,accum);
+      return;
+    }
+    char tstate[ssz];
+    void *tst=tstate;
+    bool odd_n=(end-i)&1;
+    void *cs,*ns;
+    if (odd_n) { cs=tst; ns=next_state; }
+    else       { cs=next_state; ns=tst; }
+    state_copy(cs,state);
+    void *est=d().ScanPhraseAccumBounce(smeta,edge,i,end,cs,ns,accum);
+    assert(est==next_state); // parity above was chosen so the final state lands in next_state
+  }
+
+#define SCAN_PHRASE_ACCUM_OVERRIDE \
+  template <class Accum> \
+  void *ScanPhraseAccumBounce(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i, WordID const* end,void *cs,void *ns,Accum *accum) const { \
+    ScanPhraseAccum(smeta,edge,i,end,cs,ns,accum); \
+    return ns; \
+  } \
+  template <class Accum> \
+  inline void ScanPhraseAccumOnly(SentenceMetadata const& smeta,const Hypergraph::Edge& edge, \
+                                  WordID const* i, WordID const* end,void const* state,Accum *accum) const { \
+    char s2[ssz]; ScanPhraseAccum(smeta,edge,i,end,state,(void*)s2,accum); \
+  }
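Under the new hooks, a minimal FSA feature only overrides Scan1. The SameFirstLetter usage example deleted from this header earlier in the patch would port essentially unchanged; here is a self-contained toy of just its Scan1 logic (plain types; the real class derives from FsaFeatureFunctionBase<SameFirstLetter> with 1 byte of state and start state 'a'):

    #include <cassert>
    #include <string>
    typedef double Featval;

    // +1 whenever consecutive words share a first letter; state = last first-letter
    Featval Scan1(std::string const& w,char *state) {
      char prev=*state;
      *state=w[0];
      return w[0]==prev ? 1 : 0;
    }

    int main() {
      char st='a';            // the old example's start state was 'a'
      Featval f=0;
      f+=Scan1("apple",&st);  // 'a'=='a' -> +1
      f+=Scan1("arrow",&st);  // 'a'=='a' -> +1
      f+=Scan1("bird",&st);   // 'b'!='a' -> +0
      assert(f==2);
    }

Everything else (ScanAccum, both phrase scans, heuristic handling) comes from the base class or the ff_from_fsa wrapper.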
   // NOTE: if you want to e.g. track statistics, cache, whatever, cast const away or use mutable members
-  inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* state,void *next_state,FeatureVector *features) const {
-    maybe_add_feat(features,d().Scan1(w,state,next_state));
+  // override this or bounce along with the above. note: you can just call ScanPhraseAccum
+  // doesn't set state (for the heuristic in ff_from_fsa)
+  template <class Accum>
+  inline void ScanPhraseAccumOnly(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,
+                                  WordID const* i, WordID const* end,void const* state,Accum *accum) const {
+    char s1[ssz];
+    char s2[ssz];
+    state_copy(s1,state);
+    d().ScanPhraseAccumBounce(smeta,edge,i,end,(void*)s1,(void*)s2,accum);
   }
-  inline void maybe_add_feat(FeatureVector *features,Featval v) const {
-    features->maybe_add(fid_,v);
+  template <class Accum>
+  inline void Add(Featval v,Accum *a) const { // for single-feat only
+    a->Add(v);
   }
-  inline void add_feat(FeatureVector *features,Featval v) const {
-    features->add_value(fid_,v);
+
+  inline void set_feat(FeatureVector *features,Featval v) const {
+    features->set_value(fid_,v);
   }

   // don't set state-bytes etc. in ctor because it may depend on parsing the param string
-  FsaFeatureFunctionBase(int statesz=0,Sentence const& end_sentence_phrase=Sentence()) : state_bytes_(statesz),start(statesz),h_start(statesz),end_phrase_(end_sentence_phrase) {}
+  FsaFeatureFunctionBase(int statesz=0,Sentence const& end_sentence_phrase=Sentence()) : ssz(statesz),start(statesz),h_start(statesz),end_phrase_(end_sentence_phrase) {}
 };

-// init state is in cs; overwrite cs, ns repeatedly (alternately). return resulting state
-template <class FsaFF>
-void *FsaScan(FsaFF const& ff,SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i, WordID const* end,FeatureVector *features, void *cs,void *ns) {
-  // extra code - IT'S FOR EFFICIENCY, MAN!  IT'S OK!  definitely no bugs here.
-  void *os,*es;
-  if ((end-i)&1) { // odd # of words
-    os=cs;
-    es=ns;
-    goto odd;
-  } else {
-    i+=1;
-    es=cs;
-    os=ns;
-  }
-  for (;i<end;i+=2) {
-    ff.Scan(smeta,edge,i[-1],es,os,features); // e->o
-  odd:
-    ff.Scan(smeta,edge,i[0],os,es,features); // o->e
-  }
-  return es;
-}
-
-// if you have a more efficient implementation for scanning a phrase than one word at a time (e.g. LM context using a sliding window buffer rather than rotating through a fixed state size), you can override this
-template <class FsaFF>
-void Scan(FsaFF const& ff,SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i,WordID const* e,void const* state,void *next_state,FeatureVector *features) {
-  int ssz=ff.state_bytes();
-  if (!ssz) {
-    for (;i<e;++i)
-      ff.Scan(smeta,edge,*i,0,0,features);
-    return;
-  }
-  Bytes tstate(ssz);
-  void *tst=tstate.begin();
-  ff.state_copy(tst,state);
-  void *est=FsaScan(ff,smeta,edge,i,e,features,tst,next_state);
-  if (est!=next_state) ff.state_copy(next_state,est);
-}
-
-template <class FF>
-void AccumFeatures(FF const& ff,SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i, WordID const* end,FeatureVector *h_features,void const* start_state) {
-  int ssz=ff.state_bytes();
-  if (ssz) {
-    Bytes state(ssz),state2(ssz);
-    void *cs=state.begin(),*ns=state2.begin();
-    memcpy(cs,start_state,ff.state_bytes());
-    FsaScan(ff,smeta,edge,i,end,h_features,cs,ns);
-  } else
-    for (;i<end;++i)
-      ff.Scan(smeta,edge,*i,0,0,h_features);
-}
+template <class Impl>
+struct MultipleFeatureFsa : public FsaFeatureFunctionBase<Impl> {
+  typedef SparseFeatureAccumulator Accum;
+};

 // if State is pod. sets state size and allocs start, h_start
 // usage:
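Both the new ScanPhraseAccumBounce and the deleted FsaScan above rely on the same src/dest ping-pong: the parity preamble exists so the loop, which scans two words per iteration, always leaves the final write in the buffer that gets returned. A runnable miniature where "state" is just the last word seen and scoring is omitted:

    #include <cassert>
    static void scan1(int w,int const* /*st*/,int *next_st) { *next_st=w; }

    static int* bounce(int const* i,int const* end,int *cs,int *ns) {
      int *os,*es;
      if ((end-i)&1) { os=cs; es=ns; goto odd; } // odd # of words
      else           { i+=1; es=cs; os=ns; }
      for (;i<end;i+=2) {
        scan1(i[-1],es,os); // e->o
      odd:
        scan1(i[0],os,es);  // o->e
      }
      return es;            // final state always lands here
    }

    int main() {
      int w[]={1,2,3,4,5};
      int a=0,b=0;
      assert(*bounce(w,w+5,&a,&b)==5); // odd count
      assert(*bounce(w,w+4,&a,&b)==4); // even count
    }

Because source and destination never alias, a feature's scan code never has to support in-place state updates.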
@@ -316,9 +358,7 @@ public:
 };

-const bool optimize_FsaScanner_zerostate=false;
-
-// do not use if state size is 0. should crash (maybe won't if you set optimize_FsaScanner_zerostate true)
+// keep a "current state" (bouncing back and forth)
 template <class FF>
 struct FsaScanner {
 //  enum {ALIGN=8};
@@ -347,28 +387,16 @@ struct FsaScanner {
     cs=st0;
     std::memcpy(st0,state,ssz);
   }
-  void scan(WordID w,FeatureVector *feat) {
-    if (optimize_FsaScanner_zerostate && !ssz) {
-      ff.Scan(smeta,edge,w,0,0,feat);
-      return;
-    }
+  template <class Accum>
+  void scan(WordID w,Accum *a) {
     void *ns=nexts();
-    ff.Scan(smeta,edge,w,cs,ns,feat);
+    ff.ScanAccum(smeta,edge,w,cs,ns,a);
     cs=ns;
   }
-
-  void scan(WordID const* i,WordID const* end,FeatureVector *feat) {
-#if 1
-    // faster.
-    if (optimize_FsaScanner_zerostate && !ssz)
-      for (;i<end;++i)
-        ff.Scan(smeta,edge,*i,0,0,feat);
-    else
-      cs=FsaScan(ff,smeta,edge,i,end,feat,cs,nexts());
-#else
-    for (;i<end;++i)
-      scan(*i,feat);
-#endif
-  }
+  template <class Accum>
+  void scan(WordID const* i,WordID const* end,Accum *a) {
+    // faster, and allows greater-order excursions
+    cs=ff.ScanPhraseAccumBounce(smeta,edge,i,end,cs,nexts(),a);
+  }
 };
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 0f44f8d3..3d81a599 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -1,5 +1,5 @@
 #define LM_FSA_SHORTEN_CONTEXT 1
-// seems to work great - just not sure if it actually speeds anything up
+// seems to work great - just not sure if it actually speeds anything up
 // virtual LogP contextBOW(const VocabIndex *context, unsigned length); /* backoff weight for truncating context */
 // does that need to be used? i think so.
@@ -188,7 +188,7 @@ struct LMClient {
   char request_buffer[16000];
 };

-class LanguageModelImpl {
+class LanguageModelImpl : public LanguageModelInterface {
   void init(int order) {
     //all these used to be const members, but that has no performance implication, and now there's less duplication.
     order_=order;
@@ -251,21 +236,6 @@ class LanguageModelImpl {
     return ngram_.contextBOW((VocabIndex*)context,shortened_len);
   }

-  double ShortenContext(WordID * context,int len) {
-    int slen=ContextSize(context,len);
-    double p=ContextBOW(context,slen);
-    while (len>slen) {
-      --len;
-      context[len]=TD::none;
-    }
-    return p;
-  }
-
-  /// NOT a negative logp, i.e. should be worse prob = more negative. that's what SRI wordProb returns, fortunately.
-  inline double clamp(double logp) const {
-    return logp < floor_ ? floor_ : logp;
-  }
-
   inline double LookupProbForBufferContents(int i) {
 //    int k = i; cerr << "P("; while(buffer_[k] > 0) { std::cerr << TD::Convert(buffer_[k++]) << " "; }
     double p = WordProb(buffer_[i], &buffer_[i+1]);
@@ -457,7 +442,6 @@ public:
   int order_;
   int state_size_;
 public:
-  double floor_;
   WordID kSTART;
   WordID kSTOP;
   WordID kUNKNOWN;
@@ -606,9 +590,6 @@ void LanguageModelFsa::set_ngram_order(int i) {
     }
   }
 }
-namespace {
-WordID empty_context=TD::none;
-}

 LanguageModelFsa::LanguageModelFsa(string const& param) {
   int lmorder;
   set_ngram_order(lmorder);
 }

-void LanguageModelFsa::Scan(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,void const* old_st,void *new_st,FeatureVector *features) const {
-  //variable length array is in C99, msvc++; if your compiler doesn't support it, #ifdef it or use a stackalloc call (forget the name)
-  Featval p;
-  if (ctxlen_) {
-    WordID ctx[ngram_order_];
-    state_copy(ctx,old_st);
-    ctx[ctxlen_]=TD::none; // make this part of state? wastes space but saves copies.
-    p=pimpl_->WordProb(w,ctx);
-    // states are sri contexts, so they're in reverse order (most recent word is first, then 1-back comes next, etc.)
-    WordID *nst=(WordID *)new_st;
-    nst[0]=w; // new most recent word
-    to_state(nst+1,ctx,ctxlen_-1); // rotate old words right
-#if LM_FSA_SHORTEN_CONTEXT
-    pimpl_->ShortenContext(nst,ctxlen_);
-#endif
-  } else {
-    p=pimpl_->WordProb(w,&empty_context);
-  }
-  add_feat(features,(p<floor_?floor_:p));
-}
@@ ... @@ void LanguageModelFsa::print_state(std::ostream &o,void const* st) const
   for (int i=ctxlen_;i>0;) {
     --i;
@@ -660,7 +620,7 @@ LanguageModel::~LanguageModel() {
 }

 string LanguageModel::DebugStateToString(const void* state) const{
-  return pimpl_->DebugStateToString(state);
+  return imp().DebugStateToString(state);
 }

 void LanguageModel::TraversalFeaturesImpl(const SentenceMetadata& /* smeta */,
@@ -669,13 +629,13 @@ void LanguageModel::TraversalFeaturesImpl(const SentenceMetadata& /* smeta */,
                                           SparseVector<double>* features,
                                           SparseVector<double>* estimated_features,
                                           void* state) const {
-  features->set_value(fid_, pimpl_->LookupWords(*edge.rule_, ant_states, state));
-  estimated_features->set_value(fid_, pimpl_->EstimateProb(state));
+  features->set_value(fid_, imp().LookupWords(*edge.rule_, ant_states, state));
+  estimated_features->set_value(fid_, imp().EstimateProb(state));
 }

 void LanguageModel::FinalTraversalFeatures(const void* ant_state,
                                            SparseVector<double>* features) const {
-  features->set_value(fid_, pimpl_->FinalTraversalCost(ant_state));
+  features->set_value(fid_, imp().FinalTraversalCost(ant_state));
 }

 #ifdef HAVE_RANDLM
@@ -763,13 +723,13 @@ void LanguageModelRandLM::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                                 SparseVector<double>* estimated_features,
                                                 void* state) const {
   (void) smeta;
-  features->set_value(fid_, pimpl_->LookupWords(*edge.rule_, ant_states, state));
-  estimated_features->set_value(fid_, pimpl_->EstimateProb(state));
+  features->set_value(fid_, imp().LookupWords(*edge.rule_, ant_states, state));
+  estimated_features->set_value(fid_, imp().EstimateProb(state));
 }

 void LanguageModelRandLM::FinalTraversalFeatures(const void* ant_state,
                                                  SparseVector<double>* features) const {
-  features->set_value(fid_, pimpl_->FinalTraversalCost(ant_state));
+  features->set_value(fid_, imp().FinalTraversalCost(ant_state));
 }
 #endif
diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h
index 935e283c..e682481d 100644
--- a/decoder/ff_lm.h
+++ b/decoder/ff_lm.h
@@ -8,7 +8,38 @@
 #include "ff.h"
 #include "config.h"

-class LanguageModelImpl;
+class LanguageModelInterface {
+ public:
+  double floor_;
+  LanguageModelInterface() : floor_(-100) {  }
+  virtual ~LanguageModelInterface() {  }
+
+  // not clamped to floor. log10prob
+  virtual double WordProb(WordID word, WordID const* context) = 0;
+  inline double WordProbFloored(WordID word, WordID const* context) {
+    return clamp(WordProb(word,context));
+  }
+  // may be shorter than the actual null-terminated length. context must be null terminated. len is just to save effort for subclasses that don't support contextID
+  virtual int ContextSize(WordID const* context,int len) = 0;
+  // use this as additional logprob when shortening the context as above
+  virtual double ContextBOW(WordID const* context,int shortened_len) = 0; // unlikely that you'll ever need to floor a backoff cost. i'd say impossible.
+
+  inline double ShortenContext(WordID * context,int len) {
+    int slen=ContextSize(context,len);
+    double p=ContextBOW(context,slen);
+    while (len>slen) {
+      --len;
+      context[len]=TD::none;
+    }
+    return p;
+  }
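A toy sketch of the ShortenContext contract just defined: ContextSize says how much of the (reversed, TD::none-terminated) context the model can actually use, and ShortenContext truncates to that and returns the backoff log-prob to add. The model here is hypothetical (never uses more than one word of context, flat -0.1 backoff; kNone stands in for TD::none):

    #include <cassert>
    typedef int WordID;
    static const WordID kNone=-1;

    struct ToyLM {
      int ContextSize(WordID const* /*ctx*/,int len) { return len<1?len:1; }
      double ContextBOW(WordID const* /*ctx*/,int /*shortened_len*/) { return -0.1; }
      double ShortenContext(WordID *context,int len) { // same body as the interface
        int slen=ContextSize(context,len);
        double p=ContextBOW(context,slen);
        while (len>slen) { --len; context[len]=kNone; }
        return p;
      }
    };

    int main() {
      WordID ctx[]={42,43,44,kNone};   // most recent word first (SRI order)
      ToyLM lm;
      double bow=lm.ShortenContext(ctx,3);
      assert(ctx[0]==42 && ctx[1]==kNone && ctx[2]==kNone); // truncated to 1 word
      assert(bow==-0.1);               // charge this when scoring continues
    }

Shortened states also make more derivations share a state, which is where the wrapper's dynamic programming wins.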
+  /// should be worse prob = more negative. that's what SRI wordProb returns: log10(prob)
+  inline double clamp(double logp) const {
+    return logp < floor_ ? floor_ : logp;
+  }
+};
+
+struct LanguageModelImpl;

 class LanguageModel : public FeatureFunction {
  public:
@@ -29,7 +60,9 @@ class LanguageModel : public FeatureFunction {
                        void* out_context) const;
  private:
   int fid_; // conceptually const; mutable only to simplify constructor
-  mutable LanguageModelImpl* pimpl_;
+  //LanguageModelImpl &imp() { return *(LanguageModelImpl*)pimpl_; }
+  LanguageModelImpl & imp() const { return *(LanguageModelImpl*)pimpl_; }
+  /* mutable */ LanguageModelInterface* pimpl_;
 };

 #ifdef HAVE_RANDLM
diff --git a/decoder/ff_lm_fsa.h b/decoder/ff_lm_fsa.h
index 55a8b497..3547d75b 100755
--- a/decoder/ff_lm_fsa.h
+++ b/decoder/ff_lm_fsa.h
@@ -1,27 +1,112 @@
 #ifndef FF_LM_FSA_H
 #define FF_LM_FSA_H

-//TODO: use SRI LM::contextID to shorten state
-//TODO: expose ScanPhrase interface to achieve > ngram probs (e.g. unigram) with higher order lm - but that wouldn't apply to L->R maximal hook/sharing decoding
+//FIXME: 3gram has differences in the 4th decimal digit compared to regular ff_lm. this is USUALLY a bug (there's way more actual precision in there). this was with #define LM_FSA_SHORTEN_CONTEXT 1
+
+#define FSA_LM_DEBUG 0
+#if FSA_LM_DEBUG
+# define FSALMDBG(e,x) FSADBGif(debug(),e,x)
+# define FSALMDBGnl(e) FSADBGif_nl(debug(),e)
+#else
+# define FSALMDBG(e,x)
+# define FSALMDBGnl(e)
+#endif

 #include "ff_lm.h"
 #include "ff_from_fsa.h"

+namespace {
+WordID empty_context=TD::none;
+}
+
 struct LanguageModelFsa : public FsaFeatureFunctionBase<LanguageModelFsa> {
+  typedef WordID * W;
+  typedef WordID const* WP;
+
   // overrides; implementations in ff_lm.cc
+  typedef SingleFeatureAccumulator Accum;
   static std::string usage(bool,bool);
   LanguageModelFsa(std::string const& param);
   int markov_order() const { return ctxlen_; }
-  void Scan(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,void const* old_st,void *new_st,FeatureVector *features) const;
-  void print_state(std::ostream &,void *) const;
+  void print_state(std::ostream &,void const *) const;
+  inline Featval floored(Featval p) const {
+    return p<floor_?floor_:p;
+  }
+  static inline WordID const* left_end(WordID const* left, WordID const* e) {
+    for (;e>left;--e)
+      if (e[-1]!=TD::none) break;
+    //post: [left,e) are the seen left words
+    return e;
+  }
+
+  template <class Accum>
+  void ScanPhraseAccum(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& edge,WordID const* begin,WordID const* end,void const* old_st,void *new_st,Accum *a) const {
+    if (begin==end) return; // otherwise, with shortening, it's possible to end up with no words at all.
+    /* // this is forcing unigram prob always. we will instead build the phrase
+    if (!ctxlen_) {
+      Featval p=0;
+      for (;begin<end;++begin)
+        p+=floored(pimpl_->WordProb(*begin,&empty_context));
+      Add(p,a);
+      return;
+    } */
+    int nw=end-begin;
+    WP st=(WP)old_st;
+    WP st_end=st+ctxlen_; // may include some null already (or none if full)
+    int nboth=nw+ctxlen_;
+    WordID ctx[nboth+1];
+    ctx[nboth]=TD::none;
+    // reverse order - state at the very end of context, then [begin,end) in reverse order ending at ctx[0]
+    wordcpy_reverse(ctx,begin,end);
+    W ctx_score_end=ctx+nw;
+    wordcpy(ctx_score_end,st,st_end); // st already reversed
+//    FSALMDBG(edge," Scan("<<TD::GetString(begin,end)<<')');
+    Featval p=0;
+    for (;ctx_score_end>ctx;--ctx_score_end)
+      p+=floored(pimpl_->WordProb(ctx_score_end[-1],ctx_score_end));
+#if LM_FSA_SHORTEN_CONTEXT
+    p+=pimpl_->ShortenContext(ctx,nboth<ctxlen_?nboth:ctxlen_);
+#endif
+    state_copy(new_st,ctx);
+    Add(p,a);
+  }
+
+  template <class Accum>
+  void ScanAccum(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,void const* old_st,void *new_st,Accum *a) const {
+    if (!ctxlen_) {
+      Add(floored(pimpl_->WordProb(w,&empty_context)),a);
+      return;
+    }
+    //variable length array is in C99 and msvc++; if your compiler doesn't support it, #ifdef it or use a stackalloc call (forget the name)
+    WordID ctx[ngram_order_];
+    state_copy(ctx,old_st);
+    ctx[ctxlen_]=TD::none; // make this part of state? wastes space but saves copies.
+    Featval p=pimpl_->WordProb(w,ctx);
+    // states are sri contexts, so they're in reverse order (most recent word is first, then 1-back comes next, etc.)
+    WordID *nst=(WordID *)new_st;
+    nst[0]=w; // new most recent word
+    to_state(nst+1,ctx,ctxlen_-1); // rotate old words right
+#if LM_FSA_SHORTEN_CONTEXT
+    p+=pimpl_->ShortenContext(nst,ctxlen_);
+#endif
+    Add(floored(p),a);
+  }

   // impl details:
   void set_ngram_order(int i); // if you build ff_from_fsa first, then increase this, you will get memory overflows. otherwise, it's the same as a "-o i" argument to the constructor
   double floor_; // log10prob minimum used (e.g. unk words)

  private:
   int ngram_order_;
   int ctxlen_; // 1 less than above
-  LanguageModelImpl *pimpl_;
+  LanguageModelInterface *pimpl_;
+ public:
+  SCAN_PHRASE_ACCUM_OVERRIDE
 };

 typedef FeatureFunctionFromFsa<LanguageModelFsa> LanguageModelFromFsa;
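What ScanPhraseAccum builds, in miniature: SRI contexts are reversed (most recent word first), so the phrase is reverse-copied in front of the old reversed state and words are then scored from the oldest new position leftward, each seeing the maximum available history regardless of the declared markov order. A toy with ints and no real LM probabilities:

    #include <cassert>
    typedef int WordID;
    static const WordID kNone=-1;

    static void wordcpy_reverse(WordID *dest,WordID const* src,WordID const* src_end) {
      for (WordID const* i=src_end;i>src;)
        *dest++=*--i;
    }

    int main() {
      WordID phrase[]={7,8,9};          // new words, sentence order
      WordID st[]={6,5};                // old state, reversed: 6 is most recent
      int nw=3,ctxlen=2,nboth=nw+ctxlen;
      WordID ctx[6];
      ctx[nboth]=kNone;
      wordcpy_reverse(ctx,phrase,phrase+nw); // phrase lands reversed at the front
      for (int k=0;k<ctxlen;++k) ctx[nw+k]=st[k];
      WordID expect[]={9,8,7,6,5,kNone};
      for (int k=0;k<=nboth;++k) assert(ctx[k]==expect[k]);
      // scoring then walks ctx_score_end from ctx+nw down to ctx+1:
      // 7 is scored against {6,5}, 8 against {7,6,5}, 9 against {8,7,6,5}.
    }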
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 6e6ad30e..fb44dade 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -1,6 +1,8 @@
 #ifndef FF_SAMPLE_FSA_H
 #define FF_SAMPLE_FSA_H

+//TODO: the fsa interface changed to include ScanPhrase*/Scan*Accum; update these so they compile and work
+
 #include "ff_from_fsa.h"

 // example: feature val = 1 * # of target words
diff --git a/decoder/sentences.h b/decoder/sentences.h
index 452fb21e..4c71f8f1 100755
--- a/decoder/sentences.h
+++ b/decoder/sentences.h
@@ -29,7 +29,10 @@ inline void wordcpy(WordID *dest,WordID const* src,int n) {
 inline void wordcpy(WordID *dest,WordID const* src,WordID const* src_end) {
   wordcpy(dest,src,src_end-src);
 }
-
+inline void wordcpy_reverse(WordID *dest,WordID const* src,WordID const* src_end) {
+  for (WordID const* i=src_end;i>src;)
+    *dest++=*--i;
+}
 inline Sentence singleton_sentence(WordID t) {
   return Sentence(1,t);
 }
diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h
index 8fc0f520..207489c5 100644
--- a/decoder/sparse_vector.h
+++ b/decoder/sparse_vector.h
@@ -60,6 +60,11 @@ class SparseVector {
     SPARSE_VECTOR_MAP_RESERVED(values_,-1,-2);
   }
 public:
+  T const& get_singleton() const {
+    assert(values_.size()==1);
+    return values_.begin()->second;
+  }
+
   typedef SparseVector<T> Self;
   typedef SPARSE_VECTOR_MAP<int,T> MapType;
   typedef typename MapType::const_iterator const_iterator;
@@ -113,7 +118,7 @@ public:
     return values_[index];
   }

-  void set_value(int index, const T &value) {
+  inline void set_value(int index, const T &value) {
     values_[index] = value;
   }
@@ -236,6 +241,15 @@ public:
       values_.erase(found);*/
   }

+  template <class T2>
+  void set_from(SparseVector<T2> const& other) {
+    for (typename SparseVector<T2>::MapType::const_iterator
+           it = other.values_.begin(); it != other.values_.end(); ++it)
+    {
+      values_[it->first]=it->second;
+    }
+  }
+
   SparseVector<T> &operator+=(const SparseVector<T> &other) {
     for (typename MapType::const_iterator
            it = other.values_.begin(); it != other.values_.end(); ++it)
diff --git a/decoder/tdict.cc b/decoder/tdict.cc
index 6794bc79..7b56d259 100644
--- a/decoder/tdict.cc
+++ b/decoder/tdict.cc
@@ -85,6 +85,17 @@ std::string TD::GetString(const std::vector<WordID>& str) {
   return o.str();
 }

+std::string TD::GetString(WordID const* i,WordID const* e) {
+  ostringstream o;
+  bool sp=false;
+  for (;i<e;++i,sp=true) {
+    if (sp) o<<' ';
+    o<<Convert(*i);
+  }
+  return o.str();
+}
+
diff --git a/decoder/tdict.h b/decoder/tdict.h
--- a/decoder/tdict.h
+++ b/decoder/tdict.h
@@ ... @@
   static void ConvertSentence(const std::string& sent, std::vector<WordID>* ids);
   static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);
   static std::string GetString(const std::vector<WordID>& str);
+  static std::string GetString(WordID const* i,WordID const* e);
   static int AppendString(const WordID& w, int pos, int bufsize, char* buffer);
   static unsigned int NumWords();
   static WordID Convert(const std::string& s);
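One detail worth noting about the set_from just added: it overlays the other vector's entries without clearing entries it doesn't mention, i.e. it is not assignment. That matches the commit's set_value-at-Store design, where untouched feature ids on an edge are left alone. A toy with std::map standing in for the SPARSE_VECTOR_MAP:

    #include <cassert>
    #include <map>
    typedef std::map<int,double> MapType;

    int main() {
      MapType values; values[1]=0.5; values[2]=0.25;
      MapType other;  other[2]=-1.0; other[3]=2.0;
      for (MapType::const_iterator it=other.begin();it!=other.end();++it)
        values[it->first]=it->second;      // what set_from does
      assert(values[1]==0.5);              // key absent from other: untouched
      assert(values[2]==-1.0 && values[3]==2.0);
    }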
diff --git a/decoder/value_array.h b/decoder/value_array.h
index 6fa221ee..1a0d5f95 100755
--- a/decoder/value_array.h
+++ b/decoder/value_array.h
@@ -15,11 +15,33 @@
 # include <boost/serialization/access.hpp>
 #endif

+//TODO: use awesome type traits (and/or a policy typelist argument) to provide these only when possible?
+#define VALUE_ARRAY_ADD 1
+#define VALUE_ARRAY_MUL 1
+#define VALUE_ARRAY_BITWISE 0
+#define VALUE_ARRAY_OSTREAM 1
+
+#if VALUE_ARRAY_OSTREAM
+# include <iostream>
+#endif
+
 // valarray like in that size is fixed (so saves space compared to vector), but same interface as vector (less resize/push_back/insert, of course)
 template <class T, class A=std::allocator<T> >
 class ValueArray : A // private inheritance so stateless allocator adds no size.
 {
+  typedef ValueArray<T,A> Self;
 public:
+#if VALUE_ARRAY_OSTREAM
+  friend inline std::ostream & operator << (std::ostream &o,Self const& s) {
+    o<<'[';
+    for (unsigned i=0,e=s.size();i<e;++i) {
+      if (i) o<<' ';
+      o<<s[i];
+    }
+    o<<']';
+    return o;
+  }
+#endif
+  static const int SV_MAX=sizeof(T)>1?sizeof(T)/sizeof(T*):1; //space optimization: SV_MAX T will fit inside what would otherwise be a pointer to heap data. todo in the far future if bored.
+
   typedef T value_type;
@@ -82,6 +104,37 @@ public:
     clear();
   }

+#undef VALUE_ARRAY_OPEQ
+#define VALUE_ARRAY_OPEQ(op) template <class T2,class A2> Self & operator op (ValueArray<T2,A2> const& o) { assert(sz==o.sz); for (int i=0,e=sz;i<e;++i) array[i] op o.array[i]; return *this; }
+#if VALUE_ARRAY_ADD
+  VALUE_ARRAY_OPEQ(+=)
+  VALUE_ARRAY_OPEQ(-=)
+#endif
+#if VALUE_ARRAY_MUL
+  VALUE_ARRAY_OPEQ(*=)
+  VALUE_ARRAY_OPEQ(/=)
+#endif
+#if VALUE_ARRAY_BITWISE
+  VALUE_ARRAY_OPEQ(|=)
+  VALUE_ARRAY_OPEQ(&=)
+#endif
+#undef VALUE_ARRAY_OPEQ
+
+#undef VALUE_ARRAY_BINOP
+#define VALUE_ARRAY_BINOP(op,opeq) template <class T2,class A2> friend inline Self operator op (Self x,ValueArray<T2,A2> const& y) { x opeq y; return x; }
+#if VALUE_ARRAY_ADD
+  VALUE_ARRAY_BINOP(+,+=)
+  VALUE_ARRAY_BINOP(-,-=)
+#endif
+#if VALUE_ARRAY_MUL
+  VALUE_ARRAY_BINOP(*,*=)
+  VALUE_ARRAY_BINOP(/,/=)
+#endif
+#if VALUE_ARRAY_BITWISE
+  VALUE_ARRAY_BINOP(|,|=)
+  VALUE_ARRAY_BINOP(&,&=)
+#endif
+#undef VALUE_ARRAY_BINOP
+
   void clear()
   {
     for (size_type i = sz; i != 0; --i) {
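The OPEQ/BINOP macros give ValueArray valarray-style elementwise arithmetic. A minimal standalone expansion of VALUE_ARRAY_OPEQ(+=) on a fixed-size toy, showing the intended semantics (the real class also derives storage from the allocator; note the loop bound is i<e, exclusive):

    #include <cassert>

    struct Toy {
      double array[3];
      int sz;
      Toy & operator += (Toy const& o) {
        assert(sz==o.sz);                 // sizes must match, as in ValueArray
        for (int i=0,e=sz;i<e;++i)
          array[i] += o.array[i];         // elementwise, like valarray
        return *this;
      }
    };

    int main() {
      Toy a={{1,2,3},3}, b={{10,20,30},3};
      a+=b;
      assert(a.array[0]==11 && a.array[1]==22 && a.array[2]==33);
    }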