14 files changed, 276 insertions, 212 deletions
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 4f1a3d32..9fc2dbd8 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -13,10 +13,8 @@ using namespace std;
 FeatureFunction::~FeatureFunction() {}
 
 
-void FeatureFunction::FinalTraversalFeatures(const void* ant_state,
-                                             SparseVector<double>* features) const {
-  (void) ant_state;
-  (void) features;
+void FeatureFunction::FinalTraversalFeatures(const void* /* ant_state */,
+                                             SparseVector<double>* /* features */) const {
 }
 
 string FeatureFunction::usage_helper(std::string const& name,std::string const& params,std::string const& details,bool sp,bool sd) {
@@ -225,7 +223,7 @@ void ModelSet::AddFinalFeatures(const std::string& state, Hypergraph::Edge* edge
       int spos = model_state_pos_[i];
       ant_state = &state[spos];
     }
-    ff.FinalTraversalFeatures(smeta, ant_state, &edge->feature_values_);
+    ff.FinalTraversalFeatures(smeta, *edge, ant_state, &edge->feature_values_);
   }
   edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
 }
diff --git a/decoder/ff.h b/decoder/ff.h
index 9ff67dd8..b8ca71c4 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -64,8 +64,9 @@ protected:
   virtual void FinalTraversalFeatures(const void* residual_state,
                                       FeatureVector* final_features) const;
 public:
-  //override either this or above. (no need to do both)
-  virtual void FinalTraversalFeatures(const SentenceMetadata& smeta,
+  //override either this or one of above.
+  virtual void FinalTraversalFeatures(const SentenceMetadata& /* smeta */,
+                                      const Hypergraph::Edge& /* edge */,
                                       const void* residual_state,
                                       FeatureVector* final_features) const {
     FinalTraversalFeatures(residual_state,final_features);
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index 75b99f52..20e7c5ca 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -58,7 +58,7 @@ public:
         } else {
           WordID ew=e[j];
           FSAFFDBG(' '<<TD::Convert(ew));
-          ff.Scan(smeta,ew,0,0,features);
+          ff.Scan(smeta,edge,ew,0,0,features);
         }
       }
       FSAFFDBG('\n');
@@ -69,7 +69,7 @@ public:
     W left_begin=(W)out_state;
     W left_out=left_begin; // [left,fsa_state) = left ctx words.  if left words aren't full, then null wordid
     WP left_full=left_end_full(out_state);
-    FsaScanner<Impl> fsa(ff,smeta);
+    FsaScanner<Impl> fsa(ff,smeta,edge);
     TRule const& rule=*edge.rule_;
     Sentence const& e = rule.e();
     for (int j = 0; j < e.size(); ++j) { // items in target side of rule
@@ -141,23 +141,24 @@ public:
 
   //FIXME: it's assumed that the final rule is just a unary no-target-terminal rewrite (same as ff_lm)
   virtual void FinalTraversalFeatures(const SentenceMetadata& smeta,
+                                      const Hypergraph::Edge& edge,
                                       const void* residual_state,
                                       FeatureVector* final_features) const
   {
     ff.init_features(final_features);
     Sentence const& ends=ff.end_phrase();
     if (!ssz) {
-      AccumFeatures(ff,smeta,begin(ends),end(ends),final_features,0);
+      AccumFeatures(ff,smeta,edge,begin(ends),end(ends),final_features,0);
       return;
     }
     SP ss=ff.start_state();
     WP l=(WP)residual_state,lend=left_end(residual_state);
     SP rst=fsa_state(residual_state);
-    FSAFFDBG("(FromFsa) Final "<<name);
+    FSAFFDBG("(FromFsa) Final "<<name<< " before="<<*final_features);
     if (lend==rst) { // implying we have an fsa state
-      AccumFeatures(ff,smeta,l,lend,final_features,ss); // e.g. <s> score(full left unscored phrase)
+      AccumFeatures(ff,smeta,edge,l,lend,final_features,ss); // e.g. <s> score(full left unscored phrase)
       FSAFFDBG(" left: "<<ff.describe_state(ss)<<" -> "<<Sentence(l,lend));
-      AccumFeatures(ff,smeta,begin(ends),end(ends),final_features,rst); // e.g. [ctx for last M words] score("</s>")
+      AccumFeatures(ff,smeta,edge,begin(ends),end(ends),final_features,rst); // e.g. [ctx for last M words] score("</s>")
       FSAFFDBG(" right: "<<ff.describe_state(rst)<<" -> "<<ends);
     } else { // all we have is a single short phrase < M words before adding ends
       int nl=lend-l;
@@ -167,7 +168,7 @@ public:
       wordcpy(w+nl,begin(ends),ends.size());
       FSAFFDBG(" score whole sentence: "<<whole);
       // whole = left-words + end-phrase
-      AccumFeatures(ff,smeta,w,end(whole),final_features,ss);
+      AccumFeatures(ff,smeta,edge,w,end(whole),final_features,ss);
     }
     FSAFFDBG(" = "<<*final_features<<'\n');
   }
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 41343f6c..0a7aebde 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -1,8 +1,17 @@
 #ifndef FF_FSA_H
 #define FF_FSA_H
 
+/*
+  features whose score is just some PFSA over target string.  however, PFSA can use edge and smeta info (e.g. spans on edge) - not usually useful.
+
+  state is some fixed width byte array.  could actually be a void *, WordID sequence, whatever.
+
+*/
+
 //SEE ALSO: ff_fsa_dynamic.h, ff_from_fsa.h
 
+//TODO: decide whether to use init_features / add_value vs. summing elsewhere + set_value once (or inefficient for from_fsa: sum distinct feature_vectors.  but L->R if we only scan 1 word at a time, that's fine
+
 //#define FSA_DEBUG
 
 #ifdef FSA_DEBUG
@@ -25,13 +34,6 @@
 
 typedef ValueArray<uint8_t> Bytes;
 
-/*
-  features whose score is just some PFSA over target string.  TODO: could decide to give access to source span of scanned words as well if someone devises a feature that can use it
-
-  state is some fixed width byte array.  could actually be a void *, WordID sequence, whatever.
-
-*/
-
 // it's not necessary to inherit from this, but you probably should to save yourself some boilerplate.  defaults to no-state
 
 // usage:
@@ -70,7 +72,8 @@ protected:
     to_state(state,(char const*)begin,(char const*)end);
   }
   inline static char hexdigit(int i) {
-    return '0'+i;
+    int j=i-10;
+    return j>=0?'a'+j:'0'+i;
   }
   inline static void print_hex_byte(std::ostream &o,unsigned c) {
       o<<hexdigit(c>>4);
@@ -126,8 +129,14 @@ public:
   // move from state to next_state after seeing word x, while emitting features->add_value(fid,val) possibly with duplicates.  state and next_state will never be the same memory.
   //TODO: decide if we want to require you to support dest same as src, since that's how we use it most often in ff_from_fsa bottom-up wrapper (in l->r scoring, however, distinct copies will be the rule), and it probably wouldn't be too hard for most people to support.  however, it's good to hide the complexity here, once (see overly clever FsaScan loop that swaps src/dest addresses repeatedly to scan a sequence by effectively swapping)
 
+  // different name because of inheritance method hiding; simple/common case; 1 fid
+  Featval Scan1(WordID w,void const* state,void *next_state) const {
+    return 0;
+  }
+
   // NOTE: if you want to e.g. track statistics, cache, whatever, cast const away or use mutable members
-  void Scan(SentenceMetadata const& smeta,WordID w,void const* state,void *next_state,FeatureVector *features) const {
+  inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* state,void *next_state,FeatureVector *features) const {
+    features->add_value(fid_,Scan1(w,state,next_state));
   }
 
   // don't set state-bytes etc. in ctor because it may depend on parsing param string
@@ -137,7 +146,7 @@ public:
 
 // init state is in cs; overwrite cs, ns repeatedly (alternatively).  return resulting state
 template <class FsaFF>
-void *FsaScan(FsaFF const& ff,SentenceMetadata const& smeta,WordID const* i, WordID const* end,FeatureVector *features, void *cs,void *ns) {
+void *FsaScan(FsaFF const& ff,SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i, WordID const* end,FeatureVector *features, void *cs,void *ns) {
   // extra code - IT'S FOR EFFICIENCY, MAN!  IT'S OK!  definitely no bugs here.
   void *os,*es;
   if ((end-i)&1) { // odd # of words
@@ -150,20 +159,20 @@ void *FsaScan(FsaFF const& ff,SentenceMetadata const& smeta,WordID const* i, Wor
     os=ns;
   }
   for (;i<end;i+=2) {
-    ff.Scan(smeta,i[-1],es,os,features); // e->o
+    ff.Scan(smeta,edge,i[-1],es,os,features); // e->o
   odd:
-    ff.Scan(smeta,i[0],os,es,features); // o->e
+    ff.Scan(smeta,edge,i[0],os,es,features); // o->e
   }
   return es;
 }
 
 // if you have a more efficient implementation for scanning a phrase than one word at a time (e.g. LM context using sliding window buffer rather than rotating through a fixed state size), you can override this
 template <class FsaFF>
-void Scan(FsaFF const& ff,SentenceMetadata const& smeta,WordID const* i,WordID const* e,void const* state,void *next_state,FeatureVector *features) {
+void Scan(FsaFF const& ff,SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i,WordID const* e,void const* state,void *next_state,FeatureVector *features) {
   int ssz=ff.state_bytes();
   if (!ssz) {
     for (;i<e;++i)
-      ff.Scan(smeta,*i,0,0,features);
+      ff.Scan(smeta,edge,*i,0,0,features);
     return;
   }
   Bytes tstate(ssz);
@@ -178,23 +187,23 @@ void Scan(FsaFF const& ff,SentenceMetadata const& smeta,WordID const* i,WordID c
     ns=tst;
   }
   std::memcpy(cs,state,ssz);
-  void *est=FsaScan(ff,smeta,i,end,features,cs,ns);
+  void *est=FsaScan(ff,smeta,edge,i,end,features,cs,ns);
   assert(est==next_state);
 }
 
 
 // like above Scan, but don't bother storing final state (for FinalTraversalFeatures)
 template <class FF>
-void AccumFeatures(FF const& ff,SentenceMetadata const& smeta,WordID const* i, WordID const* end,FeatureVector *h_features,void const* start_state) {
+void AccumFeatures(FF const& ff,SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID const* i, WordID const* end,FeatureVector *h_features,void const* start_state) {
   int ssz=ff.state_bytes();
   if (ssz) {
     Bytes state(ssz),state2(ssz);
     void *cs=state.begin(),*ns=state2.begin();
     memcpy(cs,start_state,ff.state_bytes());
-    FsaScan(ff,smeta,i,end,h_features,cs,ns);
+    FsaScan(ff,smeta,edge,i,end,h_features,cs,ns);
   } else
     for (;i<end;++i)
-      ff.Scan(smeta,*i,0,0,h_features);
+      ff.Scan(smeta,edge,*i,0,0,h_features);
 }
 
 // if State is pod.  sets state size and allocs start, h_start
@@ -233,10 +242,15 @@ public:
     o<<state(st);
   }
   int markov_order() const { return 1; }
-  void Scan(SentenceMetadata const& smeta,WordID w,void const* st,void *next_state,FeatureVector *features) const {
+  Featval ScanT1(WordID w,int prevlen,int &len) const { return 0; }
+  inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,int prevlen,int &len,FeatureVector *features) const {
+    features->add_value(d().fid_,d().ScanT1(w,prevlen,len));
+  }
+
+  inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,FeatureVector *features) const {
     Impl const& im=d();
     FSADBG("Scan "<<FD::Convert(im.fid_)<<" = "<<(*features)[im.fid_]<<" "<<im.state(st)<<" ->"<<TD::Convert(w)<<" ");
-    im.ScanTyped(smeta,w,im.state(st),im.state(next_state),features);
+    im.ScanT(smeta,edge,w,im.state(st),im.state(next_state),features);
     FSADBG(im.state(next_state)<<" = "<<(*features)[im.fid_]<<std::endl);
   }
 
@@ -263,7 +277,8 @@ struct FsaScanner {
   inline void *nexts() const {
     return (cs==st0)?st1:st0;
   }
-  FsaScanner(FF const& ff,SentenceMetadata const& smeta) : ff(ff),smeta(smeta)
+  const Hypergraph::Edge& edge;
+  FsaScanner(FF const& ff,SentenceMetadata const& smeta,const Hypergraph::Edge& edge) : ff(ff),smeta(smeta),edge(edge)
   {
     ssz=ff.state_bytes();
     int stride=((ssz+ALIGN-1)/ALIGN)*ALIGN; // round up to multiple of ALIGN
@@ -278,11 +293,11 @@ struct FsaScanner {
   }
   void scan(WordID w,FeatureVector *feat) {
     if (optimize_FsaScanner_zerostate && !ssz) {
-      ff.Scan(smeta,w,0,0,feat);
+      ff.Scan(smeta,edge,w,0,0,feat);
       return;
     }
     void *ns=nexts();
-    ff.Scan(smeta,w,cs,ns,feat);
+    ff.Scan(smeta,edge,w,cs,ns,feat);
     cs=ns;
   }
 
@@ -291,9 +306,9 @@ struct FsaScanner {
     // faster.
     if (optimize_FsaScanner_zerostate && !ssz)
       for (;i<end;++i)
-        ff.Scan(smeta,*i,0,0,feat);
+        ff.Scan(smeta,edge,*i,0,0,feat);
     else
-      cs=FsaScan(ff,smeta,i,end,feat,cs,nexts());
+      cs=FsaScan(ff,smeta,edge,i,end,feat,cs,nexts());
 #else
     for (;i<end;++i)
       scan(*i,feat);
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 6e42b83b..74d9e7b5 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -19,10 +19,9 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
     start.clear();
     h_start.clear();
   }
-  static const float val_per_target_word=-1;
   // move from state to next_state after seeing word x, while emitting features->add_value(fid,val) possibly with duplicates.  state and next_state may be same memory.
-  void Scan(SentenceMetadata const& smeta,WordID w,void const* state,void *next_state,FeatureVector *features) const {
-    features->add_value(fid_,val_per_target_word);
+  Featval Scan1(WordID w,void const* state,void *next_state) const {
+    return -1;
   }
 };
 
@@ -67,13 +66,11 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
 
   }
 
-  static const float val_per_target_word=-1;
-  void Scan(SentenceMetadata const& smeta,WordID w,void const* from,void *next_state,FeatureVector *features) const {
+  Featval Scan1(WordID w,void const* from,void *next_state) const {
     int prevlen=state(from);
     int len=wordlen(w);
-    if (len>prevlen)
-      features->add_value(fid_,val_per_target_word);
     state(next_state)=len;
+    return len>prevlen ? -1 : 0;
   }
 };
 
@@ -100,19 +97,18 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
     Init();
   }
 
-  static const float val_per_target_word=-1;
+
+/*  Featval ScanT1(WordID w,int prevlen,int &len) const;
+    // alternative to below:
+    */
+
   // evil anti-google int & len out-param:
-  void ScanTyped(SentenceMetadata const& smeta,WordID w,int prevlen,int &len,FeatureVector *features) const {
+  void ScanT(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len,FeatureVector *features) const {
     len=wordlen(w);
     if (len<prevlen)
-      features->add_value(fid_,val_per_target_word);
+      features->add_value(fid_,-1);
   }
 
-  // already provided by FsaTypedScan<ShorterThanPrev>
-/*  void Scan(SentenceMetadata const& smeta,WordID w,void const* st,void *next_state,FeatureVector *features) const {
-    ScanTyped(smeta,w,state(st),state(next_state),features);
-    } */
-
 };
 
 
diff --git a/decoder/hash.h b/decoder/hash.h
index 3e4ad1ff..3a60a429 100755
--- a/decoder/hash.h
+++ b/decoder/hash.h
@@ -1,6 +1,8 @@
 #ifndef CDEC_HASH_H
 #define CDEC_HASH_H
 
+#include "murmur_hash.h"
+
 #include "config.h"
 #ifdef HAVE_SPARSEHASH
 # include <google/dense_hash_map>
@@ -13,7 +15,40 @@
 # define HASH_MAP_RESERVED(h,empty,deleted)
 # define HASH_MAP_EMPTY(h,empty)
 #endif
+
 #include <boost/functional/hash.hpp>
 
+// assumes C is POD
+template <class C>
+struct murmur_hash
+{
+  typedef MurmurInt return_type;
+  typedef C /*const&*/ argument_type;
+  return_type operator()(argument_type const& c) const {
+    return MurmurHash((void*)&c,sizeof(c));
+  }
+};
+
+// murmur_hash_array isn't std guaranteed safe (you need to use string::data())
+template <>
+struct murmur_hash<std::string>
+{
+  typedef MurmurInt return_type;
+  typedef std::string /*const&*/ argument_type;
+  return_type operator()(argument_type const& c) const {
+    return MurmurHash(c.data(),c.size());
+  }
+};
+
+// uses begin(),size() assuming contiguous layout and POD
+template <class C>
+struct murmur_hash_array
+{
+  typedef MurmurInt return_type;
+  typedef C /*const&*/ argument_type;
+  return_type operator()(argument_type const& c) const {
+    return MurmurHash(&*c.begin(),c.size()*sizeof(*c.begin()));
+  }
+};
 
 #endif
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 5a33fc97..e7a86c5b 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -100,7 +100,7 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector<prob_t>* posts) co
                   ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2);
   posts->resize(edges_.size());
   for (int i = 0; i < edges_.size(); ++i)
-    (*posts)[i] = prob_t(pv.value(i));
+    (*posts)[i] = prob_t(pv.get(i));
   return prob_t(inside);
 }
 
@@ -113,7 +113,7 @@ prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const {
                   ViterbiTransitionEventWeightFunction>(*this, &pv);
   post->resize(edges_.size());
   for (int i = 0; i < edges_.size(); ++i)
-    (*post)[i] = pv.value(i).v_;
+    (*post)[i] = pv.get(i).v_;
   return viterbi_weight.v_;
 }
 
diff --git a/decoder/hg.h b/decoder/hg.h
index 53b5a53d..b3bfd19c 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -4,8 +4,8 @@
 #include <string>
 #include <vector>
 
+#include "feature_vector.h"
 #include "small_vector.h"
-#include "sparse_vector.h"
 #include "wordid.h"
 #include "trule.h"
 #include "prob.h"
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index d6e73b8a..65c2c382 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -556,6 +556,7 @@ void HypergraphIO::PLFtoLattice(const string& plf, Lattice* pl) {
   ReadFromPLF(plf, &g, 0);
   const int num_nodes = g.nodes_.size() - 1;
   l.resize(num_nodes);
+  int fid0=FD::Convert("Feature_0");
   for (int i = 0; i < num_nodes; ++i) {
     vector<LatticeArc>& alts = l[i];
     const Hypergraph::Node& node = g.nodes_[i];
@@ -564,7 +565,7 @@ void HypergraphIO::PLFtoLattice(const string& plf, Lattice* pl) {
     for (int j = 0; j < num_alts; ++j) {
       const Hypergraph::Edge& edge = g.edges_[node.out_edges_[j]];
       alts[j].label = edge.rule_->e_[1];
-      alts[j].cost = edge.feature_values_.value(FD::Convert("Feature_0"));
+      alts[j].cost = edge.feature_values_.get(fid0);
       alts[j].dist2next = edge.head_node_ - node.id_;
     }
   }
diff --git a/decoder/murmur_hash.h b/decoder/murmur_hash.h
index 8ddad339..8dbd7807 100755
--- a/decoder/murmur_hash.h
+++ b/decoder/murmur_hash.h
@@ -3,126 +3,124 @@
 
 //NOTE: quite fast, nice collision properties, but endian dependent hash values
 
-#include <stdint.h>
+#include "have_64_bits.h"
+typedef uintptr_t MurmurInt;
 
 // MurmurHash2, by Austin Appleby
 
 static const uint32_t DEFAULT_SEED=2654435769U;
 
-#if sizeof(void *)==8
-typedef uint64_t MurmurInt;
+#if HAVE_64_BITS
 //MurmurInt MurmurHash(void const *key, int len, uint32_t seed=DEFAULT_SEED);
 
 inline uint64_t MurmurHash64( const void * key, int len, unsigned int seed=DEFAULT_SEED )
 {
-	const uint64_t m = 0xc6a4a7935bd1e995;
-	const int r = 47;
+  const uint64_t m = 0xc6a4a7935bd1e995;
+  const int r = 47;
 
-	uint64_t h = seed ^ (len * m);
+  uint64_t h = seed ^ (len * m);
 
-	const uint64_t * data = (const uint64_t *)key;
-	const uint64_t * end = data + (len/8);
+  const uint64_t * data = (const uint64_t *)key;
+  const uint64_t * end = data + (len/8);
 
-	while(data != end)
-	{
-		uint64_t k = *data++;
+  while(data != end)
+  {
+    uint64_t k = *data++;
 
-		k *= m;
-		k ^= k >> r;
-		k *= m;
+    k *= m;
+    k ^= k >> r;
+    k *= m;
 
-		h ^= k;
-		h *= m;
-	}
+    h ^= k;
+    h *= m;
+  }
 
-	const unsigned char * data2 = (const unsigned char*)data;
+  const unsigned char * data2 = (const unsigned char*)data;
 
-	switch(len & 7)
-	{
-	case 7: h ^= uint64_t(data2[6]) << 48;
-	case 6: h ^= uint64_t(data2[5]) << 40;
-	case 5: h ^= uint64_t(data2[4]) << 32;
-	case 4: h ^= uint64_t(data2[3]) << 24;
-	case 3: h ^= uint64_t(data2[2]) << 16;
-	case 2: h ^= uint64_t(data2[1]) << 8;
-	case 1: h ^= uint64_t(data2[0]);
-	        h *= m;
-	};
+  switch(len & 7)
+  {
+  case 7: h ^= uint64_t(data2[6]) << 48;
+  case 6: h ^= uint64_t(data2[5]) << 40;
+  case 5: h ^= uint64_t(data2[4]) << 32;
+  case 4: h ^= uint64_t(data2[3]) << 24;
+  case 3: h ^= uint64_t(data2[2]) << 16;
+  case 2: h ^= uint64_t(data2[1]) << 8;
+  case 1: h ^= uint64_t(data2[0]);
+    h *= m;
+  };
 
-	h ^= h >> r;
-	h *= m;
-	h ^= h >> r;
+  h ^= h >> r;
+  h *= m;
+  h ^= h >> r;
 
-	return h;
+  return h;
 }
 
-inline uint32_t MurmurHash2(void const *key, int len, uint32_t seed=DEFAULT_SEED)
+inline uint32_t MurmurHash32(void const *key, int len, uint32_t seed=DEFAULT_SEED)
 {
   return (uint32_t) MurmurHash64(key,len,seed);
 }
 
-typedef uint64_t MurmurInt;
 inline MurmurInt MurmurHash(void const *key, int len, uint32_t seed=DEFAULT_SEED)
 {
   return MurmurHash64(key,len,seed);
 }
 
-#else // 32-bit
-typedef uint32_t MurmurInt;
-
+#else
+// 32-bit
 
 // Note - This code makes a few assumptions about how your machine behaves -
 // 1. We can read a 4-byte value from any address without crashing
 // 2. sizeof(int) == 4
-inline uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)
+inline uint32_t MurmurHash32 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)
 {
-	// 'm' and 'r' are mixing constants generated offline.
-	// They're not really 'magic', they just happen to work well.
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
 
-	const uint32_t m = 0x5bd1e995;
-	const int r = 24;
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
 
-	// Initialize the hash to a 'random' value
+  // Initialize the hash to a 'random' value
 
-	uint32_t h = seed ^ len;
+  uint32_t h = seed ^ len;
 
-	// Mix 4 bytes at a time into the hash
+  // Mix 4 bytes at a time into the hash
 
-	const unsigned char * data = (const unsigned char *)key;
+  const unsigned char * data = (const unsigned char *)key;
 
-	while(len >= 4)
-	{
-		uint32_t k = *(uint32_t *)data;
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t *)data;
 
-		k *= m;
-		k ^= k >> r;
-		k *= m;
+    k *= m;
+    k ^= k >> r;
+    k *= m;
 
-		h *= m;
-		h ^= k;
+    h *= m;
+    h ^= k;
 
-		data += 4;
-		len -= 4;
-	}
+    data += 4;
+    len -= 4;
+  }
 
-	// Handle the last few bytes of the input array
+  // Handle the last few bytes of the input array
 
-	switch(len)
-	{
-	case 3: h ^= data[2] << 16;
-	case 2: h ^= data[1] << 8;
-	case 1: h ^= data[0];
-	        h *= m;
-	};
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+    h *= m;
+  };
 
-	// Do a few final mixes of the hash to ensure the last few
-	// bytes are well-incorporated.
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
 
-	h ^= h >> 13;
-	h *= m;
-	h ^= h >> 15;
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
 
-	return h;
+  return h;
 }
 
 inline MurmurInt MurmurHash ( const void * key, int len, uint32_t seed=DEFAULT_SEED) {
@@ -133,55 +131,56 @@ inline MurmurInt MurmurHash ( const void * key, int len, uint32_t seed=DEFAULT_S
 
 inline uint64_t MurmurHash64 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)
 {
-	const uint32_t m = 0x5bd1e995;
-	const int r = 24;
-
-	uint32_t h1 = seed ^ len;
-	uint32_t h2 = 0;
-
-	const uint32_t * data = (const uint32_t *)key;
-
-	while(len >= 8)
-	{
-		uint32_t k1 = *data++;
-		k1 *= m; k1 ^= k1 >> r; k1 *= m;
-		h1 *= m; h1 ^= k1;
-		len -= 4;
-
-		uint32_t k2 = *data++;
-		k2 *= m; k2 ^= k2 >> r; k2 *= m;
-		h2 *= m; h2 ^= k2;
-		len -= 4;
-	}
-
-	if(len >= 4)
-	{
-		uint32_t k1 = *data++;
-		k1 *= m; k1 ^= k1 >> r; k1 *= m;
-		h1 *= m; h1 ^= k1;
-		len -= 4;
-	}
-
-	switch(len)
-	{
-	case 3: h2 ^= ((unsigned char*)data)[2] << 16;
-	case 2: h2 ^= ((unsigned char*)data)[1] << 8;
-	case 1: h2 ^= ((unsigned char*)data)[0];
-			h2 *= m;
-	};
-
-	h1 ^= h2 >> 18; h1 *= m;
-	h2 ^= h1 >> 22; h2 *= m;
-	h1 ^= h2 >> 17; h1 *= m;
-	h2 ^= h1 >> 19; h2 *= m;
-
-	uint64_t h = h1;
-
-	h = (h << 32) | h2;
-
-	return h;
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h1 = seed ^ len;
+  uint32_t h2 = 0;
+
+  const uint32_t * data = (const uint32_t *)key;
+
+  while(len >= 8)
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+
+    uint32_t k2 = *data++;
+    k2 *= m; k2 ^= k2 >> r; k2 *= m;
+    h2 *= m; h2 ^= k2;
+    len -= 4;
+  }
+
+  if(len >= 4)
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+  }
+
+  switch(len)
+  {
+  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+  case 1: h2 ^= ((unsigned char*)data)[0];
+    h2 *= m;
+  };
+
+  h1 ^= h2 >> 18; h1 *= m;
+  h2 ^= h1 >> 22; h2 *= m;
+  h1 ^= h2 >> 17; h1 *= m;
+  h2 ^= h1 >> 19; h2 *= m;
+
+  uint64_t h = h1;
+
+  h = (h << 32) | h2;
+
+  return h;
 }
 
 #endif
+//32bit
 
 #endif
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index 2ccace61..4dc86bc7 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -41,7 +41,7 @@ struct Translation {
     out<<pre<<"Viterbi: "<<TD::GetString(sentence)<<"\n";
     out<<pre<<"features: "<<features;
     if (include_0_fid && features.nonzero(0))
-      out<< " dummy-feature(0)="<<features[0];
+      out<< " dummy-feature(0)="<<features.get(0);
     out<<std::endl;
   }
   bool is_null() {
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index bfbe44ee..08276c71 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -2,10 +2,10 @@
 
 //TODO: grammar heuristic (min cost of reachable rule set) for binarizations (active edges) if we wish to prune those also
 
+#include "hash.h"
 #include "translator.h"
 #include <algorithm>
 #include <vector>
-#include <tr1/unordered_map>
 #include <boost/foreach.hpp>
 #include <boost/functional/hash.hpp>
 #include "hg.h"
@@ -169,13 +169,15 @@ struct SCFGTranslatorImpl {
 
   typedef std::pair<int, WordID> StateSplit;
   typedef std::pair<StateSplit, int> StateSplitPair;
-  typedef std::tr1::unordered_map<StateSplit, int, boost::hash<StateSplit> > Split2Node;
-  typedef std::tr1::unordered_map<int, vector<WordID> > Splits;
+  typedef HASH_MAP<StateSplit, int, boost::hash<StateSplit> > Split2Node;
+  typedef HASH_MAP<int, vector<WordID> > Splits;
 
   bool RefineForest(Hypergraph* forest) {
     Hypergraph refined_forest;
     Split2Node s2n;
+    HASH_MAP_RESERVED(s2n,StateSplit(-1,-1),StateSplit(-2,-2));
     Splits splits;
+    HASH_MAP_RESERVED(splits,-1,-2);
     Hypergraph::Node& coarse_goal_node = *(forest->nodes_.end()-1);
     bool refined_goal_node = false;
     foreach(Hypergraph::Node& node, forest->nodes_){
diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h
index e58485c0..ec623c6e 100644
--- a/decoder/sparse_vector.h
+++ b/decoder/sparse_vector.h
@@ -15,7 +15,25 @@
    use SparseVectorList (pair smallvector) for feat funcs / hypergraphs (you rarely need random access; just append a feature to the list)
 */
 /* hack: index 0 never gets printed because cdyer is creative and efficient. features which have no weight got feature dict id 0, see, and the models all clobered that value.  nobody wants to see it.  except that vlad is also creative and efficient and stored the oracle bleu there. */
+/* NOTE: zero vals may or may not be dropped from map (sparse, but not guaranteed to be so).
 
+   I rely on !v the same as !((bool)v) the same as v==0 and v() same as v(0).
+
+   one exception:
+
+   a local:
+   T sum = 0;
+   is used instead of
+   T sum;
+
+   because T may be a primitive type, and
+
+   T sum();
+
+   is parsed as a function decl :(
+
+   the alternative T sum=T() is also be reasonable.  i've switched to that.
+*/
 
 // this is a modified version of code originally written
 // by Phil Blunsom
@@ -78,31 +96,35 @@ public:
 
   // warning: exploits the fact that 0 values are always removed from map.  change this if you change that.
   bool nonzero(int index) const {
-    return values_.find(index) != values_.end();
+    typename MapType::const_iterator found = values_.find(index);
+    return found==values_.end() || !found->second;
   }
 
 
-  T operator[](int index) const {
+  T get(int index) const {
     typename MapType::const_iterator found = values_.find(index);
-    if (found == values_.end())
-      return 0;
-    else
-      return found->second;
+    return found==values_.end()?T():found->second;
   }
 
-  T value(int index) const {
-    return (*this)[index];
+  // same as above but may add a 0 entry.  TODO: check that people relying on no entry use get
+  T & operator[](int index){
+    return values_[index];
   }
 
   void set_value(int index, const T &value) {
     values_[index] = value;
   }
 
-    T const& add_value(int index, const T &value) {
+    void add_value(int index, const T &value) {
+      if (!value) return;
+#if 1
+      values_[index]+=value;
+#else
+      // this is not really going to be any faster, and we already rely on default init = 0 init
       std::pair<typename MapType::iterator,bool> art=values_.insert(std::make_pair(index,value));
       T &val=art.first->second;
       if (!art.second) val += value; // already existed
-      return val;
+#endif
     }
 
 
@@ -125,7 +147,7 @@ public:
     // dot product with a unit vector of the same length
     // as the sparse vector
     T dot() const {
-        T sum = 0;
+        T sum = T();
         for (typename MapType::const_iterator
                 it = values_.begin(); it != values_.end(); ++it)
             sum += it->second;
@@ -146,7 +168,7 @@ public:
 
     template<typename S>
     S dot(const SparseVector<S> &vec) const {
-        S sum = 0;
+        S sum = S();
         for (typename MapType::const_iterator
                 it = values_.begin(); it != values_.end(); ++it)
         {
@@ -160,7 +182,7 @@ public:
 
     template<typename S>
     S dot(const std::vector<S> &vec) const {
-        S sum = 0;
+      S sum = S();
         for (typename MapType::const_iterator
                 it = values_.begin(); it != values_.end(); ++it)
         {
@@ -173,7 +195,7 @@ public:
     template<typename S>
     S dot(const S *vec) const {
         // this is not range checked!
-        S sum = 0;
+        S sum = S();
         for (typename MapType::const_iterator
                 it = values_.begin(); it != values_.end(); ++it)
             sum += it->second * vec[it->first];
@@ -182,7 +204,7 @@ public:
     }
 
     T l1norm() const {
-      T sum = 0;
+      T sum = T();
       for (typename MapType::const_iterator
               it = values_.begin(); it != values_.end(); ++it)
         sum += fabs(it->second);
@@ -190,7 +212,7 @@ public:
     }
 
   T l2norm_sq() const {
-      T sum = 0;
+      T sum = T();
       for (typename MapType::const_iterator
               it = values_.begin(); it != values_.end(); ++it)
         sum += it->second * it->second;
@@ -214,7 +236,7 @@ public:
         {
 //            T v =
               (values_[it->first] += it->second);
-//            if (v == 0) values_.erase(it->first);
+//            if (!v) values_.erase(it->first);
         }
         return *this;
     }
@@ -225,7 +247,7 @@ public:
         {
 //            T v =
           (values_[it->first] -= it->second);
-//            if (v == 0) values_.erase(it->first);
+//            if (!v) values_.erase(it->first);
         }
         return *this;
     }
@@ -294,7 +316,7 @@ public:
         for (typename MapType::const_iterator
                 it = values_.begin(); it != values_.end(); ++it) {
           // by definition feature id 0 is a dummy value
-          if (it->first == 0) continue;
+          if (!it->first) continue;
           if (with_semi) {
             (*os) << (first ? "" : ";")
 	         << FD::Convert(it->first) << '=' << it->second;
@@ -316,7 +338,7 @@ public:
 
   bool at_equals(int i,T const& val) const {
     const_iterator it=values_.find(i);
-    if (it==values_.end()) return val==0;
+    if (it==values_.end()) return !val;
     return it->second==val;
   }
 
@@ -395,19 +417,17 @@ class SparseVectorList {
   SparseVectorList() {  }
   template <class I>
   SparseVectorList(I i,I const& end) {
-    const T z=0;
     int c=0;
     for (;i<end;++i,++c) {
-      if (*i!=z)
+      if (*i)
         p.push_back(featval(c,*i));
     }
     p.compact();
   }
   explicit SparseVectorList(std::vector<T> const& v) {
-    const T z=0;
     for (unsigned i=0;i<v.size();++i) {
       T const& t=v[i];
-      if (t!=z)
+      if (t)
         p.push_back(featval(i,t));
     }
     p.compact();
@@ -433,10 +453,6 @@ private:
   List p;
 };
 
-typedef SparseVectorList<double> FeatureVectorList;
-typedef SparseVector<double> FeatureVector;
-typedef SparseVector<double> WeightVector;
-typedef std::vector<double> DenseWeightVector;
 template <typename T>
 SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) {
   SparseVector<T> result = a;
diff --git a/decoder/trule.h b/decoder/trule.h
index 3bc96165..6b98a8fa 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -121,7 +121,7 @@ class TRule {
   int Arity() const { return arity_; }
   bool IsUnary() const { return (Arity() == 1) && (f_.size() == 1); }
   const SparseVector<double>& GetFeatureValues() const { return scores_; }
-  double Score(int i) const { return scores_[i]; }
+  double Score(int i) const { return scores_.get(i); }
   WordID GetLHS() const { return lhs_; }
   void ComputeArity();
 
@@ -133,8 +133,8 @@ class TRule {
   SparseVector<double> scores_;
 
   char arity_;
-  
-  // these attributes are application-specific and should probably be refactored 
+
+  // these attributes are application-specific and should probably be refactored
   TRulePtr parent_rule_;  // usually NULL, except when doing constrained decoding
 
   // this is only used when doing synchronous parsing