Diffstat (limited to 'decoder')
-rw-r--r--  decoder/Jamfile              |  83
-rw-r--r--  decoder/Makefile.am          |   3
-rw-r--r--  decoder/apply_models.cc      |   1
-rw-r--r--  decoder/cdec.cc              |   2
-rw-r--r--  decoder/cdec_ff.cc           |   3
-rw-r--r--  decoder/cfg.h                |   2
-rw-r--r--  decoder/cfg_format.h         |   2
-rw-r--r--  decoder/cfg_test.cc          |   4
-rw-r--r--  decoder/decoder.cc           |  26
-rw-r--r--  decoder/decoder.h            |   2
-rw-r--r--  decoder/exp_semiring.h       |   2
-rw-r--r--  decoder/ff.cc                | 197
-rw-r--r--  decoder/ff.h                 | 245
-rw-r--r--  decoder/ff_basic.cc          |  80
-rw-r--r--  decoder/ff_basic.h           |  68
-rw-r--r--  decoder/ff_bleu.h            |   2
-rw-r--r--  decoder/ff_charset.cc        |   6
-rw-r--r--  decoder/ff_charset.h         |   6
-rw-r--r--  decoder/ff_context.cc        |   2
-rw-r--r--  decoder/ff_context.h         |   2
-rw-r--r--  decoder/ff_csplit.cc         |   1
-rw-r--r--  decoder/ff_csplit.h          |   4
-rw-r--r--  decoder/ff_dwarf.cc          |   1
-rw-r--r--  decoder/ff_dwarf.h           |   2
-rw-r--r--  decoder/ff_external.cc       |  60
-rw-r--r--  decoder/ff_external.h        |  26
-rw-r--r--  decoder/ff_factory.h         |   4
-rw-r--r--  decoder/ff_klm.cc            |   6
-rw-r--r--  decoder/ff_klm.h             |   3
-rw-r--r--  decoder/ff_lm.cc             |   4
-rw-r--r--  decoder/ff_lm.h              |   5
-rw-r--r--  decoder/ff_ngrams.h          |   2
-rw-r--r--  decoder/ff_rules.cc          |   2
-rw-r--r--  decoder/ff_rules.h           |   5
-rw-r--r--  decoder/ff_ruleshape.cc      |   2
-rw-r--r--  decoder/ff_ruleshape.h       |   2
-rw-r--r--  decoder/ff_source_syntax.cc  |   1
-rw-r--r--  decoder/ff_source_syntax.h   |   4
-rw-r--r--  decoder/ff_spans.cc          |   2
-rw-r--r--  decoder/ff_spans.h           |   4
-rw-r--r--  decoder/ff_tagger.cc         |   1
-rw-r--r--  decoder/ff_tagger.h          |   6
-rw-r--r--  decoder/ff_wordalign.h       |  30
-rw-r--r--  decoder/ff_wordset.cc        |   1
-rw-r--r--  decoder/ff_wordset.h         |   5
-rw-r--r--  decoder/ffset.cc             |  72
-rw-r--r--  decoder/ffset.h              |  57
-rw-r--r--  decoder/grammar_test.cc      |   2
-rw-r--r--  decoder/hg.h                 | 194
-rw-r--r--  decoder/hg_io.cc             |   6
-rw-r--r--  decoder/inside_outside.h     |   8
-rw-r--r--  decoder/kbest.h              |  14
-rw-r--r--  decoder/oracle_bleu.h        |  11
-rw-r--r--  decoder/program_options.h    |   2
-rw-r--r--  decoder/tromble_loss.h       |   2
-rw-r--r--  decoder/viterbi.cc           |   4
-rw-r--r--  decoder/viterbi.h            |  32
57 files changed, 585 insertions(+), 740 deletions(-)
diff --git a/decoder/Jamfile b/decoder/Jamfile
deleted file mode 100644
index d778dc7f..00000000
--- a/decoder/Jamfile
+++ /dev/null
@@ -1,83 +0,0 @@
-import testing ;
-import lex ;
-import option ;
-
-if [ option.get "with-glc" ] {
- glc = ff_glc.cc string_util.cc feature-factory.cc ;
-}
-
-lib decoder :
- forest_writer.cc
- maxtrans_blunsom.cc
- cdec_ff.cc
- cfg.cc
- dwarf.cc
- ff_dwarf.cc
- rule_lexer.ll
- fst_translator.cc
- csplit.cc
- translator.cc
- scfg_translator.cc
- hg.cc
- hg_io.cc
- decoder.cc
- hg_intersect.cc
- hg_sampler.cc
- factored_lexicon_helper.cc
- viterbi.cc
- lattice.cc
- aligner.cc
- apply_models.cc
- earley_composer.cc
- phrasetable_fst.cc
- trule.cc
- ff.cc
- ff_rules.cc
- ff_wordset.cc
- ff_context.cc
- ff_charset.cc
- ff_lm.cc
- ff_klm.cc
- ff_ngrams.cc
- ff_spans.cc
- ff_ruleshape.cc
- ff_wordalign.cc
- ff_csplit.cc
- ff_tagger.cc
- ff_source_syntax.cc
- ff_bleu.cc
- ff_factory.cc
- lexalign.cc
- lextrans.cc
- tagger.cc
- bottom_up_parser.cc
- phrasebased_translator.cc
- JSON_parser.c
- json_parse.cc
- grammar.cc
- rescore_translator.cc
- hg_remove_eps.cc
- hg_union.cc
- lazy.cc
- $(glc)
- ..//utils
- ..//mteval
- ../klm/lm//kenlm
- ../klm/search//search
- ..//boost_program_options
- : <include>.
- : :
- <library>..//utils
- <library>..//mteval
- <library>../klm/lm//kenlm
- <library>..//boost_program_options
- <include>.
- ;
-
-exe cdec : cdec.cc decoder ..//utils ..//mteval ../klm/lm//kenlm ..//boost_program_options ;
-
-all_tests [ glob *_test.cc : cfg_test.cc ] : decoder : <testing.arg>$(TOP)/decoder/test_data ;
-
-install legacy : cdec
- : <location>$(TOP)/cdec <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/cdec <link>shared:<install-type>LIB ;
-
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 4a98a4f1..5c0a1964 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -33,6 +33,7 @@ libcdec_a_SOURCES = \
cfg.cc \
dwarf.cc \
ff_dwarf.cc \
+ ff_external.cc \
rule_lexer.cc \
fst_translator.cc \
csplit.cc \
@@ -55,6 +56,8 @@ libcdec_a_SOURCES = \
phrasetable_fst.cc \
trule.cc \
ff.cc \
+ ffset.cc \
+ ff_basic.cc \
ff_rules.cc \
ff_wordset.cc \
ff_context.cc \
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 9ba59d1b..330de9e2 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -16,6 +16,7 @@
#include "verbose.h"
#include "hg.h"
#include "ff.h"
+#include "ffset.h"
#define NORMAL_CP 1
#define FAST_CP 2
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 25d3b6af..cc3fcff1 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -4,6 +4,7 @@
#include "decoder.h"
#include "ff_register.h"
#include "verbose.h"
+#include "timing_stats.h"
#include "util/usage.hh"
using namespace std;
@@ -28,6 +29,7 @@ int main(int argc, char** argv) {
if (buf.empty()) continue;
decoder.Decode(buf);
}
+ Timer::Summarize();
#ifdef CP_TIME
cerr << "Time required for Cube Pruning execution: "
<< CpTime::Get()
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index b516c386..99ab7473 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -1,6 +1,7 @@
#include <boost/shared_ptr.hpp>
#include "ff.h"
+#include "ff_basic.h"
#include "ff_context.h"
#include "ff_spans.h"
#include "ff_lm.h"
@@ -18,6 +19,7 @@
#include "ff_charset.h"
#include "ff_wordset.h"
#include "ff_dwarf.h"
+#include "ff_external.h"
#ifdef HAVE_GLC
#include <cdec/ff_glc.h>
@@ -69,6 +71,7 @@ void register_feature_functions() {
ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
ff_registry.Register("WordSet", new FFFactory<WordSet>);
ff_registry.Register("Dwarf", new FFFactory<Dwarf>);
+ ff_registry.Register("External", new FFFactory<ExternalFeature>);
#ifdef HAVE_GLC
ff_registry.Register("ContextCRF", new FFFactory<Model1Features>);
#endif
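
(Not part of the diff: with the registration above, the new External feature should be selectable from a decoder configuration like any built-in feature. The library path and parameters below are invented for illustration; the plugin protocol itself is defined in ff_external.cc further down.)

feature_function=External /path/to/libmyff.so key=value
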
diff --git a/decoder/cfg.h b/decoder/cfg.h
index 8cb29bb9..aeeacb83 100644
--- a/decoder/cfg.h
+++ b/decoder/cfg.h
@@ -130,7 +130,7 @@ struct CFG {
int lhs; // index into nts
RHS rhs;
prob_t p; // h unused for now (there's nothing admissible, and p is already using 1st pass inside as pushed toward top)
- FeatureVector f; // may be empty, unless copy_features on Init
+ SparseVector<double> f; // may be empty, unless copy_features on Init
IF_CFG_TRULE(TRulePtr rule;)
int size() const { // for stats only
return rhs.size();
diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h
index 2f40d483..d12da261 100644
--- a/decoder/cfg_format.h
+++ b/decoder/cfg_format.h
@@ -100,7 +100,7 @@ struct CFGFormat {
}
}
- void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const {
+ void print_features(std::ostream &o,prob_t p,SparseVector<double> const& fv=SparseVector<double>()) const {
bool logp=(logprob_feat && p!=prob_t::One());
if (features || logp) {
o << partsep;
diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc
index b8f4cf11..316c6d16 100644
--- a/decoder/cfg_test.cc
+++ b/decoder/cfg_test.cc
@@ -25,9 +25,9 @@ struct CFGTest : public TestWithParam<HgW> {
Hypergraph hg;
CFG cfg;
CFGFormat form;
- FeatureVector weights;
+ SparseVector<double> weights;
- static void JsonFN(Hypergraph &hg,CFG &cfg,FeatureVector &featw,std::string file
+ static void JsonFN(Hypergraph &hg,CFG &cfg,SparseVector<double> &featw,std::string file
,std::string const& wts="Model_0 1 EgivenF 1 f1 1")
{
istringstream ws(wts);
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 29eaa4f6..052823ca 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -29,6 +29,7 @@
#include "oracle_bleu.h"
#include "apply_models.h"
#include "ff.h"
+#include "ffset.h"
#include "ff_factory.h"
#include "viterbi.h"
#include "kbest.h"
@@ -91,11 +92,6 @@ inline void ShowBanner() {
cerr << "cdec v1.0 (c) 2009-2011 by Chris Dyer\n";
}
-inline void show_models(po::variables_map const& conf,ModelSet &ms,char const* header) {
- cerr<<header<<": ";
- ms.show_features(cerr,cerr,conf.count("warn_0_weight"));
-}
-
inline string str(char const* name,po::variables_map const& conf) {
return conf[name].as<string>();
}
@@ -133,7 +129,7 @@ inline boost::shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose
}
boost::shared_ptr<FeatureFunction> pf = ff_registry.Create(ff, param);
if (!pf) exit(1);
- int nbyte=pf->NumBytesContext();
+ int nbyte=pf->StateSize();
if (verbose_feature_functions && !SILENT)
cerr<<"State is "<<nbyte<<" bytes for "<<pre<<"feature "<<ffp<<endl;
return pf;
@@ -644,8 +640,6 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
prev_weights = rp.weight_vector;
}
rp.models.reset(new ModelSet(*rp.weight_vector, rp.ffs));
- string ps = "Pass1 "; ps[4] += pass;
- if (!SILENT) show_models(conf,*rp.models,ps.c_str());
}
// show configuration of rescoring passes
@@ -879,13 +873,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
if (rp.fid_summary) {
if (summary_feature_type == kEDGE_PROB) {
const prob_t z = forest.PushWeightsToGoal(1.0);
- if (!isfinite(log(z)) || isnan(log(z))) {
+ if (!std::isfinite(log(z)) || std::isnan(log(z))) {
cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
} else {
for (int i = 0; i < forest.edges_.size(); ++i) {
const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge
// head node by forest.PushWeightsToGoal
- if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) {
+ if (!std::isfinite(log_prob_transition) || std::isnan(log_prob_transition)) {
cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl;
abort();
}
@@ -897,7 +891,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
} else if (summary_feature_type == kNODE_RISK) {
Hypergraph::EdgeProbs posts;
const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
- if (!isfinite(log(z)) || isnan(log(z))) {
+ if (!std::isfinite(log(z)) || std::isnan(log(z))) {
cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
} else {
for (int i = 0; i < forest.nodes_.size(); ++i) {
@@ -906,7 +900,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
for (int j = 0; j < in_edges.size(); ++j)
node_post += (posts[in_edges[j]] / z);
const double log_np = log(node_post);
- if (!isfinite(log_np) || isnan(log_np)) {
+ if (!std::isfinite(log_np) || std::isnan(log_np)) {
cerr << "got bad posterior prob for node " << i << endl;
abort();
}
@@ -921,13 +915,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
} else if (summary_feature_type == kEDGE_RISK) {
Hypergraph::EdgeProbs posts;
const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
- if (!isfinite(log(z)) || isnan(log(z))) {
+ if (!std::isfinite(log(z)) || std::isnan(log(z))) {
cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
} else {
assert(posts.size() == forest.edges_.size());
for (int i = 0; i < posts.size(); ++i) {
const double log_np = log(posts[i] / z);
- if (!isfinite(log_np) || isnan(log_np)) {
+ if (!std::isfinite(log_np) || std::isnan(log_np)) {
cerr << "got bad posterior prob for node " << i << endl;
abort();
}
@@ -967,7 +961,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
// Oracle Rescoring
if(get_oracle_forest) {
- assert(!"this is broken"); FeatureVector dummy; // = last_weights
+ assert(!"this is broken"); SparseVector<double> dummy; // = last_weights
Oracle oc=oracle.ComputeOracle(smeta,&forest,dummy,10,conf["forest_output"].as<std::string>());
if (!SILENT) cerr << " +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl;
if (!SILENT) cerr << " +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl;
@@ -1098,7 +1092,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
cerr << "DIFF. ERR! log_z < log_ref_z: " << log_z << " " << log_ref_z << endl;
exit(1);
}
- assert(!isnan(log_ref_z));
+ assert(!std::isnan(log_ref_z));
ref_exp -= full_exp;
acc_vec += ref_exp;
acc_obj += (log_z - log_ref_z);
diff --git a/decoder/decoder.h b/decoder/decoder.h
index bef2ff5e..79c7a602 100644
--- a/decoder/decoder.h
+++ b/decoder/decoder.h
@@ -24,7 +24,7 @@ private:
#endif
class SentenceMetadata;
-struct Hypergraph;
+class Hypergraph;
struct DecoderImpl;
struct DecoderObserver {
diff --git a/decoder/exp_semiring.h b/decoder/exp_semiring.h
index 111eaaf1..2a9034bb 100644
--- a/decoder/exp_semiring.h
+++ b/decoder/exp_semiring.h
@@ -59,7 +59,7 @@ struct PRWeightFunction {
explicit PRWeightFunction(const PWeightFunction& pwf = PWeightFunction(),
const RWeightFunction& rwf = RWeightFunction()) :
pweight(pwf), rweight(rwf) {}
- PRPair<P,R> operator()(const Hypergraph::Edge& e) const {
+ PRPair<P,R> operator()(const HG::Edge& e) const {
const P p = pweight(e);
const R r = rweight(e);
return PRPair<P,R>(p, r * p);
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 557e0b5f..a6a035b5 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -1,9 +1,3 @@
-//TODO: non-sparse vector for all feature functions? modelset applymodels keeps track of who has what features? it's nice having FF that could generate a handful out of 10000 possible feats, though.
-
-//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly? need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once
-
-#include "fast_lexical_cast.hpp"
-#include <stdexcept>
#include "ff.h"
#include "tdict.h"
@@ -16,8 +10,7 @@ FeatureFunction::~FeatureFunction() {}
void FeatureFunction::PrepareForInput(const SentenceMetadata&) {}
void FeatureFunction::FinalTraversalFeatures(const void* /* ant_state */,
- SparseVector<double>* /* features */) const {
-}
+ SparseVector<double>* /* features */) const {}
string FeatureFunction::usage_helper(std::string const& name,std::string const& params,std::string const& details,bool sp,bool sd) {
string r=name;
@@ -32,188 +25,14 @@ string FeatureFunction::usage_helper(std::string const& name,std::string const&
return r;
}
-Features FeatureFunction::single_feature(WordID feat) {
- return Features(1,feat);
-}
-
-Features ModelSet::all_features(std::ostream *warn,bool warn0) {
- //return ::all_features(models_,weights_,warn,warn0);
-}
-
-void show_features(Features const& ffs,DenseWeightVector const& weights_,std::ostream &out,std::ostream &warn,bool warn_zero_wt) {
- out << "Weight Feature\n";
- for (unsigned i=0;i<ffs.size();++i) {
- WordID fid=ffs[i];
- string const& fname=FD::Convert(fid);
- double wt=weights_[fid];
- if (warn_zero_wt && wt==0)
- warn<<"WARNING: "<<fname<<" has 0 weight."<<endl;
- out << wt << " " << fname<<endl;
- }
-}
-
-void ModelSet::show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt)
-{
-// ::show_features(all_features(),weights_,out,warn,warn_zero_wt);
- //show_all_features(models_,weights_,out,warn,warn_zero_wt,warn_zero_wt);
-}
-
-// Hiero and Joshua use log_10(e) as the value, so I do too
-WordPenalty::WordPenalty(const string& param) :
- fid_(FD::Convert("WordPenalty")),
- value_(-1.0 / log(10)) {
- if (!param.empty()) {
- cerr << "Warning WordPenalty ignoring parameter: " << param << endl;
- }
-}
-
-void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- throw std::runtime_error("TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog.\n");
+void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata&,
+ const Hypergraph::Edge&,
+ const std::vector<const void*>&,
+ SparseVector<double>*,
+ SparseVector<double>*,
+ void*) const {
+ cerr << "TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog\n";
abort();
}
-void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- (void) smeta;
- (void) ant_states;
- (void) state;
- (void) estimated_features;
- features->set_value(fid_, edge.rule_->EWords() * value_);
-}
-
-SourceWordPenalty::SourceWordPenalty(const string& param) :
- fid_(FD::Convert("SourceWordPenalty")),
- value_(-1.0 / log(10)) {
- if (!param.empty()) {
- cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl;
- }
-}
-
-Features SourceWordPenalty::features() const {
- return single_feature(fid_);
-}
-
-Features WordPenalty::features() const {
- return single_feature(fid_);
-}
-
-
-void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- (void) smeta;
- (void) ant_states;
- (void) state;
- (void) estimated_features;
- features->set_value(fid_, edge.rule_->FWords() * value_);
-}
-
-ArityPenalty::ArityPenalty(const std::string& param) :
- value_(-1.0 / log(10)) {
- string fname = "Arity_";
- unsigned MAX=DEFAULT_MAX_ARITY;
- using namespace boost;
- if (!param.empty())
- MAX=lexical_cast<unsigned>(param);
- for (unsigned i = 0; i <= MAX; ++i) {
- WordID fid=FD::Convert(fname+lexical_cast<string>(i));
- fids_.push_back(fid);
- }
- while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything
-}
-
-Features ArityPenalty::features() const {
- return Features(fids_.begin(),fids_.end());
-}
-
-void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- (void) smeta;
- (void) ant_states;
- (void) state;
- (void) estimated_features;
- unsigned a=edge.Arity();
- features->set_value(a<fids_.size()?fids_[a]:0, value_);
-}
-
-ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) :
- models_(models),
- weights_(w),
- state_size_(0),
- model_state_pos_(models.size()) {
- for (int i = 0; i < models_.size(); ++i) {
- model_state_pos_[i] = state_size_;
- state_size_ += models_[i]->NumBytesContext();
- }
-}
-
-void ModelSet::PrepareForInput(const SentenceMetadata& smeta) {
- for (int i = 0; i < models_.size(); ++i)
- const_cast<FeatureFunction*>(models_[i])->PrepareForInput(smeta);
-}
-
-void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
- const Hypergraph& /* hg */,
- const FFStates& node_states,
- Hypergraph::Edge* edge,
- FFState* context,
- prob_t* combination_cost_estimate) const {
- edge->reset_info();
- context->resize(state_size_);
- if (state_size_ > 0) {
- memset(&(*context)[0], 0, state_size_);
- }
- SparseVector<double> est_vals; // only computed if combination_cost_estimate is non-NULL
- if (combination_cost_estimate) *combination_cost_estimate = prob_t::One();
- for (int i = 0; i < models_.size(); ++i) {
- const FeatureFunction& ff = *models_[i];
- void* cur_ff_context = NULL;
- vector<const void*> ants(edge->tail_nodes_.size());
- bool has_context = ff.NumBytesContext() > 0;
- if (has_context) {
- int spos = model_state_pos_[i];
- cur_ff_context = &(*context)[spos];
- for (int i = 0; i < ants.size(); ++i) {
- ants[i] = &node_states[edge->tail_nodes_[i]][spos];
- }
- }
- ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context);
- }
- if (combination_cost_estimate)
- combination_cost_estimate->logeq(est_vals.dot(weights_));
- edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
-}
-
-void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const {
- assert(1 == edge->rule_->Arity());
- edge->reset_info();
- for (int i = 0; i < models_.size(); ++i) {
- const FeatureFunction& ff = *models_[i];
- const void* ant_state = NULL;
- bool has_context = ff.NumBytesContext() > 0;
- if (has_context) {
- int spos = model_state_pos_[i];
- ant_state = &state[spos];
- }
- ff.FinalTraversalFeatures(smeta, *edge, ant_state, &edge->feature_values_);
- }
- edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
-}
-
diff --git a/decoder/ff.h b/decoder/ff.h
index 6c22d39f..3280592e 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -1,79 +1,47 @@
#ifndef _FF_H_
#define _FF_H_
-#define DEBUG_INIT 0
-#if DEBUG_INIT
-# include <iostream>
-# define DBGINIT(a) do { std::cerr<<a<<"\n"; } while(0)
-#else
-# define DBGINIT(a)
-#endif
-
-#include <stdint.h>
+#include <string>
#include <vector>
-#include <cstring>
-#include "fdict.h"
-#include "hg.h"
-#include "feature_vector.h"
-#include "value_array.h"
+#include "sparse_vector.h"
+namespace HG { struct Edge; struct Node; }
+class Hypergraph;
class SentenceMetadata;
-class FeatureFunction; // see definition below
-
-typedef std::vector<WordID> Features; // set of features ids
// if you want to develop a new feature, inherit from this class and
// override TraversalFeaturesImpl(...). If it's a feature that returns /
// depends on context, you may also need to implement
// FinalTraversalFeatures(...)
class FeatureFunction {
+ friend class ExternalFeature;
public:
std::string name_; // set by FF factory using usage()
- bool debug_; // also set by FF factory checking param for immediate initial "debug"
- //called after constructor, but before name_ and debug_ have been set
- virtual void Init() { DBGINIT("default FF::Init name="<<name_); }
- virtual void init_name_debug(std::string const& n,bool debug) {
- name_=n;
- debug_=debug;
- }
- bool debug() const { return debug_; }
FeatureFunction() : state_size_() {}
explicit FeatureFunction(int state_size) : state_size_(state_size) {}
virtual ~FeatureFunction();
bool IsStateful() const { return state_size_ > 0; }
+ int StateSize() const { return state_size_; }
// override this. not virtual because we want to expose this to factory template for help before creating a FF
static std::string usage(bool show_params,bool show_details) {
return usage_helper("FIXME_feature_needs_name","[no parameters]","[no documentation yet]",show_params,show_details);
}
static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details);
- static Features single_feature(int feat);
-public:
-
- // stateless feature that doesn't depend on source span: override and return true. then your feature can be precomputed over rules.
- virtual bool rule_feature() const { return false; }
// called once, per input, before any feature calls to TraversalFeatures, etc.
// used to initialize sentence-specific data structures
virtual void PrepareForInput(const SentenceMetadata& smeta);
- //OVERRIDE THIS:
- virtual Features features() const { return single_feature(FD::Convert(name_)); }
- // returns the number of bytes of context that this feature function will
- // (maximally) use. By default, 0 ("stateless" models in Hiero/Joshua).
- // NOTE: this value is fixed for the instance of your class, you cannot
- // use different amounts of memory for different nodes in the forest. this will be read as soon as you create a ModelSet, then fixed forever on
- inline int NumBytesContext() const { return state_size_; }
-
// Compute the feature values and (if this applies) the estimates of the
// feature values when this edge is used incorporated into a larger context
inline void TraversalFeatures(const SentenceMetadata& smeta,
- Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
void* out_state) const {
- TraversalFeaturesLog(smeta, edge, ant_contexts,
+ TraversalFeaturesImpl(smeta, edge, ant_contexts,
features, estimated_features, out_state);
// TODO it's easy for careless feature function developers to overwrite
// the end of their state and clobber someone else's memory. These bugs
@@ -83,21 +51,10 @@ public:
}
// if there's some state left when you transition to the goal state, score
- // it here. For example, the language model computes the cost of adding
+ // it here. For example, a language model might compute the cost of adding
// <s> and </s>.
-
-protected:
virtual void FinalTraversalFeatures(const void* residual_state,
- FeatureVector* final_features) const;
-public:
- //override either this or one of above.
- virtual void FinalTraversalFeatures(const SentenceMetadata& /* smeta */,
- Hypergraph::Edge& /* edge */, // so you can log()
- const void* residual_state,
- FeatureVector* final_features) const {
- FinalTraversalFeatures(residual_state,final_features);
- }
-
+ SparseVector<double>* final_features) const;
protected:
// context is a pointer to a buffer of size NumBytesContext() that the
@@ -107,191 +64,19 @@ public:
// of the particular FeatureFunction class. There is one exception:
// equality of the contents (i.e., memcmp) is required to determine whether
// two states can be combined.
-
- // by Log, I mean that the edge is non-const only so you can log to it with INFO_EDGE(edge,msg<<"etc."). most features don't use this so implement the below. it has a different name to allow a default implementation without name hiding when inheriting + overriding just 1.
- virtual void TraversalFeaturesLog(const SentenceMetadata& smeta,
- Hypergraph::Edge& edge, // this is writable only so you can use log()
- const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
- void* context) const {
- TraversalFeaturesImpl(smeta,edge,ant_contexts,features,estimated_features,context);
- }
-
- // override above or below.
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- Hypergraph::Edge const& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
void* context) const;
// !!! ONLY call this from subclass *CONSTRUCTORS* !!!
void SetStateSize(size_t state_size) {
state_size_ = state_size;
}
- int StateSize() const { return state_size_; }
- private:
- int state_size_;
-};
-
-
-// word penalty feature, for each word on the E side of a rule,
-// add value_
-class WordPenalty : public FeatureFunction {
- public:
- Features features() const;
- WordPenalty(const std::string& param);
- static std::string usage(bool p,bool d) {
- return usage_helper("WordPenalty","","number of target words (local feature)",p,d);
- }
- bool rule_feature() const { return true; }
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
- void* context) const;
- private:
- const int fid_;
- const double value_;
-};
-
-class SourceWordPenalty : public FeatureFunction {
- public:
- bool rule_feature() const { return true; }
- Features features() const;
- SourceWordPenalty(const std::string& param);
- static std::string usage(bool p,bool d) {
- return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d);
- }
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
- void* context) const;
- private:
- const int fid_;
- const double value_;
-};
-
-#define DEFAULT_MAX_ARITY 9
-#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x
-#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x)
-#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY)
-
-class ArityPenalty : public FeatureFunction {
- public:
- bool rule_feature() const { return true; }
- Features features() const;
- ArityPenalty(const std::string& param);
- static std::string usage(bool p,bool d) {
- return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d);
- }
-
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
- void* context) const;
- private:
- std::vector<WordID> fids_;
- const double value_;
-};
-
-void show_features(Features const& features,DenseWeightVector const& weights,std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); //show features and weights
-
-template <class FFp>
-Features all_features(std::vector<FFp> const& models_,DenseWeightVector &weights_,std::ostream *warn=0,bool warn_fid_0=false) {
- using namespace std;
- Features ffs;
-#define WARNFF(x) do { if (warn) { *warn << "WARNING: "<< x << endl; } } while(0)
- typedef map<WordID,string> FFM;
- FFM ff_from;
- for (unsigned i=0;i<models_.size();++i) {
- string const& ffname=models_[i]->name_;
- Features si=models_[i]->features();
- if (si.empty()) {
- WARNFF(ffname<<" doesn't yet report any feature IDs - either supply feature weight, or use --no_freeze_feature_set, or implement features() method");
- }
- unsigned n0=0;
- for (unsigned j=0;j<si.size();++j) {
- WordID fid=si[j];
- if (!fid) ++n0;
- if (fid >= weights_.size())
- weights_.resize(fid+1);
- if (warn_fid_0 || fid) {
- pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname));
- if (i_new.second) {
- if (fid)
- ffs.push_back(fid);
- else
- WARNFF("Feature id 0 for "<<ffname<<" (models["<<i<<"]) - probably no weight provided. Don't freeze feature ids to see the name");
- } else {
- WARNFF(ffname<<" (models["<<i<<"]) tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second);
- }
- }
- }
- if (n0)
- WARNFF(ffname<<" (models["<<i<<"]) had "<<n0<<" unused features (--no_freeze_feature_set to see them)");
- }
- return ffs;
-#undef WARNFF
-}
-
-template <class FFp>
-void show_all_features(std::vector<FFp> const& models_,DenseWeightVector &weights_,std::ostream &out,std::ostream &warn,bool warn_fid_0=true,bool warn_zero_wt=true) {
- return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt);
-}
-
-typedef ValueArray<uint8_t> FFState; // this is about 10% faster than string.
-//typedef std::string FFState;
-
-//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size)
-typedef std::vector<FFState> FFStates;
-
-// this class is a set of FeatureFunctions that can be used to score, rescore,
-// etc. a (translation?) forest
-class ModelSet {
- public:
- ModelSet(const std::vector<double>& weights,
- const std::vector<const FeatureFunction*>& models);
-
- // sets edge->feature_values_ and edge->edge_prob_
- // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes
- // must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone
- void AddFeaturesToEdge(const SentenceMetadata& smeta,
- const Hypergraph& hg,
- const FFStates& node_states,
- Hypergraph::Edge* edge,
- FFState* residual_context,
- prob_t* combination_cost_estimate = NULL) const;
-
- //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM))
- void AddFinalFeatures(const FFState& residual_context,
- Hypergraph::Edge* edge,
- SentenceMetadata const& smeta) const;
-
- // this is called once before any feature functions apply to a hypergraph
- // it can be used to initialize sentence-specific data structures
- void PrepareForInput(const SentenceMetadata& smeta);
-
- bool empty() const { return models_.empty(); }
-
- bool stateless() const { return !state_size_; }
- Features all_features(std::ostream *warnings=0,bool warn_fid_zero=false); // this will warn about duplicate features as well (one function overwrites the feature of another). also resizes weights_ so it is large enough to hold the (0) weight for the largest reported feature id. since 0 is a NULL feature id, it's never included. if warn_fid_zero, then even the first 0 id is
- void show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt=true);
-
private:
- std::vector<const FeatureFunction*> models_;
- const std::vector<double>& weights_;
int state_size_;
- std::vector<int> model_state_pos_;
};
#endif
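
(Not part of the diff: under the slimmed-down interface above, a feature author now overrides TraversalFeaturesImpl directly and declares state in the constructor; Init(), init_name_debug(), features(), and TraversalFeaturesLog are gone, and NumBytesContext() is renamed StateSize(). A minimal sketch of a stateful feature against this header — the TreeDepth name and its logic are invented:)

#include "ff.h"
#include "fdict.h"

// Toy stateful feature: each node's state is one byte holding the depth of
// its subtree; the feature value fired on an edge is the resulting depth.
class TreeDepth : public FeatureFunction {
 public:
  explicit TreeDepth(const std::string& /*param*/)
      : fid_(FD::Convert("TreeDepth")) {
    SetStateSize(1);  // per the comment above: constructors only
  }
 protected:
  virtual void TraversalFeaturesImpl(const SentenceMetadata& /*smeta*/,
                                     const HG::Edge& /*edge*/,
                                     const std::vector<const void*>& ants,
                                     SparseVector<double>* features,
                                     SparseVector<double>* /*estimated*/,
                                     void* state) const {
    unsigned char d = 0;
    for (unsigned i = 0; i < ants.size(); ++i) {
      const unsigned char a = *static_cast<const unsigned char*>(ants[i]);
      if (a > d) d = a;
    }
    ++d;  // this node is one level deeper (toy code: wraps past 255)
    *static_cast<unsigned char*>(state) = d;  // states combine iff memcmp-equal
    features->set_value(fid_, d);
  }
 private:
  const int fid_;
};
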
diff --git a/decoder/ff_basic.cc b/decoder/ff_basic.cc
new file mode 100644
index 00000000..f9404d24
--- /dev/null
+++ b/decoder/ff_basic.cc
@@ -0,0 +1,80 @@
+#include "ff_basic.h"
+
+#include "fast_lexical_cast.hpp"
+#include "hg.h"
+
+using namespace std;
+
+// Hiero and Joshua use log_10(e) as the value, so I do too
+WordPenalty::WordPenalty(const string& param) :
+ fid_(FD::Convert("WordPenalty")),
+ value_(-1.0 / log(10)) {
+ if (!param.empty()) {
+ cerr << "Warning WordPenalty ignoring parameter: " << param << endl;
+ }
+}
+
+void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_states,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* state) const {
+ (void) smeta;
+ (void) ant_states;
+ (void) state;
+ (void) estimated_features;
+ features->set_value(fid_, edge.rule_->EWords() * value_);
+}
+
+
+SourceWordPenalty::SourceWordPenalty(const string& param) :
+ fid_(FD::Convert("SourceWordPenalty")),
+ value_(-1.0 / log(10)) {
+ if (!param.empty()) {
+ cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl;
+ }
+}
+
+void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_states,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* state) const {
+ (void) smeta;
+ (void) ant_states;
+ (void) state;
+ (void) estimated_features;
+ features->set_value(fid_, edge.rule_->FWords() * value_);
+}
+
+
+ArityPenalty::ArityPenalty(const std::string& param) :
+ value_(-1.0 / log(10)) {
+ string fname = "Arity_";
+ unsigned MAX=DEFAULT_MAX_ARITY;
+ using namespace boost;
+ if (!param.empty())
+ MAX=lexical_cast<unsigned>(param);
+ for (unsigned i = 0; i <= MAX; ++i) {
+ WordID fid=FD::Convert(fname+lexical_cast<string>(i));
+ fids_.push_back(fid);
+ }
+ while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything
+}
+
+void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_states,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* state) const {
+ (void) smeta;
+ (void) ant_states;
+ (void) state;
+ (void) estimated_features;
+ unsigned a=edge.Arity();
+ features->set_value(a<fids_.size()?fids_[a]:0, value_);
+}
+
diff --git a/decoder/ff_basic.h b/decoder/ff_basic.h
new file mode 100644
index 00000000..901c0110
--- /dev/null
+++ b/decoder/ff_basic.h
@@ -0,0 +1,68 @@
+#ifndef _FF_BASIC_H_
+#define _FF_BASIC_H_
+
+#include "ff.h"
+
+// word penalty feature, for each word on the E side of a rule,
+// add value_
+class WordPenalty : public FeatureFunction {
+ public:
+ WordPenalty(const std::string& param);
+ static std::string usage(bool p,bool d) {
+ return usage_helper("WordPenalty","","number of target words (local feature)",p,d);
+ }
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ const int fid_;
+ const double value_;
+};
+
+class SourceWordPenalty : public FeatureFunction {
+ public:
+ SourceWordPenalty(const std::string& param);
+ static std::string usage(bool p,bool d) {
+ return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d);
+ }
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ const int fid_;
+ const double value_;
+};
+
+#define DEFAULT_MAX_ARITY 9
+#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x
+#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x)
+#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY)
+
+class ArityPenalty : public FeatureFunction {
+ public:
+ ArityPenalty(const std::string& param);
+ static std::string usage(bool p,bool d) {
+ return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d);
+ }
+
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ std::vector<WordID> fids_;
+ const double value_;
+};
+
+#endif
diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h
index 5544920e..344dc788 100644
--- a/decoder/ff_bleu.h
+++ b/decoder/ff_bleu.h
@@ -20,7 +20,7 @@ class BLEUModel : public FeatureFunction {
static std::string usage(bool param,bool verbose);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_charset.cc b/decoder/ff_charset.cc
index 472de82b..6429088b 100644
--- a/decoder/ff_charset.cc
+++ b/decoder/ff_charset.cc
@@ -1,5 +1,7 @@
#include "ff_charset.h"
+#include "tdict.h"
+#include "hg.h"
#include "fdict.h"
#include "stringlib.h"
@@ -20,8 +22,8 @@ bool ContainsNonLatin(const string& word) {
void NonLatinCount::TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,
const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
void* context) const {
const vector<WordID>& e = edge.rule_->e();
int count = 0;
diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h
index b1ad537e..267ef65d 100644
--- a/decoder/ff_charset.h
+++ b/decoder/ff_charset.h
@@ -13,10 +13,10 @@ class NonLatinCount : public FeatureFunction {
NonLatinCount(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
void* context) const;
private:
mutable std::map<WordID, bool> is_non_latin_;
diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc
index 9de4d737..f2b0e67c 100644
--- a/decoder/ff_context.cc
+++ b/decoder/ff_context.cc
@@ -5,12 +5,14 @@
#include <cassert>
#include <cmath>
+#include "hg.h"
#include "filelib.h"
#include "stringlib.h"
#include "sentence_metadata.h"
#include "lattice.h"
#include "fdict.h"
#include "verbose.h"
+#include "tdict.h"
RuleContextFeatures::RuleContextFeatures(const string& param) {
// cerr << "initializing RuleContextFeatures with parameters: " << param;
diff --git a/decoder/ff_context.h b/decoder/ff_context.h
index 89bcb557..19198ec3 100644
--- a/decoder/ff_context.h
+++ b/decoder/ff_context.h
@@ -14,7 +14,7 @@ class RuleContextFeatures : public FeatureFunction {
RuleContextFeatures(const string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index 252dbf8c..e6f78f84 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -5,6 +5,7 @@
#include "klm/lm/model.hh"
+#include "hg.h"
#include "sentence_metadata.h"
#include "lattice.h"
#include "tdict.h"
diff --git a/decoder/ff_csplit.h b/decoder/ff_csplit.h
index 38c0c5b8..64d42526 100644
--- a/decoder/ff_csplit.h
+++ b/decoder/ff_csplit.h
@@ -12,7 +12,7 @@ class BasicCSplitFeatures : public FeatureFunction {
BasicCSplitFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -27,7 +27,7 @@ class ReverseCharLMCSplitFeature : public FeatureFunction {
ReverseCharLMCSplitFeature(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc
index 43528405..fe7a472e 100644
--- a/decoder/ff_dwarf.cc
+++ b/decoder/ff_dwarf.cc
@@ -4,6 +4,7 @@
#include <string>
#include <iostream>
#include <map>
+#include "hg.h"
#include "ff_dwarf.h"
#include "dwarf.h"
#include "wordid.h"
diff --git a/decoder/ff_dwarf.h b/decoder/ff_dwarf.h
index 083fcc7c..3d6a7da6 100644
--- a/decoder/ff_dwarf.h
+++ b/decoder/ff_dwarf.h
@@ -56,7 +56,7 @@ class Dwarf : public FeatureFunction {
function word alignments set by 3.
*/
void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_external.cc b/decoder/ff_external.cc
new file mode 100644
index 00000000..dea0e20f
--- /dev/null
+++ b/decoder/ff_external.cc
@@ -0,0 +1,60 @@
+#include "ff_external.h"
+
+#include <dlfcn.h>
+
+#include "stringlib.h"
+#include "hg.h"
+
+using namespace std;
+
+ExternalFeature::ExternalFeature(const string& param) {
+ size_t pos = param.find(' ');
+ string nparam;
+ string file = param;
+ if (pos < param.size()) {
+ nparam = Trim(param.substr(pos + 1));
+ file = param.substr(0, pos);
+ }
+ if (file.size() < 1) {
+ cerr << "External requires a path to a dynamic library!\n";
+ abort();
+ }
+ lib_handle = dlopen(file.c_str(), RTLD_LAZY);
+ if (!lib_handle) {
+ cerr << "dlopen reports: " << dlerror() << endl;
+ cerr << "Did you provide a full path to the dynamic library?\n";
+ abort();
+ }
+ FeatureFunction* (*fn)(const string&) =
+ (FeatureFunction* (*)(const string&))(dlsym(lib_handle, "create_ff"));
+ if (!fn) {
+ cerr << "dlsym reports: " << dlerror() << endl;
+ abort();
+ }
+ ff_ext = (*fn)(nparam);
+ SetStateSize(ff_ext->StateSize());
+}
+
+ExternalFeature::~ExternalFeature() {
+ delete ff_ext;
+ dlclose(lib_handle);
+}
+
+void ExternalFeature::PrepareForInput(const SentenceMetadata& smeta) {
+ ff_ext->PrepareForInput(smeta);
+}
+
+void ExternalFeature::FinalTraversalFeatures(const void* context,
+ SparseVector<double>* features) const {
+ ff_ext->FinalTraversalFeatures(context, features);
+}
+
+void ExternalFeature::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+ ff_ext->TraversalFeaturesImpl(smeta, edge, ant_contexts, features, estimated_features, context);
+}
+
diff --git a/decoder/ff_external.h b/decoder/ff_external.h
new file mode 100644
index 00000000..3e2bee51
--- /dev/null
+++ b/decoder/ff_external.h
@@ -0,0 +1,26 @@
+#ifndef _FFEXTERNAL_H_
+#define _FFEXTERNAL_H_
+
+#include "ff.h"
+
+// dynamically loaded feature function
+class ExternalFeature : public FeatureFunction {
+ public:
+ ExternalFeature(const std::string& param);
+ ~ExternalFeature();
+ virtual void PrepareForInput(const SentenceMetadata& smeta);
+ virtual void FinalTraversalFeatures(const void* context,
+ SparseVector<double>* features) const;
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ void* lib_handle;
+ FeatureFunction* ff_ext;
+};
+
+#endif
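
(Not part of the diff: ff_external.cc above resolves the symbol create_ff via dlsym() and passes it everything after the library path, so a plugin presumably exports a single factory function with C linkage that returns a FeatureFunction built against the same headers — note the friend declaration in ff.h that lets ExternalFeature forward to the protected methods. A hypothetical plugin, compiled to a shared object with something like g++ -shared -fPIC; the ExampleFF class is invented:)

#include <string>
#include "ff.h"
#include "hg.h"
#include "fdict.h"

// Fires a single count feature: the number of target-side words, similar in
// spirit to WordPenalty but without the log10(e) scaling.
class ExampleFF : public FeatureFunction {
 public:
  explicit ExampleFF(const std::string& /*param*/)
      : fid_(FD::Convert("ExampleFF")) {}
 protected:
  virtual void TraversalFeaturesImpl(const SentenceMetadata& /*smeta*/,
                                     const HG::Edge& edge,
                                     const std::vector<const void*>& /*ants*/,
                                     SparseVector<double>* features,
                                     SparseVector<double>* /*estimated*/,
                                     void* /*state*/) const {
    features->set_value(fid_, edge.rule_->EWords());
  }
 private:
  const int fid_;
};

// dlsym() looks this up by its unmangled name, hence extern "C".
extern "C" FeatureFunction* create_ff(const std::string& param) {
  return new ExampleFF(param);
}
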
diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h
index 5eb68c8b..bfdd3257 100644
--- a/decoder/ff_factory.h
+++ b/decoder/ff_factory.h
@@ -43,7 +43,6 @@ template<class FF>
struct FFFactory : public FactoryBase<FeatureFunction> {
FP Create(std::string param) const {
FF *ret=new FF(param);
- ret->Init();
return FP(ret);
}
virtual std::string usage(bool params,bool verbose) const {
@@ -57,7 +56,6 @@ template<class FF>
struct FsaFactory : public FactoryBase<FsaFeatureFunction> {
FP Create(std::string param) const {
FF *ret=new FF(param);
- ret->Init();
return FP(ret);
}
virtual std::string usage(bool params,bool verbose) const {
@@ -98,8 +96,6 @@ struct FactoryRegistry : public UntypedFactoryRegistry {
if (debug)
cerr<<"debug enabled for "<<ffname<< " - remaining options: '"<<param<<"'\n";
FP res = dynamic_cast<FB const&>(*it->second).Create(param);
- res->init_name_debug(ffname,debug);
- // could add a res->Init() here instead of in Create if we wanted feature id to potentially differ based on the registered name rather than static usage() - of course, specific feature ids can be computed on the basis of feature param as well; this only affects the default single feature id=name
return res;
}
};
diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc
index 09ef282c..fefa90bd 100644
--- a/decoder/ff_klm.cc
+++ b/decoder/ff_klm.cc
@@ -327,11 +327,6 @@ KLanguageModel<Model>::KLanguageModel(const string& param) {
}
template <class Model>
-Features KLanguageModel<Model>::features() const {
- return single_feature(fid_);
-}
-
-template <class Model>
KLanguageModel<Model>::~KLanguageModel() {
delete pimpl_;
}
@@ -362,7 +357,6 @@ void KLanguageModel<Model>::FinalTraversalFeatures(const void* ant_state,
template <class Model> boost::shared_ptr<FeatureFunction> CreateModel(const std::string &param) {
KLanguageModel<Model> *ret = new KLanguageModel<Model>(param);
- ret->Init();
return boost::shared_ptr<FeatureFunction>(ret);
}
diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h
index 6efe50f6..b5ceffd0 100644
--- a/decoder/ff_klm.h
+++ b/decoder/ff_klm.h
@@ -20,10 +20,9 @@ class KLanguageModel : public FeatureFunction {
virtual void FinalTraversalFeatures(const void* context,
SparseVector<double>* features) const;
static std::string usage(bool param,bool verbose);
- Features features() const;
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 5e16d4e3..6ec7b4f3 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -519,10 +519,6 @@ LanguageModel::LanguageModel(const string& param) {
SetStateSize(LanguageModelImpl::OrderToStateSize(order));
}
-Features LanguageModel::features() const {
- return single_feature(fid_);
-}
-
LanguageModel::~LanguageModel() {
delete pimpl_;
}
diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h
index ccee4268..94e18f00 100644
--- a/decoder/ff_lm.h
+++ b/decoder/ff_lm.h
@@ -55,10 +55,9 @@ class LanguageModel : public FeatureFunction {
SparseVector<double>* features) const;
std::string DebugStateToString(const void* state) const;
static std::string usage(bool param,bool verbose);
- Features features() const;
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -81,7 +80,7 @@ class LanguageModelRandLM : public FeatureFunction {
std::string DebugStateToString(const void* state) const;
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_ngrams.h b/decoder/ff_ngrams.h
index 064dbb49..4965d235 100644
--- a/decoder/ff_ngrams.h
+++ b/decoder/ff_ngrams.h
@@ -17,7 +17,7 @@ class NgramDetector : public FeatureFunction {
SparseVector<double>* features) const;
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc
index bd4c4cc0..0aafb0ba 100644
--- a/decoder/ff_rules.cc
+++ b/decoder/ff_rules.cc
@@ -10,6 +10,8 @@
#include "lattice.h"
#include "fdict.h"
#include "verbose.h"
+#include "tdict.h"
+#include "hg.h"
using namespace std;
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index 48d8bd05..7f5e1dfa 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -3,6 +3,7 @@
#include <vector>
#include <map>
+#include "trule.h"
#include "ff.h"
#include "array2d.h"
#include "wordid.h"
@@ -12,7 +13,7 @@ class RuleIdentityFeatures : public FeatureFunction {
RuleIdentityFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -27,7 +28,7 @@ class RuleNgramFeatures : public FeatureFunction {
RuleNgramFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_ruleshape.cc b/decoder/ff_ruleshape.cc
index f56ccfa9..7bb548c4 100644
--- a/decoder/ff_ruleshape.cc
+++ b/decoder/ff_ruleshape.cc
@@ -1,5 +1,7 @@
#include "ff_ruleshape.h"
+#include "trule.h"
+#include "hg.h"
#include "fdict.h"
#include <sstream>
diff --git a/decoder/ff_ruleshape.h b/decoder/ff_ruleshape.h
index 23c9827e..9f20faf3 100644
--- a/decoder/ff_ruleshape.h
+++ b/decoder/ff_ruleshape.h
@@ -9,7 +9,7 @@ class RuleShapeFeatures : public FeatureFunction {
RuleShapeFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc
index 035132b4..a1997695 100644
--- a/decoder/ff_source_syntax.cc
+++ b/decoder/ff_source_syntax.cc
@@ -3,6 +3,7 @@
#include <sstream>
#include <stack>
+#include "hg.h"
#include "sentence_metadata.h"
#include "array2d.h"
#include "filelib.h"
diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h
index 279563e1..a8c7150a 100644
--- a/decoder/ff_source_syntax.h
+++ b/decoder/ff_source_syntax.h
@@ -11,7 +11,7 @@ class SourceSyntaxFeatures : public FeatureFunction {
~SourceSyntaxFeatures();
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -28,7 +28,7 @@ class SourceSpanSizeFeatures : public FeatureFunction {
~SourceSpanSizeFeatures();
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc
index 0483517b..0ccac69b 100644
--- a/decoder/ff_spans.cc
+++ b/decoder/ff_spans.cc
@@ -4,6 +4,8 @@
#include <cassert>
#include <cmath>
+#include "hg.h"
+#include "tdict.h"
#include "filelib.h"
#include "stringlib.h"
#include "sentence_metadata.h"
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index 24e0dede..d2f5e84c 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -12,7 +12,7 @@ class SpanFeatures : public FeatureFunction {
SpanFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -49,7 +49,7 @@ class CMR2008ReorderingFeatures : public FeatureFunction {
CMR2008ReorderingFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc
index fd9210fa..7f9af9cd 100644
--- a/decoder/ff_tagger.cc
+++ b/decoder/ff_tagger.cc
@@ -2,6 +2,7 @@
#include <sstream>
+#include "hg.h"
#include "tdict.h"
#include "sentence_metadata.h"
#include "stringlib.h"
diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h
index bd5b62c0..46418b0c 100644
--- a/decoder/ff_tagger.h
+++ b/decoder/ff_tagger.h
@@ -18,7 +18,7 @@ class Tagger_BigramIndicator : public FeatureFunction {
Tagger_BigramIndicator(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -39,7 +39,7 @@ class LexicalPairIndicator : public FeatureFunction {
virtual void PrepareForInput(const SentenceMetadata& smeta);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -59,7 +59,7 @@ class OutputIndicator : public FeatureFunction {
OutputIndicator(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index d7a2dda8..ba3d0b9b 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -13,7 +13,7 @@ class RelativeSentencePosition : public FeatureFunction {
RelativeSentencePosition(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -36,7 +36,7 @@ class SourceBigram : public FeatureFunction {
void PrepareForInput(const SentenceMetadata& smeta);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -55,7 +55,7 @@ class LexNullJump : public FeatureFunction {
LexNullJump(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -72,7 +72,7 @@ class NewJump : public FeatureFunction {
NewJump(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -109,7 +109,7 @@ class LexicalTranslationTrigger : public FeatureFunction {
LexicalTranslationTrigger(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -132,14 +132,14 @@ class BlunsomSynchronousParseHack : public FeatureFunction {
BlunsomSynchronousParseHack(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
void* out_context) const;
private:
inline bool DoesNotBelong(const void* state) const {
- for (int i = 0; i < NumBytesContext(); ++i) {
+ for (int i = 0; i < StateSize(); ++i) {
if (*(static_cast<const unsigned char*>(state) + i)) return false;
}
return true;
@@ -148,9 +148,9 @@ class BlunsomSynchronousParseHack : public FeatureFunction {
inline void AppendAntecedentString(const void* state, std::vector<WordID>* yield) const {
int i = 0;
int ind = 0;
- while (i < NumBytesContext() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; }
- // std::cerr << i << " " << NumBytesContext() << std::endl;
- assert(i != NumBytesContext());
+ while (i < StateSize() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; }
+ // std::cerr << i << " " << StateSize() << std::endl;
+ assert(i != StateSize());
assert(ind < cur_ref_->size());
int cur = *(static_cast<const unsigned char*>(state) + i);
int comp = 1;
@@ -171,7 +171,7 @@ class BlunsomSynchronousParseHack : public FeatureFunction {
}
inline void SetStateMask(int start, int end, void* state) const {
- assert((end / 8) < NumBytesContext());
+ assert((end / 8) < StateSize());
int i = 0;
int comp = 1;
for (int j = 0; j < start; ++j) {
@@ -209,7 +209,7 @@ class WordPairFeatures : public FeatureFunction {
WordPairFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -226,7 +226,7 @@ class IdentityCycleDetector : public FeatureFunction {
IdentityCycleDetector(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -242,7 +242,7 @@ class InputIndicator : public FeatureFunction {
InputIndicator(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -258,7 +258,7 @@ class Fertility : public FeatureFunction {
Fertility(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
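
The BlunsomSynchronousParseHack helpers above treat the feature state as a coverage bitmap over the reference: bit k of byte k/8 marks position k, which is why the scan advances ind by 8 per byte and why the bounds checks now go through StateSize() in place of the removed NumBytesContext(). A standalone sketch of that convention, assuming the LSB-first packing used by SetStateMask:

    // does the packed state mark reference position pos as covered?
    inline bool CoversPosition(const void* state, int pos) {
      const unsigned char* bytes = static_cast<const unsigned char*>(state);
      return (bytes[pos / 8] >> (pos % 8)) & 1;  // bit pos%8 of byte pos/8
    }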
diff --git a/decoder/ff_wordset.cc b/decoder/ff_wordset.cc
index 44468899..70cea7de 100644
--- a/decoder/ff_wordset.cc
+++ b/decoder/ff_wordset.cc
@@ -1,5 +1,6 @@
#include "ff_wordset.h"
+#include "hg.h"
#include "fdict.h"
#include <sstream>
#include <iostream>
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 7c9a3fb7..639e1514 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -2,6 +2,7 @@
#define _FF_WORDSET_H_
#include "ff.h"
+#include "tdict.h"
#include <tr1/unordered_set>
#include <boost/algorithm/string.hpp>
@@ -32,11 +33,9 @@ class WordSet : public FeatureFunction {
~WordSet() {
}
- Features features() const { return single_feature(fid_); }
-
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ffset.cc b/decoder/ffset.cc
new file mode 100644
index 00000000..5820f421
--- /dev/null
+++ b/decoder/ffset.cc
@@ -0,0 +1,72 @@
+#include "ffset.h"
+
+#include "ff.h"
+#include "tdict.h"
+#include "hg.h"
+
+using namespace std;
+
+ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) :
+ models_(models),
+ weights_(w),
+ state_size_(0),
+ model_state_pos_(models.size()) {
+ for (int i = 0; i < models_.size(); ++i) {
+ model_state_pos_[i] = state_size_;
+ state_size_ += models_[i]->StateSize();
+ }
+}
+
+void ModelSet::PrepareForInput(const SentenceMetadata& smeta) {
+ for (int i = 0; i < models_.size(); ++i)
+ const_cast<FeatureFunction*>(models_[i])->PrepareForInput(smeta);
+}
+
+void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
+ const Hypergraph& /* hg */,
+ const FFStates& node_states,
+ HG::Edge* edge,
+ FFState* context,
+ prob_t* combination_cost_estimate) const {
+ //edge->reset_info();
+ context->resize(state_size_);
+ if (state_size_ > 0) {
+ memset(&(*context)[0], 0, state_size_);
+ }
+ SparseVector<double> est_vals; // only computed if combination_cost_estimate is non-NULL
+ if (combination_cost_estimate) *combination_cost_estimate = prob_t::One();
+ for (int i = 0; i < models_.size(); ++i) {
+ const FeatureFunction& ff = *models_[i];
+ void* cur_ff_context = NULL;
+ vector<const void*> ants(edge->tail_nodes_.size());
+ bool has_context = ff.StateSize() > 0;
+ if (has_context) {
+ int spos = model_state_pos_[i];
+ cur_ff_context = &(*context)[spos];
+      for (int j = 0; j < ants.size(); ++j) {  // j: don't shadow the model index i
+        ants[j] = &node_states[edge->tail_nodes_[j]][spos];
+      }
+ }
+ ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context);
+ }
+ if (combination_cost_estimate)
+ combination_cost_estimate->logeq(est_vals.dot(weights_));
+ edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
+}
+
+void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge, SentenceMetadata const& smeta) const {
+ assert(1 == edge->rule_->Arity());
+ //edge->reset_info();
+ for (int i = 0; i < models_.size(); ++i) {
+ const FeatureFunction& ff = *models_[i];
+ const void* ant_state = NULL;
+ bool has_context = ff.StateSize() > 0;
+ if (has_context) {
+ int spos = model_state_pos_[i];
+ ant_state = &state[spos];
+ }
+ ff.FinalTraversalFeatures(ant_state, &edge->feature_values_);
+ }
+ edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
+}
+
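
The constructor above concatenates every model's state into one flat byte buffer per node, with model_state_pos_[i] holding model i's byte offset into it; AddFeaturesToEdge then hands each model a pointer into its own slice. A standalone recreation of the layout computation, with example sizes invented for illustration:

    #include <vector>

    // mirrors ModelSet's constructor: offsets into the concatenated FFState buffer
    std::vector<int> StateOffsets(const std::vector<int>& sizes, int* total) {
      std::vector<int> pos(sizes.size());
      *total = 0;
      for (size_t i = 0; i < sizes.size(); ++i) {
        pos[i] = *total;   // model i owns bytes [pos[i], pos[i] + sizes[i])
        *total += sizes[i];
      }
      return pos;          // e.g. sizes {0, 8, 4} -> pos {0, 0, 8}, total = 12
    }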
diff --git a/decoder/ffset.h b/decoder/ffset.h
new file mode 100644
index 00000000..28aef667
--- /dev/null
+++ b/decoder/ffset.h
@@ -0,0 +1,56 @@
+#ifndef _FFSET_H_
+#define _FFSET_H_
+
+#include <vector>
+#include "value_array.h"
+#include "prob.h"
+
+namespace HG { struct Edge; struct Node; }
+class Hypergraph;
+class FeatureFunction;
+class SentenceMetadata;
+
+// TODO let states be dynamically sized
+typedef ValueArray<uint8_t> FFState; // this is a fixed array, but about 10% faster than string
+
+// (FFState was previously a std::string: only its .data() was contiguous, and it was invalidated by the next string operation. ValueArray fixes that and, being fixed-size, saves a word.)
+typedef std::vector<FFState> FFStates;
+
+// this class is a set of FeatureFunctions that can be used to score, rescore,
+// etc. a (translation?) forest
+class ModelSet {
+ public:
+ ModelSet(const std::vector<double>& weights,
+ const std::vector<const FeatureFunction*>& models);
+
+ // sets edge->feature_values_ and edge->edge_prob_
+  // NOTE: the edge need not be in hg.edges_, but its TAIL nodes must be.
+  // edge features are overwritten, not added to (possibly because rule features aren't in ModelSet and so must be left alone)
+ void AddFeaturesToEdge(const SentenceMetadata& smeta,
+ const Hypergraph& hg,
+ const FFStates& node_states,
+ HG::Edge* edge,
+ FFState* residual_context,
+ prob_t* combination_cost_estimate = NULL) const;
+
+  // this is called INSTEAD of the above when the edge's result is the goal (the rule must be unary, i.e. one variable; typically it's also assumed to have no target terminals, e.g. for the LM)
+ void AddFinalFeatures(const FFState& residual_context,
+ HG::Edge* edge,
+ SentenceMetadata const& smeta) const;
+
+ // this is called once before any feature functions apply to a hypergraph
+ // it can be used to initialize sentence-specific data structures
+ void PrepareForInput(const SentenceMetadata& smeta);
+
+ bool empty() const { return models_.empty(); }
+
+ bool stateless() const { return !state_size_; }
+
+ private:
+ std::vector<const FeatureFunction*> models_;
+ const std::vector<double>& weights_;
+ int state_size_;
+ std::vector<int> model_state_pos_;
+};
+
+#endif
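
A caller-side sketch of the new header. When every model is stateless the node states never carry information, so each edge can be rescored independently; the function and variable names here are assumptions, not part of this commit:

    #include <cassert>
    #include "ff.h"
    #include "ffset.h"
    #include "hg.h"

    // hypothetical helper: rescore all edges of a forest with stateless models only
    void RescoreStateless(ModelSet& models, const SentenceMetadata& smeta, Hypergraph* hg) {
      models.PrepareForInput(smeta);              // once per sentence
      assert(models.stateless());
      FFStates node_states(hg->nodes_.size());    // stays empty: state_size_ == 0
      FFState context;                            // resized to 0 inside the call
      for (unsigned i = 0; i < hg->edges_.size(); ++i)
        models.AddFeaturesToEdge(smeta, *hg, node_states, &hg->edges_[i], &context, NULL);
    }

Stateful models instead require the bottom-up application in apply_models.cc, which fills node_states as it goes.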
diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc
index 4500490a..912f4f12 100644
--- a/decoder/grammar_test.cc
+++ b/decoder/grammar_test.cc
@@ -10,7 +10,9 @@
#include "tdict.h"
#include "grammar.h"
#include "bottom_up_parser.h"
+#include "hg.h"
#include "ff.h"
+#include "ffset.h"
#include "weights.h"
using namespace std;
diff --git a/decoder/hg.h b/decoder/hg.h
index 591e98ce..3d8cd9bc 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -33,47 +33,20 @@
// slow
#undef HG_EDGES_TOPO_SORTED
-class Hypergraph;
-typedef boost::shared_ptr<Hypergraph> HypergraphP;
-
-// class representing an acyclic hypergraph
-// - edges have 1 head, 0..n tails
-class Hypergraph {
-public:
- Hypergraph() : is_linear_chain_(false) {}
+// SmallVector is a fast, small vector<int> implementation for sizes <= 2
+typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
+typedef std::vector<int> EdgesVector; // indices in edges_
- // SmallVector is a fast, small vector<int> implementation for sizes <= 2
- typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
- typedef std::vector<int> EdgesVector; // indices in edges_
-
- // TODO get rid of cat_?
- // TODO keep cat_ and add span and/or state? :)
- struct Node {
- Node() : id_(), cat_() {}
- int id_; // equal to this object's position in the nodes_ vector
- WordID cat_; // non-terminal category if <0, 0 if not set
- WordID NT() const { return -cat_; }
- EdgesVector in_edges_; // an in edge is an edge with this node as its head. (in edges come from the bottom up to us) indices in edges_
- EdgesVector out_edges_; // an out edge is an edge with this node as its tail. (out edges leave us up toward the top/goal). indices in edges_
- void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
- cat_=o.cat_;
- }
- void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
- copy_fixed(o);
- id_=n2[id_];
- e2.reindex_push_back(o.in_edges_,in_edges_);
- e2.reindex_push_back(o.out_edges_,out_edges_);
- }
- };
+enum {
+ NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+};
+namespace HG {
- // TODO get rid of edge_prob_? (can be computed on the fly as the dot
- // product of the weight vector and the feature values)
struct Edge {
-// int poplimit; //TODO: cube pruning per edge limit? per node didn't work well at all. also, inside cost + outside(node) is the same information i'd use to set a per-edge limit anyway - and nonmonotonicity in cube pruning may mean it's good to favor edge (in same node) w/ relatively worse score
Edge() : i_(-1), j_(-1), prev_i_(-1), prev_j_(-1) {}
Edge(int id,Edge const& copy_pod_from) : id_(id) { copy_pod(copy_pod_from); } // call copy_features yourself later.
- Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after
+    Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after
: tail_nodes_(tail),id_(id) { copy_pod(copy_from);copy_features(copy_from); }
inline int Arity() const { return tail_nodes_.size(); }
int head_node_; // refers to a position in nodes_
@@ -83,8 +56,6 @@ public:
prob_t edge_prob_; // dot product of weights and feat_values
int id_; // equal to this object's position in the edges_ vector
- //FIXME: these span ids belong in Node, not Edge, right? every node should have the same spans.
-
// span info. typically, i_ and j_ refer to indices in the source sentence.
// In synchronous parsing, i_ and j_ will refer to target sentence/lattice indices
// while prev_i_ prev_j_ will refer to positions in the source.
@@ -97,54 +68,6 @@ public:
short int j_;
short int prev_i_;
short int prev_j_;
-
- void copy_info(Edge const& o) {
-#if USE_INFO_EDGE
- set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space). it's empty if nobody put any info there.
-#else
- (void) o;
-#endif
- }
- void copy_pod(Edge const& o) {
- rule_=o.rule_;
- i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
- }
- void copy_features(Edge const& o) {
- feature_values_=o.feature_values_;
- copy_info(o);
- }
- void copy_fixed(Edge const& o) {
- copy_pod(o);
- copy_features(o);
- edge_prob_ = o.edge_prob_;
- }
- void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
- copy_fixed(o);
- head_node_=n2[o.head_node_];
- id_=e2[o.id_];
- n2.reindex_push_back(o.tail_nodes_,tail_nodes_);
- }
-
-#if USE_INFO_EDGE
- std::ostringstream info_;
- void set_info(std::string const& s) {
- info_.str(s);
- info_.seekp(0,std::ios_base::end);
- }
- Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str(),std::ios_base::ate) {
-// info_.seekp(0,std::ios_base::end);
- }
- void operator=(Edge const& o) {
- head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
- set_info(o.info_.str());
- }
- std::string info() const { return info_.str(); }
- void reset_info() { info_.str(""); info_.clear(); }
-#else
- std::string info() const { return std::string(); }
- void reset_info() { }
- void set_info(std::string const& ) { }
-#endif
void show(std::ostream &o,unsigned mask=SPAN|RULE) const {
o<<'{';
if (mask&CATEGORY)
@@ -159,10 +82,6 @@ public:
o<<' '<<feature_values_;
if (mask&RULE)
o<<' '<<rule_->AsString(mask&RULE_LHS);
- if (USE_INFO_EDGE) {
- std::string const& i=info();
- if (mask&&!i.empty()) o << " |||"<<i; // remember, the initial space is expected as part of i
- }
o<<'}';
}
std::string show(unsigned mask=SPAN|RULE) const {
@@ -170,12 +89,28 @@ public:
show(o,mask);
return o.str();
}
- /* generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
-
- FIXME: make kbest create a simple derivation-tree structure (could be a
- hg), and replace the list-of-edges viterbi.h with a tree-structured one.
- CreateViterbiHypergraph can do for 1best, though.
- */
+ void copy_pod(Edge const& o) {
+ rule_=o.rule_;
+ i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
+ }
+ void copy_features(Edge const& o) {
+ feature_values_=o.feature_values_;
+ }
+ void copy_fixed(Edge const& o) {
+ copy_pod(o);
+ copy_features(o);
+ edge_prob_ = o.edge_prob_;
+ }
+ void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
+ copy_fixed(o);
+ head_node_=n2[o.head_node_];
+ id_=e2[o.id_];
+ n2.reindex_push_back(o.tail_nodes_,tail_nodes_);
+ }
+ // generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
+ // FIXME: make kbest create a simple derivation-tree structure (could be a
+ // hg), and replace the list-of-edges viterbi.h with a tree-structured one.
+ // CreateViterbiHypergraph can do for 1best, though.
template <class EdgeRecurse,class TEdgeHandle>
std::string derivation_tree(EdgeRecurse const& re,TEdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
std::ostringstream o;
@@ -203,7 +138,43 @@ public:
}
};
- // all this info ought to live in Node, but for some reason it's on Edges.
+ // TODO get rid of cat_?
+ // TODO keep cat_ and add span and/or state? :)
+ struct Node {
+ Node() : id_(), cat_() {}
+ int id_; // equal to this object's position in the nodes_ vector
+ WordID cat_; // non-terminal category if <0, 0 if not set
+ WordID NT() const { return -cat_; }
+ EdgesVector in_edges_; // an in edge is an edge with this node as its head. (in edges come from the bottom up to us) indices in edges_
+ EdgesVector out_edges_; // an out edge is an edge with this node as its tail. (out edges leave us up toward the top/goal). indices in edges_
+ void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
+ cat_=o.cat_;
+ }
+ void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
+ copy_fixed(o);
+ id_=n2[id_];
+ e2.reindex_push_back(o.in_edges_,in_edges_);
+ e2.reindex_push_back(o.out_edges_,out_edges_);
+ }
+ };
+
+} // namespace HG
+
+class Hypergraph;
+typedef boost::shared_ptr<Hypergraph> HypergraphP;
+// class representing an acyclic hypergraph
+// - edges have 1 head, 0..n tails
+class Hypergraph {
+public:
+ Hypergraph() : is_linear_chain_(false) {}
+ typedef HG::Node Node;
+ typedef HG::Edge Edge;
+ typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
+ typedef std::vector<int> EdgesVector; // indices in edges_
+ enum {
+ NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+ };
+
// except for stateful models that have split nt,span, this should identify the node
void SetNodeOrigin(int nodeid,NTSpan &r) const {
Node const &n=nodes_[nodeid];
@@ -230,18 +201,9 @@ public:
}
return s;
}
- // 0 if none, -TD index otherwise (just like in rule)
WordID NodeLHS(int nodeid) const {
Node const &n=nodes_[nodeid];
return n.NT();
- /*
- if (!n.in_edges_.empty()) {
- Edge const& e=edges_[n.in_edges_.front()];
- if (e.rule_)
- return -e.rule_->lhs_;
- }
- return 0;
- */
}
typedef std::vector<prob_t> EdgeProbs;
@@ -250,14 +212,8 @@ public:
typedef std::vector<bool> NodeMask;
std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
-// builds viterbi hg and returns it formatted as a pretty string
-
- enum {
- NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
- };
std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
- // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first
typedef Edge const* EdgeHandle;
EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const {
@@ -334,7 +290,7 @@ public:
Edge* AddEdge(Edge const& in_edge, const TailNodeVector& tail) {
edges_.push_back(Edge(edges_.size(),in_edge));
Edge* edge = &edges_.back();
- edge->copy_features(in_edge);
+ edge->feature_values_ = in_edge.feature_values_;
edge->tail_nodes_ = tail; // possibly faster than copying to Edge() constructed above then copying via push_back. perhaps optimized it's the same.
index_tails(*edge);
return edge;
@@ -503,9 +459,9 @@ public:
template <class V>
void visit_edges_topo(V &v) {
- for (int i = 0; i < nodes_.size(); ++i) {
+ for (unsigned i = 0; i < nodes_.size(); ++i) {
EdgesVector const& in=nodes_[i].in_edges_;
- for (int j=0;j<in.size();++j) {
+ for (unsigned j=0;j<in.size();++j) {
int e=in[j];
v(i,e,edges_[e]);
}
@@ -534,14 +490,14 @@ private:
// for generic Viterbi/Inside algorithms
struct EdgeProb {
typedef prob_t Weight;
- inline const prob_t& operator()(const Hypergraph::Edge& e) const { return e.edge_prob_; }
+ inline const prob_t& operator()(const HG::Edge& e) const { return e.edge_prob_; }
};
struct EdgeSelectEdgeWeightFunction {
typedef prob_t Weight;
typedef std::vector<bool> EdgeMask;
EdgeSelectEdgeWeightFunction(const EdgeMask& v) : v_(v) {}
- inline prob_t operator()(const Hypergraph::Edge& e) const {
+ inline prob_t operator()(const HG::Edge& e) const {
if (v_[e.id_]) return prob_t::One();
else return prob_t::Zero();
}
@@ -551,7 +507,7 @@ private:
struct ScaledEdgeProb {
ScaledEdgeProb(const double& alpha) : alpha_(alpha) {}
- inline prob_t operator()(const Hypergraph::Edge& e) const { return e.edge_prob_.pow(alpha_); }
+ inline prob_t operator()(const HG::Edge& e) const { return e.edge_prob_.pow(alpha_); }
const double alpha_;
typedef prob_t Weight;
};
@@ -560,7 +516,7 @@ struct ScaledEdgeProb {
struct EdgeFeaturesAndProbWeightFunction {
typedef SparseVector<prob_t> Weight;
typedef Weight Result; //TODO: change Result->Weight everywhere?
- inline const Weight operator()(const Hypergraph::Edge& e) const {
+ inline const Weight operator()(const HG::Edge& e) const {
SparseVector<prob_t> res;
for (SparseVector<double>::const_iterator it = e.feature_values_.begin();
it != e.feature_values_.end(); ++it)
@@ -571,7 +527,7 @@ struct EdgeFeaturesAndProbWeightFunction {
struct TransitionCountWeightFunction {
typedef double Weight;
- inline double operator()(const Hypergraph::Edge& e) const { (void)e; return 1.0; }
+ inline double operator()(const HG::Edge& e) const { (void)e; return 1.0; }
};
#endif
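
The net effect of the hg.h reorganization: Edge and Node now live in namespace HG, and Hypergraph re-exports them as nested typedefs, so both spellings name the same type and existing code keeps compiling. The payoff is that headers such as ffset.h above can forward-declare HG::Edge instead of including all of hg.h. A sketch:

    // in some header: no #include "hg.h" required
    namespace HG { struct Edge; }
    void Inspect(const HG::Edge& e);

    // in the corresponding .cc file
    #include "hg.h"
    static void TakesAlias(const Hypergraph::Edge& e) { (void)e; }
    void Inspect(const HG::Edge& e) { TakesAlias(e); }  // same type, no conversion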
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index 3a68a429..64c6663e 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -28,7 +28,7 @@ struct HGReader : public JSONParser {
hg.ConnectEdgeToHeadNode(&hg.edges_[in_edges[i]], node);
}
}
- void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorUnsigned& tail) {
+ void CreateEdge(const TRulePtr& rule, SparseVector<double>* feats, const SmallVectorUnsigned& tail) {
Hypergraph::Edge* edge = hg.AddEdge(rule, tail);
feats->swap(edge->feature_values_);
edge->i_ = spans[0];
@@ -392,8 +392,8 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses
const Hypergraph::Edge& e = hg.edges_[hg.nodes_[i].out_edges_[j]];
const string output = e.rule_->e_.size() ==2 ? Escape(TD::Convert(e.rule_->e_[1])) : EPS;
double prob = log(e.edge_prob_);
- if (isinf(prob)) { prob = -9e20; }
- if (isnan(prob)) { prob = 0; }
+ if (std::isinf(prob)) { prob = -9e20; }
+ if (std::isnan(prob)) { prob = 0; }
os << "('" << output << "'," << prob << "," << e.head_node_ - i << "),";
}
os << "),";
diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h
index f73a1d3f..c0377fe8 100644
--- a/decoder/inside_outside.h
+++ b/decoder/inside_outside.h
@@ -42,7 +42,7 @@ WeightType Inside(const Hypergraph& hg,
Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;
const unsigned num_in_edges = in.size();
for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[in[j]];
+ const HG::Edge& edge = hg.edges_[in[j]];
WeightType score = weight(edge);
for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) {
const int tail_node_index = edge.tail_nodes_[k];
@@ -74,7 +74,7 @@ void Outside(const Hypergraph& hg,
Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;
const int num_in_edges = in.size();
for (int j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[in[j]];
+ const HG::Edge& edge = hg.edges_[in[j]];
WeightType head_and_edge_weight = weight(edge);
head_and_edge_weight *= head_node_outside_score;
const int num_tail_nodes = edge.tail_nodes_.size();
@@ -138,7 +138,7 @@ struct InsideOutsides {
Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;
const int num_in_edges = in.size();
for (int j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[in[j]];
+ const HG::Edge& edge = hg.edges_[in[j]];
KType kbar_e = outside[i];
const int num_tail_nodes = edge.tail_nodes_.size();
for (int k = 0; k < num_tail_nodes; ++k)
@@ -156,7 +156,7 @@ struct InsideOutsides {
const int num_in_edges = in.size();
for (int j = 0; j < num_in_edges; ++j) {
int edgei=in[j];
- const Hypergraph::Edge& edge = hg.edges_[edgei];
+ const HG::Edge& edge = hg.edges_[edgei];
V x=weight(edge)*outside[i];
const int num_tail_nodes = edge.tail_nodes_.size();
for (int k = 0; k < num_tail_nodes; ++k)
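
All four hunks above touch the same pattern: the inside pass visits nodes in topological order and accumulates, for each node, the sum over its in-edges of the edge weight times the product of the tail nodes' inside scores. Isolated as a sketch, assuming nodes_ is already topologically sorted (as Inside() requires) and that Weight(0) is the additive identity:

    #include <vector>
    #include "hg.h"

    template <class WF>
    std::vector<typename WF::Weight> InsideScores(const Hypergraph& hg, const WF& weight) {
      typedef typename WF::Weight W;
      std::vector<W> inside(hg.nodes_.size(), W(0));
      for (unsigned v = 0; v < hg.nodes_.size(); ++v) {
        const Hypergraph::EdgesVector& in = hg.nodes_[v].in_edges_;
        for (unsigned j = 0; j < in.size(); ++j) {
          const HG::Edge& e = hg.edges_[in[j]];
          W score = weight(e);                      // edge weight
          for (unsigned k = 0; k < e.tail_nodes_.size(); ++k)
            score *= inside[e.tail_nodes_[k]];      // product over antecedents
          inside[v] += score;                       // sum over in-edges
        }
      }
      return inside;
    }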
diff --git a/decoder/kbest.h b/decoder/kbest.h
index 9af3a20e..9a55f653 100644
--- a/decoder/kbest.h
+++ b/decoder/kbest.h
@@ -48,7 +48,7 @@ namespace KBest {
}
struct Derivation {
- Derivation(const Hypergraph::Edge& e,
+ Derivation(const HG::Edge& e,
const SmallVectorInt& jv,
const WeightType& w,
const SparseVector<double>& f) :
@@ -58,11 +58,11 @@ namespace KBest {
feature_values(f) {}
// dummy constructor, just for query
- Derivation(const Hypergraph::Edge& e,
+ Derivation(const HG::Edge& e,
const SmallVectorInt& jv) : edge(&e), j(jv) {}
T yield;
- const Hypergraph::Edge* const edge;
+ const HG::Edge* const edge;
const SmallVectorInt j;
const WeightType score;
const SparseVector<double> feature_values;
@@ -82,8 +82,8 @@ namespace KBest {
Derivation const* d;
explicit EdgeHandle(Derivation const* d) : d(d) { }
// operator bool() const { return d->edge; }
- operator Hypergraph::Edge const* () const { return d->edge; }
-// Hypergraph::Edge const * operator ->() const { return d->edge; }
+ operator HG::Edge const* () const { return d->edge; }
+// HG::Edge const * operator ->() const { return d->edge; }
};
EdgeHandle operator()(unsigned t,unsigned taili,EdgeHandle const& parent) const {
@@ -158,7 +158,7 @@ namespace KBest {
// the yield is computed in LazyKthBest before the derivation is added to D
// returns NULL if j refers to derivation numbers larger than the
    // antecedent structure defines
- Derivation* CreateDerivation(const Hypergraph::Edge& e, const SmallVectorInt& j) {
+ Derivation* CreateDerivation(const HG::Edge& e, const SmallVectorInt& j) {
WeightType score = w(e);
SparseVector<double> feats = e.feature_values_;
for (int i = 0; i < e.Arity(); ++i) {
@@ -177,7 +177,7 @@ namespace KBest {
const Hypergraph::Node& node = g.nodes_[v];
for (unsigned i = 0; i < node.in_edges_.size(); ++i) {
- const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]];
+ const HG::Edge& edge = g.edges_[node.in_edges_[i]];
SmallVectorInt jv(edge.Arity(), 0);
Derivation* d = CreateDerivation(edge, jv);
assert(d);
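
For orientation: a Derivation is an edge plus a rank vector j, where j[i] selects the j[i]-th best derivation of tail node i, i.e. the lazy k-best scheme of Huang & Chiang. The loop above seeds each node with j = (0,...,0); successors then bump exactly one coordinate. A fragment-level sketch of that successor step ('cands' stands in for the candidate container and is an assumption):

    for (int i = 0; i < d->edge->Arity(); ++i) {
      SmallVectorInt j = d->j;                 // copy the rank vector
      ++j[i];                                  // next-best derivation at tail i
      Derivation* nd = CreateDerivation(*d->edge, j);
      if (nd) cands.push_back(nd);             // NULL: tail i has no j[i]-th best
    }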
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index b603e27a..d2c4715c 100644
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -12,6 +12,7 @@
#include "scorer.h"
#include "hg.h"
#include "ff_factory.h"
+#include "ffset.h"
#include "ff_bleu.h"
#include "sparse_vector.h"
#include "viterbi.h"
@@ -26,7 +27,7 @@
struct Translation {
typedef std::vector<WordID> Sentence;
Sentence sentence;
- FeatureVector features;
+ SparseVector<double> features;
Translation() { }
Translation(Hypergraph const& hg,WeightVector *feature_weights=0)
{
@@ -57,14 +58,14 @@ struct Oracle {
}
// feature 0 will be the error rate in fear and hope
// move toward hope
- FeatureVector ModelHopeGradient() const {
- FeatureVector r=hope.features-model.features;
+ SparseVector<double> ModelHopeGradient() const {
+ SparseVector<double> r=hope.features-model.features;
r.set_value(0,0);
return r;
}
// move toward hope from fear
- FeatureVector FearHopeGradient() const {
- FeatureVector r=hope.features-fear.features;
+ SparseVector<double> FearHopeGradient() const {
+ SparseVector<double> r=hope.features-fear.features;
r.set_value(0,0);
return r;
}
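
Component 0 of these vectors holds the oracle loss ("feature 0 will be the error rate"), which is why both gradients zero it before returning: the loss pseudo-feature must never receive a weight update. A hypothetical perceptron-style use; 'eta' and the dense 'weights' array (indexed by feature id) are assumptions, not part of this commit:

    #include <vector>

    void UpdateTowardHope(const Oracle& oracle, double eta, std::vector<double>* weights) {
      SparseVector<double> g = oracle.FearHopeGradient();  // hope - fear, component 0 zeroed
      for (SparseVector<double>::const_iterator it = g.begin(); it != g.end(); ++it)
        (*weights)[it->first] += eta * it->second;         // toward hope, away from fear
    }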
diff --git a/decoder/program_options.h b/decoder/program_options.h
index 87afb320..3cd7649a 100644
--- a/decoder/program_options.h
+++ b/decoder/program_options.h
@@ -94,7 +94,7 @@ struct any_printer : public boost::function<void (Ostream &,boost::any const&)>
{}
template <class T>
- explicit any_printer(T const* tag) : F(typed_print<T>()) {
+ explicit any_printer(T const*) : F(typed_print<T>()) {
}
template <class T>
diff --git a/decoder/tromble_loss.h b/decoder/tromble_loss.h
index 599a2d54..fde33100 100644
--- a/decoder/tromble_loss.h
+++ b/decoder/tromble_loss.h
@@ -28,7 +28,7 @@ class TrombleLossComputer : private boost::base_from_member<boost::scoped_ptr<Tr
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc
index 1b9c6665..9e381ac6 100644
--- a/decoder/viterbi.cc
+++ b/decoder/viterbi.cc
@@ -139,8 +139,8 @@ inline bool close_enough(double a,double b,double epsilon)
return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b);
}
-FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) {
- FeatureVector r;
+SparseVector<double> ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) {
+ SparseVector<double> r;
const prob_t p = Viterbi<FeatureVectorTraversal>(hg, &r);
if (weights) {
double logp=log(p);
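
ViterbiFeatures likewise drops the typedef. As its declaration in viterbi.h below documents, passing a weight vector turns on a consistency check: the returned feature sum dotted with the weights must match the Viterbi log probability. A usage sketch ('weights' is an assumption):

    SparseVector<double> f = ViterbiFeatures(hg, &weights, /*fatal_dotprod_disagreement=*/false);
    // false: a prob/dot-product mismatch warns on cerr instead of throwing
    double model_score = f.dot(weights);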
diff --git a/decoder/viterbi.h b/decoder/viterbi.h
index 03e961a2..a8a0ea7f 100644
--- a/decoder/viterbi.h
+++ b/decoder/viterbi.h
@@ -14,10 +14,10 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest"
//TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction?
// Traversal must implement:
// typedef T Result;
-// void operator()(Hypergraph::Edge const& e,const vector<const Result*>& ants, Result* result) const;
+// void operator()(HG::Edge const& e,const vector<const Result*>& ants, Result* result) const;
// WeightFunction must implement:
// typedef prob_t Weight;
-// Weight operator()(Hypergraph::Edge const& e) const;
+// Weight operator()(HG::Edge const& e) const;
template<class Traversal,class WeightFunction>
typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
typename Traversal::Result* result,
@@ -39,9 +39,9 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
*cur_node_best_weight = WeightType(1);
continue;
}
- Hypergraph::Edge const* edge_best=0;
+ HG::Edge const* edge_best=0;
for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]];
+ const HG::Edge& edge = hg.edges_[cur_node.in_edges_[j]];
WeightType score = weight(edge);
for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
score *= vit_weight[edge.tail_nodes_[k]];
@@ -51,7 +51,7 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
}
}
assert(edge_best);
- Hypergraph::Edge const& edgeb=*edge_best;
+ HG::Edge const& edgeb=*edge_best;
std::vector<const T*> antsb(edgeb.tail_nodes_.size());
for (unsigned k = 0; k < edgeb.tail_nodes_.size(); ++k)
antsb[k] = &vit_result[edgeb.tail_nodes_[k]];
@@ -98,7 +98,7 @@ prob_t Viterbi(const Hypergraph& hg,
struct PathLengthTraversal {
typedef int Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const int*>& ants,
int* result) const {
(void) edge;
@@ -109,7 +109,7 @@ struct PathLengthTraversal {
struct ESentenceTraversal {
typedef std::vector<WordID> Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const Result*>& ants,
Result* result) const {
edge.rule_->ESubstitute(ants, result);
@@ -118,7 +118,7 @@ struct ESentenceTraversal {
struct ELengthTraversal {
typedef int Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const int*>& ants,
int* result) const {
*result = edge.rule_->ELength() - edge.rule_->Arity();
@@ -128,7 +128,7 @@ struct ELengthTraversal {
struct FSentenceTraversal {
typedef std::vector<WordID> Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const Result*>& ants,
Result* result) const {
edge.rule_->FSubstitute(ants, result);
@@ -142,7 +142,7 @@ struct ETreeTraversal {
const std::string space;
const std::string right;
typedef std::vector<WordID> Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const Result*>& ants,
Result* result) const {
Result tmp;
@@ -162,7 +162,7 @@ struct FTreeTraversal {
const std::string space;
const std::string right;
typedef std::vector<WordID> Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const Result*>& ants,
Result* result) const {
Result tmp;
@@ -177,8 +177,8 @@ struct FTreeTraversal {
};
struct ViterbiPathTraversal {
- typedef std::vector<Hypergraph::Edge const*> Result;
- void operator()(const Hypergraph::Edge& edge,
+ typedef std::vector<HG::Edge const*> Result;
+ void operator()(const HG::Edge& edge,
std::vector<Result const*> const& ants,
Result* result) const {
for (unsigned i = 0; i < ants.size(); ++i)
@@ -189,8 +189,8 @@ struct ViterbiPathTraversal {
};
struct FeatureVectorTraversal {
- typedef FeatureVector Result;
- void operator()(Hypergraph::Edge const& edge,
+ typedef SparseVector<double> Result;
+ void operator()(HG::Edge const& edge,
std::vector<Result const*> const& ants,
Result* result) const {
for (unsigned i = 0; i < ants.size(); ++i)
@@ -210,6 +210,6 @@ int ViterbiELength(const Hypergraph& hg);
int ViterbiPathLength(const Hypergraph& hg);
/// if weights supplied, assert viterbi prob = features.dot(*weights) (exception if fatal, cerr warn if not). return features (sum over all edges in viterbi derivation)
-FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false);
+SparseVector<double> ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false);
#endif
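
Finally, the Traversal contract documented at the top of this header is easy to satisfy with the namespaced type. A custom traversal, invented for illustration, that sums rule arities along the Viterbi derivation:

    struct ArityTraversal {
      typedef int Result;
      void operator()(const HG::Edge& edge,
                      const std::vector<const int*>& ants,
                      int* result) const {
        *result = edge.Arity();                  // this edge's contribution
        for (unsigned i = 0; i < ants.size(); ++i)
          *result += *ants[i];                   // plus the chosen sub-derivations
      }
    };
    // usage sketch: int total; prob_t p = Viterbi<ArityTraversal>(hg, &total);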