From bb86637332d49f71c485df34576e464eaf053656 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 13 Sep 2011 17:36:23 +0100 Subject: get rid of bad Weights class so it no longer keeps a copy of a vector inside it --- decoder/decoder.cc | 64 ++++++++--------- decoder/decoder.h | 9 ++- mira/kbest_mira.cc | 62 ++++------------- pro-train/mr_pro_map.cc | 8 +-- pro-train/mr_pro_reduce.cc | 16 ++--- training/Makefile.am | 8 --- training/augment_grammar.cc | 4 +- training/collapse_weights.cc | 6 +- training/compute_cllh.cc | 23 +++--- training/grammar_convert.cc | 8 +-- training/mpi_batch_optimize.cc | 127 ++++++++-------------------------- training/mpi_online_optimize.cc | 69 +++++++----------- training/mr_optimize_reduce.cc | 19 ++--- utils/fdict.h | 2 + utils/phmt.cc | 8 +-- utils/weights.cc | 75 ++++++++++++-------- utils/weights.h | 22 +++--- vest/mr_vest_generate_mapper_input.cc | 6 +- 18 files changed, 201 insertions(+), 335 deletions(-) diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 25eb2de4..4d4b6245 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -159,8 +159,7 @@ struct RescoringPass { shared_ptr models; shared_ptr inter_conf; vector ffs; - shared_ptr w; // null == use previous weights - vector weight_vector; + shared_ptr > weight_vector; int fid_summary; // 0 == no summary feature double density_prune; // 0 == don't density prune double beam_prune; // 0 == don't beam prune @@ -169,7 +168,7 @@ struct RescoringPass { ostream& operator<<(ostream& os, const RescoringPass& rp) { os << "[num_fn=" << rp.ffs.size(); if (rp.inter_conf) { os << " int_alg=" << *rp.inter_conf; } - if (rp.w) os << " new_weights"; + //if (rp.weight_vector.size() > 0) os << " new_weights"; if (rp.fid_summary) os << " summary_feature=" << FD::Convert(rp.fid_summary); if (rp.density_prune) os << " density_prune=" << rp.density_prune; if (rp.beam_prune) os << " beam_prune=" << rp.beam_prune; @@ -181,13 +180,8 @@ struct DecoderImpl { DecoderImpl(po::variables_map& conf, int argc, char** argv, istream* cfg); ~DecoderImpl(); bool Decode(const string& input, DecoderObserver*); - void SetWeights(const vector& weights) { - init_weights = weights; - for (int i = 0; i < rescoring_passes.size(); ++i) { - if (rescoring_passes[i].models) - rescoring_passes[i].models->SetWeights(weights); - rescoring_passes[i].weight_vector = weights; - } + vector& CurrentWeightVector() { + return *rescoring_passes.back().weight_vector; } void SetId(int next_sent_id) { sent_id = next_sent_id - 1; } @@ -300,8 +294,7 @@ struct DecoderImpl { OracleBleu oracle; string formalism; shared_ptr translator; - Weights w_init_weights; // used with initial parse - vector init_weights; // weights used with initial parse + shared_ptr > init_weights; // weights used with initial parse vector > pffs; #ifdef FSA_RESCORING CFGOptions cfg_options; @@ -557,13 +550,18 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream exit(1); } - // load initial feature weights (and possibly freeze feature set) - if (conf.count("weights")) { - w_init_weights.InitFromFile(str("weights",conf)); - w_init_weights.InitVector(&init_weights); - init_weights.resize(FD::NumFeats()); + // load perfect hash function for features + if (conf.count("cmph_perfect_feature_hash")) { + cerr << "Loading perfect hash function from " << conf["cmph_perfect_feature_hash"].as() << " ...\n"; + FD::EnableHash(conf["cmph_perfect_feature_hash"].as()); + cerr << " " << FD::NumFeats() << " features in map\n"; } + // load initial feature weights (and 
possibly freeze feature set) + init_weights.reset(new vector); + if (conf.count("weights")) + Weights::InitFromFile(str("weights",conf), init_weights.get()); + // cube pruning pop-limit: we may want to configure this on a per-pass basis pop_limit = conf["cubepruning_pop_limit"].as(); @@ -582,9 +580,8 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream RescoringPass& rp = rescoring_passes.back(); // only configure new weights if pass > 0, otherwise we reuse the initial chart weights if (nth_pass_condition && conf.count(ws)) { - rp.w.reset(new Weights); - rp.w->InitFromFile(str(ws.c_str(), conf)); - rp.w->InitVector(&rp.weight_vector); + rp.weight_vector.reset(new vector()); + Weights::InitFromFile(str(ws.c_str(), conf), rp.weight_vector.get()); } bool has_stateful = false; if (conf.count(ff)) { @@ -624,11 +621,15 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream } // set up weight vectors since later phases may reuse weights from earlier phases - const vector* prev = &init_weights; + shared_ptr > prev_weights = init_weights; for (int pass = 0; pass < rescoring_passes.size(); ++pass) { RescoringPass& rp = rescoring_passes[pass]; - if (!rp.w) { rp.weight_vector = *prev; } else { prev = &rp.weight_vector; } - rp.models.reset(new ModelSet(rp.weight_vector, rp.ffs)); + if (!rp.weight_vector) { + rp.weight_vector = prev_weights; + } else { + prev_weights = rp.weight_vector; + } + rp.models.reset(new ModelSet(*rp.weight_vector, rp.ffs)); string ps = "Pass1 "; ps[4] += pass; if (!SILENT) show_models(conf,*rp.models,ps.c_str()); } @@ -650,12 +651,6 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream FD::Freeze(); // this means we can't see the feature names of not-weighted features } - if (conf.count("cmph_perfect_feature_hash")) { - cerr << "Loading perfect hash function from " << conf["cmph_perfect_feature_hash"].as() << " ...\n"; - FD::EnableHash(conf["cmph_perfect_feature_hash"].as()); - cerr << " " << FD::NumFeats() << " features in map\n"; - } - // set up translation back end if (formalism == "scfg") translator.reset(new SCFGTranslator(conf)); @@ -685,7 +680,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream } if (!fsa_ffs.empty()) { cerr<<"FSA: "; - show_all_features(fsa_ffs,init_weights,cerr,cerr,true,true); + show_all_features(fsa_ffs,*init_weights,cerr,cerr,true,true); } #endif @@ -733,7 +728,8 @@ bool Decoder::Decode(const string& input, DecoderObserver* o) { if (del) delete o; return res; } -void Decoder::SetWeights(const vector& weights) { pimpl_->SetWeights(weights); } +vector& Decoder::CurrentWeightVector() { return pimpl_->CurrentWeightVector(); } +const vector& Decoder::CurrentWeightVector() const { return pimpl_->CurrentWeightVector(); } void Decoder::SetSupplementalGrammar(const std::string& grammar_string) { assert(pimpl_->translator->GetDecoderType() == "SCFG"); static_cast(*pimpl_->translator).SetSupplementalGrammar(grammar_string); @@ -774,7 +770,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { translator->ProcessMarkupHints(smeta.sgml_); Timer t("Translation"); const bool translation_successful = - translator->Translate(to_translate, &smeta, init_weights, &forest); + translator->Translate(to_translate, &smeta, *init_weights, &forest); translator->SentenceComplete(); if (!translation_successful) { @@ -812,7 +808,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { for (int pass = 0; pass < 
rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; - const vector& cur_weights = rp.weight_vector; + const vector& cur_weights = *rp.weight_vector; if (!SILENT) cerr << endl << " RESCORING PASS #" << (pass+1) << " " << rp << endl; #ifdef FSA_RESCORING cfg_options.maybe_output_source(forest); @@ -933,7 +929,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { #endif } - const vector& last_weights = (rescoring_passes.empty() ? init_weights : rescoring_passes.back().weight_vector); + const vector& last_weights = (rescoring_passes.empty() ? *init_weights : *rescoring_passes.back().weight_vector); // Oracle Rescoring if(get_oracle_forest) { diff --git a/decoder/decoder.h b/decoder/decoder.h index 5491369f..9d009ffa 100644 --- a/decoder/decoder.h +++ b/decoder/decoder.h @@ -7,6 +7,8 @@ #include #include +#include "weights.h" // weight_t + #undef CP_TIME //#define CP_TIME #ifdef CP_TIME @@ -39,7 +41,12 @@ struct Decoder { Decoder(int argc, char** argv); Decoder(std::istream* config_file); bool Decode(const std::string& input, DecoderObserver* observer = NULL); - void SetWeights(const std::vector& weights); + + // access this to either *read* or *write* to the decoder's last + // weight vector (i.e., the weights of the finest past) + std::vector& CurrentWeightVector(); + const std::vector& CurrentWeightVector() const; + void SetId(int id); ~Decoder(); const boost::program_options::variables_map& GetConf() const { return conf; } diff --git a/mira/kbest_mira.cc b/mira/kbest_mira.cc index 6918a9a1..459a5e6f 100644 --- a/mira/kbest_mira.cc +++ b/mira/kbest_mira.cc @@ -32,21 +32,6 @@ namespace po = boost::program_options; bool invert_score; boost::shared_ptr rng; -void SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - -struct FComp { - const vector& w_; - FComp(const vector& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - void RandomPermutation(int len, vector* p_ids) { vector& ids = *p_ids; ids.resize(len); @@ -58,21 +43,6 @@ void RandomPermutation(int len, vector* p_ids) { } } -void ShowLargestFeatures(const vector& w) { - vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 
10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - --mid; - for (vector::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() @@ -209,14 +179,16 @@ int main(int argc, char** argv) { cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n"; return 1; } - // load initial weights - Weights weights; - weights.InitFromFile(conf["input_weights"].as()); - SparseVector lambdas; - weights.InitSparseVector(&lambdas); ReadFile ini_rf(conf["decoder_config"].as()); Decoder decoder(ini_rf.stream()); + + // load initial weights + vector& dense_weights = decoder.CurrentWeightVector(); + SparseVector lambdas; + Weights::InitFromFile(conf["input_weights"].as(), &dense_weights); + Weights::InitSparseVector(dense_weights, &lambdas); + const double max_step_size = conf["max_step_size"].as(); const double mt_metric_scale = conf["mt_metric_scale"].as(); @@ -230,7 +202,6 @@ int main(int argc, char** argv) { double tot_loss = 0; int dots = 0; int cur_pass = 0; - vector dense_weights; SparseVector tot; tot += lambdas; // initial weights normalizer++; // count for initial weights @@ -240,27 +211,22 @@ int main(int argc, char** argv) { vector order; RandomPermutation(corpus.size(), &order); while (lcount <= max_iteration) { - dense_weights.clear(); - weights.InitFromVector(lambdas); - weights.InitVector(&dense_weights); - decoder.SetWeights(dense_weights); + lambdas.init_vector(&dense_weights); if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.'; } if (corpus.size() == cur_sent) { cerr << " [AVG METRIC LAST PASS=" << (tot_loss / corpus.size()) << "]\n"; - ShowLargestFeatures(dense_weights); + Weights::ShowLargestFeatures(dense_weights); cur_sent = 0; tot_loss = 0; dots = 0; ostringstream os; os << "weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz"; - weights.WriteToFile(os.str(), true, &msg); SparseVector x = tot; x /= normalizer; ostringstream sa; sa << "weights.mira-pass" << (cur_pass < 10 ? 
"0" : "") << cur_pass << "-avg.gz"; - Weights ww; - ww.InitFromVector(x); - ww.WriteToFile(sa.str(), true, &msga); + x.init_vector(&dense_weights); + Weights::WriteToFile(os.str(), dense_weights, true, &msg); ++cur_pass; RandomPermutation(corpus.size(), &order); } @@ -294,11 +260,11 @@ int main(int argc, char** argv) { ++cur_sent; } cerr << endl; - weights.WriteToFile("weights.mira-final.gz", true, &msg); + Weights::WriteToFile("weights.mira-final.gz", dense_weights, true, &msg); tot /= normalizer; - weights.InitFromVector(tot); + tot.init_vector(dense_weights); msg = "# MIRA tuned weights (averaged vector)"; - weights.WriteToFile("weights.mira-final-avg.gz", true, &msg); + Weights::WriteToFile("weights.mira-final-avg.gz", dense_weights, true, &msg); cerr << "Optimization complete.\nAVERAGED WEIGHTS: weights.mira-final-avg.gz\n"; return 0; } diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc index 4324e8de..bc59285b 100644 --- a/pro-train/mr_pro_map.cc +++ b/pro-train/mr_pro_map.cc @@ -301,12 +301,8 @@ int main(int argc, char** argv) { const unsigned gamma = conf["candidate_pairs"].as(); const unsigned xi = conf["best_pairs"].as(); string weightsf = conf["weights"].as(); - vector weights; - { - Weights w; - w.InitFromFile(weightsf); - w.InitVector(&weights); - } + vector weights; + Weights::InitFromFile(weightsf, &weights); string kbest_repo = conf["kbest_repository"].as(); MkDirP(kbest_repo); while(in) { diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc index 9b422f33..9caaa1d1 100644 --- a/pro-train/mr_pro_reduce.cc +++ b/pro-train/mr_pro_reduce.cc @@ -194,7 +194,7 @@ int main(int argc, char** argv) { InitCommandLine(argc, argv, &conf); string line; vector > > training, testing; - SparseVector old_weights; + SparseVector old_weights; const bool tune_regularizer = conf.count("tune_regularizer"); if (tune_regularizer && !conf.count("testset")) { cerr << "--tune_regularizer requires --testset to be set\n"; @@ -210,9 +210,9 @@ int main(int argc, char** argv) { const double psi = conf["interpolation"].as(); if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; } if (conf.count("weights")) { - Weights w; - w.InitFromFile(conf["weights"].as()); - w.InitSparseVector(&old_weights); + vector dt; + Weights::InitFromFile(conf["weights"].as(), &dt); + Weights::InitSparseVector(dt, &old_weights); } ReadCorpus(&cin, &training); if (conf.count("testset")) { @@ -220,8 +220,8 @@ int main(int argc, char** argv) { ReadCorpus(rf.stream(), &testing); } cerr << "Number of features: " << FD::NumFeats() << endl; - vector x(FD::NumFeats(), 0.0); // x[0] is bias - for (SparseVector::const_iterator it = old_weights.begin(); + vector x(FD::NumFeats(), 0.0); // x[0] is bias + for (SparseVector::const_iterator it = old_weights.begin(); it != old_weights.end(); ++it) x[it->first] = it->second; double tppl = 0.0; @@ -257,7 +257,6 @@ int main(int argc, char** argv) { sigsq = sp[best_i].first; tppl = LearnParameters(training, testing, sigsq, conf["memory_buffers"].as(), &x); } - Weights w; if (conf.count("weights")) { for (int i = 1; i < x.size(); ++i) x[i] = (x[i] * psi) + old_weights.get(i) * (1.0 - psi); @@ -271,7 +270,6 @@ int main(int argc, char** argv) { cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl; } } - w.InitFromVector(x); - w.WriteToFile("-"); + Weights::WriteToFile("-", x); return 0; } diff --git a/training/Makefile.am b/training/Makefile.am index e075e417..6e2c06f5 100644 --- a/training/Makefile.am +++ 
b/training/Makefile.am @@ -12,9 +12,7 @@ bin_PROGRAMS = \ cllh_filter_grammar \ mpi_online_optimize \ mpi_batch_optimize \ - mpi_em_optimize \ compute_cllh \ - feature_expectations \ augment_grammar noinst_PROGRAMS = \ @@ -29,12 +27,6 @@ mpi_online_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -feature_expectations_SOURCES = feature_expectations.cc -feature_expectations_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -mpi_em_optimize_SOURCES = mpi_em_optimize.cc optimize.cc -mpi_em_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - compute_cllh_SOURCES = compute_cllh.cc compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz diff --git a/training/augment_grammar.cc b/training/augment_grammar.cc index df8d4ee8..e89a92d5 100644 --- a/training/augment_grammar.cc +++ b/training/augment_grammar.cc @@ -134,9 +134,7 @@ int main(int argc, char** argv) { } else { ngram = NULL; } extra_feature = conf.count("extra_lex_feature") > 0; if (conf.count("collapse_weights")) { - Weights w; - w.InitFromFile(conf["collapse_weights"].as()); - w.InitVector(&col_weights); + Weights::InitFromFile(conf["collapse_weights"].as(), &col_weights); } clear_features = conf.count("clear_features_after_collapse") > 0; gather_rules = false; diff --git a/training/collapse_weights.cc b/training/collapse_weights.cc index 4fb742fb..dc480f6c 100644 --- a/training/collapse_weights.cc +++ b/training/collapse_weights.cc @@ -59,10 +59,8 @@ int main(int argc, char** argv) { InitCommandLine(argc, argv, &conf); const string wfile = conf["weights"].as(); const string gfile = conf["grammar"].as(); - Weights wm; - wm.InitFromFile(wfile); - vector w; - wm.InitVector(&w); + vector w; + Weights::InitFromFile(wfile, &w); MarginalMap e_tots; MarginalMap f_tots; prob_t tot; diff --git a/training/compute_cllh.cc b/training/compute_cllh.cc index 332f6d0c..b496d196 100644 --- a/training/compute_cllh.cc +++ b/training/compute_cllh.cc @@ -148,15 +148,6 @@ int main(int argc, char** argv) { if (!InitCommandLine(argc, argv, &conf)) return false; - // load initial weights - Weights weights; - if (conf.count("weights")) - weights.InitFromFile(conf["weights"].as()); - - // freeze feature set - //const bool freeze_feature_set = conf.count("freeze_feature_set"); - //if (freeze_feature_set) FD::Freeze(); - // load cdec.ini and set up decoder ReadFile ini_rf(conf["decoder_config"].as()); Decoder decoder(ini_rf.stream()); @@ -165,17 +156,22 @@ int main(int argc, char** argv) { abort(); } + // load weights + vector& weights = decoder.CurrentWeightVector(); + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &weights); + + // freeze feature set + //const bool freeze_feature_set = conf.count("freeze_feature_set"); + //if (freeze_feature_set) FD::Freeze(); + vector corpus; vector ids; ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus, &ids); assert(corpus.size() > 0); assert(corpus.size() == ids.size()); - vector wv; - weights.InitVector(&wv); - 
decoder.SetWeights(wv); TrainingObserver observer; double objective = 0; - bool converged = false; observer.Reset(); if (rank == 0) @@ -197,3 +193,4 @@ int main(int argc, char** argv) { return 0; } + diff --git a/training/grammar_convert.cc b/training/grammar_convert.cc index 8d292f8a..bf8abb26 100644 --- a/training/grammar_convert.cc +++ b/training/grammar_convert.cc @@ -251,12 +251,10 @@ int main(int argc, char **argv) { const bool is_split_input = (conf["format"].as() == "split"); const bool is_json_input = is_split_input || (conf["format"].as() == "json"); const bool collapse_weights = conf.count("collapse_weights"); - Weights wts; vector w; - if (conf.count("weights")) { - wts.InitFromFile(conf["weights"].as()); - wts.InitVector(&w); - } + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &w); + if (collapse_weights && !w.size()) { cerr << "--collapse_weights requires a weights file to be specified!\n"; exit(1); diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc index 39a8af7d..cc5953f6 100644 --- a/training/mpi_batch_optimize.cc +++ b/training/mpi_batch_optimize.cc @@ -31,42 +31,12 @@ using namespace std; using boost::shared_ptr; namespace po = boost::program_options; -void SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - -struct FComp { - const vector& w_; - FComp(const vector& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowLargestFeatures(const vector& w) { - vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - for (vector::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() ("input_weights,w",po::value(),"Input feature weights file") ("training_data,t",po::value(),"Training data") ("decoder_config,d",po::value(),"Decoder configuration file") - ("sharded_input,s",po::value(), "Corpus and grammar files are 'sharded' so each processor loads its own input and grammar file. 
Argument is the directory containing the shards.") ("output_weights,o",po::value()->default_value("-"),"Output feature weights file") ("optimization_method,m", po::value()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)") ("correction_buffers,M", po::value()->default_value(10), "Number of gradients for LBFGS to maintain in memory") @@ -88,14 +58,10 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { } po::notify(*conf); - if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data") | conf->count("sharded_input")) || !conf->count("decoder_config")) { + if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data")) || !conf->count("decoder_config")) { cerr << dcmdline_options << endl; return false; } - if (conf->count("training_data") && conf->count("sharded_input")) { - cerr << "Cannot specify both --training_data and --sharded_input\n"; - return false; - } return true; } @@ -236,42 +202,9 @@ int main(int argc, char** argv) { po::variables_map conf; if (!InitCommandLine(argc, argv, &conf)) return 1; - string shard_dir; - if (conf.count("sharded_input")) { - shard_dir = conf["sharded_input"].as(); - if (!DirectoryExists(shard_dir)) { - if (rank == 0) cerr << "Can't find shard directory: " << shard_dir << endl; - return 1; - } - if (rank == 0) - cerr << "Shard directory: " << shard_dir << endl; - } - - // load initial weights - Weights weights; - if (rank == 0) { cerr << "Loading weights...\n"; } - weights.InitFromFile(conf["input_weights"].as()); - if (rank == 0) { cerr << "Done loading weights.\n"; } - - // freeze feature set (should be optional?) - const bool freeze_feature_set = true; - if (freeze_feature_set) FD::Freeze(); - // load cdec.ini and set up decoder vector cdec_ini; ReadConfig(conf["decoder_config"].as(), &cdec_ini); - if (shard_dir.size()) { - if (rank == 0) { - for (int i = 0; i < cdec_ini.size(); ++i) { - if (cdec_ini[i].find("grammar=") == 0) { - cerr << "!!! using sharded input and " << conf["decoder_config"].as() << " contains a grammar specification:\n" << cdec_ini[i] << "\n VERIFY THAT THIS IS CORRECT!\n"; - } - } - } - ostringstream g; - g << "grammar=" << shard_dir << "/grammar." << rank << "_of_" << size << ".gz"; - cdec_ini.push_back(g.str()); - } istringstream ini; StoreConfig(cdec_ini, &ini); if (rank == 0) cerr << "Loading grammar...\n"; @@ -282,22 +215,28 @@ int main(int argc, char** argv) { } if (rank == 0) cerr << "Done loading grammar!\n"; + // load initial weights + if (rank == 0) { cerr << "Loading weights...\n"; } + vector& lambdas = decoder->CurrentWeightVector(); + Weights::InitFromFile(conf["input_weights"].as(), &lambdas); + if (rank == 0) { cerr << "Done loading weights.\n"; } + + // freeze feature set (should be optional?) 
+ const bool freeze_feature_set = true; + if (freeze_feature_set) FD::Freeze(); + const int num_feats = FD::NumFeats(); if (rank == 0) cerr << "Number of features: " << num_feats << endl; + lambdas.resize(num_feats); + const bool gaussian_prior = conf.count("gaussian_prior"); - vector means(num_feats, 0); + vector means(num_feats, 0); if (conf.count("means")) { if (!gaussian_prior) { cerr << "Don't use --means without --gaussian_prior!\n"; exit(1); } - Weights wm; - wm.InitFromFile(conf["means"].as()); - if (num_feats != FD::NumFeats()) { - cerr << "[ERROR] Means file had unexpected features!\n"; - exit(1); - } - wm.InitVector(&means); + Weights::InitFromFile(conf["means"].as(), &means); } shared_ptr o; if (rank == 0) { @@ -309,26 +248,13 @@ int main(int argc, char** argv) { cerr << "Optimizer: " << o->Name() << endl; } double objective = 0; - vector lambdas(num_feats, 0.0); - weights.InitVector(&lambdas); - if (lambdas.size() != num_feats) { - cerr << "Initial weights file did not have all features specified!\n feats=" - << num_feats << "\n weights file=" << lambdas.size() << endl; - lambdas.resize(num_feats, 0.0); - } vector gradient(num_feats, 0.0); - vector rcv_grad(num_feats, 0.0); + vector rcv_grad; + rcv_grad.clear(); bool converged = false; vector corpus; - if (shard_dir.size()) { - ostringstream os; os << shard_dir << "/corpus." << rank << "_of_" << size; - ReadTrainingCorpus(os.str(), 0, 1, &corpus); - cerr << os.str() << " has " << corpus.size() << " training examples. " << endl; - if (corpus.size() > 500) { corpus.resize(500); cerr << " TRUNCATING\n"; } - } else { - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); - } + ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); assert(corpus.size() > 0); TrainingObserver observer; @@ -341,19 +267,20 @@ int main(int argc, char** argv) { if (rank == 0) { cerr << "Starting decoding... 
(~" << corpus.size() << " sentences / proc)\n"; } - decoder->SetWeights(lambdas); for (int i = 0; i < corpus.size(); ++i) decoder->Decode(corpus[i], &observer); cerr << " process " << rank << '/' << size << " done\n"; fill(gradient.begin(), gradient.end(), 0); - fill(rcv_grad.begin(), rcv_grad.end(), 0); observer.SetLocalGradientAndObjective(&gradient, &objective); double to = 0; #ifdef HAVE_MPI + rcv_grad.resize(num_feats, 0.0); mpi::reduce(world, &gradient[0], gradient.size(), &rcv_grad[0], plus(), 0); - mpi::reduce(world, objective, to, plus(), 0); swap(gradient, rcv_grad); + rcv_grad.clear(); + + mpi::reduce(world, objective, to, plus(), 0); objective = to; #endif @@ -378,7 +305,7 @@ int main(int argc, char** argv) { for (int i = 0; i < gradient.size(); ++i) gnorm += gradient[i] * gradient[i]; cerr << " GNORM=" << sqrt(gnorm) << endl; - vector old = lambdas; + vector old = lambdas; int c = 0; while (old == lambdas) { ++c; @@ -387,9 +314,8 @@ int main(int argc, char** argv) { assert(c < 5); } old.clear(); - SanityCheck(lambdas); - ShowLargestFeatures(lambdas); - weights.InitFromVector(lambdas); + Weights::SanityCheck(lambdas); + Weights::ShowLargestFeatures(lambdas); converged = o->HasConverged(); if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } @@ -399,7 +325,7 @@ int main(int argc, char** argv) { ostringstream vv; vv << "Objective = " << objective << " (eval count=" << o->EvaluationCount() << ")"; const string svv = vv.str(); - weights.WriteToFile(fname, true, &svv); + Weights::WriteToFile(fname, lambdas, true, &svv); } // rank == 0 int cint = converged; #ifdef HAVE_MPI @@ -411,3 +337,4 @@ int main(int argc, char** argv) { } return 0; } + diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc index 32033c19..2ef4a2e7 100644 --- a/training/mpi_online_optimize.cc +++ b/training/mpi_online_optimize.cc @@ -31,35 +31,6 @@ namespace mpi = boost::mpi; using namespace std; namespace po = boost::program_options; -void SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - -struct FComp { - const vector& w_; - FComp(const vector& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowLargestFeatures(const vector& w) { - vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 
10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - for (vector::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() @@ -250,10 +221,25 @@ int main(int argc, char** argv) { if (!InitCommandLine(argc, argv, &conf)) return 1; + vector > agenda; + if (!LoadAgenda(conf["training_agenda"].as(), &agenda)) + return 1; + if (rank == 0) + cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n"; + + assert(agenda.size() > 0); + + if (1) { // hack to load the feature hash functions -- TODO this should not be in cdec.ini + const string& cur_config = agenda[0].first; + const unsigned max_iteration = agenda[0].second; + ReadFile ini_rf(cur_config); + Decoder decoder(ini_rf.stream()); + } + // load initial weights - Weights weights; + vector init_weights; if (conf.count("input_weights")) - weights.InitFromFile(conf["input_weights"].as()); + Weights::InitFromFile(conf["input_weights"].as(), &init_weights); vector frozen_fids; if (conf.count("frozen_features")) { @@ -310,19 +296,12 @@ int main(int argc, char** argv) { rng.reset(new MT19937); SparseVector x; - weights.InitSparseVector(&x); + Weights::InitSparseVector(init_weights, &x); TrainingObserver observer; int write_weights_every_ith = 100; // TODO configure int titer = -1; - vector > agenda; - if (!LoadAgenda(conf["training_agenda"].as(), &agenda)) - return 1; - if (rank == 0) - cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n"; - - vector lambdas; for (int ai = 0; ai < agenda.size(); ++ai) { const string& cur_config = agenda[ai].first; const unsigned max_iteration = agenda[ai].second; @@ -331,6 +310,8 @@ int main(int argc, char** argv) { // load cdec.ini and set up decoder ReadFile ini_rf(cur_config); Decoder decoder(ini_rf.stream()); + vector& lambdas = decoder.CurrentWeightVector(); + if (ai == 0) { lambdas.swap(init_weights); init_weights.clear(); } if (rank == 0) o->ResetEpoch(); // resets the learning rate-- TODO is this good? @@ -341,15 +322,13 @@ int main(int argc, char** argv) { #ifdef HAVE_MPI mpi::timer timer; #endif - weights.InitFromVector(x); - weights.InitVector(&lambdas); + x.init_vector(&lambdas); ++iter; ++titer; observer.Reset(); - decoder.SetWeights(lambdas); if (rank == 0) { converged = (iter == max_iteration); - SanityCheck(lambdas); - ShowLargestFeatures(lambdas); + Weights::SanityCheck(lambdas); + Weights::ShowLargestFeatures(lambdas); string fname = "weights.cur.gz"; if (iter % write_weights_every_ith == 0) { ostringstream o; o << "weights.epoch_" << (ai+1) << '.' << iter << ".gz"; @@ -360,7 +339,7 @@ int main(int argc, char** argv) { vv << "total iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. 
num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast(corpus.size())) << " eta=" << lr->eta(titer); const string svv = vv.str(); cerr << svv << endl; - weights.WriteToFile(fname, true, &svv); + Weights::WriteToFile(fname, lambdas, true, &svv); } for (int i = 0; i < size_per_proc; ++i) { diff --git a/training/mr_optimize_reduce.cc b/training/mr_optimize_reduce.cc index b931991d..15e28fa1 100644 --- a/training/mr_optimize_reduce.cc +++ b/training/mr_optimize_reduce.cc @@ -88,25 +88,19 @@ int main(int argc, char** argv) { const bool use_b64 = conf["input_format"].as() == "b64"; - Weights weights; - weights.InitFromFile(conf["input_weights"].as()); + vector lambdas; + Weights::InitFromFile(conf["input_weights"].as(), &lambdas); const string s_obj = "**OBJ**"; int num_feats = FD::NumFeats(); cerr << "Number of features: " << num_feats << endl; const bool gaussian_prior = conf.count("gaussian_prior"); - vector means(num_feats, 0); + vector means(num_feats, 0); if (conf.count("means")) { if (!gaussian_prior) { cerr << "Don't use --means without --gaussian_prior!\n"; exit(1); } - Weights wm; - wm.InitFromFile(conf["means"].as()); - if (num_feats != FD::NumFeats()) { - cerr << "[ERROR] Means file had unexpected features!\n"; - exit(1); - } - wm.InitVector(&means); + Weights::InitFromFile(conf["means"].as(), &means); } shared_ptr o; const string omethod = conf["optimization_method"].as(); @@ -124,8 +118,6 @@ int main(int argc, char** argv) { cerr << "No state file found, assuming ITERATION 1\n"; } - vector lambdas(num_feats, 0); - weights.InitVector(&lambdas); double objective = 0; vector gradient(num_feats, 0); // 0**OBJ**=12.2;Feat1=2.3;Feat2=-0.2; @@ -223,8 +215,7 @@ int main(int argc, char** argv) { old.clear(); SanityCheck(lambdas); ShowLargestFeatures(lambdas); - weights.InitFromVector(lambdas); - weights.WriteToFile(conf["output_weights"].as(), false); + Weights::WriteToFile(conf["output_weights"].as(), lambdas, false); const bool conv = o->HasConverged(); if (conv) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } diff --git a/utils/fdict.h b/utils/fdict.h index 771e8b91..f0871b9a 100644 --- a/utils/fdict.h +++ b/utils/fdict.h @@ -28,6 +28,8 @@ struct FD { } static void EnableHash(const std::string& cmph_file) { #ifdef HAVE_CMPH + assert(dict_.max() == 0); // dictionary must not have + // been added to hash_ = new PerfectHashFunction(cmph_file); #endif } diff --git a/utils/phmt.cc b/utils/phmt.cc index 1f59afaf..48d9f093 100644 --- a/utils/phmt.cc +++ b/utils/phmt.cc @@ -19,22 +19,18 @@ int main(int argc, char** argv) { cerr << "LexFE = " << FD::Convert("LexFE") << endl; cerr << "LexEF = " << FD::Convert("LexEF") << endl; { - Weights w; vector v(FD::NumFeats()); v[FD::Convert("LexFE")] = 1.0; v[FD::Convert("LexEF")] = 0.5; - w.InitFromVector(v); cerr << "Writing...\n"; - w.WriteToFile("weights.bin"); + Weights::WriteToFile("weights.bin", v); cerr << "Done.\n"; } { - Weights w; vector v(FD::NumFeats()); cerr << "Reading...\n"; - w.InitFromFile("weights.bin"); + Weights::InitFromFile("weights.bin", &v); cerr << "Done.\n"; - w.InitVector(&v); assert(v[FD::Convert("LexFE")] == 1.0); assert(v[FD::Convert("LexEF")] == 0.5); } diff --git a/utils/weights.cc b/utils/weights.cc index 0916b72a..c49000be 100644 --- a/utils/weights.cc +++ b/utils/weights.cc @@ -8,7 +8,10 @@ using namespace std; -void Weights::InitFromFile(const std::string& filename, vector* feature_list) { +void Weights::InitFromFile(const string& filename, + vector* pweights, + 
vector* feature_list) { + vector& weights = *pweights; if (!SILENT) cerr << "Reading weights from " << filename << endl; ReadFile in_file(filename); istream& in = *in_file.stream(); @@ -47,16 +50,16 @@ void Weights::InitFromFile(const std::string& filename, vector* feature_ int end = 0; while(end < buf.size() && buf[end] != ' ') ++end; const int fid = FD::Convert(buf.substr(start, end - start)); + if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); } while(end < buf.size() && buf[end] == ' ') ++end; val = strtod(&buf.c_str()[end], NULL); if (isnan(val)) { cerr << FD::Convert(fid) << " has weight NaN!\n"; abort(); } - if (wv_.size() <= fid) - wv_.resize(fid + 1); - wv_[fid] = val; - if (feature_list) { feature_list->push_back(FD::Convert(fid)); } + if (weights.size() <= fid) + weights.resize(fid + 1); + weights[fid] = val; ++weight_count; if (!SILENT) { if (weight_count % 50000 == 0) { cerr << '.' << flush; fl = true; } @@ -76,8 +79,8 @@ void Weights::InitFromFile(const std::string& filename, vector* feature_ cerr << "Hash function reports " << FD::NumFeats() << " keys but weights file contains " << num_keys[0] << endl; abort(); } - wv_.resize(num_keys[0]); - in.get(reinterpret_cast(&wv_[0]), num_keys[0] * sizeof(weight_t)); + weights.resize(num_keys[0]); + in.get(reinterpret_cast(&weights[0]), num_keys[0] * sizeof(weight_t)); if (!in.good()) { cerr << "Error loading weights!\n"; abort(); @@ -85,7 +88,10 @@ void Weights::InitFromFile(const std::string& filename, vector* feature_ } } -void Weights::WriteToFile(const std::string& fname, bool hide_zero_value_features, const string* extra) const { +void Weights::WriteToFile(const string& fname, + const vector& weights, + bool hide_zero_value_features, + const string* extra) { WriteFile out(fname); ostream& o = *out.stream(); assert(o); @@ -96,41 +102,54 @@ void Weights::WriteToFile(const std::string& fname, bool hide_zero_value_feature o.precision(17); const int num_feats = FD::NumFeats(); for (int i = 1; i < num_feats; ++i) { - const weight_t val = (i < wv_.size() ? wv_[i] : 0.0); + const weight_t val = (i < weights.size() ? 
weights[i] : 0.0); if (hide_zero_value_features && val == 0.0) continue; o << FD::Convert(i) << ' ' << val << endl; } } else { o.write("_PHWf", 5); const size_t keys = FD::NumFeats(); - assert(keys <= wv_.size()); + assert(keys <= weights.size()); o.write(reinterpret_cast(&keys), sizeof(keys)); - o.write(reinterpret_cast(&wv_[0]), keys * sizeof(weight_t)); + o.write(reinterpret_cast(&weights[0]), keys * sizeof(weight_t)); } } -void Weights::InitVector(std::vector* w) const { - *w = wv_; +void Weights::InitSparseVector(const vector& dv, + SparseVector* sv) { + sv->clear(); + for (unsigned i = 1; i < dv.size(); ++i) { + if (dv[i]) sv->set_value(i, dv[i]); + } } -void Weights::InitSparseVector(SparseVector* w) const { - for (int i = 1; i < wv_.size(); ++i) { - const weight_t& weight = wv_[i]; - if (weight) w->set_value(i, weight); +void Weights::SanityCheck(const vector& w) { + for (int i = 0; i < w.size(); ++i) { + assert(!isnan(w[i])); + assert(!isinf(w[i])); } } -void Weights::InitFromVector(const std::vector& w) { - wv_ = w; - if (wv_.size() > FD::NumFeats()) - cerr << "WARNING: initializing weight vector has more features than the global feature dictionary!\n"; - wv_.resize(FD::NumFeats(), 0); -} +struct FComp { + const vector& w_; + FComp(const vector& w) : w_(w) {} + bool operator()(int a, int b) const { + return fabs(w_[a]) > fabs(w_[b]); + } +}; -void Weights::InitFromVector(const SparseVector& w) { - wv_.clear(); - wv_.resize(FD::NumFeats(), 0.0); - for (int i = 1; i < FD::NumFeats(); ++i) - wv_[i] = w.value(i); +void Weights::ShowLargestFeatures(const vector& w) { + vector fnums(w.size()); + for (int i = 0; i < w.size(); ++i) + fnums[i] = i; + vector::iterator mid = fnums.begin(); + mid += (w.size() > 10 ? 10 : w.size()); + partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); + cerr << "TOP FEATURES:"; + for (vector::iterator i = fnums.begin(); i != mid; ++i) { + cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; + } + cerr << endl; } + diff --git a/utils/weights.h b/utils/weights.h index 7664810b..30f71db0 100644 --- a/utils/weights.h +++ b/utils/weights.h @@ -10,15 +10,21 @@ typedef double weight_t; class Weights { public: - Weights() {} - void InitFromFile(const std::string& fname, std::vector* feature_list = NULL); - void WriteToFile(const std::string& fname, bool hide_zero_value_features = true, const std::string* extra = NULL) const; - void InitVector(std::vector* w) const; - void InitSparseVector(SparseVector* w) const; - void InitFromVector(const std::vector& w); - void InitFromVector(const SparseVector& w); + static void InitFromFile(const std::string& fname, + std::vector* weights, + std::vector* feature_list = NULL); + static void WriteToFile(const std::string& fname, + const std::vector& weights, + bool hide_zero_value_features = true, + const std::string* extra = NULL); + static void InitSparseVector(const std::vector& dv, + SparseVector* sv); + // check for infinities, NaNs, etc + static void SanityCheck(const std::vector& w); + // write weights with largest magnitude to cerr + static void ShowLargestFeatures(const std::vector& w); private: - std::vector wv_; + Weights(); }; #endif diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index b84c44bc..0c094fd5 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -223,16 +223,16 @@ struct oracle_directions { cerr << "Forest repo: " << forest_repository << endl; assert(DirectoryExists(forest_repository)); vector features; - 
weights.InitFromFile(weights_file, &features); + vector dorigin; + Weights::InitFromFile(weights_file, &dorigin, &features); if (optimize_features.size()) features=optimize_features; - weights.InitSparseVector(&origin); + Weights::InitSparseVector(dorigin, &origin); fids.clear(); AddFeatureIds(features); oracles.resize(dev_set_size); } - Weights weights; void AddFeatureIds(vector const& features) { int i = fids.size(); fids.resize(fids.size()+features.size()); -- cgit v1.2.3
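
For reference, the calling convention this patch converges on -- static Weights helpers that operate on a caller-owned vector, plus Decoder::CurrentWeightVector() for in-place access to the decoder's weights -- looks roughly like the sketch below. It is not part of the patch: tune_step() is a hypothetical driver added here only to illustrate the flow that kbest_mira.cc and the mpi optimizers follow after this change, and the header names are taken from the cdec tree as shown in the diff above.

#include <string>
#include <vector>
#include "decoder.h"        // Decoder; now exposes CurrentWeightVector()
#include "weights.h"        // weight_t and the static Weights helpers
#include "sparse_vector.h"  // SparseVector

// Hypothetical driver showing one load -> update -> dump cycle.
void tune_step(Decoder& decoder, const std::string& weights_file) {
  // The decoder owns the dense weight vector; callers read and write it
  // in place instead of copying it into a separate Weights object.
  std::vector<weight_t>& dense_weights = decoder.CurrentWeightVector();
  Weights::InitFromFile(weights_file, &dense_weights);

  // Optimizers that work in the sparse domain build a SparseVector from
  // the dense weights ...
  SparseVector<weight_t> lambdas;
  Weights::InitSparseVector(dense_weights, &lambdas);

  // ... update lambdas here ...

  // ... and push the result straight back into the decoder's vector.
  lambdas.init_vector(&dense_weights);

  Weights::SanityCheck(dense_weights);          // assert no NaN / inf entries
  Weights::ShowLargestFeatures(dense_weights);  // log largest-magnitude weights
  Weights::WriteToFile("weights.cur.gz", dense_weights);
}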