From 71daf4bf0b91a247d0d1663ae7850a3db85a378d Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Thu, 7 Jul 2011 18:39:38 -0400
Subject: support for extracting k-best derivation trees

---
 decoder/decoder.cc | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'decoder/decoder.cc')
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index ff068be9..2c3a06de 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -416,6 +416,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
         ("csplit_output_plf", "(Compound splitter) Output lattice in PLF format")
         ("csplit_preserve_full_word", "(Compound splitter) Always include the unsegmented form in the output lattice")
         ("extract_rules", po::value<string>(), "Extract the rules used in translation (de-duped) to this file")
+        ("show_derivations", po::value<string>(), "Directory to print the derivation structures to")
         ("graphviz","Show (constrained) translation forest in GraphViz format")
         ("max_translation_beam,x", po::value<int>(), "Beam approximation to get max translation from the chart")
         ("max_translation_sample,X", po::value<int>(), "Sample the max translation from the chart")
@@ -426,6 +427,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
         ("vector_format",po::value<string>()->default_value("b64"), "Sparse vector serialization format for feature expectations or gradients, includes (text or b64)")
         ("combine_size,C",po::value<int>()->default_value(1), "When option -G is used, process this many sentence pairs before writing the gradient (1=emit after every sentence pair)")
         ("forest_output,O",po::value<string>(),"Directory to write forests to");
+
   // ob.AddOptions(&opts);
 #ifdef FSA_RESCORING
   po::options_description cfgo(cfg_options.description());
@@ -677,6 +679,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
   kbest = conf.count("k_best");
   unique_kbest = conf.count("unique_k_best");
   get_oracle_forest = conf.count("get_oracle_forest");
+  oracle.show_derivation=conf.count("show_derivations");
 
 #ifdef FSA_RESCORING
   cfg_options.Validate();
@@ -938,7 +941,8 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
   } else {
     if (kbest && !has_ref) {
       //TODO: does this work properly?
-      oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-");
+      const string deriv_fname = conf.count("show_derivations") ? str("show_derivations",conf) : "-";
+      oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-", deriv_fname);
     } else if (csplit_output_plf) {
       cout << HypergraphIO::AsPLF(forest, false) << endl;
     } else {
@@ -1055,8 +1059,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
         }
       }
       if (conf.count("graphviz")) forest.PrintGraphviz();
-      if (kbest)
-        oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-");
+      if (kbest) {
+        const string deriv_fname = conf.count("show_derivations") ? str("show_derivations",conf) : "-";
+        oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-", deriv_fname);
+      }
       if (conf.count("show_conditional_prob")) {
         const prob_t ref_z = Inside<prob_t, EdgeProb>(forest);
         cout << (log(ref_z) - log(first_z)) << endl << flush;
-- 
cgit v1.2.3


From 3396d8de52872e47ec61be942e4b50170a789950 Mon Sep 17 00:00:00 2001
From: andrea gesmundo <andrea.gesmundo@gmail.com>
Date: Fri, 8 Jul 2011 13:56:42 +0200
Subject: add Fast Cube Pruning

---
 decoder/apply_models.cc | 196 ++++++++++++++++++++++++++++++++++++++++++++++--
 decoder/apply_models.h  |   6 +-
 decoder/decoder.cc      |  10 ++-
 3 files changed, 204 insertions(+), 8 deletions(-)

(limited to 'decoder/decoder.cc')

diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 9390c809..62eff262 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -17,6 +17,10 @@
 #include "hg.h"
 #include "ff.h"
 
+#define NORMAL_CP 1
+#define FAST_CP 2
+#define FAST_CP_2 3
+
 using namespace std;
 using namespace std::tr1;
 
@@ -164,13 +168,15 @@ public:
                       const SentenceMetadata& sm,
                       const Hypergraph& i,
                       int pop_limit,
-                      Hypergraph* o) :
+                      Hypergraph* o,
+                      int s = NORMAL_CP ) :
       models(m),
       smeta(sm),
       in(i),
       out(*o),
       D(in.nodes_.size()),
-      pop_limit_(pop_limit) {
+      pop_limit_(pop_limit),
+      strategy_(s){
     if (!SILENT) cerr << "  Applying feature functions (cube pruning, pop_limit = " << pop_limit_ << ')' << endl;
     node_states_.reserve(kRESERVE_NUM_NODES);
   }
@@ -186,7 +192,15 @@ public:
     if (!SILENT) cerr << "    ";
     for (int i = 0; i < in.nodes_.size(); ++i) {
       if (!SILENT && i % every == 0) cerr << '.';
-      KBest(i, i == goal_id);
+      if (strategy_==NORMAL_CP){
+        KBest(i, i == goal_id);
+      }
+      if (strategy_==FAST_CP){
+        KBestFast(i, i == goal_id);
+      }
+      if (strategy_==FAST_CP_2){
+        KBestFast2(i, i == goal_id);
+      }
     }
     if (!SILENT) {
       cerr << endl;
@@ -283,6 +297,114 @@ public:
       delete freelist[i];
   }
 
+  void KBestFast(const int vert_index, const bool is_goal) {
+	  // cerr << "KBest(" << vert_index << ")\n";
+	  CandidateList& D_v = D[vert_index];
+	  assert(D_v.empty());
+	  const Hypergraph::Node& v = in.nodes_[vert_index];
+	  // cerr << " has " << v.in_edges_.size() << " in-coming edges\n";
+	  const vector<int>& in_edges = v.in_edges_;
+	  CandidateHeap cand;
+	  CandidateList freelist;
+	  cand.reserve(in_edges.size());
+	  //init with j<0,0> for all rules-edges that lead to node-(NT-span)
+	  for (int i = 0; i < in_edges.size(); ++i) {
+		  const Hypergraph::Edge& edge = in.edges_[in_edges[i]];
+		  const JVector j(edge.tail_nodes_.size(), 0);
+		  cand.push_back(new Candidate(edge, j, out, D, node_states_, smeta, models, is_goal));
+	  }
+	  // cerr << " making heap of " << cand.size() << " candidates\n";
+	  make_heap(cand.begin(), cand.end(), HeapCandCompare());
+	  State2Node state2node; // "buf" in Figure 2
+	  int pops = 0;
+	  while(!cand.empty() && pops < pop_limit_) {
+		  pop_heap(cand.begin(), cand.end(), HeapCandCompare());
+		  Candidate* item = cand.back();
+		  cand.pop_back();
+		  // cerr << "POPPED: " << *item << endl;
+
+		  PushSuccFast(*item, is_goal, &cand);
+		  IncorporateIntoPlusLMForest(item, &state2node, &freelist);
+		  ++pops;
+	  }
+	  D_v.resize(state2node.size());
+	  int c = 0;
+	  for (State2Node::iterator i = state2node.begin(); i != state2node.end(); ++i){
+		  D_v[c++] = i->second;
+		  // cerr << "MERGED: " << *i->second << endl;
+	  }
+	  //cerr <<"Node id: "<< vert_index<< endl;
+	  //#ifdef MEASURE_CA
+	  // cerr << "countInProcess (pop/tot): node id: " << vert_index << " (" << count_in_process_pop << "/" << count_in_process_tot << ")"<<endl;
+	  // cerr << "countAtEnd (pop/tot): node id: " << vert_index << " (" << count_at_end_pop << "/" << count_at_end_tot << ")"<<endl;
+	  //#endif
+	  sort(D_v.begin(), D_v.end(), EstProbSorter());
+
+	  // cerr << " expanded to " << D_v.size() << " nodes\n";
+
+	  for (int i = 0; i < cand.size(); ++i)
+		  delete cand[i];
+	  // freelist is necessary since even after an item merged, it still stays in
+	  // the unique set so it can't be deleted til now
+	  for (int i = 0; i < freelist.size(); ++i)
+		  delete freelist[i];
+  }
+
+  void KBestFast2(const int vert_index, const bool is_goal) {
+	  // cerr << "KBest(" << vert_index << ")\n";
+	  CandidateList& D_v = D[vert_index];
+	  assert(D_v.empty());
+	  const Hypergraph::Node& v = in.nodes_[vert_index];
+	  // cerr << " has " << v.in_edges_.size() << " in-coming edges\n";
+	  const vector<int>& in_edges = v.in_edges_;
+	  CandidateHeap cand;
+	  CandidateList freelist;
+	  cand.reserve(in_edges.size());
+	  UniqueCandidateSet unique_accepted;
+	  //init with j<0,0> for all rules-edges that lead to node-(NT-span)
+	  for (int i = 0; i < in_edges.size(); ++i) {
+		  const Hypergraph::Edge& edge = in.edges_[in_edges[i]];
+		  const JVector j(edge.tail_nodes_.size(), 0);
+		  cand.push_back(new Candidate(edge, j, out, D, node_states_, smeta, models, is_goal));
+	  }
+	  // cerr << " making heap of " << cand.size() << " candidates\n";
+	  make_heap(cand.begin(), cand.end(), HeapCandCompare());
+	  State2Node state2node; // "buf" in Figure 2
+	  int pops = 0;
+	  while(!cand.empty() && pops < pop_limit_) {
+		  pop_heap(cand.begin(), cand.end(), HeapCandCompare());
+		  Candidate* item = cand.back();
+		  cand.pop_back();
+		  assert(unique_accepted.insert(item).second); // these should all be unique!
+		  // cerr << "POPPED: " << *item << endl;
+
+		  PushSuccFast2(*item, is_goal, &cand, &unique_accepted);
+		  IncorporateIntoPlusLMForest(item, &state2node, &freelist);
+		  ++pops;
+	  }
+	  D_v.resize(state2node.size());
+	  int c = 0;
+	  for (State2Node::iterator i = state2node.begin(); i != state2node.end(); ++i){
+		  D_v[c++] = i->second;
+		  // cerr << "MERGED: " << *i->second << endl;
+	  }
+	  //cerr <<"Node id: "<< vert_index<< endl;
+	  //#ifdef MEASURE_CA
+	  // cerr << "countInProcess (pop/tot): node id: " << vert_index << " (" << count_in_process_pop << "/" << count_in_process_tot << ")"<<endl;
+	  // cerr << "countAtEnd (pop/tot): node id: " << vert_index << " (" << count_at_end_pop << "/" << count_at_end_tot << ")"<<endl;
+	  //#endif
+	  sort(D_v.begin(), D_v.end(), EstProbSorter());
+
+	  // cerr << " expanded to " << D_v.size() << " nodes\n";
+
+	  for (int i = 0; i < cand.size(); ++i)
+		  delete cand[i];
+	  // freelist is necessary since even after an item merged, it still stays in
+	  // the unique set so it can't be deleted til now
+	  for (int i = 0; i < freelist.size(); ++i)
+		  delete freelist[i];
+  }
+
   void PushSucc(const Candidate& item, const bool is_goal, CandidateHeap* pcand, UniqueCandidateSet* cs) {
     CandidateHeap& cand = *pcand;
     for (int i = 0; i < item.j_.size(); ++i) {
@@ -300,6 +422,54 @@ public:
     }
   }
 
+  //PushSucc following unique ancestor generation function
+  void PushSuccFast(const Candidate& item, const bool is_goal, CandidateHeap* pcand){
+	  CandidateHeap& cand = *pcand;
+	  for (int i = 0; i < item.j_.size(); ++i) {
+		  JVector j = item.j_;
+		  ++j[i];
+		  if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) {
+			  Candidate* new_cand = new Candidate(*item.in_edge_, j, out, D, node_states_, smeta, models, is_goal);
+			  cand.push_back(new_cand);
+			  push_heap(cand.begin(), cand.end(), HeapCandCompare());
+		  }
+		  if(item.j_[i]!=0){
+			  return;
+		  }
+	  }
+  }
+
+  //PushSucc only if all ancest Cand are added
+  void PushSuccFast2(const Candidate& item, const bool is_goal, CandidateHeap* pcand, UniqueCandidateSet* ps){
+	  CandidateHeap& cand = *pcand;
+	  for (int i = 0; i < item.j_.size(); ++i) {
+		  JVector j = item.j_;
+		  ++j[i];
+		  if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) {
+			  Candidate query_unique(*item.in_edge_, j);
+			  if (HasAllAncestors(&query_unique,ps)) {
+				  Candidate* new_cand = new Candidate(*item.in_edge_, j, out, D, node_states_, smeta, models, is_goal);
+				  cand.push_back(new_cand);
+				  push_heap(cand.begin(), cand.end(), HeapCandCompare());
+			  }
+		  }
+	  }
+  }
+
+  bool HasAllAncestors(const Candidate* item, UniqueCandidateSet* cs){
+	  for (int i = 0; i < item->j_.size(); ++i) {
+		  JVector j = item->j_;
+		  --j[i];
+		  if (j[i] >=0) {
+			  Candidate query_unique(*item->in_edge_, j);
+			  if (cs->count(&query_unique) == 0) {
+				  return false;
+			  }
+		  }
+	  }
+	  return true;
+  }
+
   const ModelSet& models;
   const SentenceMetadata& smeta;
   const Hypergraph& in;
@@ -311,6 +481,7 @@ public:
   FFStates node_states_;  // for each node in the out-HG what is
                              // its q function value?
   const int pop_limit_;
+ const int strategy_;       //switch Cube Pruning strategy: 1 normal, 2 fast (alg 2), 3 fast_2 (alg 3). (see: Gesmundo A., Henderson J,. Faster Cube Pruning, IWSLT 2010)
 };
 
 struct NoPruningRescorer {
@@ -412,15 +583,28 @@ void ApplyModelSet(const Hypergraph& in,
   if (models.stateless() || config.algorithm == IntersectionConfiguration::FULL) {
     NoPruningRescorer ma(models, smeta, in, out); // avoid overhead of best-first when no state
     ma.Apply();
-  } else if (config.algorithm == IntersectionConfiguration::CUBE) {
+  } else if (config.algorithm == IntersectionConfiguration::CUBE 
+             || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING
+             || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING_2) {
     int pl = config.pop_limit;
     const int max_pl_for_large=50;
     if (pl > max_pl_for_large && in.nodes_.size() > 80000) {
       pl = max_pl_for_large;
       cerr << "  Note: reducing pop_limit to " << pl << " for very large forest\n";
     }
-    CubePruningRescorer ma(models, smeta, in, pl, out);
-    ma.Apply();
+    if      (config.algorithm == IntersectionConfiguration::CUBE) {
+    	CubePruningRescorer ma(models, smeta, in, pl, out);
+        ma.Apply();
+    }
+    else if (config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING){
+    	CubePruningRescorer ma(models, smeta, in, pl, out, FAST_CP);
+        ma.Apply();
+    }
+    else if (config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING_2){
+    	CubePruningRescorer ma(models, smeta, in, pl, out, FAST_CP_2);
+        ma.Apply();
+    }
+
   } else {
     cerr << "Don't understand intersection algorithm " << config.algorithm << endl;
     exit(1);
diff --git a/decoder/apply_models.h b/decoder/apply_models.h
index a85694aa..19a4c7be 100644
--- a/decoder/apply_models.h
+++ b/decoder/apply_models.h
@@ -13,6 +13,8 @@ struct IntersectionConfiguration {
 enum {
   FULL,
   CUBE,
+  FAST_CUBE_PRUNING,
+  FAST_CUBE_PRUNING_2,
   N_ALGORITHMS
 };
 
@@ -25,7 +27,9 @@ enum {
 inline std::ostream& operator<<(std::ostream& os, const IntersectionConfiguration& c) {
   if (c.algorithm == 0) { os << "FULL"; }
   else if (c.algorithm == 1) { os << "CUBE:k=" << c.pop_limit; }
-  else if (c.algorithm == 2) { os << "N_ALGORITHMS"; }
+  else if (c.algorithm == 2) { os << "FAST_CUBE_PRUNING"; }
+  else if (c.algorithm == 3) { os << "FAST_CUBE_PRUNING_2"; }
+  else if (c.algorithm == 4) { os << "N_ALGORITHMS"; }
   else os << "OTHER";
   return os;
 }
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 2c3a06de..8a4a1485 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -357,7 +357,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
 
         ("weights,w",po::value<string>(),"Feature weights file (initial forest / pass 1)")
         ("feature_function,F",po::value<vector<string> >()->composing(), "Pass 1 additional feature function(s) (-L for list)")
-        ("intersection_strategy,I",po::value<string>()->default_value("cube_pruning"), "Pass 1 intersection strategy for incorporating finite-state features; values include Cube_pruning, Full")
+        ("intersection_strategy,I",po::value<string>()->default_value("cube_pruning"), "Pass 1 intersection strategy for incorporating finite-state features; values include Cube_pruning, Full, Fast_cube_pruning, Fast_cube_pruning_2")
         ("summary_feature", po::value<string>(), "Compute a 'summary feature' at the end of the pass (before any pruning) with name=arg and value=inside-outside/Z")
         ("summary_feature_type", po::value<string>()->default_value("node_risk"), "Summary feature types: node_risk, edge_risk, edge_prob")
         ("density_prune", po::value<double>(), "Pass 1 pruning: keep no more than this many times the number of edges used in the best derivation tree (>=1.0)")
@@ -597,6 +597,14 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
       if (LowercaseString(str(isn.c_str(),conf)) == "full") {
         palg = 0;
       }
+      if (LowercaseString(conf["intersection_strategy"].as<string>()) == "fast_cube_pruning") {
+        palg = 2;
+        cerr << "Using Fast Cube Pruning intersection (see Algorithm 2 described in: Gesmundo A., Henderson J,. Faster Cube Pruning, IWSLT 2010).\n";
+      }
+      if (LowercaseString(conf["intersection_strategy"].as<string>()) == "fast_cube_pruning_2") {
+        palg = 3;
+        cerr << "Using Fast Cube Pruning 2 intersection (see Algorithm 3 described in: Gesmundo A., Henderson J,. Faster Cube Pruning, IWSLT 2010).\n";
+      }
       rp.inter_conf.reset(new IntersectionConfiguration(palg, pop_limit));
     } else {
       break;  // TODO alert user if there are any future configurations
-- 
cgit v1.2.3


From ed8a6e81d87f6e917ecffc290cde0a340b6aa03b Mon Sep 17 00:00:00 2001
From: andrea gesmundo <andrea.gesmundo@gmail.com>
Date: Fri, 8 Jul 2011 15:33:47 +0200
Subject: add cp time measure (def macro)

---
 decoder/cdec.cc    |  8 ++++++++
 decoder/decoder.cc | 13 +++++++++++++
 decoder/decoder.h  | 14 ++++++++++++++
 3 files changed, 35 insertions(+)

(limited to 'decoder/decoder.cc')

diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 5c40f56e..c671af57 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -19,11 +19,19 @@ int main(int argc, char** argv) {
   assert(*in);
 
   string buf;
+#ifdef CP_TIME
+    clock_t time_cp(0);//, end_cp;
+#endif
   while(*in) {
     getline(*in, buf);
     if (buf.empty()) continue;
     decoder.Decode(buf);
   }
+#ifdef CP_TIME
+    cerr << "Time required for Cube Pruning execution: "
+    << CpTime::Get()
+    << " seconds." << "\n\n";
+#endif
   if (show_feature_dictionary) {
     int num = FD::NumFeats();
     for (int i = 1; i < num; ++i) {
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 8a4a1485..76f31352 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -46,6 +46,13 @@
 #include "cfg_options.h"
 #endif
 
+#ifdef CP_TIME
+    clock_t CpTime::time_;
+	void CpTime::Add(clock_t x){time_+=x;}
+	void CpTime::Sub(clock_t x){time_-=x;}
+	double CpTime::Get(){return (double)(time_)/CLOCKS_PER_SEC;}
+#endif
+
 static const double kMINUS_EPSILON = -1e-6;  // don't be too strict
 
 using namespace std;
@@ -806,11 +813,17 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
       Timer t("Forest rescoring:");
       rp.models->PrepareForInput(smeta);
       Hypergraph rescored_forest;
+#ifdef CP_TIME
+      CpTime::Sub(clock());
+#endif
       ApplyModelSet(forest,
                   smeta,
                   *rp.models,
                   *rp.inter_conf,
                   &rescored_forest);
+#ifdef CP_TIME
+      CpTime::Add(clock());
+#endif
       forest.swap(rescored_forest);
       forest.Reweight(cur_weights);
       if (!SILENT) forest_stats(forest,"  " + passtr +" forest",show_tree_structure,oracle.show_derivation);
diff --git a/decoder/decoder.h b/decoder/decoder.h
index 813400e3..5491369f 100644
--- a/decoder/decoder.h
+++ b/decoder/decoder.h
@@ -7,6 +7,20 @@
 #include <boost/shared_ptr.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#undef CP_TIME
+//#define CP_TIME
+#ifdef CP_TIME
+#include <time.h>
+struct CpTime{
+public:
+	static void Add(clock_t x);
+	static void Sub(clock_t x);
+	static double Get();
+private:
+    static clock_t time_;
+};
+#endif
+
 class SentenceMetadata;
 struct Hypergraph;
 struct DecoderImpl;
-- 
cgit v1.2.3


From 38a5bee71f6b49515cd105a9467ff602ff9dee64 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 13 Sep 2011 13:25:46 +0100
Subject: optional support for doing perfect hashing of feature strings to save
 lots of memory

---
 decoder/decoder.cc    |  22 ++++++++-
 utils/Makefile.am     |   9 +++-
 utils/fdict.cc        |   4 ++
 utils/fdict.h         |  36 ++++++++++++++
 utils/perfect_hash.cc |  37 ++++++++++++++
 utils/perfect_hash.h  |  24 +++++++++
 utils/phmt.cc         |  44 +++++++++++++++++
 utils/weights.cc      | 132 ++++++++++++++++++++++++++++++++++----------------
 utils/weights.h       |  14 +++---
 9 files changed, 269 insertions(+), 53 deletions(-)
 create mode 100644 utils/perfect_hash.cc
 create mode 100644 utils/perfect_hash.h
 create mode 100644 utils/phmt.cc

(limited to 'decoder/decoder.cc')

diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 76f31352..25eb2de4 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -328,6 +328,7 @@ struct DecoderImpl {
   bool write_gradient; // TODO Observer
   bool feature_expectations; // TODO Observer
   bool output_training_vector; // TODO Observer
+  bool remove_intersected_rule_annotations;
 
   static void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
     for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
@@ -361,6 +362,9 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
         ("grammar,g",po::value<vector<string> >()->composing(),"Either SCFG grammar file(s) or phrase tables file(s)")
         ("per_sentence_grammar_file", po::value<string>(), "Optional (and possibly not implemented) per sentence grammar file enables all per sentence grammars to be stored in a single large file and accessed by offset")
         ("list_feature_functions,L","List available feature functions")
+#ifdef HAVE_CMPH
+        ("cmph_perfect_feature_hash,h", po::value<string>(), "Load perfect hash function for features")
+#endif
 
         ("weights,w",po::value<string>(),"Feature weights file (initial forest / pass 1)")
         ("feature_function,F",po::value<vector<string> >()->composing(), "Pass 1 additional feature function(s) (-L for list)")
@@ -433,7 +437,8 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
         ("feature_expectations","Write feature expectations for all features in chart (**OBJ** will be the partition)")
         ("vector_format",po::value<string>()->default_value("b64"), "Sparse vector serialization format for feature expectations or gradients, includes (text or b64)")
         ("combine_size,C",po::value<int>()->default_value(1), "When option -G is used, process this many sentence pairs before writing the gradient (1=emit after every sentence pair)")
-        ("forest_output,O",po::value<string>(),"Directory to write forests to");
+        ("forest_output,O",po::value<string>(),"Directory to write forests to")
+        ("remove_intersected_rule_annotations", "After forced decoding is completed, remove nonterminal annotations (i.e., the source side spans)");
 
   // ob.AddOptions(&opts);
 #ifdef FSA_RESCORING
@@ -443,7 +448,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
   po::options_description clo("Command line options");
   clo.add_options()
     ("config,c", po::value<vector<string> >(&cfg_files), "Configuration file(s) - latest has priority")
-        ("help,h", "Print this help message and exit")
+        ("help,?", "Print this help message and exit")
     ("usage,u", po::value<string>(), "Describe a feature function type")
     ("compgen", "Print just option names suitable for bash command line completion builtin 'compgen'")
     ;
@@ -645,6 +650,12 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
     FD::Freeze(); // this means we can't see the feature names of not-weighted features
   }
 
+  if (conf.count("cmph_perfect_feature_hash")) {
+    cerr << "Loading perfect hash function from " << conf["cmph_perfect_feature_hash"].as<string>() << " ...\n";
+    FD::EnableHash(conf["cmph_perfect_feature_hash"].as<string>());
+    cerr << "  " << FD::NumFeats() << " features in map\n";
+  }
+
   // set up translation back end
   if (formalism == "scfg")
     translator.reset(new SCFGTranslator(conf));
@@ -695,6 +706,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
   unique_kbest = conf.count("unique_k_best");
   get_oracle_forest = conf.count("get_oracle_forest");
   oracle.show_derivation=conf.count("show_derivations");
+  remove_intersected_rule_annotations = conf.count("remove_intersected_rule_annotations");
 
 #ifdef FSA_RESCORING
   cfg_options.Validate();
@@ -1010,6 +1022,12 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
 //        if (!SILENT) cerr << "  USING UNIFORM WEIGHTS\n";
 //        for (int i = 0; i < forest.edges_.size(); ++i)
 //          forest.edges_[i].edge_prob_=prob_t::One(); }
+      if (remove_intersected_rule_annotations) {
+        for (unsigned i = 0; i < forest.edges_.size(); ++i)
+          if (forest.edges_[i].rule_ &&
+              forest.edges_[i].rule_->parent_rule_)
+            forest.edges_[i].rule_ = forest.edges_[i].rule_->parent_rule_;
+      }
       forest.Reweight(last_weights);
       if (!SILENT) forest_stats(forest,"  Constr. forest",show_tree_structure,oracle.show_derivation);
       if (!SILENT) cerr << "  Constr. VitTree: " << ViterbiFTree(forest) << endl;
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 94f9be30..c50747bf 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -1,5 +1,5 @@
-noinst_PROGRAMS = ts
-TESTS = ts
+noinst_PROGRAMS = ts phmt
+TESTS = ts phmt
 
 if HAVE_GTEST
 noinst_PROGRAMS += \
@@ -27,6 +27,11 @@ libutils_a_SOURCES = \
   verbose.cc \
   weights.cc
 
+if HAVE_CMPH
+  libutils_a_SOURCES += perfect_hash.cc
+endif
+
+phmt_SOURCES = phmt.cc
 ts_SOURCES = ts.cc
 dict_test_SOURCES = dict_test.cc
 dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
diff --git a/utils/fdict.cc b/utils/fdict.cc
index baa0b552..676c951c 100644
--- a/utils/fdict.cc
+++ b/utils/fdict.cc
@@ -9,6 +9,10 @@ using namespace std;
 Dict FD::dict_;
 bool FD::frozen_ = false;
 
+#ifdef HAVE_CMPH
+PerfectHashFunction* FD::hash_ = NULL;
+#endif
+
 std::string FD::Convert(std::vector<WordID> const& v) {
     return Convert(&*v.begin(),&*v.end());
 }
diff --git a/utils/fdict.h b/utils/fdict.h
index f9673023..771e8b91 100644
--- a/utils/fdict.h
+++ b/utils/fdict.h
@@ -1,23 +1,56 @@
 #ifndef _FDICT_H_
 #define _FDICT_H_
 
+#include "config.h"
+
+#include <iostream>
 #include <string>
 #include <vector>
 #include "dict.h"
 
+#ifdef HAVE_CMPH
+#include "perfect_hash.h"
+#include "string_to.h"
+#endif
+
 struct FD {
   // once the FD is frozen, new features not already in the
   // dictionary will return 0
   static void Freeze() {
     frozen_ = true;
   }
+  static bool UsingPerfectHashFunction() {
+#ifdef HAVE_CMPH
+    return hash_;
+#else
+    return false;
+#endif
+  }
+  static void EnableHash(const std::string& cmph_file) {
+#ifdef HAVE_CMPH
+    hash_ = new PerfectHashFunction(cmph_file);
+#endif
+  }
   static inline int NumFeats() {
+#ifdef HAVE_CMPH
+    if (hash_) return hash_->number_of_keys();
+#endif
     return dict_.max() + 1;
   }
   static inline WordID Convert(const std::string& s) {
+#ifdef HAVE_CMPH
+    if (hash_) return (*hash_)(s);
+#endif
     return dict_.Convert(s, frozen_);
   }
   static inline const std::string& Convert(const WordID& w) {
+#ifdef HAVE_CMPH
+    if (hash_) {
+      static std::string tls;
+      tls = to_string(w);
+      return tls;
+    }
+#endif
     return dict_.Convert(w);
   }
   static std::string Convert(WordID const *i,WordID const* e);
@@ -29,6 +62,9 @@ struct FD {
   static Dict dict_;
  private:
   static bool frozen_;
+#ifdef HAVE_CMPH
+  static PerfectHashFunction* hash_;
+#endif
 };
 
 #endif
diff --git a/utils/perfect_hash.cc b/utils/perfect_hash.cc
new file mode 100644
index 00000000..706e2741
--- /dev/null
+++ b/utils/perfect_hash.cc
@@ -0,0 +1,37 @@
+#include "config.h"
+
+#ifdef HAVE_CMPH
+
+#include "perfect_hash.h"
+
+#include <cstdio>
+#include <iostream>
+
+using namespace std;
+
+PerfectHashFunction::~PerfectHashFunction() {
+  cmph_destroy(mphf_);
+}
+
+PerfectHashFunction::PerfectHashFunction(const string& fname) {
+  FILE* f = fopen(fname.c_str(), "r");
+  if (!f) {
+    cerr << "Failed to open file " << fname << " for reading: cannot load hash function.\n";
+    abort();
+  }
+  mphf_ = cmph_load(f);
+  if (!mphf_) {
+    cerr << "cmph_load failed on " << fname << "!\n";
+    abort();
+  }
+}
+
+size_t PerfectHashFunction::operator()(const string& key) const {
+  return cmph_search(mphf_, &key[0], key.size());
+}
+
+size_t PerfectHashFunction::number_of_keys() const {
+  return cmph_size(mphf_);
+}
+
+#endif
diff --git a/utils/perfect_hash.h b/utils/perfect_hash.h
new file mode 100644
index 00000000..8ac11f18
--- /dev/null
+++ b/utils/perfect_hash.h
@@ -0,0 +1,24 @@
+#ifndef _PERFECT_HASH_MAP_H_
+#define _PERFECT_HASH_MAP_H_
+
+#include "config.h"
+
+#ifndef HAVE_CMPH
+#error libcmph is required to use PerfectHashFunction
+#endif
+
+#include <vector>
+#include <boost/utility.hpp>
+#include "cmph.h"
+
+class PerfectHashFunction : boost::noncopyable {
+ public:
+  explicit PerfectHashFunction(const std::string& fname);
+  ~PerfectHashFunction();
+  size_t operator()(const std::string& key) const;
+  size_t number_of_keys() const;
+ private:
+  cmph_t *mphf_;
+};
+
+#endif
diff --git a/utils/phmt.cc b/utils/phmt.cc
new file mode 100644
index 00000000..1f59afaf
--- /dev/null
+++ b/utils/phmt.cc
@@ -0,0 +1,44 @@
+#include "config.h"
+
+#ifndef HAVE_CMPH
+int main() {
+  return 0;
+}
+#else
+
+#include <iostream>
+#include "weights.h"
+#include "fdict.h"
+
+using namespace std;
+
+int main(int argc, char** argv) {
+  if (argc != 2) { cerr << "Usage: " << argv[0] << " file.mphf\n"; return 1; }
+  FD::EnableHash(argv[1]);
+  cerr << "Number of keys: " << FD::NumFeats() << endl;
+  cerr << "LexFE = " << FD::Convert("LexFE") << endl;
+  cerr << "LexEF = " << FD::Convert("LexEF") << endl;
+  {
+    Weights w;
+    vector<weight_t> v(FD::NumFeats());
+    v[FD::Convert("LexFE")] = 1.0;
+    v[FD::Convert("LexEF")] = 0.5;
+    w.InitFromVector(v);
+    cerr << "Writing...\n";
+    w.WriteToFile("weights.bin");
+    cerr << "Done.\n";
+  }
+  {
+    Weights w;
+    vector<weight_t> v(FD::NumFeats());
+    cerr << "Reading...\n";
+    w.InitFromFile("weights.bin");
+    cerr << "Done.\n";
+    w.InitVector(&v);
+    assert(v[FD::Convert("LexFE")] == 1.0);
+    assert(v[FD::Convert("LexEF")] == 0.5);
+  }
+}
+
+#endif
+
diff --git a/utils/weights.cc b/utils/weights.cc
index b994a2fe..0916b72a 100644
--- a/utils/weights.cc
+++ b/utils/weights.cc
@@ -13,40 +13,75 @@ void Weights::InitFromFile(const std::string& filename, vector<string>* feature_
   ReadFile in_file(filename);
   istream& in = *in_file.stream();
   assert(in);
-  int weight_count = 0;
-  bool fl = false;
-  string buf;
-  double val = 0;
-  while (in) {
-    getline(in, buf);
-    if (buf.size() == 0) continue;
-    if (buf[0] == '#') continue;
-    for (int i = 0; i < buf.size(); ++i)
-      if (buf[i] == '=') buf[i] = ' ';
-    int start = 0;
-    while(start < buf.size() && buf[start] == ' ') ++start;
-    int end = 0;
-    while(end < buf.size() && buf[end] != ' ') ++end;
-    const int fid = FD::Convert(buf.substr(start, end - start));
-    while(end < buf.size() && buf[end] == ' ') ++end;
-    val = strtod(&buf.c_str()[end], NULL);
-    if (isnan(val)) {
-      cerr << FD::Convert(fid) << " has weight NaN!\n";
-     abort();
+  
+  bool read_text = true;
+  if (1) {
+    ReadFile hdrrf(filename);
+    istream& hi = *hdrrf.stream();
+    assert(hi);
+    char buf[10];
+    hi.get(buf, 6);
+    assert(hi.good());
+    if (strncmp(buf, "_PHWf", 5) == 0) {
+      read_text = false;
+    }
+  }
+
+  if (read_text) {
+    int weight_count = 0;
+    bool fl = false;
+    string buf;
+    weight_t val = 0;
+    while (in) {
+      getline(in, buf);
+      if (buf.size() == 0) continue;
+      if (buf[0] == '#') continue;
+      if (buf[0] == ' ') {
+        cerr << "Weights file lines may not start with whitespace.\n" << buf << endl;
+        abort();
+      }
+      for (int i = buf.size() - 1; i > 0; --i)
+        if (buf[i] == '=' || buf[i] == '\t') { buf[i] = ' '; break; }
+      int start = 0;
+      while(start < buf.size() && buf[start] == ' ') ++start;
+      int end = 0;
+      while(end < buf.size() && buf[end] != ' ') ++end;
+      const int fid = FD::Convert(buf.substr(start, end - start));
+      while(end < buf.size() && buf[end] == ' ') ++end;
+      val = strtod(&buf.c_str()[end], NULL);
+      if (isnan(val)) {
+        cerr << FD::Convert(fid) << " has weight NaN!\n";
+        abort();
+      }
+      if (wv_.size() <= fid)
+        wv_.resize(fid + 1);
+      wv_[fid] = val;
+      if (feature_list) { feature_list->push_back(FD::Convert(fid)); }
+      ++weight_count;
+      if (!SILENT) {
+        if (weight_count %   50000 == 0) { cerr << '.' << flush; fl = true; }
+        if (weight_count % 2000000 == 0) { cerr << " [" << weight_count << "]\n"; fl = false; }
+      }
     }
-    if (wv_.size() <= fid)
-      wv_.resize(fid + 1);
-    wv_[fid] = val;
-    if (feature_list) { feature_list->push_back(FD::Convert(fid)); }
-    ++weight_count;
     if (!SILENT) {
-      if (weight_count %   50000 == 0) { cerr << '.' << flush; fl = true; }
-      if (weight_count % 2000000 == 0) { cerr << " [" << weight_count << "]\n"; fl = false; }
+      if (fl) { cerr << endl; }
+      cerr << "Loaded " << weight_count << " feature weights\n";
+    }
+  } else {   // !read_text
+    char buf[6];
+    in.get(buf, 6);
+    size_t num_keys[2];
+    in.get(reinterpret_cast<char*>(&num_keys[0]), sizeof(size_t) + 1);
+    if (num_keys[0] != FD::NumFeats()) {
+      cerr << "Hash function reports " << FD::NumFeats() << " keys but weights file contains " << num_keys[0] << endl;
+      abort();
+    }
+    wv_.resize(num_keys[0]);
+    in.get(reinterpret_cast<char*>(&wv_[0]), num_keys[0] * sizeof(weight_t));
+    if (!in.good()) {
+      cerr << "Error loading weights!\n";
+      abort();
     }
-  }
-  if (!SILENT) {
-    if (fl) { cerr << endl; }
-    cerr << "Loaded " << weight_count << " feature weights\n";
   }
 }
 
@@ -54,37 +89,48 @@ void Weights::WriteToFile(const std::string& fname, bool hide_zero_value_feature
   WriteFile out(fname);
   ostream& o = *out.stream();
   assert(o);
-  if (extra) { o << "# " << *extra << endl; }
-  o.precision(17);
-  const int num_feats = FD::NumFeats();
-  for (int i = 1; i < num_feats; ++i) {
-    const double val = (i < wv_.size() ? wv_[i] : 0.0);
-    if (hide_zero_value_features && val == 0.0) continue;
-    o << FD::Convert(i) << ' ' << val << endl;
+  bool write_text = !FD::UsingPerfectHashFunction();
+
+  if (write_text) {
+    if (extra) { o << "# " << *extra << endl; }
+    o.precision(17);
+    const int num_feats = FD::NumFeats();
+    for (int i = 1; i < num_feats; ++i) {
+      const weight_t val = (i < wv_.size() ? wv_[i] : 0.0);
+      if (hide_zero_value_features && val == 0.0) continue;
+      o << FD::Convert(i) << ' ' << val << endl;
+    }
+  } else {
+    o.write("_PHWf", 5);
+    const size_t keys = FD::NumFeats();
+    assert(keys <= wv_.size());
+    o.write(reinterpret_cast<const char*>(&keys), sizeof(keys));
+    o.write(reinterpret_cast<const char*>(&wv_[0]), keys * sizeof(weight_t));
   }
 }
 
-void Weights::InitVector(std::vector<double>* w) const {
+void Weights::InitVector(std::vector<weight_t>* w) const {
   *w = wv_;
 }
 
-void Weights::InitSparseVector(SparseVector<double>* w) const {
+void Weights::InitSparseVector(SparseVector<weight_t>* w) const {
   for (int i = 1; i < wv_.size(); ++i) {
-    const double& weight = wv_[i];
+    const weight_t& weight = wv_[i];
     if (weight) w->set_value(i, weight);
   }
 }
 
-void Weights::InitFromVector(const std::vector<double>& w) {
+void Weights::InitFromVector(const std::vector<weight_t>& w) {
   wv_ = w;
   if (wv_.size() > FD::NumFeats())
     cerr << "WARNING: initializing weight vector has more features than the global feature dictionary!\n";
   wv_.resize(FD::NumFeats(), 0);
 }
 
-void Weights::InitFromVector(const SparseVector<double>& w) {
+void Weights::InitFromVector(const SparseVector<weight_t>& w) {
   wv_.clear();
   wv_.resize(FD::NumFeats(), 0.0);
   for (int i = 1; i < FD::NumFeats(); ++i)
     wv_[i] = w.value(i);
 }
+
diff --git a/utils/weights.h b/utils/weights.h
index cc20283c..7664810b 100644
--- a/utils/weights.h
+++ b/utils/weights.h
@@ -2,21 +2,23 @@
 #define _WEIGHTS_H_
 
 #include <string>
-#include <map>
 #include <vector>
 #include "sparse_vector.h"
 
+// warning: in the future this will become float
+typedef double weight_t;
+
 class Weights {
  public:
   Weights() {}
   void InitFromFile(const std::string& fname, std::vector<std::string>* feature_list = NULL);
   void WriteToFile(const std::string& fname, bool hide_zero_value_features = true, const std::string* extra = NULL) const;
-  void InitVector(std::vector<double>* w) const;
-  void InitSparseVector(SparseVector<double>* w) const;
-  void InitFromVector(const std::vector<double>& w);
-  void InitFromVector(const SparseVector<double>& w);
+  void InitVector(std::vector<weight_t>* w) const;
+  void InitSparseVector(SparseVector<weight_t>* w) const;
+  void InitFromVector(const std::vector<weight_t>& w);
+  void InitFromVector(const SparseVector<weight_t>& w);
  private:
-  std::vector<double> wv_;
+  std::vector<weight_t> wv_;
 };
 
 #endif
-- 
cgit v1.2.3


From 251da4347ea356f799e6c227ac8cf541c0cef2f2 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 13 Sep 2011 17:36:23 +0100
Subject: get rid of bad Weights class so it no longer keeps a copy of a vector
 inside it

---
 decoder/decoder.cc                    |  64 ++++++++---------
 decoder/decoder.h                     |   9 ++-
 mira/kbest_mira.cc                    |  62 ++++-------------
 pro-train/mr_pro_map.cc               |   8 +--
 pro-train/mr_pro_reduce.cc            |  16 ++---
 training/Makefile.am                  |   8 ---
 training/augment_grammar.cc           |   4 +-
 training/collapse_weights.cc          |   6 +-
 training/compute_cllh.cc              |  23 +++---
 training/grammar_convert.cc           |   8 +--
 training/mpi_batch_optimize.cc        | 127 ++++++++--------------------------
 training/mpi_online_optimize.cc       |  69 +++++++-----------
 training/mr_optimize_reduce.cc        |  19 ++---
 utils/fdict.h                         |   2 +
 utils/phmt.cc                         |   8 +--
 utils/weights.cc                      |  75 ++++++++++++--------
 utils/weights.h                       |  22 +++---
 vest/mr_vest_generate_mapper_input.cc |   6 +-
 18 files changed, 201 insertions(+), 335 deletions(-)

(limited to 'decoder/decoder.cc')

diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 25eb2de4..4d4b6245 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -159,8 +159,7 @@ struct RescoringPass {
   shared_ptr<ModelSet> models;
   shared_ptr<IntersectionConfiguration> inter_conf;
   vector<const FeatureFunction*> ffs;
-  shared_ptr<Weights> w;      // null == use previous weights
-  vector<double> weight_vector;
+  shared_ptr<vector<weight_t> > weight_vector;
   int fid_summary;            // 0 == no summary feature
   double density_prune;       // 0 == don't density prune
   double beam_prune;          // 0 == don't beam prune
@@ -169,7 +168,7 @@ struct RescoringPass {
 ostream& operator<<(ostream& os, const RescoringPass& rp) {
   os << "[num_fn=" << rp.ffs.size();
   if (rp.inter_conf) { os << " int_alg=" << *rp.inter_conf; }
-  if (rp.w) os << " new_weights";
+  //if (rp.weight_vector.size() > 0) os << " new_weights";
   if (rp.fid_summary) os << " summary_feature=" << FD::Convert(rp.fid_summary);
   if (rp.density_prune) os << " density_prune=" << rp.density_prune;
   if (rp.beam_prune) os << " beam_prune=" << rp.beam_prune;
@@ -181,13 +180,8 @@ struct DecoderImpl {
   DecoderImpl(po::variables_map& conf, int argc, char** argv, istream* cfg);
   ~DecoderImpl();
   bool Decode(const string& input, DecoderObserver*);
-  void SetWeights(const vector<double>& weights) {
-    init_weights = weights;
-    for (int i = 0; i < rescoring_passes.size(); ++i) {
-      if (rescoring_passes[i].models)
-        rescoring_passes[i].models->SetWeights(weights);
-      rescoring_passes[i].weight_vector = weights;
-    }
+  vector<weight_t>& CurrentWeightVector() {
+    return *rescoring_passes.back().weight_vector;
   }
   void SetId(int next_sent_id) { sent_id = next_sent_id - 1; }
 
@@ -300,8 +294,7 @@ struct DecoderImpl {
   OracleBleu oracle;
   string formalism;
   shared_ptr<Translator> translator;
-  Weights w_init_weights;      // used with initial parse
-  vector<double> init_weights; // weights used with initial parse
+  shared_ptr<vector<weight_t> > init_weights; // weights used with initial parse
   vector<shared_ptr<FeatureFunction> > pffs;
 #ifdef FSA_RESCORING
   CFGOptions cfg_options;
@@ -557,13 +550,18 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
     exit(1);
   }
 
-  // load initial feature weights (and possibly freeze feature set)
-  if (conf.count("weights")) {
-    w_init_weights.InitFromFile(str("weights",conf));
-    w_init_weights.InitVector(&init_weights);
-    init_weights.resize(FD::NumFeats());
+  // load perfect hash function for features
+  if (conf.count("cmph_perfect_feature_hash")) {
+    cerr << "Loading perfect hash function from " << conf["cmph_perfect_feature_hash"].as<string>() << " ...\n";
+    FD::EnableHash(conf["cmph_perfect_feature_hash"].as<string>());
+    cerr << "  " << FD::NumFeats() << " features in map\n";
   }
 
+  // load initial feature weights (and possibly freeze feature set)
+  init_weights.reset(new vector<weight_t>);
+  if (conf.count("weights"))
+    Weights::InitFromFile(str("weights",conf), init_weights.get());
+
   // cube pruning pop-limit: we may want to configure this on a per-pass basis
   pop_limit = conf["cubepruning_pop_limit"].as<int>();
 
@@ -582,9 +580,8 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
       RescoringPass& rp = rescoring_passes.back();
       // only configure new weights if pass > 0, otherwise we reuse the initial chart weights
       if (nth_pass_condition && conf.count(ws)) {
-        rp.w.reset(new Weights);
-        rp.w->InitFromFile(str(ws.c_str(), conf));
-        rp.w->InitVector(&rp.weight_vector);
+        rp.weight_vector.reset(new vector<weight_t>());
+        Weights::InitFromFile(str(ws.c_str(), conf), rp.weight_vector.get());
       }
       bool has_stateful = false;
       if (conf.count(ff)) {
@@ -624,11 +621,15 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
   }
 
   // set up weight vectors since later phases may reuse weights from earlier phases
-  const vector<double>* prev = &init_weights;
+  shared_ptr<vector<weight_t> > prev_weights = init_weights;
   for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
     RescoringPass& rp = rescoring_passes[pass];
-    if (!rp.w) { rp.weight_vector = *prev; } else { prev = &rp.weight_vector; }
-    rp.models.reset(new ModelSet(rp.weight_vector, rp.ffs));
+    if (!rp.weight_vector) {
+      rp.weight_vector = prev_weights;
+    } else {
+      prev_weights = rp.weight_vector;
+    }
+    rp.models.reset(new ModelSet(*rp.weight_vector, rp.ffs));
     string ps = "Pass1 "; ps[4] += pass;
     if (!SILENT) show_models(conf,*rp.models,ps.c_str());
   }
@@ -650,12 +651,6 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
     FD::Freeze(); // this means we can't see the feature names of not-weighted features
   }
 
-  if (conf.count("cmph_perfect_feature_hash")) {
-    cerr << "Loading perfect hash function from " << conf["cmph_perfect_feature_hash"].as<string>() << " ...\n";
-    FD::EnableHash(conf["cmph_perfect_feature_hash"].as<string>());
-    cerr << "  " << FD::NumFeats() << " features in map\n";
-  }
-
   // set up translation back end
   if (formalism == "scfg")
     translator.reset(new SCFGTranslator(conf));
@@ -685,7 +680,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
   }
   if (!fsa_ffs.empty()) {
     cerr<<"FSA: ";
-    show_all_features(fsa_ffs,init_weights,cerr,cerr,true,true);
+    show_all_features(fsa_ffs,*init_weights,cerr,cerr,true,true);
   }
 #endif
 
@@ -733,7 +728,8 @@ bool Decoder::Decode(const string& input, DecoderObserver* o) {
   if (del) delete o;
   return res;
 }
-void Decoder::SetWeights(const vector<double>& weights) { pimpl_->SetWeights(weights); }
+vector<weight_t>& Decoder::CurrentWeightVector() { return pimpl_->CurrentWeightVector(); }
+const vector<weight_t>& Decoder::CurrentWeightVector() const { return pimpl_->CurrentWeightVector(); }
 void Decoder::SetSupplementalGrammar(const std::string& grammar_string) {
   assert(pimpl_->translator->GetDecoderType() == "SCFG");
   static_cast<SCFGTranslator&>(*pimpl_->translator).SetSupplementalGrammar(grammar_string);
@@ -774,7 +770,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
   translator->ProcessMarkupHints(smeta.sgml_);
   Timer t("Translation");
   const bool translation_successful =
-    translator->Translate(to_translate, &smeta, init_weights, &forest);
+    translator->Translate(to_translate, &smeta, *init_weights, &forest);
   translator->SentenceComplete();
 
   if (!translation_successful) {
@@ -812,7 +808,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
 
   for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
     const RescoringPass& rp = rescoring_passes[pass];
-    const vector<double>& cur_weights = rp.weight_vector;
+    const vector<weight_t>& cur_weights = *rp.weight_vector;
     if (!SILENT) cerr << endl << "  RESCORING PASS #" << (pass+1) << " " << rp << endl;
 #ifdef FSA_RESCORING
     cfg_options.maybe_output_source(forest);
@@ -933,7 +929,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
 #endif
   }
 
-  const vector<double>& last_weights = (rescoring_passes.empty() ? init_weights : rescoring_passes.back().weight_vector);
+  const vector<double>& last_weights = (rescoring_passes.empty() ? *init_weights : *rescoring_passes.back().weight_vector);
 
   // Oracle Rescoring
   if(get_oracle_forest) {
diff --git a/decoder/decoder.h b/decoder/decoder.h
index 5491369f..9d009ffa 100644
--- a/decoder/decoder.h
+++ b/decoder/decoder.h
@@ -7,6 +7,8 @@
 #include <boost/shared_ptr.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "weights.h"  // weight_t
+
 #undef CP_TIME
 //#define CP_TIME
 #ifdef CP_TIME
@@ -39,7 +41,12 @@ struct Decoder {
   Decoder(int argc, char** argv);
   Decoder(std::istream* config_file);
   bool Decode(const std::string& input, DecoderObserver* observer = NULL);
-  void SetWeights(const std::vector<double>& weights);
+
+  // access this to either *read* or *write* to the decoder's last
+  // weight vector (i.e., the weights of the finest past)
+  std::vector<weight_t>& CurrentWeightVector();
+  const std::vector<weight_t>& CurrentWeightVector() const;
+
   void SetId(int id);
   ~Decoder();
   const boost::program_options::variables_map& GetConf() const { return conf; }
diff --git a/mira/kbest_mira.cc b/mira/kbest_mira.cc
index 6918a9a1..459a5e6f 100644
--- a/mira/kbest_mira.cc
+++ b/mira/kbest_mira.cc
@@ -32,21 +32,6 @@ namespace po = boost::program_options;
 bool invert_score;
 boost::shared_ptr<MT19937> rng;
 
-void SanityCheck(const vector<double>& w) {
-  for (int i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
-  }
-}
-
-struct FComp {
-  const vector<double>& w_;
-  FComp(const vector<double>& w) : w_(w) {}
-  bool operator()(int a, int b) const {
-    return fabs(w_[a]) > fabs(w_[b]);
-  }
-};
-
 void RandomPermutation(int len, vector<int>* p_ids) {
   vector<int>& ids = *p_ids;
   ids.resize(len);
@@ -58,21 +43,6 @@ void RandomPermutation(int len, vector<int>* p_ids) {
   }  
 }
 
-void ShowLargestFeatures(const vector<double>& w) {
-  vector<int> fnums(w.size());
-  for (int i = 0; i < w.size(); ++i)
-    fnums[i] = i;
-  vector<int>::iterator mid = fnums.begin();
-  mid += (w.size() > 10 ? 10 : w.size());
-  partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
-  cerr << "TOP FEATURES:";
-  --mid;
-  for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
-    cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
-  }
-  cerr << endl;
-}
-
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
@@ -209,14 +179,16 @@ int main(int argc, char** argv) {
     cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
     return 1;
   }
-  // load initial weights
-  Weights weights;
-  weights.InitFromFile(conf["input_weights"].as<string>());
-  SparseVector<double> lambdas;
-  weights.InitSparseVector(&lambdas);
 
   ReadFile ini_rf(conf["decoder_config"].as<string>());
   Decoder decoder(ini_rf.stream());
+
+  // load initial weights
+  vector<weight_t>& dense_weights = decoder.CurrentWeightVector();
+  SparseVector<weight_t> lambdas;
+  Weights::InitFromFile(conf["input_weights"].as<string>(), &dense_weights);
+  Weights::InitSparseVector(dense_weights, &lambdas);
+
   const double max_step_size = conf["max_step_size"].as<double>();
   const double mt_metric_scale = conf["mt_metric_scale"].as<double>();
 
@@ -230,7 +202,6 @@ int main(int argc, char** argv) {
   double tot_loss = 0;
   int dots = 0;
   int cur_pass = 0;
-  vector<double> dense_weights;
   SparseVector<double> tot;
   tot += lambdas;          // initial weights
   normalizer++;            // count for initial weights
@@ -240,27 +211,22 @@ int main(int argc, char** argv) {
   vector<int> order;
   RandomPermutation(corpus.size(), &order);
   while (lcount <= max_iteration) {
-    dense_weights.clear();
-    weights.InitFromVector(lambdas);
-    weights.InitVector(&dense_weights);
-    decoder.SetWeights(dense_weights);
+    lambdas.init_vector(&dense_weights);
     if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.'; }
     if (corpus.size() == cur_sent) {
       cerr << " [AVG METRIC LAST PASS=" << (tot_loss / corpus.size()) << "]\n";
-      ShowLargestFeatures(dense_weights);
+      Weights::ShowLargestFeatures(dense_weights);
       cur_sent = 0;
       tot_loss = 0;
       dots = 0;
       ostringstream os;
       os << "weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz";
-      weights.WriteToFile(os.str(), true, &msg);
       SparseVector<double> x = tot;
       x /= normalizer;
       ostringstream sa;
       sa << "weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "-avg.gz";
-      Weights ww;
-      ww.InitFromVector(x);
-      ww.WriteToFile(sa.str(), true, &msga);
+      x.init_vector(&dense_weights);
+      Weights::WriteToFile(os.str(), dense_weights, true, &msg);
       ++cur_pass;
       RandomPermutation(corpus.size(), &order);
     }
@@ -294,11 +260,11 @@ int main(int argc, char** argv) {
     ++cur_sent;
   }
   cerr << endl;
-  weights.WriteToFile("weights.mira-final.gz", true, &msg);
+  Weights::WriteToFile("weights.mira-final.gz", dense_weights, true, &msg);
   tot /= normalizer;
-  weights.InitFromVector(tot);
+  tot.init_vector(dense_weights);
   msg = "# MIRA tuned weights (averaged vector)";
-  weights.WriteToFile("weights.mira-final-avg.gz", true, &msg);
+  Weights::WriteToFile("weights.mira-final-avg.gz", dense_weights, true, &msg);
   cerr << "Optimization complete.\nAVERAGED WEIGHTS: weights.mira-final-avg.gz\n";
   return 0;
 }
diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc
index 4324e8de..bc59285b 100644
--- a/pro-train/mr_pro_map.cc
+++ b/pro-train/mr_pro_map.cc
@@ -301,12 +301,8 @@ int main(int argc, char** argv) {
   const unsigned gamma = conf["candidate_pairs"].as<unsigned>();
   const unsigned xi = conf["best_pairs"].as<unsigned>();
   string weightsf = conf["weights"].as<string>();
-  vector<double> weights;
-  {
-    Weights w;
-    w.InitFromFile(weightsf);
-    w.InitVector(&weights);
-  }
+  vector<weight_t> weights;
+  Weights::InitFromFile(weightsf, &weights);
   string kbest_repo = conf["kbest_repository"].as<string>();
   MkDirP(kbest_repo);
   while(in) {
diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc
index 9b422f33..9caaa1d1 100644
--- a/pro-train/mr_pro_reduce.cc
+++ b/pro-train/mr_pro_reduce.cc
@@ -194,7 +194,7 @@ int main(int argc, char** argv) {
   InitCommandLine(argc, argv, &conf);
   string line;
   vector<pair<bool, SparseVector<double> > > training, testing;
-  SparseVector<double> old_weights;
+  SparseVector<weight_t> old_weights;
   const bool tune_regularizer = conf.count("tune_regularizer");
   if (tune_regularizer && !conf.count("testset")) {
     cerr << "--tune_regularizer requires --testset to be set\n";
@@ -210,9 +210,9 @@ int main(int argc, char** argv) {
   const double psi = conf["interpolation"].as<double>();
   if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; }
   if (conf.count("weights")) {
-    Weights w;
-    w.InitFromFile(conf["weights"].as<string>());
-    w.InitSparseVector(&old_weights);
+    vector<weight_t> dt;
+    Weights::InitFromFile(conf["weights"].as<string>(), &dt);
+    Weights::InitSparseVector(dt, &old_weights);
   }
   ReadCorpus(&cin, &training);
   if (conf.count("testset")) {
@@ -220,8 +220,8 @@ int main(int argc, char** argv) {
     ReadCorpus(rf.stream(), &testing);
   }
   cerr << "Number of features: " << FD::NumFeats() << endl;
-  vector<double> x(FD::NumFeats(), 0.0);  // x[0] is bias
-  for (SparseVector<double>::const_iterator it = old_weights.begin();
+  vector<weight_t> x(FD::NumFeats(), 0.0);  // x[0] is bias
+  for (SparseVector<weight_t>::const_iterator it = old_weights.begin();
        it != old_weights.end(); ++it)
     x[it->first] = it->second;
   double tppl = 0.0;
@@ -257,7 +257,6 @@ int main(int argc, char** argv) {
     sigsq = sp[best_i].first;
     tppl = LearnParameters(training, testing, sigsq, conf["memory_buffers"].as<unsigned>(), &x);
   }
-  Weights w;
   if (conf.count("weights")) {
     for (int i = 1; i < x.size(); ++i)
       x[i] = (x[i] * psi) + old_weights.get(i) * (1.0 - psi);
@@ -271,7 +270,6 @@ int main(int argc, char** argv) {
       cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl;
     }
   }
-  w.InitFromVector(x);
-  w.WriteToFile("-");
+  Weights::WriteToFile("-", x);
   return 0;
 }
diff --git a/training/Makefile.am b/training/Makefile.am
index e075e417..6e2c06f5 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -12,9 +12,7 @@ bin_PROGRAMS = \
   cllh_filter_grammar \
   mpi_online_optimize \
   mpi_batch_optimize \
-  mpi_em_optimize \
   compute_cllh \
-  feature_expectations \
   augment_grammar
 
 noinst_PROGRAMS = \
@@ -29,12 +27,6 @@ mpi_online_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval
 mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc
 mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
 
-feature_expectations_SOURCES = feature_expectations.cc
-feature_expectations_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_em_optimize_SOURCES = mpi_em_optimize.cc optimize.cc
-mpi_em_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
 compute_cllh_SOURCES = compute_cllh.cc
 compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
 
diff --git a/training/augment_grammar.cc b/training/augment_grammar.cc
index df8d4ee8..e89a92d5 100644
--- a/training/augment_grammar.cc
+++ b/training/augment_grammar.cc
@@ -134,9 +134,7 @@ int main(int argc, char** argv) {
   } else { ngram = NULL; }
   extra_feature = conf.count("extra_lex_feature") > 0;
   if (conf.count("collapse_weights")) {
-    Weights w;
-    w.InitFromFile(conf["collapse_weights"].as<string>());
-    w.InitVector(&col_weights);
+    Weights::InitFromFile(conf["collapse_weights"].as<string>(), &col_weights);
   }
   clear_features = conf.count("clear_features_after_collapse") > 0;
   gather_rules = false;
diff --git a/training/collapse_weights.cc b/training/collapse_weights.cc
index 4fb742fb..dc480f6c 100644
--- a/training/collapse_weights.cc
+++ b/training/collapse_weights.cc
@@ -59,10 +59,8 @@ int main(int argc, char** argv) {
   InitCommandLine(argc, argv, &conf);
   const string wfile = conf["weights"].as<string>();
   const string gfile = conf["grammar"].as<string>();
-  Weights wm;
-  wm.InitFromFile(wfile);
-  vector<double> w;
-  wm.InitVector(&w);
+  vector<weight_t> w;
+  Weights::InitFromFile(wfile, &w);
   MarginalMap e_tots;
   MarginalMap f_tots;
   prob_t tot;
diff --git a/training/compute_cllh.cc b/training/compute_cllh.cc
index 332f6d0c..b496d196 100644
--- a/training/compute_cllh.cc
+++ b/training/compute_cllh.cc
@@ -148,15 +148,6 @@ int main(int argc, char** argv) {
   if (!InitCommandLine(argc, argv, &conf))
     return false;
 
-  // load initial weights
-  Weights weights;
-  if (conf.count("weights"))
-    weights.InitFromFile(conf["weights"].as<string>());
-
-  // freeze feature set
-  //const bool freeze_feature_set = conf.count("freeze_feature_set");
-  //if (freeze_feature_set) FD::Freeze();
-
   // load cdec.ini and set up decoder
   ReadFile ini_rf(conf["decoder_config"].as<string>());
   Decoder decoder(ini_rf.stream());
@@ -165,17 +156,22 @@ int main(int argc, char** argv) {
     abort();
   }
 
+  // load weights
+  vector<weight_t>& weights = decoder.CurrentWeightVector();
+  if (conf.count("weights"))
+    Weights::InitFromFile(conf["weights"].as<string>(), &weights);
+
+  // freeze feature set
+  //const bool freeze_feature_set = conf.count("freeze_feature_set");
+  //if (freeze_feature_set) FD::Freeze();
+
   vector<string> corpus; vector<int> ids;
   ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
   assert(corpus.size() > 0);
   assert(corpus.size() == ids.size());
 
-  vector<double> wv;
-  weights.InitVector(&wv);
-  decoder.SetWeights(wv);
   TrainingObserver observer;
   double objective = 0;
-  bool converged = false;
 
   observer.Reset();
   if (rank == 0)
@@ -197,3 +193,4 @@ int main(int argc, char** argv) {
 
   return 0;
 }
+
diff --git a/training/grammar_convert.cc b/training/grammar_convert.cc
index 8d292f8a..bf8abb26 100644
--- a/training/grammar_convert.cc
+++ b/training/grammar_convert.cc
@@ -251,12 +251,10 @@ int main(int argc, char **argv) {
   const bool is_split_input = (conf["format"].as<string>() == "split");
   const bool is_json_input = is_split_input || (conf["format"].as<string>() == "json");
   const bool collapse_weights = conf.count("collapse_weights");
-  Weights wts;
   vector<double> w;
-  if (conf.count("weights")) {
-    wts.InitFromFile(conf["weights"].as<string>());
-    wts.InitVector(&w);
-  }
+  if (conf.count("weights"))
+    Weights::InitFromFile(conf["weights"].as<string>(), &w);
+
   if (collapse_weights && !w.size()) {
     cerr << "--collapse_weights requires a weights file to be specified!\n";
     exit(1);
diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc
index 39a8af7d..cc5953f6 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/mpi_batch_optimize.cc
@@ -31,42 +31,12 @@ using namespace std;
 using boost::shared_ptr;
 namespace po = boost::program_options;
 
-void SanityCheck(const vector<double>& w) {
-  for (int i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
-  }
-}
-
-struct FComp {
-  const vector<double>& w_;
-  FComp(const vector<double>& w) : w_(w) {}
-  bool operator()(int a, int b) const {
-    return fabs(w_[a]) > fabs(w_[b]);
-  }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
-  vector<int> fnums(w.size());
-  for (int i = 0; i < w.size(); ++i)
-    fnums[i] = i;
-  vector<int>::iterator mid = fnums.begin();
-  mid += (w.size() > 10 ? 10 : w.size());
-  partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
-  cerr << "TOP FEATURES:";
-  for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
-    cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
-  }
-  cerr << endl;
-}
-
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
         ("input_weights,w",po::value<string>(),"Input feature weights file")
         ("training_data,t",po::value<string>(),"Training data")
         ("decoder_config,d",po::value<string>(),"Decoder configuration file")
-        ("sharded_input,s",po::value<string>(), "Corpus and grammar files are 'sharded' so each processor loads its own input and grammar file. Argument is the directory containing the shards.")
         ("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file")
         ("optimization_method,m", po::value<string>()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)")
 	("correction_buffers,M", po::value<int>()->default_value(10), "Number of gradients for LBFGS to maintain in memory")
@@ -88,14 +58,10 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   }
   po::notify(*conf);
 
-  if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data") | conf->count("sharded_input")) || !conf->count("decoder_config")) {
+  if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data")) || !conf->count("decoder_config")) {
     cerr << dcmdline_options << endl;
     return false;
   }
-  if (conf->count("training_data") && conf->count("sharded_input")) {
-    cerr << "Cannot specify both --training_data and --sharded_input\n";
-    return false;
-  }
   return true;
 }
 
@@ -236,42 +202,9 @@ int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
 
-  string shard_dir;
-  if (conf.count("sharded_input")) {
-    shard_dir = conf["sharded_input"].as<string>();
-    if (!DirectoryExists(shard_dir)) {
-      if (rank == 0) cerr << "Can't find shard directory: " << shard_dir << endl;
-      return 1;
-    }
-    if (rank == 0)
-      cerr << "Shard directory: " << shard_dir << endl;
-  }
-
-  // load initial weights
-  Weights weights;
-  if (rank == 0) { cerr << "Loading weights...\n"; }
-  weights.InitFromFile(conf["input_weights"].as<string>());
-  if (rank == 0) { cerr << "Done loading weights.\n"; }
-
-  // freeze feature set (should be optional?)
-  const bool freeze_feature_set = true;
-  if (freeze_feature_set) FD::Freeze();
-
   // load cdec.ini and set up decoder
   vector<string> cdec_ini;
   ReadConfig(conf["decoder_config"].as<string>(), &cdec_ini);
-  if (shard_dir.size()) {
-    if (rank == 0) {
-      for (int i = 0; i < cdec_ini.size(); ++i) {
-        if (cdec_ini[i].find("grammar=") == 0) {
-          cerr << "!!! using sharded input and " << conf["decoder_config"].as<string>() << " contains a grammar specification:\n" << cdec_ini[i] << "\n  VERIFY THAT THIS IS CORRECT!\n";
-        }
-      }
-    }
-    ostringstream g;
-    g << "grammar=" << shard_dir << "/grammar." << rank << "_of_" << size << ".gz";
-    cdec_ini.push_back(g.str());
-  }
   istringstream ini;
   StoreConfig(cdec_ini, &ini);
   if (rank == 0) cerr << "Loading grammar...\n";
@@ -282,22 +215,28 @@ int main(int argc, char** argv) {
   }
   if (rank == 0) cerr << "Done loading grammar!\n";
 
+  // load initial weights
+  if (rank == 0) { cerr << "Loading weights...\n"; }
+  vector<weight_t>& lambdas = decoder->CurrentWeightVector();
+  Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas);
+  if (rank == 0) { cerr << "Done loading weights.\n"; }
+
+  // freeze feature set (should be optional?)
+  const bool freeze_feature_set = true;
+  if (freeze_feature_set) FD::Freeze();
+
   const int num_feats = FD::NumFeats();
   if (rank == 0) cerr << "Number of features: " << num_feats << endl;
+  lambdas.resize(num_feats);
+
   const bool gaussian_prior = conf.count("gaussian_prior");
-  vector<double> means(num_feats, 0);
+  vector<weight_t> means(num_feats, 0);
   if (conf.count("means")) {
     if (!gaussian_prior) {
       cerr << "Don't use --means without --gaussian_prior!\n";
       exit(1);
     }
-    Weights wm; 
-    wm.InitFromFile(conf["means"].as<string>());
-    if (num_feats != FD::NumFeats()) {
-      cerr << "[ERROR] Means file had unexpected features!\n";
-      exit(1);
-    }
-    wm.InitVector(&means);
+    Weights::InitFromFile(conf["means"].as<string>(), &means);
   }
   shared_ptr<BatchOptimizer> o;
   if (rank == 0) {
@@ -309,26 +248,13 @@ int main(int argc, char** argv) {
     cerr << "Optimizer: " << o->Name() << endl;
   }
   double objective = 0;
-  vector<double> lambdas(num_feats, 0.0);
-  weights.InitVector(&lambdas);
-  if (lambdas.size() != num_feats) {
-    cerr << "Initial weights file did not have all features specified!\n  feats="
-         << num_feats << "\n  weights file=" << lambdas.size() << endl;
-    lambdas.resize(num_feats, 0.0);
-  }
   vector<double> gradient(num_feats, 0.0);
-  vector<double> rcv_grad(num_feats, 0.0);
+  vector<double> rcv_grad;
+  rcv_grad.clear();
   bool converged = false;
 
   vector<string> corpus;
-  if (shard_dir.size()) {
-    ostringstream os; os << shard_dir << "/corpus." << rank << "_of_" << size;
-    ReadTrainingCorpus(os.str(), 0, 1, &corpus);
-    cerr << os.str() << " has " << corpus.size() << " training examples. " << endl;
-    if (corpus.size() > 500) { corpus.resize(500); cerr << "  TRUNCATING\n"; }
-  } else {
-    ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus);
-  }
+  ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus);
   assert(corpus.size() > 0);
 
   TrainingObserver observer;
@@ -341,19 +267,20 @@ int main(int argc, char** argv) {
     if (rank == 0) {
       cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n";
     }
-    decoder->SetWeights(lambdas);
     for (int i = 0; i < corpus.size(); ++i)
       decoder->Decode(corpus[i], &observer);
     cerr << "  process " << rank << '/' << size << " done\n";
     fill(gradient.begin(), gradient.end(), 0);
-    fill(rcv_grad.begin(), rcv_grad.end(), 0);
     observer.SetLocalGradientAndObjective(&gradient, &objective);
 
     double to = 0;
 #ifdef HAVE_MPI
+    rcv_grad.resize(num_feats, 0.0);
     mpi::reduce(world, &gradient[0], gradient.size(), &rcv_grad[0], plus<double>(), 0);
-    mpi::reduce(world, objective, to, plus<double>(), 0);
     swap(gradient, rcv_grad);
+    rcv_grad.clear();
+
+    mpi::reduce(world, objective, to, plus<double>(), 0);
     objective = to;
 #endif
 
@@ -378,7 +305,7 @@ int main(int argc, char** argv) {
       for (int i = 0; i < gradient.size(); ++i)
         gnorm += gradient[i] * gradient[i];
       cerr << "  GNORM=" << sqrt(gnorm) << endl;
-      vector<double> old = lambdas;
+      vector<weight_t> old = lambdas;
       int c = 0;
       while (old == lambdas) {
         ++c;
@@ -387,9 +314,8 @@ int main(int argc, char** argv) {
         assert(c < 5);
       }
       old.clear();
-      SanityCheck(lambdas);
-      ShowLargestFeatures(lambdas);
-      weights.InitFromVector(lambdas);
+      Weights::SanityCheck(lambdas);
+      Weights::ShowLargestFeatures(lambdas);
 
       converged = o->HasConverged();
       if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; }
@@ -399,7 +325,7 @@ int main(int argc, char** argv) {
       ostringstream vv;
       vv << "Objective = " << objective << "  (eval count=" << o->EvaluationCount() << ")";
       const string svv = vv.str();
-      weights.WriteToFile(fname, true, &svv);
+      Weights::WriteToFile(fname, lambdas, true, &svv);
     }  // rank == 0
     int cint = converged;
 #ifdef HAVE_MPI
@@ -411,3 +337,4 @@ int main(int argc, char** argv) {
   }
   return 0;
 }
+
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 32033c19..2ef4a2e7 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -31,35 +31,6 @@ namespace mpi = boost::mpi;
 using namespace std;
 namespace po = boost::program_options;
 
-void SanityCheck(const vector<double>& w) {
-  for (int i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
-  }
-}
-
-struct FComp {
-  const vector<double>& w_;
-  FComp(const vector<double>& w) : w_(w) {}
-  bool operator()(int a, int b) const {
-    return fabs(w_[a]) > fabs(w_[b]);
-  }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
-  vector<int> fnums(w.size());
-  for (int i = 0; i < w.size(); ++i)
-    fnums[i] = i;
-  vector<int>::iterator mid = fnums.begin();
-  mid += (w.size() > 10 ? 10 : w.size());
-  partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
-  cerr << "TOP FEATURES:";
-  for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
-    cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
-  }
-  cerr << endl;
-}
-
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
@@ -250,10 +221,25 @@ int main(int argc, char** argv) {
   if (!InitCommandLine(argc, argv, &conf))
     return 1;
 
+  vector<pair<string, int> > agenda;
+  if (!LoadAgenda(conf["training_agenda"].as<string>(), &agenda))
+    return 1;
+  if (rank == 0)
+    cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n";
+
+  assert(agenda.size() > 0);
+
+  if (1) {  // hack to load the feature hash functions -- TODO this should not be in cdec.ini
+    const string& cur_config = agenda[0].first;
+    const unsigned max_iteration = agenda[0].second;
+    ReadFile ini_rf(cur_config);
+    Decoder decoder(ini_rf.stream());
+  }
+
   // load initial weights
-  Weights weights;
+  vector<weight_t> init_weights;
   if (conf.count("input_weights"))
-    weights.InitFromFile(conf["input_weights"].as<string>());
+    Weights::InitFromFile(conf["input_weights"].as<string>(), &init_weights);
 
   vector<int> frozen_fids;
   if (conf.count("frozen_features")) {
@@ -310,19 +296,12 @@ int main(int argc, char** argv) {
     rng.reset(new MT19937);
 
   SparseVector<double> x;
-  weights.InitSparseVector(&x);
+  Weights::InitSparseVector(init_weights, &x);
   TrainingObserver observer;
 
   int write_weights_every_ith = 100; // TODO configure
   int titer = -1;
 
-  vector<pair<string, int> > agenda;
-  if (!LoadAgenda(conf["training_agenda"].as<string>(), &agenda))
-    return 1;
-  if (rank == 0)
-    cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n";
-
-  vector<double> lambdas;
   for (int ai = 0; ai < agenda.size(); ++ai) {
     const string& cur_config = agenda[ai].first;
     const unsigned max_iteration = agenda[ai].second;
@@ -331,6 +310,8 @@ int main(int argc, char** argv) {
     // load cdec.ini and set up decoder
     ReadFile ini_rf(cur_config);
     Decoder decoder(ini_rf.stream());
+    vector<weight_t>& lambdas = decoder.CurrentWeightVector();
+    if (ai == 0) { lambdas.swap(init_weights); init_weights.clear(); }
 
     if (rank == 0)
       o->ResetEpoch(); // resets the learning rate-- TODO is this good?
@@ -341,15 +322,13 @@ int main(int argc, char** argv) {
 #ifdef HAVE_MPI
       mpi::timer timer;
 #endif
-      weights.InitFromVector(x);
-      weights.InitVector(&lambdas);
+      x.init_vector(&lambdas);
       ++iter; ++titer;
       observer.Reset();
-      decoder.SetWeights(lambdas);
       if (rank == 0) {
         converged = (iter == max_iteration);
-        SanityCheck(lambdas);
-        ShowLargestFeatures(lambdas);
+        Weights::SanityCheck(lambdas);
+        Weights::ShowLargestFeatures(lambdas);
         string fname = "weights.cur.gz";
         if (iter % write_weights_every_ith == 0) {
           ostringstream o; o << "weights.epoch_" << (ai+1) << '.' << iter << ".gz";
@@ -360,7 +339,7 @@ int main(int argc, char** argv) {
         vv << "total iter=" << titer << " (of current config iter=" << iter << ")  minibatch=" << size_per_proc << " sentences/proc x " << size << " procs.   num_feats=" << x.size() << '/' << FD::NumFeats() << "   passes_thru_data=" << (titer * size_per_proc / static_cast<double>(corpus.size())) << "   eta=" << lr->eta(titer);
         const string svv = vv.str();
         cerr << svv << endl;
-        weights.WriteToFile(fname, true, &svv);
+        Weights::WriteToFile(fname, lambdas, true, &svv);
       }
 
       for (int i = 0; i < size_per_proc; ++i) {
diff --git a/training/mr_optimize_reduce.cc b/training/mr_optimize_reduce.cc
index b931991d..15e28fa1 100644
--- a/training/mr_optimize_reduce.cc
+++ b/training/mr_optimize_reduce.cc
@@ -88,25 +88,19 @@ int main(int argc, char** argv) {
 
   const bool use_b64 = conf["input_format"].as<string>() == "b64";
 
-  Weights weights;
-  weights.InitFromFile(conf["input_weights"].as<string>());
+  vector<weight_t> lambdas;
+  Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas);
   const string s_obj = "**OBJ**";
   int num_feats = FD::NumFeats();
   cerr << "Number of features: " << num_feats << endl;
   const bool gaussian_prior = conf.count("gaussian_prior");
-  vector<double> means(num_feats, 0);
+  vector<weight_t> means(num_feats, 0);
   if (conf.count("means")) {
     if (!gaussian_prior) {
       cerr << "Don't use --means without --gaussian_prior!\n";
       exit(1);
     }
-    Weights wm; 
-    wm.InitFromFile(conf["means"].as<string>());
-    if (num_feats != FD::NumFeats()) {
-      cerr << "[ERROR] Means file had unexpected features!\n";
-      exit(1);
-    }
-    wm.InitVector(&means);
+    Weights::InitFromFile(conf["means"].as<string>(), &means);
   }
   shared_ptr<BatchOptimizer> o;
   const string omethod = conf["optimization_method"].as<string>();
@@ -124,8 +118,6 @@ int main(int argc, char** argv) {
       cerr << "No state file found, assuming ITERATION 1\n";
   }
 
-  vector<double> lambdas(num_feats, 0);
-  weights.InitVector(&lambdas);
   double objective = 0;
   vector<double> gradient(num_feats, 0);
   // 0<TAB>**OBJ**=12.2;Feat1=2.3;Feat2=-0.2;
@@ -223,8 +215,7 @@ int main(int argc, char** argv) {
   old.clear();
   SanityCheck(lambdas);
   ShowLargestFeatures(lambdas);
-  weights.InitFromVector(lambdas);
-  weights.WriteToFile(conf["output_weights"].as<string>(), false);
+  Weights::WriteToFile(conf["output_weights"].as<string>(), lambdas, false);
 
   const bool conv = o->HasConverged();
   if (conv) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; }
diff --git a/utils/fdict.h b/utils/fdict.h
index 771e8b91..f0871b9a 100644
--- a/utils/fdict.h
+++ b/utils/fdict.h
@@ -28,6 +28,8 @@ struct FD {
   }
   static void EnableHash(const std::string& cmph_file) {
 #ifdef HAVE_CMPH
+    assert(dict_.max() == 0);  // dictionary must not have
+                               // been added to
     hash_ = new PerfectHashFunction(cmph_file);
 #endif
   }
diff --git a/utils/phmt.cc b/utils/phmt.cc
index 1f59afaf..48d9f093 100644
--- a/utils/phmt.cc
+++ b/utils/phmt.cc
@@ -19,22 +19,18 @@ int main(int argc, char** argv) {
   cerr << "LexFE = " << FD::Convert("LexFE") << endl;
   cerr << "LexEF = " << FD::Convert("LexEF") << endl;
   {
-    Weights w;
     vector<weight_t> v(FD::NumFeats());
     v[FD::Convert("LexFE")] = 1.0;
     v[FD::Convert("LexEF")] = 0.5;
-    w.InitFromVector(v);
     cerr << "Writing...\n";
-    w.WriteToFile("weights.bin");
+    Weights::WriteToFile("weights.bin", v);
     cerr << "Done.\n";
   }
   {
-    Weights w;
     vector<weight_t> v(FD::NumFeats());
     cerr << "Reading...\n";
-    w.InitFromFile("weights.bin");
+    Weights::InitFromFile("weights.bin", &v);
     cerr << "Done.\n";
-    w.InitVector(&v);
     assert(v[FD::Convert("LexFE")] == 1.0);
     assert(v[FD::Convert("LexEF")] == 0.5);
   }
diff --git a/utils/weights.cc b/utils/weights.cc
index 0916b72a..c49000be 100644
--- a/utils/weights.cc
+++ b/utils/weights.cc
@@ -8,7 +8,10 @@
 
 using namespace std;
 
-void Weights::InitFromFile(const std::string& filename, vector<string>* feature_list) {
+void Weights::InitFromFile(const string& filename,
+                           vector<weight_t>* pweights,
+                           vector<string>* feature_list) {
+  vector<weight_t>& weights = *pweights;
   if (!SILENT) cerr << "Reading weights from " << filename << endl;
   ReadFile in_file(filename);
   istream& in = *in_file.stream();
@@ -47,16 +50,16 @@ void Weights::InitFromFile(const std::string& filename, vector<string>* feature_
       int end = 0;
       while(end < buf.size() && buf[end] != ' ') ++end;
       const int fid = FD::Convert(buf.substr(start, end - start));
+      if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); }
       while(end < buf.size() && buf[end] == ' ') ++end;
       val = strtod(&buf.c_str()[end], NULL);
       if (isnan(val)) {
         cerr << FD::Convert(fid) << " has weight NaN!\n";
         abort();
       }
-      if (wv_.size() <= fid)
-        wv_.resize(fid + 1);
-      wv_[fid] = val;
-      if (feature_list) { feature_list->push_back(FD::Convert(fid)); }
+      if (weights.size() <= fid)
+        weights.resize(fid + 1);
+      weights[fid] = val;
       ++weight_count;
       if (!SILENT) {
         if (weight_count %   50000 == 0) { cerr << '.' << flush; fl = true; }
@@ -76,8 +79,8 @@ void Weights::InitFromFile(const std::string& filename, vector<string>* feature_
       cerr << "Hash function reports " << FD::NumFeats() << " keys but weights file contains " << num_keys[0] << endl;
       abort();
     }
-    wv_.resize(num_keys[0]);
-    in.get(reinterpret_cast<char*>(&wv_[0]), num_keys[0] * sizeof(weight_t));
+    weights.resize(num_keys[0]);
+    in.get(reinterpret_cast<char*>(&weights[0]), num_keys[0] * sizeof(weight_t));
     if (!in.good()) {
       cerr << "Error loading weights!\n";
       abort();
@@ -85,7 +88,10 @@ void Weights::InitFromFile(const std::string& filename, vector<string>* feature_
   }
 }
 
-void Weights::WriteToFile(const std::string& fname, bool hide_zero_value_features, const string* extra) const {
+void Weights::WriteToFile(const string& fname,
+                          const vector<weight_t>& weights,
+                          bool hide_zero_value_features,
+                          const string* extra) {
   WriteFile out(fname);
   ostream& o = *out.stream();
   assert(o);
@@ -96,41 +102,54 @@ void Weights::WriteToFile(const std::string& fname, bool hide_zero_value_feature
     o.precision(17);
     const int num_feats = FD::NumFeats();
     for (int i = 1; i < num_feats; ++i) {
-      const weight_t val = (i < wv_.size() ? wv_[i] : 0.0);
+      const weight_t val = (i < weights.size() ? weights[i] : 0.0);
       if (hide_zero_value_features && val == 0.0) continue;
       o << FD::Convert(i) << ' ' << val << endl;
     }
   } else {
     o.write("_PHWf", 5);
     const size_t keys = FD::NumFeats();
-    assert(keys <= wv_.size());
+    assert(keys <= weights.size());
     o.write(reinterpret_cast<const char*>(&keys), sizeof(keys));
-    o.write(reinterpret_cast<const char*>(&wv_[0]), keys * sizeof(weight_t));
+    o.write(reinterpret_cast<const char*>(&weights[0]), keys * sizeof(weight_t));
   }
 }
 
-void Weights::InitVector(std::vector<weight_t>* w) const {
-  *w = wv_;
+void Weights::InitSparseVector(const vector<weight_t>& dv,
+                               SparseVector<weight_t>* sv) {
+  sv->clear();
+  for (unsigned i = 1; i < dv.size(); ++i) {
+    if (dv[i]) sv->set_value(i, dv[i]);
+  }
 }
 
-void Weights::InitSparseVector(SparseVector<weight_t>* w) const {
-  for (int i = 1; i < wv_.size(); ++i) {
-    const weight_t& weight = wv_[i];
-    if (weight) w->set_value(i, weight);
+void Weights::SanityCheck(const vector<weight_t>& w) {
+  for (int i = 0; i < w.size(); ++i) {
+    assert(!isnan(w[i]));
+    assert(!isinf(w[i]));
   }
 }
 
-void Weights::InitFromVector(const std::vector<weight_t>& w) {
-  wv_ = w;
-  if (wv_.size() > FD::NumFeats())
-    cerr << "WARNING: initializing weight vector has more features than the global feature dictionary!\n";
-  wv_.resize(FD::NumFeats(), 0);
-}
+struct FComp {
+  const vector<weight_t>& w_;
+  FComp(const vector<weight_t>& w) : w_(w) {}
+  bool operator()(int a, int b) const {
+    return fabs(w_[a]) > fabs(w_[b]);
+  }
+};
 
-void Weights::InitFromVector(const SparseVector<weight_t>& w) {
-  wv_.clear();
-  wv_.resize(FD::NumFeats(), 0.0);
-  for (int i = 1; i < FD::NumFeats(); ++i)
-    wv_[i] = w.value(i);
+void Weights::ShowLargestFeatures(const vector<weight_t>& w) {
+  vector<int> fnums(w.size());
+  for (int i = 0; i < w.size(); ++i)
+    fnums[i] = i;
+  vector<int>::iterator mid = fnums.begin();
+  mid += (w.size() > 10 ? 10 : w.size());
+  partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
+  cerr << "TOP FEATURES:";
+  for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
+    cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
+  }
+  cerr << endl;
 }
 
+
diff --git a/utils/weights.h b/utils/weights.h
index 7664810b..30f71db0 100644
--- a/utils/weights.h
+++ b/utils/weights.h
@@ -10,15 +10,21 @@ typedef double weight_t;
 
 class Weights {
  public:
-  Weights() {}
-  void InitFromFile(const std::string& fname, std::vector<std::string>* feature_list = NULL);
-  void WriteToFile(const std::string& fname, bool hide_zero_value_features = true, const std::string* extra = NULL) const;
-  void InitVector(std::vector<weight_t>* w) const;
-  void InitSparseVector(SparseVector<weight_t>* w) const;
-  void InitFromVector(const std::vector<weight_t>& w);
-  void InitFromVector(const SparseVector<weight_t>& w);
+  static void InitFromFile(const std::string& fname,
+                           std::vector<weight_t>* weights,
+                           std::vector<std::string>* feature_list = NULL);
+  static void WriteToFile(const std::string& fname,
+                          const std::vector<weight_t>& weights,
+                          bool hide_zero_value_features = true,
+                          const std::string* extra = NULL);
+  static void InitSparseVector(const std::vector<weight_t>& dv,
+                               SparseVector<weight_t>* sv);
+  // check for infinities, NaNs, etc
+  static void SanityCheck(const std::vector<weight_t>& w);
+  // write weights with largest magnitude to cerr
+  static void ShowLargestFeatures(const std::vector<weight_t>& w);
  private:
-  std::vector<weight_t> wv_;
+  Weights();
 };
 
 #endif
diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc
index b84c44bc..0c094fd5 100644
--- a/vest/mr_vest_generate_mapper_input.cc
+++ b/vest/mr_vest_generate_mapper_input.cc
@@ -223,16 +223,16 @@ struct oracle_directions {
     cerr << "Forest repo: " << forest_repository << endl;
     assert(DirectoryExists(forest_repository));
     vector<string> features;
-    weights.InitFromFile(weights_file, &features);
+    vector<weight_t> dorigin;
+    Weights::InitFromFile(weights_file, &dorigin, &features);
     if (optimize_features.size())
       features=optimize_features;
-    weights.InitSparseVector(&origin);
+    Weights::InitSparseVector(dorigin, &origin);
     fids.clear();
     AddFeatureIds(features);
     oracles.resize(dev_set_size);
   }
 
-  Weights weights;
   void AddFeatureIds(vector<string> const& features) {
     int i = fids.size();
     fids.resize(fids.size()+features.size());
-- 
cgit v1.2.3


From bff9f7f6e3ed777c9379c0373657eeaf43a6a213 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 13 Sep 2011 17:57:32 +0100
Subject: fix for crash with no rescoring

---
 decoder/decoder.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'decoder/decoder.cc')

diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 4d4b6245..45404c47 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -181,7 +181,7 @@ struct DecoderImpl {
   ~DecoderImpl();
   bool Decode(const string& input, DecoderObserver*);
   vector<weight_t>& CurrentWeightVector() {
-    return *rescoring_passes.back().weight_vector;
+    return (rescoring_passes.empty() ? *init_weights : *rescoring_passes.back().weight_vector);
   }
   void SetId(int next_sent_id) { sent_id = next_sent_id - 1; }
 
-- 
cgit v1.2.3


From e1b61419329c83709018ca397a29d069e4294bd1 Mon Sep 17 00:00:00 2001
From: Guest_account Guest_account prguest11 <prguest11@taipan.cs>
Date: Fri, 23 Sep 2011 15:44:35 +0100
Subject: make show_partition work even in absence of feature functions

---
 decoder/decoder.cc | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'decoder/decoder.cc')

diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 45404c47..c4fe3c4d 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -794,6 +794,11 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
     cerr << "  Expected length  (words): " << res.r / res.p << "\t" << res << endl;
   }
 
+  if (conf.count("show_partition")) {
+    const prob_t z = Inside<prob_t, EdgeProb>(forest);
+    cerr << "  Partition         log(Z): " << log(z) << endl;
+  }
+
   SummaryFeature summary_feature_type = kNODE_RISK;
   if (conf["summary_feature_type"].as<string>() == "edge_risk")
     summary_feature_type = kEDGE_RISK;
-- 
cgit v1.2.3


From 0af7d663194beddcde420349bbd91430e0b2e423 Mon Sep 17 00:00:00 2001
From: Guest_account Guest_account prguest11 <prguest11@taipan.cs>
Date: Tue, 11 Oct 2011 16:16:53 +0100
Subject: remove implicit conversion-to-double operator from LogVal<T> that
 caused overflow errors, clean up some pf code

---
 decoder/aligner.cc              |  2 +-
 decoder/cfg.cc                  |  2 +-
 decoder/cfg_format.h            |  2 +-
 decoder/decoder.cc              | 10 ++++----
 decoder/hg.cc                   |  4 ++--
 decoder/rule_lexer.l            |  2 ++
 decoder/trule.h                 | 15 +++++++++++-
 gi/pf/brat.cc                   | 11 ---------
 gi/pf/cbgi.cc                   | 10 --------
 gi/pf/dpnaive.cc                | 12 ----------
 gi/pf/itg.cc                    | 11 ---------
 gi/pf/pfbrat.cc                 | 11 ---------
 gi/pf/pfdist.cc                 | 11 ---------
 gi/pf/pfnaive.cc                | 11 ---------
 mteval/mbr_kbest.cc             |  4 ++--
 phrasinator/ccrp_nt.h           | 24 +++++++++++++++----
 training/mpi_batch_optimize.cc  |  2 +-
 training/mpi_compute_cllh.cc    | 51 +++++++++++++++++++----------------------
 training/mpi_online_optimize.cc |  4 ++--
 utils/logval.h                  | 10 ++++----
 20 files changed, 78 insertions(+), 131 deletions(-)

(limited to 'decoder/decoder.cc')

diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index 292ee123..53e059fb 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -165,7 +165,7 @@ inline void WriteProbGrid(const Array2D<prob_t>& m, ostream* pos) {
       if (m(i,j) == prob_t::Zero()) {
         os << "\t---X---";
       } else {
-        snprintf(b, 1024, "%0.5f", static_cast<double>(m(i,j)));
+        snprintf(b, 1024, "%0.5f", m(i,j).as_float());
         os << '\t' << b;
       }
     }
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index 651978d2..cd7e66e9 100755
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -639,7 +639,7 @@ void CFG::Print(std::ostream &o,CFGFormat const& f) const {
     o << '['<<f.goal_nt_name <<']';
     WordID rhs=-goal_nt;
     f.print_rhs(o,*this,&rhs,&rhs+1);
-    if (pushed_inside!=1)
+    if (pushed_inside!=prob_t::One())
       f.print_features(o,pushed_inside);
     o<<'\n';
   }
diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h
index c6a594b8..2f40d483 100755
--- a/decoder/cfg_format.h
+++ b/decoder/cfg_format.h
@@ -101,7 +101,7 @@ struct CFGFormat {
   }
 
   void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const {
-    bool logp=(logprob_feat && p!=1);
+    bool logp=(logprob_feat && p!=prob_t::One());
     if (features || logp) {
       o << partsep;
       if (logp)
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index c4fe3c4d..3b53fd6b 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -325,7 +325,7 @@ struct DecoderImpl {
 
   static void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
     for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
-      trg->set_value(it->first, it->second);
+      trg->set_value(it->first, it->second.as_float());
   }
 };
 
@@ -788,10 +788,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
   const bool show_tree_structure=conf.count("show_tree_structure");
   if (!SILENT) forest_stats(forest,"  Init. forest",show_tree_structure,oracle.show_derivation);
   if (conf.count("show_expected_length")) {
-    const PRPair<double, double> res =
-      Inside<PRPair<double, double>,
-             PRWeightFunction<double, EdgeProb, double, ELengthWeightFunction> >(forest);
-    cerr << "  Expected length  (words): " << res.r / res.p << "\t" << res << endl;
+    const PRPair<prob_t, prob_t> res =
+      Inside<PRPair<prob_t, prob_t>,
+             PRWeightFunction<prob_t, EdgeProb, prob_t, ELengthWeightFunction> >(forest);
+    cerr << "  Expected length  (words): " << (res.r / res.p).as_float() << "\t" << res << endl;
   }
 
   if (conf.count("show_partition")) {
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 3ad17f1a..180986d7 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -157,14 +157,14 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector<prob_t>* posts) co
   const ScaledEdgeProb weight(scale);
   const ScaledTransitionEventWeightFunction w2(scale);
   SparseVector<prob_t> pv;
-  const double inside = InsideOutside<prob_t,
+  const prob_t inside = InsideOutside<prob_t,
                   ScaledEdgeProb,
                   SparseVector<prob_t>,
                   ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2);
   posts->resize(edges_.size());
   for (int i = 0; i < edges_.size(); ++i)
     (*posts)[i] = prob_t(pv.value(i));
-  return prob_t(inside);
+  return inside;
 }
 
 prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const {
diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l
index 9331d8ed..083a5bb1 100644
--- a/decoder/rule_lexer.l
+++ b/decoder/rule_lexer.l
@@ -220,6 +220,8 @@ NT [^\t \[\],]+
                   std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n";
                   abort();
                 }
+		// const bool ignore_grammar_features = false;
+		// if (ignore_grammar_features) scfglex_num_feats = 0;
 		TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als));
     check_and_update_ctf_stack(rp);
     TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top());
diff --git a/decoder/trule.h b/decoder/trule.h
index 4df4ec90..8eb2a059 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -5,7 +5,9 @@
 #include <vector>
 #include <cassert>
 #include <iostream>
-#include <boost/shared_ptr.hpp>
+
+#include "boost/shared_ptr.hpp"
+#include "boost/functional/hash.hpp"
 
 #include "sparse_vector.h"
 #include "wordid.h"
@@ -162,4 +164,15 @@ class TRule {
   bool SanityCheck() const;
 };
 
+inline size_t hash_value(const TRule& r) {
+  size_t h = boost::hash_value(r.e_);
+  boost::hash_combine(h, -r.lhs_);
+  boost::hash_combine(h, boost::hash_value(r.f_));
+  return h;
+}
+
+inline bool operator==(const TRule& a, const TRule& b) {
+  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
+}
+
 #endif
diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc
index 4c6ba3ef..7b60ef23 100644
--- a/gi/pf/brat.cc
+++ b/gi/pf/brat.cc
@@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
 struct FSTState;
 
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc
index 20204e8a..97f1ba34 100644
--- a/gi/pf/cbgi.cc
+++ b/gi/pf/cbgi.cc
@@ -27,16 +27,6 @@ double log_decay(unsigned x, const double& b) {
   return log(b - 1) - x * log(b);
 }
 
-size_t hash_value(const TRule& r) {
-  // TODO fix hash function
-  size_t h = boost::hash_value(r.e_) * boost::hash_value(r.f_) * r.lhs_;
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 struct SimpleBase {
   SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) :
     uniform_e(-log(esize)),
diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc
index 582d1be7..608f73d5 100644
--- a/gi/pf/dpnaive.cc
+++ b/gi/pf/dpnaive.cc
@@ -20,18 +20,6 @@ namespace po = boost::program_options;
 
 static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
-struct FSTState;
-
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
 
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc
index 2c2a86f9..ac3c16a3 100644
--- a/gi/pf/itg.cc
+++ b/gi/pf/itg.cc
@@ -27,17 +27,6 @@ ostream& operator<<(ostream& os, const vector<WordID>& p) {
   return os << ']';
 }
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc
index 4c6ba3ef..7b60ef23 100644
--- a/gi/pf/pfbrat.cc
+++ b/gi/pf/pfbrat.cc
@@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
 struct FSTState;
 
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc
index 18dfd03b..81abd61b 100644
--- a/gi/pf/pfdist.cc
+++ b/gi/pf/pfdist.cc
@@ -24,17 +24,6 @@ namespace po = boost::program_options;
 
 shared_ptr<MT19937> prng;
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc
index 43c604c3..c30e7c4f 100644
--- a/gi/pf/pfnaive.cc
+++ b/gi/pf/pfnaive.cc
@@ -24,17 +24,6 @@ namespace po = boost::program_options;
 
 shared_ptr<MT19937> prng;
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index 2867b36b..64a6a8bf 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -32,7 +32,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 }
 
 struct LossComparer {
-  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const {
+  bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const {
     return a.second < b.second;
   }
 };
@@ -108,7 +108,7 @@ int main(int argc, char** argv) {
           ScoreP s = scorer->ScoreCandidate(list[j].first);
           double loss = 1.0 - s->ComputeScore();
           if (type == TER || type == AER) loss = 1.0 - loss;
-          double weighted_loss = loss * (joints[j] / marginal);
+          double weighted_loss = loss * (joints[j] / marginal).as_float();
           wl_acc += weighted_loss;
           if ((!output_list) && wl_acc > mbr_loss) break;
         }
diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h
index 163b643a..811bce73 100644
--- a/phrasinator/ccrp_nt.h
+++ b/phrasinator/ccrp_nt.h
@@ -50,15 +50,26 @@ class CCRP_NoTable {
     return it->second;
   }
 
-  void increment(const Dish& dish) {
-    ++custs_[dish];
+  int increment(const Dish& dish) {
+    int table_diff = 0;
+    if (++custs_[dish] == 1)
+      table_diff = 1;
     ++num_customers_;
+    return table_diff;
   }
 
-  void decrement(const Dish& dish) {
-    if ((--custs_[dish]) == 0)
+  int decrement(const Dish& dish) {
+    int table_diff = 0;
+    int nc = --custs_[dish];
+    if (nc == 0) {
       custs_.erase(dish);
+      table_diff = -1;
+    } else if (nc < 0) {
+      std::cerr << "Dish counts dropped below zero for: " << dish << std::endl;
+      abort();
+    }
     --num_customers_;
+    return table_diff;
   }
 
   double prob(const Dish& dish, const double& p0) const {
@@ -66,6 +77,11 @@ class CCRP_NoTable {
     return (at_table + p0 * concentration_) / (num_customers_ + concentration_);
   }
 
+  double logprob(const Dish& dish, const double& logp0) const {
+    const unsigned at_table = num_customers(dish);
+    return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_);
+  }
+
   double log_crp_prob() const {
     return log_crp_prob(concentration_);
   }
diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc
index 0ba8c530..046e921c 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/mpi_batch_optimize.cc
@@ -92,7 +92,7 @@ struct TrainingObserver : public DecoderObserver {
   void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
     *o = acc_obj;
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      (*g)[it->first] = it->second;
+      (*g)[it->first] = it->second.as_float();
   }
 
   virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
diff --git a/training/mpi_compute_cllh.cc b/training/mpi_compute_cllh.cc
index b496d196..d5caa745 100644
--- a/training/mpi_compute_cllh.cc
+++ b/training/mpi_compute_cllh.cc
@@ -1,6 +1,4 @@
-#include <sstream>
 #include <iostream>
-#include <fstream>
 #include <vector>
 #include <cassert>
 #include <cmath>
@@ -12,6 +10,7 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "sentence_metadata.h"
 #include "verbose.h"
 #include "hg.h"
 #include "prob.h"
@@ -52,7 +51,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   return true;
 }
 
-void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c, vector<int>* ids) {
+void ReadInstances(const string& fname, int rank, int size, vector<string>* c) {
+  assert(fname != "-");
   ReadFile rf(fname);
   istream& in = *rf.stream();
   string line;
@@ -60,20 +60,16 @@ void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>*
   while(in) {
     getline(in, line);
     if (!in) break;
-    if (lc % size == rank) {
-      c->push_back(line);
-      ids->push_back(lc);
-    }
+    if (lc % size == rank) c->push_back(line);
     ++lc;
   }
 }
 
 static const double kMINUS_EPSILON = -1e-6;
 
-struct TrainingObserver : public DecoderObserver {
-  void Reset() {
-    acc_obj = 0;
-  } 
+struct ConditionalLikelihoodObserver : public DecoderObserver {
+
+  ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {}
 
   virtual void NotifyDecodingStart(const SentenceMetadata&) {
     cur_obj = 0;
@@ -120,8 +116,10 @@ struct TrainingObserver : public DecoderObserver {
     }
     assert(!isnan(log_ref_z));
     acc_obj += (cur_obj - log_ref_z);
+    trg_words += smeta.GetReference().size();
   }
 
+  unsigned trg_words;
   double acc_obj;
   double cur_obj;
   int state;
@@ -161,35 +159,32 @@ int main(int argc, char** argv) {
   if (conf.count("weights"))
     Weights::InitFromFile(conf["weights"].as<string>(), &weights);
 
-  // freeze feature set
-  //const bool freeze_feature_set = conf.count("freeze_feature_set");
-  //if (freeze_feature_set) FD::Freeze();
-
-  vector<string> corpus; vector<int> ids;
-  ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
+  vector<string> corpus;
+  ReadInstances(conf["training_data"].as<string>(), rank, size, &corpus);
   assert(corpus.size() > 0);
-  assert(corpus.size() == ids.size());
-
-  TrainingObserver observer;
-  double objective = 0;
 
-  observer.Reset();
   if (rank == 0)
-    cerr << "Each processor is decoding " << corpus.size() << " training examples...\n";
+    cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n";
 
-  for (int i = 0; i < corpus.size(); ++i) {
-    decoder.SetId(ids[i]);
+  ConditionalLikelihoodObserver observer;
+  for (int i = 0; i < corpus.size(); ++i)
     decoder.Decode(corpus[i], &observer);
-  }
 
+  double objective = 0;
+  unsigned total_words = 0;
 #ifdef HAVE_MPI
   reduce(world, observer.acc_obj, objective, std::plus<double>(), 0);
+  reduce(world, observer.trg_words, total_words, std::plus<unsigned>(), 0);
 #else
   objective = observer.acc_obj;
 #endif
 
-  if (rank == 0)
-    cout << "OBJECTIVE: " << objective << endl;
+  if (rank == 0) {
+    cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl;
+    cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl;
+    cout << "         CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl;
+    cout << "                  PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl;
+  }
 
   return 0;
 }
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 2ef4a2e7..f87b7274 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -94,7 +94,7 @@ struct TrainingObserver : public DecoderObserver {
   void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
     *o = acc_obj;
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      (*g)[it->first] = it->second;
+      (*g)[it->first] = it->second.as_float();
   }
 
   virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
@@ -158,7 +158,7 @@ struct TrainingObserver : public DecoderObserver {
   void GetGradient(SparseVector<double>* g) const {
     g->clear();
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      g->set_value(it->first, it->second);
+      g->set_value(it->first, it->second.as_float());
   }
 
   int total_complete;
diff --git a/utils/logval.h b/utils/logval.h
index 6fdc2c42..8a59d0b1 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -25,12 +25,13 @@ class LogVal {
   typedef LogVal<T> Self;
 
   LogVal() : s_(), v_(LOGVAL_LOG0) {}
-  explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+  LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+  const Self& operator=(double x) { s_ = std::signbit(x); v_ = s_ ? std::log(-x) : std::log(x); return *this; }
   LogVal(init_minus_1) : s_(true),v_(0) {  }
   LogVal(init_1) : s_(),v_(0) {  }
   LogVal(init_0) : s_(),v_(LOGVAL_LOG0) {  }
-  LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
-  LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
+  explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
+  explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
   LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}
   LogVal(double lnx,init_lnx) : s_(),v_(lnx) {}
   static Self exp(T lnx) { return Self(lnx,false); }
@@ -141,9 +142,6 @@ class LogVal {
     return pow(1/root);
   }
 
-  operator T() const {
-    if (s_) return -std::exp(v_); else return std::exp(v_);
-  }
   T as_float() const {
     if (s_) return -std::exp(v_); else return std::exp(v_);
   }
-- 
cgit v1.2.3