From 2903e0a0daf941b20da812149f647cd4e0f4dd66 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Sat, 18 Feb 2012 22:09:47 -0500
Subject: initial lbl_model stub

---
 training/lbl_model.cc | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 training/lbl_model.cc

(limited to 'training/lbl_model.cc')
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
new file mode 100644
index 00000000..72d80a56
--- /dev/null
+++ b/training/lbl_model.cc
@@ -0,0 +1,131 @@
+#include <iostream>
+
+#include "config.h"
+#ifndef HAVE_EIGEN
+  int main() { std::cerr << "Please rebuild with --with-eigen PATH\n"; return 1; }
+#else
+
+#include <cmath>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+#include <Eigen/Dense>
+
+#include "m.h"
+#include "lattice.h"
+#include "stringlib.h"
+#include "filelib.h"
+#include "tdict.h"
+
+namespace po = boost::program_options;
+using namespace std;
+
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("iterations,i",po::value<unsigned>()->default_value(5),"Number of iterations of training")
+        ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)")
+        ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model");
+  po::options_description clo("Command line options");
+  clo.add_options()
+        ("config", po::value<string>(), "Configuration file")
+        ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+  
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+
+  if (argc < 2 || conf->count("help")) {
+    cerr << "Usage " << argv[0] << " [OPTIONS] corpus.fr-en\n";
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  if (!InitCommandLine(argc, argv, &conf)) return 1;
+  const string fname = argv[argc - 1];
+  const int ITERATIONS = conf["iterations"].as<unsigned>();
+  const double diagonal_tension = conf["diagonal_tension"].as<double>();
+  string testset;
+  if (conf.count("testset")) testset = conf["testset"].as<string>();
+
+  double tot_len_ratio = 0;
+  double mean_srclen_multiplier = 0;
+  vector<double> unnormed_a_i;
+  for (int iter = 0; iter < ITERATIONS; ++iter) {
+    cerr << "ITERATION " << (iter + 1) << endl;
+    ReadFile rf(fname);
+    istream& in = *rf.stream();
+    double likelihood = 0;
+    double denom = 0.0;
+    int lc = 0;
+    bool flag = false;
+    string line;
+    string ssrc, strg;
+    while(true) {
+      getline(in, line);
+      if (!in) break;
+      ++lc;
+      if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+      if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
+      ParseTranslatorInput(line, &ssrc, &strg);
+      Lattice src, trg;
+      LatticeTools::ConvertTextToLattice(ssrc, &src);
+      LatticeTools::ConvertTextToLattice(strg, &trg);
+      if (src.size() == 0 || trg.size() == 0) {
+        cerr << "Error: " << lc << "\n" << line << endl;
+        assert(src.size() > 0);
+        assert(trg.size() > 0);
+      }
+      if (src.size() > unnormed_a_i.size())
+        unnormed_a_i.resize(src.size());
+      if (iter == 0)
+        tot_len_ratio += static_cast<double>(trg.size()) / static_cast<double>(src.size());
+      denom += trg.size();
+      vector<double> probs(src.size() + 1);
+      bool first_al = true;  // used for write_alignments
+      for (int j = 0; j < trg.size(); ++j) {
+        const WordID& f_j = trg[j][0].label;
+        double sum = 0;
+        const double j_over_ts = double(j) / trg.size();
+        double prob_a_i = 1.0 / src.size();
+        double az = 0;
+        for (int ta = 0; ta < src.size(); ++ta) {
+          unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension);
+          az += unnormed_a_i[ta];
+        }
+        for (int i = 1; i <= src.size(); ++i) {
+          prob_a_i = unnormed_a_i[i-1] / az;
+          probs[i] = 1; // tt.prob(src[i-1][0].label, f_j) * prob_a_i;
+          sum += probs[i];
+        }
+      }
+    }
+
+    // log(e) = 1.0
+    double base2_likelihood = likelihood / log(2);
+
+    if (flag) { cerr << endl; }
+    if (iter == 0) {
+      mean_srclen_multiplier = tot_len_ratio / lc;
+      cerr << "expected target length = source length * " << mean_srclen_multiplier << endl;
+    }
+    cerr << "  log_e likelihood: " << likelihood << endl;
+    cerr << "  log_2 likelihood: " << base2_likelihood << endl;
+    cerr << "   cross entropy: " << (-base2_likelihood / denom) << endl;
+    cerr << "      perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+  }
+  return 0;
+}
+
+#endif
+
-- 
cgit v1.2.3


From c4ffa6df1fdd89e3db9c6d3829b7b84edac20bcf Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Sun, 19 Feb 2012 04:27:55 -0500
Subject: lbl preliminary clean up

---
 decoder/lattice.cc    |  1 +
 training/lbl_model.cc | 84 +++++++++++++++++++++++++++++++++------------------
 2 files changed, 55 insertions(+), 30 deletions(-)

(limited to 'training/lbl_model.cc')

diff --git a/decoder/lattice.cc b/decoder/lattice.cc
index e3631e59..89da3cd0 100644
--- a/decoder/lattice.cc
+++ b/decoder/lattice.cc
@@ -46,6 +46,7 @@ void LatticeTools::ConvertTextToLattice(const string& text, Lattice* pl) {
   Lattice& l = *pl;
   vector<WordID> ids;
   TD::ConvertSentence(text, &ids);
+  l.clear();
   l.resize(ids.size());
   for (int i = 0; i < l.size(); ++i)
     l[i].push_back(LatticeArc(ids[i], 0.0, 1));
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index 72d80a56..ccd29255 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -6,6 +6,7 @@
 #else
 
 #include <cmath>
+#include <set>
 
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
@@ -20,10 +21,17 @@
 namespace po = boost::program_options;
 using namespace std;
 
+#define kDIMENSIONS 10
+typedef Eigen::Matrix<float, kDIMENSIONS, 1> RVector;
+typedef Eigen::Matrix<float, 1, kDIMENSIONS> RTVector;
+typedef Eigen::Matrix<float, kDIMENSIONS, kDIMENSIONS> TMatrix;
+vector<RVector> r_src, r_trg;
+
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
-        ("iterations,i",po::value<unsigned>()->default_value(5),"Number of iterations of training")
+        ("input,i",po::value<string>(),"Input file")
+        ("iterations,I",po::value<unsigned>()->default_value(1000),"Number of iterations of training")
         ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)")
         ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model");
   po::options_description clo("Command line options");
@@ -42,7 +50,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::notify(*conf);
 
   if (argc < 2 || conf->count("help")) {
-    cerr << "Usage " << argv[0] << " [OPTIONS] corpus.fr-en\n";
+    cerr << "Usage " << argv[0] << " [OPTIONS] -i corpus.fr-en\n";
     cerr << dcmdline_options << endl;
     return false;
   }
@@ -52,33 +60,32 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
-  const string fname = argv[argc - 1];
+  const string fname = conf["input"].as<string>();
   const int ITERATIONS = conf["iterations"].as<unsigned>();
   const double diagonal_tension = conf["diagonal_tension"].as<double>();
+  if (diagonal_tension < 0.0) {
+    cerr << "Invalid value for diagonal_tension: must be >= 0\n";
+    return 1;
+  }
   string testset;
   if (conf.count("testset")) testset = conf["testset"].as<string>();
 
-  double tot_len_ratio = 0;
-  double mean_srclen_multiplier = 0;
+  int lc = 0;
   vector<double> unnormed_a_i;
-  for (int iter = 0; iter < ITERATIONS; ++iter) {
-    cerr << "ITERATION " << (iter + 1) << endl;
+  string line;
+  string ssrc, strg;
+  bool flag = false;
+  Lattice src, trg;
+  set<WordID> vocab_e;
+  { // read through corpus, initialize int map, check lines are good
+    cerr << "INITIAL READ OF " << fname << endl;
     ReadFile rf(fname);
     istream& in = *rf.stream();
-    double likelihood = 0;
-    double denom = 0.0;
-    int lc = 0;
-    bool flag = false;
-    string line;
-    string ssrc, strg;
-    while(true) {
-      getline(in, line);
-      if (!in) break;
+    while(getline(in, line)) {
       ++lc;
       if (lc % 1000 == 0) { cerr << '.'; flag = true; }
       if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
       ParseTranslatorInput(line, &ssrc, &strg);
-      Lattice src, trg;
       LatticeTools::ConvertTextToLattice(ssrc, &src);
       LatticeTools::ConvertTextToLattice(strg, &trg);
       if (src.size() == 0 || trg.size() == 0) {
@@ -88,37 +95,54 @@ int main(int argc, char** argv) {
       }
       if (src.size() > unnormed_a_i.size())
         unnormed_a_i.resize(src.size());
-      if (iter == 0)
-        tot_len_ratio += static_cast<double>(trg.size()) / static_cast<double>(src.size());
+      for (unsigned i = 0; i < trg.size(); ++i) {
+        assert(trg[i].size() == 1);
+        vocab_e.insert(trg[i][0].label);
+      }
+    }
+  }
+  if (flag) cerr << endl;
+
+  // do optimization
+  for (int iter = 0; iter < ITERATIONS; ++iter) {
+    cerr << "ITERATION " << (iter + 1) << endl;
+    ReadFile rf(fname);
+    istream& in = *rf.stream();
+    double likelihood = 0;
+    double denom = 0.0;
+    lc = 0;
+    flag = false;
+    while(true) {
+      getline(in, line);
+      if (!in) break;
+      ++lc;
+      if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+      if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
+      ParseTranslatorInput(line, &ssrc, &strg);
+      LatticeTools::ConvertTextToLattice(ssrc, &src);
+      LatticeTools::ConvertTextToLattice(strg, &trg);
       denom += trg.size();
       vector<double> probs(src.size() + 1);
-      bool first_al = true;  // used for write_alignments
       for (int j = 0; j < trg.size(); ++j) {
         const WordID& f_j = trg[j][0].label;
         double sum = 0;
         const double j_over_ts = double(j) / trg.size();
-        double prob_a_i = 1.0 / src.size();
         double az = 0;
         for (int ta = 0; ta < src.size(); ++ta) {
           unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension);
           az += unnormed_a_i[ta];
         }
         for (int i = 1; i <= src.size(); ++i) {
-          prob_a_i = unnormed_a_i[i-1] / az;
+          const double prob_a_i = unnormed_a_i[i-1] / az;
+          // TODO
           probs[i] = 1; // tt.prob(src[i-1][0].label, f_j) * prob_a_i;
           sum += probs[i];
         }
       }
     }
-
-    // log(e) = 1.0
-    double base2_likelihood = likelihood / log(2);
-
     if (flag) { cerr << endl; }
-    if (iter == 0) {
-      mean_srclen_multiplier = tot_len_ratio / lc;
-      cerr << "expected target length = source length * " << mean_srclen_multiplier << endl;
-    }
+
+    const double base2_likelihood = likelihood / log(2);
     cerr << "  log_e likelihood: " << likelihood << endl;
     cerr << "  log_2 likelihood: " << base2_likelihood << endl;
     cerr << "   cross entropy: " << (-base2_likelihood / denom) << endl;
-- 
cgit v1.2.3


From 9e45f895aaec5c7a2f362aa532ca5ca4325e102b Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 21 Feb 2012 11:53:01 -0500
Subject: basic lbl model, nothing to see here

---
 training/lbl_model.cc | 147 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 126 insertions(+), 21 deletions(-)

(limited to 'training/lbl_model.cc')

diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index ccd29255..4759eedc 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -5,13 +5,18 @@
   int main() { std::cerr << "Please rebuild with --with-eigen PATH\n"; return 1; }
 #else
 
+#include <cstdlib>
+#include <algorithm>
 #include <cmath>
 #include <set>
+#include <cstring> // memset
+#include <ctime>
 
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 #include <Eigen/Dense>
 
+#include "array2d.h"
 #include "m.h"
 #include "lattice.h"
 #include "stringlib.h"
@@ -21,7 +26,7 @@
 namespace po = boost::program_options;
 using namespace std;
 
-#define kDIMENSIONS 10
+#define kDIMENSIONS 25
 typedef Eigen::Matrix<float, kDIMENSIONS, 1> RVector;
 typedef Eigen::Matrix<float, 1, kDIMENSIONS> RTVector;
 typedef Eigen::Matrix<float, kDIMENSIONS, kDIMENSIONS> TMatrix;
@@ -32,6 +37,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   opts.add_options()
         ("input,i",po::value<string>(),"Input file")
         ("iterations,I",po::value<unsigned>()->default_value(1000),"Number of iterations of training")
+        ("eta,e", po::value<float>()->default_value(0.1f), "Eta for SGD")
+        ("random_seed", po::value<unsigned>(), "Random seed")
         ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)")
         ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model");
   po::options_description clo("Command line options");
@@ -57,12 +64,19 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   return true;
 }
 
+void Normalize(RVector* v) {
+  float norm = v->norm();
+  *v /= norm;
+}
+
 int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
   const string fname = conf["input"].as<string>();
   const int ITERATIONS = conf["iterations"].as<unsigned>();
+  const float eta = conf["eta"].as<float>();
   const double diagonal_tension = conf["diagonal_tension"].as<double>();
+  bool SGD = true;
   if (diagonal_tension < 0.0) {
     cerr << "Invalid value for diagonal_tension: must be >= 0\n";
     return 1;
@@ -70,14 +84,15 @@ int main(int argc, char** argv) {
   string testset;
   if (conf.count("testset")) testset = conf["testset"].as<string>();
 
-  int lc = 0;
+  unsigned lc = 0;
   vector<double> unnormed_a_i;
   string line;
   string ssrc, strg;
   bool flag = false;
   Lattice src, trg;
-  set<WordID> vocab_e;
+  vector<WordID> vocab_e;
   { // read through corpus, initialize int map, check lines are good
+    set<WordID> svocab_e;
     cerr << "INITIAL READ OF " << fname << endl;
     ReadFile rf(fname);
     istream& in = *rf.stream();
@@ -97,13 +112,39 @@ int main(int argc, char** argv) {
         unnormed_a_i.resize(src.size());
       for (unsigned i = 0; i < trg.size(); ++i) {
         assert(trg[i].size() == 1);
-        vocab_e.insert(trg[i][0].label);
+        svocab_e.insert(trg[i][0].label);
       }
     }
+    copy(svocab_e.begin(), svocab_e.end(), back_inserter(vocab_e));
   }
   if (flag) cerr << endl;
+  cerr << "Number of target word types: " << vocab_e.size() << endl;
+  const float num_examples = lc;
+
+  r_trg.resize(TD::NumWords() + 1);
+  r_src.resize(TD::NumWords() + 1);
+  if (conf.count("random_seed")) {
+    srand(conf["random_seed"].as<unsigned>());
+  } else {
+    unsigned seed = time(NULL);
+    cerr << "Random seed: " << seed << endl;
+    srand(seed);
+  }
+  TMatrix t = TMatrix::Random() / 100.0;
+  for (unsigned i = 1; i < r_trg.size(); ++i) {
+    r_trg[i] = RVector::Random();
+    r_src[i] = RVector::Random();
+    r_trg[i][i % kDIMENSIONS] = 0.5;
+    r_src[i][(i-1) % kDIMENSIONS] = 0.5;
+    Normalize(&r_trg[i]);
+    Normalize(&r_src[i]);
+  }
+  vector<set<unsigned> > trg_pos(TD::NumWords() + 1);
 
   // do optimization
+  TMatrix g;
+  vector<TMatrix> exp_src;
+  vector<double> z_src;
   for (int iter = 0; iter < ITERATIONS; ++iter) {
     cerr << "ITERATION " << (iter + 1) << endl;
     ReadFile rf(fname);
@@ -112,9 +153,8 @@ int main(int argc, char** argv) {
     double denom = 0.0;
     lc = 0;
     flag = false;
-    while(true) {
-      getline(in, line);
-      if (!in) break;
+    g *= 0;
+    while(getline(in, line)) {
       ++lc;
       if (lc % 1000 == 0) { cerr << '.'; flag = true; }
       if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
@@ -122,23 +162,86 @@ int main(int argc, char** argv) {
       LatticeTools::ConvertTextToLattice(ssrc, &src);
       LatticeTools::ConvertTextToLattice(strg, &trg);
       denom += trg.size();
-      vector<double> probs(src.size() + 1);
-      for (int j = 0; j < trg.size(); ++j) {
-        const WordID& f_j = trg[j][0].label;
-        double sum = 0;
-        const double j_over_ts = double(j) / trg.size();
-        double az = 0;
-        for (int ta = 0; ta < src.size(); ++ta) {
-          unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension);
-          az += unnormed_a_i[ta];
+
+      exp_src.clear(); exp_src.resize(src.size(), TMatrix::Zero());
+      z_src.clear(); z_src.resize(src.size(), 0.0);
+      Array2D<TMatrix> exp_refs(src.size(), trg.size(), TMatrix::Zero());
+      Array2D<double> z_refs(src.size(), trg.size(), 0.0);
+      for (unsigned j = 0; j < trg.size(); ++j)
+        trg_pos[trg[j][0].label].insert(j);
+
+      for (unsigned i = 0; i < src.size(); ++i) {
+        const RVector& r_s = r_src[src[i][0].label];
+        const RTVector pred = r_s.transpose() * t;
+        TMatrix& exp_m = exp_src[i];
+        double& z = z_src[i];
+        for (unsigned k = 0; k < vocab_e.size(); ++k) {
+          const WordID v_k = vocab_e[k];
+          const RVector& r_t = r_trg[v_k];
+          const double dot_prod = pred * r_t;
+          const double u = exp(dot_prod);
+          z += u;
+          const TMatrix v = r_s * r_t.transpose() * u;
+          exp_m += v;
+          set<unsigned>& ref_locs = trg_pos[v_k];
+          if (!ref_locs.empty()) {
+            for (set<unsigned>::iterator it = ref_locs.begin(); it != ref_locs.end(); ++it) {
+              TMatrix& exp_ref_ij = exp_refs(i, *it);
+              double& z_ref_ij = z_refs(i, *it);
+              z_ref_ij += u;
+              exp_ref_ij += v;
+            }
+          }
+        }
+      }
+      for (unsigned j = 0; j < trg.size(); ++j)
+        trg_pos[trg[j][0].label].clear();
+
+      // model expectations for a single target generation with
+      // uniform alignment prior
+      double m_z = 0;
+      TMatrix m_exp = TMatrix::Zero();
+      for (unsigned i = 0; i < src.size(); ++i) {
+        m_exp += exp_src[i];
+        m_z += z_src[i];
+      }
+      m_exp /= m_z;
+
+      Array2D<bool> al(src.size(), trg.size(), false);
+      for (unsigned j = 0; j < trg.size(); ++j) {
+        double ref_z = 0;
+        TMatrix ref_exp = TMatrix::Zero();
+        int max_i = 0;
+        double max_s = -9999999;
+        for (unsigned i = 0; i < src.size(); ++i) {
+          ref_exp += exp_refs(i, j);
+          ref_z += z_refs(i, j);
+          if (log(z_refs(i, j)) > max_s) {
+            max_s = log(z_refs(i, j));
+            max_i = i;
+          }
+          // TODO handle alignment prob
+        }
+        if (ref_z <= 0) { 
+          cerr << "TRG=" << TD::Convert(trg[j][0].label) << endl;
+          cerr << " LINE=" << line << endl;
+          cerr << " REF_EXP=\n" << ref_exp << endl;
+          cerr << " M_EXP=\n" << m_exp << endl;
+          abort();
         }
-        for (int i = 1; i <= src.size(); ++i) {
-          const double prob_a_i = unnormed_a_i[i-1] / az;
-          // TODO
-          probs[i] = 1; // tt.prob(src[i-1][0].label, f_j) * prob_a_i;
-          sum += probs[i];
+        al(max_i, j) = true;
+        ref_exp /= ref_z;
+        g += m_exp - ref_exp;
+        likelihood += log(ref_z) - log(m_z);
+        if (SGD) {
+          t -= g * eta / num_examples;
+          g *= 0;
+        } else {
+          assert(!"not implemented");
         }
       }
+      
+      if (iter == (ITERATIONS - 1) || lc == 28) { cerr << al << endl; }
     }
     if (flag) { cerr << endl; }
 
@@ -147,7 +250,9 @@ int main(int argc, char** argv) {
     cerr << "  log_2 likelihood: " << base2_likelihood << endl;
     cerr << "   cross entropy: " << (-base2_likelihood / denom) << endl;
     cerr << "      perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+    cerr << t << endl;
   }
+  cerr << "TRANSLATION MATRIX:" << endl << t << endl;
   return 0;
 }
 
-- 
cgit v1.2.3


From c0e9dc2889b6beb039c5365ebd0af6486b7ec574 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 21 Feb 2012 17:51:44 -0500
Subject: use lbfgs

---
 training/Makefile.am  |  2 +-
 training/lbl_model.cc | 33 ++++++++++++++++++++++++++++-----
 2 files changed, 29 insertions(+), 6 deletions(-)

(limited to 'training/lbl_model.cc')

diff --git a/training/Makefile.am b/training/Makefile.am
index 330341ac..991ac210 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -50,7 +50,7 @@ test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteva
 model1_SOURCES = model1.cc ttables.cc
 model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
-lbl_model_SOURCES = lbl_model.cc ttables.cc
+lbl_model_SOURCES = lbl_model.cc optimize.cc
 lbl_model_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 grammar_convert_SOURCES = grammar_convert.cc
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index 4759eedc..eb3e194d 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -16,6 +16,7 @@
 #include <boost/program_options/variables_map.hpp>
 #include <Eigen/Dense>
 
+#include "optimize.h"
 #include "array2d.h"
 #include "m.h"
 #include "lattice.h"
@@ -26,7 +27,7 @@
 namespace po = boost::program_options;
 using namespace std;
 
-#define kDIMENSIONS 25
+#define kDIMENSIONS 8
 typedef Eigen::Matrix<float, kDIMENSIONS, 1> RVector;
 typedef Eigen::Matrix<float, 1, kDIMENSIONS> RTVector;
 typedef Eigen::Matrix<float, kDIMENSIONS, kDIMENSIONS> TMatrix;
@@ -69,6 +70,21 @@ void Normalize(RVector* v) {
   *v /= norm;
 }
 
+void Flatten(const TMatrix& m, vector<double>* v) {
+  unsigned c = 0;
+  v->resize(kDIMENSIONS * kDIMENSIONS);
+  for (unsigned i = 0; i < kDIMENSIONS; ++i)
+    for (unsigned j = 0; j < kDIMENSIONS; ++j)
+      (*v)[c++] = m(i,j);
+}
+
+void Unflatten(const vector<double>& v, TMatrix* m) {
+  unsigned c = 0;
+  for (unsigned i = 0; i < kDIMENSIONS; ++i)
+    for (unsigned j = 0; j < kDIMENSIONS; ++j)
+      (*m)(i, j) = v[c++];
+}
+
 int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
@@ -76,7 +92,7 @@ int main(int argc, char** argv) {
   const int ITERATIONS = conf["iterations"].as<unsigned>();
   const float eta = conf["eta"].as<float>();
   const double diagonal_tension = conf["diagonal_tension"].as<double>();
-  bool SGD = true;
+  bool SGD = false;
   if (diagonal_tension < 0.0) {
     cerr << "Invalid value for diagonal_tension: must be >= 0\n";
     return 1;
@@ -121,6 +137,7 @@ int main(int argc, char** argv) {
   cerr << "Number of target word types: " << vocab_e.size() << endl;
   const float num_examples = lc;
 
+  LBFGSOptimizer lbfgs(kDIMENSIONS * kDIMENSIONS, 100);
   r_trg.resize(TD::NumWords() + 1);
   r_src.resize(TD::NumWords() + 1);
   if (conf.count("random_seed")) {
@@ -130,7 +147,7 @@ int main(int argc, char** argv) {
     cerr << "Random seed: " << seed << endl;
     srand(seed);
   }
-  TMatrix t = TMatrix::Random() / 100.0;
+  TMatrix t = TMatrix::Random() / 1024.0;
   for (unsigned i = 1; i < r_trg.size(); ++i) {
     r_trg[i] = RVector::Random();
     r_src[i] = RVector::Random();
@@ -145,6 +162,8 @@ int main(int argc, char** argv) {
   TMatrix g;
   vector<TMatrix> exp_src;
   vector<double> z_src;
+  vector<double> flat_g, flat_t;
+  Flatten(t, &flat_t);
   for (int iter = 0; iter < ITERATIONS; ++iter) {
     cerr << "ITERATION " << (iter + 1) << endl;
     ReadFile rf(fname);
@@ -236,8 +255,6 @@ int main(int argc, char** argv) {
         if (SGD) {
           t -= g * eta / num_examples;
           g *= 0;
-        } else {
-          assert(!"not implemented");
         }
       }
       
@@ -250,6 +267,12 @@ int main(int argc, char** argv) {
     cerr << "  log_2 likelihood: " << base2_likelihood << endl;
     cerr << "   cross entropy: " << (-base2_likelihood / denom) << endl;
     cerr << "      perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+    if (!SGD) {
+      Flatten(g, &flat_g);
+      lbfgs.Optimize(-likelihood, flat_g, &flat_t);
+      Unflatten(flat_t, &t);
+      if (lbfgs.HasConverged()) break;
+    }
     cerr << t << endl;
   }
   cerr << "TRANSLATION MATRIX:" << endl << t << endl;
-- 
cgit v1.2.3


From dd16e83d4a593392465ee317c43ffc2c490add2e Mon Sep 17 00:00:00 2001
From: Chris Dyer <prguest11@taipan.cs>
Date: Wed, 22 Feb 2012 16:10:56 +0000
Subject: add regularization

---
 training/lbl_model.cc | 50 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

(limited to 'training/lbl_model.cc')

diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index eb3e194d..a114bba7 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -12,6 +12,7 @@
 #include <cstring> // memset
 #include <ctime>
 
+#include <boost/math/special_functions/fpclassify.hpp>
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 #include <Eigen/Dense>
@@ -27,7 +28,7 @@
 namespace po = boost::program_options;
 using namespace std;
 
-#define kDIMENSIONS 8
+#define kDIMENSIONS 110
 typedef Eigen::Matrix<float, kDIMENSIONS, 1> RVector;
 typedef Eigen::Matrix<float, 1, kDIMENSIONS> RTVector;
 typedef Eigen::Matrix<float, kDIMENSIONS, kDIMENSIONS> TMatrix;
@@ -38,8 +39,9 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   opts.add_options()
         ("input,i",po::value<string>(),"Input file")
         ("iterations,I",po::value<unsigned>()->default_value(1000),"Number of iterations of training")
+        ("regularization_strength,C",po::value<float>()->default_value(0.1),"L2 regularization strength (0 for no regularization)")
         ("eta,e", po::value<float>()->default_value(0.1f), "Eta for SGD")
-        ("random_seed", po::value<unsigned>(), "Random seed")
+        ("random_seed,s", po::value<unsigned>(), "Random seed")
         ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)")
         ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model");
   po::options_description clo("Command line options");
@@ -67,6 +69,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 
 void Normalize(RVector* v) {
   float norm = v->norm();
+  assert(norm > 0.0f);
   *v /= norm;
 }
 
@@ -74,21 +77,42 @@ void Flatten(const TMatrix& m, vector<double>* v) {
   unsigned c = 0;
   v->resize(kDIMENSIONS * kDIMENSIONS);
   for (unsigned i = 0; i < kDIMENSIONS; ++i)
-    for (unsigned j = 0; j < kDIMENSIONS; ++j)
+    for (unsigned j = 0; j < kDIMENSIONS; ++j) {
+      assert(boost::math::isnormal(m(i, j)));
       (*v)[c++] = m(i,j);
+    }
 }
 
 void Unflatten(const vector<double>& v, TMatrix* m) {
   unsigned c = 0;
   for (unsigned i = 0; i < kDIMENSIONS; ++i)
-    for (unsigned j = 0; j < kDIMENSIONS; ++j)
+    for (unsigned j = 0; j < kDIMENSIONS; ++j) {
+      assert(boost::math::isnormal(v[c]));
       (*m)(i, j) = v[c++];
+    }
+}
+
+double ApplyRegularization(const double C,
+                           const vector<double>& weights,
+                           vector<double>* g) {
+  assert(weights.size() == g->size());
+  double reg = 0;
+  for (size_t i = 0; i < weights.size(); ++i) {
+    const double& w_i = weights[i];
+    double& g_i = (*g)[i];
+    reg += C * w_i * w_i;
+    g_i += 2 * C * w_i;
+  }
+  return reg;
 }
 
 int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
   const string fname = conf["input"].as<string>();
+  const float reg_strength = conf["regularization_strength"].as<float>();
+  const bool has_l2 = reg_strength;
+  assert(reg_strength >= 0.0f);
   const int ITERATIONS = conf["iterations"].as<unsigned>();
   const float eta = conf["eta"].as<float>();
   const double diagonal_tension = conf["diagonal_tension"].as<double>();
@@ -147,7 +171,7 @@ int main(int argc, char** argv) {
     cerr << "Random seed: " << seed << endl;
     srand(seed);
   }
-  TMatrix t = TMatrix::Random() / 1024.0;
+  TMatrix t = TMatrix::Random() / 50.0;
   for (unsigned i = 1; i < r_trg.size(); ++i) {
     r_trg[i] = RVector::Random();
     r_src[i] = RVector::Random();
@@ -159,7 +183,7 @@ int main(int argc, char** argv) {
   vector<set<unsigned> > trg_pos(TD::NumWords() + 1);
 
   // do optimization
-  TMatrix g;
+  TMatrix g = TMatrix::Zero();
   vector<TMatrix> exp_src;
   vector<double> z_src;
   vector<double> flat_g, flat_t;
@@ -265,11 +289,19 @@ int main(int argc, char** argv) {
     const double base2_likelihood = likelihood / log(2);
     cerr << "  log_e likelihood: " << likelihood << endl;
     cerr << "  log_2 likelihood: " << base2_likelihood << endl;
-    cerr << "   cross entropy: " << (-base2_likelihood / denom) << endl;
-    cerr << "      perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+    cerr << "     cross entropy: " << (-base2_likelihood / denom) << endl;
+    cerr << "        perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
     if (!SGD) {
       Flatten(g, &flat_g);
-      lbfgs.Optimize(-likelihood, flat_g, &flat_t);
+      double obj = -likelihood;
+      if (has_l2) {
+        const double r = ApplyRegularization(reg_strength,
+                                             flat_t,
+                                             &flat_g);
+        obj += r;
+        cerr << "    regularization: " << r << endl;
+      }
+      lbfgs.Optimize(obj, flat_g, &flat_t);
       Unflatten(flat_t, &t);
       if (lbfgs.HasConverged()) break;
     }
-- 
cgit v1.2.3


From 2faca3e7b3b8e4eba6c036c635a5b23883e72337 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 24 Feb 2012 00:47:48 -0500
Subject: load embeddings from file

---
 training/lbl_model.cc | 69 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 6 deletions(-)

(limited to 'training/lbl_model.cc')

diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index a114bba7..2af848b5 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -28,7 +28,7 @@
 namespace po = boost::program_options;
 using namespace std;
 
-#define kDIMENSIONS 110
+#define kDIMENSIONS 100
 typedef Eigen::Matrix<float, kDIMENSIONS, 1> RVector;
 typedef Eigen::Matrix<float, 1, kDIMENSIONS> RTVector;
 typedef Eigen::Matrix<float, kDIMENSIONS, kDIMENSIONS> TMatrix;
@@ -40,7 +40,9 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
         ("input,i",po::value<string>(),"Input file")
         ("iterations,I",po::value<unsigned>()->default_value(1000),"Number of iterations of training")
         ("regularization_strength,C",po::value<float>()->default_value(0.1),"L2 regularization strength (0 for no regularization)")
-        ("eta,e", po::value<float>()->default_value(0.1f), "Eta for SGD")
+        ("eta", po::value<float>()->default_value(0.1f), "Eta for SGD")
+        ("source_embeddings,f", po::value<string>(), "File containing source embeddings (if unset, random vectors will be used)")
+        ("target_embeddings,e", po::value<string>(), "File containing target embeddings (if unset, random vectors will be used)")
         ("random_seed,s", po::value<unsigned>(), "Random seed")
         ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)")
         ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model");
@@ -106,6 +108,59 @@ double ApplyRegularization(const double C,
   return reg;
 }
 
+// Loads embeddings from `filename` into (*pv)[WordID]. Each line must be
+// "WORD v1 v2 ..." with a space after WORD (otherwise abort()). Words whose
+// id falls outside *pv are skipped; *pv is never resized, entries are only
+// overwritten. Lines with fewer than kDIMENSIONS values get the remaining
+// dimensions filled with random values in [-1,1]; values beyond kDIMENSIONS
+// are ignored. Each of these two conditions is reported once on stderr.
+void LoadEmbeddings(const string& filename, vector<RVector>* pv) {
+  vector<RVector>& v = *pv;
+  cerr << "Reading embeddings from " << filename << " ...\n";
+  ReadFile rf(filename);
+  istream& in = *rf.stream();
+  string line;
+  unsigned lc = 0;
+  while(getline(in, line)) {
+    ++lc;
+    const size_t sep = line.find(' ');
+    if (sep == string::npos || sep == 0) {
+      cerr << "Parse error reading line " << lc << ":\n" << line << endl;
+      abort();
+    }
+    const WordID w = TD::Convert(line.substr(0, sep));
+    if (static_cast<size_t>(w) >= v.size()) continue;  // no slot for this word
+    RVector& curv = v[w];
+    // strtod skips leading whitespace and reports where each number ends, so
+    // repeated or trailing spaces can no longer yield a phantom 0.0 dimension.
+    const char* p = line.c_str() + sep;
+    char* end = NULL;
+    size_t c = 0;
+    while (c < kDIMENSIONS) {
+      const double val = strtod(p, &end);
+      if (end == p) break;  // nothing (more) on this line parses as a number
+      curv[c++] = val;
+      p = end;
+    }
+    if (c != kDIMENSIONS) {
+      static bool first = true;
+      if (first) {
+        cerr << " read " << c << " dimensions from embedding file, but built with " << kDIMENSIONS << " (filling in with random values)\n";
+        first = false;
+      }
+      // bare rand() is in [0,RAND_MAX]; rescale to [-1,1] like RVector::Random()
+      for (; c < kDIMENSIONS; ++c) curv[c] = 2.0 * rand() / RAND_MAX - 1.0;
+    } else if (line.find_first_not_of(" \t\r", p - line.c_str()) != string::npos) {
+      // non-blank text remains after the kDIMENSIONS-th value
+      static bool first = true;
+      if (first) {
+        cerr << " embedding file contains more dimensions than configured with, truncating.\n";
+        first = false;
+      }
+    }
+  }
+}
+
 int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
@@ -175,11 +230,11 @@ int main(int argc, char** argv) {
   for (unsigned i = 1; i < r_trg.size(); ++i) {
     r_trg[i] = RVector::Random();
     r_src[i] = RVector::Random();
-    r_trg[i][i % kDIMENSIONS] = 0.5;
-    r_src[i][(i-1) % kDIMENSIONS] = 0.5;
-    Normalize(&r_trg[i]);
-    Normalize(&r_src[i]);
   }
+  if (conf.count("source_embeddings"))
+    LoadEmbeddings(conf["source_embeddings"].as<string>(), &r_src);
+  if (conf.count("target_embeddings"))
+    LoadEmbeddings(conf["target_embeddings"].as<string>(), &r_trg);
   vector<set<unsigned> > trg_pos(TD::NumWords() + 1);
 
   // do optimization
@@ -242,6 +297,8 @@ int main(int argc, char** argv) {
 
       // model expectations for a single target generation with
       // uniform alignment prior
+      // TODO: when using a non-uniform alignment, m_exp will be
+      // a function of j (below)
       double m_z = 0;
       TMatrix m_exp = TMatrix::Zero();
       for (unsigned i = 0; i < src.size(); ++i) {
-- 
cgit v1.2.3


From 54bcfb835232d190a5ab6f0bd825de8a50dae126 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Wed, 29 Feb 2012 01:12:40 -0500
Subject: cleanup, mpi-ify lblmodel

---
 training/lbl_model.cc      | 179 +++++++-------
 utils/agenda.h             | 140 -----------
 utils/best.h               |  32 ---
 utils/corpus_tools.cc      |  62 +++++
 utils/corpus_tools.h       |  19 ++
 utils/d_ary_heap.h         | 568 ---------------------------------------------
 utils/ftoa.h               | 403 --------------------------------
 utils/int_or_pointer.h     |  70 ------
 utils/intern_pool.h        | 158 -------------
 utils/lvalue_pmap.h        |  31 ---
 utils/max_plus.h           | 201 ----------------
 utils/maybe_update_bound.h |  17 --
 utils/nan.h                |  42 ----
 utils/string_to.h          | 314 -------------------------
 14 files changed, 178 insertions(+), 2058 deletions(-)
 delete mode 100644 utils/agenda.h
 delete mode 100644 utils/best.h
 create mode 100644 utils/corpus_tools.cc
 create mode 100644 utils/corpus_tools.h
 delete mode 100644 utils/d_ary_heap.h
 delete mode 100644 utils/ftoa.h
 delete mode 100644 utils/int_or_pointer.h
 delete mode 100644 utils/intern_pool.h
 delete mode 100644 utils/lvalue_pmap.h
 delete mode 100644 utils/max_plus.h
 delete mode 100644 utils/maybe_update_bound.h
 delete mode 100644 utils/nan.h
 delete mode 100644 utils/string_to.h

(limited to 'training/lbl_model.cc')

diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index 2af848b5..def5075a 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -12,11 +12,17 @@
 #include <cstring> // memset
 #include <ctime>
 
+#ifdef HAVE_MPI
+#include <boost/mpi/timer.hpp>
+#include <boost/mpi.hpp>
+namespace mpi = boost::mpi;
+#endif
 #include <boost/math/special_functions/fpclassify.hpp>
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 #include <Eigen/Dense>
 
+#include "corpus_tools.h"
 #include "optimize.h"
 #include "array2d.h"
 #include "m.h"
@@ -29,9 +35,9 @@ namespace po = boost::program_options;
 using namespace std;
 
 #define kDIMENSIONS 100
-typedef Eigen::Matrix<float, kDIMENSIONS, 1> RVector;
-typedef Eigen::Matrix<float, 1, kDIMENSIONS> RTVector;
-typedef Eigen::Matrix<float, kDIMENSIONS, kDIMENSIONS> TMatrix;
+typedef Eigen::Matrix<double, kDIMENSIONS, 1> RVector;
+typedef Eigen::Matrix<double, 1, kDIMENSIONS> RTVector;
+typedef Eigen::Matrix<double, kDIMENSIONS, kDIMENSIONS> TMatrix;
 vector<RVector> r_src, r_trg;
 
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
@@ -39,8 +45,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   opts.add_options()
         ("input,i",po::value<string>(),"Input file")
         ("iterations,I",po::value<unsigned>()->default_value(1000),"Number of iterations of training")
-        ("regularization_strength,C",po::value<float>()->default_value(0.1),"L2 regularization strength (0 for no regularization)")
-        ("eta", po::value<float>()->default_value(0.1f), "Eta for SGD")
+        ("regularization_strength,C",po::value<double>()->default_value(0.1),"L2 regularization strength (0 for no regularization)")
+        ("eta", po::value<double>()->default_value(0.1f), "Eta for SGD")
         ("source_embeddings,f", po::value<string>(), "File containing source embeddings (if unset, random vectors will be used)")
         ("target_embeddings,e", po::value<string>(), "File containing target embeddings (if unset, random vectors will be used)")
         ("random_seed,s", po::value<unsigned>(), "Random seed")
@@ -70,7 +76,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 }
 
 void Normalize(RVector* v) {
-  float norm = v->norm();
+  double norm = v->norm();
   assert(norm > 0.0f);
   *v /= norm;
 }
@@ -80,7 +86,7 @@ void Flatten(const TMatrix& m, vector<double>* v) {
   v->resize(kDIMENSIONS * kDIMENSIONS);
   for (unsigned i = 0; i < kDIMENSIONS; ++i)
     for (unsigned j = 0; j < kDIMENSIONS; ++j) {
-      assert(boost::math::isnormal(m(i, j)));
+      assert(boost::math::isfinite(m(i, j)));
       (*v)[c++] = m(i,j);
     }
 }
@@ -89,7 +95,7 @@ void Unflatten(const vector<double>& v, TMatrix* m) {
   unsigned c = 0;
   for (unsigned i = 0; i < kDIMENSIONS; ++i)
     for (unsigned j = 0; j < kDIMENSIONS; ++j) {
-      assert(boost::math::isnormal(v[c]));
+      assert(boost::math::isfinite(v[c]));
       (*m)(i, j) = v[c++];
     }
 }
@@ -162,14 +168,25 @@ void LoadEmbeddings(const string& filename, vector<RVector>* pv) {
 }
 
 int main(int argc, char** argv) {
+#ifdef HAVE_MPI
+  std::cerr << "**MPI enabled.\n";
+  mpi::environment env(argc, argv);
+  mpi::communicator world;
+  const int size = world.size(); 
+  const int rank = world.rank();
+#else
+  std::cerr << "**MPI disabled.\n";
+  const int rank = 0;
+  const int size = 1;
+#endif
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
   const string fname = conf["input"].as<string>();
-  const float reg_strength = conf["regularization_strength"].as<float>();
+  const double reg_strength = conf["regularization_strength"].as<double>();
   const bool has_l2 = reg_strength;
   assert(reg_strength >= 0.0f);
   const int ITERATIONS = conf["iterations"].as<unsigned>();
-  const float eta = conf["eta"].as<float>();
+  const double eta = conf["eta"].as<double>();
   const double diagonal_tension = conf["diagonal_tension"].as<double>();
   bool SGD = false;
   if (diagonal_tension < 0.0) {
@@ -181,61 +198,44 @@ int main(int argc, char** argv) {
 
   unsigned lc = 0;
   vector<double> unnormed_a_i;
-  string line;
-  string ssrc, strg;
   bool flag = false;
-  Lattice src, trg;
+  vector<vector<WordID> > srcs, trgs;
   vector<WordID> vocab_e;
-  { // read through corpus, initialize int map, check lines are good
-    set<WordID> svocab_e;
-    cerr << "INITIAL READ OF " << fname << endl;
-    ReadFile rf(fname);
-    istream& in = *rf.stream();
-    while(getline(in, line)) {
-      ++lc;
-      if (lc % 1000 == 0) { cerr << '.'; flag = true; }
-      if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
-      ParseTranslatorInput(line, &ssrc, &strg);
-      LatticeTools::ConvertTextToLattice(ssrc, &src);
-      LatticeTools::ConvertTextToLattice(strg, &trg);
-      if (src.size() == 0 || trg.size() == 0) {
-        cerr << "Error: " << lc << "\n" << line << endl;
-        assert(src.size() > 0);
-        assert(trg.size() > 0);
-      }
-      if (src.size() > unnormed_a_i.size())
-        unnormed_a_i.resize(src.size());
-      for (unsigned i = 0; i < trg.size(); ++i) {
-        assert(trg[i].size() == 1);
-        svocab_e.insert(trg[i][0].label);
-      }
-    }
+  {
+    set<WordID> svocab_e, svocab_f;
+    CorpusTools::ReadFromFile(fname, &srcs, NULL, &trgs, &svocab_e, rank, size);
     copy(svocab_e.begin(), svocab_e.end(), back_inserter(vocab_e));
   }
-  if (flag) cerr << endl;
   cerr << "Number of target word types: " << vocab_e.size() << endl;
-  const float num_examples = lc;
+  const double num_examples = lc;
 
-  LBFGSOptimizer lbfgs(kDIMENSIONS * kDIMENSIONS, 100);
+  boost::shared_ptr<LBFGSOptimizer> lbfgs;
+  if (rank == 0)
+    lbfgs.reset(new LBFGSOptimizer(kDIMENSIONS * kDIMENSIONS, 100));
   r_trg.resize(TD::NumWords() + 1);
   r_src.resize(TD::NumWords() + 1);
+  vector<set<unsigned> > trg_pos(TD::NumWords() + 1);
+
   if (conf.count("random_seed")) {
     srand(conf["random_seed"].as<unsigned>());
   } else {
-    unsigned seed = time(NULL);
+    unsigned seed = time(NULL) + rank * 100;
     cerr << "Random seed: " << seed << endl;
     srand(seed);
   }
-  TMatrix t = TMatrix::Random() / 50.0;
-  for (unsigned i = 1; i < r_trg.size(); ++i) {
-    r_trg[i] = RVector::Random();
-    r_src[i] = RVector::Random();
+  
+  TMatrix t;
+  if (rank == 0) {
+    t = TMatrix::Random() / 50.0;
+    for (unsigned i = 1; i < r_trg.size(); ++i) {
+      r_trg[i] = RVector::Random();
+      r_src[i] = RVector::Random();
+    }
+    if (conf.count("source_embeddings"))
+      LoadEmbeddings(conf["source_embeddings"].as<string>(), &r_src);
+    if (conf.count("target_embeddings"))
+      LoadEmbeddings(conf["target_embeddings"].as<string>(), &r_trg);
   }
-  if (conf.count("source_embeddings"))
-    LoadEmbeddings(conf["source_embeddings"].as<string>(), &r_src);
-  if (conf.count("target_embeddings"))
-    LoadEmbeddings(conf["target_embeddings"].as<string>(), &r_trg);
-  vector<set<unsigned> > trg_pos(TD::NumWords() + 1);
 
   // do optimization
   TMatrix g = TMatrix::Zero();
@@ -243,22 +243,25 @@ int main(int argc, char** argv) {
   vector<double> z_src;
   vector<double> flat_g, flat_t;
   Flatten(t, &flat_t);
-  for (int iter = 0; iter < ITERATIONS; ++iter) {
+  bool converged = false;
+  // TODO broadcast embeddings
+  for (int iter = 0; !converged && iter < ITERATIONS; ++iter) {
+#ifdef HAVE_MPI
+    mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
+#endif
+    Unflatten(flat_t, &t);
     cerr << "ITERATION " << (iter + 1) << endl;
-    ReadFile rf(fname);
-    istream& in = *rf.stream();
     double likelihood = 0;
     double denom = 0.0;
     lc = 0;
     flag = false;
     g *= 0;
-    while(getline(in, line)) {
+    for (unsigned i = 0; i < srcs.size(); ++i) {
+      const vector<WordID>& src = srcs[i];
+      const vector<WordID>& trg = trgs[i];
       ++lc;
-      if (lc % 1000 == 0) { cerr << '.'; flag = true; }
-      if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
-      ParseTranslatorInput(line, &ssrc, &strg);
-      LatticeTools::ConvertTextToLattice(ssrc, &src);
-      LatticeTools::ConvertTextToLattice(strg, &trg);
+      if (rank == 0 && lc % 1000 == 0) { cerr << '.'; flag = true; }
+      if (rank == 0 && lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
       denom += trg.size();
 
       exp_src.clear(); exp_src.resize(src.size(), TMatrix::Zero());
@@ -266,10 +269,10 @@ int main(int argc, char** argv) {
       Array2D<TMatrix> exp_refs(src.size(), trg.size(), TMatrix::Zero());
       Array2D<double> z_refs(src.size(), trg.size(), 0.0);
       for (unsigned j = 0; j < trg.size(); ++j)
-        trg_pos[trg[j][0].label].insert(j);
+        trg_pos[trg[j]].insert(j);
 
       for (unsigned i = 0; i < src.size(); ++i) {
-        const RVector& r_s = r_src[src[i][0].label];
+        const RVector& r_s = r_src[src[i]];
         const RTVector pred = r_s.transpose() * t;
         TMatrix& exp_m = exp_src[i];
         double& z = z_src[i];
@@ -293,7 +296,7 @@ int main(int argc, char** argv) {
         }
       }
       for (unsigned j = 0; j < trg.size(); ++j)
-        trg_pos[trg[j][0].label].clear();
+        trg_pos[trg[j]].clear();
 
       // model expectations for a single target generation with
       // uniform alignment prior
@@ -323,8 +326,8 @@ int main(int argc, char** argv) {
           // TODO handle alignment prob
         }
         if (ref_z <= 0) { 
-          cerr << "TRG=" << TD::Convert(trg[j][0].label) << endl;
-          cerr << " LINE=" << line << endl;
+          cerr << "TRG=" << TD::Convert(trg[j]) << endl;
+          cerr << " LINE=" << lc << " (RANK=" << rank << "/" << size << ")" << endl;
           cerr << " REF_EXP=\n" << ref_exp << endl;
           cerr << " M_EXP=\n" << m_exp << endl;
           abort();
@@ -339,30 +342,42 @@ int main(int argc, char** argv) {
         }
       }
       
-      if (iter == (ITERATIONS - 1) || lc == 28) { cerr << al << endl; }
+      if (rank == 0 && (iter == (ITERATIONS - 1) || lc < 12)) { cerr << al << endl; }
     }
-    if (flag) { cerr << endl; }
+    if (flag && rank == 0) { cerr << endl; }
 
-    const double base2_likelihood = likelihood / log(2);
-    cerr << "  log_e likelihood: " << likelihood << endl;
-    cerr << "  log_2 likelihood: " << base2_likelihood << endl;
-    cerr << "     cross entropy: " << (-base2_likelihood / denom) << endl;
-    cerr << "        perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+    double obj = 0;
     if (!SGD) {
       Flatten(g, &flat_g);
-      double obj = -likelihood;
-      if (has_l2) {
-        const double r = ApplyRegularization(reg_strength,
-                                             flat_t,
-                                             &flat_g);
-        obj += r;
-        cerr << "    regularization: " << r << endl;
+      obj = -likelihood;
+      // TODO - reduce gradient
+    }
+
+    if (rank == 0) {
+      double gn = 0;
+      for (unsigned i = 0; i < flat_g.size(); ++i)
+        gn += flat_g[i]*flat_g[i];
+      const double base2_likelihood = likelihood / log(2);
+      cerr << "  log_e likelihood: " << likelihood << endl;
+      cerr << "  log_2 likelihood: " << base2_likelihood << endl;
+      cerr << "     cross entropy: " << (-base2_likelihood / denom) << endl;
+      cerr << "        perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+      cerr << "     gradient norm: " << sqrt(gn) << endl;
+      if (!SGD) {
+        if (has_l2) {
+          const double r = ApplyRegularization(reg_strength,
+                                               flat_t,
+                                               &flat_g);
+          obj += r;
+          cerr << "    regularization: " << r << endl;
+        }
+        lbfgs->Optimize(obj, flat_g, &flat_t);
+        converged = (lbfgs->HasConverged());
       }
-      lbfgs.Optimize(obj, flat_g, &flat_t);
-      Unflatten(flat_t, &t);
-      if (lbfgs.HasConverged()) break;
     }
-    cerr << t << endl;
+#ifdef HAVE_MPI
+    mpi::broadcast(world, converged, 0);
+#endif
   }
   cerr << "TRANSLATION MATRIX:" << endl << t << endl;
   return 0;
diff --git a/utils/agenda.h b/utils/agenda.h
deleted file mode 100644
index d4f13696..00000000
--- a/utils/agenda.h
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef AGENDA_H
-#define AGENDA_H
-
-#define DBG_AGENDA(x) x
-/*
-  a priority queue where you expect to queue the same item at different
-  priorities several times before finally popping it.  higher priority = better.
-  so in best first you'd be using negative cost or e^-cost (probabilities, in
-  other words).
-
-  this means you have a way to look up a key and see its location in the queue,
-  so its priority can be adjusted (or, simpler implementation: so when you pop,
-  you see if you've already popped before at a lower cost, and skip the
-  subsequent pops).
-
-  it's assumed that you'll never queue an item @ a better priority after it has
-  already been popped.  that is, the agenda will track already completed items.
-  maybe in the future i will let you recompute a cheaper way to reach things
-  after first-pop also, it's assumed that we're always improving prios of
-  existing items, never making them worse (even though technically this is
-  possible and sensible if it hasn't been popped yet).
-
-  simple binary max heap for now.  there are better practical options w/
-  superior cache locaility.  movements in the heap need to update a record for
-  that key of where the key went.  i do this by creating canonical key pointers
-  out of boost object pools (if the key were lightweight e.g. an int, then it
-  would make sense to use the hash lookup too
-
-  since i'm doing key hashing to start with, i also allow you to attach some
-  arbitrary data (value) payload beyond key+priority.
-
-  hash map from key to done (has been popped) -> set where doneness is marked in key item?
-
-  a slightly different way to make an adjustable heap would be to use
-  tree-structured parent/children links intrusively (or mapped by key) in the
-  key, rather than indices in a compact binary-tree heap
-
- */
-
-#include "best.h"
-#include "intern_pool.h"
-#include "d_ary_heap.h"
-#include "lvalue_pmap.h"
-#include <vector>
-#include <functional>
-
-/*
-template <class P>
-struct priority_traits {
-  typedef typename P::priority_type priority_type;
-};
-*/
-
-typedef best_t agenda_best_t;
-typedef unsigned agenda_location_t;
-
-PMAP_MEMBER_INDIRECT(LocationMap,agenda_location_t,location)
-PMAP_MEMBER_INDIRECT(PriorityMap,agenda_best_t,priority)
-
-struct Less {
-  typedef bool result_type;
-  template <class A,class B>
-  bool operator()(A const& a,B const& b) const { return a<b; }
-};
-
-// LocMap and PrioMap are boost property maps put(locmap,key,size_t), Better(get(priomap,k1),get(priomap,k2)) means k1 should be above k2 (be popped first).  Locmap and PrioMap may have state; the rest are assumed stateless functors
-// make sure the (default) location is not -1 for anything you add, or else an assertion may trigger
-template <class Item,class Better=Less, /* intern_pool args */ class KeyF=get_key<Item>,class HashKey=boost::hash<typename KeyF::result_type>,class EqKey=std::equal_to<typename KeyF::result_type>, class Pool=boost::object_pool<Item> >
-struct Agenda : intern_pool<Item,KeyF,HashKey,EqKey,Pool> {
-  typedef intern_pool<Item,KeyF,HashKey,EqKey,Pool> Intern; // inherited because I want to use construct()
-  /* this is less generic than it could be, because I want to use a single hash mapping to intern to canonical mutable object pointers, where the property maps are just lvalue accessors */
-  typedef typename KeyF::result_type Key;
-  typedef Item * Handle;
-  typedef LocationMap<Handle> LocMap;
-  typedef PriorityMap<Handle> PrioMap;
-  LocMap locmap;
-  PrioMap priomap; // note: priomap[item] is set by caller before giving us the item; then tracks best (for canonicalized item) thereafter
-
-  Better better;
-  //NOT NEEDED: initialize function object state (there is none)
-
-  typedef Item *ItemC; //canonicalized pointer
-  typedef Item *ItemP;
-  static const std::size_t heap_arity=4; // might be fastest possible (depends on key size probably - cache locality is bad w/ arity=2)
-  typedef std::vector<ItemC> HeapStorage;
-  typedef d_ary_heap_indirect<Handle,heap_arity,LocMap,PrioMap,Better,HeapStorage,agenda_location_t> Heap;
-  Heap q;
-
-  // please don't call q.push etc. directly.
-  void add(ItemP i) {
-    bool fresh=interneq(i);
-    DBG_AGENDA(assert(fresh && !q.contains(i)));
-    q.push(i);
-  }
-  bool improve(ItemP i) {
-    ItemP c=i;
-    bool fresh=interneq(c);
-    if (fresh) {
-      add(c);
-      return true;
-    }
-    DBG_AGENDA(assert(q.contains(c)));
-    return q.maybe_improve(priomap[i]);
-  }
-  inline bool empty() {
-    return q.empty();
-  }
-  // no need to destroy the canon. item because we want to remember the best cost and reject more expensive ways of using it).
-  ItemC pop() {
-    ItemC r=q.top();
-    q.pop();
-    return r;
-  }
-  void pop_discard() {
-    q.pop();
-  }
-
-  ItemC top() {
-    DBG_AGENDA(assert(!empty()));
-    return q.top();
-  }
-
-  agenda_best_t best() const {
-    return q.best(); //TODO: cache/track the global best?
-  }
-
-  agenda_best_t second_best() const {
-    return q.second_best();
-  }
-
-  // add only if worse than queue current best, otherwise evaluate immediately (e.g. for early stopping w/ expensive to compute additional cost).  return true if postponed (added)
-  bool postpone(ItemP i) {
-    if (better(priomap[i],best())) return false;
-    return improve(i);
-  }
-
-  Agenda(unsigned reserve=1000000,LocMap const& lm=LocMap(),PrioMap const& pm=PrioMap(),EqKey const& eq=EqKey(),Better const& better=Better()) : locmap(lm), priomap(pm), better(better), q(priomap,locmap,better,reserve) {  }
-};
-
-#endif
diff --git a/utils/best.h b/utils/best.h
deleted file mode 100644
index ed15e0be..00000000
--- a/utils/best.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef UTILS__BEST_H
-#define UTILS__BEST_H
-
-#include "max_plus.h"
-
-typedef MaxPlus<double> best_t;
-
-inline bool better(best_t const& a,best_t const& b) {
-  return a.v_>b.v_; // intentionally reversed, so default min-heap, sort, etc. put best first.
-}
-
-inline bool operator <(best_t const& a,best_t const& b) {
-  return a.v_>b.v_; // intentionally reversed, so default min-heap, sort, etc. put best first.
-}
-struct BetterP {
-  inline bool operator ()(best_t const& a,best_t const& b) const {
-    return a.v_>b.v_; // intentionally reversed, so default min-heap, sort, etc. put best first.
-  }
-};
-
-inline void maybe_improve(best_t &a,best_t const& b) {
-  if (a.v_>b.v_)
-    a.v_=b.v_;
-}
-
-template <class O>
-inline void maybe_improve(best_t &a,O const& b) {
-  if (a.v_>b.v_)
-    a.v_=b.v_;
-}
-
-#endif
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
new file mode 100644
index 00000000..a0542b6e
--- /dev/null
+++ b/utils/corpus_tools.cc
@@ -0,0 +1,62 @@
+#include "corpus_tools.h"
+
+#include <iostream>
+
+#include "tdict.h"
+#include "filelib.h"
+#include "verbose.h"
+
+using namespace std;
+
+// Reads "src" or (if trg != NULL) "src ||| trg" lines; keeps 0-based line i only
+// if i % size == rank. NULL outputs are skipped; aborts on malformed lines.
+void CorpusTools::ReadFromFile(const string& filename,
+                           vector<vector<WordID> >* src,
+                           set<WordID>* src_vocab,
+                           vector<vector<WordID> >* trg,
+                           set<WordID>* trg_vocab,
+                           int rank,
+                           int size) {
+  assert(rank >= 0);
+  assert(size > 0);
+  assert(rank < size);
+  if (src) src->clear();
+  if (src_vocab) src_vocab->clear();
+  if (trg) trg->clear();
+  if (trg_vocab) trg_vocab->clear();
+  const int expected_fields = 1 + (trg == NULL ? 0 : 1);
+  if (!SILENT) cerr << "Reading from " << filename << " ...\n";
+  ReadFile rf(filename);
+  istream& in = *rf.stream();
+  string line;
+  int lc = 0;
+  static const WordID kDIV = TD::Convert("|||");
+  vector<WordID> tmp;
+  while(getline(in, line)) {
+    const bool skip = (lc % size != rank);  // round-robin sharding across ranks
+    ++lc;
+    if (skip) continue;
+    TD::ConvertSentence(line, &tmp);
+    if (src) src->push_back(vector<WordID>());
+    vector<WordID>* d = src ? &src->back() : NULL;  // NULL: source side not requested
+    set<WordID>* v = src_vocab;
+    int s = 0;  // number of "|||" separators seen so far
+    for (unsigned i = 0; i < tmp.size(); ++i) {
+      if (tmp[i] == kDIV) {
+        ++s;
+        // run-time check instead of assert(trg): NDEBUG builds must not deref NULL
+        if (s > 1 || !trg) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); }
+        trg->push_back(vector<WordID>());
+        d = &trg->back();
+        v = trg_vocab;
+      } else {
+        if (d) d->push_back(tmp[i]);
+        if (v) v->insert(tmp[i]);
+      }
+    }
+    ++s;  // separators + 1 == number of fields on this line
+    if (expected_fields != s) { cerr << "Wrong number of fields in line " << lc << ": " << line << endl; abort(); }
+  }
+}
+
+
diff --git a/utils/corpus_tools.h b/utils/corpus_tools.h
new file mode 100644
index 00000000..97bdaa94
--- /dev/null
+++ b/utils/corpus_tools.h
@@ -0,0 +1,19 @@
+#ifndef _CORPUS_TOOLS_H_
+#define _CORPUS_TOOLS_H_
+
+#include <string>
+#include <set>
+#include <vector>
+#include "wordid.h"
+
+struct CorpusTools {  // corpus-loading helper; declares a single static function
+  static void ReadFromFile(const std::string& filename,  // corpus file, one sentence (pair) per line
+                           std::vector<std::vector<WordID> >* src,  // out: source-side word ids, one vector per kept line
+                           std::set<WordID>* src_vocab = NULL,  // out (optional): distinct source word ids
+                           std::vector<std::vector<WordID> >* trg = NULL,  // out (optional): words after "|||"; non-NULL makes "|||" mandatory
+                           std::set<WordID>* trg_vocab = NULL,  // out (optional): distinct target word ids
+                           int rank = 0,  // keep only lines whose 0-based index % size == rank
+                           int size = 1);  // number of shards; must satisfy 0 <= rank < size
+};
+
+#endif
diff --git a/utils/d_ary_heap.h b/utils/d_ary_heap.h
deleted file mode 100644
index 1270638a..00000000
--- a/utils/d_ary_heap.h
+++ /dev/null
@@ -1,568 +0,0 @@
-#ifndef D_ARY_HEAP_H
-#define D_ARY_HEAP_H
-
-#include "show.h"
-#define DDARY(x)
-
-#define D_ARY_PUSH_GRAEHL 0 // untested
-#define D_ARY_POP_GRAEHL 0 // untested
-#define D_ARY_DOWN_GRAEHL 0 // untested
-#define D_ARY_UP_GRAEHL 0 // untested
-#define D_ARY_APPEND_ALWAYS_PUSH 1 // heapify (0) is untested.  otherwise switch between push and heapify depending on size (cache effects, existing items vs. # appended ones)
-
-#define D_ARY_TRACK_OUT_OF_HEAP 0 // shouldn't need to track, because in contains() false positives looking up stale or random loc map values are impossible - we just check key.  note: if you enable this, you must init location to D_ARY_HEAP_NULL_INDEX yourself until it's been added or popped
-#define D_ARY_VERIFY_HEAP 1
-// This is a very expensive test so it should be disabled even when NDEBUG is not defined
-
-# undef D_ARY_HEAP_NULL_INDEX
-# define D_ARY_HEAP_NULL_INDEX (-1) // you may init location to this.
-
-/* adapted from boost/graph/detail/d_ary_heap.hpp
-
-  local modifications:
-
-  clear, heapify, append range/container, Size type template arg, reserve constructor arg
-
-  hole+move rather than swap.  note: swap would be more efficient for heavyweight keys, until move ctors exist
-
-  don't set locmap to -1 when removing from heap (waste of time)
-
-  // unlike arity=2 case, you don't gain anything by having indices start at 1, with 0-based child indices
-  // root @1, A=2, children indices m={0,1}: parent(i)=i/2, child(i,m)=2*i+m
-  // root @0: parent(i)=(i-1)/A child(i,n)=i*A+n+1 - can't improve on this except child(i,m)=i*A+m
-  (integer division, a/b=floor(a/b), so (i-1)/A = ceil(i/A)-1, or greatest int less than (i/A))
-
-  actually, no need to adjust child index, since child is called only once and inline
-
-  e.g. for A=3 gorn address in tree -> index
-
-  () = root -> 0
-  (1) -> 1
-  (2) -> 2
-  (3) (A) -> 3
-  (1,1) -> (1*A+1) = 4
-  (1,2) -> (1*A+2) = 5
-  (1,3) -> (1*A+3) = 6
-  (2,1) -> (2*A+1) = 7
-  etc.
-
-//TODO: block-align siblings!  assume data[0] is 16 or 32-byte aligned ... then we want root @ index (blocksize-1).  see http://www.lamarca.org/anthony/pubs/heaps.pdf pg8.  for pow2(e.g. 4)-ary heap, it may be reasonable to  use root @index A-1.  however, suppose the key size is not padded to a power of 2 (e.g. 12 bytes), then we would need internal gaps at times.  would want to use compile const template based inlineable alignment math for this?  possibly use a container like vector that lets you specify padding relative to some address multiple for v[0].
-
- optimal D: see http://www.lamarca.org/anthony/pubs/heaps.pdf pg 9.  depedns on relative cost of swap,compare, but in all cases except swap=free, 2 is worse than 3-4.  for expensive swap (3x compare), 4 still as good as 5.  so just use 4.  boost benchmarking djikstra agrees; 4 is best.
-
- cache-aligned 4-heap speedup over regular 2-heap is 10-80% (for huge heaps, the speedup is more)
-
- splay/skew heaps are worse than 2heap or aligned 4heap in practice.
-
- //TODO: switch from heapify (Floyd's method) to repeated push past some size limit (in bytes) due to cache effect -
- #define D_ARY_BYTES_OUT_OF_CACHE 0x1000000
-
- //TODO: assuming locmap is an lvalue pmap, we can be more efficient.  on the other hand, if it's an intrusive property map to an interned mutable object, there's no difference in performance, and that's what i'm going to do in my first uses.  plus, if keys are indices and the map is a vector, it's barely any overhead.
-
- */
-
-//
-//=======================================================================
-// Copyright 2009 Trustees of Indiana University
-// Authors: Jeremiah J. Willcock, Andrew Lumsdaine
-//
-// Distributed under the Boost Software License, Version 1.0. (See
-// accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-//=======================================================================
-//
-
-#include <vector>
-#include <cstddef>
-#include <algorithm>
-#include <utility>
-#include <cassert>
-#include <boost/static_assert.hpp>
-#include <boost/shared_array.hpp>
-#include <boost/property_map/property_map.hpp>
-
-
-  // D-ary heap using an indirect compare operator (use identity_property_map
-  // as DistanceMap to get a direct compare operator).  This heap appears to be
-  // commonly used for Dijkstra's algorithm for its good practical performance
-  // on some platforms; asymptotically, it's not optimal; it has an O(lg N) decrease-key
-  // operation, which is (amortized) constant time on a relaxed heap or fibonacci heap.  The
-  // implementation is mostly based on the binary heap page on Wikipedia and
-  // online sources that state that the operations are the same for d-ary
-  // heaps.  This code is not based on the old Boost d-ary heap code.
-  //
-  // - d_ary_heap_indirect is a model of UpdatableQueue as is needed for
-  //   dijkstra_shortest_paths.
-  //
-  // - Value must model Assignable.
-  // - Arity must be at least 2 (optimal value appears to be 4, both in my and
-  //   third-party experiments).
-  // - IndexInHeapMap must be a ReadWritePropertyMap from Value to
-  //   Container::size_type (to store the index of each stored value within the
-  //   heap for decrease-key aka update).
-  // - DistanceMap must be a ReadablePropertyMap from Value to something
-  //   (typedef'ed as distance_type).
-  // - Compare must be a BinaryPredicate used as a less-than operator on
-  //   distance_type.
-  // - Container must be a random-access, contiguous container (in practice,
-  //   the operations used probably require that it is std::vector<Value>).
-  //
-  template <typename Value,
-            std::size_t Arity,
-            typename IndexInHeapPropertyMap,
-            typename DistanceMap,
-            typename Better = std::less<Value>,
-            typename Container = std::vector<Value>,
-            typename Size = typename Container::size_type,
-            typename Equal = std::equal_to<Value> >
-  class d_ary_heap_indirect {
-    BOOST_STATIC_ASSERT (Arity >= 2);
-    public:
-    typedef Container container_type;
-    typedef Size size_type;
-    typedef Value value_type;
-    typedef typename Container::const_iterator const_iterator;
-    typedef const_iterator iterator;
-    // The distances being compared using better and that are stored in the
-    // distance map
-    typedef typename boost::property_traits<DistanceMap>::value_type distance_type;
-    d_ary_heap_indirect(DistanceMap const& distance,
-                        IndexInHeapPropertyMap const& index_in_heap,
-                        const Better& better = Better(),
-                        size_type container_reserve = 100000,
-                        Equal const& equal = Equal()
-      )
-      : better(better), data(), distance(distance),
-        index_in_heap(index_in_heap),equal(equal) {
-      data.reserve(container_reserve);
-    }
-    /* Implicit copy constructor */
-    /* Implicit assignment operator */
-
-    template <class C>
-    void append_heapify(C const& c) {
-      data.reserve(data.size()+c.size());
-      append_heapify(c.begin(),c.end());
-    }
-
-    template <class I>
-    void append_heapify(I begin,I end) {
-      data.insert(data.end(),begin,end);
-      heapify();
-    }
-
-    template <class C>
-    void append_push(C const& c) {
-      data.reserve(data.size()+c.size());
-      append_push(c.begin(),c.end());
-    }
-
-    // past some threshold, this should be faster than append_heapify.  also, if there are many existing elements it will be faster.
-    template <class I>
-    void append_push(I begin,I end) {
-      for (;begin!=end;++begin)
-        push(*begin);
-    }
-
-    template <class C>
-    void append(C const& c) {
-      if (D_ARY_APPEND_ALWAYS_PUSH || data.size()>=c.size()/2)
-        append_push(c);
-      else
-        append_heapify(c);
-    }
-
-    // past some threshold, this should be faster than append_heapify.  also, if there are many existing elements it will be faster.
-    template <class I>
-    void append(I begin,I end) {
-      if (D_ARY_APPEND_ALWAYS_PUSH || data.size()>=0x10000)
-        append_push(begin,end);
-      else
-        append_heapify(begin,end);
-    }
-
-    // could allow mutation of data directly, e.g. push_back 1 at a time - but then they could forget to heapify()
-
-    //from bottom of heap tree up, turn that subtree into a heap by adjusting the root down
-    // for n=size, array elements indexed by floor(n/2) + 1, floor(n/2) + 2, ... , n are all leaves for the tree, thus each is an one-element heap already
-    // warning: this is many fewer instructions but, at some point (when heap doesn't fit in Lx cache) it will become slower than repeated push().
-    void heapify() {
-      for (size_type i=parent(data.size()-1);i>0;--i) // starting from parent of last node, ending at first child of root (i==1)
-        preserve_heap_property_down(i);
-    }
-
-    void reserve(size_type s) {
-      data.reserve(s);
-    }
-
-    size_type size() const {
-      return data.size();
-    }
-
-    bool empty() const {
-      return data.empty();
-    }
-
-    const_iterator begin() const {
-      return data.begin();
-    }
-
-    const_iterator end() const {
-      return data.end();
-    }
-
-    void clear() {
-#if D_ARY_TRACK_OUT_OF_HEAP
-      using boost::put;
-      for (typename Container::iterator i=data.begin(),e=data.end();i!=e;++i)
-        put(index_in_heap,*i,(size_type)D_ARY_HEAP_NULL_INDEX);
-#endif
-      data.clear();
-    }
-
-    void push(const Value& v) {
-      if (D_ARY_PUSH_GRAEHL) {
-        size_type i = data.size();
-        data.push_back(Value()); // (hoping default construct is cheap, construct-copy inline)
-        preserve_heap_property_up(v,i); // we don't have to recopy v, or init index_in_heap
-      } else {
-        size_type index = data.size();
-        data.push_back(v);
-        using boost::put;
-        put(index_in_heap, v, index);
-        preserve_heap_property_up(index);
-      }
-      verify_heap();
-    }
-
-    Value& top() {
-      return data[0];
-    }
-
-    const Value& top() const {
-      return data[0];
-    }
-
-    void pop() {
-      using boost::put;
-      if(D_ARY_TRACK_OUT_OF_HEAP)
-        put(index_in_heap, data[0], (size_type)D_ARY_HEAP_NULL_INDEX);
-      if (data.size() != 1) {
-        if (D_ARY_POP_GRAEHL) {
-          preserve_heap_property_down(data.back(),0,data.size()-1);
-          data.pop_back();
-        } else {
-          data[0] = data.back();
-          put(index_in_heap, data[0], 0);
-          data.pop_back();
-          preserve_heap_property_down();
-        }
-        verify_heap();
-      } else {
-        data.pop_back();
-      }
-    }
-
-    // This function assumes the key has been improved
-    // (distance has become smaller, so it may need to rise toward top().
-    // i.e. decrease-key in a min-heap
-    void update(const Value& v) {
-      using boost::get;
-      size_type index = get(index_in_heap, v);
-      preserve_heap_property_up(v,index);
-      verify_heap();
-    }
-
-    // return true if improved.
-    bool maybe_improve(const Value& v,distance_type dbetter) {
-      using boost::get;
-      if (better(dbetter,get(distance,v))) {
-        preserve_heap_property_up_dist(v,dbetter);
-        return true;
-      }
-      return false;
-    }
-
-    distance_type best(distance_type null=0) const {
-      return empty() ? null : get(distance,data[0]);
-    }
-    distance_type second_best(distance_type null=0) const {
-      if (data.size()<2) return null;
-      int m=std::min(data.size(),Arity+1);
-//      if (m>=Arity) m=Arity+1;
-      distance_type b=get(distance,data[1]);
-      for (int i=2;i<m;++i) {
-        distance_type d=get(distance,data[i]);
-        if (better(d,b))
-          b=d;
-      }
-      return b;
-    }
-
-
-#include "warning_push.h"
-#pragma GCC diagnostic ignored "-Wtype-limits"
-      // because maybe size_type is signed or unsigned
-    inline bool contains(const Value &v,size_type i) const {
-      if (D_ARY_TRACK_OUT_OF_HEAP)
-        return i != (size_type)D_ARY_HEAP_NULL_INDEX;
-      size_type sz=data.size();
-      SHOWM2(DDARY,"d_ary_heap contains",i,data.size());
-      return i>=0 && i<sz && equal(v,data[i]); // note: size_type may be signed (don't recommend it, though) - thus i>=0 check to catch uninit. data
-    }
-#include "warning_pop.h"
-
-    inline bool contains(const Value& v) const {
-      using boost::get;
-      return contains(v,get(index_in_heap, v));
-    }
-
-    void push_or_update(const Value& v) { /* insert if not present, else update */
-      using boost::get;
-      size_type index = get(index_in_heap, v);
-      if (D_ARY_PUSH_GRAEHL) {
-        if (contains(v,index))
-          preserve_heap_property_up(v,index);
-        else
-          push(v);
-      } else {
-        if (!contains(v,index)) {
-          index = data.size();
-          data.push_back(v);
-          using boost::put;
-          put(index_in_heap, v, index);
-        }
-        preserve_heap_property_up(index);
-      }
-      verify_heap();
-    }
-
-    private:
-    Better better;
-    Container data;
-    DistanceMap distance;
-    IndexInHeapPropertyMap index_in_heap;
-    Equal equal;
-
-    // Get the parent of a given node in the heap
-    static inline size_type parent(size_type index) {
-      return (index - 1) / Arity;
-    }
-
-    // Get the child_idx'th child of a given node; 0 <= child_idx < Arity
-    static inline size_type child(size_type index, std::size_t child_idx) {
-      return index * Arity + child_idx + 1;
-    }
-
-    // Swap two elements in the heap by index, updating index_in_heap
-    inline void swap_heap_elements(size_type index_a, size_type index_b) {
-      using std::swap;
-      Value value_a = data[index_a];
-      Value value_b = data[index_b];
-      data[index_a] = value_b;
-      data[index_b] = value_a;
-      using boost::put;
-      put(index_in_heap, value_a, index_b);
-      put(index_in_heap, value_b, index_a);
-    }
-
-    inline void move_heap_element(Value const& v,size_type ito) {
-      using boost::put;
-      put(index_in_heap,v,ito);
-      data[ito]=v; //todo: move assign?
-    }
-
-    // Verify that the array forms a heap; commented out by default
-    void verify_heap() const {
-      // This is a very expensive test so it should be disabled even when
-      // NDEBUG is not defined
-#if D_ARY_VERIFY_HEAP
-      using boost::get;
-      for (size_t i = 1; i < data.size(); ++i) {
-        if (better(get(distance,data[i]), get(distance,data[parent(i)]))) {
-          assert (!"Element is smaller than its parent");
-        }
-      }
-#endif
-    }
-
-    // we have a copy of the key, so we don't need to do that stupid find # of levels to move then move.  we act as though data[index]=currently_being_moved, but in fact it's an uninitialized "hole", which we fill at the very end
-    inline void preserve_heap_property_up(Value const& currently_being_moved,size_type index) {
-      using boost::get;
-      preserve_heap_property_up(currently_being_moved,index,get(distance,currently_being_moved));
-    }
-
-    inline void preserve_heap_property_up_set_dist(Value const& currently_being_moved,distance_type dbetter) {
-      using boost::get;
-      using boost::put;
-      put(distance,currently_being_moved,dbetter);
-      preserve_heap_property_up(currently_being_moved,get(index_in_heap,currently_being_moved),dbetter);
-      verify_heap();
-    }
-
-    void preserve_heap_property_up(Value const& currently_being_moved,size_type index,distance_type currently_being_moved_dist) {
-      using boost::put;
-      using boost::get;
-      if (D_ARY_UP_GRAEHL) {
-        for (;;) {
-          if (index == 0) break; // Stop at root
-          size_type parent_index = parent(index);
-          Value const& parent_value = data[parent_index];
-          if (better(currently_being_moved_dist, get(distance, parent_value))) {
-            move_heap_element(parent_value,index);
-            index = parent_index;
-          } else {
-            break; // Heap property satisfied
-          }
-        }
-        //finish "swap chain" by filling hole w/ currently_being_moved
-        move_heap_element(currently_being_moved,index); // note: it's ok not to return early on index==0 at start, even if self-assignment isn't supported by Value - because currently_being_moved is a copy.
-      } else {
-        put(index_in_heap,currently_being_moved,index);
-        put(distance,currently_being_moved,currently_being_moved_dist);
-        preserve_heap_property_up(index);
-      }
-    }
-
-    // Starting at a node, move up the tree swapping elements to preserve the
-    // heap property.  doesn't actually use swap; uses hole
-    void preserve_heap_property_up(size_type index) {
-      using boost::get;
-      if (index == 0) return; // Do nothing on root
-      if (D_ARY_UP_GRAEHL) {
-        Value copyi=data[index];
-        preserve_heap_property_up(copyi,index);
-        return;
-      }
-      size_type orig_index = index;
-      size_type num_levels_moved = 0;
-      // The first loop just saves swaps that need to be done in order to avoid
-      // aliasing issues in its search; there is a second loop that does the
-      // necessary swap operations
-      Value currently_being_moved = data[index];
-      distance_type currently_being_moved_dist =
-        get(distance, currently_being_moved);
-      for (;;) {
-        if (index == 0) break; // Stop at root
-        size_type parent_index = parent(index);
-        Value parent_value = data[parent_index];
-        if (better(currently_being_moved_dist, get(distance, parent_value))) {
-          ++num_levels_moved;
-          index = parent_index;
-          continue;
-        } else {
-          break; // Heap property satisfied
-        }
-      }
-      // Actually do the moves -- move num_levels_moved elements down in the
-      // tree, then put currently_being_moved at the top
-      index = orig_index;
-      using boost::put;
-      for (size_type i = 0; i < num_levels_moved; ++i) {
-        size_type parent_index = parent(index);
-        Value parent_value = data[parent_index];
-        put(index_in_heap, parent_value, index);
-        data[index] = parent_value;
-        index = parent_index;
-      }
-      data[index] = currently_being_moved;
-      put(index_in_heap, currently_being_moved, index);
-      verify_heap();
-    }
-
-
-    // From the root, swap elements (each one with its smallest child) if there
-    // are any parent-child pairs that violate the heap property.  v is placed at data[i], but then pushed down (note: data[i] won't be read explicitly; it will instead be overwritten by percolation).  this also means that v must be a copy of data[i] if it was already at i.
-    // e.g. v=data.back(), i=0, sz=data.size()-1 for pop(), implicitly swapping data[i], data.back(), and doing data.pop_back(), then adjusting from 0 down w/ swaps.  updates index_in_heap for v.
-    inline void preserve_heap_property_down(Value const& currently_being_moved,size_type i,size_type heap_size) {
-      using boost::get;
-      distance_type currently_being_moved_dist=get(distance,currently_being_moved);
-      Value* data_ptr = &data[0];
-      size_type index = 0; // hole at index - currently_being_moved to be put here when we find the final hole spot
-      for (;;) {
-        size_type first_child_index = child(index, 0);
-        if (first_child_index >= heap_size) break; /* No children */
-        Value* child_base_ptr = data_ptr + first_child_index; // using index of first_child_index+smallest_child_index because we hope optimizer will be smart enough to const-unroll a loop below if we do this.  i think the optimizer would have gotten it even without our help (i.e. store root-relative index)
-
-        // begin find best child index/distance
-        size_type smallest_child_index = 0; // don't add to base first_child_index every time we update which is smallest.
-        distance_type smallest_child_dist = get(distance, child_base_ptr[smallest_child_index]);
-#undef D_ARY_MAYBE_IMPROVE_CHILD_I
-#define D_ARY_MAYBE_IMPROVE_CHILD_I \
-            distance_type i_dist = get(distance, child_base_ptr[i]); \
-            if (better(i_dist, smallest_child_dist)) { \
-              smallest_child_index = i; \
-              smallest_child_dist = i_dist; \
-            }
-        if (first_child_index + Arity <= heap_size) {
-          // avoid repeated heap_size boundcheck (should test if this is really a speedup - instruction cache tradeoff - could use upperbound = min(Arity,heap_size-first_child_index) instead.  but this optimizes to a fixed number of iterations (compile time known) so probably worth it
-          for (size_t i = 1; i < Arity; ++i) {
-            D_ARY_MAYBE_IMPROVE_CHILD_I
-          }
-        } else {
-          for (size_t i = 1,e=heap_size - first_child_index; i < e; ++i) {
-            D_ARY_MAYBE_IMPROVE_CHILD_I
-          }
-        }
-        //end: know best child
-
-        if (better(smallest_child_dist, currently_being_moved_dist)) {
-          // instead of swapping, move.
-          move_heap_element(child_base_ptr[smallest_child_index],index); // move up
-          index=first_child_index+smallest_child_index; // descend - hole is now here
-        } else {
-          move_heap_element(currently_being_moved,index); // finish "swap chain" by filling hole
-          break;
-        }
-      }
-      verify_heap();
-    }
-
-    inline void preserve_heap_property_down(size_type i) {
-      preserve_heap_property_down(data[i],i,data.size());
-    }
-
-    void preserve_heap_property_down() {
-      using boost::get;
-      if (data.empty()) return;
-      if (D_ARY_DOWN_GRAEHL) { // this *should* be more efficient because i avoid swaps.
-        Value copy0=data[0];
-        preserve_heap_property_down(copy0,0,data.size());
-        return;
-      }
-      size_type index = 0;
-      Value currently_being_moved = data[0];
-      distance_type currently_being_moved_dist =
-        get(distance, currently_being_moved);
-      size_type heap_size = data.size();
-      Value* data_ptr = &data[0];
-      for (;;) {
-        size_type first_child_index = child(index, 0);
-        if (first_child_index >= heap_size) break; /* No children */
-        Value* child_base_ptr = data_ptr + first_child_index;
-        size_type smallest_child_index = 0;
-        distance_type smallest_child_dist = get(distance, child_base_ptr[smallest_child_index]);
-        if (first_child_index + Arity <= heap_size) {
-          for (size_t i = 1; i < Arity; ++i) { // can be unrolled completely.
-
-            D_ARY_MAYBE_IMPROVE_CHILD_I
-          }
-        } else {
-          for (size_t i = 1,e=heap_size - first_child_index; i < e; ++i) {
-            D_ARY_MAYBE_IMPROVE_CHILD_I
-          }
-        }
-        if (better(smallest_child_dist, currently_being_moved_dist)) {
-          swap_heap_elements(smallest_child_index + first_child_index, index);
-          index = smallest_child_index + first_child_index;
-          continue;
-        } else {
-          break; // Heap property satisfied
-        }
-      }
-      verify_heap();
-    }
-
-  };
-
-#endif
diff --git a/utils/ftoa.h b/utils/ftoa.h
deleted file mode 100644
index 3dba528d..00000000
--- a/utils/ftoa.h
+++ /dev/null
@@ -1,403 +0,0 @@
-#ifndef FTOA_H
-#define FTOA_H
-
-
-//TODO: for fractional digits/non-sci, determine the right amount of left padding (more if the whole number is indeed <1, to keep the significant digits), less if sci notation and/or mantissa has sig. digits (don't want N before . and N after!)
-
-#ifndef FTOA_ROUNDTRIP
-# define FTOA_ROUNDTRIP 1
-#endif
-
-#ifndef FTOA_DEBUG
-# define FTOA_DEBUG 0
-#endif
-
-#ifndef FTOA_USE_SPRINTF
-#define FTOA_USE_SPRINTF 0
-#endif
-
-#if FTOA_DEBUG
-# define FTOAassert(x) assert(x)
-# define DBFTOA(x) std::cerr<<"\nFTOA " <<__func__<<"("<<__LINE__<<"): " #x "="<<x<<"\n"
-# define DBFTOA2(x0,x1) std::cerr<<"\nFTOA " <<__func__<<"("<<__LINE__<<"): " #x0 "="<<x0<<" " #x1 "="<<x1 <<"\n"
-#else
-# define FTOAassert(x)
-# define DBFTOA(x)
-# define DBFTOA2(x0,x1)
-#endif
-
-/* DECIMAL_FOR_WHOLE ; ftos(123)
-   0 ; 123
-   1 ; 123
-   2 ; 123.
-     ; ftos(0) is always just "0" (not "0.0")
-     ; ftos(.01)
-   0 ; .01
-   1 ; 0.01
-   2 ; 0.01
-
-*/
-
-#ifndef DECIMAL_FOR_WHOLE
-# define DECIMAL_FOR_WHOLE 1
-#endif
-
-#include <limits>
-#include <stdint.h>
-#include <iostream>
-#include <cmath>
-#include <assert.h>
-#include <cstdio>
-#include "utoa.h"
-#include "nan.h"
-
-template <class Float>
-struct ftoa_traits {
-};
-
-//eP10,
-// sigd decimal places normally printed, roundtripd needed so that round-trip float->string->float is identity
-
-#define DEFINE_FTOA_TRAITS(FLOATT,INTT,sigd,roundtripd,small,large,used,P10)   \
-template <> \
-struct ftoa_traits<FLOATT> { \
-  typedef INTT int_t; \
-  typedef u ## INTT uint_t; \
-  typedef FLOATT float_t; \
-  enum { digits10=std::numeric_limits<INTT>::digits10, chars_block=P10, usedig=used, sigdig=sigd, roundtripdig=roundtripd, bufsize=roundtripdig+7 }; \
-  static const double pow10_block = 1e ## P10; \
-  static const float_t small_f = small; \
-  static const float_t large_f = large; \
-  static inline int sprintf(char *buf,double f) { return std::sprintf(buf,"%." #used "g",f); } \
-  static inline int sprintf_sci(char *buf,double f) { return std::sprintf(buf,"%." #used "e",f); } \
-  static inline int sprintf_nonsci(char *buf,double f) { return std::sprintf(buf,"%." #used "f",f); } \
-  static inline uint_t fracblock(double frac) { FTOAassert(frac>=0 && frac<1); double f=frac*pow10_block;uint_t i=(uint_t)f;FTOAassert(i<pow10_block);return i; } \
-  static inline uint_t rounded_fracblock(double frac) { FTOAassert(frac>=0 && frac<1); double f=frac*pow10_block;uint_t i=(uint_t)(f+.5);FTOAassert(i<pow10_block);return i; }  \
-  static inline float_t mantexp10(float_t f,int &exp) { float_t e=std::log10(f); float_t ef=std::floor(e); exp=ef;  return f/std::pow((float_t)10,ef); } \
-  static inline bool use_sci_abs(float_t fa) { return fa<small || fa>large; } \
-  static inline bool use_sci(float_t f) { return use_sci_abs(std::fabs(f)); }   \
-};
-//TODO: decide on computations in double (would hurt long double) or in native float type - any advantage?  more precision is usually better.
-
-//10^22 = 0x1.0f0cf064dd592p73 is the largest exactly representable power of 10 in the binary64 format.  but round down to 18 so int64_t can hold it.
-
-#if FTOA_ROUNDTRIP
-#define DEFINE_FTOA_TRAITS_ROUNDTRIP(FLOATT,INTT,sigd,roundtripd,small,large) DEFINE_FTOA_TRAITS(FLOATT,INTT,sigd,roundtripd,small,large,roundtripd,roundtripd)
-#else
-#define DEFINE_FTOA_TRAITS_ROUNDTRIP(FLOATT,INTT,sigd,roundtripd,small,large) DEFINE_FTOA_TRAITS(FLOATT,INTT,sigd,roundtripd,small,large,sigd,sigd)
-#endif
-
-DEFINE_FTOA_TRAITS_ROUNDTRIP(double,int64_t,15,17,1e-5,1e8)
-//i've heard that 1e10 is fine for float.  but we only have 1e9 (9 decimal places) in int32.
-DEFINE_FTOA_TRAITS_ROUNDTRIP(float,int32_t,6,9,1e-3,1e8)
-
-
-template <class F>
-inline void ftoa_error(F f,char const* msg="") {
-  using namespace std;
-  cerr<<"ftoa error: "<<msg<<" f="<<f<<endl;
-  assert(!"ftoa error");
-}
-
-// all of the below prepend and return new cursor.  null terminate yourself (like itoa/utoa)
-
-//possibly empty string for ~0 (no sci notation fallback).  left padded with the right number of 0s (tricky).  [ret,p) are the digits.
-template <class F>
-char *prepend_pos_frac_digits(char *p,F f) {
-  FTOAassert(f<1 && f >0);
-  typedef ftoa_traits<F> FT;
-  //repeat if very small???  nah, require sci notation to take care of it.
-  typename FT::uint_t i=FT::rounded_fracblock(f);
-  DBFTOA2(f,i);
-  if (i>0) {
-    unsigned n_skipped;
-    char *d=utoa_drop_trailing_0(p,i,n_skipped);
-    char *b=p-FT::chars_block+n_skipped;
-    FTOAassert(b<=d);
-    left_pad(b,d,'0');
-    return b;
-  } else {
-    return p;
-  }
-}
-
-template <class F>
-char *append_pos_frac_digits(char *p,F f) { // '0' right-padded, nul terminated, return position of nul.  [p,ret) are the digits
-  if (f==0) {
-    *p++='0';
-    return p;
-  }
-  FTOAassert(f<1 && f >0);
-  typedef ftoa_traits<F> FT;
-  //repeat if very small???  nah, require sci notation to take care of it.
-  typename FT::uint_t i=FT::rounded_fracblock(f);
-  DBFTOA2(f,i);
-  if (i>0) {
-    char *e=p+FT::chars_block;
-    utoa_left_pad(p,e,i,'0');
-    *e=0;
-    return e;
-  } else {
-    *p=0;
-    return p;
-  }
-}
-
-template <class F>
-inline char *prepend_pos_frac(char *p,F f) {
-  FTOAassert(f<1 && f>=0);
-  if (f==0) {
-    *--p='0';
-    return p;
-  }
-  p=prepend_pos_frac_digits(p,f);
-  *--p='.';
-  if (DECIMAL_FOR_WHOLE>0)
-    *--p='0';
-  return p;
-}
-
-template <class F>
-inline char *append_pos_frac(char *p,F f) {
-  DBFTOA(f);
-  if (DECIMAL_FOR_WHOLE>0)
-    *p++='0';
-  *p++='.';
-  return append_pos_frac_digits(p,f);
-}
-
-template <class F>
-inline char *prepend_frac(char *p,F f,bool positive_sign=false) {
-  FTOAassert(f<1 && f>-1);
-  if (f==0)
-    *--p='0';
-  else if (f<0) {
-    p=prepend_pos_frac(p,-f);
-    *--p='-';
-  } else {
-    p=prepend_pos_frac(p,f);
-    if (positive_sign)
-      *--p='+';
-  }
-  return p;
-}
-
-
-template <class F>
-inline char *append_sign(char *p,F f,bool positive_sign=false) {
-  if (f<0) {
-    *p++='-';
-  } else if (positive_sign)
-    *p++='+';
-  return p;
-}
-
-template <class F>
-inline char *append_frac(char *p,F f,bool positive_sign=false) {
-  FTOAassert(f<1 && f>-1);
-  if (f==0) {
-    *p++='0';
-    return p;
-  } else if (f<0) {
-    *p++='-';
-    return append_pos_frac(p,-f);
-  }
-  if (positive_sign) {
-    *p++='+';
-    return append_pos_frac(p,f);
-  }
-
-}
-
-
-//append_frac, append_pos_sci, append_sci.  notice these are all composed according to a pattern (but reversing order of composition in pre vs app).  or can implement with copy through buffer
-
-/* will switch to sci notation if integer part is too big for the int type. but for very small values, will simply display 0 (i.e. //TODO: find out log10 and leftpad 0s then convert rest) */
-template <class F>
-char *prepend_pos_nonsci(char *p,F f) {
-  typedef ftoa_traits<F> FT;
-  typedef typename FT::uint_t uint_t;
-  DBFTOA(f);
-  FTOAassert(f>0);
-  if (f>std::numeric_limits<uint_t>::max())
-    return prepend_pos_sci(p,f);
-  //which is faster - modf is weird and returns negative frac part if f is negative.  while we could deal with this using fabs, we instead only handle positive here (put - sign in front and negate, then call us) - ?
-#if 0
-  F intpart;
-  F frac=std::modf(f,&intpart);
-  uint_t u=intpart;
-#else
-  uint_t u=f;
-  F frac=f-u;
-#endif
-  DBFTOA2(u,frac);
-  if (frac == 0) {
-    if (DECIMAL_FOR_WHOLE>1)
-      *--p='.';
-  } else {
-    p=prepend_pos_frac_digits(p,frac);
-    *--p='.';
-  }
-  if (u==0) {
-    if (DECIMAL_FOR_WHOLE>0)
-      *--p='0';
-  } else
-    p=utoa(p,u);
-  return p;
-}
-
-// modify p; return true if handled
-template <class F>
-inline bool prepend_0_etc(char *&p,F f,bool positive_sign=false) {
-  if (f==0) {
-    *--p='0';
-    return true;
-  }
-  if (is_nan(f)) {
-    p-=3;
-    p[0]='N';p[1]='A';p[2]='N';
-    return true;
-  }
-  if (is_pos_inf(f)) {
-    p-=3;
-    p[0]='I';p[1]='N';p[2]='F';
-    if (positive_sign)
-      *--p='+';
-    return true;
-  }
-  if (is_neg_inf(f)) {
-    p-=4;
-    p[0]='-';p[1]='I';p[2]='N';p[3]='F';
-    return true;
-  }
-  return false;
-}
-
-template <class F>
-inline char *prepend_nonsci(char *p,F f,bool positive_sign=false) {
-  if (prepend_0_etc(p,f,positive_sign)) return p;
-  if (f<0) {
-    p=prepend_pos_nonsci(p,-f);
-    *--p='-';
-  } else {
-    p=prepend_pos_nonsci(p,f);
-    if (positive_sign)
-      *--p='+';
-  }
-  return p;
-}
-
-template <class F>
-inline char *prepend_pos_sci(char *p,F f,bool positive_sign_exp=false) {
-  FTOAassert(f>0);
-  typedef ftoa_traits<F> FT;
-  int e10;
-  F mant=FT::mantexp10(f,e10);
-  DBFTOA(f);
-  DBFTOA2(mant,e10);
-  FTOAassert(mant<10.00001);
-  if (mant>=10.) {
-    ++e10;
-    mant*=.1;
-  } else if (mant < 1.) {
-    --e10;
-    mant*=10;
-  }
-  p=itoa(p,e10,positive_sign_exp);
-  *--p='e';
-  return prepend_pos_nonsci(p,mant);
-}
-
-template <class F>
-inline char *prepend_sci(char *p,F f,bool positive_sign_mant=false,bool positive_sign_exp=false) {
-  if (prepend_0_etc(p,f,positive_sign_mant)) return p;
-  if (f==0)
-    *--p='0';
-  else if (f<0) {
-    p=prepend_pos_sci(p,-f,positive_sign_exp);
-    *--p='-';
-  } else {
-    p=prepend_pos_sci(p,f,positive_sign_exp);
-    if (positive_sign_mant)
-      *--p='+';
-  }
-  return p;
-}
-
-template <class F>
-inline char *append_nonsci(char *p,F f,bool positive_sign=false) {
-  if (positive_sign&&f>=0) *p++='+';
-  return p+ftoa_traits<F>::sprintf_nonsci(p,f);
-}
-
-template <class F>
-inline char *append_sci(char *p,F f,bool positive_sign=false) {
-  if (positive_sign&&f>=0) *p++='+';
-  return p+ftoa_traits<F>::sprintf_sci(p,f);
-}
-
-template <class F>
-inline char *append_ftoa(char *p,F f,bool positive_sign=false) {
-  if (positive_sign&&f>=0) *p++='+';
-  return p+ftoa_traits<F>::sprintf(p,f);
-}
-
-template <class F>
-inline char *prepend_ftoa(char *p,F f)
-{
-  typedef ftoa_traits<F> FT;
-  return FT::use_sci(f) ? prepend_sci(p,f) : prepend_nonsci(p,f);
-}
-
-template <class F>
-inline std::string ftos_append(F f) {
-  typedef ftoa_traits<F> FT;
-  char buf[FT::bufsize];
-  return std::string(buf,append_ftoa(buf,f));
-}
-
-template <class F>
-inline std::string ftos_prepend(F f) {
-  typedef ftoa_traits<F> FT;
-  char buf[FT::bufsize];
-  char *end=buf+FT::bufsize;
-  return std::string(prepend_ftoa(end,f),end);
-}
-
-
-template <class F>
-inline std::string ftos(F f) {
-#if 0
-  // trust RVO?  no extra copies?
-  return FTOA_USE_SPRINTF ? ftos_append(f) : ftos_prepend(f);
-#else
-  typedef ftoa_traits<F> FT;
-  char buf[FT::bufsize];
-  if (FTOA_USE_SPRINTF) {
-    return std::string(buf,append_ftoa(buf,f));
-  } else {
-    char *end=buf+FT::bufsize;
-    return std::string(prepend_ftoa(end,f),end);
-  }
-#endif
-}
-
-namespace {
-  const int ftoa_bufsize=30;
-  char ftoa_outbuf[ftoa_bufsize];
-}
-
-// not even THREADLOCAL - don't use.
-inline char *static_ftoa(float f)
-{
-  if (FTOA_USE_SPRINTF) {
-    append_ftoa(ftoa_outbuf,f);
-    return ftoa_outbuf;
-  } else {
-    char *end=ftoa_outbuf+ftoa_bufsize;
-    return prepend_ftoa(end,f);
-  }
-}
-
-
-#endif
diff --git a/utils/int_or_pointer.h b/utils/int_or_pointer.h
deleted file mode 100644
index 4b6a9e4a..00000000
--- a/utils/int_or_pointer.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef INT_OR_POINTER_H
-#define INT_OR_POINTER_H
-
-// if you ever wanted to store a discriminated union of pointer/integer without an extra boolean flag, this will do it, assuming your pointers are never odd.
-
-// check lsb for expected tag?
-#ifndef IOP_CHECK_LSB
-# define IOP_CHECK_LSB 1
-#endif
-#if IOP_CHECK_LSB
-# define iop_assert(x) assert(x)
-#else
-# define iop_assert(x)
-#endif
-
-#include <assert.h>
-#include <iostream>
-
-template <class Pointed=void,class Int=size_t>
-struct IntOrPointer {
-  typedef Pointed pointed_type;
-  typedef Int integer_type;
-  typedef Pointed *value_type;
-  typedef IntOrPointer<Pointed,Int> self_type;
-  IntOrPointer(int j) { *this=j; }
-  IntOrPointer(size_t j) { *this=j; }
-  IntOrPointer(value_type v) { *this=v; }
-  bool is_integer() const { return i&1; }
-  bool is_pointer() const { return !(i&1); }
-  value_type & pointer() { return p; }
-  const value_type & pointer() const { iop_assert(is_pointer()); return p; }
-  integer_type integer() const { iop_assert(is_integer()); return i >> 1; }
-  void set_integer(Int j) { i=2*j+1; }
-  void set_pointer(value_type p_) { p=p_;iop_assert(is_pointer()); }
-  void operator=(unsigned j) { i = 2*(integer_type)j+1; }
-  void operator=(int j) { i = 2*(integer_type)j+1; }
-  template <class C>
-  void operator=(C j) { i = 2*(integer_type)j+1; }
-  void operator=(value_type v) { p=v; }
-  IntOrPointer() {}
-  IntOrPointer(const self_type &s) : p(s.p) {}
-  void operator=(const self_type &s) { p=s.p; }
-  template <class C>
-  bool operator ==(C* v) const { return p==v; }
-  template <class C>
-  bool operator ==(const C* v) const { return p==v; }
-  template <class C>
-  bool operator ==(C j) const { return integer() == j; }
-  bool operator ==(self_type s) const { return p==s.p; }
-  bool operator !=(self_type s) const { return p!=s.p; }
-  template <class O> void print(O&o) const
-  {
-    if (is_integer())
-      o << integer();
-    else {
-      o << "0x" << std::hex << (size_t)pointer() << std::dec;
-    }
-  }
-  friend inline std::ostream& operator<<(std::ostream &o,self_type const& s) {
-    s.print(o); return o;
-  }
-protected:
-  union {
-    value_type p; // must be even (guaranteed unless you're pointing at packed chars)
-    integer_type i; // stored as 2*data+1, so only has half the range (one less bit) of a normal integer_type
-  };
-};
-
-
-#endif
diff --git a/utils/intern_pool.h b/utils/intern_pool.h
deleted file mode 100644
index 7c739add..00000000
--- a/utils/intern_pool.h
+++ /dev/null
@@ -1,158 +0,0 @@
-#ifndef INTERN_POOL_H
-#define INTERN_POOL_H
-
-#define DEBUG_INTERN_POOL(x) x
-
-/* to "intern" a string in lisp is to make a symbol from it (a pointer to a canonical copy whose pointer can be equality-compared/hashed directly with other interned things).  we take an Item that has a key part and some mutable parts (that aren't in its identity), and we hash-by-value the key part to map to a canonical on-heap Item - and we use a boost object pool to allocate them */
-
-//FIXME: actually store function object state (assumed stateless so far)
-
-#include <boost/pool/object_pool.hpp>
-#include "hash.h"
-//#include "null_traits.h"
-#include <functional>
-
-template <class I>
-struct get_key { // default accessor for I = like pair<key,val>
-  typedef typename I::first_type const& result_type;
-  typedef I const& argument_type;
-  result_type operator()(I const& i) const {
-    return i.first;
-  }
-};
-
-// Arg type should be the non-pointer version.  this saves me from using boost type traits to remove_pointer.  f may be binary or unary
-template <class KeyF,class F,class Arg=typename KeyF::argument_type>
-struct compose_indirect {
-  typedef Arg *argument_type; // we also accept Arg &
-  KeyF kf;
-  F f;
-  typedef typename F::result_type result_type;
-  result_type operator()(Arg const& p) const {
-    return f(kf(p));
-  }
-  result_type operator()(Arg & p) const {
-    return f(kf(p));
-  }
-  result_type operator()(Arg * p) const {
-    return f(kf(*p));
-  }
-  template <class V>
-  result_type operator()(V const& v) const {
-    return f(kf(*v));
-  }
-
-  result_type operator()(Arg const& a1,Arg const& a2) const {
-    return f(kf(a1),kf(a2));
-  }
-  result_type operator()(Arg & a1,Arg & a2) const {
-    return f(kf(a1),kf(a2));
-  }
-  result_type operator()(Arg * a1,Arg * a2) const {
-    return f(kf(*a1),kf(*a2));
-  }
-  template <class V,class W>
-  result_type operator()(V const& v,W const&w) const {
-    return f(kf(*v),kf(*w));
-  }
-
-
-};
-
-template <class KeyF,class F,class Arg=typename KeyF::argument_type>
-struct equal_indirect {
-  typedef Arg *argument_type; // we also accept Arg &
-  KeyF kf;
-  F f;
-  typedef bool result_type;
-
-  result_type operator()(Arg const& a1,Arg const& a2) const {
-    return f(kf(a1),kf(a2));
-  }
-  result_type operator()(Arg & a1,Arg & a2) const {
-    return f(kf(a1),kf(a2));
-  }
-  result_type operator()(Arg * a1,Arg * a2) const {
-    return a1==a2||(a1&&a2&&f(kf(*a1),kf(*a2)));
-  }
-  template <class V,class W>
-  result_type operator()(V const& v,W const&w) const {
-    return v==w||(v&&w&&f(kf(*v),kf(*w)));
-  }
-
-
-};
-
-/*
-
-template <class F>
-struct indirect_function {
-  F f;
-  explicit indirect_function(F const& f=F()) : f(f) {}
-  typedef typename F::result_type result_type;
-  template <class V>
-  result_type operator()(V *p) const {
-    return f(*p);
-  }
-};
-*/
-
-template <class Item,class KeyF=get_key<Item>,class HashKey=boost::hash<typename KeyF::result_type>,class EqKey=std::equal_to<typename KeyF::result_type>, class Pool=boost::object_pool<Item> >
-struct intern_pool : Pool {
-  KeyF key;
-  typedef typename KeyF::result_type Key;
-  typedef Item *Handle;
-  typedef compose_indirect<KeyF,HashKey,Item> HashDeep;
-  typedef equal_indirect<KeyF,EqKey,Item> EqDeep;
-  typedef HASH_SET<Handle,HashDeep,EqDeep> Canonical;
-  typedef typename Canonical::iterator CFind;
-  typedef std::pair<CFind,bool> CInsert;
-  Canonical canonical;
-  bool interneq(Handle &i) { // returns true if i is newly interned, false if it already existed
-    CInsert i_new=canonical.insert(i);
-    i=*i_new.first;
-    return i_new.second;
-  }
-// inherited: Handle construct(...)
-  Handle construct_fresh() { return Pool::construct(); }
-  Handle intern(Handle i) { // (maybe invalidating i, returning a valid canonical handle (pointer)
-    CInsert i_new=canonical.insert(i);
-    if (i_new.second)
-      return i;
-    else {
-      free(i);
-      return *i_new->first;
-    }
-  }
-  void destroy_interned(Handle i) {
-    DEBUG_INTERN_POOL(assert(canonical.find(i)!=canonical.end()));
-    canonical.erase(i);
-    destroy(i);
-  }
-  bool destroy_fresh(Handle i) {
-    DEBUG_INTERN_POOL(assert(canonical.find(i)!=canonical.end()||*canonical.find(i)!=i)); // i is a constructed item not yet interned.
-    destroy(i);
-  }
-  void destroy_both(Handle i) { // i must have come from this pool.  may be interned, or not.  destroy both the noninterned and interned.
-    if (!destroy_if_interned(i)) destroy(i);
-  }
-  // destroy intern(i) if it exists.  return true if it existed AND its address was i.  otherwise return false (whether or not a value-equal item existed and was destroyed)
-  bool destroy_if_interned(Handle i) {
-    CFind f=canonical.find(i);
-    if (f!=canonical.end()) {
-      Handle interned=*f;
-      canonical.erase(f);
-      destroy(f);
-      if (f==i) return true;
-    }
-    return false;
-  }
-
-  intern_pool() {
-    HASH_MAP_EMPTY(canonical,(Handle)0);
-  }
-};
-
-
-
-#endif
diff --git a/utils/lvalue_pmap.h b/utils/lvalue_pmap.h
deleted file mode 100644
index 5b9403c0..00000000
--- a/utils/lvalue_pmap.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef LVALUE_PMAP_H
-#define LVALUE_PMAP_H
-
-#include <boost/property_map/property_map.hpp>
-
-// i checked: boost provides get and put given [] - but it's not being found by ADL so instead i define them myself
-
-// lvalue property map pmapname<P> that is: P p; valtype &v=p->name;
-#define PMAP_MEMBER_INDIRECT(pmapname,valtype,name) template <class P> struct pmapname {  \
-  typedef P key_type; \
-  typedef valtype value_type; \
-  typedef value_type & reference; \
-  typedef boost::lvalue_property_map_tag category;          \
-  reference operator[](key_type p) const { return p->name; } \
-  typedef pmapname<P> self_type; \
-  friend inline value_type const& get(self_type const&,key_type p) { return p->name; } \
-  friend inline void put(self_type &,key_type p,value_type const& v) { p->name = v; }             \
-};
-
-#define PMAP_MEMBER_INDIRECT_2(pmapname,name) template <class P,class R> struct pmapname {    \
-  typedef P key_type; \
-  typedef R value_type; \
-  typedef value_type & reference; \
-  typedef boost::lvalue_property_map_tag category; \
-  reference operator[](key_type p) const { return p->name; } \
-  typedef pmapname<P,R> self_type;                                                      \
-  friend inline value_type const& get(self_type const&,key_type p) { return p->name; } \
-  friend inline void put(self_type &,key_type p,value_type const& v) { p->name = v; }             \
-};
-
-#endif
diff --git a/utils/max_plus.h b/utils/max_plus.h
deleted file mode 100644
index 2e56f85e..00000000
--- a/utils/max_plus.h
+++ /dev/null
@@ -1,201 +0,0 @@
-#ifndef MAX_PLUS_H_
-#define MAX_PLUS_H_
-
-#define MAX_PLUS_ORDER 0
-#define MAX_PLUS_DEBUG(x)
-
-// max-plus algebra.  ordering a > b really means that (i.e. default a<b sorting will do worst (closest to 0) first.  so get used to passing predicates like std::greater<MaxPlus<T> > around
-// x+y := max{x,y}
-// x*y := x+y
-// 0 := -inf
-// 1 := 0
-// additive inverse does not, but mult. does. (inverse()) and x/y := x-y = x+y.inverse()
-//WARNING: default order is reversed, on purpose, i.e. a<b means a "better than" b, i.e. log(p_a)>log(p_b).  sorry.  defaults in libs are to order ascending, but we want best first.
-
-#include <boost/functional/hash.hpp>
-#include <iostream>
-#include <cstdlib>
-#include <cmath>
-#include <cassert>
-#include <limits>
-#include "semiring.h"
-#include "show.h"
-//#include "logval.h"
-
-template <class T>
-class MaxPlus {
- public:
-  void print(std::ostream &o) const {
-    o<<v_;
-  }
-  PRINT_SELF(MaxPlus<T>)
-  template <class O>
-  void operator=(O const& o) {
-    v_=o.v_;
-  }
-  template <class O>
-  MaxPlus(O const& o) : v_(o.v_) {  }
-
-  typedef MaxPlus<T> Self;
-  MaxPlus() : v_(LOGVAL_LOG0) {}
-  explicit MaxPlus(double x) : v_(std::log(x)) {}
-  MaxPlus(init_1) : v_(0) {  }
-  MaxPlus(init_0) : v_(LOGVAL_LOG0) {  }
-  MaxPlus(int x) : v_(std::log(x)) {}
-  MaxPlus(unsigned x) : v_(std::log(x)) { }
-  MaxPlus(double lnx,bool sign) : v_(lnx) { MAX_PLUS_DEBUG(assert(!sign)); }
-  MaxPlus(double lnx,init_lnx) : v_(lnx) {}
-  static Self exp(T lnx) { return MaxPlus(lnx,false); }
-
-  // maybe the below are faster than == 1 and == 0.  i don't know.
-  bool is_1() const { return v_==0; }
-  bool is_0() const { return v_==LOGVAL_LOG0; }
-
-  static Self One() { return Self(init_1()); }
-  static Self Zero() { return Self(init_0()); }
-  static Self e() { return Self(1,false); }
-  void logeq(const T& v) { v_ = v; }
-  bool signbit() const { return false; }
-
-  Self& logpluseq(const Self& a) {
-    if (a.is_0()) return *this;
-    if (a.v_ < v_) {
-      v_ = v_ + log1p(std::exp(a.v_ - v_));
-    } else {
-      v_ = a.v_ + log1p(std::exp(v_ - a.v_));
-    }
-    return *this;
-  }
-
-  Self& besteq(const Self& a) {
-    if (a.v_ < v_)
-      v_=a.v_;
-    return *this;
-  }
-
-  Self& operator+=(const Self& a) {
-    if (a.v_ < v_)
-      v_=a.v_;
-    return *this;
-  }
-
-  Self& operator*=(const Self& a) {
-    v_ += a.v_;
-    return *this;
-  }
-
-  Self& operator/=(const Self& a) {
-    v_ -= a.v_;
-    return *this;
-  }
-
-  // Self(fabs(log(x)),x.s_)
-  friend Self abslog(Self x) {
-    if (x.v_<0) x.v_=-x.v_;
-    return x;
-  }
-
-  Self& poweq(const T& power) {
-    v_ *= power;
-    return *this;
-  }
-
-  Self inverse() const {
-    return Self(-v_,false);
-  }
-
-  Self pow(const T& power) const {
-    Self res = *this;
-    res.poweq(power);
-    return res;
-  }
-
-  Self root(const T& root) const {
-    return pow(1/root);
-  }
-
-// copy elision - as opposed to explicit copy of Self const& o1, we should be able to construct Logval r=a+(b+c) as a single result in place in r.  todo: return std::move(o1) - C++0x
-  friend inline Self operator+(Self a,Self const& b) {
-    a+=b;
-    return a;
-  }
-  friend inline Self operator*(Self a,Self const& b) {
-    a*=b;
-    return a;
-  }
-  friend inline Self operator/(Self a,Self const& b) {
-    a/=b;
-    return a;
-  }
-  friend inline T log(Self const& a) {
-    return a.v_;
-  }
-  friend inline T pow(Self const& a,T const& e) {
-    return a.pow(e);
-  }
-
-  // intentionally not defining an operator < or operator > - because you may want to default (for library convenience) a<b means a better than b (i.e. gt)
-  inline bool lt(Self const& o) const {
-    return v_ < o.v_;
-  }
-  inline bool gt(Self const& o) const {
-    return o.v_ > v_;
-  }
-  friend inline bool operator==(Self const& lhs, Self const& rhs) {
-    return lhs.v_ == rhs.v_;
-  }
-  friend inline bool operator!=(Self const& lhs, Self const& rhs) {
-    return lhs.v_ != rhs.v_;
-  }
-  std::size_t hash() const {
-    using namespace boost;
-    return hash_value(v_);
-  }
-  friend inline std::size_t hash_value(Self const& x) {
-    return x.hash();
-  }
-
-/*
-  operator T() const {
-  return std::exp(v_);
-  }
-*/
-  T as_float() const {
-    return std::exp(v_);
-  }
-
-  T v_;
-};
-
-template <class T>
-struct semiring_traits<MaxPlus<T> > : default_semiring_traits<MaxPlus<T> > {
-  static const bool has_logplus=true;
-  static const bool has_besteq=true;
-#if MAX_PLUS_ORDER
-  static const bool have_order=true;
-#endif
-};
-
-#if MAX_PLUS_ORDER
-template <class T>
-bool operator<(const MaxPlus<T>& lhs, const MaxPlus<T>& rhs) {
-  return (lhs.v_ < rhs.v_);
-}
-
-template <class T>
-bool operator<=(const MaxPlus<T>& lhs, const MaxPlus<T>& rhs) {
-  return (lhs.v_ <= rhs.v_);
-}
-
-template <class T>
-bool operator>(const MaxPlus<T>& lhs, const MaxPlus<T>& rhs) {
-  return (lhs.v_ > rhs.v_);
-}
-
-template <class T>
-bool operator>=(const MaxPlus<T>& lhs, const MaxPlus<T>& rhs) {
-  return (lhs.v_ >= rhs.v_);
-}
-#endif
-
-#endif
diff --git a/utils/maybe_update_bound.h b/utils/maybe_update_bound.h
deleted file mode 100644
index d57215d0..00000000
--- a/utils/maybe_update_bound.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef MAYBE_UPDATE_BOUND_H
-#define MAYBE_UPDATE_BOUND_H
-
-template <class To,class From>
-inline void maybe_increase_max(To &to,const From &from) {
-    if (to<from)
-        to=from;
-}
-
-template <class To,class From>
-inline void maybe_decrease_min(To &to,const From &from) {
-    if (from<to)
-        to=from;
-}
-
-
-#endif
diff --git a/utils/nan.h b/utils/nan.h
deleted file mode 100644
index 257364d5..00000000
--- a/utils/nan.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef NAN_H
-#define NAN_H
-//TODO: switch to C99 isnan isfinite isinf etc. (faster)
-
-#include <limits>
-
-template <bool> struct nan_static_assert;
-template <> struct nan_static_assert<true> { };
-
-// is_iec559 i.e. only IEEE 754 float has x != x <=> x is nan
-template<typename T>
-inline bool is_nan(T x) {
-//    static_cast<void>(sizeof(nan_static_assert<std::numeric_limits<T>::has_quiet_NaN>));
-    return std::numeric_limits<T>::has_quiet_NaN && (x != x);
-}
-
-template <typename T>
-inline bool is_inf(T x) {
-//    static_cast<void>(sizeof(nan_static_assert<std::numeric_limits<T>::has_infinity>));
-    return x == std::numeric_limits<T>::infinity() || x == -std::numeric_limits<T>::infinity();
-}
-
-template <typename T>
-inline bool is_pos_inf(T x) {
-//    static_cast<void>(sizeof(nan_static_assert<std::numeric_limits<T>::has_infinity>));
-    return x == std::numeric_limits<T>::infinity();
-}
-
-template <typename T>
-inline bool is_neg_inf(T x) {
-//    static_cast<void>(sizeof(nan_static_assert<std::numeric_limits<T>::has_infinity>));
-    return x == -std::numeric_limits<T>::infinity();
-}
-
-//c99 isfinite macro shoudl be much faster
-template <typename T>
-inline bool is_finite(T x) {
-  return !is_nan(x) && !is_inf(x);
-}
-
-
-#endif
diff --git a/utils/string_to.h b/utils/string_to.h
deleted file mode 100644
index c78a5394..00000000
--- a/utils/string_to.h
+++ /dev/null
@@ -1,314 +0,0 @@
-#ifndef STRING_TO_H
-#define STRING_TO_H
-
-/*
-   may not be any faster than boost::lexical_cast in later incarnations (see http://accu.org/index.php/journals/1375)
-   but is slightly simpler.  no wide char or locale.
-
-   X string_to<X>(string);
-   string to_string(X);
-   X& string_into(string,X &); // note: returns the same ref you passed in, for convenience of use
-
-   default implementation via stringstreams (quite slow, I'm sure)
-
-   fast implementation for string, int<->string, unsigned<->string, float<->string, double<->string
-
-*/
-
-#ifndef USE_FTOA
-#define USE_FTOA 1
-#endif
-#ifndef HAVE_STRTOUL
-# define HAVE_STRTOUL 1
-#endif
-
-#include <string>
-#include <sstream>
-#include <stdexcept>
-#include <cstdlib>
-
-#include "have_64_bits.h"
-#include "utoa.h"
-#if USE_FTOA
-# include "ftoa.h"
-#endif
-
-namespace {
-// for faster numeric to/from string.  TODO: separate into optional header
-#include <stdio.h>
-#include <ctype.h>
-#include <stdlib.h> // access to evil (fast) C isspace etc.
-#include <limits.h> //strtoul
-}
-
-inline void throw_string_to(std::string const& msg,char const* prefix="string_to: ") {
-  throw std::runtime_error(prefix+msg);
-}
-
-template <class I,class To>
-bool try_stream_into(I & i,To &to,bool complete=true)
-{
-    i >> to;
-    if (i.fail()) return false;
-    if (complete) {
-        char c;
-        return !(i >> c);
-    }
-    return true;
-}
-
-template <class Str,class To>
-bool try_string_into(Str const& str,To &to,bool complete=true)
-{
-    std::istringstream i(str);
-    return try_stream_into(i,to,complete);
-}
-
-template <class Str,class Data> inline
-Data & string_into(const Str &str,Data &data)
-{
-    if (!try_string_into(str,data))
-        throw std::runtime_error(std::string("Couldn't convert (string_into): ")+str);
-    return data;
-}
-
-
-template <class Data,class Str> inline
-Data string_to(const Str &str)
-{
-    Data ret;
-    string_into(str,ret);
-    return ret;
-}
-
-template <class D> inline
-std::string to_string(D const &d)
-{
-    std::ostringstream o;
-    o << d;
-    return o.str();
-}
-
-inline std::string to_string(unsigned x) {
-  return utos(x);
-}
-
-inline std::string to_string(int x) {
-  return itos(x);
-}
-
-inline long strtol_complete(char const* s,int base=10) {
-  char *e;
-  if (*s) {
-    long r=strtol(s,&e,base);
-    char c=*e;
-    if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos.  TODO: loop consuming spaces until end?
-      return r;
-  }
-  throw_string_to(s,"Couldn't convert to integer: ");
-}
-
-// returns -INT_MAX or INT_MAX if number is too large/small
-inline int strtoi_complete_bounded(char const* s,int base=10) {
-  long l=strtol_complete(s,base);
-  if (l<std::numeric_limits<int>::min())
-    return std::numeric_limits<int>::min();
-  if (l>std::numeric_limits<int>::max())
-    return std::numeric_limits<int>::max();
-  return l;
-}
-#define RANGE_STR(x) #x
-#ifdef INT_MIN
-# define INTRANGE_STR "[" RANGE_STR(INT_MIN) "," RANGE_STR(INT_MAX) "]"
-#else
-# define INTRANGE_STR "[-2137483648,2147483647]"
-#endif
-
-  // throw if out of int range
-inline int strtoi_complete_exact(char const* s,int base=10) {
-  long l=strtol_complete(s,base);
-  if (l<std::numeric_limits<int>::min() || l>std::numeric_limits<int>::max())
-    throw_string_to(s,"Out of range for int " INTRANGE_STR ": ");
-  return l;
-}
-
-#if HAVE_LONGER_LONG
-inline int& string_into(std::string const& s,int &x) {
-  x=strtoi_complete_exact(s.c_str());
-  return x;
-}
-inline int& string_into(char const* s,int &x) {
-  x=strtoi_complete_exact(s);
-  return x;
-}
-#endif
-
-inline long& string_into(std::string const& s,long &x) {
-  x=strtol_complete(s.c_str());
-  return x;
-}
-inline long& string_into(char const* s,long &x) {
-  x=strtol_complete(s);
-  return x;
-}
-
-
-//FIXME: preprocessor separation for tokens int<->unsigned int, long<->unsigned long, strtol<->strtoul ?  massive code duplication
-inline unsigned long strtoul_complete(char const* s,int base=10) {
-  char *e;
-  if (*s) {
-#if HAVE_STRTOUL
-    unsigned long r=strtoul(s,&e,base);
-#else
-//    unsigned long r=strtol(s,&e,base); //FIXME: not usually safe
-    unsigned long r;
-    sscanf(s,"%ul",&r);
-#endif
-    char c=*e;
-    if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos.  TODO: loop consuming spaces until end?
-      return r;
-  }
-  throw_string_to(s,"Couldn't convert to integer: ");
-}
-
-inline unsigned strtou_complete_bounded(char const* s,int base=10) {
-  unsigned long l=strtoul_complete(s,base);
-  if (l<std::numeric_limits<unsigned>::min())
-    return std::numeric_limits<unsigned>::min();
-  if (l>std::numeric_limits<unsigned>::max())
-    return std::numeric_limits<unsigned>::max();
-  return l;
-}
-
-#ifdef UINT_MIN
-# define UINTRANGE_STR "[" RANGE_STR(UINT_MIN) "," RANGE_STR(UINT_MAX) "]"
-#else
-# define UINTRANGE_STR "[0,4,294,967,295]"
-#endif
-
-  // throw if out of int range
-inline unsigned strtou_complete_exact(char const* s,int base=10) {
-  unsigned long l=strtoul_complete(s,base);
-  if (l<std::numeric_limits<unsigned>::min() || l>std::numeric_limits<unsigned>::max())
-    throw_string_to(s,"Out of range for uint " UINTRANGE_STR ": ");
-  return l;
-}
-
-#if HAVE_LONGER_LONG
-inline unsigned& string_into(std::string const& s,unsigned &x) {
-  x=strtou_complete_exact(s.c_str());
-  return x;
-}
-inline unsigned& string_into(char const* s,unsigned &x) {
-  x=strtou_complete_exact(s);
-  return x;
-}
-#endif
-
-inline unsigned long& string_into(std::string const& s,unsigned long &x) {
-  x=strtoul_complete(s.c_str());
-  return x;
-}
-inline unsigned long& string_into(char const* s,unsigned long &x) {
-  x=strtoul_complete(s);
-  return x;
-}
-
-//FIXME: end code duplication
-
-
-/* 9 decimal places needed to avoid rounding error in float->string->float.  17 for double->string->double
-   in terms of usable decimal places, there are 6 for float and 15 for double
- */
-inline std::string to_string_roundtrip(float x) {
-  char buf[17];
-  return std::string(buf,buf+sprintf(buf,"%.9g",x));
-}
-inline std::string to_string(float x) {
-#if USE_FTOA
-  return ftos(x);
-#else
-  char buf[15];
-  return std::string(buf,buf+sprintf(buf,"%.7g",x));
-#endif
-}
-inline std::string to_string_roundtrip(double x) {
-  char buf[32];
-  return std::string(buf,buf+sprintf(buf,"%.17g",x));
-}
-inline std::string to_string(double x) {
-#if USE_FTOA
-  return ftos(x);
-#else
-  char buf[30];
-  return std::string(buf,buf+sprintf(buf,"%.15g",x));
-#endif
-}
-
-inline double& string_into(char const* s,double &x) {
-  x=std::atof(s);
-  return x;
-}
-inline float& string_into(char const* s,float &x) {
-  x=std::atof(s);
-  return x;
-}
-
-inline double& string_into(std::string const& s,double &x) {
-  x=std::atof(s.c_str());
-  return x;
-}
-inline float& string_into(std::string const& s,float &x) {
-  x=std::atof(s.c_str());
-  return x;
-}
-
-
-template <class Str>
-bool try_string_into(Str const& str,Str &to,bool complete=true)
-{
-    str=to;
-    return true;
-}
-
-inline std::string const& to_string(std::string const& d)
-{
-    return d;
-}
-
-template <class Str>
-Str const& string_to(Str const &s)
-{
-    return s;
-}
-
-template <class Str>
-Str & string_into(Str const &s,Str &d)
-{
-    return d=s;
-}
-
-/*
-
-template <class Str,class Data,class size_type> inline
-void substring_into(const Str &str,size_type pos,size_type n,Data &data)
-{
-//    std::istringstream i(str,pos,n); // doesn't exist!
-    std::istringstream i(str.substr(pos,n));
-    if (!(i>>*data))
-        throw std::runtime_error("Couldn't convert (string_into): "+str);
-}
-
-template <class Data,class Str,class size_type> inline
-Data string_to(const Str &str,size_type pos,size_type n)
-{
-    Data ret;
-    substring_into(str,pos,n,ret);
-    return ret;
-}
-
-*/
-
-
-
-#endif
-- 
cgit v1.2.3


From a872f46ce1212703b8bed562c894ea1a932c0746 Mon Sep 17 00:00:00 2001
From: Chris Dyer <prguest11@taipan.cs>
Date: Wed, 29 Feb 2012 07:00:49 +0000
Subject: mpi fixes

---
 training/lbl_model.cc | 54 +++++++++++++++++++++++++++++++++++++++++----------
 utils/corpus_tools.cc | 16 +++++++++------
 2 files changed, 54 insertions(+), 16 deletions(-)

(limited to 'training/lbl_model.cc')

diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index def5075a..a46ce33c 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -15,6 +15,7 @@
 #ifdef HAVE_MPI
 #include <boost/mpi/timer.hpp>
 #include <boost/mpi.hpp>
+#include <boost/archive/text_oarchive.hpp>
 namespace mpi = boost::mpi;
 #endif
 #include <boost/math/special_functions/fpclassify.hpp>
@@ -34,12 +35,26 @@ namespace mpi = boost::mpi;
 namespace po = boost::program_options;
 using namespace std;
 
-#define kDIMENSIONS 100
+#define kDIMENSIONS 10
 typedef Eigen::Matrix<double, kDIMENSIONS, 1> RVector;
 typedef Eigen::Matrix<double, 1, kDIMENSIONS> RTVector;
 typedef Eigen::Matrix<double, kDIMENSIONS, kDIMENSIONS> TMatrix;
 vector<RVector> r_src, r_trg;
 
+#if HAVE_MPI
+namespace boost {
+namespace serialization {
+
+template<class Archive>
+void serialize(Archive & ar, RVector & v, const unsigned int version) {
+  for (unsigned i = 0; i < kDIMENSIONS; ++i)
+    ar & v[i];
+}
+
+} // namespace serialization
+} // namespace boost
+#endif
+
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
@@ -224,7 +239,7 @@ int main(int argc, char** argv) {
     srand(seed);
   }
   
-  TMatrix t;
+  TMatrix t = TMatrix::Zero();
   if (rank == 0) {
     t = TMatrix::Random() / 50.0;
     for (unsigned i = 1; i < r_trg.size(); ++i) {
@@ -241,16 +256,18 @@ int main(int argc, char** argv) {
   TMatrix g = TMatrix::Zero();
   vector<TMatrix> exp_src;
   vector<double> z_src;
-  vector<double> flat_g, flat_t;
+  vector<double> flat_g, flat_t, rcv_grad;
   Flatten(t, &flat_t);
   bool converged = false;
-  // TODO broadcast embeddings
-  for (int iter = 0; !converged && iter < ITERATIONS; ++iter) {
-#ifdef HAVE_MPI
-    mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
+#if HAVE_MPI
+  mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
+  mpi::broadcast(world, r_trg, 0);
+  mpi::broadcast(world, r_src, 0);
 #endif
+  cerr << "rank=" << rank << ": " << r_trg[0][4] << endl;
+  for (int iter = 0; !converged && iter < ITERATIONS; ++iter) {
+    if (rank == 0) cerr << "ITERATION " << (iter + 1) << endl;
     Unflatten(flat_t, &t);
-    cerr << "ITERATION " << (iter + 1) << endl;
     double likelihood = 0;
     double denom = 0.0;
     lc = 0;
@@ -350,7 +367,22 @@ int main(int argc, char** argv) {
     if (!SGD) {
       Flatten(g, &flat_g);
       obj = -likelihood;
-      // TODO - reduce gradient
+#if HAVE_MPI
+      rcv_grad.resize(flat_g.size(), 0.0);
+      mpi::reduce(world, &flat_g[0], flat_g.size(), &rcv_grad[0], plus<double>(), 0);
+      swap(flat_g, rcv_grad);
+      rcv_grad.clear();
+
+      double to = 0;
+      mpi::reduce(world, obj, to, plus<double>(), 0);
+      obj = to;
+      double tlh = 0;
+      mpi::reduce(world, likelihood, tlh, plus<double>(), 0);
+      likelihood = tlh;
+      double td = 0;
+      mpi::reduce(world, denom, td, plus<double>(), 0);
+      denom = td;
+#endif
     }
 
     if (rank == 0) {
@@ -376,10 +408,12 @@ int main(int argc, char** argv) {
       }
     }
 #ifdef HAVE_MPI
+    mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
     mpi::broadcast(world, converged, 0);
 #endif
   }
-  cerr << "TRANSLATION MATRIX:" << endl << t << endl;
+  if (rank == 0)
+    cerr << "TRANSLATION MATRIX:" << endl << t << endl;
   return 0;
 }
 
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index a0542b6e..d17785af 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -33,10 +33,12 @@ void CorpusTools::ReadFromFile(const string& filename,
   while(getline(in, line)) {
     const bool skip = (lc % size != rank);
     ++lc;
-    if (skip) continue;
     TD::ConvertSentence(line, &tmp);
-    src->push_back(vector<WordID>());
-    vector<WordID>* d = &src->back();
+    vector<WordID>* d = NULL;
+    if (!skip) {
+      src->push_back(vector<WordID>());
+      d = &src->back();
+    }
     set<WordID>* v = src_vocab;
     int s = 0;
     for (unsigned i = 0; i < tmp.size(); ++i) {
@@ -44,11 +46,13 @@ void CorpusTools::ReadFromFile(const string& filename,
         ++s;
         if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); }
         assert(trg);
-        trg->push_back(vector<WordID>());
-        d = &trg->back();
+        if (!skip) {
+          trg->push_back(vector<WordID>());
+          d = &trg->back();
+        }
         v = trg_vocab;
       } else {
-        d->push_back(tmp[i]);
+        if (d) d->push_back(tmp[i]);
         if (v) v->insert(tmp[i]);
       }
     }
-- 
cgit v1.2.3