From ba939df399a160f9a8370911c840635d6cee4f58 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 20 Dec 2011 18:34:14 -0500
Subject: migrate fast_score to the new API

---
 vest/dist-vest.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'vest')
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 11e791c1..c382a972 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -308,7 +308,7 @@ while (1){
 	    $retries++;
 	}
 	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
-	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric");
+	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
 	chomp $dec_score;
 	print STDERR "DECODER SCORE: $dec_score\n";
 
-- 
cgit v1.2.3


From 3c1c98b5aec7aec34432ddc37385df06d301bdd5 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 27 Jan 2012 02:31:00 -0500
Subject: migrate mert to the new scorer interface

---
 gi/pf/base_distributions.cc | 241 ++++++++++++++++++++++++++++++++++++++++
 gi/pf/base_distributions.h  | 261 ++++++++++++++++++++++++++++++++++++++++++++
 gi/pf/base_measures.cc      | 241 ----------------------------------------
 gi/pf/base_measures.h       | 247 -----------------------------------------
 mteval/ns.cc                |   4 +
 mteval/ns.h                 |  10 +-
 vest/ces.cc                 |  42 +++----
 vest/ces.h                  |  10 +-
 vest/dist-vest.pl           |   4 +-
 vest/error_surface.cc       |  11 +-
 vest/error_surface.h        |   6 +-
 vest/line_optimizer.cc      |  20 ++--
 vest/line_optimizer.h       |   2 +
 vest/lo_test.cc             |  21 ++--
 vest/mr_vest_map.cc         |  16 +--
 vest/mr_vest_reduce.cc      |  34 +++---
 16 files changed, 602 insertions(+), 568 deletions(-)
 create mode 100644 gi/pf/base_distributions.cc
 create mode 100644 gi/pf/base_distributions.h
 delete mode 100644 gi/pf/base_measures.cc
 delete mode 100644 gi/pf/base_measures.h

(limited to 'vest')

diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc
new file mode 100644
index 00000000..4b1863fa
--- /dev/null
+++ b/gi/pf/base_distributions.cc
@@ -0,0 +1,241 @@
+#include "base_measures.h"
+
+#include <iostream>
+
+#include "filelib.h"
+
+using namespace std;
+
+TableLookupBase::TableLookupBase(const string& fname) {  // fills `table` from lines shaped "f-side ||| e-side ||| value"
+  cerr << "TableLookupBase reading from " << fname << " ..." << endl;
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  unsigned lc = 0;  // number of lines read so far; used in progress and error output
+  const WordID kDIV = TD::Convert("|||");  // id of the field separator token
+  vector<WordID> tmp;  // tokens of the current line
+  vector<int> le, lf;  // NOTE(review): not used anywhere in this function
+  TRule x;  // scratch rule reused for every line; f_ and e_ are refilled per line
+  x.lhs_ = -TD::Convert("X");
+  bool flag = false;  // true while progress dots have been printed without a trailing newline
+  while(getline(in, line)) {
+    ++lc;
+    if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; }
+    else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; }
+    tmp.clear();
+    TD::ConvertSentence(line, &tmp);
+    x.f_.clear();
+    x.e_.clear();
+    size_t pos = 0;
+    int cc = 0;  // separators/fields consumed: 0 = filling f_, 1 = filling e_, 2 = expecting the value, 3 = value read
+    while(pos < tmp.size()) {
+      const WordID cur = tmp[pos++];
+      if (cur == kDIV) {
+        ++cc;
+      } else if (cc == 0) {
+        x.f_.push_back(cur);    // token belongs to the first field
+      } else if (cc == 1) {
+        x.e_.push_back(cur);  // token belongs to the second field
+      } else if (cc == 2) {
+        table[x].logeq(atof(TD::Convert(cur)));  // third field: token text parsed with atof and stored via logeq
+        ++cc;
+      } else {
+        if (flag) cerr << endl;
+        cerr << "Bad format in " << lc << ": " << line << endl; abort();  // non-separator token after the value
+      }
+    }
+    if (cc != 3) {  // value field missing, or extra separators on the line
+      if (flag) cerr << endl;
+      cerr << "Bad format in " << lc << ": " << line << endl; abort();
+    }
+  }
+  if (flag) cerr << endl;
+  cerr << " read " << lc << " entries\n";
+}
+
+prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
+                                                     const vector<WordID>& vtrg,
+                                                     int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;  // number of source tokens from start_src to the end
+  const int elen = vtrg.size() - start_trg;  // number of target tokens from start_trg to the end
+  prob_t p;
+  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
+  //p.logeq(log_poisson(elen, 1));       // disabled alternative: elen ~Pois(1)
+  for (int i = 0; i < elen; ++i)
+    p *= u(vtrg[i + start_trg]);                        // draw e_i             ~unigram model u
+  return p;
+}
+
+prob_t PhraseConditionalUninformativeBase::p0(const vector<WordID>& vsrc,
+                                              const vector<WordID>& vtrg,
+                                              int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;  // only referenced by the disabled line below
+  const int elen = vtrg.size() - start_trg;  // number of target tokens from start_trg to the end
+  prob_t p;
+  //p.logeq(log_poisson(elen, flen + 0.01));       // disabled alternative: elen | flen ~Pois(flen + 0.01)
+  p.logeq(log_poisson(elen, 1));       // elen                 ~Pois(1), independent of flen
+  for (int i = 0; i < elen; ++i)
+    p *= kUNIFORM_TARGET;                        // draw e_i             ~Uniform
+  return p;
+}
+
+void Model1::LoadModel1(const string& fname) {  // fills ttable from lines shaped "<src> <trg> <value>" (single-space separated)
+  cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  unsigned lc = 0;  // lines read; reported at the end
+  while(getline(in, line)) {
+    ++lc;
+    int cur = 0;  // scan position within line
+    int start = 0;  // offset where the second token begins
+    while(cur < line.size() && line[cur] != ' ') { ++cur; }
+    assert(cur != line.size());  // a space must follow the first token; NOTE(review): only checked when asserts are enabled
+    line[cur] = 0;  // NUL-terminate the first token in place
+    const WordID src = TD::Convert(&line[0]);
+    ++cur;
+    start = cur;
+    while(cur < line.size() && line[cur] != ' ') { ++cur; }
+    assert(cur != line.size());  // a space must follow the second token
+    line[cur] = 0;  // NUL-terminate the second token in place
+    WordID trg = TD::Convert(&line[start]);
+    const double logprob = strtod(&line[cur + 1], NULL);  // rest of the line parsed as a double
+    if (src >= ttable.size()) ttable.resize(src + 1);  // grow so that ttable[src] exists
+    ttable[src][trg].logeq(logprob);
+  }
+  cerr << "  read " << lc << " parameters.\n";
+}
+
+prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc,
+                                 const vector<WordID>& vtrg,
+                                 int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;  // number of source tokens from start_src to the end
+  const int elen = vtrg.size() - start_trg;  // number of target tokens from start_trg to the end
+  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));  // argument is log(1 / (flen + 1)): flen source positions plus j == -1
+  prob_t p;
+  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
+  for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS
+    const WordID trg = vtrg[i + start_trg];
+    prob_t tp = prob_t::Zero();                  // accumulates over all candidate source positions
+    for (int j = -1; j < flen; ++j) {            // j == -1 uses word id 0 instead of a source token
+      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+      tp += kM1MIXTURE * model1(src, trg);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
+    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+  }
+  if (p.is_0()) {                                // a zero result is treated as fatal
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+  return p;
+}
+
+prob_t PhraseJointBase::p0(const vector<WordID>& vsrc,
+                           const vector<WordID>& vtrg,
+                           int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;  // number of source tokens from start_src to the end
+  const int elen = vtrg.size() - start_trg;  // number of target tokens from start_trg to the end
+  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));  // argument is log(1 / (flen + 1)): flen source positions plus j == -1
+  prob_t p;
+  p.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
+                                                 // elen | flen          ~Pois(flen + 0.01)
+  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
+  p *= ptrglen;
+  p *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
+  for (int i = 0; i < elen; ++i) {               // for each position i in E
+    const WordID trg = vtrg[i + start_trg];
+    prob_t tp = prob_t::Zero();                  // accumulates over all candidate source positions
+    for (int j = -1; j < flen; ++j) {            // j == -1 uses word id 0 instead of a source token
+      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+      tp += kM1MIXTURE * model1(src, trg);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
+    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+  }
+  if (p.is_0()) {                                // a zero result is treated as fatal
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+  return p;
+}
+
+prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc,
+                                 const vector<WordID>& vtrg,
+                                 int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;  // number of source tokens from start_src to the end
+  const int elen = vtrg.size() - start_trg;  // number of target tokens from start_trg to the end
+  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));  // argument is log(1 / (flen + 1))
+  prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1));  // argument is log(1 / (elen + 1))
+
+  prob_t p1;                                      // direction 1: source first, then target given source (uses model1)
+  p1.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
+                                                 // elen | flen          ~Pois(flen + 0.01)
+  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
+  p1 *= ptrglen;
+  p1 *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
+  for (int i = 0; i < elen; ++i) {               // for each position i in E
+    const WordID trg = vtrg[i + start_trg];
+    prob_t tp = prob_t::Zero();
+    for (int j = -1; j < flen; ++j) {            // j == -1 uses word id 0 instead of a source token
+      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+      tp += kM1MIXTURE * model1(src, trg);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
+    p1 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+  }
+  if (p1.is_0()) {                               // a zero result is treated as fatal
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+
+  prob_t p2;                                      // direction 2: target first, then source given target (uses invmodel1)
+  p2.logeq(log_poisson(elen, 1.0));               // elen                 ~Pois(1)
+                                                 // flen | elen          ~Pois(elen + 0.01)
+  prob_t psrclen; psrclen.logeq(log_poisson(flen, elen + 0.01));
+  p2 *= psrclen;
+  p2 *= kUNIFORM_TARGET.pow(elen);                // each e in E ~Uniform
+  for (int i = 0; i < flen; ++i) {               // for each position i in F
+    const WordID src = vsrc[i + start_src];
+    prob_t tp = prob_t::Zero();
+    for (int j = -1; j < elen; ++j) {            // j == -1 uses word id 0 instead of a target token
+      const WordID trg = j < 0 ? 0 : vtrg[j + start_trg];
+      tp += kM1MIXTURE * invmodel1(trg, src);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE;
+    }
+    tp *= uniform_trg_alignment;                 //     draw a_i         ~uniform
+    p2 *= tp;                                     //     draw f_i         ~invModel1(e_a_i) / uniform
+  }
+  if (p2.is_0()) {                               // a zero result is treated as fatal
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+
+  static const prob_t kHALF(0.5);
+  return (p1 + p2) * kHALF;                       // equal-weight average of the two directions
+}
+
+JumpBase::JumpBase() : p(200) {  // builds one jump table per src_len in [1, 199]; p[0] is left empty
+  for (unsigned src_len = 1; src_len < 200; ++src_len) {
+    map<int, prob_t>& cpd = p[src_len];
+    int min_jump = 1 - src_len;  // most negative jump tabulated for this length
+    int max_jump = src_len;  // most positive jump tabulated for this length
+    prob_t z;  // running sum used as the normalizer; assumes a default-constructed prob_t is zero - TODO confirm
+    for (int j = min_jump; j <= max_jump; ++j) {
+      prob_t& cp = cpd[j];  // operator[] inserts a default-constructed entry, also for j == 0
+      if (j < 0)
+        cp.logeq(log_poisson(1.5-j, 1));  // 1.5-j is converted to log_poisson's unsigned parameter, dropping the fraction
+      else if (j > 0)
+        cp.logeq(log_poisson(j, 1));
+      cp.poweq(0.2);  // presumably raises the score to the power 0.2 - verify against prob_t
+      z += cp;
+    }
+    for (int j = min_jump; j <= max_jump; ++j) {
+      cpd[j] /= z;  // divide each entry by the sum
+    }
+  }
+}
+
diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h
new file mode 100644
index 00000000..a23ac32b
--- /dev/null
+++ b/gi/pf/base_distributions.h
@@ -0,0 +1,261 @@
+#ifndef _BASE_MEASURES_H_
+#define _BASE_MEASURES_H_
+
+#include <vector>
+#include <map>
+#include <string>
+#include <cmath>
+#include <iostream>
+#include <cassert>
+
+#include "unigrams.h"
+#include "trule.h"
+#include "prob.h"
+#include "tdict.h"
+#include "sampler.h"
+
+inline double log_poisson(unsigned x, const double& lambda) {  // log of the Poisson pmf at x with rate lambda
+  assert(lambda > 0.0);  // log(lambda) below requires a positive rate
+  return log(lambda) * x - lgamma(x + 1) - lambda;  // x*log(lambda) - log(x!) - lambda
+}
+
+inline double log_binom_coeff(unsigned n, unsigned k) {  // log of "n choose k"
+  assert(n >= k);  // n - k below is unsigned and would wrap otherwise
+  if (n == k) return 0.0;  // shortcut: log(1)
+  return lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1);  // log(n!) - log(k!) - log((n-k)!)
+}
+
+// log negative-binomial pmf; see http://en.wikipedia.org/wiki/Negative_binomial_distribution
+inline double log_negative_binom(unsigned x, unsigned r, double p) {
+  assert(p > 0.0);  // log(p) below
+  assert(p < 1.0);  // log(1 - p) below
+  return log_binom_coeff(x + r - 1, x) + r * log(1 - p) + x * log(p);  // NOTE(review): x + r - 1 wraps when x == 0 and r == 0
+}
+
+inline std::ostream& operator<<(std::ostream& os, const std::vector<WordID>& p) {  // prints the words as "[w1 w2 ...]"
+  os << '[';
+  for (int i = 0; i < p.size(); ++i)
+    os << (i==0 ? "" : " ") << TD::Convert(p[i]);  // single space between words, none before the first
+  return os << ']';
+}
+
+struct Model1 {  // table of prob_t values keyed by (src, trg) word ids, loaded from a file at construction
+  explicit Model1(const std::string& fname) :
+      kNULL(TD::Convert("<eps>")),
+      kZERO() {
+    LoadModel1(fname);
+  }
+
+  void LoadModel1(const std::string& fname);  // defined out of line
+
+  // returns kZERO (prob 0) if src or trg is not found
+  const prob_t& operator()(WordID src, WordID trg) const {
+    if (src == 0) src = kNULL;  // word id 0 is looked up under the "<eps>" entry
+    if (src < ttable.size()) {
+      const std::map<WordID, prob_t>& cpd = ttable[src];
+      const std::map<WordID, prob_t>::const_iterator it = cpd.find(trg);
+      if (it != cpd.end())
+        return it->second;
+    }
+    return kZERO;  // reference to a member, so it outlives the call
+  }
+
+  const WordID kNULL;  // id of "<eps>"
+  const prob_t kZERO;  // default-constructed prob_t returned for missing entries
+  std::vector<std::map<WordID, prob_t> > ttable;  // indexed by source id, then keyed by target id
+};
+
+struct PoissonUniformUninformativeBase {  // scores a rule from the length of r.e_ only
+  explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}  // NOTE(review): ves == 0 is not checked
+  prob_t operator()(const TRule& r) const {
+    prob_t p; p.logeq(log_poisson(r.e_.size(), 1.0));  // length of e_ ~Pois(1)
+    prob_t q = kUNIFORM; q.poweq(r.e_.size());  // kUNIFORM once per element of e_
+    p *= q;
+    return p;
+  }
+  void Summary() const {}  // empty body
+  void ResampleHyperparameters(MT19937*) {}  // empty body
+  void Increment(const TRule&) {}  // empty body
+  void Decrement(const TRule&) {}  // empty body
+  prob_t Likelihood() const { return prob_t::One(); }
+  const prob_t kUNIFORM;  // 1 / ves
+};
+
+struct CompletelyUniformBase {  // returns the same value for every rule
+  explicit CompletelyUniformBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}  // NOTE(review): ves == 0 is not checked
+  prob_t operator()(const TRule&) const {
+    return kUNIFORM;  // the rule argument is ignored
+  }
+  void Summary() const {}  // empty body
+  void ResampleHyperparameters(MT19937*) {}  // empty body
+  void Increment(const TRule&) {}  // empty body
+  void Decrement(const TRule&) {}  // empty body
+  prob_t Likelihood() const { return prob_t::One(); }
+  const prob_t kUNIFORM;  // 1 / ves
+};
+
+struct UnigramWordBase {  // scores a rule by applying the unigram word model to r.e_
+  explicit UnigramWordBase(const std::string& fname) : un(fname) {}  // model is constructed from fname
+  prob_t operator()(const TRule& r) const {
+    return un(r.e_);  // only e_ is consulted
+  }
+  const UnigramWordModel un;
+};
+
+struct RuleHasher {  // hash functor for TRule keys; used by TableLookupBase::table
+  size_t operator()(const TRule& r) const {
+    return hash_value(r);  // forwards to hash_value for TRule, which is declared elsewhere
+  }
+};
+
+struct TableLookupBase {  // looks up a precomputed prob_t per rule; aborts on unknown rules
+  TableLookupBase(const std::string& fname);  // defined out of line; NOTE(review): single-argument ctor is not explicit
+
+  prob_t operator()(const TRule& rule) const {
+    const std::tr1::unordered_map<TRule,prob_t>::const_iterator it = table.find(rule);  // NOTE(review): iterator type omits RuleHasher, unlike `table`
+    if (it == table.end()) {
+      std::cerr << rule << " not found\n";
+      abort();  // a missing rule is treated as fatal
+    }
+    return it->second;
+  }
+
+  void ResampleHyperparameters(MT19937*) {}  // empty body
+  void Increment(const TRule&) {}  // empty body
+  void Decrement(const TRule&) {}  // empty body
+  prob_t Likelihood() const { return prob_t::One(); }
+  void Summary() const {}  // empty body
+
+  std::tr1::unordered_map<TRule,prob_t,RuleHasher> table;  // rule -> stored value
+};
+
+struct PhraseConditionalUninformativeBase {  // base distribution over rule.e_ given rule.f_; p0 is only declared here
+  explicit PhraseConditionalUninformativeBase(const unsigned vocab_e_size) :
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(vocab_e_size > 0);  // runs after the division in the initializer above
+  }
+
+  // return p0 of rule.e_ | rule.f_ (whole rule, both start offsets 0)
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  void Summary() const {}  // empty body
+  void ResampleHyperparameters(MT19937*) {}  // empty body
+  void Increment(const TRule&) {}  // empty body
+  void Decrement(const TRule&) {}  // empty body
+  prob_t Likelihood() const { return prob_t::One(); }
+  const prob_t kUNIFORM_TARGET;  // 1 / vocab_e_size
+};
+
+struct PhraseConditionalUninformativeUnigramBase {  // base distribution over rule.e_ given rule.f_, backed by a unigram model
+  explicit PhraseConditionalUninformativeUnigramBase(const std::string& file, const unsigned vocab_e_size) : u(file, vocab_e_size) {}  // both arguments are forwarded to UnigramModel
+
+  // return p0 of rule.e_ | rule.f_ (whole rule, both start offsets 0)
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const UnigramModel u;
+};
+
+struct PhraseConditionalBase {  // base distribution over rule.e_ given rule.f_, mixing Model 1 with a uniform target term
+  explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) :
+      model1(m1),
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);  // mixture weight must lie in [0, 1]
+    assert(vocab_e_size > 0);  // runs after the division in the initializer above
+  }
+
+  // return p0 of rule.e_ | rule.f_ (whole rule, both start offsets 0)
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const Model1& model1;  // held by reference: the Model1 passed in must outlive this object
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component (1 - m1mixture)
+  const prob_t kUNIFORM_TARGET;  // 1 / vocab_e_size
+};
+
+struct PhraseJointBase {  // joint base distribution over rule.f_ and rule.e_, mixing Model 1 with uniform terms
+  explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) :
+      model1(m1),
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_SOURCE(1.0 / vocab_f_size),
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);  // mixture weight must lie in [0, 1]
+    assert(vocab_e_size > 0);  // NOTE(review): vocab_f_size is divided by above but never checked
+  }
+
+  // return p0 of rule.e_ , rule.f_ (whole rule, both start offsets 0)
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const Model1& model1;  // held by reference: the Model1 passed in must outlive this object
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component (1 - m1mixture)
+  const prob_t kUNIFORM_SOURCE;  // 1 / vocab_f_size
+  const prob_t kUNIFORM_TARGET;  // 1 / vocab_e_size
+};
+
+struct PhraseJointBase_BiDir {  // joint base distribution that uses a Model 1 table in each direction
+  explicit PhraseJointBase_BiDir(const Model1& m1,
+                                 const Model1& im1,
+                                 const double m1mixture,
+                                 const unsigned vocab_e_size,
+                                 const unsigned vocab_f_size) :
+      model1(m1),
+      invmodel1(im1),
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_SOURCE(1.0 / vocab_f_size),
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);  // mixture weight must lie in [0, 1]
+    assert(vocab_e_size > 0);  // NOTE(review): vocab_f_size is divided by above but never checked
+  }
+
+  // return p0 of rule.e_ , rule.f_ (whole rule, both start offsets 0)
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const Model1& model1;  // called as model1(src, trg); held by reference, must outlive this object
+  const Model1& invmodel1;  // called as invmodel1(trg, src); held by reference, must outlive this object
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component (1 - m1mixture)
+  const prob_t kUNIFORM_SOURCE;  // 1 / vocab_f_size
+  const prob_t kUNIFORM_TARGET;  // 1 / vocab_e_size
+};
+
+// base distribution for jump size multinomials
+// basically p(0) = 0 and then, p(1) is max, and then
+// you drop as you move to the max jump distance
+struct JumpBase {
+  JumpBase();  // defined out of line; fills p
+
+  const prob_t& operator()(int jump, unsigned src_len) const {
+    assert(jump != 0);  // a zero jump is rejected here
+    const std::map<int, prob_t>::const_iterator it = p[src_len].find(jump);  // NOTE(review): src_len is not range-checked against p.size()
+    assert(it != p[src_len].end());  // jump must have been tabulated for this src_len
+    return it->second;
+  }
+  std::vector<std::map<int, prob_t> > p;  // indexed by src_len, then keyed by jump
+};
+
+
+#endif
diff --git a/gi/pf/base_measures.cc b/gi/pf/base_measures.cc
deleted file mode 100644
index 4b1863fa..00000000
--- a/gi/pf/base_measures.cc
+++ /dev/null
@@ -1,241 +0,0 @@
-#include "base_measures.h"
-
-#include <iostream>
-
-#include "filelib.h"
-
-using namespace std;
-
-TableLookupBase::TableLookupBase(const string& fname) {
-  cerr << "TableLookupBase reading from " << fname << " ..." << endl;
-  ReadFile rf(fname);
-  istream& in = *rf.stream();
-  string line;
-  unsigned lc = 0;
-  const WordID kDIV = TD::Convert("|||");
-  vector<WordID> tmp;
-  vector<int> le, lf;
-  TRule x;
-  x.lhs_ = -TD::Convert("X");
-  bool flag = false;
-  while(getline(in, line)) {
-    ++lc;
-    if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; }
-    else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; }
-    tmp.clear();
-    TD::ConvertSentence(line, &tmp);
-    x.f_.clear();
-    x.e_.clear();
-    size_t pos = 0;
-    int cc = 0;
-    while(pos < tmp.size()) {
-      const WordID cur = tmp[pos++];
-      if (cur == kDIV) {
-        ++cc;
-      } else if (cc == 0) {
-        x.f_.push_back(cur);    
-      } else if (cc == 1) {
-        x.e_.push_back(cur);
-      } else if (cc == 2) {
-        table[x].logeq(atof(TD::Convert(cur)));
-        ++cc;
-      } else {
-        if (flag) cerr << endl;
-        cerr << "Bad format in " << lc << ": " << line << endl; abort();
-      }
-    }
-    if (cc != 3) {
-      if (flag) cerr << endl;
-      cerr << "Bad format in " << lc << ": " << line << endl; abort();
-    }
-  }
-  if (flag) cerr << endl;
-  cerr << " read " << lc << " entries\n";
-}
-
-prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
-                                                     const vector<WordID>& vtrg,
-                                                     int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t p;
-  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  //p.logeq(log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
-  for (int i = 0; i < elen; ++i)
-    p *= u(vtrg[i + start_trg]);                        // draw e_i             ~Uniform
-  return p;
-}
-
-prob_t PhraseConditionalUninformativeBase::p0(const vector<WordID>& vsrc,
-                                              const vector<WordID>& vtrg,
-                                              int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t p;
-  //p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  p.logeq(log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
-  for (int i = 0; i < elen; ++i)
-    p *= kUNIFORM_TARGET;                        // draw e_i             ~Uniform
-  return p;
-}
-
-void Model1::LoadModel1(const string& fname) {
-  cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
-  ReadFile rf(fname);
-  istream& in = *rf.stream();
-  string line;
-  unsigned lc = 0;
-  while(getline(in, line)) {
-    ++lc;
-    int cur = 0;
-    int start = 0;
-    while(cur < line.size() && line[cur] != ' ') { ++cur; }
-    assert(cur != line.size());
-    line[cur] = 0;
-    const WordID src = TD::Convert(&line[0]);
-    ++cur;
-    start = cur;
-    while(cur < line.size() && line[cur] != ' ') { ++cur; }
-    assert(cur != line.size());
-    line[cur] = 0;
-    WordID trg = TD::Convert(&line[start]);
-    const double logprob = strtod(&line[cur + 1], NULL);
-    if (src >= ttable.size()) ttable.resize(src + 1);
-    ttable[src][trg].logeq(logprob);
-  }
-  cerr << "  read " << lc << " parameters.\n";
-}
-
-prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc,
-                                 const vector<WordID>& vtrg,
-                                 int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
-  prob_t p;
-  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS
-    const WordID trg = vtrg[i + start_trg];
-    prob_t tp = prob_t::Zero();
-    for (int j = -1; j < flen; ++j) {
-      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
-      tp += kM1MIXTURE * model1(src, trg);
-      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
-    }
-    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
-    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
-  }
-  if (p.is_0()) {
-    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
-    abort();
-  }
-  return p;
-}
-
-prob_t PhraseJointBase::p0(const vector<WordID>& vsrc,
-                           const vector<WordID>& vtrg,
-                           int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
-  prob_t p;
-  p.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
-                                                 // elen | flen          ~Pois(flen + 0.01)
-  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
-  p *= ptrglen;
-  p *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
-  for (int i = 0; i < elen; ++i) {               // for each position i in E
-    const WordID trg = vtrg[i + start_trg];
-    prob_t tp = prob_t::Zero();
-    for (int j = -1; j < flen; ++j) {
-      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
-      tp += kM1MIXTURE * model1(src, trg);
-      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
-    }
-    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
-    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
-  }
-  if (p.is_0()) {
-    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
-    abort();
-  }
-  return p;
-}
-
-prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc,
-                                 const vector<WordID>& vtrg,
-                                 int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
-  prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1));
-
-  prob_t p1;
-  p1.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
-                                                 // elen | flen          ~Pois(flen + 0.01)
-  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
-  p1 *= ptrglen;
-  p1 *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
-  for (int i = 0; i < elen; ++i) {               // for each position i in E
-    const WordID trg = vtrg[i + start_trg];
-    prob_t tp = prob_t::Zero();
-    for (int j = -1; j < flen; ++j) {
-      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
-      tp += kM1MIXTURE * model1(src, trg);
-      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
-    }
-    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
-    p1 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
-  }
-  if (p1.is_0()) {
-    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
-    abort();
-  }
-
-  prob_t p2;
-  p2.logeq(log_poisson(elen, 1.0));               // elen                 ~Pois(1)
-                                                 // flen | elen          ~Pois(flen + 0.01)
-  prob_t psrclen; psrclen.logeq(log_poisson(flen, elen + 0.01));
-  p2 *= psrclen;
-  p2 *= kUNIFORM_TARGET.pow(elen);                // each f in F ~Uniform
-  for (int i = 0; i < flen; ++i) {               // for each position i in E
-    const WordID src = vsrc[i + start_src];
-    prob_t tp = prob_t::Zero();
-    for (int j = -1; j < elen; ++j) {
-      const WordID trg = j < 0 ? 0 : vtrg[j + start_trg];
-      tp += kM1MIXTURE * invmodel1(trg, src);
-      tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE;
-    }
-    tp *= uniform_trg_alignment;                 //     draw a_i         ~uniform
-    p2 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
-  }
-  if (p2.is_0()) {
-    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
-    abort();
-  }
-
-  static const prob_t kHALF(0.5);
-  return (p1 + p2) * kHALF;
-}
-
-JumpBase::JumpBase() : p(200) {
-  for (unsigned src_len = 1; src_len < 200; ++src_len) {
-    map<int, prob_t>& cpd = p[src_len];
-    int min_jump = 1 - src_len;
-    int max_jump = src_len;
-    prob_t z;
-    for (int j = min_jump; j <= max_jump; ++j) {
-      prob_t& cp = cpd[j];
-      if (j < 0)
-        cp.logeq(log_poisson(1.5-j, 1));
-      else if (j > 0)
-        cp.logeq(log_poisson(j, 1));
-      cp.poweq(0.2);
-      z += cp;
-    }
-    for (int j = min_jump; j <= max_jump; ++j) {
-      cpd[j] /= z;
-    }
-  }
-}
-
diff --git a/gi/pf/base_measures.h b/gi/pf/base_measures.h
deleted file mode 100644
index b0495bfd..00000000
--- a/gi/pf/base_measures.h
+++ /dev/null
@@ -1,247 +0,0 @@
-#ifndef _BASE_MEASURES_H_
-#define _BASE_MEASURES_H_
-
-#include <vector>
-#include <map>
-#include <string>
-#include <cmath>
-#include <iostream>
-
-#include "unigrams.h"
-#include "trule.h"
-#include "prob.h"
-#include "tdict.h"
-#include "sampler.h"
-
-inline double log_poisson(unsigned x, const double& lambda) {
-  assert(lambda > 0.0);
-  return log(lambda) * x - lgamma(x + 1) - lambda;
-}
-
-inline std::ostream& operator<<(std::ostream& os, const std::vector<WordID>& p) {
-  os << '[';
-  for (int i = 0; i < p.size(); ++i)
-    os << (i==0 ? "" : " ") << TD::Convert(p[i]);
-  return os << ']';
-}
-
-struct Model1 {
-  explicit Model1(const std::string& fname) :
-      kNULL(TD::Convert("<eps>")),
-      kZERO() {
-    LoadModel1(fname);
-  }
-
-  void LoadModel1(const std::string& fname);
-
-  // returns prob 0 if src or trg is not found
-  const prob_t& operator()(WordID src, WordID trg) const {
-    if (src == 0) src = kNULL;
-    if (src < ttable.size()) {
-      const std::map<WordID, prob_t>& cpd = ttable[src];
-      const std::map<WordID, prob_t>::const_iterator it = cpd.find(trg);
-      if (it != cpd.end())
-        return it->second;
-    }
-    return kZERO;
-  }
-
-  const WordID kNULL;
-  const prob_t kZERO;
-  std::vector<std::map<WordID, prob_t> > ttable;
-};
-
-struct PoissonUniformUninformativeBase {
-  explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}
-  prob_t operator()(const TRule& r) const {
-    prob_t p; p.logeq(log_poisson(r.e_.size(), 1.0));
-    prob_t q = kUNIFORM; q.poweq(r.e_.size());
-    p *= q;
-    return p;
-  }
-  void Summary() const {}
-  void ResampleHyperparameters(MT19937*) {}
-  void Increment(const TRule&) {}
-  void Decrement(const TRule&) {}
-  prob_t Likelihood() const { return prob_t::One(); }
-  const prob_t kUNIFORM;
-};
-
-struct CompletelyUniformBase {
-  explicit CompletelyUniformBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}
-  prob_t operator()(const TRule&) const {
-    return kUNIFORM;
-  }
-  void Summary() const {}
-  void ResampleHyperparameters(MT19937*) {}
-  void Increment(const TRule&) {}
-  void Decrement(const TRule&) {}
-  prob_t Likelihood() const { return prob_t::One(); }
-  const prob_t kUNIFORM;
-};
-
-struct UnigramWordBase {
-  explicit UnigramWordBase(const std::string& fname) : un(fname) {}
-  prob_t operator()(const TRule& r) const {
-    return un(r.e_);
-  }
-  const UnigramWordModel un;
-};
-
-struct RuleHasher {
-  size_t operator()(const TRule& r) const {
-    return hash_value(r);
-  }
-};
-
-struct TableLookupBase {
-  TableLookupBase(const std::string& fname);
-
-  prob_t operator()(const TRule& rule) const {
-    const std::tr1::unordered_map<TRule,prob_t>::const_iterator it = table.find(rule);
-    if (it == table.end()) {
-      std::cerr << rule << " not found\n";
-      abort();
-    }
-    return it->second;
-  }
-
-  void ResampleHyperparameters(MT19937*) {}
-  void Increment(const TRule&) {}
-  void Decrement(const TRule&) {}
-  prob_t Likelihood() const { return prob_t::One(); }
-  void Summary() const {}
-
-  std::tr1::unordered_map<TRule,prob_t,RuleHasher> table;
-};
-
-struct PhraseConditionalUninformativeBase {
-  explicit PhraseConditionalUninformativeBase(const unsigned vocab_e_size) :
-      kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of rule.e_ | rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  void Summary() const {}
-  void ResampleHyperparameters(MT19937*) {}
-  void Increment(const TRule&) {}
-  void Decrement(const TRule&) {}
-  prob_t Likelihood() const { return prob_t::One(); }
-  const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseConditionalUninformativeUnigramBase {
-  explicit PhraseConditionalUninformativeUnigramBase(const std::string& file, const unsigned vocab_e_size) : u(file, vocab_e_size) {}
-
-  // return p0 of rule.e_ | rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const UnigramModel u;
-};
-
-struct PhraseConditionalBase {
-  explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) :
-      model1(m1),
-      kM1MIXTURE(m1mixture),
-      kUNIFORM_MIXTURE(1.0 - m1mixture),
-      kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of rule.e_ | rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const Model1& model1;
-  const prob_t kM1MIXTURE;  // Model 1 mixture component
-  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
-  const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseJointBase {
-  explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) :
-      model1(m1),
-      kM1MIXTURE(m1mixture),
-      kUNIFORM_MIXTURE(1.0 - m1mixture),
-      kUNIFORM_SOURCE(1.0 / vocab_f_size),
-      kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of rule.e_ , rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const Model1& model1;
-  const prob_t kM1MIXTURE;  // Model 1 mixture component
-  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
-  const prob_t kUNIFORM_SOURCE;
-  const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseJointBase_BiDir {
-  explicit PhraseJointBase_BiDir(const Model1& m1,
-                                 const Model1& im1,
-                                 const double m1mixture,
-                                 const unsigned vocab_e_size,
-                                 const unsigned vocab_f_size) :
-      model1(m1),
-      invmodel1(im1),
-      kM1MIXTURE(m1mixture),
-      kUNIFORM_MIXTURE(1.0 - m1mixture),
-      kUNIFORM_SOURCE(1.0 / vocab_f_size),
-      kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of rule.e_ , rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const Model1& model1;
-  const Model1& invmodel1;
-  const prob_t kM1MIXTURE;  // Model 1 mixture component
-  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
-  const prob_t kUNIFORM_SOURCE;
-  const prob_t kUNIFORM_TARGET;
-};
-
-// base distribution for jump size multinomials
-// basically p(0) = 0 and then, p(1) is max, and then
-// you drop as you move to the max jump distance
-struct JumpBase {
-  JumpBase();
-
-  const prob_t& operator()(int jump, unsigned src_len) const {
-    assert(jump != 0);
-    const std::map<int, prob_t>::const_iterator it = p[src_len].find(jump);
-    assert(it != p[src_len].end());
-    return it->second;
-  }
-  std::vector<std::map<int, prob_t> > p;
-};
-
-
-#endif
diff --git a/mteval/ns.cc b/mteval/ns.cc
index 68c8deaa..da678b84 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -136,6 +136,10 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
                          float* correct,  // N elements reserved
                          float* hyp,      // N elements reserved
                          bool clip_counts = true) const {
+    // clear clipping stats
+    for (typename NGramCountMap::iterator it = ngrams_.begin(); it != ngrams_.end(); ++it)
+      it->second.second = 0;
+
     vector<WordID> ngram(N);
     *correct *= 0;
     *hyp *= 0;
diff --git a/mteval/ns.h b/mteval/ns.h
index 622265db..d88c263b 100644
--- a/mteval/ns.h
+++ b/mteval/ns.h
@@ -6,6 +6,7 @@
 #include <map>
 #include <boost/shared_ptr.hpp>
 #include "wordid.h"
+#include <iostream>
 
 class SufficientStats {
  public:
@@ -43,6 +44,11 @@ class SufficientStats {
   bool operator==(const SufficientStats& other) const {
     return other.fields == fields;
   }
+  bool IsAdditiveIdentity() const {
+    for (unsigned i = 0; i < fields.size(); ++i)
+      if (fields[i]) return false;
+    return true;
+  }
   size_t size() const { return fields.size(); }
   float operator[](size_t i) const {
     if (i < fields.size()) return fields[i];
@@ -54,12 +60,12 @@ class SufficientStats {
   std::vector<float> fields;
 };
 
-inline const SufficientStats& operator+(const SufficientStats& a, const SufficientStats& b) {
+inline const SufficientStats operator+(const SufficientStats& a, const SufficientStats& b) {
   SufficientStats res(a);
   return res += b;
 }
 
-inline const SufficientStats& operator-(const SufficientStats& a, const SufficientStats& b) {
+inline const SufficientStats operator-(const SufficientStats& a, const SufficientStats& b) {
   SufficientStats res(a);
   return res -= b;
 }
diff --git a/vest/ces.cc b/vest/ces.cc
index 4ae6b695..cd89aa69 100644
--- a/vest/ces.cc
+++ b/vest/ces.cc
@@ -4,25 +4,32 @@
 #include <sstream>
 #include <boost/shared_ptr.hpp>
 
-#include "aligner.h"
+// TODO, if AER is to be optimized again, we will need this
+// #include "aligner.h"
 #include "lattice.h"
 #include "viterbi_envelope.h"
 #include "error_surface.h"
+#include "ns.h"
 
 using boost::shared_ptr;
 using namespace std;
 
 const bool minimize_segments = true;    // if adjacent segments have equal scores, merge them
 
-void ComputeErrorSurface(const SentenceScorer& ss, const ViterbiEnvelope& ve, ErrorSurface* env, const ScoreType type, const Hypergraph& hg) {
+void ComputeErrorSurface(const SegmentEvaluator& ss,
+                         const ViterbiEnvelope& ve,
+                         ErrorSurface* env,
+                         const EvaluationMetric* metric,
+                         const Hypergraph& hg) {
   vector<WordID> prev_trans;
   const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
   env->resize(ienv.size());
-  ScoreP prev_score;
+  SufficientStats prev_score; // defaults to 0
   int j = 0;
   for (int i = 0; i < ienv.size(); ++i) {
     const Segment& seg = *ienv[i];
     vector<WordID> trans;
+#if 0
     if (type == AER) {
       vector<bool> edges(hg.edges_.size(), false);
       seg.CollectEdgesUsed(&edges);  // get the set of edges in the viterbi
@@ -46,34 +53,31 @@ void ComputeErrorSurface(const SentenceScorer& ss, const ViterbiEnvelope& ve, Er
       string tstr = os.str();
       TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans);
     } else {
+#endif
       seg.ConstructTranslation(&trans);
-    }
-    // cerr << "Scoring: " << TD::GetString(trans) << endl;
+    //}
+    //cerr << "Scoring: " << TD::GetString(trans) << endl;
     if (trans == prev_trans) {
       if (!minimize_segments) {
-        assert(prev_score); // if this fails, it means
-	                    // the decoder can generate null translations
         ErrorSegment& out = (*env)[j];
-        out.delta = prev_score->GetZero();
+        out.delta.fields.clear();
         out.x = seg.x;
 	++j;
       }
-      // cerr << "Identical translation, skipping scoring\n";
+      //cerr << "Identical translation, skipping scoring\n";
     } else {
-      ScoreP score = ss.ScoreCandidate(trans);
+      SufficientStats score;
+      ss.Evaluate(trans, &score);
       // cerr << "score= " << score->ComputeScore() << "\n";
-      ScoreP cur_delta_p = score->GetZero();
-      Score* cur_delta = cur_delta_p.get();
-      // just record the score diffs
-      if (!prev_score)
-        prev_score = score->GetZero();
-
-      score->Subtract(*prev_score, cur_delta);
+      //string x1; score.Encode(&x1); cerr << "STATS: " << x1 << endl;
+      const SufficientStats delta = score - prev_score;
+      //string x2; delta.Encode(&x2); cerr << "DELTA: " << x2 << endl;
+      //string xx; delta.Encode(&xx); cerr << xx << endl;
       prev_trans.swap(trans);
       prev_score = score;
-      if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) {
+      if ((!minimize_segments) || (!delta.IsAdditiveIdentity())) {
         ErrorSegment& out = (*env)[j];
-        out.delta = cur_delta_p;
+        out.delta = delta;
         out.x = seg.x;
         ++j;
       }
diff --git a/vest/ces.h b/vest/ces.h
index 2f098990..e021e715 100644
--- a/vest/ces.h
+++ b/vest/ces.h
@@ -1,12 +1,16 @@
 #ifndef _CES_H_
 #define _CES_H_
 
-#include "scorer.h"
-
 class ViterbiEnvelope;
 class Hypergraph;
+class SegmentEvaluator;
 class ErrorSurface;
+class EvaluationMetric;
 
-void ComputeErrorSurface(const SentenceScorer& ss, const ViterbiEnvelope& ve, ErrorSurface* es, const ScoreType type, const Hypergraph& hg);
+void ComputeErrorSurface(const SegmentEvaluator& ss,
+                         const ViterbiEnvelope& ve,
+                         ErrorSurface* es,
+                         const EvaluationMetric* metric,
+                         const Hypergraph& hg);
 
 #endif
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index c382a972..8cde748b 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -364,7 +364,7 @@ while (1){
 			$mapoutput =~ s/mapinput/mapoutput/;
 			push @mapoutputs, "$dir/splag.$im1/$mapoutput";
 			$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
-			my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
+			my $script = "$MAPPER -s $srcFile -m $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
 			if ($use_make) {
 				my $script_file = "$dir/scripts/map.$shard";
 				open F, ">$script_file" or die "Can't write $script_file: $!";
@@ -424,7 +424,7 @@ while (1){
 		print STDERR "Results for $tol/$til lines\n";
 		print STDERR "\nSORTING AND RUNNING VEST REDUCER\n";
 		print STDERR unchecked_output("date");
-		$cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -l $metric > $dir/redoutput.$im1";
+		$cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -m $metric > $dir/redoutput.$im1";
 		print STDERR "COMMAND:\n$cmd\n";
 		check_bash_call($cmd);
 		$cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1";
diff --git a/vest/error_surface.cc b/vest/error_surface.cc
index 754aa8de..515b67f8 100644
--- a/vest/error_surface.cc
+++ b/vest/error_surface.cc
@@ -5,8 +5,7 @@
 
 using namespace std;
 
-ErrorSurface::~ErrorSurface() {
-}
+ErrorSurface::~ErrorSurface() {}
 
 void ErrorSurface::Serialize(std::string* out) const {
   const int segments = this->size();
@@ -15,8 +14,8 @@ void ErrorSurface::Serialize(std::string* out) const {
   for (int i = 0; i < segments; ++i) {
     const ErrorSegment& cur = (*this)[i];
     string senc;
-    cur.delta->Encode(&senc);
-    assert(senc.size() < 256);
+    cur.delta.Encode(&senc);
+    assert(senc.size() < 1024);
     unsigned char len = senc.size();
     os.write((const char*)&cur.x, sizeof(cur.x));
     os.write((const char*)&len, sizeof(len));
@@ -25,7 +24,7 @@ void ErrorSurface::Serialize(std::string* out) const {
   *out = os.str();
 }
 
-void ErrorSurface::Deserialize(ScoreType type, const std::string& in) {
+void ErrorSurface::Deserialize(const std::string& in) {
   istringstream is(in, ios::binary);
   int segments;
   is.read((char*)&segments, sizeof(segments));
@@ -37,7 +36,7 @@ void ErrorSurface::Deserialize(ScoreType type, const std::string& in) {
     is.read((char*)&len, sizeof(len));
     string senc(len, '\0'); assert(senc.size() == len);
     is.read((char*)&senc[0], len);
-    cur.delta = SentenceScorer::CreateScoreFromString(type, senc);
+    cur.delta = SufficientStats(senc);
   }
 }
 
diff --git a/vest/error_surface.h b/vest/error_surface.h
index ad728cfa..bb65847b 100644
--- a/vest/error_surface.h
+++ b/vest/error_surface.h
@@ -4,13 +4,13 @@
 #include <vector>
 #include <string>
 
-#include "scorer.h"
+#include "ns.h"
 
 class Score;
 
 struct ErrorSegment {
   double x;
-  ScoreP delta;
+  SufficientStats delta;
   ErrorSegment() : x(0), delta() {}
 };
 
@@ -18,7 +18,7 @@ class ErrorSurface : public std::vector<ErrorSegment> {
  public:
   ~ErrorSurface();
   void Serialize(std::string* out) const;
-  void Deserialize(ScoreType type, const std::string& in);
+  void Deserialize(const std::string& in);
 };
 
 #endif
diff --git a/vest/line_optimizer.cc b/vest/line_optimizer.cc
index 7303df8d..49443fbe 100644
--- a/vest/line_optimizer.cc
+++ b/vest/line_optimizer.cc
@@ -4,7 +4,7 @@
 #include <algorithm>
 
 #include "sparse_vector.h"
-#include "scorer.h"
+#include "ns.h"
 
 using namespace std;
 
@@ -18,6 +18,7 @@ struct IntervalComp {
 };
 
 double LineOptimizer::LineOptimize(
+    const EvaluationMetric* metric,
     const vector<ErrorSurface>& surfaces,
     const LineOptimizer::ScoreType type,
     float* best_score,
@@ -32,8 +33,7 @@ double LineOptimizer::LineOptimize(
   }
   sort(all_ints.begin(), all_ints.end(), IntervalComp());
   double last_boundary = all_ints.front()->x;
-  ScoreP accp = all_ints.front()->delta->GetZero();
-  Score *acc=accp.get();
+  SufficientStats acc;
   float& cur_best_score = *best_score;
   cur_best_score = (type == MAXIMIZE_SCORE ?
     -numeric_limits<float>::max() : numeric_limits<float>::max());
@@ -42,9 +42,8 @@ double LineOptimizer::LineOptimize(
   for (vector<ErrorIter>::iterator i = all_ints.begin();
        i != all_ints.end(); ++i) {
     const ErrorSegment& seg = **i;
-    assert(seg.delta);
     if (seg.x - last_boundary > epsilon) {
-      float sco = acc->ComputeScore();
+      float sco = metric->ComputeScore(acc);
       if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
           (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
         cur_best_score = sco;
@@ -54,16 +53,18 @@ double LineOptimizer::LineOptimize(
 	} else {
 	  pos = last_boundary + (seg.x - last_boundary) / 2;
 	}
-	// cerr << "NEW BEST: " << pos << "  (score=" << cur_best_score << ")\n";
+	//cerr << "NEW BEST: " << pos << "  (score=" << cur_best_score << ")\n";
       }
-      // string xx; acc->ScoreDetails(&xx); cerr << "---- " << xx;
+      // string xx = metric->DetailedScore(acc); cerr << "---- " << xx;
       // cerr << "---- s=" << sco << "\n";
       last_boundary = seg.x;
     }
     // cerr << "x-boundary=" << seg.x << "\n";
-    acc->PlusEquals(*seg.delta);
+    //string x2; acc.Encode(&x2); cerr << "   ACC: " << x2 << endl;
+    //string x1; seg.delta.Encode(&x1); cerr << " DELTA: " << x1 << endl;
+    acc += seg.delta;
   }
-  float sco = acc->ComputeScore();
+  float sco = metric->ComputeScore(acc);
   if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
       (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
     cur_best_score = sco;
@@ -107,3 +108,4 @@ void LineOptimizer::CreateOptimizationDirections(
      RandomUnitVector(features_to_optimize, &out[i], rng);
   cerr << "Generated " << out.size() << " total axes to optimize along.\n";
 }
+
diff --git a/vest/line_optimizer.h b/vest/line_optimizer.h
index 99a591f4..83819f41 100644
--- a/vest/line_optimizer.h
+++ b/vest/line_optimizer.h
@@ -7,6 +7,7 @@
 #include "error_surface.h"
 #include "sampler.h"
 
+class EvaluationMetric;
 class Weights;
 
 struct LineOptimizer {
@@ -18,6 +19,7 @@ struct LineOptimizer {
   // merge all the error surfaces together into a global
   // error surface and find (the middle of) the best segment
   static double LineOptimize(
+     const EvaluationMetric* metric,
      const std::vector<ErrorSurface>& envs,
      const LineOptimizer::ScoreType type,
      float* best_score,
diff --git a/vest/lo_test.cc b/vest/lo_test.cc
index f5638600..a67f65e1 100644
--- a/vest/lo_test.cc
+++ b/vest/lo_test.cc
@@ -5,6 +5,8 @@
 #include <boost/shared_ptr.hpp>
 #include <gtest/gtest.h>
 
+#include "ns.h"
+#include "ns_docscorer.h"
 #include "ces.h"
 #include "fdict.h"
 #include "hg.h"
@@ -15,7 +17,6 @@
 #include "viterbi.h"
 #include "viterbi_envelope.h"
 #include "line_optimizer.h"
-#include "scorer.h"
 
 using namespace std;
 using boost::shared_ptr;
@@ -141,9 +142,6 @@ TEST_F(OptTest, TestS1) {
   TD::ConvertSentence(ref22, &refs2[1]);
   TD::ConvertSentence(ref32, &refs2[2]);
   TD::ConvertSentence(ref42, &refs2[3]);
-  ScoreType type = ScoreTypeFromString("ibm_bleu");
-  ScorerP scorer1 = SentenceScorer::CreateSentenceScorer(type, refs1);
-  ScorerP scorer2 = SentenceScorer::CreateSentenceScorer(type, refs2);
   vector<ViterbiEnvelope> envs(2);
 
   RandomNumberGenerator<boost::mt19937> rng;
@@ -167,14 +165,17 @@ TEST_F(OptTest, TestS1) {
   envs[1] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg2, NULL, wf);
 
   vector<ErrorSurface> es(2);
-  ComputeErrorSurface(*scorer1, envs[0], &es[0], IBM_BLEU, hg);
-  ComputeErrorSurface(*scorer2, envs[1], &es[1], IBM_BLEU, hg2);
+  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(refs1);
+  boost::shared_ptr<SegmentEvaluator> scorer2 = metric->CreateSegmentEvaluator(refs2);
+  ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg);
+  ComputeErrorSurface(*scorer2, envs[1], &es[1], metric, hg2);
   cerr << envs[0].size() << " " << envs[1].size() << endl;
   cerr << es[0].size() << " " << es[1].size() << endl;
   envs.clear();
   clock_t t_env=clock();
   float score;
-  double m = LineOptimizer::LineOptimize(es, LineOptimizer::MAXIMIZE_SCORE, &score);
+  double m = LineOptimizer::LineOptimize(metric,es, LineOptimizer::MAXIMIZE_SCORE, &score);
   clock_t t_opt=clock();
   cerr << "line optimizer returned: " << m << " (SCORE=" << score << ")\n";
   EXPECT_FLOAT_EQ(0.48719698, score);
@@ -217,15 +218,15 @@ TEST_F(OptTest,TestZeroOrigin) {
   vector<ViterbiEnvelope> envs(1);
   envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
 
-  ScoreType type = ScoreTypeFromString("ibm_bleu");
   vector<vector<WordID> > mr(4);
   TD::ConvertSentence("untitled", &mr[0]);
   TD::ConvertSentence("with no title", &mr[1]);
   TD::ConvertSentence("without a title", &mr[2]);
   TD::ConvertSentence("without title", &mr[3]);
-  ScorerP scorer1 = SentenceScorer::CreateSentenceScorer(type, mr);
+  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(mr);
   vector<ErrorSurface> es(1);
-  ComputeErrorSurface(*scorer1, envs[0], &es[0], IBM_BLEU, hg);
+  ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg);
 }
 
 int main(int argc, char **argv) {
diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc
index 71dda6d7..8f6e085d 100644
--- a/vest/mr_vest_map.cc
+++ b/vest/mr_vest_map.cc
@@ -6,11 +6,12 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "ns.h"
+#include "ns_docscorer.h"
 #include "ces.h"
 #include "filelib.h"
 #include "stringlib.h"
 #include "sparse_vector.h"
-#include "scorer.h"
 #include "viterbi_envelope.h"
 #include "inside_outside.h"
 #include "error_surface.h"
@@ -25,7 +26,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   opts.add_options()
         ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)")
         ("source,s",po::value<string>(), "Source file (ignored, except for AER)")
-        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Loss function being optimized")
+        ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric being optimized")
         ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)")
         ("help,h", "Help");
   po::options_description dcmdline_options;
@@ -67,10 +68,10 @@ bool ReadSparseVectorString(const string& s, SparseVector<double>* v) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), conf["source"].as<string>());
-  cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
+  cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl;
   Hypergraph hg;
   string last_file;
   ReadFile in_read(conf["input"].as<string>());
@@ -97,7 +98,8 @@ int main(int argc, char** argv) {
     ViterbiEnvelopeWeightFunction wf(origin, axis);
     ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
     ErrorSurface es;
-    ComputeErrorSurface(*ds[sent_id], ve, &es, type, hg);
+
+    ComputeErrorSurface(*ds[sent_id], ve, &es, metric, hg);
     //cerr << "Viterbi envelope has " << ve.size() << " segments\n";
     // cerr << "Error surface has " << es.size() << " segments\n";
     string val;
diff --git a/vest/mr_vest_reduce.cc b/vest/mr_vest_reduce.cc
index 3df52020..dda61f88 100644
--- a/vest/mr_vest_reduce.cc
+++ b/vest/mr_vest_reduce.cc
@@ -10,6 +10,7 @@
 #include "error_surface.h"
 #include "line_optimizer.h"
 #include "b64tools.h"
+#include "stringlib.h"
 
 using namespace std;
 namespace po = boost::program_options;
@@ -17,12 +18,12 @@ namespace po = boost::program_options;
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
-        ("loss_function,l",po::value<string>(), "Loss function being optimized")
+        ("evaluation_metric,m",po::value<string>(), "Evaluation metric (IBM_BLEU, etc.)")
         ("help,h", "Help");
   po::options_description dcmdline_options;
   dcmdline_options.add(opts);
   po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = conf->count("loss_function") == 0;
+  bool flag = conf->count("evaluation_metric") == 0;
   if (flag || conf->count("help")) {
     cerr << dcmdline_options << endl;
     exit(1);
@@ -32,30 +33,27 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
   LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
-  if (type == TER || type == AER) {
+  if (UppercaseString(evaluation_metric) == "TER")
     opt_type = LineOptimizer::MINIMIZE_SCORE;
-  }
-  string last_key;
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+
   vector<ErrorSurface> esv;
-  while(cin) {
-    string line;
-    getline(cin, line);
-    if (line.empty()) continue;
+  string last_key, line, key, val;
+  while(getline(cin, line)) {
     size_t ks = line.find("\t");
     assert(string::npos != ks);
     assert(ks > 2);
-    string key = line.substr(2, ks - 2);
-    string val = line.substr(ks + 1);
+    key = line.substr(2, ks - 2);
+    val = line.substr(ks + 1);
     if (key != last_key) {
       if (!last_key.empty()) {
 	float score;
-        double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
+        double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score);
 	cout << last_key << "|" << x << "|" << score << endl;
       }
-      last_key = key;
+      last_key.swap(key);
       esv.clear();
     }
     if (val.size() % 4 != 0) {
@@ -68,13 +66,11 @@ int main(int argc, char** argv) {
       continue;
     }
     esv.push_back(ErrorSurface());
-    esv.back().Deserialize(type, encoded);
+    esv.back().Deserialize(encoded);
   }
   if (!esv.empty()) {
-    // cerr << "ESV=" << esv.size() << endl;
-    // for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; }
     float score;
-    double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
+    double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score);
     cout << last_key << "|" << x << "|" << score << endl;
   }
   return 0;
-- 
cgit v1.2.3


From 3d17bf9ae1ba67cd091794839d4d5f4c393a0e2c Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 27 Jan 2012 13:19:27 -0500
Subject: migration to new metric api for vest, clean up of unsupported/not
 functional code

---
 mteval/mbr_kbest.cc                   |  21 +-
 utils/fast_sparse_vector.h            |   6 +
 vest/dist-vest.pl                     |  22 +--
 vest/mbr_kbest.cc                     | 138 -------------
 vest/mr_vest_generate_mapper_input.cc | 356 ++++++----------------------------
 vest/mr_vest_map.cc                   |  16 +-
 6 files changed, 84 insertions(+), 475 deletions(-)
 delete mode 100644 vest/mbr_kbest.cc

(limited to 'vest')

diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index 64a6a8bf..b5e4750c 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -5,7 +5,7 @@
 
 #include "prob.h"
 #include "tdict.h"
-#include "scorer.h"
+#include "ns.h"
 #include "filelib.h"
 #include "stringlib.h"
 
@@ -17,7 +17,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
         ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)")
-        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function")
+        ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric")
         ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")
         ("output_list,L", "Show reranked list as output")
         ("help,h", "Help");
@@ -75,13 +75,14 @@ bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, pro
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string metric = conf["loss_function"].as<string>();
+  const string smetric = conf["evaluation_metric"].as<string>();
+  EvaluationMetric* metric = EvaluationMetric::Instance(smetric);
+  const bool is_loss = (UppercaseString(smetric) == "TER");
   const bool output_list = conf.count("output_list") > 0;
   const string file = conf["input"].as<string>();
   const double mbr_scale = conf["scale"].as<double>();
   cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl;
 
-  ScoreType type = ScoreTypeFromString(metric);
   vector<pair<vector<WordID>, prob_t> > list;
   ReadFile rf(file);
   string sent_id;
@@ -99,15 +100,15 @@ int main(int argc, char** argv) {
     vector<double> mbr_scores(output_list ? list.size() : 0);
     double mbr_loss = numeric_limits<double>::max();
     for (int i = 0 ; i < list.size(); ++i) {
-      vector<vector<WordID> > refs(1, list[i].first);
-      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl;
-      ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs);
+      const vector<vector<WordID> > refs(1, list[i].first);
+
       double wl_acc = 0;
       for (int j = 0; j < list.size(); ++j) {
         if (i != j) {
-          ScoreP s = scorer->ScoreCandidate(list[j].first);
-          double loss = 1.0 - s->ComputeScore();
-          if (type == TER || type == AER) loss = 1.0 - loss;
+          SufficientStats ss;
+          metric->ComputeSufficientStatistics(list[j].first, refs, &ss);
+          double loss = 1.0 - metric->ComputeScore(ss);
+          if (is_loss) loss = 1.0 - loss;
           double weighted_loss = loss * (joints[j] / marginal).as_float();
           wl_acc += weighted_loss;
           if ((!output_list) && wl_acc > mbr_loss) break;
diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
index 1301581a..17fa47bf 100644
--- a/utils/fast_sparse_vector.h
+++ b/utils/fast_sparse_vector.h
@@ -178,6 +178,12 @@ class FastSparseVector {
   T l2norm() const {
     return sqrt(l2norm_sq());
   }
+  T pnorm(const double p) const {  // p-norm of the stored values: (sum_i |v_i|^p)^(1/p)
+    T sum = T();
+    for (const_iterator it = begin(), e = end(); it != e; ++it)
+      sum += pow(fabs(it->second), p);  // accumulate |value|^p over every entry the iterator visits
+    return pow(sum, 1.0 / p);  // NOTE(review): p == 0 divides by zero here; callers presumably pass p >= 1 -- confirm
+  }
   // if values are binary, gives |A intersect B|/|A union B|
   template<typename S>
   S tanimoto_coef(const FastSparseVector<S> &vec) const {
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 8cde748b..1ec8c6b1 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -65,8 +65,6 @@ my $oraclen=0;
 my $oracleb=20;
 my $bleu_weight=1;
 my $use_make = 1;  # use make to parallelize line search
-my $dirargs='';
-my $density_prune;
 my $useqsub;
 my $pass_suffix = '';
 my $cpbin=1;
@@ -75,7 +73,6 @@ Getopt::Long::Configure("no_auto_abbrev");
 if (GetOptions(
 	"decoder=s" => \$decoderOpt,
 	"jobs=i" => \$jobs,
-	"density-prune=f" => \$density_prune,
 	"dont-clean" => \$disable_clean,
 	"pass-suffix=s" => \$pass_suffix,
 	"dry-run" => \$dryrun,
@@ -87,15 +84,7 @@ if (GetOptions(
 	"normalize=s" => \$normalize,
 	"pmem=s" => \$pmem,
         "cpbin!" => \$cpbin,
-	"rand-directions=i" => \$rand_directions,
-	"random_directions=i" => \$rand_directions,
-        "bleu_weight=s" => \$bleu_weight,
-        "no-primary!" => \$noprimary,
-        "max-similarity=s" => \$maxsim,
-        "oracle-directions=i" => \$oraclen,
-        "n-oracle=i" => \$oraclen,
-        "oracle-batch=i" => \$oracleb,
-        "directions-args=s" => \$dirargs,
+	"random-directions=i" => \$rand_directions,
 	"ref-files=s" => \$refFiles,
 	"metric=s" => \$metric,
 	"source-file=s" => \$srcFile,
@@ -107,10 +96,6 @@ if (GetOptions(
 	exit;
 }
 
-if (defined $density_prune) {
-  die "--density_prune n: n must be greater than 1.0\n" unless $density_prune > 1.0;
-}
-
 if ($useqsub) {
   $use_make = 0;
   die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
@@ -328,10 +313,7 @@ while (1){
 		print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n";
 		print STDERR unchecked_output("date");
 		$icc++;
-		my $nop=$noprimary?"--no_primary":"";
-		my $targs=$oraclen ? "--decoder_translations='$runFile.gz' ".get_comma_sep_refs('-references',$refFiles):"";
-		my $bwargs=$bleu_weight!=1 ? "--bleu_weight=$bleu_weight":"";
-		$cmd="$MAPINPUT -w $inweights -r $dir/hgs $bwargs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter";
+		$cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter";
 		print STDERR "COMMAND:\n$cmd\n";
 		check_call($cmd);
 		check_call("mkdir -p $dir/splag.$im1");
diff --git a/vest/mbr_kbest.cc b/vest/mbr_kbest.cc
deleted file mode 100644
index 2867b36b..00000000
--- a/vest/mbr_kbest.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <iostream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-
-#include "prob.h"
-#include "tdict.h"
-#include "scorer.h"
-#include "filelib.h"
-#include "stringlib.h"
-
-using namespace std;
-
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)")
-        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function")
-        ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")
-        ("output_list,L", "Show reranked list as output")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = false;
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-struct LossComparer {
-  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const {
-    return a.second < b.second;
-  }
-};
-
-bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) {
-  static string cache_id;
-  static pair<vector<WordID>, prob_t> cache_pair;
-  list->clear();
-  string cur_id;
-  if (cache_pair.first.size() > 0) {
-    list->push_back(cache_pair);
-    cur_id = cache_id;
-    cache_pair.first.clear();
-  }
-  string line;
-  string tstr;
-  while(*in) {
-    getline(*in, line);
-    if (line.empty()) continue;
-    size_t p1 = line.find(" ||| ");
-    if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
-    size_t p2 = line.find(" ||| ", p1 + 4);
-    if (p2 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
-    size_t p3 = line.rfind(" ||| ");
-    cache_id = line.substr(0, p1);
-    tstr = line.substr(p1 + 5, p2 - p1 - 5);
-    double val = strtod(line.substr(p3 + 5).c_str(), NULL);
-    TD::ConvertSentence(tstr, &cache_pair.first);
-    cache_pair.second.logeq(val);
-    if (cur_id.empty()) cur_id = cache_id;
-    if (cur_id == cache_id) {
-      list->push_back(cache_pair);
-      *sent_id = cur_id;
-      cache_pair.first.clear();
-    } else { break; }
-  }
-  return !list->empty();
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  const string metric = conf["loss_function"].as<string>();
-  const bool output_list = conf.count("output_list") > 0;
-  const string file = conf["input"].as<string>();
-  const double mbr_scale = conf["scale"].as<double>();
-  cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl;
-
-  ScoreType type = ScoreTypeFromString(metric);
-  vector<pair<vector<WordID>, prob_t> > list;
-  ReadFile rf(file);
-  string sent_id;
-  while(ReadKBestList(rf.stream(), &sent_id, &list)) {
-    vector<prob_t> joints(list.size());
-    const prob_t max_score = pow(list.front().second, mbr_scale);
-    prob_t marginal = prob_t::Zero();
-    for (int i = 0 ; i < list.size(); ++i) {
-      const prob_t joint = pow(list[i].second, mbr_scale) / max_score;
-      joints[i] = joint;
-      // cerr << "list[" << i << "] joint=" << log(joint) << endl;
-      marginal += joint;
-    }
-    int mbr_idx = -1;
-    vector<double> mbr_scores(output_list ? list.size() : 0);
-    double mbr_loss = numeric_limits<double>::max();
-    for (int i = 0 ; i < list.size(); ++i) {
-      vector<vector<WordID> > refs(1, list[i].first);
-      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl;
-      ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs);
-      double wl_acc = 0;
-      for (int j = 0; j < list.size(); ++j) {
-        if (i != j) {
-          ScoreP s = scorer->ScoreCandidate(list[j].first);
-          double loss = 1.0 - s->ComputeScore();
-          if (type == TER || type == AER) loss = 1.0 - loss;
-          double weighted_loss = loss * (joints[j] / marginal);
-          wl_acc += weighted_loss;
-          if ((!output_list) && wl_acc > mbr_loss) break;
-        }
-      }
-      if (output_list) mbr_scores[i] = wl_acc;
-      if (wl_acc < mbr_loss) {
-        mbr_loss = wl_acc;
-        mbr_idx = i;
-      }
-    }
-    // cerr << "ML translation: " << TD::GetString(list[0].first) << endl;
-    cerr << "MBR Best idx: " << mbr_idx << endl;
-    if (output_list) {
-      for (int i = 0; i < list.size(); ++i)
-        list[i].second.logeq(mbr_scores[i]);
-      sort(list.begin(), list.end(), LossComparer());
-      for (int i = 0; i < list.size(); ++i)
-        cout << sent_id << " ||| "
-             << TD::GetString(list[i].first) << " ||| "
-             << log(list[i].second) << endl;
-    } else {
-      cout << TD::GetString(list[mbr_idx].first) << endl;
-    }
-  }
-  return 0;
-}
-
diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc
index 0c094fd5..59d4f24f 100644
--- a/vest/mr_vest_generate_mapper_input.cc
+++ b/vest/mr_vest_generate_mapper_input.cc
@@ -1,320 +1,78 @@
-//TODO: debug segfault when references supplied, null shared_ptr when oracle
 #include <iostream>
 #include <vector>
-#include <sstream>
 
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
-#include "sampler.h"
 #include "filelib.h"
 #include "weights.h"
 #include "line_optimizer.h"
-#include "hg.h"
-#include "hg_io.h"
-#include "scorer.h"
-#include "oracle_bleu.h"
-#include "ff_bleu.h"
-
-const bool DEBUG_ORACLE=true;
-
-//TODO: decide on cdec_ff ffs, or just bleumodel - if just bleumodel, then do existing features on serialized hypergraphs remain?  weights (origin) is passed to oracle_bleu.h:ComputeOracle
-//void register_feature_functions();
-//FFRegistry ff_registry;
-namespace {
-void init_bleumodel() {
-  ff_registry.clear();
-  ff_registry.Register(new FFFactory<BLEUModel>);
-}
-
-struct init_ff {
-  init_ff() {
-    init_bleumodel();
-  }
-};
-//init_ff reg; // order of initialization?  ff_registry may not be init yet.  call in Run() instead.
-}
 
 using namespace std;
 namespace po = boost::program_options;
 
-typedef SparseVector<double> Dir;
-typedef Dir Point;
-
-void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true,bool verbose=true) {
-  //  return; //TODO: debug
-  if (min_dist<=0) return;
-  double max_s=1.-min_dist;
-  if (log&&verbose) *log<<"max allowed S="<<max_s<<endl;
-  unsigned N=dirs.size();
-  for (int i=0;i<N;++i) {
-    for (int j=i+1;j<N;++j) {
-      double s=dirs[i].tanimoto_coef(dirs[j]);
-      if (log&&verbose) *log<<"S["<<i<<","<<j<<"]="<<s<<' ';
-      if (s>max_s) {
-        if (log) *log << "Collapsing similar directions (T="<<s<<" > "<<max_s<<").  dirs["<<i<<"]="<<dirs[i]<<" dirs["<<j<<"]"<<endl;
-        if (avg) {
-          dirs[i]+=dirs[j];
-          dirs[i]/=2.;
-          if (log) *log<<" averaged="<<dirs[i];
-        }
-        if (log) *log<<endl;
-        swap(dirs[j],dirs[--N]);
-      }
-    }
-    if (log&&verbose) *log<<endl;
-
-  }
-  dirs.resize(N);
-}
-
-struct oracle_directions {
-  MT19937 rng;
-  OracleBleu oracle;
-  vector<Dir> directions;
-
-  bool start_random;
-  bool include_primary;
-  bool old_to_hope;
-  bool fear_to_hope;
-  unsigned n_random;
-  void AddPrimaryAndRandomDirections() {
-    LineOptimizer::CreateOptimizationDirections(
-      fids,n_random,&rng,&directions,include_primary);
-  }
-
-  void Print() {
-    for (int i = 0; i < dev_set_size; ++i)
-      for (int j = 0; j < directions.size(); ++j) {
-        cout << forest_file(i) <<" " << i<<" ";
-        print(cout,origin,"=",";");
-        cout<<" ";
-        print(cout,directions[j],"=",";");
-        cout<<"\n";
-      }
-  }
-
-  void AddOptions(po::options_description *opts) {
-    oracle.AddOptions(opts);
-    opts->add_options()
-      ("dev_set_size,s",po::value<unsigned>(&dev_set_size),"[REQD] Development set size (# of parallel sentences)")
-      ("forest_repository,r",po::value<string>(&forest_repository),"[REQD] Path to forest repository")
-      ("weights,w",po::value<string>(&weights_file),"[REQD] Current feature weights file")
-      ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
-      ("random_directions,d",po::value<unsigned>(&n_random)->default_value(10),"Number of random directions to run the line optimizer in")
-      ("no_primary,n","don't use the primary (orthogonal each feature alone) directions")
-      ("oracle_directions,O",po::value<unsigned>(&n_oracle)->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.")
-      ("oracle_start_random",po::bool_switch(&start_random),"sample random subsets of dev set for ALL oracle directions, not just those after a sequential run through it")
-      ("oracle_batch,b",po::value<unsigned>(&oracle_batch)->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences")
-      ("max_similarity,m",po::value<double>(&max_similarity)->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)).  0 means don't filter, 1 means only 1 direction allowed?")
-      ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)")
-      ("no_old_to_hope","don't emit the usual old -> hope oracle")
-      ("decoder_translations",po::value<string>(&decoder_translations_file)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. sentences-seen-so-far BLEU")
-      ;
-  }
-  void InitCommandLine(int argc, char *argv[], po::variables_map *conf) {
-    po::options_description opts("Configuration options");
-    AddOptions(&opts);
-    opts.add_options()("help,h", "Help");
-
-    po::options_description dcmdline_options;
-    dcmdline_options.add(opts);
-    po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-    po::notify(*conf);
-    if (conf->count("dev_set_size") == 0) {
-      cerr << "Please specify the size of the development set using -s N\n";
-      goto bad_cmdline;
-    }
-    if (conf->count("weights") == 0) {
-      cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n";
-      goto bad_cmdline;
-    }
-    if (conf->count("forest_repository") == 0) {
-      cerr << "Please specify the forest repository location using -r <DIR>\n";
-      goto bad_cmdline;
-    }
-    if (n_oracle && oracle.refs.empty()) {
-      cerr<<"Specify references when using oracle directions\n";
-      goto bad_cmdline;
-    }
-    if (conf->count("help")) {
-      cout << dcmdline_options << endl;
-      exit(0);
-    }
-
-    return;
-    bad_cmdline:
-      cerr << dcmdline_options << endl;
-      exit(1);
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("dev_set_size,s",po::value<unsigned>(),"[REQD] Development set size (# of parallel sentences)")
+        ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository")
+        ("weights,w",po::value<string>(),"[REQD] Current feature weights file")
+        ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
+        ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;
+  if (conf->count("dev_set_size") == 0) {  // dev_set_size is registered with short flag -s; -d is random_directions
+    cerr << "Please specify the size of the development set using -s N\n";
+    flag = true;
   }
-
-  int main(int argc, char *argv[]) {
-    po::variables_map conf;
-    InitCommandLine(argc,argv,&conf);
-    init_bleumodel();
-    UseConf(conf);
-    Run();
-    return 0;
+  if (conf->count("weights") == 0) {
+    cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n";
+    flag = true;
   }
-  bool verbose() const { return oracle.verbose; }
-  void Run() {
-//    register_feature_functions();
-    AddPrimaryAndRandomDirections();
-    AddOracleDirections();
-    compress_similar(directions,max_similarity,&cerr,true,verbose());
-    Print();
+  if (conf->count("forest_repository") == 0) {
+    cerr << "Please specify the forest repository location using -r <DIR>\n";
+    flag = true;
   }
-
-
-  Point origin; // old weights that gave model 1best.
-  vector<string> optimize_features;
-  void UseConf(po::variables_map const& conf) {
-    oracle.UseConf(conf);
-    include_primary=!conf.count("no_primary");
-    old_to_hope=!conf.count("no_old_to_hope");
-
-    if (conf.count("optimize_feature") > 0)
-      optimize_features=conf["optimize_feature"].as<vector<string> >();
-    Init();
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
   }
+}
 
-  string weights_file;
-  double max_similarity;
-  unsigned n_oracle, oracle_batch;
-  string forest_repository;
-  unsigned dev_set_size;
-  vector<Oracle> oracles;
-  vector<int> fids;
-  string forest_file(unsigned i) const {
-    ostringstream o;
-    o << forest_repository << '/' << i << ".json.gz";
-    return o.str();
-  }
-
-  oracle_directions() { }
-
-  Sentences model_hyps;
-
-  vector<ScoreP> model_scores;
-  bool have_doc;
-  void Init() {
-    have_doc=!decoder_translations_file.empty();
-    if (have_doc) {
-      model_hyps.Load(decoder_translations_file);
-      if (verbose()) model_hyps.Print(cerr,5);
-      model_scores.resize(model_hyps.size());
-      if (dev_set_size!=model_hyps.size()) {
-        cerr<<"You supplied decoder_translations with a different number of lines ("<<model_hyps.size()<<") than dev_set_size ("<<dev_set_size<<")"<<endl;
-        abort();
-      }
-      cerr << "Scoring model translations " << model_hyps << endl;
-      for (int i=0;i<model_hyps.size();++i) {
-        //TODO: what is scoreCcand? without clipping? do without for consistency w/ oracle
-        model_scores[i]=oracle.ds[i]->ScoreCandidate(model_hyps[i]);
-        assert(model_scores[i]);
-        if (verbose()) cerr<<"Before model["<<i<<"]: "<<ds().ScoreDetails()<<endl;
-        if (verbose()) cerr<<"model["<<i<<"]: "<<model_scores[i]->ScoreDetails()<<endl;
-        oracle.doc_score->PlusEquals(*model_scores[i]);
-        if (verbose()) cerr<<"After model["<<i<<"]: "<<ds().ScoreDetails()<<endl;
-      }
-      //TODO: compute doc bleu stats for each sentence, then when getting oracle temporarily exclude stats for that sentence (skip regular score updating)
-    }
-    start_random=false;
-    cerr << "Forest repo: " << forest_repository << endl;
-    assert(DirectoryExists(forest_repository));
-    vector<string> features;
-    vector<weight_t> dorigin;
-    Weights::InitFromFile(weights_file, &dorigin, &features);
-    if (optimize_features.size())
-      features=optimize_features;
-    Weights::InitSparseVector(dorigin, &origin);
-    fids.clear();
-    AddFeatureIds(features);
-    oracles.resize(dev_set_size);
-  }
-
-  void AddFeatureIds(vector<string> const& features) {
-    int i = fids.size();
-    fids.resize(fids.size()+features.size());
-    for (; i < features.size(); ++i)
-      fids[i] = FD::Convert(features[i]);
- }
-
-
-  std::string decoder_translations_file; // one per line
-  //TODO: is it worthwhile to get a complete document bleu first?  would take a list of 1best translations one per line from the decoders, rather than loading all the forests (expensive).  translations are in run.raw.N.gz - new arg
-  void adjust_doc(unsigned i,double scale=1.) {
-    oracle.doc_score->PlusEquals(*model_scores[i],scale);
-  }
-
-  Score &ds() {
-    return *oracle.doc_score;
-  }
-
-  Oracle const& ComputeOracle(unsigned i) {
-    Oracle &o=oracles[i];
-    if (o.is_null()) {
-      if (have_doc) {
-        if (verbose()) cerr<<"Before removing i="<<i<<" "<<ds().ScoreDetails()<<"\n";
-        adjust_doc(i,-1);
-      }
-      ReadFile rf(forest_file(i));
-      Hypergraph hg;
-      {
-        Timer t("Loading forest from JSON "+forest_file(i));
-        HypergraphIO::ReadFromJSON(rf.stream(), &hg);
-      }
-      if (verbose()) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails()<<endl;
-      o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin);
-      if (verbose()) {
-        cerr << o;
-        ScoreP hopesc=oracle.GetScore(o.hope.sentence,i);
-        oracle.doc_score->PlusEquals(*hopesc,1);
-        cerr<<"With hope: "<<ds().ScoreDetails()<<endl;
-        oracle.doc_score->PlusEquals(*hopesc,-1);
-        cerr<<"Without hope: "<<ds().ScoreDetails()<<endl;
-        cerr<<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()<<endl
-            <<" model="<<oracle.GetScore(o.model.sentence,i)->ScoreDetails()<<endl;
-        if (have_doc)
-          cerr<<" doc (should = model): "<<model_scores[i]->ScoreDetails()<<endl;
-      }
-      if (have_doc) {
-        adjust_doc(i,1);
-      } else
-        oracle.IncludeLastScore();
-    }
-    return o;
-  }
-
-  // if start_random is true, immediately sample w/ replacement from src sentences; otherwise, consume them sequentially until exhausted, then random.  oracle vectors are summed
-  void AddOracleDirections() {
-    MT19937::IntRNG rsg=rng.inclusive(0,dev_set_size-1);
-    unsigned b=0;
-    for(unsigned i=0;i<n_oracle;++i) {
-      Dir o2hope;
-      Dir fear2hope;
-      for (unsigned j=0;j<oracle_batch;++j,++b) {
-        Oracle const& o=ComputeOracle((start_random||b>=dev_set_size) ? rsg() : b);
-
-        if (old_to_hope)
-          o2hope+=o.ModelHopeGradient();
-        if (fear_to_hope)
-          fear2hope+=o.FearHopeGradient();
-      }
-      double N=(double)oracle_batch;
-      if (old_to_hope) {
-        o2hope/=N;
-        directions.push_back(o2hope);
-      }
-      if (fear_to_hope) {
-        fear2hope/=N;
-        directions.push_back(fear2hope);
-      }
+int main(int argc, char** argv) {  // prints one "forest-file sent-id origin direction" line per (sentence, direction) pair
+  RandomNumberGenerator<boost::mt19937> rng;
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  vector<string> features;
+  SparseVector<weight_t> origin;
+  vector<weight_t> w;
+  Weights::InitFromFile(conf["weights"].as<string>(), &w, &features);
+  Weights::InitSparseVector(w, &origin);
+  const string forest_repository = conf["forest_repository"].as<string>();
+  if (!DirectoryExists(forest_repository)) { cerr << "Forest repository not found: " << forest_repository << endl; return 1; }  // explicit check: an assert would vanish under NDEBUG
+  if (conf.count("optimize_feature") > 0)
+    features=conf["optimize_feature"].as<vector<string> >();  // explicit -o features replace the names read from the weights file
+  vector<SparseVector<weight_t> > directions;
+  vector<int> fids(features.size());
+  for (unsigned i = 0; i < features.size(); ++i)
+    fids[i] = FD::Convert(features[i]);
+  LineOptimizer::CreateOptimizationDirections(
+     fids,
+     conf["random_directions"].as<unsigned int>(),
+     &rng,
+     &directions);
+  unsigned dev_set_size = conf["dev_set_size"].as<unsigned>();
+  for (unsigned i = 0; i < dev_set_size; ++i) {
+    for (unsigned j = 0; j < directions.size(); ++j) {
+      cout << forest_repository << '/' << i << ".json.gz " << i << ' ';
+      print(cout, origin, "=", ";");
+      cout << ' ';
+      print(cout, directions[j], "=", ";");
+      cout << '\n';  // newline without a flush per record; cout is flushed at normal program exit
     }
   }
-};
-
-int main(int argc, char** argv) {
-  oracle_directions od;
-  return od.main(argc,argv);
+  return 0;
 }
diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc
index 8f6e085d..7d9625bc 100644
--- a/vest/mr_vest_map.cc
+++ b/vest/mr_vest_map.cc
@@ -82,20 +82,20 @@ int main(int argc, char** argv) {
     if (line.empty()) continue;
     istringstream is(line);
     int sent_id;
-    string file, s_origin, s_axis;
+    string file, s_origin, s_direction;
     // path-to-file (JSON) sent_ed starting-point search-direction
-    is >> file >> sent_id >> s_origin >> s_axis;
+    is >> file >> sent_id >> s_origin >> s_direction;
     SparseVector<double> origin;
-    assert(ReadSparseVectorString(s_origin, &origin));
-    SparseVector<double> axis;
-    assert(ReadSparseVectorString(s_axis, &axis));
-    // cerr << "File: " << file << "\nAxis: " << axis << "\n   X: " << origin << endl;
+    ReadSparseVectorString(s_origin, &origin);  // NOTE(review): result was previously checked via assert(); it is ignored now -- confirm parse failures are reported elsewhere
+    SparseVector<double> direction;
+    ReadSparseVectorString(s_direction, &direction);  // NOTE(review): same as above -- a malformed direction string is not detected here
+    // cerr << "File: " << file << "\nDir: " << direction << "\n   X: " << origin << endl;
     if (last_file != file) {
       last_file = file;
       ReadFile rf(file);
       HypergraphIO::ReadFromJSON(rf.stream(), &hg);
     }
-    ViterbiEnvelopeWeightFunction wf(origin, axis);
+    ViterbiEnvelopeWeightFunction wf(origin, direction);
     ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
     ErrorSurface es;
 
@@ -104,7 +104,7 @@ int main(int argc, char** argv) {
     // cerr << "Error surface has " << es.size() << " segments\n";
     string val;
     es.Serialize(&val);
-    cout << 'M' << ' ' << s_origin << ' ' << s_axis << '\t';
+    cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t';
     B64::b64encode(val.c_str(), val.size(), &cout);
     cout << endl << flush;
   }
-- 
cgit v1.2.3


From 89b662b51373f0f466d62a65d3f0a164d1d31b1c Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 27 Jan 2012 14:49:08 -0500
Subject: rename vest to dpmert (dynamic programming mert), rename variables
 and types to correspond to standard geometric concepts

---
 Makefile.am                               |   2 +-
 configure.ac                              |   2 +-
 dpmert/Makefile.am                        |  35 ++
 dpmert/README.shared-mem                  |   9 +
 dpmert/cat.pl                             |   4 +
 dpmert/ces.cc                             |  91 ++++
 dpmert/ces.h                              |  16 +
 dpmert/dpmert.pl                          | 700 ++++++++++++++++++++++++++++++
 dpmert/error_surface.cc                   |  42 ++
 dpmert/error_surface.h                    |  24 +
 dpmert/libcall.pl                         |  71 +++
 dpmert/line_mediator.pl                   | 116 +++++
 dpmert/line_optimizer.cc                  | 111 +++++
 dpmert/line_optimizer.h                   |  48 ++
 dpmert/lo_test.cc                         | 236 ++++++++++
 dpmert/mert_geometry.cc                   | 186 ++++++++
 dpmert/mert_geometry.h                    |  81 ++++
 dpmert/mr_dpmert_generate_mapper_input.cc |  78 ++++
 dpmert/mr_dpmert_map.cc                   | 112 +++++
 dpmert/mr_dpmert_reduce.cc                |  77 ++++
 dpmert/parallelize.pl                     | 423 ++++++++++++++++++
 dpmert/sentclient.c                       |  76 ++++
 dpmert/sentserver.c                       | 515 ++++++++++++++++++++++
 dpmert/sentserver.h                       |   6 +
 dpmert/tac.pl                             |   8 +
 dpmert/test_aer/README                    |   8 +
 dpmert/test_aer/cdec.ini                  |   3 +
 dpmert/test_aer/corpus.src                |   3 +
 dpmert/test_aer/grammar                   |  12 +
 dpmert/test_aer/ref.0                     |   3 +
 dpmert/test_aer/weights                   |  13 +
 dpmert/test_data/0.json.gz                | Bin 0 -> 13709 bytes
 dpmert/test_data/1.json.gz                | Bin 0 -> 204803 bytes
 dpmert/test_data/c2e.txt.0                |   2 +
 dpmert/test_data/c2e.txt.1                |   2 +
 dpmert/test_data/c2e.txt.2                |   2 +
 dpmert/test_data/c2e.txt.3                |   2 +
 dpmert/test_data/re.txt.0                 |   5 +
 dpmert/test_data/re.txt.1                 |   5 +
 dpmert/test_data/re.txt.2                 |   5 +
 dpmert/test_data/re.txt.3                 |   5 +
 vest/Makefile.am                          |  35 --
 vest/README.shared-mem                    |   9 -
 vest/cat.pl                               |   4 -
 vest/ces.cc                               |  91 ----
 vest/ces.h                                |  16 -
 vest/dist-vest.pl                         | 700 ------------------------------
 vest/error_surface.cc                     |  42 --
 vest/error_surface.h                      |  24 -
 vest/libcall.pl                           |  71 ---
 vest/line_mediator.pl                     | 116 -----
 vest/line_optimizer.cc                    | 111 -----
 vest/line_optimizer.h                     |  48 --
 vest/lo_test.cc                           | 236 ----------
 vest/mr_vest_generate_mapper_input.cc     |  78 ----
 vest/mr_vest_map.cc                       | 112 -----
 vest/mr_vest_reduce.cc                    |  77 ----
 vest/parallelize.pl                       | 423 ------------------
 vest/sentclient.c                         |  76 ----
 vest/sentserver.c                         | 515 ----------------------
 vest/sentserver.h                         |   6 -
 vest/tac.pl                               |   8 -
 vest/test_aer/README                      |   8 -
 vest/test_aer/cdec.ini                    |   3 -
 vest/test_aer/corpus.src                  |   3 -
 vest/test_aer/grammar                     |  12 -
 vest/test_aer/ref.0                       |   3 -
 vest/test_aer/weights                     |  13 -
 vest/test_data/0.json.gz                  | Bin 13709 -> 0 bytes
 vest/test_data/1.json.gz                  | Bin 204803 -> 0 bytes
 vest/test_data/c2e.txt.0                  |   2 -
 vest/test_data/c2e.txt.1                  |   2 -
 vest/test_data/c2e.txt.2                  |   2 -
 vest/test_data/c2e.txt.3                  |   2 -
 vest/test_data/re.txt.0                   |   5 -
 vest/test_data/re.txt.1                   |   5 -
 vest/test_data/re.txt.2                   |   5 -
 vest/test_data/re.txt.3                   |   5 -
 vest/viterbi_envelope.cc                  | 177 --------
 vest/viterbi_envelope.h                   |  81 ----
 80 files changed, 3137 insertions(+), 3128 deletions(-)
 create mode 100644 dpmert/Makefile.am
 create mode 100644 dpmert/README.shared-mem
 create mode 100755 dpmert/cat.pl
 create mode 100644 dpmert/ces.cc
 create mode 100644 dpmert/ces.h
 create mode 100755 dpmert/dpmert.pl
 create mode 100644 dpmert/error_surface.cc
 create mode 100644 dpmert/error_surface.h
 create mode 100644 dpmert/libcall.pl
 create mode 100755 dpmert/line_mediator.pl
 create mode 100644 dpmert/line_optimizer.cc
 create mode 100644 dpmert/line_optimizer.h
 create mode 100644 dpmert/lo_test.cc
 create mode 100644 dpmert/mert_geometry.cc
 create mode 100644 dpmert/mert_geometry.h
 create mode 100644 dpmert/mr_dpmert_generate_mapper_input.cc
 create mode 100644 dpmert/mr_dpmert_map.cc
 create mode 100644 dpmert/mr_dpmert_reduce.cc
 create mode 100755 dpmert/parallelize.pl
 create mode 100644 dpmert/sentclient.c
 create mode 100644 dpmert/sentserver.c
 create mode 100644 dpmert/sentserver.h
 create mode 100755 dpmert/tac.pl
 create mode 100644 dpmert/test_aer/README
 create mode 100644 dpmert/test_aer/cdec.ini
 create mode 100644 dpmert/test_aer/corpus.src
 create mode 100644 dpmert/test_aer/grammar
 create mode 100644 dpmert/test_aer/ref.0
 create mode 100644 dpmert/test_aer/weights
 create mode 100644 dpmert/test_data/0.json.gz
 create mode 100644 dpmert/test_data/1.json.gz
 create mode 100644 dpmert/test_data/c2e.txt.0
 create mode 100644 dpmert/test_data/c2e.txt.1
 create mode 100644 dpmert/test_data/c2e.txt.2
 create mode 100644 dpmert/test_data/c2e.txt.3
 create mode 100644 dpmert/test_data/re.txt.0
 create mode 100644 dpmert/test_data/re.txt.1
 create mode 100644 dpmert/test_data/re.txt.2
 create mode 100644 dpmert/test_data/re.txt.3
 delete mode 100644 vest/Makefile.am
 delete mode 100644 vest/README.shared-mem
 delete mode 100755 vest/cat.pl
 delete mode 100644 vest/ces.cc
 delete mode 100644 vest/ces.h
 delete mode 100755 vest/dist-vest.pl
 delete mode 100644 vest/error_surface.cc
 delete mode 100644 vest/error_surface.h
 delete mode 100644 vest/libcall.pl
 delete mode 100755 vest/line_mediator.pl
 delete mode 100644 vest/line_optimizer.cc
 delete mode 100644 vest/line_optimizer.h
 delete mode 100644 vest/lo_test.cc
 delete mode 100644 vest/mr_vest_generate_mapper_input.cc
 delete mode 100644 vest/mr_vest_map.cc
 delete mode 100644 vest/mr_vest_reduce.cc
 delete mode 100755 vest/parallelize.pl
 delete mode 100644 vest/sentclient.c
 delete mode 100644 vest/sentserver.c
 delete mode 100644 vest/sentserver.h
 delete mode 100755 vest/tac.pl
 delete mode 100644 vest/test_aer/README
 delete mode 100644 vest/test_aer/cdec.ini
 delete mode 100644 vest/test_aer/corpus.src
 delete mode 100644 vest/test_aer/grammar
 delete mode 100644 vest/test_aer/ref.0
 delete mode 100644 vest/test_aer/weights
 delete mode 100644 vest/test_data/0.json.gz
 delete mode 100644 vest/test_data/1.json.gz
 delete mode 100644 vest/test_data/c2e.txt.0
 delete mode 100644 vest/test_data/c2e.txt.1
 delete mode 100644 vest/test_data/c2e.txt.2
 delete mode 100644 vest/test_data/c2e.txt.3
 delete mode 100644 vest/test_data/re.txt.0
 delete mode 100644 vest/test_data/re.txt.1
 delete mode 100644 vest/test_data/re.txt.2
 delete mode 100644 vest/test_data/re.txt.3
 delete mode 100644 vest/viterbi_envelope.cc
 delete mode 100644 vest/viterbi_envelope.h

(limited to 'vest')

diff --git a/Makefile.am b/Makefile.am
index 59c2fc0a..c0fcb1f6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,7 +1,7 @@
 # warning - the subdirectories in the following list should
 # be kept in topologically sorted order. Also, DO NOT introduce
 # cyclic dependencies between these directories!
-SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training mira vest pro-train extools gi/pf gi/markov_al
+SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training mira dpmert pro-train extools gi/pf gi/markov_al
 
 #gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
 
diff --git a/configure.ac b/configure.ac
index 131a1705..cd78ee72 100644
--- a/configure.ac
+++ b/configure.ac
@@ -113,4 +113,4 @@ then
   AM_CONDITIONAL([GLC], true)
 fi
 
-AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile vest/Makefile pro-train/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile)
+AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile dpmert/Makefile pro-train/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile)
diff --git a/dpmert/Makefile.am b/dpmert/Makefile.am
new file mode 100644
index 00000000..2676fb50
--- /dev/null
+++ b/dpmert/Makefile.am
@@ -0,0 +1,35 @@
+bin_PROGRAMS = \
+  mr_dpmert_map \
+  mr_dpmert_reduce \
+  mr_dpmert_generate_mapper_input \
+  sentserver \
+  sentclient
+
+if HAVE_GTEST
+noinst_PROGRAMS = \
+  lo_test
+TESTS = lo_test
+endif
+
+sentserver_SOURCES = sentserver.c
+sentserver_LDFLAGS = -all-static -pthread
+
+sentclient_SOURCES = sentclient.c
+sentclient_LDFLAGS = -all-static -pthread
+
+mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc
+mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+# nbest2hg_SOURCES = nbest2hg.cc
+# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz
+
+mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc
+mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc
+mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc
+lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/dpmert/README.shared-mem b/dpmert/README.shared-mem
new file mode 100644
index 00000000..7728efc0
--- /dev/null
+++ b/dpmert/README.shared-mem
@@ -0,0 +1,9 @@
+If you want to run dpmert.pl on a very large shared memory machine, do the
+following:
+
+  ./dpmert.pl --jobs J --weights weights.init --source-file=dev.src --ref-files='dev.ref.*' cdec.ini
+
+This will use J parallel jobs both to run the decoder and (via make) to do the line search.
+Since the decoder must load grammars, language models, etc., the best value of J will depend
+on the system you are running on and the complexity of the models used for decoding.
+
diff --git a/dpmert/cat.pl b/dpmert/cat.pl
new file mode 100755
index 00000000..2ecba3f9
--- /dev/null
+++ b/dpmert/cat.pl
@@ -0,0 +1,4 @@
+#!/usr/bin/perl
+# Copies its input (the files named on the command line, or STDIN) to STDOUT line by line.
+$|=1;  # turn on autoflush for the currently selected output handle
+print while(<>);
diff --git a/dpmert/ces.cc b/dpmert/ces.cc
new file mode 100644
index 00000000..a85454da
--- /dev/null
+++ b/dpmert/ces.cc
@@ -0,0 +1,91 @@
+#include "ces.h"
+
+#include <vector>
+#include <sstream>
+#include <boost/shared_ptr.hpp>
+
+// TODO, if AER is to be optimized again, we will need this
+// #include "aligner.h"
+#include "lattice.h"
+#include "mert_geometry.h"
+#include "error_surface.h"
+#include "ns.h"
+
+using boost::shared_ptr;
+using namespace std;
+
+const bool minimize_segments = true;    // if adjacent segments have equal scores, merge them
+// Fills *env with one ErrorSegment per kept segment of ve: the segment's x plus the change in sufficient statistics.
+void ComputeErrorSurface(const SegmentEvaluator& ss,      // scores each candidate translation
+                         const ConvexHull& ve,            // supplies the sorted segments to walk
+                         ErrorSurface* env,               // output; resized to the number of segments kept
+                         const EvaluationMetric* metric,  // not referenced in this function body
+                         const Hypergraph& hg) {          // only referenced inside the disabled #if 0 block
+  vector<WordID> prev_trans;
+  const vector<shared_ptr<MERTPoint> >& ienv = ve.GetSortedSegs();
+  env->resize(ienv.size());  // upper bound; shrunk to j at the end
+  SufficientStats prev_score; // defaults to 0
+  int j = 0;  // number of output segments written so far
+  for (int i = 0; i < ienv.size(); ++i) {
+    const MERTPoint& seg = *ienv[i];
+    vector<WordID> trans;
+#if 0
+    if (type == AER) {
+      vector<bool> edges(hg.edges_.size(), false);
+      seg.CollectEdgesUsed(&edges);  // get the set of edges in the viterbi
+                                     // alignment
+      ostringstream os;
+      const string* psrc = ss.GetSource();
+      if (psrc == NULL) {
+        cerr << "AER scoring in VEST requires source, but it is missing!\n";
+        abort();
+      }
+      size_t pos = psrc->rfind(" ||| ");
+      if (pos == string::npos) {
+        cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl;
+        abort();
+      }
+      Lattice src;
+      Lattice ref;
+      LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src);
+      LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref);
+      AlignerTools::WriteAlignment(src, ref, hg, &os, true, 0, &edges);
+      string tstr = os.str();
+      TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans);
+    } else {
+#endif
+      seg.ConstructTranslation(&trans);
+    //}
+    //cerr << "Scoring: " << TD::GetString(trans) << endl;
+    if (trans == prev_trans) {  // same translation as the previous segment: nothing to rescore
+      if (!minimize_segments) {  // when not merging, still emit a segment with an empty delta
+        ErrorSegment& out = (*env)[j];
+        out.delta.fields.clear();
+        out.x = seg.x;
+	++j;
+      }
+      //cerr << "Identical translation, skipping scoring\n";
+    } else {
+      SufficientStats score;
+      ss.Evaluate(trans, &score);
+      // cerr << "score= " << score->ComputeScore() << "\n";
+      //string x1; score.Encode(&x1); cerr << "STATS: " << x1 << endl;
+      const SufficientStats delta = score - prev_score;  // segments store differences, not absolute statistics
+      //string x2; delta.Encode(&x2); cerr << "DELTA: " << x2 << endl;
+      //string xx; delta.Encode(&xx); cerr << xx << endl;
+      prev_trans.swap(trans);  // remember this translation; trans is not used again in this iteration
+      prev_score = score;
+      if ((!minimize_segments) || (!delta.IsAdditiveIdentity())) {  // skip segments whose delta changes nothing
+        ErrorSegment& out = (*env)[j];
+        out.delta = delta;
+        out.x = seg.x;
+        ++j;
+      }
+    }
+  }
+  // cerr << " In segments: " << ienv.size() << endl;
+  // cerr << "Out segments: " << j << endl;
+  assert(j > 0);  // at least one segment must have been emitted
+  env->resize(j);  // drop the unused tail left by the initial resize
+}
+
diff --git a/dpmert/ces.h b/dpmert/ces.h
new file mode 100644
index 00000000..e4fa2080
--- /dev/null
+++ b/dpmert/ces.h
@@ -0,0 +1,16 @@
+#ifndef _CES_H_
+#define _CES_H_
+
+class ConvexHull;
+class Hypergraph;
+class SegmentEvaluator;
+class ErrorSurface;
+class EvaluationMetric;
+
+void ComputeErrorSurface(const SegmentEvaluator& ss,
+                         const ConvexHull& convex_hull,
+                         ErrorSurface* es,
+                         const EvaluationMetric* metric,
+                         const Hypergraph& hg);
+
+#endif
diff --git a/dpmert/dpmert.pl b/dpmert/dpmert.pl
new file mode 100755
index 00000000..52ce0fc0
--- /dev/null
+++ b/dpmert/dpmert.pl
@@ -0,0 +1,700 @@
+#!/usr/bin/env perl
+use strict;
+my @ORIG_ARGV=@ARGV;
+use Cwd qw(getcwd);
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+
+# Skip local config (used for distributing jobs) if we're running in local-only mode
+use LocalConfig;
+use Getopt::Long;
+use IPC::Open2;
+use POSIX ":sys_wait_h";
+my $QSUB_CMD = qsub_args(mert_memory());
+
+require "libcall.pl";
+
+# Default settings
+my $srcFile;
+my $refFiles;
+my $default_jobs = env_default_jobs();
+my $bin_dir = $SCRIPT_DIR;
+die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
+my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
+my $MAPINPUT = "$bin_dir/mr_dpmert_generate_mapper_input";
+my $MAPPER = "$bin_dir/mr_dpmert_map";
+my $REDUCER = "$bin_dir/mr_dpmert_reduce";
+my $parallelize = "$bin_dir/parallelize.pl";
+my $libcall = "$bin_dir/libcall.pl";
+my $sentserver = "$bin_dir/sentserver";
+my $sentclient = "$bin_dir/sentclient";
+my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
+
+my $SCORER = $FAST_SCORE;
+die "Can't find $MAPPER" unless -x $MAPPER;
+my $cdec = "$bin_dir/../decoder/cdec";
+die "Can't find decoder in $cdec" unless -x $cdec;
+die "Can't find $parallelize" unless -x $parallelize;
+die "Can't find $libcall" unless -e $libcall;
+my $decoder = $cdec;
+my $lines_per_mapper = 400;
+my $rand_directions = 15;
+my $iteration = 1;
+my $best_weights;
+my $max_iterations = 15;
+my $optimization_iters = 6;
+my $jobs = $default_jobs;   # number of decode nodes
+my $pmem = "9g";
+my $disable_clean = 0;
+my %seen_weights;
+my $normalize;
+my $help = 0;
+my $epsilon = 0.0001;
+my $interval = 5;
+my $dryrun = 0;
+my $last_score = -10000000;
+my $metric = "ibm_bleu";
+my $dir;
+my $iniFile;
+my $weights;
+my $initialWeights;
+my $decoderOpt;
+my $noprimary;
+my $maxsim=0;
+my $oraclen=0;
+my $oracleb=20;
+my $bleu_weight=1;
+my $use_make = 1;  # use make to parallelize line search
+my $useqsub;
+my $pass_suffix = '';
+my $cpbin=1;
+# Process command-line options
+Getopt::Long::Configure("no_auto_abbrev");  # option names must be spelled out in full
+if (GetOptions(
+	"decoder=s" => \$decoderOpt,
+	"jobs=i" => \$jobs,
+	"dont-clean" => \$disable_clean,
+	"pass-suffix=s" => \$pass_suffix,
+	"dry-run" => \$dryrun,
+	"epsilon=s" => \$epsilon,
+	"help" => \$help,
+	"interval=i" => \$interval,  # takes an integer value; without "=i" the value stayed in @ARGV and forced the usage exit
+	"qsub" => \$useqsub,
+	"max-iterations=i" => \$max_iterations,
+	"normalize=s" => \$normalize,
+	"pmem=s" => \$pmem,
+	"cpbin!" => \$cpbin,  # negatable: --nocpbin skips copying the binaries into the work dir
+	"random-directions=i" => \$rand_directions,
+	"ref-files=s" => \$refFiles,
+	"metric=s" => \$metric,
+	"source-file=s" => \$srcFile,
+	"weights=s" => \$initialWeights,
+	"workdir=s" => \$dir,
+	"opt-iterations=i" => \$optimization_iters,
+) == 0 || @ARGV!=1 || $help) {  # bad option, wrong number of positional args, or --help
+	print_help();
+	exit;
+}
+
+if ($useqsub) {  # --qsub was given
+  $use_make = 0;  # the mapper jobs are then submitted with qsub instead of being driven by make
+  die "LocalConfig.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
+}
+
+my @missing_args = ();
+if (!defined $srcFile) { push @missing_args, "--source-file"; }
+if (!defined $refFiles) { push @missing_args, "--ref-files"; }
+if (!defined $initialWeights) { push @missing_args, "--weights"; }
+die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args);
+
+if ($metric =~ /^(combi|ter)$/i) {
+  $lines_per_mapper = 40;
+} elsif ($metric =~ /^meteor$/i) {
+  $lines_per_mapper = 2000;   # start up time is really high
+}
+
+($iniFile) = @ARGV;
+
+
+sub write_config;
+sub enseg;
+sub print_help;
+
+my $nodelist;
+my $host =check_output("hostname"); chomp $host;
+my $bleu;
+my $interval_count = 0;
+my $logfile;
+my $projected_score;
+
+# used in sorting scores
+my $DIR_FLAG = '-r';
+if ($metric =~ /^ter$|^aer$/i) {
+  $DIR_FLAG = '';
+}
+
+my $refs_comma_sep = get_comma_sep_refs('r',$refFiles);
+
+unless ($dir){
+	$dir = "dpmert";
+}
+unless ($dir =~ /^\//){  # convert relative path to absolute path
+	my $basedir = check_output("pwd");
+	chomp $basedir;
+	$dir = "$basedir/$dir";
+}
+
+if ($decoderOpt){ $decoder = $decoderOpt; }
+
+
+# Initializations and helper functions
+srand;
+
+my @childpids = ();
+my @cleanupcmds = ();
+
+sub cleanup {
+	print STDERR "Cleanup...\n";
+	for my $pid (@childpids){ unchecked_call("kill $pid"); }
+	for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); }
+	exit 1;
+};
+# Always call cleanup, no matter how we exit
+*CORE::GLOBAL::exit = 
+    sub{ cleanup(); }; 
+$SIG{INT} = "cleanup";
+$SIG{TERM} = "cleanup";
+$SIG{HUP} = "cleanup";
+
+my $decoderBase = check_output("basename $decoder"); chomp $decoderBase;
+my $newIniFile = "$dir/$decoderBase.ini";
+my $inputFileName = "$dir/input";
+my $user = $ENV{"USER"};
+
+
+# process ini file
+-e $iniFile || die "Error: could not open $iniFile for reading\n";
+open(INI, $iniFile);
+
+use File::Basename qw(basename);
+#pass bindir, refs to vars holding bin
+sub modbin {
+    local $_;
+    my $bindir=shift;
+    check_call("mkdir -p $bindir");
+    -d $bindir || die "couldn't make bindir $bindir";
+    for (@_) {
+        my $src=$$_;
+        $$_="$bindir/".basename($src);
+        check_call("cp -p $src $$_");
+    }
+}
+sub dirsize {  # number of readdir entries of directory $_[0] minus one ("." and ".." are counted), so empty => 1
+    opendir(my $dh, $_[0]) or return 0;  # lexical handle is closed on return; unreadable dir counts as empty
+    return scalar(my @entries = readdir($dh)) - 1;  # list-context readdir: scalar(readdir) would yield one name, not a count
+}
+if ($dryrun){  # --dry-run: only show the configuration
+	write_config(*STDERR);
+	exit 0;
+} else {
+	if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-dpmert.pl outputs
+		die "ERROR: working dir $dir already exists\n\n";
+	} else {
+		-e $dir || mkdir $dir;
+		mkdir "$dir/hgs";
+		modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin;
+		mkdir "$dir/scripts";
+		my $cmdfile="$dir/rerun-dpmert.sh";  # records the invocation so the run can be repeated
+		open CMD,'>',$cmdfile or die "Can't write $cmdfile: $!";
+		print CMD "cd ",&getcwd,"\n";
+#        print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted.
+		my $cline=&cmdline."\n";
+		print CMD $cline;
+		close CMD;
+		print STDERR $cline;
+		chmod(0755,$cmdfile);
+		unless (-e $initialWeights) {
+			print STDERR "Please specify an initial weights file with --weights\n";
+			print_help();
+			exit;
+		}
+		check_call("cp $initialWeights $dir/weights.0");  # iteration 1 decodes with weights.0
+		die "Can't find weights.0" unless (-e "$dir/weights.0");
+	}
+	write_config(*STDERR);
+}
+
+
+# Generate initial files and values
+check_call("cp $iniFile $newIniFile");
+$iniFile = $newIniFile;
+
+my $newsrc = "$dir/dev.input";
+enseg($srcFile, $newsrc);
+$srcFile = $newsrc;
+my $devSize = 0;
+open F, "<$srcFile" or die "Can't read $srcFile: $!";
+while(<F>) { $devSize++; }
+close F;
+
+unless($best_weights){ $best_weights = $weights; }
+unless($projected_score){ $projected_score = 0.0; }
+$seen_weights{$weights} = 1;
+
+my $random_seed = int(time / 1000);
+my $lastWeightsFile;
+my $lastPScore = 0;
+# main optimization loop
+while (1){
+	print STDERR "\n\nITERATION $iteration\n==========\n";
+
+	if ($iteration > $max_iterations){
+		print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n";
+		last;
+	}
+	# iteration-specific files
+	my $runFile="$dir/run.raw.$iteration";
+	my $onebestFile="$dir/1best.$iteration";
+	my $logdir="$dir/logs.$iteration";
+	my $decoderLog="$logdir/decoder.sentserver.log.$iteration";
+	my $scorerLog="$logdir/scorer.log.$iteration";
+	check_call("mkdir -p $logdir");
+
+
+	#decode
+	print STDERR "RUNNING DECODER AT ";
+	print STDERR unchecked_output("date");
+	my $im1 = $iteration - 1;
+	my $weightsFile="$dir/weights.$im1";
+	my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs";
+	my $pcmd;
+	if ($use_make) {
+		$pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --";
+	} else {
+		$pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --";
+	}
+	my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
+	print STDERR "COMMAND:\n$cmd\n";
+	check_bash_call($cmd);
+        my $num_hgs;
+        my $num_topbest;
+        my $retries = 0;
+	while($retries < 5) {
+	    $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l");
+	    $num_topbest = check_output("wc -l < $runFile");
+	    print STDERR "NUMBER OF HGs: $num_hgs\n";
+	    print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n";
+	    if($devSize == $num_hgs && $devSize == $num_topbest) {
+		last;
+	    } else {
+		print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n";
+		sleep(3);
+	    }
+	    $retries++;
+	}
+	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
+	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
+	chomp $dec_score;
+	print STDERR "DECODER SCORE: $dec_score\n";
+
+	# save space
+	check_call("gzip -f $runFile");
+	check_call("gzip -f $decoderLog");
+
+	# run optimizer
+	print STDERR "RUNNING OPTIMIZER AT ";
+	print STDERR unchecked_output("date");
+	my $mergeLog="$logdir/prune-merge.log.$iteration";
+
+	my $score = 0;
+	my $icc = 0;
+	my $inweights="$dir/weights.$im1";
+	for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) {
+		print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n";
+		print STDERR unchecked_output("date");
+		$icc++;
+		$cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter";
+		print STDERR "COMMAND:\n$cmd\n";
+		check_call($cmd);
+		check_call("mkdir -p $dir/splag.$im1");
+		$cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput.";
+		print STDERR "COMMAND:\n$cmd\n";
+		check_call($cmd);
+		opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!";
+		my @shards = grep { /^mapinput\./ } readdir(DIR);
+		closedir DIR;
+		die "No shards!" unless scalar @shards > 0;
+		my $joblist = "";
+		my $nmappers = 0;
+		my @mapoutputs = ();
+		@cleanupcmds = ();
+		my %o2i = ();
+		my $first_shard = 1;
+		my $mkfile; # only used with makefiles
+		my $mkfilename;
+		if ($use_make) {
+			$mkfilename = "$dir/splag.$im1/domap.mk";
+			open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!";
+			print $mkfile "all: $dir/splag.$im1/map.done\n\n";
+		}
+		my @mkouts = ();  # only used with makefiles
+		for my $shard (@shards) {
+			my $mapoutput = $shard;
+			my $client_name = $shard;
+			$client_name =~ s/mapinput.//;
+			$client_name = "dpmert.$client_name";
+			$mapoutput =~ s/mapinput/mapoutput/;
+			push @mapoutputs, "$dir/splag.$im1/$mapoutput";
+			$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
+			my $script = "$MAPPER -s $srcFile -m $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
+			if ($use_make) {
+				my $script_file = "$dir/scripts/map.$shard";
+				open F, ">$script_file" or die "Can't write $script_file: $!";
+				print F "#!/bin/bash\n";
+				print F "$script\n";
+				close F;
+				my $output = "$dir/splag.$im1/$mapoutput";
+				push @mkouts, $output;
+				chmod(0755, $script_file) or die "Can't chmod $script_file: $!";
+				if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
+				print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n";
+			} else {
+				my $script_file = "$dir/scripts/map.$shard";
+				open F, ">$script_file" or die "Can't write $script_file: $!";
+				print F "$script\n";
+				close F;
+				if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
+
+				$nmappers++;
+				my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
+				my $jobid = check_output("$qcmd");
+				chomp $jobid;
+				$jobid =~ s/^(\d+)(.*?)$/\1/g;
+				$jobid =~ s/^Your job (\d+) .*$/\1/;
+		 	 	push(@cleanupcmds, "qdel $jobid 2> /dev/null");
+				print STDERR " $jobid";
+				if ($joblist == "") { $joblist = $jobid; }
+				else {$joblist = $joblist . "\|" . $jobid; }
+			}
+		}
+		if ($use_make) {
+			print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n";
+			close $mkfile;
+			my $mcmd = "make -j $jobs -f $mkfilename";
+			print STDERR "\nExecuting: $mcmd\n";
+			check_call($mcmd);
+		} else {
+			print STDERR "\nLaunched $nmappers mappers.\n";
+      			sleep 8;
+			print STDERR "Waiting for mappers to complete...\n";
+			while ($nmappers > 0) {
+			  sleep 5;
+			  my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '")));
+			  $nmappers = scalar @livejobs;
+			}
+			print STDERR "All mappers complete.\n";
+		}
+		my $tol = 0;
+		my $til = 0;
+		for my $mo (@mapoutputs) {
+		  my $olines = get_lines($mo);
+		  my $ilines = get_lines($o2i{$mo});
+		  $tol += $olines;
+		  $til += $ilines;
+		  die "$mo: output lines ($olines) doesn't match input lines ($ilines)" unless $olines==$ilines;
+		}
+		print STDERR "Results for $tol/$til lines\n";
+		print STDERR "\nSORTING AND RUNNING VEST REDUCER\n";
+		print STDERR unchecked_output("date");
+		$cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -m $metric > $dir/redoutput.$im1";
+		print STDERR "COMMAND:\n$cmd\n";
+		check_bash_call($cmd);
+		$cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1";
+		# sort returns failure even when it doesn't fail for some reason
+		my $best=unchecked_output("$cmd"); chomp $best;
+		print STDERR "$best\n";
+		my ($oa, $x, $xscore) = split /\|/, $best;
+		$score = $xscore;
+		print STDERR "PROJECTED SCORE: $score\n";
+		if (abs($x) < $epsilon) {
+			print STDERR "\nOPTIMIZER: no score improvement: abs($x) < $epsilon\n";
+			last;
+		}
+                my $psd = $score - $last_score;
+                $last_score = $score;
+		if (abs($psd) < $epsilon) {
+			print STDERR "\nOPTIMIZER: no score improvement: abs($psd) < $epsilon\n";
+			last;
+		}
+		my ($origin, $axis) = split /\s+/, $oa;
+
+		my %ori = convert($origin);
+		my %axi = convert($axis);
+
+		my $finalFile="$dir/weights.$im1-$opt_iter";
+		open W, ">$finalFile" or die "Can't write: $finalFile: $!";
+                my $norm = 0;
+		for my $k (sort keys %ori) {
+			my $dd = $ori{$k} + $axi{$k} * $x;
+                        $norm += $dd * $dd;
+		}
+                $norm = sqrt($norm);
+		$norm = 1;
+		for my $k (sort keys %ori) {
+			my $v = ($ori{$k} + $axi{$k} * $x) / $norm;
+			print W "$k $v\n";
+		}
+		check_call("rm $dir/splag.$im1/*");
+		$inweights = $finalFile;
+	}
+	$lastWeightsFile = "$dir/weights.$iteration";
+	check_call("cp $inweights $lastWeightsFile");
+	if ($icc < 2) {
+		print STDERR "\nREACHED STOPPING CRITERION: score change too little\n";
+		last;
+	}
+	$lastPScore = $score;
+	$iteration++;
+	print STDERR "\n==========\n";
+}
+
+print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
+
+print STDOUT "$lastWeightsFile\n";
+
+exit 0;
+
+sub normalize_weights {
+  my ($rfn, $rpts, $feat) = @_;
+  my @feat_names = @$rfn;
+  my @pts = @$rpts;
+  my $z = 1.0;
+  for (my $i=0; $i < scalar @feat_names; $i++) {
+    if ($feat_names[$i] eq $feat) {
+      $z = $pts[$i];
+      last;
+    }
+  }
+  for (my $i=0; $i < scalar @feat_names; $i++) {
+    $pts[$i] /= $z;
+  }
+  print STDERR " NORM WEIGHTS: @pts\n";
+  return @pts;
+}
+
+sub get_lines {  # returns the number of lines in file $fn; dies if the file cannot be opened
+  my $fn = shift @_;
+  open(my $fh, '<', $fn) or die "Couldn't read $fn: $!";  # lexical handle: closed automatically on return
+  my $lc = 0;
+  $lc++ while (<$fh>);
+  return $lc;
+}
+
+sub get_comma_sep_refs {  # lets the shell expand $pattern, then prefixes every resulting word with "-$flag "
+  my ($flag, $pattern) = @_;
+  my $expanded = check_output("echo $pattern");
+  chomp $expanded;
+  my @flagged = map { "-$flag $_" } split(/\s+/, $expanded);
+  return @flagged ? join(' ', @flagged) : "-$flag ";  # no words at all still yields the bare flag
+}
+
+sub read_weights_file {
+  my ($file) = @_;
+  open F, "<$file" or die "Couldn't read $file: $!";
+  my @r = ();
+  my $pm = -1;
+  while(<F>) {
+    next if /^#/;
+    next if /^\s*$/;
+    chomp;
+    if (/^(.+)\s+(.+)$/) {
+      my $m = $1;
+      my $w = $2;
+      die "Weights out of order: $m <= $pm" unless $m > $pm;
+      push @r, $w;
+    } else {
+      warn "Unexpected feature name in weight file: $_";
+    }
+  }
+  close F;
+  return join ' ', @r;
+}
+
+# subs
+sub write_config {
+	my $fh = shift;
+	my $cleanup = "yes";
+	if ($disable_clean) {$cleanup = "no";}
+
+	print $fh "\n";
+	print $fh "DECODER:          $decoder\n";
+	print $fh "INI FILE:         $iniFile\n";
+	print $fh "WORKING DIR:      $dir\n";
+	print $fh "SOURCE (DEV):     $srcFile\n";
+	print $fh "REFS (DEV):       $refFiles\n";
+	print $fh "EVAL METRIC:      $metric\n";
+	print $fh "START ITERATION:  $iteration\n";
+	print $fh "MAX ITERATIONS:   $max_iterations\n";
+	print $fh "PARALLEL JOBS:    $jobs\n";
+	print $fh "HEAD NODE:        $host\n";
+	print $fh "PMEM (DECODING):  $pmem\n";
+	print $fh "CLEANUP:          $cleanup\n";
+	print $fh "INITIAL WEIGHTS:  $initialWeights\n";
+}
+
+sub update_weights_file {
+  my ($neww, $rfn, $rpts) = @_;
+  my @feats = @$rfn;
+  my @pts = @$rpts;
+  my $num_feats = scalar @feats;
+  my $num_pts = scalar @pts;
+  die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts;
+  open G, ">$neww" or die;
+  for (my $i = 0; $i < $num_feats; $i++) {
+    my $f = $feats[$i];
+    my $lambda = $pts[$i];
+    print G "$f $lambda\n";
+  }
+  close G;
+}
+
+sub enseg {  # copies $src to $newsrc, wrapping every line that is not already a <seg> element in <seg id="N">...</seg>
+	my $src = shift;
+	my $newsrc = shift;
+	open(SRC, $src) or die "Can't read $src: $!";  # fail loudly instead of silently producing an empty dev set
+	open(NEWSRC, ">$newsrc") or die "Can't write $newsrc: $!";
+	my $i=0;  # zero-based line index, used as the generated segment id
+	while (my $line=<SRC>){
+		chomp $line;
+		if ($line =~ /^\s*<seg/i) {
+		    if($line =~ /id="[0-9]+"/) {
+			print NEWSRC "$line\n";
+		    } else {
+			die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+		    }
+		} else {
+			print NEWSRC "<seg id=\"$i\">$line</seg>\n";
+		}
+		$i++;
+	}
+	close SRC;
+	close NEWSRC;
+}
+
+sub print_help {
+	# Prints the usage message to STDOUT; the text is a double-quoted heredoc, so variables interpolate.
+	my $executable = check_output("basename $0"); chomp $executable;
+    print << "Help";
+
+Usage: $executable [options] <ini file>
+
+	$executable [options] <ini file>
+		Runs a complete MERT optimization using the decoder configuration
+                in <ini file>. Required options are --weights, --source-file, and
+		--ref-files.
+
+Options:
+
+	--help
+		Print this message and exit.
+
+	--max-iterations <M>
+		Maximum number of iterations to run.  If not specified, defaults
+		to 15.
+
+	--pass-suffix <S>
+		If the decoder is doing multi-pass decoding, the pass suffix "2",
+		"3", etc., is used to control what iteration of weights is set.
+
+	--ref-files <files>
+		Dev set ref files.  This option takes only a single string argument.
+		To use multiple files (including file globbing), this argument should
+		be quoted.
+
+	--metric <method>
+		Metric to optimize.
+		Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
+
+	--normalize <feature-name>
+		After each iteration, rescale all feature weights such that feature-
+		name has a weight of 1.0.
+
+	--random-directions <num>
+		MERT will attempt to optimize along all of the principal directions,
+		set this parameter to explore other directions. Defaults to 15.
+
+	--source-file <file>
+		Dev set source file.
+
+	--weights <file>
+		A file specifying initial feature weights.  The format is
+		FeatureName_1 value1
+		FeatureName_2 value2
+		**All and only the weights listed in <file> will be optimized!**
+
+	--workdir <dir>
+		Directory for intermediate and output files.  If not specified, a
+		directory named "dpmert" is used.  A relative path is resolved
+		against the current working directory.  The directory is created
+		if it does not already exist; weights files (weights.N), decoder
+		output, hypergraphs (hgs/) and per-iteration logs (logs.N) are
+		all written beneath it.
+
+Job control options:
+
+	--jobs <I>
+		Number of decoder processes to run in parallel. [default=$default_jobs]
+
+	--qsub
+		Use qsub to run jobs in parallel (qsub must be configured in
+		environment/LocalConfig.pm)
+
+	--pmem <N>
+		Amount of physical memory requested for parallel decoding jobs
+		(used with qsub requests only)
+
+Help
+}
+
+# Parses a "k1=v1;k2=v2;..." string into a hash and returns it as a key/value list.
+sub convert {
+  my $encoded = shift;
+  my %parsed;
+  foreach my $pair (split /;/, $encoded) {
+    my ($name, $value) = split /=/, $pair;
+    $parsed{$name} = $value;
+  }
+  return %parsed;
+}
+
+
+
+sub cmdline {
+    return join ' ',($0,@ORIG_ARGV);
+}
+
+#buggy: last arg gets quoted sometimes?
+my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]};
+my $shell_escape_in_quote=qr{[\\"\$`!]};
+
+sub escape_shell {
+    my ($arg)=@_;
+    return undef unless defined $arg;
+    if ($arg =~ /$is_shell_special/) {
+        $arg =~ s/($shell_escape_in_quote)/\\$1/g;
+        return "\"$arg\"";
+    }
+    return $arg;
+}
+
+sub escaped_shell_args {
+    return map {local $_=$_;chomp;escape_shell($_)} @_;
+}
+
+sub escaped_shell_args_str {
+    return join ' ',&escaped_shell_args(@_);
+}
+
+sub escaped_cmdline {
+    return "$0 ".&escaped_shell_args_str(@ORIG_ARGV);
+}
diff --git a/dpmert/error_surface.cc b/dpmert/error_surface.cc
new file mode 100644
index 00000000..515b67f8
--- /dev/null
+++ b/dpmert/error_surface.cc
@@ -0,0 +1,42 @@
+#include "error_surface.h"
+
+#include <cassert>
+#include <sstream>
+
+using namespace std;
+
+ErrorSurface::~ErrorSurface() {}
+
+void ErrorSurface::Serialize(std::string* out) const {  // layout: int count, then per segment: x, 1-byte length, encoded delta
+  const int segments = this->size();
+  ostringstream os(ios::binary);
+  os.write((const char*)&segments,sizeof(segments));
+  for (int i = 0; i < segments; ++i) {
+    const ErrorSegment& cur = (*this)[i];
+    string senc;
+    cur.delta.Encode(&senc);
+    assert(senc.size() < 256);  // the length is stored in one byte below; larger encodings would be silently truncated
+    unsigned char len = senc.size();
+    os.write((const char*)&cur.x, sizeof(cur.x));
+    os.write((const char*)&len, sizeof(len));
+    os.write((const char*)&senc[0], len);
+  }
+  *out = os.str();
+}
+
+void ErrorSurface::Deserialize(const std::string& in) {  // reads the layout written by Serialize(): count, then x / length byte / encoded delta
+  istringstream is(in, ios::binary);
+  int segments = 0;  // initialized so that an empty input cannot leave an indeterminate count
+  is.read((char*)&segments, sizeof(segments)); assert(!is.fail() && segments >= 0);
+  this->resize(segments);
+  for (int i = 0; i < segments; ++i) {
+    ErrorSegment& cur = (*this)[i];
+    unsigned char len = 0;  // initialized for the same reason as segments
+    is.read((char*)&cur.x, sizeof(cur.x));
+    is.read((char*)&len, sizeof(len));
+    string senc(len, '\0'); assert(senc.size() == len);
+    is.read((char*)&senc[0], len); assert(!is.fail());  // a short read means the input was truncated
+    cur.delta = SufficientStats(senc);
+  }
+}
+
diff --git a/dpmert/error_surface.h b/dpmert/error_surface.h
new file mode 100644
index 00000000..bb65847b
--- /dev/null
+++ b/dpmert/error_surface.h
@@ -0,0 +1,24 @@
+#ifndef _ERROR_SURFACE_H_
+#define _ERROR_SURFACE_H_
+
+#include <vector>
+#include <string>
+
+#include "ns.h"
+
+class Score;
+
+struct ErrorSegment {  // one entry of an ErrorSurface
+  double x;               // x value of the segment (ComputeErrorSurface copies it from the MERTPoint)
+  SufficientStats delta;  // metric statistics attached to this segment (ComputeErrorSurface stores score - prev_score)
+  ErrorSegment() : x(0), delta() {}
+};
+
+class ErrorSurface : public std::vector<ErrorSegment> {  // a vector of segments plus binary (de)serialization
+ public:
+  ~ErrorSurface();
+  void Serialize(std::string* out) const;   // overwrites *out with a binary encoding of all segments
+  void Deserialize(const std::string& in);  // resizes this surface and fills it from an encoding made by Serialize
+};
+
+#endif
diff --git a/dpmert/libcall.pl b/dpmert/libcall.pl
new file mode 100644
index 00000000..c7d0f128
--- /dev/null
+++ b/dpmert/libcall.pl
@@ -0,0 +1,71 @@
+use IPC::Open3;
+use Symbol qw(gensym);
+
+$DUMMY_STDERR = gensym();
+$DUMMY_STDIN = gensym();
+
+# Run the command and ignore failures
+sub unchecked_call {
+    system("@_")
+}
+
+# Run the command and return its output, if any ignoring failures
+sub unchecked_output {
+    return `@_`
+}
+
+# WARNING: Do not use this for commands that will return large amounts
+# of stdout or stderr -- they might block indefinitely
+sub check_output {  # runs the command, returns its stdout; exits with status 1 if the command fails
+    print STDERR "Executing and gathering output: @_\n";
+
+    my $pid = open3($DUMMY_STDIN, \*PH, $DUMMY_STDERR, @_);
+    my $proc_output = "";
+    while( <PH> ) {
+	$proc_output .= $_;
+    }
+    waitpid($pid, 0);
+    # $? is the full wait status: the low bits hold a terminating signal, the high byte the exit code.
+    my $child_status = $?;  # compare the whole status so that death by signal is not mistaken for success
+    if($child_status == 0) {
+	return $proc_output;
+    } else {
+	print STDERR "ERROR: Execution of @_ failed.\n";
+	exit(1);
+    }
+}
+
+# Based on Moses' safesystem sub
+sub check_call {  # runs the command via system(); returns 0 on success, otherwise reports the failure and exits
+    print STDERR "Executing: @_\n";
+    system(@_);
+    my $exitcode = $? >> 8;  # exit code only; a terminating signal lives in the low bits of $?
+    if($? == 0) {  # test the whole status: a signal death has exit code 0 but a non-zero $?
+	return 0;
+    } elsif ($? == -1) {
+	print STDERR "ERROR: Failed to execute: @_\n  $!\n";
+	exit(1);
+
+    } elsif ($? & 127) {
+      print STDERR "ERROR: Execution of: @_\n  died with signal ", ($? & 127), ", ",  # print, not printf: the command may contain '%'
+      (($? & 128) ? 'with' : 'without'), " coredump\n";
+      exit(1);
+
+    } else {
+	print STDERR "Failed with exit code: $exitcode\n" if $exitcode;
+	exit($exitcode);
+    }
+}
+
+sub check_bash_call {
+    my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_");
+    check_call(@args);
+}
+
+sub check_bash_output {
+    my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_");
+    return check_output(@args);
+}
+
+# perl module weirdness...
+return 1;
diff --git a/dpmert/line_mediator.pl b/dpmert/line_mediator.pl
new file mode 100755
index 00000000..bc2bb24c
--- /dev/null
+++ b/dpmert/line_mediator.pl
@@ -0,0 +1,116 @@
+#!/usr/bin/perl -w
+#hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication
+
+# if you don't know how to fork/exec in a C program, this could be helpful under limited circumstances (would be ok to liaise with sentserver)
+
+#WARNING: because it waits for the result from command 2 after sending every line, and especially if command 1 does the same, using sentserver as command 2 won't actually buy you any real parallelism.
+
+use strict;
+use IPC::Open2;
+use POSIX qw(pipe dup2 STDIN_FILENO STDOUT_FILENO);
+
+# Diagnostics are printed only when DEBUG is set and QUIET is not.
+my $quiet = $ENV{QUIET} ? 1 : !$ENV{DEBUG};
+sub info {
+    local $, = ' ';   # print() puts one space between the arguments
+    print STDERR @_ unless $quiet;
+}
+
+# Topology: CROSS beats SNAKE, which beats PIPE; CROSS is also the default.
+my $mode = $ENV{CROSS} ? 'CROSS'
+         : $ENV{SNAKE} ? 'SNAKE'
+         : $ENV{PIPE}  ? 'PIPE'
+         :               'CROSS';
+# Connection: DIRECT beats SERIAL, except that SNAKE always runs SERIAL.
+my $ser = ($mode eq 'SNAKE' || ($ENV{SERIAL} && !$ENV{DIRECT}))
+        ? 'SERIAL' : 'DIRECT';
+info("mode: $mode\n");
+info("connection: $ser\n");
+
+
+my @c1;   # command 1: every argument before the '--' separator
+if (scalar @ARGV) {
+    do {
+        push @c1,shift
+    } while scalar @ARGV && $c1[$#c1] ne '--';
+}
+pop @c1;   # drop the '--' (without one, this drops the last argument and @c2 ends up empty)
+my @c2=@ARGV;   # command 2: everything after the separator
+@ARGV=();
+(scalar @c1 && scalar @c2) || die qq{
+usage: $0 cmd1 args -- cmd2 args
+all options are environment variables.
+DEBUG=1 env var enables debugging output.
+CROSS=1 hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication.  crosses stdin/stdout of cmd1 and cmd2 line by line (both must flush on newline and output).  cmd1 initiates the conversation (sends the first line).    default: attempts to cross stdin/stdout of c1 and c2 directly (via two unidirectional posix pipes created before fork).
+SERIAL=1: (no parallelism possible) but lines exchanged are logged if DEBUG.
+if SNAKE then stdin -> c1 -> c2 -> c1 -> stdout.
+if PIPE then stdin -> c1 -> c2 -> stdout (same as shell c1|c2, but with SERIAL you can see the intermediate in real time; you could do similar with c1 | tee /dev/fd/2 |c2).
+DIRECT=1 (default) will override SERIAL=1.
+CROSS=1 (default) will override SNAKE or PIPE.
+};
+
+info("1 cmd:",@c1,"\n");
+info("2 cmd:",@c2,"\n");
+
+sub lineto {
+    my $fh = shift;   # first argument: the handle to write to
+    select $fh;       # it stays the default output handle after the call
+    $| = 1;           # autoflush the handle that was just selected
+    print @_;         # remaining arguments are the text
+}
+
+if ($ser eq 'SERIAL') {   # this script stays alive and relays every line itself
+    my ($R1,$W1,$R2,$W2);
+    my $c1p=open2($R1,$W1,@c1); # Open2 R W backward from Open3.
+    my $c2p=open2($R2,$W2,@c2); # $R2 reads cmd2's output, $W2 writes to its input
+    if ($mode eq 'CROSS') {
+        while(<$R1>) {   # cmd1 speaks first
+            info("1:",$_);
+            lineto($W2,$_);
+            last unless defined ($_=<$R2>);   # stop when cmd2 closes its output
+            info("1|2:",$_);
+            lineto($W1,$_);   # cmd2's reply goes back to cmd1
+        }
+    } else {   # PIPE or SNAKE: the conversation is driven by this script's STDIN
+        my $snake=$mode eq 'SNAKE';
+        while(<STDIN>) {
+            info("IN:",$_);
+            lineto($W1,$_);
+            last unless defined ($_=<$R1>);
+            info("IN|1:",$_);
+            lineto($W2,$_);
+            last unless defined ($_=<$R2>);
+            info("IN|1|2:",$_);
+            if ($snake) {   # SNAKE sends cmd2's answer through cmd1 once more
+                lineto($W1,$_);
+                last unless defined ($_=<$R1>);
+                info("IN|1|2|1:",$_);
+            }
+            lineto(*STDOUT,$_);
+        }
+    }
+} else {   # DIRECT: wire the two commands together with pipes, then exec them
+    info("DIRECT mode\n");
+    my @rw1=POSIX::pipe();   # (read fd, write fd): cmd1's stdout -> cmd2's stdin
+    my @rw2=POSIX::pipe();   # (read fd, write fd): cmd2's stdout -> cmd1's stdin
+    my $pid=undef;
+    $SIG{CHLD} = sub { wait };   # reap a child whenever SIGCHLD is delivered
+    while (not defined ($pid=fork())) {   # fork failed: retry once a second
+        sleep 1;
+    }
+    my $pipe = $mode eq 'PIPE';
+    unless ($pipe) {   # runs in both parent and child; in PIPE mode the outer ends are kept
+        POSIX::close(STDOUT_FILENO);
+        POSIX::close(STDIN_FILENO);
+    }
+    if ($pid) {   # parent becomes cmd1
+        POSIX::dup2($rw1[1],STDOUT_FILENO);
+        POSIX::dup2($rw2[0],STDIN_FILENO) unless $pipe;
+        exec @c1;
+    } else {      # child becomes cmd2
+        POSIX::dup2($rw2[1],STDOUT_FILENO) unless $pipe;
+        POSIX::dup2($rw1[0],STDIN_FILENO);
+        exec @c2;
+    }
+    while (wait()!=-1) {}   # only reached if an exec above returned, i.e. failed
+}
diff --git a/dpmert/line_optimizer.cc b/dpmert/line_optimizer.cc
new file mode 100644
index 00000000..49443fbe
--- /dev/null
+++ b/dpmert/line_optimizer.cc
@@ -0,0 +1,111 @@
+#include "line_optimizer.h"
+
+#include <limits>
+#include <algorithm>
+
+#include "sparse_vector.h"
+#include "ns.h"
+
+using namespace std;
+
+typedef ErrorSurface::const_iterator ErrorIter;
+
+// sort by increasing x-ints
+struct IntervalComp {
+  bool operator() (const ErrorIter& a, const ErrorIter& b) const {
+    return a->x < b->x;
+  }
+};
+
+double LineOptimizer::LineOptimize(
+    const EvaluationMetric* metric,
+    const vector<ErrorSurface>& surfaces,
+    const LineOptimizer::ScoreType type,
+    float* best_score,
+    const double epsilon) {
+  // Merges all segments of all surfaces, sweeps them left to right, and returns
+  // the line position judged best; *best_score receives the matching score.
+  float& cur_best_score = *best_score;
+  cur_best_score = (type == MAXIMIZE_SCORE ?
+    -numeric_limits<float>::max() : numeric_limits<float>::max());
+  double pos = numeric_limits<double>::quiet_NaN();
+  vector<ErrorIter> all_ints;
+  for (vector<ErrorSurface>::const_iterator i = surfaces.begin();
+       i != surfaces.end(); ++i) {
+    for (ErrorIter j = i->begin(); j != i->end(); ++j)
+      all_ints.push_back(j);
+  }
+  // Nothing to sweep: front() below would be undefined, so hand back NaN and
+  // leave *best_score at the worst-case value it was just initialised to.
+  if (all_ints.empty()) return pos;
+  sort(all_ints.begin(), all_ints.end(), IntervalComp());
+  double last_boundary = all_ints.front()->x;
+  SufficientStats acc;
+  bool left_edge = true;
+  for (vector<ErrorIter>::iterator i = all_ints.begin();
+       i != all_ints.end(); ++i) {
+    const ErrorSegment& seg = **i;
+    // Score only when the boundary moved by more than epsilon; segments
+    // closer together than that are folded into the same interval.
+    if (seg.x - last_boundary > epsilon) {
+      float sco = metric->ComputeScore(acc);
+      if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
+          (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
+        cur_best_score = sco;
+        if (left_edge) {
+          pos = seg.x - 0.1;  // first improvement: step just left of this boundary
+          left_edge = false;
+        } else {
+          pos = last_boundary + (seg.x - last_boundary) / 2;  // interval midpoint
+        }
+      }
+      last_boundary = seg.x;
+    }
+    acc += seg.delta;
+  }
+  // Score the statistics accumulated after the last boundary as well.
+  float sco = metric->ComputeScore(acc);
+  if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
+      (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
+    cur_best_score = sco;
+    if (left_edge) {
+      pos = 0;
+    } else {
+      pos = last_boundary + 1000.0;
+    }
+  }
+  return pos;
+}
+
+void LineOptimizer::RandomUnitVector(const vector<int>& features_to_optimize,
+                                     SparseVector<double>* axis,
+                                     RandomNumberGenerator<boost::mt19937>* rng) {
+  axis->clear();  // then: one NextNormal(0,1) draw per listed feature, divided by the L2 norm
+  for (vector<int>::const_iterator f = features_to_optimize.begin(); f != features_to_optimize.end(); ++f)
+    axis->set_value(*f, rng->NextNormal(0.0,1.0));
+  (*axis) /= axis->l2norm();
+}
+
+void LineOptimizer::CreateOptimizationDirections(
+     const vector<int>& features_to_optimize,
+     int additional_random_directions,
+     RandomNumberGenerator<boost::mt19937>* rng,
+     vector<SparseVector<double> >* dirs
+     , bool include_orthogonal
+  ) {
+  // Output: optionally one axis-aligned vector (value 1) per feature, followed
+  // by additional_random_directions vectors filled in by RandomUnitVector.
+  vector<SparseVector<double> >& result = *dirs;
+  result.clear();
+  if (include_orthogonal)
+    for (size_t f = 0; f < features_to_optimize.size(); ++f) {
+      result.push_back(SparseVector<double>());
+      result.back().set_value(features_to_optimize[f], 1.);
+    }
+  const size_t num_fixed = result.size();
+  result.resize(num_fixed + additional_random_directions);
+  for (size_t r = num_fixed; r < result.size(); ++r)
+    RandomUnitVector(features_to_optimize, &result[r], rng);
+  cerr << "Generated " << result.size() << " total axes to optimize along.\n";
+}
+
diff --git a/dpmert/line_optimizer.h b/dpmert/line_optimizer.h
new file mode 100644
index 00000000..83819f41
--- /dev/null
+++ b/dpmert/line_optimizer.h
@@ -0,0 +1,48 @@
+#ifndef LINE_OPTIMIZER_H_
+#define LINE_OPTIMIZER_H_
+
+#include <vector>
+
+#include "sparse_vector.h"
+#include "error_surface.h"
+#include "sampler.h"
+
+class EvaluationMetric;
+class Weights;
+
+struct LineOptimizer {
+  // All members are static; the struct only groups the line-search helpers.
+  // use MINIMIZE_SCORE for things like TER, WER
+  // MAXIMIZE_SCORE for things like BLEU
+  enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE };
+
+  // merge all the error surfaces together into a global
+  // error surface and find (the middle of) the best segment
+  static double LineOptimize(
+     const EvaluationMetric* metric,             // used to score the accumulated statistics
+     const std::vector<ErrorSurface>& envs,      // the error surfaces to merge
+     const LineOptimizer::ScoreType type,        // whether larger or smaller scores are better
+     float* best_score,                          // out: score at the returned position
+     const double epsilon = 1.0/65536.0);        // boundaries closer than this are scored together
+
+  // return a random vector of length 1 where all dimensions
+  // not listed in dimensions will be 0.
+  static void RandomUnitVector(const std::vector<int>& dimensions,
+                               SparseVector<double>* axis,    // out: cleared, then filled
+                               RandomNumberGenerator<boost::mt19937>* rng);
+
+  // generate a list of directions to optimize; the list will
+  // contain the orthogonal vectors corresponding to the dimensions in
+  // primary and then additional_random_directions directions in those
+  // dimensions as well.  All vectors will be length 1.
+  static void CreateOptimizationDirections(
+     const std::vector<int>& primary,
+     int additional_random_directions,
+     RandomNumberGenerator<boost::mt19937>* rng,
+     std::vector<SparseVector<double> >* dirs      // out: cleared, then filled
+     , bool include_primary=true                   // false: emit only the random directions
+    );
+
+};
+
+#endif
diff --git a/dpmert/lo_test.cc b/dpmert/lo_test.cc
new file mode 100644
index 00000000..d9b909b8
--- /dev/null
+++ b/dpmert/lo_test.cc
@@ -0,0 +1,236 @@
+#include <cmath>
+#include <iostream>
+#include <fstream>
+
+#include <boost/shared_ptr.hpp>
+#include <gtest/gtest.h>
+
+#include "ns.h"
+#include "ns_docscorer.h"
+#include "ces.h"
+#include "fdict.h"
+#include "hg.h"
+#include "kbest.h"
+#include "hg_io.h"
+#include "filelib.h"
+#include "inside_outside.h"
+#include "viterbi.h"
+#include "mert_geometry.h"
+#include "line_optimizer.h"
+
+using namespace std;
+using boost::shared_ptr;
+
+class OptTest : public testing::Test {  // fixture named by every TEST_F in this file
+ protected:
+   virtual void SetUp() { }  // nothing to prepare
+   virtual void TearDown() { }  // nothing to release
+};
+
+const char* ref11 = "australia reopens embassy in manila";
+const char* ref12 = "( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack .";
+const char* ref21 = "australia reopened manila embassy";
+const char* ref22 = "( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack .";
+const char* ref31 = "australia to reopen embassy in manila";
+const char* ref32 = "( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so - called confirmed terrorist attack threats .";
+const char* ref41 = "australia to re - open its embassy to manila";
+const char* ref42 = "( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so - called \" clear \" threat of terrorist attack 7 weeks ago .";
+
+TEST_F(OptTest, TestCheckNaN) {
+  double x = 0;
+  double y = 0;
+  double z = x / y;  // 0/0 is expected to produce NaN
+  // std:: selects the <cmath> overload even when a global C isnan is also visible
+  EXPECT_TRUE(std::isnan(z));
+}
+
+TEST_F(OptTest,TestConvexHull) {
+  // Two hulls of two lines each; their product is expected to have three segments.
+  vector<shared_ptr<MERTPoint> > lines_a, lines_b;
+  lines_a.push_back(shared_ptr<MERTPoint>(new MERTPoint(-1, 0)));
+  lines_a.push_back(shared_ptr<MERTPoint>(new MERTPoint(1, 0)));
+  lines_b.push_back(shared_ptr<MERTPoint>(new MERTPoint(-1, 1)));
+  lines_b.push_back(shared_ptr<MERTPoint>(new MERTPoint(1, -1)));
+  ConvexHull a(lines_a);
+  cerr << a << endl;
+  ConvexHull b(lines_b);
+  ConvexHull product(a);
+  product *= b;
+  cerr << a << " (*) " << b << " = " << product << endl;
+  EXPECT_EQ(3, product.size());
+}
+
+TEST_F(OptTest,TestConvexHullInside) {  // smoke test: prints results, contains no EXPECT/ASSERT
+  const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}";
+  Hypergraph hg;
+  istringstream instr(json);
+  HypergraphIO::ReadFromJSON(&instr, &hg);
+  SparseVector<double> wts;
+  wts.set_value(FD::Convert("f1"), 0.4);
+  wts.set_value(FD::Convert("f2"), 1.0);
+  hg.Reweight(wts);
+  vector<pair<vector<WordID>, prob_t> > list;  // not used below
+  std::vector<SparseVector<double> > features;  // not used below
+  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, 10);
+  for (int i = 0; i < 10; ++i) {  // print up to 10 derivations under the starting weights
+    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+      kbest.LazyKthBest(hg.nodes_.size() - 1, i);
+    if (!d) break;
+    cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
+  }
+  SparseVector<double> dir; dir.set_value(FD::Convert("f1"), 1.0);  // search along f1 only
+  ConvexHullWeightFunction wf(wts, dir);
+  ConvexHull env = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
+  cerr << env << endl;
+  const vector<boost::shared_ptr<MERTPoint> >& segs = env.GetSortedSegs();
+  dir *= segs[1]->x;  // assumes the hull has at least two segments; not checked here
+  wts += dir;  // move the weights to the start of the second segment
+  hg.Reweight(wts);
+  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest2(hg, 10);
+  for (int i = 0; i < 10; ++i) {  // print the derivations again under the moved weights
+    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+      kbest2.LazyKthBest(hg.nodes_.size() - 1, i);
+    if (!d) break;
+    cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
+  }
+  for (int i = 0; i < segs.size(); ++i) {  // print the translation recorded for each segment
+    cerr << "seg=" << i << endl;
+    vector<WordID> trans;
+    segs[i]->ConstructTranslation(&trans);
+    cerr << TD::GetString(trans) << endl;
+  }
+}
+
+TEST_F(OptTest, TestS1) {  // line search over two hypergraphs read from ./test_data
+  int fPhraseModel_0 = FD::Convert("PhraseModel_0");
+  int fPhraseModel_1 = FD::Convert("PhraseModel_1");
+  int fPhraseModel_2 = FD::Convert("PhraseModel_2");
+  int fLanguageModel = FD::Convert("LanguageModel");
+  int fWordPenalty = FD::Convert("WordPenalty");
+  int fPassThrough = FD::Convert("PassThrough");
+  SparseVector<double> wts;  // starting point of the search
+  wts.set_value(fWordPenalty, 4.25);
+  wts.set_value(fLanguageModel, -1.1165);
+  wts.set_value(fPhraseModel_0, -0.96);
+  wts.set_value(fPhraseModel_1, -0.65);
+  wts.set_value(fPhraseModel_2, -0.77);
+  wts.set_value(fPassThrough, -10.0);
+
+  vector<int> to_optimize;  // PassThrough is weighted above but not optimized
+  to_optimize.push_back(fWordPenalty);
+  to_optimize.push_back(fLanguageModel);
+  to_optimize.push_back(fPhraseModel_0);
+  to_optimize.push_back(fPhraseModel_1);
+  to_optimize.push_back(fPhraseModel_2);
+
+  Hypergraph hg;
+  ReadFile rf("./test_data/0.json.gz");  // path is relative to the working directory
+  HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+  hg.Reweight(wts);
+
+  Hypergraph hg2;
+  ReadFile rf2("./test_data/1.json.gz");
+  HypergraphIO::ReadFromJSON(rf2.stream(), &hg2);
+  hg2.Reweight(wts);
+
+  vector<vector<WordID> > refs1(4);  // four references for the first hypergraph
+  TD::ConvertSentence(ref11, &refs1[0]);
+  TD::ConvertSentence(ref21, &refs1[1]);
+  TD::ConvertSentence(ref31, &refs1[2]);
+  TD::ConvertSentence(ref41, &refs1[3]);
+  vector<vector<WordID> > refs2(4);  // four references for the second hypergraph
+  TD::ConvertSentence(ref12, &refs2[0]);
+  TD::ConvertSentence(ref22, &refs2[1]);
+  TD::ConvertSentence(ref32, &refs2[2]);
+  TD::ConvertSentence(ref42, &refs2[3]);
+  vector<ConvexHull> envs(2);
+
+  RandomNumberGenerator<boost::mt19937> rng;
+
+  vector<SparseVector<double> > axes; // directions to search
+  LineOptimizer::CreateOptimizationDirections(
+     to_optimize,
+     10,
+     &rng,
+     &axes);
+  assert(axes.size() == 10 + to_optimize.size());  // 10 random directions plus one axis per feature
+  for (int i = 0; i < axes.size(); ++i)
+    cerr << axes[i] << endl;
+  const SparseVector<double>& axis = axes[0];  // only the first direction is searched
+
+  cerr << "Computing Viterbi envelope using inside algorithm...\n";
+  cerr << "axis: " << axis << endl;
+  clock_t t_start=clock();
+  ConvexHullWeightFunction wf(wts, axis);  // wts = starting point, axis = search direction
+  envs[0] = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
+  envs[1] = Inside<ConvexHull, ConvexHullWeightFunction>(hg2, NULL, wf);
+
+  vector<ErrorSurface> es(2);
+  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(refs1);
+  boost::shared_ptr<SegmentEvaluator> scorer2 = metric->CreateSegmentEvaluator(refs2);
+  ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg);
+  ComputeErrorSurface(*scorer2, envs[1], &es[1], metric, hg2);
+  cerr << envs[0].size() << " " << envs[1].size() << endl;
+  cerr << es[0].size() << " " << es[1].size() << endl;
+  envs.clear();
+  clock_t t_env=clock();
+  float score;
+  double m = LineOptimizer::LineOptimize(metric,es, LineOptimizer::MAXIMIZE_SCORE, &score);
+  clock_t t_opt=clock();
+  cerr << "line optimizer returned: " << m << " (SCORE=" << score << ")\n";
+  EXPECT_FLOAT_EQ(0.48719698, score);  // the only checked expectation in this test
+  SparseVector<double> res = axis;  // res = wts + m * axis
+  res *= m;
+  res += wts;
+  cerr << "res: " << res << endl;
+  cerr << "ENVELOPE PROCESSING=" << (static_cast<double>(t_env - t_start) / 1000.0) << endl;  // NOTE(review): clock() ticks / 1000, not CLOCKS_PER_SEC -- confirm intended unit
+  cerr << "  LINE OPTIMIZATION=" << (static_cast<double>(t_opt - t_env) / 1000.0) << endl;
+  hg.Reweight(res);
+  hg2.Reweight(res);
+  vector<WordID> t1,t2;
+  ViterbiESentence(hg, &t1);
+  ViterbiESentence(hg2, &t2);
+  cerr << TD::GetString(t1) << endl;  // translations under the new weights, for inspection only
+  cerr << TD::GetString(t2) << endl;
+}
+
+TEST_F(OptTest,TestZeroOrigin) {  // smoke test: contains no EXPECT/ASSERT
+  const string json = "{\"rules\":[1,\"[X7] ||| blA ||| without ||| LHSProb=3.92173 LexE2F=2.90799 LexF2E=1.85003 GenerativeProb=10.5381 RulePenalty=1 XFE=2.77259 XEF=0.441833 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=0.693147\",2,\"[X7] ||| blA ||| except ||| LHSProb=4.92173 LexE2F=3.90799 LexF2E=1.85003 GenerativeProb=11.5381 RulePenalty=1 XFE=2.77259 XEF=1.44183 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=1.69315\",3,\"[S] ||| [X7,1] ||| [1] ||| GlueTop=1\",4,\"[X28] ||| EnwAn ||| title ||| LHSProb=3.96802 LexE2F=2.22462 LexF2E=1.83258 GenerativeProb=10.0863 RulePenalty=1 XFE=0 XEF=1.20397 LabelledEF=1.20397 LabelledFE=-1.98341e-08 LogRuleCount=1.09861\",5,\"[X0] ||| EnwAn ||| funny ||| LHSProb=3.98479 LexE2F=1.79176 LexF2E=3.21888 GenerativeProb=11.1681 RulePenalty=1 XFE=0 XEF=2.30259 LabelledEF=2.30259 LabelledFE=0 LogRuleCount=0 SingletonRule=1\",6,\"[X8] ||| [X7,1] EnwAn ||| entitled [1] ||| LHSProb=3.82533 LexE2F=3.21888 LexF2E=2.52573 GenerativeProb=11.3276 RulePenalty=1 XFE=1.20397 XEF=1.20397 LabelledEF=2.30259 LabelledFE=2.30259 LogRuleCount=0 SingletonRule=1\",7,\"[S] ||| [S,1] [X28,2] ||| [1] [2] ||| Glue=1\",8,\"[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1\",9,\"[S] ||| [X8,1] ||| [1] ||| GlueTop=1\",10,\"[Goal] ||| [S,1] ||| 
[1]\"],\"features\":[\"PassThrough\",\"Glue\",\"GlueTop\",\"LanguageModel\",\"WordPenalty\",\"LHSProb\",\"LexE2F\",\"LexF2E\",\"GenerativeProb\",\"RulePenalty\",\"XFE\",\"XEF\",\"LabelledEF\",\"LabelledFE\",\"LogRuleCount\",\"SingletonRule\"],\"edges\":[{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,3.92173,6,2.90799,7,1.85003,8,10.5381,9,1,10,2.77259,11,0.441833,12,2.63906,13,4.96981,14,0.693147],\"rule\":1},{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,4.92173,6,3.90799,7,1.85003,8,11.5381,9,1,10,2.77259,11,1.44183,12,2.63906,13,4.96981,14,1.69315],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X7\"},\"edges\":[{\"tail\":[0],\"spans\":[0,1,-1,-1],\"feats\":[2,1],\"rule\":3}],\"node\":{\"in_edges\":[2],\"cat\":\"S\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.96802,6,2.22462,7,1.83258,8,10.0863,9,1,11,1.20397,12,1.20397,13,-1.98341e-08,14,1.09861],\"rule\":4}],\"node\":{\"in_edges\":[3],\"cat\":\"X28\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.98479,6,1.79176,7,3.21888,8,11.1681,9,1,11,2.30259,12,2.30259,15,1],\"rule\":5}],\"node\":{\"in_edges\":[4],\"cat\":\"X0\"},\"edges\":[{\"tail\":[0],\"spans\":[0,2,-1,-1],\"feats\":[5,3.82533,6,3.21888,7,2.52573,8,11.3276,9,1,10,1.20397,11,1.20397,12,2.30259,13,2.30259,15,1],\"rule\":6}],\"node\":{\"in_edges\":[5],\"cat\":\"X8\"},\"edges\":[{\"tail\":[1,2],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":7},{\"tail\":[1,3],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":8},{\"tail\":[4],\"spans\":[0,2,-1,-1],\"feats\":[2,1],\"rule\":9}],\"node\":{\"in_edges\":[6,7,8],\"cat\":\"S\"},\"edges\":[{\"tail\":[5],\"spans\":[0,2,-1,-1],\"feats\":[],\"rule\":10}],\"node\":{\"in_edges\":[9],\"cat\":\"Goal\"}}";
+  Hypergraph hg;
+  istringstream instr(json);
+  HypergraphIO::ReadFromJSON(&instr, &hg);
+  SparseVector<double> wts;  // only PassThrough has a weight at the starting point
+  wts.set_value(FD::Convert("PassThrough"), -0.929201533002898);
+  hg.Reweight(wts);
+
+  vector<pair<vector<WordID>, prob_t> > list;  // not used below
+  std::vector<SparseVector<double> > features;  // not used below
+  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, 10);
+  for (int i = 0; i < 10; ++i) {  // print up to 10 derivations for inspection
+    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+      kbest.LazyKthBest(hg.nodes_.size() - 1, i);
+    if (!d) break;
+    cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
+  }
+ 
+  SparseVector<double> axis; axis.set_value(FD::Convert("Glue"),1.0);  // search along Glue only
+  ConvexHullWeightFunction wf(wts, axis);  // wts = starting point, axis = search direction
+  vector<ConvexHull> envs(1);
+  envs[0] = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
+
+  vector<vector<WordID> > mr(4);  // four reference translations
+  TD::ConvertSentence("untitled", &mr[0]);
+  TD::ConvertSentence("with no title", &mr[1]);
+  TD::ConvertSentence("without a title", &mr[2]);
+  TD::ConvertSentence("without title", &mr[3]);
+  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(mr);
+  vector<ErrorSurface> es(1);
+  ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg);  // result is not inspected; the call just has to complete
+}
+
+int main(int argc, char **argv) {
+  testing::InitGoogleTest(&argc, argv);  // hand the command line to googletest first
+  return RUN_ALL_TESTS();  // the process exit status is whatever the test run returns
+}
+
diff --git a/dpmert/mert_geometry.cc b/dpmert/mert_geometry.cc
new file mode 100644
index 00000000..81b25af9
--- /dev/null
+++ b/dpmert/mert_geometry.cc
@@ -0,0 +1,186 @@
+#include "mert_geometry.h"
+
+#include <cassert>
+#include <limits>
+
+using namespace std;
+using boost::shared_ptr;
+
+ConvexHull::ConvexHull(int i) : is_sorted(true) {  // was left uninitialised; 0 or 1 points need no sorting
+  if (i == 0) {
+    // do nothing - <>
+  } else if (i == 1) {
+    points.push_back(shared_ptr<MERTPoint>(new MERTPoint(0, 0, 0, shared_ptr<MERTPoint>(), shared_ptr<MERTPoint>())));
+    assert(this->IsMultiplicativeIdentity());
+  } else {
+    cerr << "Only can create ConvexHull semiring 0 and 1 with this constructor!\n";
+    abort();
+  }
+}
+
+const ConvexHull ConvexHullWeightFunction::operator()(const Hypergraph::Edge& e) const {
+  // The edge becomes one point whose slope comes from direction and intercept from origin.
+  const double slope = direction.dot(e.feature_values_);
+  const double intercept = origin.dot(e.feature_values_);
+  return ConvexHull(1, new MERTPoint(slope, intercept, e));
+}
+
+ostream& operator<<(ostream& os, const ConvexHull& env) {
+  const vector<shared_ptr<MERTPoint> >& segs = env.GetSortedSegs();
+  os << '<';  // segments are '|'-separated inside <...>
+  for (size_t k = 0; k < segs.size(); ++k)
+    os << (k == 0 ? "" : "|") << "x=" << segs[k]->x << ",b=" << segs[k]->b << ",m=" << segs[k]->m << ",p1=" << segs[k]->p1 << ",p2=" << segs[k]->p2;
+  return os << '>';
+}
+
+#define ORIGINAL_MERT_IMPLEMENTATION 1
+#ifdef ORIGINAL_MERT_IMPLEMENTATION
+
+struct SlopeCompare {  // orders points by ascending slope m
+  bool operator()(const shared_ptr<MERTPoint>& lhs, const shared_ptr<MERTPoint>& rhs) const {
+    return (*lhs).m < (*rhs).m;
+  }
+};
+
+const ConvexHull& ConvexHull::operator+=(const ConvexHull& other) {
+  // Semiring addition: pool the points of both hulls; pruning is left to Sort().
+  if (!other.is_sorted) other.Sort();
+  if (!points.empty()) {
+    // The concatenation is not in hull order any more, so flag it for re-sorting.
+    is_sorted = false;
+    const size_t expected = points.size() + other.points.size();
+    points.insert(points.end(), other.points.begin(), other.points.end());
+    assert(points.size() == expected);
+  } else {
+    points = other.points;
+  }
+  return *this;
+}
+
+void ConvexHull::Sort() const {
+  sort(points.begin(), points.end(), SlopeCompare());  // ascending slope m
+  const int k = points.size();
+  int j = 0;  // number of points kept so far; they occupy points[0..j)
+  for (int i = 0; i < k; ++i) {
+    MERTPoint l = *points[i];
+    l.x = kMinusInfinity;
+    // l is a working copy; its x is recomputed below against the points kept so far
+    if (0 < j) {
+      if (points[j-1]->m == l.m) {   // lines are parallel
+        if (l.b <= points[j-1]->b) continue;  // l is not above the kept line: drop l
+        --j;  // l has the larger intercept: drop the kept line instead
+      }
+      while(0 < j) {
+        l.x = (l.b - points[j-1]->b) / (points[j-1]->m - l.m);  // x where l crosses the last kept line
+        if (points[j-1]->x < l.x) break;  // crossing is right of that line's start: keep both
+        --j;  // otherwise the last kept line is discarded
+      }
+      if (0 == j) l.x = kMinusInfinity;
+    }
+    *points[j++] = l;  // writes through the pointer: the MERTPoint object in slot j is overwritten
+  }
+  points.resize(j);
+  is_sorted = true;
+}
+
+const ConvexHull& ConvexHull::operator*=(const ConvexHull& other) {
+  if (other.IsMultiplicativeIdentity()) { return *this; }
+  if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; }
+
+  if (!is_sorted) Sort();
+  if (!other.is_sorted) other.Sort();
+
+  if (this->IsEdgeEnvelope()) {
+    // *this is a single point built from an edge: offset every point of other
+    // by that edge's slope and intercept, recording the edge point as parent p1.
+    shared_ptr<MERTPoint> edge_parent = points[0];
+    const double& edge_b = edge_parent->b;
+    const double& edge_m = edge_parent->m;
+    points.clear();
+    for (int i = 0; i < other.points.size(); ++i) {
+      const MERTPoint& p = *other.points[i];
+      const double m = p.m + edge_m;
+      const double b = p.b + edge_b;
+      const double& x = p.x;       // x's don't change with *
+      points.push_back(shared_ptr<MERTPoint>(new MERTPoint(x, m, b, edge_parent, other.points[i])));
+      assert(points.back()->p1->edge);
+    }
+    // The result has exactly one point per point of other, with the same x
+    // values as other.
+  } else {
+    vector<shared_ptr<MERTPoint> > new_points;
+    int this_i = 0;   // index of the current segment in *this
+    int other_i = 0;  // index of the current segment in other
+    const int this_size  = points.size();
+    const int other_size = other.points.size();
+    double cur_x = kMinusInfinity;   // moves from left to right across the
+                                     // real numbers, stopping for all inter-
+                                     // sections
+    double this_next_val  = (1 < this_size  ? points[1]->x       : kPlusInfinity);
+    double other_next_val = (1 < other_size ? other.points[1]->x : kPlusInfinity);
+    while (this_i < this_size && other_i < other_size) {
+      const MERTPoint& this_point = *points[this_i];
+      const MERTPoint& other_point= *other.points[other_i];
+      const double m = this_point.m + other_point.m;  // slopes and intercepts of the
+      const double b = this_point.b + other_point.b;  // two current segments add up
+ 
+      new_points.push_back(shared_ptr<MERTPoint>(new MERTPoint(cur_x, m, b, points[this_i], other.points[other_i])));
+      int comp = 0;  // -1: *this has the nearer next boundary, 1: other does, 0: tie
+      if (this_next_val < other_next_val) comp = -1; else
+        if (this_next_val > other_next_val) comp = 1;
+      if (0 == comp) {  // the next values are equal, advance both indices
+        ++this_i;
+	++other_i;
+        cur_x = this_next_val;  // could be other_next_val (they're equal!)
+        this_next_val  = (this_i+1  < this_size  ? points[this_i+1]->x        : kPlusInfinity);
+        other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity);
+      } else {  // advance the i with the lower x, update cur_x
+        if (-1 == comp) {
+          ++this_i;
+          cur_x = this_next_val;
+          this_next_val =  (this_i+1  < this_size  ? points[this_i+1]->x        : kPlusInfinity);
+        } else {
+          ++other_i;
+          cur_x = other_next_val;
+          other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity);
+        }
+      }
+    }
+    points.swap(new_points);
+  }
+  // In both branches each new point keeps its two factors as parents p1 and p2.
+  return *this;
+}
+
+// recursively construct translation
+void MERTPoint::ConstructTranslation(vector<WordID>* trans) const {
+  const MERTPoint* cur = this;
+  vector<vector<WordID> > ant_trans;
+  while(!cur->edge) {  // follow the p1 chain until a point that carries an edge
+    ant_trans.resize(ant_trans.size() + 1);
+    cur->p2->ConstructTranslation(&ant_trans.back());  // translation of the p2 parent
+    cur = cur->p1.get();
+  }
+  size_t ant_size = ant_trans.size();
+  vector<const vector<WordID>*> pants(ant_size);
+  assert(ant_size == cur->edge->tail_nodes_.size());  // one collected translation per tail node
+  --ant_size;  // now the last valid index (wraps when empty, but then the loop below does not run)
+  for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i];  // reverse the collection order
+  cur->edge->rule_->ESubstitute(pants, trans);
+}
+
+void MERTPoint::CollectEdgesUsed(std::vector<bool>* edges_used) const {
+  if (edge != 0) {  // flag this point's own edge, when it has one
+    assert(edge->id_ < edges_used->size());
+    (*edges_used)[edge->id_] = true;
+  }
+  if (p1.get() != 0) p1->CollectEdgesUsed(edges_used);  // then walk both parents
+  if (p2.get() != 0) p2->CollectEdgesUsed(edges_used);
+}
+
+#else
+
+// THIS IS THE NEW FASTER IMPLEMENTATION OF THE MERT SEMIRING OPERATIONS
+
+#endif
+
diff --git a/dpmert/mert_geometry.h b/dpmert/mert_geometry.h
new file mode 100644
index 00000000..a8b6959e
--- /dev/null
+++ b/dpmert/mert_geometry.h
@@ -0,0 +1,81 @@
+#ifndef _MERT_GEOMETRY_H_
+#define _MERT_GEOMETRY_H_
+
+#include <vector>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+
+#include "hg.h"
+#include "sparse_vector.h"
+
+static const double kMinusInfinity = -std::numeric_limits<double>::infinity();
+static const double kPlusInfinity = std::numeric_limits<double>::infinity();
+
+struct MERTPoint {
+  MERTPoint() : x(), m(), b(), edge() {}  // everything zero / null
+  MERTPoint(double _m, double _b) :
+    x(kMinusInfinity), m(_m), b(_b), edge() {}  // bare line: no parents, no edge
+  MERTPoint(double _x, double _m, double _b, const boost::shared_ptr<MERTPoint>& p1_, const boost::shared_ptr<MERTPoint>& p2_) :
+    x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {}  // derived point: parents set, edge null
+  MERTPoint(double _m, double _b, const Hypergraph::Edge& edge) :
+    x(kMinusInfinity), m(_m), b(_b), edge(&edge) {}  // stores the edge's address, not a copy
+
+  double x;                   // x intersection with previous segment in env, or -inf if none
+  double m;                   // this line's slope
+  double b;                   // intercept with y-axis
+
+  // we keep a pointer to the "parents" of this segment so we can reconstruct
+  // the Viterbi translation corresponding to this segment
+  boost::shared_ptr<MERTPoint> p1;
+  boost::shared_ptr<MERTPoint> p2;
+
+  // only MERTPoints created from an edge using the ConvexHullWeightFunction
+  // have rules
+  // TRulePtr rule;
+  const Hypergraph::Edge* edge;  // non-owning; null unless built with the edge constructor
+
+  // recursively recover the Viterbi translation that will result from setting
+  // the weights to origin + axis * x, where x is any value from this->x up
+  // until the next largest x in the containing ConvexHull
+  void ConstructTranslation(std::vector<WordID>* trans) const;
+  void CollectEdgesUsed(std::vector<bool>* edges_used) const;  // sets (*edges_used)[id] for each edge reachable through p1/p2
+};
+
+// this is the semiring value type,
+// it defines constructors for 0, 1, and the operations + and *
+struct ConvexHull {
+  // create semiring zero
+  ConvexHull() : is_sorted(true) {}  // zero
+  // for debugging:
+  ConvexHull(const std::vector<boost::shared_ptr<MERTPoint> >& s) : points(s) { Sort(); }
+  // create semiring 1 or 0
+  explicit ConvexHull(int i);
+  ConvexHull(int n, MERTPoint* point) : is_sorted(true), points(n, boost::shared_ptr<MERTPoint>(point)) {}  // takes ownership of point; all n slots share it
+  const ConvexHull& operator+=(const ConvexHull& other);
+  const ConvexHull& operator*=(const ConvexHull& other);
+  bool IsMultiplicativeIdentity() const {  // exactly one point with b == m == 0 and no edge or parents
+    return size() == 1 && (points[0]->b == 0.0 && points[0]->m == 0.0) && (!points[0]->edge) && (!points[0]->p1) && (!points[0]->p2); }
+  const std::vector<boost::shared_ptr<MERTPoint> >& GetSortedSegs() const {
+    if (!is_sorted) Sort();  // sorting is done lazily, on first access after a change
+    return points;
+  }
+  size_t size() const { return points.size(); }  // raw point count; does not trigger Sort()
+
+ private:
+  bool IsEdgeEnvelope() const {  // exactly one point, and it carries an edge
+    return points.size() == 1 && points[0]->edge; }
+  void Sort() const;
+  mutable bool is_sorted;  // mutable so the const Sort() can update both members
+  mutable std::vector<boost::shared_ptr<MERTPoint> > points;
+};
+std::ostream& operator<<(std::ostream& os, const ConvexHull& env);
+
+struct ConvexHullWeightFunction {
+  ConvexHullWeightFunction(const SparseVector<double>& ori,
+                           const SparseVector<double>& dir) : origin(ori), direction(dir) {}  // both vectors are copied
+  const ConvexHull operator()(const Hypergraph::Edge& e) const;  // one-point hull for edge e (defined in mert_geometry.cc)
+  const SparseVector<double> origin;     // dotted with the edge features to give the intercept b
+  const SparseVector<double> direction;  // dotted with the edge features to give the slope m
+};
+
+#endif
diff --git a/dpmert/mr_dpmert_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc
new file mode 100644
index 00000000..59d4f24f
--- /dev/null
+++ b/dpmert/mr_dpmert_generate_mapper_input.cc
@@ -0,0 +1,78 @@
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "filelib.h"
+#include "weights.h"
+#include "line_optimizer.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {  // fills *conf; prints usage and exits(1) on missing required options or --help
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("dev_set_size,s",po::value<unsigned>(),"[REQD] Development set size (# of parallel sentences)")
+        ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository")
+        ("weights,w",po::value<string>(),"[REQD] Current feature weights file")
+        ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
+        ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;  // becomes true as soon as any required option is missing
+  if (conf->count("dev_set_size") == 0) {
+    cerr << "Please specify the size of the development set using -s N\n";  // the short flag is -s; -d is random_directions
+    flag = true;
+  }
+  if (conf->count("weights") == 0) {
+    cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n";
+    flag = true;
+  }
+  if (conf->count("forest_repository") == 0) {
+    cerr << "Please specify the forest repository location using -r <DIR>\n";
+    flag = true;
+  }
+  if (flag || conf->count("help")) {  // all problems are reported before the usage text is shown
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char** argv) {  // prints one mapper input line per (sentence, direction) pair
+  RandomNumberGenerator<boost::mt19937> rng;
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  vector<string> features;
+  SparseVector<weight_t> origin;
+  vector<weight_t> w;
+  Weights::InitFromFile(conf["weights"].as<string>(), &w, &features);
+  Weights::InitSparseVector(w, &origin);
+  const string forest_repository = conf["forest_repository"].as<string>();
+  if (!DirectoryExists(forest_repository)) { cerr << "Forest repository not found: " << forest_repository << endl; return 1; }  // explicit check: an assert would vanish under NDEBUG
+  if (conf.count("optimize_feature") > 0)  // -o overrides the feature list read from the weights file
+    features=conf["optimize_feature"].as<vector<string> >();
+  vector<SparseVector<weight_t> > directions;
+  vector<int> fids(features.size());
+  for (size_t i = 0; i < features.size(); ++i)  // size_t index matches features.size()
+    fids[i] = FD::Convert(features[i]);
+  LineOptimizer::CreateOptimizationDirections(
+     fids,
+     conf["random_directions"].as<unsigned int>(),
+     &rng,
+     &directions);
+  unsigned dev_set_size = conf["dev_set_size"].as<unsigned>();
+  for (unsigned i = 0; i < dev_set_size; ++i) {
+    for (unsigned j = 0; j < directions.size(); ++j) {
+      cout << forest_repository << '/' << i << ".json.gz " << i << ' ';  // forest path and sentence id
+      print(cout, origin, "=", ";");  // starting point as name=value;...
+      cout << ' ';
+      print(cout, directions[j], "=", ";");  // search direction in the same format
+      cout << endl;
+    }
+  }
+  return 0;
+}
diff --git a/dpmert/mr_dpmert_map.cc b/dpmert/mr_dpmert_map.cc
new file mode 100644
index 00000000..f3304f0f
--- /dev/null
+++ b/dpmert/mr_dpmert_map.cc
@@ -0,0 +1,112 @@
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "ns.h"
+#include "ns_docscorer.h"
+#include "ces.h"
+#include "filelib.h"
+#include "stringlib.h"
+#include "sparse_vector.h"
+#include "mert_geometry.h"
+#include "inside_outside.h"
+#include "error_surface.h"
+#include "b64tools.h"
+#include "hg_io.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+// Parse the mapper's command line into *conf. When no reference is given, or
+// when help is requested, the usage text goes to stderr and the process exits(1).
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description mapper_opts("Configuration options");
+  mapper_opts.add_options()
+        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)")
+        ("source,s",po::value<string>(), "Source file (ignored, except for AER)")
+        ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric being optimized")
+        ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)")
+        ("help,h", "Help");
+  po::options_description all_opts;
+  all_opts.add(mapper_opts);
+  po::store(parse_command_line(argc, argv, all_opts), *conf);
+  const bool missing_refs = conf->count("reference") == 0;
+  if (missing_refs)
+    cerr << "Please specify one or more references using -r <REF.TXT>\n";
+  // nothing missing and no help requested: the configuration is usable
+  if (!missing_refs && !conf->count("help")) return;
+  cerr << all_opts << endl;
+  exit(1);
+}
+
+bool ReadSparseVectorString(const string& s, SparseVector<double>* v) {  // parses "name=value;name=value;..." into *v; false on malformed input
+#if 0
+  // this should work, but untested.
+  std::istringstream i(s);
+  i>>*v;
+#else
+  vector<string> fields;
+  Tokenize(s, ';', &fields);  // one "name=value" entry per field
+  if (fields.empty()) return false;  // a string with no fields is reported as failure
+  for (int i = 0; i < fields.size(); ++i) {
+    vector<string> pair(2);
+    Tokenize(fields[i], '=', &pair);  // presumably replaces pair's contents with the pieces -- confirm against Tokenize
+    if (pair.size() != 2) {
+      cerr << "Error parsing vector string: " << fields[i] << endl;
+      return false;  // entries parsed before the bad field have already been written to *v
+    }
+    v->set_value(FD::Convert(pair[0]), atof(pair[1].c_str()));  // atof: text that is not a number becomes 0.0 without an error
+  }
+  return true;
+#endif
+}
+
+int main(int argc, char** argv) {  // mapper: one error surface record on stdout per input line
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
+  cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl;
+  Hypergraph hg;  // kept across iterations so consecutive lines for the same file reuse it
+  string last_file;
+  ReadFile in_read(conf["input"].as<string>());
+  istream &in=*in_read.stream();
+  while(in) {
+    string line;
+    getline(in, line);
+    if (line.empty()) continue;  // also covers the failed getline at end of input
+    istringstream is(line);
+    int sent_id;  // NOTE(review): stays uninitialized if the extraction below fails
+    string file, s_origin, s_direction;
+    // path-to-file (JSON) sent_id starting-point search-direction
+    is >> file >> sent_id >> s_origin >> s_direction;  // NOTE(review): extraction result is not checked
+    SparseVector<double> origin;
+    ReadSparseVectorString(s_origin, &origin);  // NOTE(review): return value ignored
+    SparseVector<double> direction;
+    ReadSparseVectorString(s_direction, &direction);  // NOTE(review): return value ignored
+    // cerr << "File: " << file << "\nDir: " << direction << "\n   X: " << origin << endl;
+    if (last_file != file) {  // only re-read the forest when the file name changes
+      last_file = file;
+      ReadFile rf(file);
+      HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+    }
+    const ConvexHullWeightFunction wf(origin, direction);
+    const ConvexHull hull = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
+
+    ErrorSurface es;
+    ComputeErrorSurface(*ds[sent_id], hull, &es, metric, hg);  // sent_id selects the reference scorer
+    //cerr << "Viterbi envelope has " << ve.size() << " segments\n";
+    // cerr << "Error surface has " << es.size() << " segments\n";
+    string val;
+    es.Serialize(&val);
+    cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t';  // key = "M <origin> <direction>", then a tab
+    B64::b64encode(val.c_str(), val.size(), &cout);  // value = base64 of the serialized surface
+    cout << endl << flush;  // one complete record per input line, flushed immediately
+  }
+  return 0;
+}
diff --git a/dpmert/mr_dpmert_reduce.cc b/dpmert/mr_dpmert_reduce.cc
new file mode 100644
index 00000000..dda61f88
--- /dev/null
+++ b/dpmert/mr_dpmert_reduce.cc
@@ -0,0 +1,77 @@
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "sparse_vector.h"
+#include "error_surface.h"
+#include "line_optimizer.h"
+#include "b64tools.h"
+#include "stringlib.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+// Parse the reducer's options into *conf; print usage and exit(1) when -m is absent or help is requested.
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description reducer_opts("Configuration options");
+  reducer_opts.add_options()
+        ("evaluation_metric,m",po::value<string>(), "Evaluation metric (IBM_BLEU, etc.)")
+        ("help,h", "Help");
+  po::options_description all_opts;
+  all_opts.add(reducer_opts);
+  po::store(parse_command_line(argc, argv, all_opts), *conf);
+  const bool have_metric = conf->count("evaluation_metric") > 0;
+  if (have_metric && !conf->count("help")) return;
+  cerr << all_opts << endl;
+  exit(1);
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
+  LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
+  if (UppercaseString(evaluation_metric) == "TER")
+    opt_type = LineOptimizer::MINIMIZE_SCORE;
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+
+  vector<ErrorSurface> esv;
+  string last_key, line, key, val;
+  while(getline(cin, line)) {
+    size_t ks = line.find("\t");
+    assert(string::npos != ks);
+    assert(ks > 2);
+    key = line.substr(2, ks - 2);
+    val = line.substr(ks + 1);
+    if (key != last_key) {
+      if (!last_key.empty()) {
+	float score;
+        double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score);
+	cout << last_key << "|" << x << "|" << score << endl;
+      }
+      last_key.swap(key);
+      esv.clear();
+    }
+    if (val.size() % 4 != 0) {
+      cerr << "B64 encoding error 1! Skipping.\n";
+      continue;
+    }
+    string encoded(val.size() / 4 * 3, '\0');
+    if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) {
+      cerr << "B64 encoding error 2! Skipping.\n";
+      continue;
+    }
+    esv.push_back(ErrorSurface());
+    esv.back().Deserialize(encoded);
+  }
+  if (!esv.empty()) {
+    float score;
+    double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score);
+    cout << last_key << "|" << x << "|" << score << endl;
+  }
+  return 0;
+}
diff --git a/dpmert/parallelize.pl b/dpmert/parallelize.pl
new file mode 100755
index 00000000..7d0365cc
--- /dev/null
+++ b/dpmert/parallelize.pl
@@ -0,0 +1,423 @@
+#!/usr/bin/env perl
+
+# Author: Adam Lopez
+#
+# This script takes a command that processes input
+# from stdin one-line-at-time, and parallelizes it
+# on the cluster using David Chiang's sentserver/
+# sentclient architecture.
+#
+# Prerequisites: the command *must* read each line
+# without waiting for subsequent lines of input
+# (for instance, a command which must read all lines
+# of input before processing will not work) and
+# return it to the output *without* buffering
+# multiple lines.
+
+#TODO: if -j 1, run immediately, not via sentserver?  possible differences in environment might make debugging harder
+
+#ANNOYANCE: if input is shorter than -j n lines, or at the very last few lines, repeatedly sleeps.  time cut down to 15s from 60s
+
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+use LocalConfig;
+
+use Cwd qw/ abs_path cwd getcwd /; 
+use File::Temp qw/ tempfile /;
+use Getopt::Long;
+use IPC::Open2;
+use strict;
+use POSIX ":sys_wait_h";
+
+use File::Basename;
+my $myDir = dirname(__FILE__);
+print STDERR __FILE__." -> $myDir\n";
+push(@INC, $myDir);
+require "libcall.pl";
+
+my $tailn=5; # +0 = concatenate all the client logs.  5 = last 5 lines
+my $recycle_clients;    # spawn new clients when previous ones terminate
+my $stay_alive;      # don't let server die when having zero clients
+my $joblist = "";       # "|"-joined job ids, built by launch_job and used as a regex on qstat output
+my $errordir="";        # directory for the per-client .ER/.OU files (--error-dir)
+my $multiline;          # --multi-line: passed to sentserver as -m
+my @files_to_stage;     # filled by --file; not referenced elsewhere in this part of the script
+my $numnodes = 8;       # number of client jobs to launch (--jobs)
+my $user = $ENV{"USER"};
+my $pmem = "9g";        # handed to qsub_args() when a job is submitted (--pmem)
+my $basep=50300;        # settable with --baseport
+my $randp=300;          # the starting port is drawn at random from this many candidates
+my $tryp=50;            # how many consecutive ports are tried before giving up
+my $no_which;           # --no-which: use the program name as given instead of resolving it with `which`
+my $no_cd;              # --no-cd: do not prefix the client script with a cd to the current directory
+
+my $DEBUG=$ENV{DEBUG};  # debug tracing is switched on through the environment
+print STDERR "DEBUG=$DEBUG output enabled.\n" if $DEBUG;
+my $verbose = 1;        # on by default; --verbose can only set it to true again
+# Print the arguments, followed by a newline, on STDERR when $verbose is set.
+sub verbose {
+    return unless $verbose;
+    print STDERR @_,"\n";
+}
+# When $DEBUG is set, print the space-joined arguments on STDERR, tagged with the caller's file and line.
+sub debug {
+    return unless $DEBUG;
+    my (undef, $file, $lineno) = caller;
+    print STDERR "DEBUG: $file($lineno): ",join(' ',@_),"\n";
+}
+my $is_shell_special=qr.[ \t\n\\><|&;"'`~*?{}$!()].;
+my $shell_escape_in_quote=qr.[\\"\$`!].;
+sub escape_shell {   # quote one argument for the shell; undef stays undef, the empty string becomes ""
+    my ($arg)=@_;
+    return undef unless defined $arg;
+    return '""' unless length $arg;   # test the length, not truth: the argument "0" must be kept
+    if ($arg =~ /$is_shell_special/) {   # needs quoting: backslash-escape, then wrap in double quotes
+        $arg =~ s/($shell_escape_in_quote)/\\$1/g;
+        return "\"$arg\"";
+    }
+    return $arg;   # nothing special: pass through unchanged
+}
+sub preview_files {   # `tail -n` over the listed files; $n defaults to $tailn, $footer appends the file list
+    my ($files,$skipempty,$footer,$n)=@_;
+    $n=$tailn unless defined $n;
+    my @kept=$skipempty ? (grep { ! -z $_ } @$files) : @$files;
+    my $quoted=join(' ',map {escape_shell($_)} @kept);
+    my $tail=unchecked_output("tail -n $n $quoted");
+    return $footer ? $tail."\nNONEMPTY FILES:\n$quoted\n" : $tail;
+}
+sub prefix_dirname($) {
+    #like `dirname` but if the path ends in / then return the whole thing
+    local ($_)=@_;
+    if (/\/$/) {
+        $_;
+    } else {
+        s#/[^/]*$##;   # drop the final component whatever its length
+        $_ ? $_ : '';
+    }
+}
+sub ensure_final_slash($) {   # append "/" unless the path already ends with one
+    my ($path)=@_;
+    return $path =~ m#/$# ? $path : $path."/";
+}
+# Return $base joined with $ext.
+#   $mkdir     - when true, create the directory part with "mkdir -p"
+#   $baseisdir - when true, $base itself is the directory to create
+# A $base that already is a directory always gets "/" appended before $ext.
+sub extend_path($$;$$) {
+    my ($base,$ext,$mkdir,$baseisdir)=@_;
+    return $base."/".$ext if -d $base;
+    # $base is not an existing directory: work out which directory to create
+    my $target=$baseisdir ? $base : prefix_dirname($base);
+    if ($baseisdir && $base !~ /\/$/) {
+        $base.='/';
+    }
+    if ($mkdir) {
+        check_call("/bin/mkdir","-p",$target);
+    }
+    return $base.$ext;
+}
+
+my $abscwd=abs_path(&getcwd);   # absolute working directory; the client script cd's here unless --no-cd
+sub print_help;                 # forward declaration; defined at the end of the script
+
+my $use_fork;                   # --use-fork: run clients as local child processes instead of qsub jobs
+my @pids;                       # child pids recorded by launch_job_fork
+
+# Process command-line options; at least one non-option argument (the command) must remain
+unless (GetOptions(
+      "stay-alive" => \$stay_alive,
+      "recycle-clients" => \$recycle_clients,
+      "error-dir=s" => \$errordir,
+      "multi-line" => \$multiline,
+      "file=s" => \@files_to_stage,
+      "use-fork" => \$use_fork,
+      "verbose" => \$verbose,
+      "jobs=i" => \$numnodes,
+      "pmem=s" => \$pmem,
+        "baseport=i" => \$basep,
+#       "iport=i" => \$randp, #for short name -i
+        "no-which!" => \$no_which,
+            "no-cd!" => \$no_cd,
+            "tailn=s" => \$tailn,
+) && scalar @ARGV){
+  print_help();
+    die "bad options.";   # also reached when every option parsed but no command was given
+}
+
+my $cmd = "";                    # full command line that every client will run
+my $prog=shift;                  # first remaining argument is the program to parallelize
+if ($no_which) {
+    $cmd=$prog;
+} else {
+    $cmd=check_output("which $prog");   # resolve the program through PATH
+    chomp $cmd;
+    die "$prog not found - $cmd" unless $cmd;
+}
+#$cmd=abs_path($cmd);
+for my $arg (@ARGV) {
+    $cmd .= " ".escape_shell($arg);     # the remaining arguments are appended shell-escaped
+}
+die "Please specify a command to parallelize\n" if $cmd eq '';
+
+my $cdcmd=$no_cd ? '' : ("cd ".escape_shell($abscwd)."\n");   # first line of the client script, or empty
+
+my $executable = $cmd;
+$executable =~ s/^\s*(\S+)($|\s.*)/$1/;   # keep only the first word of the command
+$executable=check_output("basename $executable");   # NOTE(review): the word is passed to the shell unescaped
+chomp $executable;
+
+
+print STDERR "Parallelizing ($numnodes ways): $cmd\n\n";
+
+# create -e dir and save .sh (a temporary directory is used when --error-dir was not given)
+use File::Temp qw/tempdir/;
+unless ($errordir) {
+    $errordir=tempdir("$executable.XXXXXX",CLEANUP=>1);
+}
+if ($errordir) {
+    my $scriptfile=extend_path("$errordir/","$executable.sh",1,1);
+    -d $errordir || die "should have created -e dir $errordir";
+    open(SF,">",$scriptfile) || die "cannot write $scriptfile: $!";   # parentheses keep || from binding to $scriptfile
+    print SF "$cdcmd$cmd\n";
+    close SF;
+    chmod 0755,$scriptfile;
+    $errordir=abs_path($errordir);
+    &verbose("-e dir: $errordir");
+}
+
+# set cleanup handler: INT, TERM and HUP all run cleanup_and_die
+my @cleanup_cmds;          # shell commands (qdel ...) queued by launch_job
+sub cleanup;
+sub cleanup_and_die;
+$SIG{INT} = "cleanup_and_die";
+$SIG{TERM} = "cleanup_and_die";
+$SIG{HUP} = "cleanup_and_die";
+
+# other subs:
+sub numof_live_jobs;
+sub launch_job_on_node;    # NOTE(review): no definition with this name is visible; launch_job is what gets called
+
+
+# vars
+my $mydir = check_output("dirname $0"); chomp $mydir;   # sentserver and sentclient are expected next to this script
+my $sentserver = "$mydir/sentserver";
+my $sentclient = "$mydir/sentclient";
+my $host = check_output("hostname");   # clients connect back to this host
+chomp $host;
+
+
+# find open port: start at a random offset above $basep and probe upwards
+srand;
+my $port = $basep+int(rand($randp));   # honours --baseport (default 50300)
+my $endp=$port+$tryp;                  # give up after $tryp further ports
+sub listening_port_lines {
+    my $quiet=$verbose?'':'2>/dev/null';
+    return unchecked_output("netstat -a -n $quiet | grep LISTEN | grep -i tcp");   # LISTEN also matches LISTENING
+}
+my $netstat=&listening_port_lines;
+
+if ($verbose){ print STDERR "Testing port $port...";}
+
+while ($netstat=~/$port/ || &listening_port_lines=~/$port/){   # busy if the number shows up in either snapshot
+  if ($verbose){ print STDERR "port is busy\n";}
+  $port++;
+  if ($port > $endp){
+    die "Unable to find open port\n";
+  }
+  if ($verbose){ print STDERR "Testing port $port... "; }
+}
+if ($verbose){
+  print STDERR "port $port is available\n";
+}
+
+my $key = int(rand()*1000000);   # random token given to sentserver (-k) and to every sentclient (host:port:key)
+
+my $multiflag = "";
+if ($multiline){ $multiflag = "-m"; print STDERR "expecting multiline output.\n"; }
+my $stay_alive_flag = "";
+if ($stay_alive){ $stay_alive_flag = "--stay-alive"; print STDERR "staying alive while no clients are connected.\n"; }
+
+my $node_count = 0;   # number of clients launched so far; used to name them
+my $script = "";      # text of the client script, set in the child below
+# fork == one process runs the sentserver, while the
+# other spawns the sentclient commands.
+my $pid = fork;
+if ($pid == 0) { # child: launches the clients
+  sleep 8; # give other process time to start sentserver
+  $script = "$cdcmd$sentclient $host:$port:$key $cmd";
+
+  if ($verbose){
+    print STDERR "Client script:\n====\n";
+    print STDERR $script;
+    print STDERR "====\n";
+  }
+  for (my $jobn=0; $jobn<$numnodes; $jobn++){
+    launch_job();
+  }
+  if ($recycle_clients) {
+    my $ret;
+    my $livejobs;
+    while (1) {
+      $ret = waitpid($pid, WNOHANG);   # $pid is 0 in this branch
+      #print STDERR "waitpid $pid ret = $ret \n";
+      last if ($ret != 0);
+      $livejobs = numof_live_jobs();
+      if ($numnodes >= $livejobs ) {  # a client terminated, OR # lines of input was less than -j. NOTE(review): >= also fires when exactly $numnodes are alive -- confirm intent
+        print STDERR "num of requested nodes = $numnodes; num of currently live jobs = $livejobs; Client terminated - launching another.\n";
+        launch_job();
+      } else {
+        sleep 15;
+      }
+    }
+  }
+  print STDERR "CHILD PROCESSES SPAWNED ... WAITING\n";
+  for my $p (@pids) {   # only --use-fork clients are recorded in @pids
+    waitpid($p, 0);
+  }
+} else {   # parent: runs sentserver in the foreground
+#  my $todo = "$sentserver -k $key $multiflag $port ";
+  my $todo = "$sentserver -k $key $multiflag $port $stay_alive_flag ";
+  if ($verbose){ print STDERR "Running: $todo\n"; }
+  check_call($todo);
+  print STDERR "Call to $sentserver returned.\n";
+  cleanup();   # NOTE(review): launch_job ran in the child, so the lists it filled (@cleanup_cmds, @outs, @errors) are empty in this process
+  exit(0);
+}
+
+# Count the lines of `qstat` output that match the job-id pattern in $joblist.
+sub numof_live_jobs {
+  die "not implemented" if $use_fork;
+  # unchecked call:
+  # We can probably continue decoding if the qstat error is only temporary
+  my @qstat_lines = split(/\n/, unchecked_output("qstat"));
+  my $live = grep { /$joblist/ } @qstat_lines;
+  return $live;
+}
+my (@errors,@outs,@cmds);
+
+sub launch_job {   # submit one client job through qsub (delegates to launch_job_fork under --use-fork)
+    if ($use_fork) { return launch_job_fork(); }
+    my $errorfile = "/dev/null";
+    my $outfile = "/dev/null";
+    $node_count++;
+    my $clientname = $executable;
+    $clientname =~ s/^(.{4}).*$/$1/;   # cut names longer than four characters down to four
+    $clientname = "$clientname.$node_count";
+    if ($errordir){
+      $errorfile = "$errordir/$clientname.ER";
+      $outfile = "$errordir/$clientname.OU";
+      push @errors,$errorfile;
+      push @outs,$outfile;
+    }
+    my $todo = qsub_args($pmem) . " -N $clientname -o $outfile -e $errorfile";
+    push @cmds,$todo;
+
+    print STDERR "Running: $todo\n";
+    local(*QOUT, *QIN);
+    open2(\*QOUT, \*QIN, $todo) or die "Failed to open2: $!";
+    print QIN $script;   # the client script is fed to qsub on its stdin
+    close QIN;
+    while (my $jobid=<QOUT>){   # every line qsub prints is treated as a job id
+      chomp $jobid;
+      if ($verbose){ print STDERR "Launched client job: $jobid"; }
+      $jobid =~ s/^(\d+)(.*?)$/$1/g;   # "<digits><anything>" -> digits
+            $jobid =~ s/^Your job (\d+) .*$/$1/;   # "Your job <digits> ..." -> digits
+      print STDERR " short job id $jobid\n";
+            if ($verbose){
+                print STDERR "cd: $abscwd\n";
+                print STDERR "cmd: $cmd\n";
+            }
+      if ($joblist eq "") { $joblist = $jobid; }   # string comparison: == would compare numerically
+      else {$joblist = $joblist . "\|" . $jobid; }
+      my $cleanfn="qdel $jobid 2> /dev/null";
+      push(@cleanup_cmds, $cleanfn);   # run later by cleanup()
+    }
+    close QOUT;
+}
+
+# Fork-based client launch: the child writes $script to a temp file and runs it under bash.
+sub launch_job_fork {
+  $node_count++;
+  (my $client = $executable) =~ s/^(.{4}).*$/$1/;
+  $client .= ".$node_count";
+  my ($stdout_path, $stderr_path) = ("/dev/null", "/dev/null");
+  if ($errordir){
+    $stderr_path = "$errordir/$client.ER";
+    $stdout_path = "$errordir/$client.OU";
+    push @errors,$stderr_path;
+    push @outs,$stdout_path;
+  }
+  my $child = fork;
+  if ($child != 0) {
+    push @pids, $child;
+    return;
+  }
+  # child process from here on
+  my ($fh, $scr_name) = get_temp_script();
+  print $fh $script;
+  close $fh;
+  my $todo = "/bin/bash -xeo pipefail $scr_name 1> $stdout_path 2> $stderr_path";
+  print STDERR "EXEC: $todo\n";
+  my $out = check_output("$todo");
+  unlink $scr_name or warn "Failed to remove $scr_name";
+  exit 0;
+}
+
+sub get_temp_script {   # new temp file from the template workXXXX with a .sh suffix
+  my ($handle, $path) = tempfile( "workXXXX", SUFFIX => '.sh');
+  return ($handle, $path);
+}
+
+sub cleanup_and_die {   # installed as the INT/TERM/HUP signal handler
+  cleanup();
+  die "\n";             # the trailing newline keeps Perl from appending "at FILE line N"
+}
+
+sub cleanup {   # run the queued cleanup commands, then show the tails of the client logs
+  print STDERR "Cleaning up...\n";
+  for my $cleanup_cmd (@cleanup_cmds){
+    print STDERR "  Cleanup command: $cleanup_cmd\n";
+    system($cleanup_cmd);   # these are shell commands (qdel ...); eval would try to parse them as Perl
+  }
+  print STDERR "outputs:\n",preview_files(\@outs,1),"\n";
+  print STDERR "errors:\n",preview_files(\@errors,1),"\n";
+  print STDERR "cmd:\n",$cmd,"\n";
+  print STDERR " cat $errordir/*.ER\nfor logs.\n";
+  print STDERR "Cleanup finished.\n";
+}
+
+sub print_help   # print the usage text on STDOUT
+{
+  my $name = check_output("basename $0"); chomp $name;
+  print << "Help";
+
+usage: $name [options]
+
+  Automatic black-box parallelization of commands.
+
+options:
+
+  --use-fork
+    Instead of using qsub, use fork.
+
+  -e, --error-dir <dir>
+    Retain output files from jobs in <dir>, rather
+    than silently deleting them.
+
+  -m, --multi-line
+    Expect that command may produce multiple output
+    lines for a single input line.  $name makes a
+    reasonable attempt to obtain all output before
+    processing additional inputs.  However, use of this
+    option is inherently unsafe.
+
+  -v, --verbose
+    Print diagnostic information on stderr.
+
+  -j, --jobs
+    Number of jobs to use.
+
+  -p, --pmem
+    pmem setting for each job.
+
+Help
+}
diff --git a/dpmert/sentclient.c b/dpmert/sentclient.c
new file mode 100644
index 00000000..91d994ab
--- /dev/null
+++ b/dpmert/sentclient.c
@@ -0,0 +1,76 @@
+/* Copyright (c) 2001 by David Chiang. All rights reserved.*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <string.h>
+
+#include "sentserver.h"
+
+int main (int argc, char *argv[]) { /* connect to host[:port[:key]], then exec the command with stdin/stdout on the socket */
+  int sock, port;
+  char *s, *key;
+  struct hostent *hp;
+  struct sockaddr_in server;
+  int errors = 0;
+
+  if (argc < 3) {
+    fprintf(stderr, "Usage: sentclient host[:port[:key]] command [args ...]\n");
+    exit(1);
+  }
+
+  s = strchr(argv[1], ':');
+  key = NULL;
+
+  if (s == NULL) {
+    port = DEFAULT_PORT;
+  } else {
+    *s = '\0'; /* terminate the host name in place */
+    s+=1;
+	/* dumb hack: a second ':' separates the port from the key */
+	key = strchr(s, ':');
+	if (key != NULL){
+		*key = '\0';
+		key += 1;
+	}
+    port = atoi(s);
+  }
+
+  sock = socket(AF_INET, SOCK_STREAM, 0);
+  if (sock < 0) { perror("socket()"); exit(1); }
+  hp = gethostbyname(argv[1]);
+  if (hp == NULL) {
+    fprintf(stderr, "unknown host %s\n", argv[1]);
+    exit(1);
+  }
+
+  bzero((char *)&server, sizeof(server));
+  bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length);
+  server.sin_family = hp->h_addrtype;
+  server.sin_port = htons(port);
+
+  while (connect(sock, (struct sockaddr *)&server, sizeof(server)) < 0) { /* retry once per second, give up after the sixth failure */
+    perror("connect()");
+    sleep(1);
+    errors++;
+    if (errors > 5)
+      exit(1);
+  }
+
+  close(0);
+  close(1);
+  dup2(sock, 0); /* the command reads its input from the server ... */
+  dup2(sock, 1); /* ... and writes its output back to it */
+
+  if (key != NULL){ /* the key, if any, is sent as the first line */
+	write(1, key, strlen(key));
+	write(1, "\n", 1);
+  }
+  execvp(argv[2], argv+2);
+  perror("execvp()"); /* execvp only returns when it failed */
+  return 1;
+}
diff --git a/dpmert/sentserver.c b/dpmert/sentserver.c
new file mode 100644
index 00000000..c20b4fa6
--- /dev/null
+++ b/dpmert/sentserver.c
@@ -0,0 +1,515 @@
+/* Copyright (c) 2001 by David Chiang. All rights reserved.*/
+
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "sentserver.h"
+
+#define MAX_CLIENTS 64
+
+struct clientinfo { /* per-connection data handed to the new_client thread */
+  int s; /* file descriptor used to write to and read from the client */
+  struct sockaddr_in sin;
+};
+
+struct line { /* one input line and, once finished, the output produced for it */
+  int id; /* sequence number, taken from n_sent when the line is read */
+  char *s; /* heap string: the input line until queue_finish() swaps in the client's output */
+  int status; /* one of the STATUS_* values */
+  struct line *next;
+} *head, **ptail; /* head: first node not yet flushed; ptail: slot where the next node is linked (NULL once input is exhausted) */
+
+int n_sent = 0, n_received=0, n_flushed=0; /* progress counters reported by queue_finish() */
+
+#define STATUS_RUNNING 0 /* handed to a client, no result yet */
+#define STATUS_ABORTED 1 /* its client died; queue_get() will hand it out again */
+#define STATUS_FINISHED 2 /* result stored, waiting to be flushed in order */
+
+pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER; /* taken around accesses to the line queue */
+pthread_mutex_t clients_mutex = PTHREAD_MUTEX_INITIALIZER; /* taken around updates of n_clients */
+pthread_mutex_t input_mutex = PTHREAD_MUTEX_INITIALIZER; /* taken around reads from standard input */
+
+int n_clients = 0; /* number of currently connected clients */
+int s;
+int expect_multiline_output = 0; /* passed to read_line() when reading a client's answer */
+int log_mutex = 0; /* non-zero enables the lock/unlock trace messages on stderr */
+int stay_alive = 0;		/* don't panic and die with zero clients */
+
+void queue_finish(struct line *node, char *s, int fid);
+char * read_line(int fd, int multiline);
+void done (int code);
+
+struct line * queue_get(int fid) { /* next line to hand to a client, or NULL when input is exhausted; fid is only used in log messages */
+	struct line *cur;
+	char *s, *synch;
+
+	if (log_mutex) fprintf(stderr, "Getting for data for fid %d\n", fid);
+	if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+	pthread_mutex_lock(&queue_mutex);
+
+	/* First, check for aborted sentences. */
+
+	if (log_mutex) fprintf(stderr, "  Checking queue for aborted jobs (fid %d)\n", fid);
+	for (cur = head; cur != NULL; cur = cur->next) {
+		if (cur->status == STATUS_ABORTED) {
+			cur->status = STATUS_RUNNING;
+
+			if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+			pthread_mutex_unlock(&queue_mutex);
+
+			return cur;
+		}
+	}
+	if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+	pthread_mutex_unlock(&queue_mutex);
+
+	/* Otherwise, read a new one. */
+	if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid);
+	if (log_mutex) fprintf(stderr, "  Reading input for new data (fid %d)\n", fid);
+	pthread_mutex_lock(&input_mutex);
+	s = read_line(0,0);
+
+	while (s) {
+		if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+		pthread_mutex_lock(&queue_mutex);
+		if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid);
+		pthread_mutex_unlock(&input_mutex);
+
+		cur = malloc(sizeof (struct line));
+		cur->id = n_sent;
+		cur->s = s;
+		cur->next = NULL;
+		cur->status = STATUS_RUNNING; /* set before the node becomes reachable from the queue */
+		*ptail = cur;
+		ptail = &cur->next;
+
+		n_sent++;
+
+		if (strcmp(s,"===SYNCH===\n")==0){
+			fprintf(stderr, "Received ===SYNCH=== signal (fid %d)\n", fid);
+			// Note: queue_finish calls free(cur->s).
+			// Therefore we need to create a new string here.
+			synch = malloc((strlen("===SYNCH===\n")+2) * sizeof (char));
+			synch = strcpy(synch, s);
+
+			if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+			pthread_mutex_unlock(&queue_mutex);
+			queue_finish(cur, synch, fid); /* handles its own lock */
+
+			if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid);
+			if (log_mutex) fprintf(stderr, "  Reading input for new data (fid %d)\n", fid);
+			pthread_mutex_lock(&input_mutex);
+
+			s = read_line(0,0);
+		} else {
+			if (log_mutex) fprintf(stderr, "  Received new data %d (fid %d)\n", cur->id, fid);
+			/* status is already STATUS_RUNNING: it was set when the node was created */
+			if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+			pthread_mutex_unlock(&queue_mutex);
+			return cur;
+		}
+	}
+
+	if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid);
+	pthread_mutex_unlock(&input_mutex);
+	/* Only way to reach this point: no more input */
+
+	if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+	pthread_mutex_lock(&queue_mutex);
+	if (head == NULL) {
+		fprintf(stderr, "Reached end of file. Exiting.\n");
+		done(0);
+	} else
+		ptail = NULL; /* This serves as a signal that there is no more input */
+	if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+	pthread_mutex_unlock(&queue_mutex);
+
+	return NULL;
+}
+
+void queue_panic() { /* flush the finished nodes at the head of the queue, close stdout and exit through done(1) */
+	struct line *next;
+	while (head && head->status == STATUS_FINISHED) { /* stops at the first node that is not finished */
+		/* Write out finished sentences */
+		if (head->status == STATUS_FINISHED) {
+			fputs(head->s, stdout);
+			fflush(stdout);
+		}
+		/* Write out blank line for unfinished sentences.
+		   NOTE(review): never true here, the loop condition only admits FINISHED nodes -- confirm which behaviour is intended */
+		if (head->status == STATUS_ABORTED) {
+			fputs("\n", stdout);
+			fflush(stdout);
+		}
+		/* By definition, there cannot be any RUNNING sentences, since
+		this function is only called when n_clients == 0 */
+		free(head->s);
+		next = head->next;
+		free(head);
+		head = next;
+		n_flushed++;
+	}
+	fclose(stdout);
+	fprintf(stderr, "All clients died. Panicking, flushing completed sentences and exiting.\n");
+	done(1);
+}
+
+/* Mark `node` as aborted; when no client is connected any more, either warn
+ * (stay_alive set) or give up through queue_panic(). fid is only used for logging. */
+void queue_abort(struct line *node, int fid) {
+	if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+	pthread_mutex_lock(&queue_mutex);
+	node->status = STATUS_ABORTED;
+	const int orphaned = (n_clients == 0);
+	if (orphaned && !stay_alive)
+		queue_panic();
+	else if (orphaned)
+		fprintf(stderr, "Warning! No live clients detected! Staying alive, will retry soon.\n");
+	if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+	pthread_mutex_unlock(&queue_mutex);
+}
+
+
+/* Debugging aid: write the id and status of every queued line to stderr.
+ * Takes no lock itself. */
+void queue_print() {
+  const struct line *node = head;
+
+  fprintf(stderr, "  Queue\n");
+  while (node != NULL) {
+    if (node->status == STATUS_RUNNING)
+      fprintf(stderr, "    %d running  ", node->id);
+    else if (node->status == STATUS_ABORTED)
+      fprintf(stderr, "    %d aborted  ", node->id);
+    else if (node->status == STATUS_FINISHED)
+      fprintf(stderr, "    %d finished ", node->id);
+    /* any other status prints just the newline */
+    fprintf(stderr, "\n");
+    //fprintf(stderr, node->s);
+    node = node->next;
+  }
+}
+
+void queue_finish(struct line *node, char *s, int fid) { /* store result s (ownership passes to the queue) and flush finished nodes in order */
+  struct line *next;
+  if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+  pthread_mutex_lock(&queue_mutex);
+
+  free(node->s); /* the input text is no longer needed */
+  node->s = s;
+  node->status = STATUS_FINISHED;
+  n_received++;
+
+  /* Flush out finished nodes */
+  while (head && head->status == STATUS_FINISHED) {
+    /* only the leading run of finished nodes is written, so output keeps input order */
+    if (log_mutex) fprintf(stderr, "  Flushing finished node %d\n", head->id);
+
+    fputs(head->s, stdout);
+    fflush(stdout);
+    if (log_mutex) fprintf(stderr, "  Flushed node %d\n", head->id);
+    free(head->s);
+
+    next = head->next;
+    free(head);
+
+    head = next;
+
+    n_flushed++;
+
+    if (head == NULL) { /* empty queue */
+      if (ptail == NULL) { /* This can only happen if set in queue_get as signal that there is no more input. */
+        fprintf(stderr, "All sentences finished. Exiting.\n");
+        done(0);
+      } else /* ptail pointed at something which was just popped off the stack -- reset to head*/
+        ptail = &head;
+    }
+  }
+
+  if (log_mutex && head) fprintf(stderr, "  Flushing output %d\n", head->id); /* head is NULL when the loop emptied the queue */
+  fflush(stdout);
+  fprintf(stderr, "%d sentences sent, %d sentences finished, %d sentences flushed\n", n_sent, n_received, n_flushed);
+
+  if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+  pthread_mutex_unlock(&queue_mutex);
+
+}
+
+char * read_line(int fd, int multiline) { /* returns a malloc'd, newline-terminated string, or NULL at end of file or after repeated read errors */
+  int size = 80;
+  char errorbuf[100];
+  char *s = malloc(size+2); /* +2 leaves room for the final '\n' and '\0' */
+  int result, errors=0;
+  int i = 0;
+
+  result = read(fd, s+i, 1); /* input is consumed one byte at a time */
+
+  while (1) {
+    if (result < 0) {
+      perror("read()");
+      sprintf(errorbuf, "Error code: %d\n", errno);
+      fprintf(stderr, "%s", errorbuf); /* never pass a non-literal buffer as the format string */
+      errors++;
+      if (errors > 5) {
+	free(s);
+	return NULL;
+      } else {
+	sleep(1); /* retry after delay */
+      }
+    } else if (result == 0) {
+      break;
+    } else if (multiline==0 && s[i] == '\n') {
+      break;
+    } else {
+      if (s[i] == '\n'){
+	/* if we've reached this point,
+	   then multiline must be 1, and we're
+	   going to poll the fd for an additional
+	   line of data.  The basic design is to
+	   run a select on the filedescriptor fd.
+	   Select will return under two conditions:
+	   if there is data on the fd, or if a
+	   timeout is reached.  We'll select on this
+	   fd.  If select returns because there's data
+	   ready, keep going; else assume there's no
+	   more and return the data we already have.
+	*/
+
+	fd_set set;
+	FD_ZERO(&set);
+	FD_SET(fd, &set);
+
+	struct timeval timeout;
+	timeout.tv_sec = 3; // number of seconds for timeout
+	timeout.tv_usec = 0;
+
+	int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout);
+	if (ready<1){
+	  break; // no more data, stop looping
+	}
+      }
+      i++;
+
+      if (i == size) { /* buffer full: double it */
+	size = size*2;
+	s = realloc(s, size+2);
+      }
+    }
+
+    result = read(fd, s+i, 1);
+  }
+
+  if (result == 0 && i == 0) { /* end of file */
+    free(s);
+    return NULL;
+  }
+
+  s[i] = '\n'; /* also terminates a last line that had no newline */
+  s[i+1] = '\0';
+
+  return s;
+}
+
+/* Thread entry point for one client; arg is its malloc'd struct clientinfo.
+ * Repeats: queue_get() a line, write it to the socket, read_line() the reply
+ * and pass it to queue_finish(); on I/O failure calls queue_abort() and exits. */
+void * new_client(void *arg) {
+  struct clientinfo *client = (struct clientinfo *)arg;
+  struct line *cur;
+  int result;
+  char *s;
+
+  pthread_mutex_lock(&clients_mutex);
+  n_clients++;
+  pthread_mutex_unlock(&clients_mutex);
+
+  fprintf(stderr, "Client connected (%d connected)\n", n_clients);
+
+  for (;;) {
+    cur = queue_get(client->s);
+
+    if (cur) {
+      /* fprintf(stderr, "Sending to client: %s", cur->s); */
+      fprintf(stderr, "Sending data %d to client (fid %d)\n", cur->id, client->s);
+      result = write(client->s, cur->s, strlen(cur->s));
+      /* Check for -1 first: compared against a size_t it converts to a huge
+         unsigned value and the failed write would go unnoticed. */
+      if (result < 0 || (size_t)result < strlen(cur->s)){
+        perror("write()");
+        fprintf(stderr, "Error code: %d\n", errno);
+        pthread_mutex_lock(&clients_mutex);
+        n_clients--;
+        pthread_mutex_unlock(&clients_mutex);
+
+        fprintf(stderr, "Client died (%d connected)\n", n_clients);
+        queue_abort(cur, client->s);
+
+        close(client->s);
+        free(client);
+        pthread_exit(NULL);
+      }
+    } else {
+      /* queue_get() returned no line: dismiss this client */
+      close(client->s);
+      free(client); /* previously leaked on this path */
+      pthread_mutex_lock(&clients_mutex);
+      n_clients--;
+      pthread_mutex_unlock(&clients_mutex);
+      fprintf(stderr, "Client dismissed (%d connected)\n", n_clients);
+      pthread_exit(NULL);
+    }
+
+    s = read_line(client->s,expect_multiline_output);
+    if (s) {
+      /* fprintf(stderr, "Client (fid %d) returned: %s", client->s, s); */
+      fprintf(stderr, "Client (fid %d) returned data %d\n", client->s, cur->id);
+//      queue_print();
+      queue_finish(cur, s, client->s);
+    } else {
+      /* read_line() gave NULL (EOF or error): abort this line and exit */
+      pthread_mutex_lock(&clients_mutex);
+      n_clients--;
+      pthread_mutex_unlock(&clients_mutex);
+      fprintf(stderr, "Client died (%d connected)\n", n_clients);
+      queue_abort(cur, client->s);
+
+      close(client->s);
+      free(client);
+      pthread_exit(NULL);
+    }
+  }
+  return 0;
+}
+
+void done (int code) { /* close s (the socket main() listens on), then exit with status code */
+  close(s);
+  exit(code);
+}
+
+
+
+/* Usage: sentserver [-m] [-k key] [--stay-alive] [port].  Listens on the TCP port and
+ * starts a new_client thread per accepted connection (after a key check with -k). */
+int main (int argc, char *argv[]) {
+  struct sockaddr_in sin, from;
+  int g;
+  socklen_t len;
+  struct clientinfo *client;
+  int port;
+  int opt;
+  int errors = 0;
+  int argi;
+  char *key = NULL, *client_key;
+  int use_key = 0;
+  /* the key stuff here doesn't provide any real measure of security,
+  it's mainly to keep jobs from bumping into each other.  */
+  pthread_t tid;
+  port = DEFAULT_PORT;
+
+  for (argi=1; argi < argc; argi++){
+    if (strcmp(argv[argi], "-m")==0){
+      expect_multiline_output = 1;
+    } else if (strcmp(argv[argi], "-k")==0){
+      argi++;
+      if (argi == argc){
+        fprintf(stderr, "Key must be specified after -k\n");
+        exit(1);
+      }
+      key = argv[argi];
+      use_key = 1;
+    } else if (strcmp(argv[argi], "--stay-alive")==0){
+      stay_alive = 1;    /* dont panic and die with zero clients */
+    } else {
+      port = atoi(argv[argi]);
+    }
+  }
+
+  /* Initialize data structures */
+  head = NULL;
+  ptail = &head;
+
+  /* Set up listener */
+  s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+  opt = 1;
+  setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+  sin.sin_family = AF_INET;
+  sin.sin_addr.s_addr = htonl(INADDR_ANY);
+  sin.sin_port = htons(port);
+  while (bind(s, (struct sockaddr *) &sin, sizeof(sin)) < 0) {
+    perror("bind()");
+    sleep(1);
+    errors++;
+    if (errors > 100)
+      exit(1);
+  }
+
+  len = sizeof(sin);
+  getsockname(s, (struct sockaddr *) &sin, &len);
+  fprintf(stderr, "Listening on port %hu\n", ntohs(sin.sin_port));
+
+  while (listen(s, MAX_CLIENTS) < 0) {
+    perror("listen()");
+    sleep(1);
+    errors++;
+    if (errors > 100)
+      exit(1);
+  }
+
+  for (;;) {
+    len = sizeof(from);
+    g = accept(s, (struct sockaddr *)&from, &len);
+    if (g < 0) {
+      perror("accept()");
+      sleep(1);
+      continue;
+    }
+    client = malloc(sizeof(struct clientinfo));
+    if (client == NULL) { /* out of memory: drop this connection, keep serving */
+      perror("malloc()");
+      close(g);
+      continue;
+    }
+    client->s = g;
+    bcopy(&from, &client->sin, len);
+    if (use_key){
+      fd_set set;
+      FD_ZERO(&set);
+      FD_SET(client->s, &set);
+      struct timeval timeout;
+      timeout.tv_sec = 3; // number of seconds for timeout
+      timeout.tv_usec = 0;
+      int ready = select(client->s+1, &set, NULL, NULL, &timeout);
+      if (ready<1){
+        fprintf(stderr, "Prospective client failed to respond with correct key.\n");
+        close(client->s);
+        free(client);
+      } else {
+        client_key = read_line(client->s,0);
+        if (client_key) client_key[strlen(client_key)-1]='\0'; /* chop trailing newline */
+        if (client_key && strcmp(key, client_key)==0){ /* NULL (EOF/error) counts as a wrong key */
+          pthread_create(&tid, NULL, new_client, client);
+        } else {
+          fprintf(stderr, "Prospective client failed to respond with correct key.\n");
+          close(client->s);
+          free(client);
+        }
+        free(client_key);
+      }
+    } else {
+      pthread_create(&tid, NULL, new_client, client);
+    }
+  }
+}
+
+
+
diff --git a/dpmert/sentserver.h b/dpmert/sentserver.h
new file mode 100644
index 00000000..cd17a546
--- /dev/null
+++ b/dpmert/sentserver.h
@@ -0,0 +1,6 @@
+#ifndef SENTSERVER_H
+#define SENTSERVER_H
+
+#define DEFAULT_PORT 50000
+
+#endif
diff --git a/dpmert/tac.pl b/dpmert/tac.pl
new file mode 100755
index 00000000..9fb525c1
--- /dev/null
+++ b/dpmert/tac.pl
@@ -0,0 +1,8 @@
+#!/usr/bin/perl
+
+while(<>) {                       # each input line, from STDIN or files named in @ARGV
+    chomp;                        # drop the newline so it is not moved to the front
+    $|=1;                         # autoflush output so each result is written immediately
+    print (scalar reverse($_));   # scalar context: reverse the characters of the line
+    print "\n";
+}
diff --git a/dpmert/test_aer/README b/dpmert/test_aer/README
new file mode 100644
index 00000000..819b2e32
--- /dev/null
+++ b/dpmert/test_aer/README
@@ -0,0 +1,8 @@
+To run the test:
+
+../dist-vest.pl --local --metric aer cdec.ini --source-file corpus.src --ref-files=ref.0 --weights weights
+
+This will optimize the parameters of the tiny lexical translation model
+so as to minimize the AER of the Viterbi alignment on the development
+set in corpus.src according to the reference alignments in ref.0.
+
diff --git a/dpmert/test_aer/cdec.ini b/dpmert/test_aer/cdec.ini
new file mode 100644
index 00000000..08187848
--- /dev/null
+++ b/dpmert/test_aer/cdec.ini
@@ -0,0 +1,3 @@
+formalism=lextrans
+grammar=grammar
+aligner=true
diff --git a/dpmert/test_aer/corpus.src b/dpmert/test_aer/corpus.src
new file mode 100644
index 00000000..31b23971
--- /dev/null
+++ b/dpmert/test_aer/corpus.src
@@ -0,0 +1,3 @@
+el gato negro ||| the black cat
+el gato ||| the cat
+el libro ||| the book
diff --git a/dpmert/test_aer/grammar b/dpmert/test_aer/grammar
new file mode 100644
index 00000000..9d857824
--- /dev/null
+++ b/dpmert/test_aer/grammar
@@ -0,0 +1,12 @@
+el ||| cat ||| F1=1
+el ||| the ||| F2=1
+el ||| black ||| F3=1
+el ||| book ||| F11=1
+gato ||| cat ||| F4=1 NN=1
+gato ||| black ||| F5=1
+gato ||| the ||| F6=1
+negro ||| the ||| F7=1
+negro ||| cat ||| F8=1
+negro ||| black ||| F9=1
+libro ||| the ||| F10=1
+libro ||| book ||| F12=1 NN=1
diff --git a/dpmert/test_aer/ref.0 b/dpmert/test_aer/ref.0
new file mode 100644
index 00000000..734a9c5b
--- /dev/null
+++ b/dpmert/test_aer/ref.0
@@ -0,0 +1,3 @@
+0-0 1-2 2-1
+0-0 1-1
+0-0 1-1
diff --git a/dpmert/test_aer/weights b/dpmert/test_aer/weights
new file mode 100644
index 00000000..afc9282e
--- /dev/null
+++ b/dpmert/test_aer/weights
@@ -0,0 +1,13 @@
+F1 0.1
+F2 -.5980815
+F3 0.24235
+F4 0.625
+F5 0.4514
+F6 0.112316
+F7 -0.123415
+F8 -0.25390285
+F9 -0.23852
+F10 0.646
+F11 0.413141
+F12 0.343216
+NN -0.1215
diff --git a/dpmert/test_data/0.json.gz b/dpmert/test_data/0.json.gz
new file mode 100644
index 00000000..30f8dd77
Binary files /dev/null and b/dpmert/test_data/0.json.gz differ
diff --git a/dpmert/test_data/1.json.gz b/dpmert/test_data/1.json.gz
new file mode 100644
index 00000000..c82cc179
Binary files /dev/null and b/dpmert/test_data/1.json.gz differ
diff --git a/dpmert/test_data/c2e.txt.0 b/dpmert/test_data/c2e.txt.0
new file mode 100644
index 00000000..12c4abe9
--- /dev/null
+++ b/dpmert/test_data/c2e.txt.0
@@ -0,0 +1,2 @@
+australia reopens embassy in manila
+( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack .
diff --git a/dpmert/test_data/c2e.txt.1 b/dpmert/test_data/c2e.txt.1
new file mode 100644
index 00000000..4ac12df1
--- /dev/null
+++ b/dpmert/test_data/c2e.txt.1
@@ -0,0 +1,2 @@
+australia reopened manila embassy
+( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack .
diff --git a/dpmert/test_data/c2e.txt.2 b/dpmert/test_data/c2e.txt.2
new file mode 100644
index 00000000..2f67b72f
--- /dev/null
+++ b/dpmert/test_data/c2e.txt.2
@@ -0,0 +1,2 @@
+australia to reopen embassy in manila
+( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so-called confirmed terrorist attack threats .
diff --git a/dpmert/test_data/c2e.txt.3 b/dpmert/test_data/c2e.txt.3
new file mode 100644
index 00000000..5483cef6
--- /dev/null
+++ b/dpmert/test_data/c2e.txt.3
@@ -0,0 +1,2 @@
+australia to re - open its embassy to manila
+( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so-called " clear " threat of terrorist attack 7 weeks ago .
diff --git a/dpmert/test_data/re.txt.0 b/dpmert/test_data/re.txt.0
new file mode 100644
index 00000000..86eff087
--- /dev/null
+++ b/dpmert/test_data/re.txt.0
@@ -0,0 +1,5 @@
+erdogan states turkey to reject any pressures to urge it to recognize cyprus
+ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened .
+erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus .
+we will discuss this dossier in the course of membership negotiations . "
+he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . "
diff --git a/dpmert/test_data/re.txt.1 b/dpmert/test_data/re.txt.1
new file mode 100644
index 00000000..2140f198
--- /dev/null
+++ b/dpmert/test_data/re.txt.1
@@ -0,0 +1,5 @@
+erdogan confirms turkey will resist any pressure to recognize cyprus
+ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara .
+erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus .
+we shall discuss this issue in the course of the membership negotiations . "
+he added : " let me be clear - i cannot confine turkey . this is something we do not accept . "
diff --git a/dpmert/test_data/re.txt.2 b/dpmert/test_data/re.txt.2
new file mode 100644
index 00000000..94e46286
--- /dev/null
+++ b/dpmert/test_data/re.txt.2
@@ -0,0 +1,5 @@
+erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus
+ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara .
+erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus .
+we shall discuss this dossier during the negotiations on joining . "
+and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . "
diff --git a/dpmert/test_data/re.txt.3 b/dpmert/test_data/re.txt.3
new file mode 100644
index 00000000..f87c3308
--- /dev/null
+++ b/dpmert/test_data/re.txt.3
@@ -0,0 +1,5 @@
+erdogan stresses that turkey will reject all pressures to force it to recognize cyprus
+ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not .
+erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus .
+we will discuss this file during the negotiations on joining . "
+he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . "
diff --git a/vest/Makefile.am b/vest/Makefile.am
deleted file mode 100644
index 05fa5639..00000000
--- a/vest/Makefile.am
+++ /dev/null
@@ -1,35 +0,0 @@
-bin_PROGRAMS = \
-  mr_vest_map \
-  mr_vest_reduce \
-  mr_vest_generate_mapper_input \
-  sentserver \
-  sentclient
-
-if HAVE_GTEST
-noinst_PROGRAMS = \
-  lo_test
-TESTS = lo_test
-endif
-
-sentserver_SOURCES = sentserver.c
-sentserver_LDFLAGS = -all-static -pthread
-
-sentclient_SOURCES = sentclient.c
-sentclient_LDFLAGS = -all-static -pthread
-
-mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc
-mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-# nbest2hg_SOURCES = nbest2hg.cc
-# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz
-
-mr_vest_map_SOURCES = viterbi_envelope.cc ces.cc error_surface.cc mr_vest_map.cc line_optimizer.cc
-mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_vest_reduce_SOURCES = error_surface.cc ces.cc mr_vest_reduce.cc line_optimizer.cc viterbi_envelope.cc
-mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-lo_test_SOURCES = lo_test.cc ces.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc
-lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/vest/README.shared-mem b/vest/README.shared-mem
deleted file mode 100644
index 7728efc0..00000000
--- a/vest/README.shared-mem
+++ /dev/null
@@ -1,9 +0,0 @@
-If you want to run dist-vest.pl on a very large shared memory machine, do the
-following:
-
-  ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini
-
-This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the
-decoder must load grammars, language models, etc., J should be smaller than I, but this will depend
-on the system you are running on and the complexity of the models used for decoding.
-
diff --git a/vest/cat.pl b/vest/cat.pl
deleted file mode 100755
index 2ecba3f9..00000000
--- a/vest/cat.pl
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/perl
-
-$|=1;
-print while(<>);
diff --git a/vest/ces.cc b/vest/ces.cc
deleted file mode 100644
index cd89aa69..00000000
--- a/vest/ces.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-#include "ces.h"
-
-#include <vector>
-#include <sstream>
-#include <boost/shared_ptr.hpp>
-
-// TODO, if AER is to be optimized again, we will need this
-// #include "aligner.h"
-#include "lattice.h"
-#include "viterbi_envelope.h"
-#include "error_surface.h"
-#include "ns.h"
-
-using boost::shared_ptr;
-using namespace std;
-
-const bool minimize_segments = true;    // if adjacent segments have equal scores, merge them
-
-void ComputeErrorSurface(const SegmentEvaluator& ss,
-                         const ViterbiEnvelope& ve,
-                         ErrorSurface* env,
-                         const EvaluationMetric* metric,
-                         const Hypergraph& hg) {
-  vector<WordID> prev_trans;
-  const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
-  env->resize(ienv.size());
-  SufficientStats prev_score; // defaults to 0
-  int j = 0;
-  for (int i = 0; i < ienv.size(); ++i) {
-    const Segment& seg = *ienv[i];
-    vector<WordID> trans;
-#if 0
-    if (type == AER) {
-      vector<bool> edges(hg.edges_.size(), false);
-      seg.CollectEdgesUsed(&edges);  // get the set of edges in the viterbi
-                                     // alignment
-      ostringstream os;
-      const string* psrc = ss.GetSource();
-      if (psrc == NULL) {
-        cerr << "AER scoring in VEST requires source, but it is missing!\n";
-        abort();
-      }
-      size_t pos = psrc->rfind(" ||| ");
-      if (pos == string::npos) {
-        cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl;
-        abort();
-      }
-      Lattice src;
-      Lattice ref;
-      LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src);
-      LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref);
-      AlignerTools::WriteAlignment(src, ref, hg, &os, true, 0, &edges);
-      string tstr = os.str();
-      TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans);
-    } else {
-#endif
-      seg.ConstructTranslation(&trans);
-    //}
-    //cerr << "Scoring: " << TD::GetString(trans) << endl;
-    if (trans == prev_trans) {
-      if (!minimize_segments) {
-        ErrorSegment& out = (*env)[j];
-        out.delta.fields.clear();
-        out.x = seg.x;
-	++j;
-      }
-      //cerr << "Identical translation, skipping scoring\n";
-    } else {
-      SufficientStats score;
-      ss.Evaluate(trans, &score);
-      // cerr << "score= " << score->ComputeScore() << "\n";
-      //string x1; score.Encode(&x1); cerr << "STATS: " << x1 << endl;
-      const SufficientStats delta = score - prev_score;
-      //string x2; delta.Encode(&x2); cerr << "DELTA: " << x2 << endl;
-      //string xx; delta.Encode(&xx); cerr << xx << endl;
-      prev_trans.swap(trans);
-      prev_score = score;
-      if ((!minimize_segments) || (!delta.IsAdditiveIdentity())) {
-        ErrorSegment& out = (*env)[j];
-        out.delta = delta;
-        out.x = seg.x;
-        ++j;
-      }
-    }
-  }
-  // cerr << " In segments: " << ienv.size() << endl;
-  // cerr << "Out segments: " << j << endl;
-  assert(j > 0);
-  env->resize(j);
-}
-
diff --git a/vest/ces.h b/vest/ces.h
deleted file mode 100644
index e021e715..00000000
--- a/vest/ces.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _CES_H_
-#define _CES_H_
-
-class ViterbiEnvelope;
-class Hypergraph;
-class SegmentEvaluator;
-class ErrorSurface;
-class EvaluationMetric;
-
-void ComputeErrorSurface(const SegmentEvaluator& ss,
-                         const ViterbiEnvelope& ve,
-                         ErrorSurface* es,
-                         const EvaluationMetric* metric,
-                         const Hypergraph& hg);
-
-#endif
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
deleted file mode 100755
index 1ec8c6b1..00000000
--- a/vest/dist-vest.pl
+++ /dev/null
@@ -1,700 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-my @ORIG_ARGV=@ARGV;
-use Cwd qw(getcwd);
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
-
-# Skip local config (used for distributing jobs) if we're running in local-only mode
-use LocalConfig;
-use Getopt::Long;
-use IPC::Open2;
-use POSIX ":sys_wait_h";
-my $QSUB_CMD = qsub_args(mert_memory());
-
-require "libcall.pl";
-
-# Default settings
-my $srcFile;
-my $refFiles;
-my $default_jobs = env_default_jobs();
-my $bin_dir = $SCRIPT_DIR;
-die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
-my $FAST_SCORE="$bin_dir/../mteval/fast_score";
-die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
-my $MAPINPUT = "$bin_dir/mr_vest_generate_mapper_input";
-my $MAPPER = "$bin_dir/mr_vest_map";
-my $REDUCER = "$bin_dir/mr_vest_reduce";
-my $parallelize = "$bin_dir/parallelize.pl";
-my $libcall = "$bin_dir/libcall.pl";
-my $sentserver = "$bin_dir/sentserver";
-my $sentclient = "$bin_dir/sentclient";
-my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
-
-my $SCORER = $FAST_SCORE;
-die "Can't find $MAPPER" unless -x $MAPPER;
-my $cdec = "$bin_dir/../decoder/cdec";
-die "Can't find decoder in $cdec" unless -x $cdec;
-die "Can't find $parallelize" unless -x $parallelize;
-die "Can't find $libcall" unless -e $libcall;
-my $decoder = $cdec;
-my $lines_per_mapper = 400;
-my $rand_directions = 15;
-my $iteration = 1;
-my $best_weights;
-my $max_iterations = 15;
-my $optimization_iters = 6;
-my $jobs = $default_jobs;   # number of decode nodes
-my $pmem = "9g";
-my $disable_clean = 0;
-my %seen_weights;
-my $normalize;
-my $help = 0;
-my $epsilon = 0.0001;
-my $interval = 5;
-my $dryrun = 0;
-my $last_score = -10000000;
-my $metric = "ibm_bleu";
-my $dir;
-my $iniFile;
-my $weights;
-my $initialWeights;
-my $decoderOpt;
-my $noprimary;
-my $maxsim=0;
-my $oraclen=0;
-my $oracleb=20;
-my $bleu_weight=1;
-my $use_make = 1;  # use make to parallelize line search
-my $useqsub;
-my $pass_suffix = '';
-my $cpbin=1;
-# Process command-line options
-Getopt::Long::Configure("no_auto_abbrev");
-if (GetOptions(
-	"decoder=s" => \$decoderOpt,
-	"jobs=i" => \$jobs,
-	"dont-clean" => \$disable_clean,
-	"pass-suffix=s" => \$pass_suffix,
-	"dry-run" => \$dryrun,
-	"epsilon=s" => \$epsilon,
-	"help" => \$help,
-	"interval" => \$interval,
-	"qsub" => \$useqsub,
-	"max-iterations=i" => \$max_iterations,
-	"normalize=s" => \$normalize,
-	"pmem=s" => \$pmem,
-        "cpbin!" => \$cpbin,
-	"random-directions=i" => \$rand_directions,
-	"ref-files=s" => \$refFiles,
-	"metric=s" => \$metric,
-	"source-file=s" => \$srcFile,
-	"weights=s" => \$initialWeights,
-	"workdir=s" => \$dir,
-    "opt-iterations=i" => \$optimization_iters,
-) == 0 || @ARGV!=1 || $help) {
-	print_help();
-	exit;
-}
-
-if ($useqsub) {
-  $use_make = 0;
-  die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
-}
-
-my @missing_args = ();
-if (!defined $srcFile) { push @missing_args, "--source-file"; }
-if (!defined $refFiles) { push @missing_args, "--ref-files"; }
-if (!defined $initialWeights) { push @missing_args, "--weights"; }
-die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args);
-
-if ($metric =~ /^(combi|ter)$/i) {
-  $lines_per_mapper = 40;
-} elsif ($metric =~ /^meteor$/i) {
-  $lines_per_mapper = 2000;   # start up time is really high
-}
-
-($iniFile) = @ARGV;
-
-
-sub write_config;
-sub enseg;
-sub print_help;
-
-my $nodelist;
-my $host =check_output("hostname"); chomp $host;
-my $bleu;
-my $interval_count = 0;
-my $logfile;
-my $projected_score;
-
-# used in sorting scores
-my $DIR_FLAG = '-r';
-if ($metric =~ /^ter$|^aer$/i) {
-  $DIR_FLAG = '';
-}
-
-my $refs_comma_sep = get_comma_sep_refs('r',$refFiles);
-
-unless ($dir){
-	$dir = "vest";
-}
-unless ($dir =~ /^\//){  # convert relative path to absolute path
-	my $basedir = check_output("pwd");
-	chomp $basedir;
-	$dir = "$basedir/$dir";
-}
-
-if ($decoderOpt){ $decoder = $decoderOpt; }
-
-
-# Initializations and helper functions
-srand;
-
-my @childpids = ();
-my @cleanupcmds = ();
-
-sub cleanup {
-	print STDERR "Cleanup...\n";
-	for my $pid (@childpids){ unchecked_call("kill $pid"); }
-	for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); }
-	exit 1;
-};
-# Always call cleanup, no matter how we exit
-*CORE::GLOBAL::exit = 
-    sub{ cleanup(); }; 
-$SIG{INT} = "cleanup";
-$SIG{TERM} = "cleanup";
-$SIG{HUP} = "cleanup";
-
-my $decoderBase = check_output("basename $decoder"); chomp $decoderBase;
-my $newIniFile = "$dir/$decoderBase.ini";
-my $inputFileName = "$dir/input";
-my $user = $ENV{"USER"};
-
-
-# process ini file
--e $iniFile || die "Error: could not open $iniFile for reading\n";
-open(INI, $iniFile);
-
-use File::Basename qw(basename);
-#pass bindir, refs to vars holding bin
-sub modbin {
-    local $_;
-    my $bindir=shift;
-    check_call("mkdir -p $bindir");
-    -d $bindir || die "couldn't make bindir $bindir";
-    for (@_) {
-        my $src=$$_;
-        $$_="$bindir/".basename($src);
-        check_call("cp -p $src $$_");
-    }
-}
-sub dirsize {
-    opendir ISEMPTY,$_[0];
-    return scalar(readdir(ISEMPTY))-1;
-}
-if ($dryrun){
-	write_config(*STDERR);
-	exit 0;
-} else {
-	if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-vest.pl outputs
-	  die "ERROR: working dir $dir already exists\n\n";
-	} else {
-		-e $dir || mkdir $dir;
-		mkdir "$dir/hgs";
-        modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin;
-    mkdir "$dir/scripts";
-        my $cmdfile="$dir/rerun-vest.sh";
-        open CMD,'>',$cmdfile;
-        print CMD "cd ",&getcwd,"\n";
-#        print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted.
-        my $cline=&cmdline."\n";
-        print CMD $cline;
-        close CMD;
-        print STDERR $cline;
-        chmod(0755,$cmdfile);
-		unless (-e $initialWeights) {
-			print STDERR "Please specify an initial weights file with --initial-weights\n";
-			print_help();
-			exit;
-		}
-		check_call("cp $initialWeights $dir/weights.0");
-		die "Can't find weights.0" unless (-e "$dir/weights.0");
-	}
-	write_config(*STDERR);
-}
-
-
-# Generate initial files and values
-check_call("cp $iniFile $newIniFile");
-$iniFile = $newIniFile;
-
-my $newsrc = "$dir/dev.input";
-enseg($srcFile, $newsrc);
-$srcFile = $newsrc;
-my $devSize = 0;
-open F, "<$srcFile" or die "Can't read $srcFile: $!";
-while(<F>) { $devSize++; }
-close F;
-
-unless($best_weights){ $best_weights = $weights; }
-unless($projected_score){ $projected_score = 0.0; }
-$seen_weights{$weights} = 1;
-
-my $random_seed = int(time / 1000);
-my $lastWeightsFile;
-my $lastPScore = 0;
-# main optimization loop
-while (1){
-	print STDERR "\n\nITERATION $iteration\n==========\n";
-
-	if ($iteration > $max_iterations){
-		print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n";
-		last;
-	}
-	# iteration-specific files
-	my $runFile="$dir/run.raw.$iteration";
-	my $onebestFile="$dir/1best.$iteration";
-	my $logdir="$dir/logs.$iteration";
-	my $decoderLog="$logdir/decoder.sentserver.log.$iteration";
-	my $scorerLog="$logdir/scorer.log.$iteration";
-	check_call("mkdir -p $logdir");
-
-
-	#decode
-	print STDERR "RUNNING DECODER AT ";
-	print STDERR unchecked_output("date");
-	my $im1 = $iteration - 1;
-	my $weightsFile="$dir/weights.$im1";
-	my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs";
-	my $pcmd;
-	if ($use_make) {
-		$pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --";
-	} else {
-		$pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --";
-	}
-	my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
-	print STDERR "COMMAND:\n$cmd\n";
-	check_bash_call($cmd);
-        my $num_hgs;
-        my $num_topbest;
-        my $retries = 0;
-	while($retries < 5) {
-	    $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l");
-	    $num_topbest = check_output("wc -l < $runFile");
-	    print STDERR "NUMBER OF HGs: $num_hgs\n";
-	    print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n";
-	    if($devSize == $num_hgs && $devSize == $num_topbest) {
-		last;
-	    } else {
-		print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n";
-		sleep(3);
-	    }
-	    $retries++;
-	}
-	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
-	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
-	chomp $dec_score;
-	print STDERR "DECODER SCORE: $dec_score\n";
-
-	# save space
-	check_call("gzip -f $runFile");
-	check_call("gzip -f $decoderLog");
-
-	# run optimizer
-	print STDERR "RUNNING OPTIMIZER AT ";
-	print STDERR unchecked_output("date");
-	my $mergeLog="$logdir/prune-merge.log.$iteration";
-
-	my $score = 0;
-	my $icc = 0;
-	my $inweights="$dir/weights.$im1";
-	for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) {
-		print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n";
-		print STDERR unchecked_output("date");
-		$icc++;
-		$cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter";
-		print STDERR "COMMAND:\n$cmd\n";
-		check_call($cmd);
-		check_call("mkdir -p $dir/splag.$im1");
-		$cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput.";
-		print STDERR "COMMAND:\n$cmd\n";
-		check_call($cmd);
-		opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!";
-		my @shards = grep { /^mapinput\./ } readdir(DIR);
-		closedir DIR;
-		die "No shards!" unless scalar @shards > 0;
-		my $joblist = "";
-		my $nmappers = 0;
-		my @mapoutputs = ();
-		@cleanupcmds = ();
-		my %o2i = ();
-		my $first_shard = 1;
-		my $mkfile; # only used with makefiles
-		my $mkfilename;
-		if ($use_make) {
-			$mkfilename = "$dir/splag.$im1/domap.mk";
-			open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!";
-			print $mkfile "all: $dir/splag.$im1/map.done\n\n";
-		}
-		my @mkouts = ();  # only used with makefiles
-		for my $shard (@shards) {
-			my $mapoutput = $shard;
-			my $client_name = $shard;
-			$client_name =~ s/mapinput.//;
-			$client_name = "vest.$client_name";
-			$mapoutput =~ s/mapinput/mapoutput/;
-			push @mapoutputs, "$dir/splag.$im1/$mapoutput";
-			$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
-			my $script = "$MAPPER -s $srcFile -m $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
-			if ($use_make) {
-				my $script_file = "$dir/scripts/map.$shard";
-				open F, ">$script_file" or die "Can't write $script_file: $!";
-				print F "#!/bin/bash\n";
-				print F "$script\n";
-				close F;
-				my $output = "$dir/splag.$im1/$mapoutput";
-				push @mkouts, $output;
-				chmod(0755, $script_file) or die "Can't chmod $script_file: $!";
-				if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
-				print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n";
-			} else {
-				my $script_file = "$dir/scripts/map.$shard";
-				open F, ">$script_file" or die "Can't write $script_file: $!";
-				print F "$script\n";
-				close F;
-				if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
-
-				$nmappers++;
-				my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
-				my $jobid = check_output("$qcmd");
-				chomp $jobid;
-				$jobid =~ s/^(\d+)(.*?)$/\1/g;
-				$jobid =~ s/^Your job (\d+) .*$/\1/;
-		 	 	push(@cleanupcmds, "qdel $jobid 2> /dev/null");
-				print STDERR " $jobid";
-				if ($joblist == "") { $joblist = $jobid; }
-				else {$joblist = $joblist . "\|" . $jobid; }
-			}
-		}
-		if ($use_make) {
-			print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n";
-			close $mkfile;
-			my $mcmd = "make -j $jobs -f $mkfilename";
-			print STDERR "\nExecuting: $mcmd\n";
-			check_call($mcmd);
-		} else {
-			print STDERR "\nLaunched $nmappers mappers.\n";
-      			sleep 8;
-			print STDERR "Waiting for mappers to complete...\n";
-			while ($nmappers > 0) {
-			  sleep 5;
-			  my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '")));
-			  $nmappers = scalar @livejobs;
-			}
-			print STDERR "All mappers complete.\n";
-		}
-		my $tol = 0;
-		my $til = 0;
-		for my $mo (@mapoutputs) {
-		  my $olines = get_lines($mo);
-		  my $ilines = get_lines($o2i{$mo});
-		  $tol += $olines;
-		  $til += $ilines;
-		  die "$mo: output lines ($olines) doesn't match input lines ($ilines)" unless $olines==$ilines;
-		}
-		print STDERR "Results for $tol/$til lines\n";
-		print STDERR "\nSORTING AND RUNNING VEST REDUCER\n";
-		print STDERR unchecked_output("date");
-		$cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -m $metric > $dir/redoutput.$im1";
-		print STDERR "COMMAND:\n$cmd\n";
-		check_bash_call($cmd);
-		$cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1";
-		# sort returns failure even when it doesn't fail for some reason
-		my $best=unchecked_output("$cmd"); chomp $best;
-		print STDERR "$best\n";
-		my ($oa, $x, $xscore) = split /\|/, $best;
-		$score = $xscore;
-		print STDERR "PROJECTED SCORE: $score\n";
-		if (abs($x) < $epsilon) {
-			print STDERR "\nOPTIMIZER: no score improvement: abs($x) < $epsilon\n";
-			last;
-		}
-                my $psd = $score - $last_score;
-                $last_score = $score;
-		if (abs($psd) < $epsilon) {
-			print STDERR "\nOPTIMIZER: no score improvement: abs($psd) < $epsilon\n";
-			last;
-		}
-		my ($origin, $axis) = split /\s+/, $oa;
-
-		my %ori = convert($origin);
-		my %axi = convert($axis);
-
-		my $finalFile="$dir/weights.$im1-$opt_iter";
-		open W, ">$finalFile" or die "Can't write: $finalFile: $!";
-                my $norm = 0;
-		for my $k (sort keys %ori) {
-			my $dd = $ori{$k} + $axi{$k} * $x;
-                        $norm += $dd * $dd;
-		}
-                $norm = sqrt($norm);
-		$norm = 1;
-		for my $k (sort keys %ori) {
-			my $v = ($ori{$k} + $axi{$k} * $x) / $norm;
-			print W "$k $v\n";
-		}
-		check_call("rm $dir/splag.$im1/*");
-		$inweights = $finalFile;
-	}
-	$lastWeightsFile = "$dir/weights.$iteration";
-	check_call("cp $inweights $lastWeightsFile");
-	if ($icc < 2) {
-		print STDERR "\nREACHED STOPPING CRITERION: score change too little\n";
-		last;
-	}
-	$lastPScore = $score;
-	$iteration++;
-	print STDERR "\n==========\n";
-}
-
-print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
-
-print STDOUT "$lastWeightsFile\n";
-
-exit 0;
-
-sub normalize_weights {
-  my ($rfn, $rpts, $feat) = @_;
-  my @feat_names = @$rfn;
-  my @pts = @$rpts;
-  my $z = 1.0;
-  for (my $i=0; $i < scalar @feat_names; $i++) {
-    if ($feat_names[$i] eq $feat) {
-      $z = $pts[$i];
-      last;
-    }
-  }
-  for (my $i=0; $i < scalar @feat_names; $i++) {
-    $pts[$i] /= $z;
-  }
-  print STDERR " NORM WEIGHTS: @pts\n";
-  return @pts;
-}
-
-sub get_lines {
-  my $fn = shift @_;
-  open FL, "<$fn" or die "Couldn't read $fn: $!";
-  my $lc = 0;
-  while(<FL>) { $lc++; }
-  return $lc;
-}
-
-sub get_comma_sep_refs {
-  my ($r,$p) = @_;
-  my $o = check_output("echo $p");
-  chomp $o;
-  my @files = split /\s+/, $o;
-  return "-$r " . join(" -$r ", @files);
-}
-
-sub read_weights_file {
-  my ($file) = @_;
-  open F, "<$file" or die "Couldn't read $file: $!";
-  my @r = ();
-  my $pm = -1;
-  while(<F>) {
-    next if /^#/;
-    next if /^\s*$/;
-    chomp;
-    if (/^(.+)\s+(.+)$/) {
-      my $m = $1;
-      my $w = $2;
-      die "Weights out of order: $m <= $pm" unless $m > $pm;
-      push @r, $w;
-    } else {
-      warn "Unexpected feature name in weight file: $_";
-    }
-  }
-  close F;
-  return join ' ', @r;
-}
-
-# subs
-sub write_config {
-	my $fh = shift;
-	my $cleanup = "yes";
-	if ($disable_clean) {$cleanup = "no";}
-
-	print $fh "\n";
-	print $fh "DECODER:          $decoder\n";
-	print $fh "INI FILE:         $iniFile\n";
-	print $fh "WORKING DIR:      $dir\n";
-	print $fh "SOURCE (DEV):     $srcFile\n";
-	print $fh "REFS (DEV):       $refFiles\n";
-	print $fh "EVAL METRIC:      $metric\n";
-	print $fh "START ITERATION:  $iteration\n";
-	print $fh "MAX ITERATIONS:   $max_iterations\n";
-	print $fh "PARALLEL JOBS:    $jobs\n";
-	print $fh "HEAD NODE:        $host\n";
-	print $fh "PMEM (DECODING):  $pmem\n";
-	print $fh "CLEANUP:          $cleanup\n";
-	print $fh "INITIAL WEIGHTS:  $initialWeights\n";
-}
-
-sub update_weights_file {
-  my ($neww, $rfn, $rpts) = @_;
-  my @feats = @$rfn;
-  my @pts = @$rpts;
-  my $num_feats = scalar @feats;
-  my $num_pts = scalar @pts;
-  die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts;
-  open G, ">$neww" or die;
-  for (my $i = 0; $i < $num_feats; $i++) {
-    my $f = $feats[$i];
-    my $lambda = $pts[$i];
-    print G "$f $lambda\n";
-  }
-  close G;
-}
-
-sub enseg {
-	my $src = shift;
-	my $newsrc = shift;
-	open(SRC, $src);
-	open(NEWSRC, ">$newsrc");
-	my $i=0;
-	while (my $line=<SRC>){
-		chomp $line;
-		if ($line =~ /^\s*<seg/i) {
-		    if($line =~ /id="[0-9]+"/) {
-			print NEWSRC "$line\n";
-		    } else {
-			die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
-		    }
-		} else {
-			print NEWSRC "<seg id=\"$i\">$line</seg>\n";
-		}
-		$i++;
-	}
-	close SRC;
-	close NEWSRC;
-}
-
-sub print_help {
-
-	my $executable = check_output("basename $0"); chomp $executable;
-    print << "Help";
-
-Usage: $executable [options] <ini file>
-
-	$executable [options] <ini file>
-		Runs a complete MERT optimization using the decoder configuration
-                in <ini file>. Required options are --weights, --source-file, and
-		--ref-files.
-
-Options:
-
-	--help
-		Print this message and exit.
-
-	--max-iterations <M>
-		Maximum number of iterations to run.  If not specified, defaults
-		to 10.
-
-	--pass-suffix <S>
-		If the decoder is doing multi-pass decoding, the pass suffix "2",
-		"3", etc., is used to control what iteration of weights is set.
-
-	--ref-files <files>
-		Dev set ref files.  This option takes only a single string argument.
-		To use multiple files (including file globbing), this argument should
-		be quoted.
-
-	--metric <method>
-		Metric to optimize.
-		Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
-
-	--normalize <feature-name>
-		After each iteration, rescale all feature weights such that feature-
-		name has a weight of 1.0.
-
-	--rand-directions <num>
-		MERT will attempt to optimize along all of the principle directions,
-		set this parameter to explore other directions. Defaults to 5.
-
-	--source-file <file>
-		Dev set source file.
-
-	--weights <file>
-		A file specifying initial feature weights.  The format is
-		FeatureName_1 value1
-		FeatureName_2 value2
-		**All and only the weights listed in <file> will be optimized!**
-
-	--workdir <dir>
-		Directory for intermediate and output files.  If not specified, the
-		name is derived from the ini filename.  Assuming that the ini
-		filename begins with the decoder name and ends with ini, the default
-		name of the working directory is inferred from the middle part of
-		the filename.  E.g. an ini file named decoder.foo.ini would have
-		a default working directory name foo.
-
-Job control options:
-
-	--jobs <I>
-		Number of decoder processes to run in parallel. [default=$default_jobs]
-
-	--qsub
-		Use qsub to run jobs in parallel (qsub must be configured in
-		environment/LocalEnvironment.pm)
-
-	--pmem <N>
-		Amount of physical memory requested for parallel decoding jobs
-		(used with qsub requests only)
-
-Help
-}
-
-sub convert {
-  my ($str) = @_;
-  my @ps = split /;/, $str;
-  my %dict = ();
-  for my $p (@ps) {
-    my ($k, $v) = split /=/, $p;
-    $dict{$k} = $v;
-  }
-  return %dict;
-}
-
-
-
-sub cmdline {
-    return join ' ',($0,@ORIG_ARGV);
-}
-
-#buggy: last arg gets quoted sometimes?
-my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]};
-my $shell_escape_in_quote=qr{[\\"\$`!]};
-
-sub escape_shell {
-    my ($arg)=@_;
-    return undef unless defined $arg;
-    if ($arg =~ /$is_shell_special/) {
-        $arg =~ s/($shell_escape_in_quote)/\\$1/g;
-        return "\"$arg\"";
-    }
-    return $arg;
-}
-
-sub escaped_shell_args {
-    return map {local $_=$_;chomp;escape_shell($_)} @_;
-}
-
-sub escaped_shell_args_str {
-    return join ' ',&escaped_shell_args(@_);
-}
-
-sub escaped_cmdline {
-    return "$0 ".&escaped_shell_args_str(@ORIG_ARGV);
-}
diff --git a/vest/error_surface.cc b/vest/error_surface.cc
deleted file mode 100644
index 515b67f8..00000000
--- a/vest/error_surface.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-#include "error_surface.h"
-
-#include <cassert>
-#include <sstream>
-
-using namespace std;
-
-ErrorSurface::~ErrorSurface() {}
-
-void ErrorSurface::Serialize(std::string* out) const {
-  const int segments = this->size();
-  ostringstream os(ios::binary);
-  os.write((const char*)&segments,sizeof(segments));
-  for (int i = 0; i < segments; ++i) {
-    const ErrorSegment& cur = (*this)[i];
-    string senc;
-    cur.delta.Encode(&senc);
-    assert(senc.size() < 1024);
-    unsigned char len = senc.size();
-    os.write((const char*)&cur.x, sizeof(cur.x));
-    os.write((const char*)&len, sizeof(len));
-    os.write((const char*)&senc[0], len);
-  }
-  *out = os.str();
-}
-
-void ErrorSurface::Deserialize(const std::string& in) {
-  istringstream is(in, ios::binary);
-  int segments;
-  is.read((char*)&segments, sizeof(segments));
-  this->resize(segments);
-  for (int i = 0; i < segments; ++i) {
-    ErrorSegment& cur = (*this)[i];
-    unsigned char len;
-    is.read((char*)&cur.x, sizeof(cur.x));
-    is.read((char*)&len, sizeof(len));
-    string senc(len, '\0'); assert(senc.size() == len);
-    is.read((char*)&senc[0], len);
-    cur.delta = SufficientStats(senc);
-  }
-}
-
diff --git a/vest/error_surface.h b/vest/error_surface.h
deleted file mode 100644
index bb65847b..00000000
--- a/vest/error_surface.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _ERROR_SURFACE_H_
-#define _ERROR_SURFACE_H_
-
-#include <vector>
-#include <string>
-
-#include "ns.h"
-
-class Score;
-
-struct ErrorSegment {
-  double x;
-  SufficientStats delta;
-  ErrorSegment() : x(0), delta() {}
-};
-
-class ErrorSurface : public std::vector<ErrorSegment> {
- public:
-  ~ErrorSurface();
-  void Serialize(std::string* out) const;
-  void Deserialize(const std::string& in);
-};
-
-#endif
diff --git a/vest/libcall.pl b/vest/libcall.pl
deleted file mode 100644
index c7d0f128..00000000
--- a/vest/libcall.pl
+++ /dev/null
@@ -1,71 +0,0 @@
-use IPC::Open3;
-use Symbol qw(gensym);
-
-$DUMMY_STDERR = gensym();
-$DUMMY_STDIN = gensym();
-
-# Run the command and ignore failures
-sub unchecked_call {
-    system("@_")
-}
-
-# Run the command and return its output, if any ignoring failures
-sub unchecked_output {
-    return `@_`
-}
-
-# WARNING: Do not use this for commands that will return large amounts
-# of stdout or stderr -- they might block indefinitely
-sub check_output {
-    print STDERR "Executing and gathering output: @_\n";
-
-    my $pid = open3($DUMMY_STDIN, \*PH, $DUMMY_STDERR, @_);
-    my $proc_output = "";
-    while( <PH> ) {
-	$proc_output .= $_;
-    }
-    waitpid($pid, 0);
-    # TODO: Grab signal that the process died from
-    my $child_exit_status = $? >> 8;
-    if($child_exit_status == 0) {
-	return $proc_output;
-    } else {
-	print STDERR "ERROR: Execution of @_ failed.\n";
-	exit(1);
-    }
-}
-
-# Based on Moses' safesystem sub
-sub check_call {
-    print STDERR "Executing: @_\n";
-    system(@_);
-    my $exitcode = $? >> 8;
-    if($exitcode == 0) {
-	return 0;
-    } elsif ($? == -1) {
-	print STDERR "ERROR: Failed to execute: @_\n  $!\n";
-	exit(1);
-
-    } elsif ($? & 127) {
-      printf STDERR "ERROR: Execution of: @_\n  died with signal %d, %s coredump\n",
-      ($? & 127),  ($? & 128) ? 'with' : 'without';
-      exit(1);
-
-    } else {
-	print STDERR "Failed with exit code: $exitcode\n" if $exitcode;
-	exit($exitcode);
-    }
-}
-
-sub check_bash_call {
-    my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_");
-    check_call(@args);
-}
-
-sub check_bash_output {
-    my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_");
-    return check_output(@args);
-}
-
-# perl module weirdness...
-return 1;
diff --git a/vest/line_mediator.pl b/vest/line_mediator.pl
deleted file mode 100755
index bc2bb24c..00000000
--- a/vest/line_mediator.pl
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/perl -w
-#hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication
-
-# if you don't know how to fork/exec in a C program, this could be helpful under limited cirmustances (would be ok to liaise with sentserver)
-
-#WARNING: because it waits for the result from command 2 after sending every line, and especially if command 1 does the same, using sentserver as command 2 won't actually buy you any real parallelism.
-
-use strict;
-use IPC::Open2;
-use POSIX qw(pipe dup2 STDIN_FILENO STDOUT_FILENO);
-
-my $quiet=!$ENV{DEBUG};
-$quiet=1 if $ENV{QUIET};
-sub info {
-    local $,=' ';
-    print STDERR @_ unless $quiet;
-}
-
-my $mode='CROSS';
-my $ser='DIRECT';
-$mode='PIPE' if $ENV{PIPE};
-$mode='SNAKE' if $ENV{SNAKE};
-$mode='CROSS' if $ENV{CROSS};
-$ser='SERIAL' if $ENV{SERIAL};
-$ser='DIRECT' if $ENV{DIRECT};
-$ser='SERIAL' if $mode eq 'SNAKE';
-info("mode: $mode\n");
-info("connection: $ser\n");
-
-
-my @c1;
-if (scalar @ARGV) {
-    do {
-        push @c1,shift
-    } while scalar @ARGV && $c1[$#c1] ne '--';
-}
-pop @c1;
-my @c2=@ARGV;
-@ARGV=();
-(scalar @c1 && scalar @c2) || die qq{
-usage: $0 cmd1 args -- cmd2 args
-all options are environment variables.
-DEBUG=1 env var enables debugging output.
-CROSS=1 hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication.  crosses stdin/stderr of cmd1 and cmd2 line by line (both must flush on newline and output.  cmd1 initiates the conversation (sends the first line).    default: attempts to cross stdin/stdout of c1 and c2 directly (via two unidirectional posix pipes created before fork).
-SERIAL=1: (no parallelism possible) but lines exchanged are logged if DEBUG.
-if SNAKE then stdin -> c1 -> c2 -> c1 -> stdout.
-if PIPE then stdin -> c1 -> c2 -> stdout (same as shell c1|c2, but with SERIAL you can see the intermediate in real time; you could do similar with c1 | tee /dev/fd/2 |c2.
-DIRECT=1 (default) will override SERIAL=1.
-CROSS=1 (default) will override SNAKE or PIPE.
-};
-
-info("1 cmd:",@c1,"\n");
-info("2 cmd:",@c2,"\n");
-
-sub lineto {
-    select $_[0];
-    $|=1;
-    shift;
-    print @_;
-}
-
-if ($ser eq 'SERIAL') {
-    my ($R1,$W1,$R2,$W2);
-    my $c1p=open2($R1,$W1,@c1); # Open2 R W backward from Open3.
-    my $c2p=open2($R2,$W2,@c2);
-    if ($mode eq 'CROSS') {
-        while(<$R1>) {
-            info("1:",$_);
-            lineto($W2,$_);
-            last unless defined ($_=<$R2>);
-            info("1|2:",$_);
-            lineto($W1,$_);
-        }
-    } else {
-        my $snake=$mode eq 'SNAKE';
-        while(<STDIN>) {
-            info("IN:",$_);
-            lineto($W1,$_);
-            last unless defined ($_=<$R1>);
-            info("IN|1:",$_);
-            lineto($W2,$_);
-            last unless defined ($_=<$R2>);
-            info("IN|1|2:",$_);
-            if ($snake) {
-                lineto($W1,$_);
-                last unless defined ($_=<$R1>);
-                info("IN|1|2|1:",$_);
-            }
-            lineto(*STDOUT,$_);
-        }
-    }
-} else {
-    info("DIRECT mode\n");
-    my @rw1=POSIX::pipe();
-    my @rw2=POSIX::pipe();
-    my $pid=undef;
-    $SIG{CHLD} = sub { wait };
-    while (not defined ($pid=fork())) {
-        sleep 1;
-    }
-    my $pipe = $mode eq 'PIPE';
-    unless ($pipe) {
-        POSIX::close(STDOUT_FILENO);
-        POSIX::close(STDIN_FILENO);
-    }
-    if ($pid) {
-        POSIX::dup2($rw1[1],STDOUT_FILENO);
-        POSIX::dup2($rw2[0],STDIN_FILENO) unless $pipe;
-        exec @c1;
-    } else {
-        POSIX::dup2($rw2[1],STDOUT_FILENO) unless $pipe;
-        POSIX::dup2($rw1[0],STDIN_FILENO);
-        exec @c2;
-    }
-    while (wait()!=-1) {}
-}
diff --git a/vest/line_optimizer.cc b/vest/line_optimizer.cc
deleted file mode 100644
index 49443fbe..00000000
--- a/vest/line_optimizer.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "line_optimizer.h"
-
-#include <limits>
-#include <algorithm>
-
-#include "sparse_vector.h"
-#include "ns.h"
-
-using namespace std;
-
-typedef ErrorSurface::const_iterator ErrorIter;
-
-// sort by increasing x-ints
-struct IntervalComp {
-  bool operator() (const ErrorIter& a, const ErrorIter& b) const {
-    return a->x < b->x;
-  }
-};
-
-double LineOptimizer::LineOptimize(
-    const EvaluationMetric* metric,
-    const vector<ErrorSurface>& surfaces,
-    const LineOptimizer::ScoreType type,
-    float* best_score,
-    const double epsilon) {
-  // cerr << "MIN=" << MINIMIZE_SCORE << " MAX=" << MAXIMIZE_SCORE << "  MINE=" << type << endl;
-  vector<ErrorIter> all_ints;
-  for (vector<ErrorSurface>::const_iterator i = surfaces.begin();
-       i != surfaces.end(); ++i) {
-    const ErrorSurface& surface = *i;
-    for (ErrorIter j = surface.begin(); j != surface.end(); ++j)
-      all_ints.push_back(j);
-  }
-  sort(all_ints.begin(), all_ints.end(), IntervalComp());
-  double last_boundary = all_ints.front()->x;
-  SufficientStats acc;
-  float& cur_best_score = *best_score;
-  cur_best_score = (type == MAXIMIZE_SCORE ?
-    -numeric_limits<float>::max() : numeric_limits<float>::max());
-  bool left_edge = true;
-  double pos = numeric_limits<double>::quiet_NaN();
-  for (vector<ErrorIter>::iterator i = all_ints.begin();
-       i != all_ints.end(); ++i) {
-    const ErrorSegment& seg = **i;
-    if (seg.x - last_boundary > epsilon) {
-      float sco = metric->ComputeScore(acc);
-      if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
-          (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
-        cur_best_score = sco;
-	if (left_edge) {
-	  pos = seg.x - 0.1;
-	  left_edge = false;
-	} else {
-	  pos = last_boundary + (seg.x - last_boundary) / 2;
-	}
-	//cerr << "NEW BEST: " << pos << "  (score=" << cur_best_score << ")\n";
-      }
-      // string xx = metric->DetailedScore(acc); cerr << "---- " << xx;
-      // cerr << "---- s=" << sco << "\n";
-      last_boundary = seg.x;
-    }
-    // cerr << "x-boundary=" << seg.x << "\n";
-    //string x2; acc.Encode(&x2); cerr << "   ACC: " << x2 << endl;
-    //string x1; seg.delta.Encode(&x1); cerr << " DELTA: " << x1 << endl;
-    acc += seg.delta;
-  }
-  float sco = metric->ComputeScore(acc);
-  if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
-      (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
-    cur_best_score = sco;
-    if (left_edge) {
-      pos = 0;
-    } else {
-      pos = last_boundary + 1000.0;
-    }
-  }
-  return pos;
-}
-
-void LineOptimizer::RandomUnitVector(const vector<int>& features_to_optimize,
-                                     SparseVector<double>* axis,
-                                     RandomNumberGenerator<boost::mt19937>* rng) {
-  axis->clear();
-  for (int i = 0; i < features_to_optimize.size(); ++i)
-    axis->set_value(features_to_optimize[i], rng->NextNormal(0.0,1.0));
-  (*axis) /= axis->l2norm();
-}
-
-void LineOptimizer::CreateOptimizationDirections(
-     const vector<int>& features_to_optimize,
-     int additional_random_directions,
-     RandomNumberGenerator<boost::mt19937>* rng,
-     vector<SparseVector<double> >* dirs
-     , bool include_orthogonal
-  ) {
-  dirs->clear();
-  typedef SparseVector<double> Dir;
-  vector<Dir> &out=*dirs;
-  int i=0;
-  if (include_orthogonal)
-    for (;i<features_to_optimize.size();++i) {
-      Dir d;
-      d.set_value(features_to_optimize[i],1.);
-      out.push_back(d);
-    }
-  out.resize(i+additional_random_directions);
-  for (;i<out.size();++i)
-     RandomUnitVector(features_to_optimize, &out[i], rng);
-  cerr << "Generated " << out.size() << " total axes to optimize along.\n";
-}
-
diff --git a/vest/line_optimizer.h b/vest/line_optimizer.h
deleted file mode 100644
index 83819f41..00000000
--- a/vest/line_optimizer.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef LINE_OPTIMIZER_H_
-#define LINE_OPTIMIZER_H_
-
-#include <vector>
-
-#include "sparse_vector.h"
-#include "error_surface.h"
-#include "sampler.h"
-
-class EvaluationMetric;
-class Weights;
-
-struct LineOptimizer {
-
-  // use MINIMIZE_SCORE for things like TER, WER
-  // MAXIMIZE_SCORE for things like BLEU
-  enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE };
-
-  // merge all the error surfaces together into a global
-  // error surface and find (the middle of) the best segment
-  static double LineOptimize(
-     const EvaluationMetric* metric,
-     const std::vector<ErrorSurface>& envs,
-     const LineOptimizer::ScoreType type,
-     float* best_score,
-     const double epsilon = 1.0/65536.0);
-
-  // return a random vector of length 1 where all dimensions
-  // not listed in dimensions will be 0.
-  static void RandomUnitVector(const std::vector<int>& dimensions,
-                               SparseVector<double>* axis,
-                               RandomNumberGenerator<boost::mt19937>* rng);
-
-  // generate a list of directions to optimize; the list will
-  // contain the orthogonal vectors corresponding to the dimensions in
-  // primary and then additional_random_directions directions in those
-  // dimensions as well.  All vectors will be length 1.
-  static void CreateOptimizationDirections(
-     const std::vector<int>& primary,
-     int additional_random_directions,
-     RandomNumberGenerator<boost::mt19937>* rng,
-     std::vector<SparseVector<double> >* dirs
-     , bool include_primary=true
-    );
-
-};
-
-#endif
diff --git a/vest/lo_test.cc b/vest/lo_test.cc
deleted file mode 100644
index a67f65e1..00000000
--- a/vest/lo_test.cc
+++ /dev/null
@@ -1,236 +0,0 @@
-#include <cmath>
-#include <iostream>
-#include <fstream>
-
-#include <boost/shared_ptr.hpp>
-#include <gtest/gtest.h>
-
-#include "ns.h"
-#include "ns_docscorer.h"
-#include "ces.h"
-#include "fdict.h"
-#include "hg.h"
-#include "kbest.h"
-#include "hg_io.h"
-#include "filelib.h"
-#include "inside_outside.h"
-#include "viterbi.h"
-#include "viterbi_envelope.h"
-#include "line_optimizer.h"
-
-using namespace std;
-using boost::shared_ptr;
-
-class OptTest : public testing::Test {
- protected:
-   virtual void SetUp() { }
-   virtual void TearDown() { }
-};
-
-const char* ref11 = "australia reopens embassy in manila";
-const char* ref12 = "( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack .";
-const char* ref21 = "australia reopened manila embassy";
-const char* ref22 = "( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack .";
-const char* ref31 = "australia to reopen embassy in manila";
-const char* ref32 = "( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so - called confirmed terrorist attack threats .";
-const char* ref41 = "australia to re - open its embassy to manila";
-const char* ref42 = "( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so - called \" clear \" threat of terrorist attack 7 weeks ago .";
-
-TEST_F(OptTest, TestCheckNaN) {
-  double x = 0;
-  double y = 0;
-  double z = x / y;
-  EXPECT_EQ(true, isnan(z));
-}
-
-TEST_F(OptTest,TestViterbiEnvelope) {
-  shared_ptr<Segment> a1(new Segment(-1, 0));
-  shared_ptr<Segment> b1(new Segment(1, 0));
-  shared_ptr<Segment> a2(new Segment(-1, 1));
-  shared_ptr<Segment> b2(new Segment(1, -1));
-  vector<shared_ptr<Segment> > sa; sa.push_back(a1); sa.push_back(b1);
-  vector<shared_ptr<Segment> > sb; sb.push_back(a2); sb.push_back(b2);
-  ViterbiEnvelope a(sa);
-  cerr << a << endl;
-  ViterbiEnvelope b(sb);
-  ViterbiEnvelope c = a;
-  c *= b;
-  cerr << a << " (*) " << b << " = " << c << endl;
-  EXPECT_EQ(3, c.size());
-}
-
-TEST_F(OptTest,TestViterbiEnvelopeInside) {
-  const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}";
-  Hypergraph hg;
-  istringstream instr(json);
-  HypergraphIO::ReadFromJSON(&instr, &hg);
-  SparseVector<double> wts;
-  wts.set_value(FD::Convert("f1"), 0.4);
-  wts.set_value(FD::Convert("f2"), 1.0);
-  hg.Reweight(wts);
-  vector<pair<vector<WordID>, prob_t> > list;
-  std::vector<SparseVector<double> > features;
-  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, 10);
-  for (int i = 0; i < 10; ++i) {
-    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
-      kbest.LazyKthBest(hg.nodes_.size() - 1, i);
-    if (!d) break;
-    cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
-  }
-  SparseVector<double> dir; dir.set_value(FD::Convert("f1"), 1.0);
-  ViterbiEnvelopeWeightFunction wf(wts, dir);
-  ViterbiEnvelope env = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
-  cerr << env << endl;
-  const vector<boost::shared_ptr<Segment> >& segs = env.GetSortedSegs();
-  dir *= segs[1]->x;
-  wts += dir;
-  hg.Reweight(wts);
-  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest2(hg, 10);
-  for (int i = 0; i < 10; ++i) {
-    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
-      kbest2.LazyKthBest(hg.nodes_.size() - 1, i);
-    if (!d) break;
-    cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
-  }
-  for (int i = 0; i < segs.size(); ++i) {
-    cerr << "seg=" << i << endl;
-    vector<WordID> trans;
-    segs[i]->ConstructTranslation(&trans);
-    cerr << TD::GetString(trans) << endl;
-  }
-}
-
-TEST_F(OptTest, TestS1) {
-  int fPhraseModel_0 = FD::Convert("PhraseModel_0");
-  int fPhraseModel_1 = FD::Convert("PhraseModel_1");
-  int fPhraseModel_2 = FD::Convert("PhraseModel_2");
-  int fLanguageModel = FD::Convert("LanguageModel");
-  int fWordPenalty = FD::Convert("WordPenalty");
-  int fPassThrough = FD::Convert("PassThrough");
-  SparseVector<double> wts;
-  wts.set_value(fWordPenalty, 4.25);
-  wts.set_value(fLanguageModel, -1.1165);
-  wts.set_value(fPhraseModel_0, -0.96);
-  wts.set_value(fPhraseModel_1, -0.65);
-  wts.set_value(fPhraseModel_2, -0.77);
-  wts.set_value(fPassThrough, -10.0);
-
-  vector<int> to_optimize;
-  to_optimize.push_back(fWordPenalty);
-  to_optimize.push_back(fLanguageModel);
-  to_optimize.push_back(fPhraseModel_0);
-  to_optimize.push_back(fPhraseModel_1);
-  to_optimize.push_back(fPhraseModel_2);
-
-  Hypergraph hg;
-  ReadFile rf("./test_data/0.json.gz");
-  HypergraphIO::ReadFromJSON(rf.stream(), &hg);
-  hg.Reweight(wts);
-
-  Hypergraph hg2;
-  ReadFile rf2("./test_data/1.json.gz");
-  HypergraphIO::ReadFromJSON(rf2.stream(), &hg2);
-  hg2.Reweight(wts);
-
-  vector<vector<WordID> > refs1(4);
-  TD::ConvertSentence(ref11, &refs1[0]);
-  TD::ConvertSentence(ref21, &refs1[1]);
-  TD::ConvertSentence(ref31, &refs1[2]);
-  TD::ConvertSentence(ref41, &refs1[3]);
-  vector<vector<WordID> > refs2(4);
-  TD::ConvertSentence(ref12, &refs2[0]);
-  TD::ConvertSentence(ref22, &refs2[1]);
-  TD::ConvertSentence(ref32, &refs2[2]);
-  TD::ConvertSentence(ref42, &refs2[3]);
-  vector<ViterbiEnvelope> envs(2);
-
-  RandomNumberGenerator<boost::mt19937> rng;
-
-  vector<SparseVector<double> > axes; // directions to search
-  LineOptimizer::CreateOptimizationDirections(
-     to_optimize,
-     10,
-     &rng,
-     &axes);
-  assert(axes.size() == 10 + to_optimize.size());
-  for (int i = 0; i < axes.size(); ++i)
-    cerr << axes[i] << endl;
-  const SparseVector<double>& axis = axes[0];
-
-  cerr << "Computing Viterbi envelope using inside algorithm...\n";
-  cerr << "axis: " << axis << endl;
-  clock_t t_start=clock();
-  ViterbiEnvelopeWeightFunction wf(wts, axis);  // wts = starting point, axis = search direction
-  envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
-  envs[1] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg2, NULL, wf);
-
-  vector<ErrorSurface> es(2);
-  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
-  boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(refs1);
-  boost::shared_ptr<SegmentEvaluator> scorer2 = metric->CreateSegmentEvaluator(refs2);
-  ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg);
-  ComputeErrorSurface(*scorer2, envs[1], &es[1], metric, hg2);
-  cerr << envs[0].size() << " " << envs[1].size() << endl;
-  cerr << es[0].size() << " " << es[1].size() << endl;
-  envs.clear();
-  clock_t t_env=clock();
-  float score;
-  double m = LineOptimizer::LineOptimize(metric,es, LineOptimizer::MAXIMIZE_SCORE, &score);
-  clock_t t_opt=clock();
-  cerr << "line optimizer returned: " << m << " (SCORE=" << score << ")\n";
-  EXPECT_FLOAT_EQ(0.48719698, score);
-  SparseVector<double> res = axis;
-  res *= m;
-  res += wts;
-  cerr << "res: " << res << endl;
-  cerr << "ENVELOPE PROCESSING=" << (static_cast<double>(t_env - t_start) / 1000.0) << endl;
-  cerr << "  LINE OPTIMIZATION=" << (static_cast<double>(t_opt - t_env) / 1000.0) << endl;
-  hg.Reweight(res);
-  hg2.Reweight(res);
-  vector<WordID> t1,t2;
-  ViterbiESentence(hg, &t1);
-  ViterbiESentence(hg2, &t2);
-  cerr << TD::GetString(t1) << endl;
-  cerr << TD::GetString(t2) << endl;
-}
-
-TEST_F(OptTest,TestZeroOrigin) {
-  const string json = "{\"rules\":[1,\"[X7] ||| blA ||| without ||| LHSProb=3.92173 LexE2F=2.90799 LexF2E=1.85003 GenerativeProb=10.5381 RulePenalty=1 XFE=2.77259 XEF=0.441833 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=0.693147\",2,\"[X7] ||| blA ||| except ||| LHSProb=4.92173 LexE2F=3.90799 LexF2E=1.85003 GenerativeProb=11.5381 RulePenalty=1 XFE=2.77259 XEF=1.44183 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=1.69315\",3,\"[S] ||| [X7,1] ||| [1] ||| GlueTop=1\",4,\"[X28] ||| EnwAn ||| title ||| LHSProb=3.96802 LexE2F=2.22462 LexF2E=1.83258 GenerativeProb=10.0863 RulePenalty=1 XFE=0 XEF=1.20397 LabelledEF=1.20397 LabelledFE=-1.98341e-08 LogRuleCount=1.09861\",5,\"[X0] ||| EnwAn ||| funny ||| LHSProb=3.98479 LexE2F=1.79176 LexF2E=3.21888 GenerativeProb=11.1681 RulePenalty=1 XFE=0 XEF=2.30259 LabelledEF=2.30259 LabelledFE=0 LogRuleCount=0 SingletonRule=1\",6,\"[X8] ||| [X7,1] EnwAn ||| entitled [1] ||| LHSProb=3.82533 LexE2F=3.21888 LexF2E=2.52573 GenerativeProb=11.3276 RulePenalty=1 XFE=1.20397 XEF=1.20397 LabelledEF=2.30259 LabelledFE=2.30259 LogRuleCount=0 SingletonRule=1\",7,\"[S] ||| [S,1] [X28,2] ||| [1] [2] ||| Glue=1\",8,\"[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1\",9,\"[S] ||| [X8,1] ||| [1] ||| GlueTop=1\",10,\"[Goal] ||| [S,1] ||| 
[1]\"],\"features\":[\"PassThrough\",\"Glue\",\"GlueTop\",\"LanguageModel\",\"WordPenalty\",\"LHSProb\",\"LexE2F\",\"LexF2E\",\"GenerativeProb\",\"RulePenalty\",\"XFE\",\"XEF\",\"LabelledEF\",\"LabelledFE\",\"LogRuleCount\",\"SingletonRule\"],\"edges\":[{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,3.92173,6,2.90799,7,1.85003,8,10.5381,9,1,10,2.77259,11,0.441833,12,2.63906,13,4.96981,14,0.693147],\"rule\":1},{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,4.92173,6,3.90799,7,1.85003,8,11.5381,9,1,10,2.77259,11,1.44183,12,2.63906,13,4.96981,14,1.69315],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X7\"},\"edges\":[{\"tail\":[0],\"spans\":[0,1,-1,-1],\"feats\":[2,1],\"rule\":3}],\"node\":{\"in_edges\":[2],\"cat\":\"S\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.96802,6,2.22462,7,1.83258,8,10.0863,9,1,11,1.20397,12,1.20397,13,-1.98341e-08,14,1.09861],\"rule\":4}],\"node\":{\"in_edges\":[3],\"cat\":\"X28\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.98479,6,1.79176,7,3.21888,8,11.1681,9,1,11,2.30259,12,2.30259,15,1],\"rule\":5}],\"node\":{\"in_edges\":[4],\"cat\":\"X0\"},\"edges\":[{\"tail\":[0],\"spans\":[0,2,-1,-1],\"feats\":[5,3.82533,6,3.21888,7,2.52573,8,11.3276,9,1,10,1.20397,11,1.20397,12,2.30259,13,2.30259,15,1],\"rule\":6}],\"node\":{\"in_edges\":[5],\"cat\":\"X8\"},\"edges\":[{\"tail\":[1,2],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":7},{\"tail\":[1,3],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":8},{\"tail\":[4],\"spans\":[0,2,-1,-1],\"feats\":[2,1],\"rule\":9}],\"node\":{\"in_edges\":[6,7,8],\"cat\":\"S\"},\"edges\":[{\"tail\":[5],\"spans\":[0,2,-1,-1],\"feats\":[],\"rule\":10}],\"node\":{\"in_edges\":[9],\"cat\":\"Goal\"}}";
-  Hypergraph hg;
-  istringstream instr(json);
-  HypergraphIO::ReadFromJSON(&instr, &hg);
-  SparseVector<double> wts;
-  wts.set_value(FD::Convert("PassThrough"), -0.929201533002898);
-  hg.Reweight(wts);
-
-  vector<pair<vector<WordID>, prob_t> > list;
-  std::vector<SparseVector<double> > features;
-  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, 10);
-  for (int i = 0; i < 10; ++i) {
-    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
-      kbest.LazyKthBest(hg.nodes_.size() - 1, i);
-    if (!d) break;
-    cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
-  }
- 
-  SparseVector<double> axis; axis.set_value(FD::Convert("Glue"),1.0);
-  ViterbiEnvelopeWeightFunction wf(wts, axis);  // wts = starting point, axis = search direction
-  vector<ViterbiEnvelope> envs(1);
-  envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
-
-  vector<vector<WordID> > mr(4);
-  TD::ConvertSentence("untitled", &mr[0]);
-  TD::ConvertSentence("with no title", &mr[1]);
-  TD::ConvertSentence("without a title", &mr[2]);
-  TD::ConvertSentence("without title", &mr[3]);
-  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
-  boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(mr);
-  vector<ErrorSurface> es(1);
-  ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg);
-}
-
-int main(int argc, char **argv) {
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
-
diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc
deleted file mode 100644
index 59d4f24f..00000000
--- a/vest/mr_vest_generate_mapper_input.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <iostream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "filelib.h"
-#include "weights.h"
-#include "line_optimizer.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("dev_set_size,s",po::value<unsigned>(),"[REQD] Development set size (# of parallel sentences)")
-        ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository")
-        ("weights,w",po::value<string>(),"[REQD] Current feature weights file")
-        ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
-        ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = false;
-  if (conf->count("dev_set_size") == 0) {
-    cerr << "Please specify the size of the development set using -d N\n";
-    flag = true;
-  }
-  if (conf->count("weights") == 0) {
-    cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n";
-    flag = true;
-  }
-  if (conf->count("forest_repository") == 0) {
-    cerr << "Please specify the forest repository location using -r <DIR>\n";
-    flag = true;
-  }
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-int main(int argc, char** argv) {
-  RandomNumberGenerator<boost::mt19937> rng;
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  vector<string> features;
-  SparseVector<weight_t> origin;
-  vector<weight_t> w;
-  Weights::InitFromFile(conf["weights"].as<string>(), &w, &features);
-  Weights::InitSparseVector(w, &origin);
-  const string forest_repository = conf["forest_repository"].as<string>();
-  assert(DirectoryExists(forest_repository));
-  if (conf.count("optimize_feature") > 0)
-    features=conf["optimize_feature"].as<vector<string> >();
-  vector<SparseVector<weight_t> > directions;
-  vector<int> fids(features.size());
-  for (int i = 0; i < features.size(); ++i)
-    fids[i] = FD::Convert(features[i]);
-  LineOptimizer::CreateOptimizationDirections(
-     fids,
-     conf["random_directions"].as<unsigned int>(),
-     &rng,
-     &directions);
-  unsigned dev_set_size = conf["dev_set_size"].as<unsigned>();
-  for (unsigned i = 0; i < dev_set_size; ++i) {
-    for (unsigned j = 0; j < directions.size(); ++j) {
-      cout << forest_repository << '/' << i << ".json.gz " << i << ' ';
-      print(cout, origin, "=", ";");
-      cout << ' ';
-      print(cout, directions[j], "=", ";");
-      cout << endl;
-    }
-  }
-  return 0;
-}
diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc
deleted file mode 100644
index 7d9625bc..00000000
--- a/vest/mr_vest_map.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "ns.h"
-#include "ns_docscorer.h"
-#include "ces.h"
-#include "filelib.h"
-#include "stringlib.h"
-#include "sparse_vector.h"
-#include "viterbi_envelope.h"
-#include "inside_outside.h"
-#include "error_surface.h"
-#include "b64tools.h"
-#include "hg_io.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)")
-        ("source,s",po::value<string>(), "Source file (ignored, except for AER)")
-        ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric being optimized")
-        ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = false;
-  if (!conf->count("reference")) {
-    cerr << "Please specify one or more references using -r <REF.TXT>\n";
-    flag = true;
-  }
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-bool ReadSparseVectorString(const string& s, SparseVector<double>* v) {
-#if 0
-  // this should work, but untested.
-  std::istringstream i(s);
-  i>>*v;
-#else
-  vector<string> fields;
-  Tokenize(s, ';', &fields);
-  if (fields.empty()) return false;
-  for (int i = 0; i < fields.size(); ++i) {
-    vector<string> pair(2);
-    Tokenize(fields[i], '=', &pair);
-    if (pair.size() != 2) {
-      cerr << "Error parsing vector string: " << fields[i] << endl;
-      return false;
-    }
-    v->set_value(FD::Convert(pair[0]), atof(pair[1].c_str()));
-  }
-  return true;
-#endif
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  const string evaluation_metric = conf["evaluation_metric"].as<string>();
-  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
-  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
-  cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl;
-  Hypergraph hg;
-  string last_file;
-  ReadFile in_read(conf["input"].as<string>());
-  istream &in=*in_read.stream();
-  while(in) {
-    string line;
-    getline(in, line);
-    if (line.empty()) continue;
-    istringstream is(line);
-    int sent_id;
-    string file, s_origin, s_direction;
-    // path-to-file (JSON) sent_ed starting-point search-direction
-    is >> file >> sent_id >> s_origin >> s_direction;
-    SparseVector<double> origin;
-    ReadSparseVectorString(s_origin, &origin);
-    SparseVector<double> direction;
-    ReadSparseVectorString(s_direction, &direction);
-    // cerr << "File: " << file << "\nDir: " << direction << "\n   X: " << origin << endl;
-    if (last_file != file) {
-      last_file = file;
-      ReadFile rf(file);
-      HypergraphIO::ReadFromJSON(rf.stream(), &hg);
-    }
-    ViterbiEnvelopeWeightFunction wf(origin, direction);
-    ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
-    ErrorSurface es;
-
-    ComputeErrorSurface(*ds[sent_id], ve, &es, metric, hg);
-    //cerr << "Viterbi envelope has " << ve.size() << " segments\n";
-    // cerr << "Error surface has " << es.size() << " segments\n";
-    string val;
-    es.Serialize(&val);
-    cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t';
-    B64::b64encode(val.c_str(), val.size(), &cout);
-    cout << endl << flush;
-  }
-  return 0;
-}
diff --git a/vest/mr_vest_reduce.cc b/vest/mr_vest_reduce.cc
deleted file mode 100644
index dda61f88..00000000
--- a/vest/mr_vest_reduce.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "sparse_vector.h"
-#include "error_surface.h"
-#include "line_optimizer.h"
-#include "b64tools.h"
-#include "stringlib.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("evaluation_metric,m",po::value<string>(), "Evaluation metric (IBM_BLEU, etc.)")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = conf->count("evaluation_metric") == 0;
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  const string evaluation_metric = conf["evaluation_metric"].as<string>();
-  LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
-  if (UppercaseString(evaluation_metric) == "TER")
-    opt_type = LineOptimizer::MINIMIZE_SCORE;
-  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
-
-  vector<ErrorSurface> esv;
-  string last_key, line, key, val;
-  while(getline(cin, line)) {
-    size_t ks = line.find("\t");
-    assert(string::npos != ks);
-    assert(ks > 2);
-    key = line.substr(2, ks - 2);
-    val = line.substr(ks + 1);
-    if (key != last_key) {
-      if (!last_key.empty()) {
-	float score;
-        double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score);
-	cout << last_key << "|" << x << "|" << score << endl;
-      }
-      last_key.swap(key);
-      esv.clear();
-    }
-    if (val.size() % 4 != 0) {
-      cerr << "B64 encoding error 1! Skipping.\n";
-      continue;
-    }
-    string encoded(val.size() / 4 * 3, '\0');
-    if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) {
-      cerr << "B64 encoding error 2! Skipping.\n";
-      continue;
-    }
-    esv.push_back(ErrorSurface());
-    esv.back().Deserialize(encoded);
-  }
-  if (!esv.empty()) {
-    float score;
-    double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score);
-    cout << last_key << "|" << x << "|" << score << endl;
-  }
-  return 0;
-}
diff --git a/vest/parallelize.pl b/vest/parallelize.pl
deleted file mode 100755
index 7d0365cc..00000000
--- a/vest/parallelize.pl
+++ /dev/null
@@ -1,423 +0,0 @@
-#!/usr/bin/env perl
-
-# Author: Adam Lopez
-#
-# This script takes a command that processes input
-# from stdin one-line-at-time, and parallelizes it
-# on the cluster using David Chiang's sentserver/
-# sentclient architecture.
-#
-# Prerequisites: the command *must* read each line
-# without waiting for subsequent lines of input
-# (for instance, a command which must read all lines
-# of input before processing will not work) and
-# return it to the output *without* buffering
-# multiple lines.
-
-#TODO: if -j 1, run immediately, not via sentserver?  possible differences in environment might make debugging harder
-
-#ANNOYANCE: if input is shorter than -j n lines, or at the very last few lines, repeatedly sleeps.  time cut down to 15s from 60s
-
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
-use LocalConfig;
-
-use Cwd qw/ abs_path cwd getcwd /; 
-use File::Temp qw/ tempfile /;
-use Getopt::Long;
-use IPC::Open2;
-use strict;
-use POSIX ":sys_wait_h";
-
-use File::Basename;
-my $myDir = dirname(__FILE__);
-print STDERR __FILE__." -> $myDir\n";
-push(@INC, $myDir);
-require "libcall.pl";
-
-my $tailn=5; # +0 = concatenate all the client logs.  5 = last 5 lines
-my $recycle_clients;    # spawn new clients when previous ones terminate
-my $stay_alive;      # dont let server die when having zero clients
-my $joblist = "";
-my $errordir="";
-my $multiline;
-my @files_to_stage;
-my $numnodes = 8;
-my $user = $ENV{"USER"};
-my $pmem = "9g";
-my $basep=50300;
-my $randp=300;
-my $tryp=50;
-my $no_which;
-my $no_cd;
-
-my $DEBUG=$ENV{DEBUG};
-print STDERR "DEBUG=$DEBUG output enabled.\n" if $DEBUG;
-my $verbose = 1;
-sub verbose {
-    if ($verbose) {
-        print STDERR @_,"\n";
-    }
-}
-sub debug {
-    if ($DEBUG) {
-        my ($package, $filename, $line) = caller;
-        print STDERR "DEBUG: $filename($line): ",join(' ',@_),"\n";
-    }
-}
-my $is_shell_special=qr.[ \t\n\\><|&;"'`~*?{}$!()].;
-my $shell_escape_in_quote=qr.[\\"\$`!].;
-sub escape_shell {
-    my ($arg)=@_;
-    return undef unless defined $arg;
-    return '""' unless $arg;
-    if ($arg =~ /$is_shell_special/) {
-        $arg =~ s/($shell_escape_in_quote)/\\$1/g;
-        return "\"$arg\"";
-    }
-    return $arg;
-}
-sub preview_files {
-    my ($l,$skipempty,$footer,$n)=@_;
-    $n=$tailn unless defined $n;
-    my @f=grep { ! ($skipempty && -z $_) } @$l;
-    my $fn=join(' ',map {escape_shell($_)} @f);
-    my $cmd="tail -n $n $fn";
-    unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":"");
-}
-sub prefix_dirname($) {
-    #like `dirname but if ends in / then return the whole thing
-    local ($_)=@_;
-    if (/\/$/) {
-        $_;
-    } else {
-        s#/[^/]$##;
-        $_ ? $_ : '';
-    }
-}
-sub ensure_final_slash($) {
-    local ($_)=@_;
-    m#/$# ? $_ : ($_."/");
-}
-sub extend_path($$;$$) {
-    my ($base,$ext,$mkdir,$baseisdir)=@_;
-    if (-d $base) {
-        $base.="/";
-    } else {
-        my $dir;
-        if ($baseisdir) {
-            $dir=$base;
-            $base.='/' unless $base =~ /\/$/;
-        } else {
-            $dir=prefix_dirname($base);
-        }
-        my @cmd=("/bin/mkdir","-p",$dir);
-        check_call(@cmd) if $mkdir;
-    }
-    return $base.$ext;
-}
-
-my $abscwd=abs_path(&getcwd);
-sub print_help;
-
-my $use_fork;
-my @pids;
-
-# Process command-line options
-unless (GetOptions(
-      "stay-alive" => \$stay_alive,
-      "recycle-clients" => \$recycle_clients,
-      "error-dir=s" => \$errordir,
-      "multi-line" => \$multiline,
-      "file=s" => \@files_to_stage,
-      "use-fork" => \$use_fork,
-      "verbose" => \$verbose,
-      "jobs=i" => \$numnodes,
-      "pmem=s" => \$pmem,
-        "baseport=i" => \$basep,
-#       "iport=i" => \$randp, #for short name -i
-        "no-which!" => \$no_which,
-            "no-cd!" => \$no_cd,
-            "tailn=s" => \$tailn,
-) && scalar @ARGV){
-  print_help();
-    die "bad options.";
-}
-
-my $cmd = "";
-my $prog=shift;
-if ($no_which) {
-    $cmd=$prog;
-} else {
-    $cmd=check_output("which $prog");
-    chomp $cmd;
-    die "$prog not found - $cmd" unless $cmd;
-}
-#$cmd=abs_path($cmd);
-for my $arg (@ARGV) {
-    $cmd .= " ".escape_shell($arg);
-}
-die "Please specify a command to parallelize\n" if $cmd eq '';
-
-my $cdcmd=$no_cd ? '' : ("cd ".escape_shell($abscwd)."\n");
-
-my $executable = $cmd;
-$executable =~ s/^\s*(\S+)($|\s.*)/$1/;
-$executable=check_output("basename $executable");
-chomp $executable;
-
-
-print STDERR "Parallelizing ($numnodes ways): $cmd\n\n";
-
-# create -e dir and save .sh
-use File::Temp qw/tempdir/;
-unless ($errordir) {
-    $errordir=tempdir("$executable.XXXXXX",CLEANUP=>1);
-}
-if ($errordir) {
-    my $scriptfile=extend_path("$errordir/","$executable.sh",1,1);
-    -d $errordir || die "should have created -e dir $errordir";
-    open SF,">",$scriptfile || die;
-    print SF "$cdcmd$cmd\n";
-    close SF;
-    chmod 0755,$scriptfile;
-    $errordir=abs_path($errordir);
-    &verbose("-e dir: $errordir");
-}
-
-# set cleanup handler
-my @cleanup_cmds;
-sub cleanup;
-sub cleanup_and_die;
-$SIG{INT} = "cleanup_and_die";
-$SIG{TERM} = "cleanup_and_die";
-$SIG{HUP} = "cleanup_and_die";
-
-# other subs:
-sub numof_live_jobs;
-sub launch_job_on_node;
-
-
-# vars
-my $mydir = check_output("dirname $0"); chomp $mydir;
-my $sentserver = "$mydir/sentserver";
-my $sentclient = "$mydir/sentclient";
-my $host = check_output("hostname");
-chomp $host;
-
-
-# find open port
-srand;
-my $port = 50300+int(rand($randp));
-my $endp=$port+$tryp;
-sub listening_port_lines {
-    my $quiet=$verbose?'':'2>/dev/null';
-    return unchecked_output("netstat -a -n $quiet | grep LISTENING | grep -i tcp");
-}
-my $netstat=&listening_port_lines;
-
-if ($verbose){ print STDERR "Testing port $port...";}
-
-while ($netstat=~/$port/ || &listening_port_lines=~/$port/){
-  if ($verbose){ print STDERR "port is busy\n";}
-  $port++;
-  if ($port > $endp){
-    die "Unable to find open port\n";
-  }
-  if ($verbose){ print STDERR "Testing port $port... "; }
-}
-if ($verbose){
-  print STDERR "port $port is available\n";
-}
-
-my $key = int(rand()*1000000);
-
-my $multiflag = "";
-if ($multiline){ $multiflag = "-m"; print STDERR "expecting multiline output.\n"; }
-my $stay_alive_flag = "";
-if ($stay_alive){ $stay_alive_flag = "--stay-alive"; print STDERR "staying alive while no clients are connected.\n"; }
-
-my $node_count = 0;
-my $script = "";
-# fork == one thread runs the sentserver, while the
-# other spawns the sentclient commands.
-my $pid = fork;
-if ($pid == 0) { # child
-  sleep 8; # give other thread time to start sentserver
-  $script = "$cdcmd$sentclient $host:$port:$key $cmd";
-
-  if ($verbose){
-    print STDERR "Client script:\n====\n";
-    print STDERR $script;
-    print STDERR "====\n";
-  }
-  for (my $jobn=0; $jobn<$numnodes; $jobn++){
-    launch_job();
-  }
-  if ($recycle_clients) {
-    my $ret;
-    my $livejobs;
-    while (1) {
-      $ret = waitpid($pid, WNOHANG);
-      #print STDERR "waitpid $pid ret = $ret \n";
-      last if ($ret != 0);
-      $livejobs = numof_live_jobs();
-      if ($numnodes >= $livejobs ) {  # a client terminated, OR # lines of input was less than -j
-        print STDERR "num of requested nodes = $numnodes; num of currently live jobs = $livejobs; Client terminated - launching another.\n";
-        launch_job();
-      } else {
-        sleep 15;
-      }
-    }
-  }
-  print STDERR "CHILD PROCESSES SPAWNED ... WAITING\n";
-  for my $p (@pids) {
-    waitpid($p, 0);
-  }
-} else {
-#  my $todo = "$sentserver -k $key $multiflag $port ";
-  my $todo = "$sentserver -k $key $multiflag $port $stay_alive_flag ";
-  if ($verbose){ print STDERR "Running: $todo\n"; }
-  check_call($todo);
-  print STDERR "Call to $sentserver returned.\n";
-  cleanup();
-  exit(0);
-}
-
-sub numof_live_jobs {
-  if ($use_fork) {
-    die "not implemented";
-  } else {
-    # We can probably continue decoding if the qstat error is only temporary
-    my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat")));
-    return ($#livejobs + 1);
-  }
-}
-my (@errors,@outs,@cmds);
-
-sub launch_job {
-    if ($use_fork) { return launch_job_fork(); }
-    my $errorfile = "/dev/null";
-    my $outfile = "/dev/null";
-    $node_count++;
-    my $clientname = $executable;
-    $clientname =~ s/^(.{4}).*$/$1/;
-    $clientname = "$clientname.$node_count";
-    if ($errordir){
-      $errorfile = "$errordir/$clientname.ER";
-      $outfile = "$errordir/$clientname.OU";
-      push @errors,$errorfile;
-      push @outs,$outfile;
-    }
-    my $todo = qsub_args($pmem) . " -N $clientname -o $outfile -e $errorfile";
-    push @cmds,$todo;
-
-    print STDERR "Running: $todo\n";
-    local(*QOUT, *QIN);
-    open2(\*QOUT, \*QIN, $todo) or die "Failed to open2: $!";
-    print QIN $script;
-    close QIN;
-    while (my $jobid=<QOUT>){
-      chomp $jobid;
-      if ($verbose){ print STDERR "Launched client job: $jobid"; }
-      $jobid =~ s/^(\d+)(.*?)$/\1/g;
-            $jobid =~ s/^Your job (\d+) .*$/\1/;
-      print STDERR " short job id $jobid\n";
-            if ($verbose){
-                print STDERR "cd: $abscwd\n";
-                print STDERR "cmd: $cmd\n";
-            }
-      if ($joblist == "") { $joblist = $jobid; }
-      else {$joblist = $joblist . "\|" . $jobid; }
-      my $cleanfn="qdel $jobid 2> /dev/null";
-      push(@cleanup_cmds, $cleanfn);
-    }
-    close QOUT;
-}
-
-sub launch_job_fork {
-  my $errorfile = "/dev/null";
-  my $outfile = "/dev/null";
-  $node_count++;
-  my $clientname = $executable;
-  $clientname =~ s/^(.{4}).*$/$1/;
-  $clientname = "$clientname.$node_count";
-  if ($errordir){
-    $errorfile = "$errordir/$clientname.ER";
-    $outfile = "$errordir/$clientname.OU";
-    push @errors,$errorfile;
-    push @outs,$outfile;
-  }
-  my $pid = fork;
-  if ($pid == 0) {
-    my ($fh, $scr_name) = get_temp_script();
-    print $fh $script;
-    close $fh;
-    my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile";
-    print STDERR "EXEC: $todo\n";
-    my $out = check_output("$todo");
-    unlink $scr_name or warn "Failed to remove $scr_name";
-    exit 0;
-  } else {
-    push @pids, $pid;
-  }
-}
-
-sub get_temp_script {
-  my ($fh, $filename) = tempfile( "workXXXX", SUFFIX => '.sh');
-  return ($fh, $filename);
-}
-
-sub cleanup_and_die {
-  cleanup();
-  die "\n";
-}
-
-sub cleanup {
-  print STDERR "Cleaning up...\n";
-  for $cmd (@cleanup_cmds){
-    print STDERR "  Cleanup command: $cmd\n";
-    eval $cmd;
-  }
-  print STDERR "outputs:\n",preview_files(\@outs,1),"\n";
-  print STDERR "errors:\n",preview_files(\@errors,1),"\n";
-  print STDERR "cmd:\n",$cmd,"\n";
-  print STDERR " cat $errordir/*.ER\nfor logs.\n";
-  print STDERR "Cleanup finished.\n";
-}
-
-sub print_help
-{
-  my $name = check_output("basename $0"); chomp $name;
-  print << "Help";
-
-usage: $name [options]
-
-  Automatic black-box parallelization of commands.
-
-options:
-
-  --use-fork
-    Instead of using qsub, use fork.
-
-  -e, --error-dir <dir>
-    Retain output files from jobs in <dir>, rather
-    than silently deleting them.
-
-  -m, --multi-line
-    Expect that command may produce multiple output
-    lines for a single input line.  $name makes a
-    reasonable attempt to obtain all output before
-    processing additional inputs.  However, use of this
-    option is inherently unsafe.
-
-  -v, --verbose
-    Print diagnostic informatoin on stderr.
-
-  -j, --jobs
-    Number of jobs to use.
-
-  -p, --pmem
-    pmem setting for each job.
-
-Help
-}
diff --git a/vest/sentclient.c b/vest/sentclient.c
deleted file mode 100644
index 91d994ab..00000000
--- a/vest/sentclient.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/* Copyright (c) 2001 by David Chiang. All rights reserved.*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <string.h>
-
-#include "sentserver.h"
-
-int main (int argc, char *argv[]) {
-  int sock, port;
-  char *s, *key;
-  struct hostent *hp;
-  struct sockaddr_in server;
-  int errors = 0;
-
-  if (argc < 3) {
-    fprintf(stderr, "Usage: sentclient host[:port[:key]] command [args ...]\n");
-    exit(1);
-  }
-
-  s = strchr(argv[1], ':');
-  key = NULL;
-
-  if (s == NULL) {
-    port = DEFAULT_PORT;
-  } else {
-    *s = '\0';
-    s+=1;
-	/* dumb hack */
-	key = strchr(s, ':');
-	if (key != NULL){
-		*key = '\0';
-		key += 1;
-	}
-    port = atoi(s);
-  }
-
-  sock = socket(AF_INET, SOCK_STREAM, 0);
-
-  hp = gethostbyname(argv[1]);
-  if (hp == NULL) {
-    fprintf(stderr, "unknown host %s\n", argv[1]);
-    exit(1);
-  }
-
-  bzero((char *)&server, sizeof(server));
-  bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length);
-  server.sin_family = hp->h_addrtype;
-  server.sin_port = htons(port);
-
-  while (connect(sock, (struct sockaddr *)&server, sizeof(server)) < 0) {
-    perror("connect()");
-    sleep(1);
-    errors++;
-    if (errors > 5)
-      exit(1);
-  }
-
-  close(0);
-  close(1);
-  dup2(sock, 0);
-  dup2(sock, 1);
-
-  if (key != NULL){
-	write(1, key, strlen(key));
-	write(1, "\n", 1);
-  }
-
-  execvp(argv[2], argv+2);
-  return 0;
-}
diff --git a/vest/sentserver.c b/vest/sentserver.c
deleted file mode 100644
index c20b4fa6..00000000
--- a/vest/sentserver.c
+++ /dev/null
@@ -1,515 +0,0 @@
-/* Copyright (c) 2001 by David Chiang. All rights reserved.*/
-
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <netinet/in.h>
-#include <sched.h>
-#include <pthread.h>
-#include <errno.h>
-
-#include "sentserver.h"
-
-#define MAX_CLIENTS 64
-
-struct clientinfo {
-  int s;
-  struct sockaddr_in sin;
-};
-
-struct line {
-  int id;
-  char *s;
-  int status;
-  struct line *next;
-} *head, **ptail;
-
-int n_sent = 0, n_received=0, n_flushed=0;
-
-#define STATUS_RUNNING 0
-#define STATUS_ABORTED 1
-#define STATUS_FINISHED 2
-
-pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_mutex_t clients_mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_mutex_t input_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-int n_clients = 0;
-int s;
-int expect_multiline_output = 0;
-int log_mutex = 0;
-int stay_alive = 0;		/* dont panic and die with zero clients */
-
-void queue_finish(struct line *node, char *s, int fid);
-char * read_line(int fd, int multiline);
-void done (int code);
-
-struct line * queue_get(int fid) {
-	struct line *cur;
-	char *s, *synch;
-
-	if (log_mutex) fprintf(stderr, "Getting for data for fid %d\n", fid);
-	if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
-	pthread_mutex_lock(&queue_mutex);
-
-	/* First, check for aborted sentences. */
-
-	if (log_mutex) fprintf(stderr, "  Checking queue for aborted jobs (fid %d)\n", fid);
-	for (cur = head; cur != NULL; cur = cur->next) {
-		if (cur->status == STATUS_ABORTED) {
-			cur->status = STATUS_RUNNING;
-
-			if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
-			pthread_mutex_unlock(&queue_mutex);
-
-			return cur;
-		}
-	}
-	if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
-	pthread_mutex_unlock(&queue_mutex);
-
-	/* Otherwise, read a new one. */
-	if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid);
-	if (log_mutex) fprintf(stderr, "  Reading input for new data (fid %d)\n", fid);
-	pthread_mutex_lock(&input_mutex);
-	s = read_line(0,0);
-
-	while (s) {
-		if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
-		pthread_mutex_lock(&queue_mutex);
-		if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid);
-		pthread_mutex_unlock(&input_mutex);
-
-		cur = malloc(sizeof (struct line));
-		cur->id = n_sent;
-		cur->s = s;
-		cur->next = NULL;
-
-		*ptail = cur;
-		ptail = &cur->next;
-
-		n_sent++;
-
-		if (strcmp(s,"===SYNCH===\n")==0){
-			fprintf(stderr, "Received ===SYNCH=== signal (fid %d)\n", fid);
-			// Note: queue_finish calls free(cur->s).
-			// Therefore we need to create a new string here.
-			synch = malloc((strlen("===SYNCH===\n")+2) * sizeof (char));
-			synch = strcpy(synch, s);
-
-			if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
-			pthread_mutex_unlock(&queue_mutex);
-			queue_finish(cur, synch, fid); /* handles its own lock */
-
-			if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid);
-			if (log_mutex) fprintf(stderr, "  Reading input for new data (fid %d)\n", fid);
-			pthread_mutex_lock(&input_mutex);
-
-			s = read_line(0,0);
-		} else {
-			if (log_mutex) fprintf(stderr, "  Received new data %d (fid %d)\n", cur->id, fid);
-			cur->status = STATUS_RUNNING;
-			if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
-			pthread_mutex_unlock(&queue_mutex);
-			return cur;
-		}
-	}
-
-	if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid);
-	pthread_mutex_unlock(&input_mutex);
-	/* Only way to reach this point: no more output */
-
-	if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
-	pthread_mutex_lock(&queue_mutex);
-	if (head == NULL) {
-		fprintf(stderr, "Reached end of file. Exiting.\n");
-		done(0);
-	} else
-		ptail = NULL; /* This serves as a signal that there is no more input */
-	if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
-	pthread_mutex_unlock(&queue_mutex);
-
-	return NULL;
-}
-
-void queue_panic() {
-	struct line *next;
-	while (head && head->status == STATUS_FINISHED) {
-		/* Write out finished sentences */
-		if (head->status == STATUS_FINISHED) {
-			fputs(head->s, stdout);
-			fflush(stdout);
-		}
-		/* Write out blank line for unfinished sentences */
-		if (head->status == STATUS_ABORTED) {
-			fputs("\n", stdout);
-			fflush(stdout);
-		}
-		/* By defition, there cannot be any RUNNING sentences, since
-		function is only called when n_clients == 0 */
-		free(head->s);
-		next = head->next;
-		free(head);
-		head = next;
-		n_flushed++;
-	}
-	fclose(stdout);
-	fprintf(stderr, "All clients died. Panicking, flushing completed sentences and exiting.\n");
-	done(1);
-}
-
-void queue_abort(struct line *node, int fid) {
-	if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
-	pthread_mutex_lock(&queue_mutex);
-	node->status = STATUS_ABORTED;
-	if (n_clients == 0) {
-		if (stay_alive) {
-			fprintf(stderr, "Warning! No live clients detected! Staying alive, will retry soon.\n");
-		} else {
-			queue_panic();
-		}
-	}
-	if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
-	pthread_mutex_unlock(&queue_mutex);
-}
-
-
-void queue_print() {
-  struct line *cur;
-
-  fprintf(stderr, "  Queue\n");
-
-  for (cur = head; cur != NULL; cur = cur->next) {
-    switch(cur->status) {
-    case STATUS_RUNNING:
-      fprintf(stderr, "    %d running  ", cur->id); break;
-    case STATUS_ABORTED:
-      fprintf(stderr, "    %d aborted  ", cur->id); break;
-    case STATUS_FINISHED:
-      fprintf(stderr, "    %d finished ", cur->id); break;
-
-    }
-	fprintf(stderr, "\n");
-    //fprintf(stderr, cur->s);
-  }
-}
-
-void queue_finish(struct line *node, char *s, int fid) {
-  struct line *next;
-  if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
-  pthread_mutex_lock(&queue_mutex);
-
-  free(node->s);
-  node->s = s;
-  node->status = STATUS_FINISHED;
-  n_received++;
-
-  /* Flush out finished nodes */
-  while (head && head->status == STATUS_FINISHED) {
-
-    if (log_mutex) fprintf(stderr, "  Flushing finished node %d\n", head->id);
-
-    fputs(head->s, stdout);
-    fflush(stdout);
-    if (log_mutex) fprintf(stderr, "  Flushed node %d\n", head->id);
-    free(head->s);
-
-    next = head->next;
-    free(head);
-
-    head = next;
-
-    n_flushed++;
-
-    if (head == NULL) { /* empty queue */
-      if (ptail == NULL) { /* This can only happen if set in queue_get as signal that there is no more input. */
-        fprintf(stderr, "All sentences finished. Exiting.\n");
-        done(0);
-      } else /* ptail pointed at something which was just popped off the stack -- reset to head*/
-        ptail = &head;
-    }
-  }
-
-  if (log_mutex) fprintf(stderr, "  Flushing output %d\n", head->id);
-  fflush(stdout);
-  fprintf(stderr, "%d sentences sent, %d sentences finished, %d sentences flushed\n", n_sent, n_received, n_flushed);
-
-  if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
-  pthread_mutex_unlock(&queue_mutex);
-
-}
-
-char * read_line(int fd, int multiline) {
-  int size = 80;
-  char errorbuf[100];
-  char *s = malloc(size+2);
-  int result, errors=0;
-  int i = 0;
-
-  result = read(fd, s+i, 1);
-
-  while (1) {
-    if (result < 0) {
-      perror("read()");
-      sprintf(errorbuf, "Error code: %d\n", errno);
-      fprintf(stderr, errorbuf);
-      errors++;
-      if (errors > 5) {
-	free(s);
-	return NULL;
-      } else {
-	sleep(1); /* retry after delay */
-      }
-    } else if (result == 0) {
-      break;
-    } else if (multiline==0 && s[i] == '\n') {
-      break;
-    } else {
-      if (s[i] == '\n'){
-	/* if we've reached this point,
-	   then multiline must be 1, and we're
-	   going to poll the fd for an additional
-	   line of data.  The basic design is to
-	   run a select on the filedescriptor fd.
-	   Select will return under two conditions:
-	   if there is data on the fd, or if a
-	   timeout is reached.  We'll select on this
-	   fd.  If select returns because there's data
-	   ready, keep going; else assume there's no
-	   more and return the data we already have.
-	*/
-
-	fd_set set;
-	FD_ZERO(&set);
-	FD_SET(fd, &set);
-
-	struct timeval timeout;
-	timeout.tv_sec = 3; // number of seconds for timeout
-	timeout.tv_usec = 0;
-
-	int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout);
-	if (ready<1){
-	  break; // no more data, stop looping
-	}
-      }
-      i++;
-
-      if (i == size) {
-	size = size*2;
-	s = realloc(s, size+2);
-      }
-    }
-
-    result = read(fd, s+i, 1);
-  }
-
-  if (result == 0 && i == 0) { /* end of file */
-    free(s);
-    return NULL;
-  }
-
-  s[i] = '\n';
-  s[i+1] = '\0';
-
-  return s;
-}
-
-void * new_client(void *arg) {
-  struct clientinfo *client = (struct clientinfo *)arg;
-  struct line *cur;
-  int result;
-  char *s;
-  char errorbuf[100];
-
-  pthread_mutex_lock(&clients_mutex);
-  n_clients++;
-  pthread_mutex_unlock(&clients_mutex);
-
-  fprintf(stderr, "Client connected (%d connected)\n", n_clients);
-
-  for (;;) {
-
-    cur = queue_get(client->s);
-
-    if (cur) {
-      /* fprintf(stderr, "Sending to client: %s", cur->s); */
-      fprintf(stderr, "Sending data %d to client (fid %d)\n", cur->id, client->s);
-      result = write(client->s, cur->s, strlen(cur->s));
-      if (result < strlen(cur->s)){
-        perror("write()");
-        sprintf(errorbuf, "Error code: %d\n", errno);
-        fprintf(stderr, errorbuf);
-
-        pthread_mutex_lock(&clients_mutex);
-        n_clients--;
-        pthread_mutex_unlock(&clients_mutex);
-
-        fprintf(stderr, "Client died (%d connected)\n", n_clients);
-        queue_abort(cur, client->s);
-
-        close(client->s);
-        free(client);
-
-        pthread_exit(NULL);
-      }
-    } else {
-      close(client->s);
-      pthread_mutex_lock(&clients_mutex);
-      n_clients--;
-      pthread_mutex_unlock(&clients_mutex);
-      fprintf(stderr, "Client dismissed (%d connected)\n", n_clients);
-      pthread_exit(NULL);
-    }
-
-    s = read_line(client->s,expect_multiline_output);
-    if (s) {
-      /* fprintf(stderr, "Client (fid %d) returned: %s", client->s, s); */
-      fprintf(stderr, "Client (fid %d) returned data %d\n", client->s, cur->id);
-//      queue_print();
-      queue_finish(cur, s, client->s);
-    } else {
-      pthread_mutex_lock(&clients_mutex);
-      n_clients--;
-      pthread_mutex_unlock(&clients_mutex);
-
-      fprintf(stderr, "Client died (%d connected)\n", n_clients);
-      queue_abort(cur, client->s);
-
-      close(client->s);
-      free(client);
-
-      pthread_exit(NULL);
-    }
-
-  }
-  return 0;
-}
-
-void done (int code) {
-  close(s);
-  exit(code);
-}
-
-
-
-int main (int argc, char *argv[]) {
-  struct sockaddr_in sin, from;
-  int g;
-  socklen_t len;
-  struct clientinfo *client;
-  int port;
-  int opt;
-  int errors = 0;
-  int argi;
-  char *key = NULL, *client_key;
-  int use_key = 0;
-  /* the key stuff here doesn't provide any
-  real measure of security, it's mainly to keep
-  jobs from bumping into each other.  */
-
-  pthread_t tid;
-  port = DEFAULT_PORT;
-
-  for (argi=1; argi < argc; argi++){
-    if (strcmp(argv[argi], "-m")==0){
-      expect_multiline_output = 1;
-    } else if (strcmp(argv[argi], "-k")==0){
-      argi++;
-      if (argi == argc){
-      	fprintf(stderr, "Key must be specified after -k\n");
-      	exit(1);
-      }
-      key = argv[argi];
-      use_key = 1;
-    } else if (strcmp(argv[argi], "--stay-alive")==0){
-      stay_alive = 1;    /* dont panic and die with zero clients */
-    } else {
-      port = atoi(argv[argi]);
-    }
-  }
-
-  /* Initialize data structures */
-  head = NULL;
-  ptail = &head;
-
-  /* Set up listener */
-  s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-  opt = 1;
-  setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
-
-  sin.sin_family = AF_INET;
-  sin.sin_addr.s_addr = htonl(INADDR_ANY);
-  sin.sin_port = htons(port);
-  while (bind(s, (struct sockaddr *) &sin, sizeof(sin)) < 0) {
-	perror("bind()");
-	sleep(1);
-	errors++;
-	if (errors > 100)
-	  exit(1);
-  }
-
-  len = sizeof(sin);
-  getsockname(s, (struct sockaddr *) &sin, &len);
-
-  fprintf(stderr, "Listening on port %hu\n", ntohs(sin.sin_port));
-
-  while (listen(s, MAX_CLIENTS) < 0) {
-	perror("listen()");
-	sleep(1);
-	errors++;
-	if (errors > 100)
-	  exit(1);
-  }
-
-  for (;;) {
-    len = sizeof(from);
-    g = accept(s, (struct sockaddr *)&from, &len);
-    if (g < 0) {
-      perror("accept()");
-      sleep(1);
-      continue;
-    }
-    client = malloc(sizeof(struct clientinfo));
-    client->s = g;
-    bcopy(&from, &client->sin, len);
-
-	if (use_key){
-		fd_set set;
-		FD_ZERO(&set);
-		FD_SET(client->s, &set);
-
-		struct timeval timeout;
-		timeout.tv_sec = 3; // number of seconds for timeout
-		timeout.tv_usec = 0;
-
-		int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout);
-		if (ready<1){
-			fprintf(stderr, "Prospective client failed to respond with correct key.\n");
-			close(client->s);
-			free(client);
-		} else {
-			client_key = read_line(client->s,0);
-			client_key[strlen(client_key)-1]='\0'; /* chop trailing newline */
-			if (strcmp(key, client_key)==0){
-				pthread_create(&tid, NULL, new_client, client);
-			} else {
-				fprintf(stderr, "Prospective client failed to respond with correct key.\n");
-				close(client->s);
-				free(client);
-			}
-			free(client_key);
-		}
-	} else {
-		pthread_create(&tid, NULL, new_client, client);
-	}
-  }
-
-}
-
-
-
diff --git a/vest/sentserver.h b/vest/sentserver.h
deleted file mode 100644
index cd17a546..00000000
--- a/vest/sentserver.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef SENTSERVER_H
-#define SENTSERVER_H
-
-#define DEFAULT_PORT 50000
-
-#endif
diff --git a/vest/tac.pl b/vest/tac.pl
deleted file mode 100755
index 9fb525c1..00000000
--- a/vest/tac.pl
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/perl
-
-while(<>) {
-    chomp;
-    $|=1;
-    print (scalar reverse($_));
-    print "\n";
-}
diff --git a/vest/test_aer/README b/vest/test_aer/README
deleted file mode 100644
index 819b2e32..00000000
--- a/vest/test_aer/README
+++ /dev/null
@@ -1,8 +0,0 @@
-To run the test:
-
-../dist-vest.pl --local --metric aer cdec.ini --source-file corpus.src --ref-files=ref.0 --weights weights
-
-This will optimize the parameters of the tiny lexical translation model
-so as to minimize the AER of the Viterbi alignment on the development
-set in corpus.src according to the reference alignments in ref.0.
-
diff --git a/vest/test_aer/cdec.ini b/vest/test_aer/cdec.ini
deleted file mode 100644
index 08187848..00000000
--- a/vest/test_aer/cdec.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-formalism=lextrans
-grammar=grammar
-aligner=true
diff --git a/vest/test_aer/corpus.src b/vest/test_aer/corpus.src
deleted file mode 100644
index 31b23971..00000000
--- a/vest/test_aer/corpus.src
+++ /dev/null
@@ -1,3 +0,0 @@
-el gato negro ||| the black cat
-el gato ||| the cat
-el libro ||| the book
diff --git a/vest/test_aer/grammar b/vest/test_aer/grammar
deleted file mode 100644
index 9d857824..00000000
--- a/vest/test_aer/grammar
+++ /dev/null
@@ -1,12 +0,0 @@
-el ||| cat ||| F1=1
-el ||| the ||| F2=1
-el ||| black ||| F3=1
-el ||| book ||| F11=1
-gato ||| cat ||| F4=1 NN=1
-gato ||| black ||| F5=1
-gato ||| the ||| F6=1
-negro ||| the ||| F7=1
-negro ||| cat ||| F8=1
-negro ||| black ||| F9=1
-libro ||| the ||| F10=1
-libro ||| book ||| F12=1 NN=1
diff --git a/vest/test_aer/ref.0 b/vest/test_aer/ref.0
deleted file mode 100644
index 734a9c5b..00000000
--- a/vest/test_aer/ref.0
+++ /dev/null
@@ -1,3 +0,0 @@
-0-0 1-2 2-1
-0-0 1-1
-0-0 1-1
diff --git a/vest/test_aer/weights b/vest/test_aer/weights
deleted file mode 100644
index afc9282e..00000000
--- a/vest/test_aer/weights
+++ /dev/null
@@ -1,13 +0,0 @@
-F1 0.1
-F2 -.5980815
-F3 0.24235
-F4 0.625
-F5 0.4514
-F6 0.112316
-F7 -0.123415
-F8 -0.25390285
-F9 -0.23852
-F10 0.646
-F11 0.413141
-F12 0.343216
-NN -0.1215
diff --git a/vest/test_data/0.json.gz b/vest/test_data/0.json.gz
deleted file mode 100644
index 30f8dd77..00000000
Binary files a/vest/test_data/0.json.gz and /dev/null differ
diff --git a/vest/test_data/1.json.gz b/vest/test_data/1.json.gz
deleted file mode 100644
index c82cc179..00000000
Binary files a/vest/test_data/1.json.gz and /dev/null differ
diff --git a/vest/test_data/c2e.txt.0 b/vest/test_data/c2e.txt.0
deleted file mode 100644
index 12c4abe9..00000000
--- a/vest/test_data/c2e.txt.0
+++ /dev/null
@@ -1,2 +0,0 @@
-australia reopens embassy in manila
-( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack .
diff --git a/vest/test_data/c2e.txt.1 b/vest/test_data/c2e.txt.1
deleted file mode 100644
index 4ac12df1..00000000
--- a/vest/test_data/c2e.txt.1
+++ /dev/null
@@ -1,2 +0,0 @@
-australia reopened manila embassy
-( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack .
diff --git a/vest/test_data/c2e.txt.2 b/vest/test_data/c2e.txt.2
deleted file mode 100644
index 2f67b72f..00000000
--- a/vest/test_data/c2e.txt.2
+++ /dev/null
@@ -1,2 +0,0 @@
-australia to reopen embassy in manila
-( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so-called confirmed terrorist attack threats .
diff --git a/vest/test_data/c2e.txt.3 b/vest/test_data/c2e.txt.3
deleted file mode 100644
index 5483cef6..00000000
--- a/vest/test_data/c2e.txt.3
+++ /dev/null
@@ -1,2 +0,0 @@
-australia to re - open its embassy to manila
-( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so-called " clear " threat of terrorist attack 7 weeks ago .
diff --git a/vest/test_data/re.txt.0 b/vest/test_data/re.txt.0
deleted file mode 100644
index 86eff087..00000000
--- a/vest/test_data/re.txt.0
+++ /dev/null
@@ -1,5 +0,0 @@
-erdogan states turkey to reject any pressures to urge it to recognize cyprus
-ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened .
-erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus .
-we will discuss this dossier in the course of membership negotiations . "
-he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . "
diff --git a/vest/test_data/re.txt.1 b/vest/test_data/re.txt.1
deleted file mode 100644
index 2140f198..00000000
--- a/vest/test_data/re.txt.1
+++ /dev/null
@@ -1,5 +0,0 @@
-erdogan confirms turkey will resist any pressure to recognize cyprus
-ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara .
-erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus .
-we shall discuss this issue in the course of the membership negotiations . "
-he added : " let me be clear - i cannot confine turkey . this is something we do not accept . "
diff --git a/vest/test_data/re.txt.2 b/vest/test_data/re.txt.2
deleted file mode 100644
index 94e46286..00000000
--- a/vest/test_data/re.txt.2
+++ /dev/null
@@ -1,5 +0,0 @@
-erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus
-ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara .
-erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus .
-we shall discuss this dossier during the negotiations on joining . "
-and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . "
diff --git a/vest/test_data/re.txt.3 b/vest/test_data/re.txt.3
deleted file mode 100644
index f87c3308..00000000
--- a/vest/test_data/re.txt.3
+++ /dev/null
@@ -1,5 +0,0 @@
-erdogan stresses that turkey will reject all pressures to force it to recognize cyprus
-ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not .
-erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus .
-we will discuss this file during the negotiations on joining . "
-he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . "
diff --git a/vest/viterbi_envelope.cc b/vest/viterbi_envelope.cc
deleted file mode 100644
index 9fcf75a0..00000000
--- a/vest/viterbi_envelope.cc
+++ /dev/null
@@ -1,177 +0,0 @@
-#include "viterbi_envelope.h"
-
-#include <cassert>
-#include <limits>
-
-using namespace std;
-using boost::shared_ptr;
-
-ostream& operator<<(ostream& os, const ViterbiEnvelope& env) {
-  os << '<';
-  const vector<shared_ptr<Segment> >& segs = env.GetSortedSegs();
-  for (int i = 0; i < segs.size(); ++i)
-    os << (i==0 ? "" : "|") << "x=" << segs[i]->x << ",b=" << segs[i]->b << ",m=" << segs[i]->m << ",p1=" << segs[i]->p1 << ",p2=" << segs[i]->p2;
-  return os << '>';
-}
-
-ViterbiEnvelope::ViterbiEnvelope(int i) {
-  if (i == 0) {
-    // do nothing - <>
-  } else if (i == 1) {
-    segs.push_back(shared_ptr<Segment>(new Segment(0, 0, 0, shared_ptr<Segment>(), shared_ptr<Segment>())));
-    assert(this->IsMultiplicativeIdentity());
-  } else {
-    cerr << "Only can create ViterbiEnvelope semiring 0 and 1 with this constructor!\n";
-    abort();
-  }
-}
-
-struct SlopeCompare {
-  bool operator() (const shared_ptr<Segment>& a, const shared_ptr<Segment>& b) const {
-    return a->m < b->m;
-  }
-};
-
-const ViterbiEnvelope& ViterbiEnvelope::operator+=(const ViterbiEnvelope& other) {
-  if (!other.is_sorted) other.Sort();
-  if (segs.empty()) {
-    segs = other.segs;
-    return *this;
-  }
-  is_sorted = false;
-  int j = segs.size();
-  segs.resize(segs.size() + other.segs.size());
-  for (int i = 0; i < other.segs.size(); ++i)
-    segs[j++] = other.segs[i];
-  assert(j == segs.size());
-  return *this;
-}
-
-void ViterbiEnvelope::Sort() const {
-  sort(segs.begin(), segs.end(), SlopeCompare());
-  const int k = segs.size();
-  int j = 0;
-  for (int i = 0; i < k; ++i) {
-    Segment l = *segs[i];
-    l.x = kMinusInfinity;
-    // cerr << "m=" << l.m << endl;
-    if (0 < j) {
-      if (segs[j-1]->m == l.m) {   // lines are parallel
-        if (l.b <= segs[j-1]->b) continue;
-        --j;
-      }
-      while(0 < j) {
-        l.x = (l.b - segs[j-1]->b) / (segs[j-1]->m - l.m);
-        if (segs[j-1]->x < l.x) break;
-        --j;
-      }
-      if (0 == j) l.x = kMinusInfinity;
-    }
-    *segs[j++] = l;
-  }
-  segs.resize(j);
-  is_sorted = true;
-}
-
-const ViterbiEnvelope& ViterbiEnvelope::operator*=(const ViterbiEnvelope& other) {
-  if (other.IsMultiplicativeIdentity()) { return *this; }
-  if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; }
-
-  if (!is_sorted) Sort();
-  if (!other.is_sorted) other.Sort();
-
-  if (this->IsEdgeEnvelope()) {
-//    if (other.size() > 1)
-//      cerr << *this << " (TIMES) " << other << endl;
-    shared_ptr<Segment> edge_parent = segs[0];
-    const double& edge_b = edge_parent->b;
-    const double& edge_m = edge_parent->m;
-    segs.clear();
-    for (int i = 0; i < other.segs.size(); ++i) {
-      const Segment& seg = *other.segs[i];
-      const double m = seg.m + edge_m;
-      const double b = seg.b + edge_b;
-      const double& x = seg.x;       // x's don't change with *
-      segs.push_back(shared_ptr<Segment>(new Segment(x, m, b, edge_parent, other.segs[i])));
-      assert(segs.back()->p1->edge);
-    }
-//    if (other.size() > 1)
-//      cerr << " = " << *this << endl;
-  } else {
-    vector<shared_ptr<Segment> > new_segs;
-    int this_i = 0;
-    int other_i = 0;
-    const int this_size  = segs.size();
-    const int other_size = other.segs.size();
-    double cur_x = kMinusInfinity;   // moves from left to right across the
-                                     // real numbers, stopping for all inter-
-                                     // sections
-    double this_next_val  = (1 < this_size  ? segs[1]->x       : kPlusInfinity);
-    double other_next_val = (1 < other_size ? other.segs[1]->x : kPlusInfinity);
-    while (this_i < this_size && other_i < other_size) {
-      const Segment& this_seg = *segs[this_i];
-      const Segment& other_seg= *other.segs[other_i];
-      const double m = this_seg.m + other_seg.m;
-      const double b = this_seg.b + other_seg.b;
- 
-      new_segs.push_back(shared_ptr<Segment>(new Segment(cur_x, m, b, segs[this_i], other.segs[other_i])));
-      int comp = 0;
-      if (this_next_val < other_next_val) comp = -1; else
-        if (this_next_val > other_next_val) comp = 1;
-      if (0 == comp) {  // the next values are equal, advance both indices
-        ++this_i;
-	++other_i;
-        cur_x = this_next_val;  // could be other_next_val (they're equal!)
-        this_next_val  = (this_i+1  < this_size  ? segs[this_i+1]->x        : kPlusInfinity);
-        other_next_val = (other_i+1 < other_size ? other.segs[other_i+1]->x : kPlusInfinity);
-      } else {  // advance the i with the lower x, update cur_x
-        if (-1 == comp) {
-          ++this_i;
-          cur_x = this_next_val;
-          this_next_val =  (this_i+1  < this_size  ? segs[this_i+1]->x        : kPlusInfinity);
-        } else {
-          ++other_i;
-          cur_x = other_next_val;
-          other_next_val = (other_i+1 < other_size ? other.segs[other_i+1]->x : kPlusInfinity);
-        }
-      }
-    }
-    segs.swap(new_segs);
-  }
-  //cerr << "Multiply: result=" << (*this) << endl;
-  return *this;
-}
-
-// recursively construct translation
-void Segment::ConstructTranslation(vector<WordID>* trans) const {
-  const Segment* cur = this;
-  vector<vector<WordID> > ant_trans;
-  while(!cur->edge) {
-    ant_trans.resize(ant_trans.size() + 1);
-    cur->p2->ConstructTranslation(&ant_trans.back());
-    cur = cur->p1.get();
-  }
-  size_t ant_size = ant_trans.size();
-  vector<const vector<WordID>*> pants(ant_size);
-  assert(ant_size == cur->edge->tail_nodes_.size());
-  --ant_size;
-  for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i];
-  cur->edge->rule_->ESubstitute(pants, trans);
-}
-
-void Segment::CollectEdgesUsed(std::vector<bool>* edges_used) const {
-  if (edge) {
-    assert(edge->id_ < edges_used->size());
-    (*edges_used)[edge->id_] = true;
-  }
-  if (p1) p1->CollectEdgesUsed(edges_used);
-  if (p2) p2->CollectEdgesUsed(edges_used);
-}
-
-ViterbiEnvelope ViterbiEnvelopeWeightFunction::operator()(const Hypergraph::Edge& e) const {
-  const double m = direction.dot(e.feature_values_);
-  const double b = origin.dot(e.feature_values_);
-  Segment* seg = new Segment(m, b, e);
-  return ViterbiEnvelope(1, seg);
-}
-
diff --git a/vest/viterbi_envelope.h b/vest/viterbi_envelope.h
deleted file mode 100644
index 60ad82d8..00000000
--- a/vest/viterbi_envelope.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef _VITERBI_ENVELOPE_H_
-#define _VITERBI_ENVELOPE_H_
-
-#include <vector>
-#include <iostream>
-#include <boost/shared_ptr.hpp>
-
-#include "hg.h"
-#include "sparse_vector.h"
-
-static const double kMinusInfinity = -std::numeric_limits<double>::infinity();
-static const double kPlusInfinity = std::numeric_limits<double>::infinity();
-
-struct Segment {
-  Segment() : x(), m(), b(), edge() {}
-  Segment(double _m, double _b) :
-    x(kMinusInfinity), m(_m), b(_b), edge() {}
-  Segment(double _x, double _m, double _b, const boost::shared_ptr<Segment>& p1_, const boost::shared_ptr<Segment>& p2_) :
-    x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {}
-  Segment(double _m, double _b, const Hypergraph::Edge& edge) :
-    x(kMinusInfinity), m(_m), b(_b), edge(&edge) {}
-
-  double x;                   // x intersection with previous segment in env, or -inf if none
-  double m;                   // this line's slope
-  double b;                   // intercept with y-axis
-
-  // we keep a pointer to the "parents" of this segment so we can reconstruct
-  // the Viterbi translation corresponding to this segment
-  boost::shared_ptr<Segment> p1;
-  boost::shared_ptr<Segment> p2;
-
-  // only Segments created from an edge using the ViterbiEnvelopeWeightFunction
-  // have rules
-  // TRulePtr rule;
-  const Hypergraph::Edge* edge;
-
-  // recursively recover the Viterbi translation that will result from setting
-  // the weights to origin + axis * x, where x is any value from this->x up
-  // until the next largest x in the containing ViterbiEnvelope
-  void ConstructTranslation(std::vector<WordID>* trans) const;
-  void CollectEdgesUsed(std::vector<bool>* edges_used) const;
-};
-
-// this is the semiring value type,
-// it defines constructors for 0, 1, and the operations + and *
-struct ViterbiEnvelope {
-  // create semiring zero
-  ViterbiEnvelope() : is_sorted(true) {}  // zero
-  // for debugging:
-  ViterbiEnvelope(const std::vector<boost::shared_ptr<Segment> >& s) : segs(s) { Sort(); }
-  // create semiring 1 or 0
-  explicit ViterbiEnvelope(int i);
-  ViterbiEnvelope(int n, Segment* seg) : is_sorted(true), segs(n, boost::shared_ptr<Segment>(seg)) {}
-  const ViterbiEnvelope& operator+=(const ViterbiEnvelope& other);
-  const ViterbiEnvelope& operator*=(const ViterbiEnvelope& other);
-  bool IsMultiplicativeIdentity() const {
-    return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->edge) && (!segs[0]->p1) && (!segs[0]->p2); }
-  const std::vector<boost::shared_ptr<Segment> >& GetSortedSegs() const {
-    if (!is_sorted) Sort();
-    return segs;
-  }
-  size_t size() const { return segs.size(); }
-
- private:
-  bool IsEdgeEnvelope() const {
-    return segs.size() == 1 && segs[0]->edge; }
-  void Sort() const;
-  mutable bool is_sorted;
-  mutable std::vector<boost::shared_ptr<Segment> > segs;
-};
-std::ostream& operator<<(std::ostream& os, const ViterbiEnvelope& env);
-
-struct ViterbiEnvelopeWeightFunction {
-  ViterbiEnvelopeWeightFunction(const SparseVector<double>& ori,
-                                const SparseVector<double>& dir) : origin(ori), direction(dir) {}
-  ViterbiEnvelope operator()(const Hypergraph::Edge& e) const;
-  const SparseVector<double> origin;
-  const SparseVector<double> direction;
-};
-
-#endif
-- 
cgit v1.2.3