From a53461650fbdcd3cfe7543d28af9647ac3e5e47e Mon Sep 17 00:00:00 2001
From: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Wed, 11 Aug 2010 02:37:10 +0000
Subject: major refactor, break bad circular deps

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@509 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 Makefile.am                          |   2 +-
 configure.ac                         |   2 +-
 decoder/Makefile.am                  |  37 +-
 decoder/aligner.cc                   |  74 +---
 decoder/aligner.h                    |   2 -
 decoder/array2d.h                    | 172 ---------
 decoder/cdec.cc                      |  11 +-
 decoder/dict.cc                      |  27 --
 decoder/dict.h                       |  66 ----
 decoder/dict_test.cc                 |  50 ---
 decoder/fdict.cc                     | 143 -------
 decoder/fdict.h                      |  34 --
 decoder/feature_vector.h             |  18 -
 decoder/ff_bleu.cc                   |   2 +-
 decoder/ff_lm.cc                     |   2 +-
 decoder/ff_wordalign.cc              |   3 +-
 decoder/filelib.cc                   |  22 --
 decoder/filelib.h                    | 106 ------
 decoder/gzstream.cc                  | 182 ---------
 decoder/gzstream.h                   | 127 -------
 decoder/hash.h                       |  54 ---
 decoder/have_64_bits.h               |  17 -
 decoder/hg.h                         |   4 +-
 decoder/hg_io.cc                     |  53 ---
 decoder/hg_io.h                      |   5 -
 decoder/int_or_pointer.h             |  70 ----
 decoder/intrusive_refcount.hpp       |  84 -----
 decoder/logval.h                     | 174 ---------
 decoder/logval_test.cc               |  73 ----
 decoder/murmur_hash.h                | 186 ---------
 decoder/null_deleter.h               |   9 -
 decoder/oracle_bleu.h                |   2 +-
 decoder/phrasebased_translator.cc    |   4 +-
 decoder/prob.h                       |   8 -
 decoder/sampler.h                    | 147 --------
 decoder/sentence_metadata.h          |   2 +-
 decoder/small_vector.h               | 265 -------------
 decoder/small_vector_test.cc         | 129 -------
 decoder/sparse_vector.cc             |  98 -----
 decoder/sparse_vector.h              | 512 -------------------------
 decoder/static_utoa.h                | 115 ------
 decoder/stringlib.cc                 |  98 -----
 decoder/stringlib.h                  | 267 -------------
 decoder/stringlib_test.cc            |  17 -
 decoder/tdict.cc                     | 154 --------
 decoder/tdict.h                      |  50 ---
 decoder/test_data/weights            |   8 -
 decoder/threadlocal.h                |  71 ----
 decoder/timing_stats.cc              |  24 --
 decoder/timing_stats.h               |  25 --
 decoder/weights.cc                   |  77 ----
 decoder/weights.h                    |  21 --
 decoder/weights_test.cc              |  28 --
 decoder/wordid.h                     |   6 -
 extools/Makefile.am                  |  12 +-
 extools/sg_lexer.l                   |   3 -
 gi/clda/src/Makefile.am              |   4 +-
 gi/pyp-topics/src/Makefile.am        |   8 +-
 gi/pyp-topics/src/contexts_corpus.hh |   2 +-
 gi/pyp-topics/src/contexts_lexer.h   |   2 +-
 gi/pyp-topics/src/contexts_lexer.l   |   2 +-
 mteval/Makefile.am                   |  23 ++
 mteval/aer_scorer.cc                 | 135 +++++++
 mteval/aer_scorer.h                  |  23 ++
 mteval/comb_scorer.cc                |  97 +++++
 mteval/comb_scorer.h                 |  17 +
 mteval/fast_score.cc                 |  72 ++++
 mteval/mbr_kbest.cc                  | 138 +++++++
 mteval/scorer.cc                     | 630 +++++++++++++++++++++++++++++++
 mteval/scorer.h                      | 110 ++++++
 mteval/scorer_test.cc                | 182 +++++++++
 mteval/ter.cc                        | 535 ++++++++++++++++++++++++++
 mteval/ter.h                         |  19 +
 mteval/test_data/re.txt.0            |   5 +
 mteval/test_data/re.txt.1            |   5 +
 mteval/test_data/re.txt.2            |   5 +
 mteval/test_data/re.txt.3            |   5 +
 training/Makefile.am                 |  25 +-
 training/atools.cc                   |   7 +-
 utils/Makefile.am                    |  38 ++
 utils/alignment_pharaoh.cc           |  77 ++++
 utils/alignment_pharaoh.h            |  14 +
 utils/array2d.h                      | 172 +++++++++
 utils/b64tools.cc                    |  59 +++
 utils/b64tools.h                     |   9 +
 utils/dict.cc                        |  27 ++
 utils/dict.h                         |  66 ++++
 utils/dict_test.cc                   |  47 +++
 utils/fdict.cc                       | 143 +++++++
 utils/fdict.h                        |  34 ++
 utils/feature_accum.h                | 129 +++++++
 utils/feature_vector.h               |  18 +
 utils/filelib.cc                     |  22 ++
 utils/filelib.h                      | 106 ++++++
 utils/gzstream.cc                    | 182 +++++++++
 utils/gzstream.h                     | 127 +++++++
 utils/hash.h                         |  54 +++
 utils/have_64_bits.h                 |  17 +
 utils/int_or_pointer.h               |  70 ++++
 utils/intrusive_refcount.hpp         |  84 +++++
 utils/logval.h                       | 174 +++++++++
 utils/logval_test.cc                 |  73 ++++
 utils/murmur_hash.h                  | 186 +++++++++
 utils/null_deleter.h                 |   9 +
 utils/prob.h                         |   8 +
 utils/sampler.h                      | 147 ++++++++
 utils/small_vector.h                 | 265 +++++++++++++
 utils/small_vector_test.cc           | 129 +++++++
 utils/sparse_vector.cc               |  98 +++++
 utils/sparse_vector.h                | 512 +++++++++++++++++++++++++
 utils/static_utoa.h                  | 115 ++++++
 utils/stringlib.cc                   |  87 +++++
 utils/stringlib.h                    | 267 +++++++++++++
 utils/stringlib_test.cc              |  17 +
 utils/tdict.cc                       | 154 ++++++++
 utils/tdict.h                        |  50 +++
 utils/test_data/weights              |   8 +
 utils/threadlocal.h                  |  71 ++++
 utils/timing_stats.cc                |  24 ++
 utils/timing_stats.h                 |  25 ++
 utils/weights.cc                     |  77 ++++
 utils/weights.h                      |  21 ++
 utils/weights_test.cc                |  27 ++
 utils/wordid.h                       |   6 +
 vest/Makefile.am                     |  30 +-
 vest/aer_scorer.cc                   | 135 -------
 vest/aer_scorer.h                    |  23 --
 vest/comb_scorer.cc                  |  97 -----
 vest/comb_scorer.h                   |  17 -
 vest/fast_score.cc                   |  72 ----
 vest/lo_test.cc                      |   5 +-
 vest/mr_vest_map.cc                  |   5 +-
 vest/mr_vest_reduce.cc               |   2 +-
 vest/scorer.cc                       | 708 -----------------------------------
 vest/scorer.h                        | 111 ------
 vest/ter.cc                          | 535 --------------------------
 vest/ter.h                           |  19 -
 137 files changed, 6124 insertions(+), 5685 deletions(-)
 delete mode 100644 decoder/array2d.h
 delete mode 100644 decoder/dict.cc
 delete mode 100644 decoder/dict.h
 delete mode 100644 decoder/dict_test.cc
 delete mode 100644 decoder/fdict.cc
 delete mode 100644 decoder/fdict.h
 delete mode 100755 decoder/feature_vector.h
 delete mode 100644 decoder/filelib.cc
 delete mode 100644 decoder/filelib.h
 delete mode 100644 decoder/gzstream.cc
 delete mode 100644 decoder/gzstream.h
 delete mode 100755 decoder/hash.h
 delete mode 100755 decoder/have_64_bits.h
 delete mode 100755 decoder/int_or_pointer.h
 delete mode 100755 decoder/intrusive_refcount.hpp
 delete mode 100644 decoder/logval.h
 delete mode 100644 decoder/logval_test.cc
 delete mode 100755 decoder/murmur_hash.h
 delete mode 100755 decoder/null_deleter.h
 delete mode 100644 decoder/prob.h
 delete mode 100644 decoder/sampler.h
 delete mode 100644 decoder/small_vector.h
 delete mode 100644 decoder/small_vector_test.cc
 delete mode 100644 decoder/sparse_vector.cc
 delete mode 100644 decoder/sparse_vector.h
 delete mode 100755 decoder/static_utoa.h
 delete mode 100644 decoder/stringlib.cc
 delete mode 100644 decoder/stringlib.h
 delete mode 100755 decoder/stringlib_test.cc
 delete mode 100644 decoder/tdict.cc
 delete mode 100644 decoder/tdict.h
 delete mode 100644 decoder/test_data/weights
 delete mode 100755 decoder/threadlocal.h
 delete mode 100644 decoder/timing_stats.cc
 delete mode 100644 decoder/timing_stats.h
 delete mode 100644 decoder/weights.cc
 delete mode 100644 decoder/weights.h
 delete mode 100644 decoder/weights_test.cc
 delete mode 100644 decoder/wordid.h
 create mode 100644 mteval/Makefile.am
 create mode 100644 mteval/aer_scorer.cc
 create mode 100644 mteval/aer_scorer.h
 create mode 100644 mteval/comb_scorer.cc
 create mode 100644 mteval/comb_scorer.h
 create mode 100644 mteval/fast_score.cc
 create mode 100644 mteval/mbr_kbest.cc
 create mode 100644 mteval/scorer.cc
 create mode 100644 mteval/scorer.h
 create mode 100644 mteval/scorer_test.cc
 create mode 100644 mteval/ter.cc
 create mode 100644 mteval/ter.h
 create mode 100644 mteval/test_data/re.txt.0
 create mode 100644 mteval/test_data/re.txt.1
 create mode 100644 mteval/test_data/re.txt.2
 create mode 100644 mteval/test_data/re.txt.3
 create mode 100644 utils/Makefile.am
 create mode 100644 utils/alignment_pharaoh.cc
 create mode 100644 utils/alignment_pharaoh.h
 create mode 100644 utils/array2d.h
 create mode 100644 utils/b64tools.cc
 create mode 100644 utils/b64tools.h
 create mode 100644 utils/dict.cc
 create mode 100644 utils/dict.h
 create mode 100644 utils/dict_test.cc
 create mode 100644 utils/fdict.cc
 create mode 100644 utils/fdict.h
 create mode 100755 utils/feature_accum.h
 create mode 100755 utils/feature_vector.h
 create mode 100644 utils/filelib.cc
 create mode 100644 utils/filelib.h
 create mode 100644 utils/gzstream.cc
 create mode 100644 utils/gzstream.h
 create mode 100755 utils/hash.h
 create mode 100755 utils/have_64_bits.h
 create mode 100755 utils/int_or_pointer.h
 create mode 100755 utils/intrusive_refcount.hpp
 create mode 100644 utils/logval.h
 create mode 100644 utils/logval_test.cc
 create mode 100755 utils/murmur_hash.h
 create mode 100755 utils/null_deleter.h
 create mode 100644 utils/prob.h
 create mode 100644 utils/sampler.h
 create mode 100644 utils/small_vector.h
 create mode 100644 utils/small_vector_test.cc
 create mode 100644 utils/sparse_vector.cc
 create mode 100644 utils/sparse_vector.h
 create mode 100755 utils/static_utoa.h
 create mode 100644 utils/stringlib.cc
 create mode 100644 utils/stringlib.h
 create mode 100755 utils/stringlib_test.cc
 create mode 100644 utils/tdict.cc
 create mode 100644 utils/tdict.h
 create mode 100644 utils/test_data/weights
 create mode 100755 utils/threadlocal.h
 create mode 100644 utils/timing_stats.cc
 create mode 100644 utils/timing_stats.h
 create mode 100644 utils/weights.cc
 create mode 100644 utils/weights.h
 create mode 100644 utils/weights_test.cc
 create mode 100644 utils/wordid.h
 delete mode 100644 vest/aer_scorer.cc
 delete mode 100644 vest/aer_scorer.h
 delete mode 100644 vest/comb_scorer.cc
 delete mode 100644 vest/comb_scorer.h
 delete mode 100644 vest/fast_score.cc
 delete mode 100644 vest/scorer.cc
 delete mode 100644 vest/scorer.h
 delete mode 100644 vest/ter.cc
 delete mode 100644 vest/ter.h

diff --git a/Makefile.am b/Makefile.am
index e82e2352..98c2561e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = decoder training vest extools gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
+SUBDIRS = utils mteval decoder training vest extools gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
 AUTOMAKE_OPTIONS = foreign
 
 ACLOCAL_AMFLAGS = -I m4
diff --git a/configure.ac b/configure.ac
index e627c1cc..302eebed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,4 +76,4 @@ then
   AM_CONDITIONAL([RAND_LM], true)
 fi
 
-AC_OUTPUT(Makefile extools/Makefile decoder/Makefile training/Makefile vest/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile)
+AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile training/Makefile vest/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile)
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 68a7d765..f514b340 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -2,24 +2,16 @@ bin_PROGRAMS = cdec
 
 if HAVE_GTEST
 noinst_PROGRAMS = \
-  dict_test \
-  weights_test \
   trule_test \
   hg_test \
   ff_test \
-  logval_test \
   parser_test \
-  grammar_test \
-  small_vector_test
+  grammar_test
 endif
 
-cdec_SOURCES = cdec.cc forest_writer.cc maxtrans_blunsom.cc cdec_ff.cc timing_stats.cc
-small_vector_test_SOURCES = small_vector_test.cc
-small_vector_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
+cdec_SOURCES = cdec.cc forest_writer.cc maxtrans_blunsom.cc cdec_ff.cc
 parser_test_SOURCES = parser_test.cc
 parser_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
-dict_test_SOURCES = dict_test.cc
-dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
 ff_test_SOURCES = ff_test.cc
 ff_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
 grammar_test_SOURCES = grammar_test.cc
@@ -28,15 +20,12 @@ hg_test_SOURCES = hg_test.cc
 hg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
 trule_test_SOURCES = trule_test.cc
 trule_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
-weights_test_SOURCES = weights_test.cc
-weights_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
-logval_test_SOURCES = logval_test.cc
-logval_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
 
-LDADD = libcdec.a
+LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I..
-AM_LDFLAGS = -lz
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils
+
+AM_LDFLAGS = ../utils/libutils.a -lz
 
 rule_lexer.cc: rule_lexer.l
 	$(LEX) -s -CF -8 -o$@ $<
@@ -49,7 +38,6 @@ libcdec_a_SOURCES = \
   rule_lexer.cc \
   fst_translator.cc \
   csplit.cc \
-  dict.cc \
   translator.cc \
   scfg_translator.cc \
   hg.cc \
@@ -58,17 +46,10 @@ libcdec_a_SOURCES = \
   viterbi.cc \
   lattice.cc \
   aligner.cc \
-  gzstream.cc \
   apply_models.cc \
   earley_composer.cc \
   phrasetable_fst.cc \
-  sparse_vector.cc \
   trule.cc \
-  filelib.cc \
-  stringlib.cc \
-  fdict.cc \
-  tdict.cc \
-  weights.cc \
   ttables.cc \
   ff.cc \
   ff_lm.cc \
@@ -78,12 +59,6 @@ libcdec_a_SOURCES = \
   ff_tagger.cc \
   ff_bleu.cc \
   ff_factory.cc \
-  ../vest/scorer.cc \
-  ../vest/ter.cc \
-  ../vest/aer_scorer.cc \
-  ../vest/comb_scorer.cc \
-  ../vest/error_surface.cc \
-  ../vest/viterbi_envelope.cc \
   freqdict.cc \
   lexalign.cc \
   lextrans.cc \
diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index b089f52e..92431be4 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -5,81 +5,11 @@
 #include "sentence_metadata.h"
 #include "inside_outside.h"
 #include "viterbi.h"
+#include "alignment_pharaoh.h"
 #include <set>
 
 using namespace std;
 
-static bool is_digit(char x) { return x >= '0' && x <= '9'; }
-
-boost::shared_ptr<Array2D<bool> > AlignerTools::ReadPharaohAlignmentGrid(const string& al) {
-  int max_x = 0;
-  int max_y = 0;
-  int i = 0;
-  size_t pos = al.rfind(" ||| ");
-  if (pos != string::npos) { i = pos + 5; }
-  while (i < al.size()) {
-    if (al[i] == '\n' || al[i] == '\r') break;
-    int x = 0;
-    while(i < al.size() && is_digit(al[i])) {
-      x *= 10;
-      x += al[i] - '0';
-      ++i;
-    }
-    if (x > max_x) max_x = x;
-    assert(i < al.size());
-    if(al[i] != '-') {
-      cerr << "BAD ALIGNMENT: " << al << endl;
-      abort();
-    }
-    ++i;
-    int y = 0;
-    while(i < al.size() && is_digit(al[i])) {
-      y *= 10;
-      y += al[i] - '0';
-      ++i;
-    }
-    if (y > max_y) max_y = y;
-    while(i < al.size() && al[i] == ' ') { ++i; }
-  }
-
-  boost::shared_ptr<Array2D<bool> > grid(new Array2D<bool>(max_x + 1, max_y + 1));
-  i = 0;
-  if (pos != string::npos) { i = pos + 5; }
-  while (i < al.size()) {
-    if (al[i] == '\n' || al[i] == '\r') break;
-    int x = 0;
-    while(i < al.size() && is_digit(al[i])) {
-      x *= 10;
-      x += al[i] - '0';
-      ++i;
-    }
-    assert(i < al.size());
-    assert(al[i] == '-');
-    ++i;
-    int y = 0;
-    while(i < al.size() && is_digit(al[i])) {
-      y *= 10;
-      y += al[i] - '0';
-      ++i;
-    }
-    (*grid)(x, y) = true;
-    while(i < al.size() && al[i] == ' ') { ++i; }
-  }
-  // cerr << *grid << endl;
-  return grid;
-}
-
-void AlignerTools::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {
-  bool need_space = false;
-  for (int i = 0; i < alignment.width(); ++i)
-    for (int j = 0; j < alignment.height(); ++j)
-      if (alignment(i,j)) {
-        if (need_space) (*out) << ' '; else need_space = true;
-        (*out) << i << '-' << j;
-      }
-  (*out) << endl;
-}
-
 // used with lexical models since they may not fully generate the
 // source string
 void SourceEdgeCoveragesUsingParseIndices(const Hypergraph& g,
@@ -317,6 +247,6 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
     cerr << grid << endl;
   }
   (*out) << TD::GetString(src_sent) << " ||| " << TD::GetString(trg_sent) << " ||| ";
-  SerializePharaohFormat(grid, out);
+  AlignmentPharaoh::SerializePharaohFormat(grid, out);
 };
 
diff --git a/decoder/aligner.h b/decoder/aligner.h
index cd159119..a088ba6c 100644
--- a/decoder/aligner.h
+++ b/decoder/aligner.h
@@ -10,8 +10,6 @@ class Hypergraph;
 class SentenceMetadata;
 
 struct AlignerTools {
-  static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
-  static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);
 
   // assumption: g contains derivations of input/ref and
   // ONLY input/ref.
diff --git a/decoder/array2d.h b/decoder/array2d.h
deleted file mode 100644
index e63eda0d..00000000
--- a/decoder/array2d.h
+++ /dev/null
@@ -1,172 +0,0 @@
-#ifndef ARRAY2D_H_
-#define ARRAY2D_H_
-
-#include <iostream>
-#include <algorithm>
-#include <cassert>
-#include <vector>
-#include <string>
-
-template<typename T>
-class Array2D {
- public:
-  typedef typename std::vector<T>::reference reference;
-  typedef typename std::vector<T>::const_reference const_reference;
-  typedef typename std::vector<T>::iterator iterator;
-  typedef typename std::vector<T>::const_iterator const_iterator;
-  Array2D() : width_(0), height_(0) {}
-  Array2D(int w, int h, const T& d = T()) :
-    width_(w), height_(h), data_(w*h, d) {}
-  Array2D(const Array2D& rhs) :
-    width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {}
-  bool empty() const { return data_.empty(); }
-  void resize(int w, int h, const T& d = T()) {
-    data_.resize(w * h, d);
-    width_ = w;
-    height_ = h;
-  }
-  const Array2D& operator=(const Array2D& rhs) {
-    data_ = rhs.data_;
-    width_ = rhs.width_;
-    height_ = rhs.height_;
-    return *this;
-  }
-  void fill(const T& v) { data_.assign(data_.size(), v); }
-  int width() const { return width_; }
-  int height() const { return height_; }
-  reference operator()(int i, int j) {
-    return data_[offset(i, j)];
-  }
-  void clear() { data_.clear(); width_=0; height_=0; }
-  const_reference operator()(int i, int j) const {
-    return data_[offset(i, j)];
-  }
-  iterator begin_col(int j) {
-    return data_.begin() + offset(0,j);
-  }
-  const_iterator begin_col(int j) const {
-    return data_.begin() + offset(0,j);
-  }
-  iterator end_col(int j) {
-    return data_.begin() + offset(0,j) + width_;
-  }
-  const_iterator end_col(int j) const {
-    return data_.begin() + offset(0,j) + width_;
-  }
-  iterator end() { return data_.end(); }
-  const_iterator end() const { return data_.end(); }
-  const Array2D<T>& operator*=(const T& x) {
-    std::transform(data_.begin(), data_.end(), data_.begin(),
-        std::bind2nd(std::multiplies<T>(), x));
-  }
-  const Array2D<T>& operator/=(const T& x) {
-    std::transform(data_.begin(), data_.end(), data_.begin(),
-        std::bind2nd(std::divides<T>(), x));
-  }
-  const Array2D<T>& operator+=(const Array2D<T>& m) {
-    std::transform(m.data_.begin(), m.data_.end(), data_.begin(), data_.begin(), std::plus<T>());
-  }
-  const Array2D<T>& operator-=(const Array2D<T>& m) {
-    std::transform(m.data_.begin(), m.data_.end(), data_.begin(), data_.begin(), std::minus<T>());
-  }
-
- private:
-  inline int offset(int i, int j) const {
-    assert(i<width_);
-    assert(j<height_);
-    return i + j * width_;
-  }
-
-  int width_;
-  int height_;
-
-  std::vector<T> data_;
-};
-
-template <typename T>
-Array2D<T> operator*(const Array2D<T>& l, const T& scalar) {
-  Array2D<T> res(l);
-  res *= scalar;
-  return res;
-}
-
-template <typename T>
-Array2D<T> operator*(const T& scalar, const Array2D<T>& l) {
-  Array2D<T> res(l);
-  res *= scalar;
-  return res;
-}
-
-template <typename T>
-Array2D<T> operator/(const Array2D<T>& l, const T& scalar) {
-  Array2D<T> res(l);
-  res /= scalar;
-  return res;
-}
-
-template <typename T>
-Array2D<T> operator+(const Array2D<T>& l, const Array2D<T>& r) {
-  Array2D<T> res(l);
-  res += r;
-  return res;
-}
-
-template <typename T>
-Array2D<T> operator-(const Array2D<T>& l, const Array2D<T>& r) {
-  Array2D<T> res(l);
-  res -= r;
-  return res;
-}
-
-template <typename T>
-inline std::ostream& operator<<(std::ostream& os, const Array2D<T>& m) {
-  for (int i=0; i<m.width(); ++i) {
-    for (int j=0; j<m.height(); ++j)
-      os << '\t' << m(i,j);
-    os << '\n';
-  }
-  return os;
-}
-
-inline std::ostream& operator<<(std::ostream& os, const Array2D<bool>& m) {
-  os << ' ';
-  for (int j=0; j<m.height(); ++j)
-    os << (j%10);
-  os << "\n";
-  for (int i=0; i<m.width(); ++i) {
-    os << (i%10);
-    for (int j=0; j<m.height(); ++j)
-      os << (m(i,j) ? '*' : '.');
-    os << (i%10) << "\n";
-  }
-  os << ' ';
-  for (int j=0; j<m.height(); ++j)
-    os << (j%10);
-  os << "\n";
-  return os;
-}
-
-inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool> >& m) {
-  os << ' ';
-  for (int j=0; j<m.height(); ++j)
-    os << (j%10) << "\t";
-  os << "\n";
-  for (int i=0; i<m.width(); ++i) {
-    os << (i%10);
-    for (int j=0; j<m.height(); ++j) {
-      const std::vector<bool>& ar = m(i,j);
-      for (int k=0; k<ar.size(); ++k)
-        os << (ar[k] ? '*' : '.');
-    }
-    os << "\t";
-    os << (i%10) << "\n";
-  }
-  os << ' ';
-  for (int j=0; j<m.height(); ++j)
-    os << (j%10) << "\t";
-  os << "\n";
-  return os;
-}
-
-#endif
-
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 8c4a25e0..3633febd 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -34,7 +34,7 @@
 #include "inside_outside.h"
 #include "exp_semiring.h"
 #include "sentence_metadata.h"
-#include "../vest/scorer.h"
+#include "scorer.h"
 #include "apply_fsa_models.h"
 #include "program_options.h"
 #include "cfg_options.h"
@@ -59,6 +59,15 @@ void ShowBanner() {
   cerr << "cdec v1.0 (c) 2009-2010 by Chris Dyer\n";
 }
 
+// Parses `line` with ParseTranslatorInput; a non-empty reference is converted into the lattice *ref.
+void ParseTranslatorInputLattice(const string& line, string* input, Lattice* ref) {
+  string ref_text;
+  ParseTranslatorInput(line, input, &ref_text);
+  if (ref_text.empty()) return;  // no reference text was produced: *ref is left untouched
+  assert(ref);  // a reference is present, so the caller must supply a lattice to receive it
+  LatticeTools::ConvertTextOrPLF(ref_text, ref);
+}
+
 void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
   for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
     trg->set_value(it->first, it->second);
diff --git a/decoder/dict.cc b/decoder/dict.cc
deleted file mode 100644
index 2d6986c8..00000000
--- a/decoder/dict.cc
+++ /dev/null
@@ -1,27 +0,0 @@
-#include "dict.h"
-
-#include <string>
-#include <vector>
-
-void TokenizeStringSeparator(
-          const std::string& str,
-          const std::string& separator,
-          std::vector<std::string>* tokens) {
-
-  size_t pos = 0;
-  std::string::size_type nextPos = str.find(separator, pos);
-
-  while (nextPos != std::string::npos) {
-    tokens->push_back(str.substr(pos, nextPos - pos));
-    pos = nextPos + separator.size();
-    nextPos = str.find(separator, pos);
-  }
-  tokens->push_back(str.substr(pos, nextPos - pos));
-}
-
-
-void Dict::AsVector(const WordID& id, std::vector<std::string>* results) const {
-  results->clear();
-  TokenizeStringSeparator(Convert(id), " ||| ", results);
-}
-
diff --git a/decoder/dict.h b/decoder/dict.h
deleted file mode 100644
index 348a97e3..00000000
--- a/decoder/dict.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef DICT_H_
-#define DICT_H_
-
-
-#include <cassert>
-#include <cstring>
-
-#include <string>
-#include <vector>
-#include "hash.h"
-#include "wordid.h"
-
-class Dict {
- typedef
- HASH_MAP<std::string, WordID, boost::hash<std::string> > Map;
- public:
-  Dict() : b0_("<bad0>") {
-    HASH_MAP_EMPTY(d_,"<bad1>");
-    words_.reserve(1000);
-  }
-
-  inline int max() const { return words_.size(); }
-
-  inline WordID Convert(const std::string& word, bool frozen = false) {
-    Map::iterator i = d_.find(word);
-    if (i == d_.end()) {
-      if (frozen)
-        return 0;
-      words_.push_back(word);
-      d_[word] = words_.size();
-      return words_.size();
-    } else {
-      return i->second;
-    }
-  }
-
-  inline WordID Convert(const std::vector<std::string>& words, bool frozen = false)
-  { return Convert(toString(words), frozen); }
-
-  static inline std::string toString(const std::vector<std::string>& words) {
-    std::string word= "";
-    for (std::vector<std::string>::const_iterator it=words.begin();
-         it != words.end(); ++it) {
-      if (it != words.begin()) word += " ||| ";
-      word += *it;
-    }
-    return word;
-  }
-
-  inline const std::string& Convert(const WordID& id) const {
-    if (id == 0) return b0_;
-    assert(id <= (int)words_.size());
-    return words_[id-1];
-  }
-
-  void AsVector(const WordID& id, std::vector<std::string>* results) const;
-
-  void clear() { words_.clear(); d_.clear(); }
-
- private:
-  const std::string b0_;
-  std::vector<std::string> words_;
-  Map d_;
-};
-
-#endif
diff --git a/decoder/dict_test.cc b/decoder/dict_test.cc
deleted file mode 100644
index 694877fa..00000000
--- a/decoder/dict_test.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-#include "dict.h"
-
-#include "fdict.h"
-
-#include <iostream>
-#include <gtest/gtest.h>
-#include <cassert>
-#include "filelib.h"
-
-#include "tdict.h"
-
-using namespace std;
-
-class DTest : public testing::Test {
- public:
-  DTest() {}
- protected:
-  virtual void SetUp() { }
-  virtual void TearDown() { }
-};
-
-TEST_F(DTest, Convert) {
-  Dict d;
-  WordID a = d.Convert("foo");
-  WordID b = d.Convert("bar");
-  std::string x = "foo";
-  WordID c = d.Convert(x);
-  EXPECT_NE(a, b);
-  EXPECT_EQ(a, c);
-  EXPECT_EQ(d.Convert(a), "foo");
-  EXPECT_EQ(d.Convert(b), "bar");
-}
-
-TEST_F(DTest, FDictTest) {
-  int fid = FD::Convert("First");
-  EXPECT_GT(fid, 0);
-  EXPECT_EQ(FD::Convert(fid), "First");
-  string x = FD::Escape("=");
-  cerr << x << endl;
-  EXPECT_NE(x, "=");
-  x = FD::Escape(";");
-  cerr << x << endl;
-  EXPECT_NE(x, ";");
-}
-
-int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
-
diff --git a/decoder/fdict.cc b/decoder/fdict.cc
deleted file mode 100644
index baa0b552..00000000
--- a/decoder/fdict.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-#include "fdict.h"
-#include "stdlib.h"
-//for malloc (need on cygwin); todo <cstdlib> and std::malloc
-#include <string>
-#include <sstream>
-
-using namespace std;
-
-Dict FD::dict_;
-bool FD::frozen_ = false;
-
-std::string FD::Convert(std::vector<WordID> const& v) {
-    return Convert(&*v.begin(),&*v.end());
-}
-
-std::string FD::Convert(WordID const *b,WordID const* e) {
-  ostringstream o;
-  for (WordID const* i=b;i<e;++i) {
-    if (i>b) o << ' ';
-    o << FD::Convert(*i);
-  }
-  return o.str();
-}
-
-static int HexPairValue(const char * code) {
-  int value = 0;
-  const char * pch = code;
-  for (;;) {
-    int digit = *pch++;
-    if (digit >= '0' && digit <= '9') {
-      value += digit - '0';
-    }
-    else if (digit >= 'A' && digit <= 'F') {
-      value += digit - 'A' + 10;
-    }
-    else if (digit >= 'a' && digit <= 'f') {
-      value += digit - 'a' + 10;
-    }
-    else {
-      return -1;
-    }
-    if (pch == code + 2)
-      return value;
-    value <<= 4;
-  }
-}
-
-int UrlDecode(const char *source, char *dest)
-{
-  char * start = dest;
-
-  while (*source) {
-    switch (*source) {
-    case '+':
-      *(dest++) = ' ';
-      break;
-    case '%':
-      if (source[1] && source[2]) {
-        int value = HexPairValue(source + 1);
-        if (value >= 0) {
-          *(dest++) = value;
-          source += 2;
-        }
-        else {
-          *dest++ = '?';
-        }
-      }
-      else {
-        *dest++ = '?';
-      }
-      break;
-    default:
-      *dest++ = *source;
-    }
-    source++;
-  }
-
-  *dest = 0;
-  return dest - start;
-}
-
-int UrlEncode(const char *source, char *dest, unsigned max) {
-  static const char *digits = "0123456789ABCDEF";
-  unsigned char ch;
-  unsigned len = 0;
-  char *start = dest;
-
-  while (len < max - 4 && *source)
-  {
-    ch = (unsigned char)*source;
-    if (*source == ' ') {
-      *dest++ = '+';
-    }
-    else if (strchr("=:;,_| %", ch)) {
-      *dest++ = '%';
-      *dest++ = digits[(ch >> 4) & 0x0F];
-      *dest++ = digits[       ch & 0x0F];
-    }
-    else {
-      *dest++ = *source;
-    }
-    source++;
-  }
-  *dest = 0;
-  return start - dest;
-}
-
-std::string UrlDecodeString(const std::string & encoded) {
-  const char * sz_encoded = encoded.c_str();
-  size_t needed_length = encoded.length();
-  for (const char * pch = sz_encoded; *pch; pch++) {
-    if (*pch == '%')
-      needed_length += 2;
-  }
-  needed_length += 10;
-  char stackalloc[64];
-  char * buf = needed_length > sizeof(stackalloc)/sizeof(*stackalloc) ?
-    (char *)malloc(needed_length) : stackalloc;
-  UrlDecode(encoded.c_str(), buf);
-  std::string result(buf);
-  if (buf != stackalloc) {
-    free(buf);
-  }
-  return result;
-}
-
-std::string UrlEncodeString(const std::string & decoded) {
-  size_t needed_length = decoded.length() * 3 + 3;
-  char stackalloc[64];
-  char * buf = needed_length > sizeof(stackalloc)/sizeof(*stackalloc) ?
-    (char *)malloc(needed_length) : stackalloc;
-  UrlEncode(decoded.c_str(), buf, needed_length);
-  std::string result(buf);
-  if (buf != stackalloc) {
-    free(buf);
-  }
-  return result;
-}
-
-string FD::Escape(const string& s) {
-  return UrlEncodeString(s);
-}
-
diff --git a/decoder/fdict.h b/decoder/fdict.h
deleted file mode 100644
index f9673023..00000000
--- a/decoder/fdict.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef _FDICT_H_
-#define _FDICT_H_
-
-#include <string>
-#include <vector>
-#include "dict.h"
-
-struct FD {
-  // once the FD is frozen, new features not already in the
-  // dictionary will return 0
-  static void Freeze() {
-    frozen_ = true;
-  }
-  static inline int NumFeats() {
-    return dict_.max() + 1;
-  }
-  static inline WordID Convert(const std::string& s) {
-    return dict_.Convert(s, frozen_);
-  }
-  static inline const std::string& Convert(const WordID& w) {
-    return dict_.Convert(w);
-  }
-  static std::string Convert(WordID const *i,WordID const* e);
-  static std::string Convert(std::vector<WordID> const& v);
-
-  // Escape any string to a form that can be used as the name
-  // of a weight in a weights file
-  static std::string Escape(const std::string& s);
-  static Dict dict_;
- private:
-  static bool frozen_;
-};
-
-#endif
diff --git a/decoder/feature_vector.h b/decoder/feature_vector.h
deleted file mode 100755
index be378a6a..00000000
--- a/decoder/feature_vector.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _FEATURE_VECTOR_H_
-#define _FEATURE_VECTOR_H_
-
-#include <vector>
-#include "sparse_vector.h"
-#include "fdict.h"
-
-typedef double Featval;
-typedef SparseVectorList<Featval> FeatureVectorList;
-typedef SparseVector<Featval> FeatureVector;
-typedef SparseVector<Featval> WeightVector;
-typedef std::vector<Featval> DenseWeightVector;
-
-inline void sparse_to_dense(WeightVector const& wv,DenseWeightVector *dv) {
-  wv.init_vector(dv);
-}
-
-#endif
diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc
index 77989331..aa4e6d85 100644
--- a/decoder/ff_bleu.cc
+++ b/decoder/ff_bleu.cc
@@ -18,7 +18,7 @@ char const* bleu_usage_verbose="Uses feature id 0!  Make sure there are no other
 #include "hg.h"
 #include "stringlib.h"
 #include "sentence_metadata.h"
-#include "../vest/scorer.h"
+#include "scorer.h"
 
 using namespace std;
 
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index f3e65cb7..a9929253 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -728,7 +728,7 @@ LanguageModelRandLM::LanguageModelRandLM(const string& param) :
       filename = argv[0];
     }
   }
-  set_order(order);
+//  set_order(order);
   int cache_MB = 200; // increase cache size
   randlm::RandLM* rlm = randlm::RandLM::initRandLM(filename, order, cache_MB);
   assert(rlm != NULL);
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index 0ba2bf92..087bff0c 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -5,6 +5,7 @@
 #include <string>
 #include <cmath>
 
+#include "alignment_pharaoh.h"
 #include "stringlib.h"
 #include "sentence_metadata.h"
 #include "hg.h"
@@ -354,7 +355,7 @@ AlignerResults::AlignerResults(const std::string& param) :
     getline(in, line);
     if (!in) break;
     ++lc;
-    is_aligned_.push_back(AlignerTools::ReadPharaohAlignmentGrid(line));
+    is_aligned_.push_back(AlignmentPharaoh::ReadPharaohAlignmentGrid(line));
   }
   cerr << "  Loaded " << lc << " refs\n";
 }
diff --git a/decoder/filelib.cc b/decoder/filelib.cc
deleted file mode 100644
index 79ad2847..00000000
--- a/decoder/filelib.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-#include "filelib.h"
-
-#include <unistd.h>
-#include <sys/stat.h>
-
-using namespace std;
-
-bool FileExists(const std::string& fn) {
-  struct stat info;
-  int s = stat(fn.c_str(), &info);
-  return (s==0);
-}
-
-bool DirectoryExists(const string& dir) {
-  if (access(dir.c_str(),0) == 0) {
-    struct stat status;
-    stat(dir.c_str(), &status);
-    if (status.st_mode & S_IFDIR) return true;
-  }
-  return false;
-}
-
diff --git a/decoder/filelib.h b/decoder/filelib.h
deleted file mode 100644
index b9fef9a7..00000000
--- a/decoder/filelib.h
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef _FILELIB_H_
-#define _FILELIB_H_
-
-#include <cassert>
-#include <string>
-#include <iostream>
-#include <cstdlib>
-#include <boost/shared_ptr.hpp>
-#include <stdexcept>
-#include "gzstream.h"
-#include "null_deleter.h"
-
-bool FileExists(const std::string& file_name);
-bool DirectoryExists(const std::string& dir_name);
-
-// reads from standard in if filename is -
-// uncompresses if file ends with .gz
-// otherwise, reads from a normal file
-
-template <class Stream>
-struct BaseFile {
-  typedef Stream S;
-  typedef boost::shared_ptr<Stream> PS;
-  void Reset() {
-    ps_.reset();
-  }
-  bool is_null() const { return !ps_; }
-  operator bool() const {
-    return ps_;
-  }
-  S* stream() { return ps_.get(); }
-  S* operator->() { return ps_.get(); } // compat with old ReadFile * -> new Readfile. remove?
-  S &operator *() const { return get(); }
-  S &get() const { return *ps_; }
-  bool is_std() {
-    return filename_=="-";
-  }
-  std::string filename_;
-protected:
-  void error(std::string const& reason,std::string const& filename) {
-    throw std::runtime_error("File "+filename+" - "+reason);
-  }
-
-  PS ps_;
-  static bool EndsWith(const std::string& f, const std::string& suf) {
-    return (f.size() > suf.size()) && (f.rfind(suf) == f.size() - suf.size());
-  }
-};
-
-class ReadFile : public BaseFile<std::istream> {
- public:
-  ReadFile() {  }
-  explicit ReadFile(const std::string& filename) {
-    Init(filename);
-  }
-  void Init(const std::string& filename) {
-    filename_=filename;
-    if (is_std()) {
-      ps_=PS(&std::cin,null_deleter());
-    } else {
-      if (!FileExists(filename)) {
-        std::cerr << "File does not exist: " << filename << std::endl;
-        error(filename," couldn't read nonexistant file.");
-        abort();
-      }
-      char const* file=filename_.c_str(); // just in case the gzstream keeps using the filename for longer than the constructor, e.g. inflateReset2.  warning in valgrind that I'm hoping will disappear - it makes no sense.
-      ps_=PS(EndsWith(filename, ".gz") ?
-                static_cast<std::istream*>(new igzstream(file)) :
-             static_cast<std::istream*>(new std::ifstream(file)));
-      if (!*ps_) {
-        std::cerr << "Failed to open " << filename << std::endl;
-        error(filename," open for reading failed.");
-        abort();
-      }
-    }
-  }
-
-};
-
-class WriteFile : public BaseFile<std::ostream> {
- public:
-  WriteFile() {}
-  explicit WriteFile(std::string const& filename) { Init(filename); }
-  void Init(const std::string& filename) {
-    filename_=filename;
-    if (is_std()) {
-      ps_=PS(&std::cout,null_deleter());
-    } else {
-      char const* file=filename_.c_str(); // just in case the gzstream keeps using the filename for longer than the constructor, e.g. inflateReset2.  warning in valgrind that I'm hoping will disappear - it makes no sense.
-      ps_=PS(EndsWith(filename, ".gz") ?
-                static_cast<std::ostream*>(new ogzstream(file)) :
-                static_cast<std::ostream*>(new std::ofstream(file)));
-      if (!*ps_) {
-        std::cerr << "Failed to open " << filename << std::endl;
-        error(filename," open for writing failed.");
-        abort();
-      }
-    }
-  }
-  ~WriteFile() {
-    if (ps_)
-      get() << std::flush;
-  }
-};
-
-#endif
diff --git a/decoder/gzstream.cc b/decoder/gzstream.cc
deleted file mode 100644
index 88cd1bd2..00000000
--- a/decoder/gzstream.cc
+++ /dev/null
@@ -1,182 +0,0 @@
-// ============================================================================
-// gzstream, C++ iostream classes wrapping the zlib compression library.
-// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-// ============================================================================
-//
-// File          : gzstream.C
-// Revision      : $Revision: 1.7 $
-// Revision_date : $Date: 2003/01/08 14:41:27 $
-// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
-//
-// Standard streambuf implementation following Nicolai Josuttis, "The
-// Standard C++ Library".
-// ============================================================================
-
-#include <gzstream.h>
-#include <iostream>
-#include <cstring>  // for memcpy
-#include <stdexcept>
-
-#ifdef GZSTREAM_NAMESPACE
-namespace GZSTREAM_NAMESPACE {
-#endif
-
-// ----------------------------------------------------------------------------
-// Internal classes to implement gzstream. See header file for user classes.
-// ----------------------------------------------------------------------------
-
-// --------------------------------------
-// class gzstreambuf:
-// --------------------------------------
-
-gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {
-    if ( is_open())
-        return (gzstreambuf*)0;
-    mode = open_mode;
-    // no append nor read/write mode
-    if ((mode & std::ios::ate) || (mode & std::ios::app)
-        || ((mode & std::ios::in) && (mode & std::ios::out)))
-        return (gzstreambuf*)0;
-    const int Nmode=10;
-    char  fmode[Nmode];
-    char* fmodeptr = fmode;
-    if ( mode & std::ios::in)
-        *fmodeptr++ = 'r';
-    else if ( mode & std::ios::out)
-        *fmodeptr++ = 'w';
-    *fmodeptr++ = 'b';
-    while (fmodeptr<fmode+Nmode) // hopefully wil help valgrind
-      *fmodeptr++ = '\0';
-    file = gzopen( name, fmode);
-    if (!file) handle_gzerror();
-    if (file == 0)
-        return (gzstreambuf*)0;
-    opened = 1;
-    return this;
-}
-
-gzstreambuf * gzstreambuf::close() {
-    if ( is_open()) {
-        sync();
-        opened = 0;
-        if ( gzclose( file) == Z_OK)
-            return this;
-        else
-            handle_gzerror();
-    }
-    return (gzstreambuf*)0;
-}
-
-void gzstreambuf::handle_gzerror() {
-    int errnum;
-    const char *errmsg=gzerror(file,&errnum);
-    if (errnum==Z_DATA_ERROR) errmsg="CRC error reading gzip";
-    throw std::runtime_error(std::string("gzstreambuf error: ")+errmsg);
-}
-
-int gzstreambuf::underflow() { // used for input buffer only
-    if ( gptr() && ( gptr() < egptr()))
-        return * reinterpret_cast<unsigned char *>( gptr());
-
-    if ( ! (mode & std::ios::in) || ! opened)
-        return EOF;
-    // Josuttis' implementation of inbuf
-    int n_putback = gptr() - eback();
-    if ( n_putback > 4)
-        n_putback = 4;
-    std::memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback);
-
-    int num = gzread( file, buffer+4, bufferSize-4);
-    if (num <= 0) // ERROR or EOF
-    {
-        if (gzeof(file))
-            return EOF;
-        handle_gzerror();
-    }
-
-    // reset buffer pointers
-    setg( buffer + (4 - n_putback),   // beginning of putback area
-          buffer + 4,                 // read position
-          buffer + 4 + num);          // end of buffer
-
-    // return next character
-    return * reinterpret_cast<unsigned char *>( gptr());
-}
-
-int gzstreambuf::flush_buffer() {
-    // Separate the writing of the buffer from overflow() and
-    // sync() operation.
-    int w = pptr() - pbase();
-    if ( gzwrite( file, pbase(), w) != w)
-        handle_gzerror();
-    pbump( -w);
-    return w;
-}
-
-int gzstreambuf::overflow( int c) { // used for output buffer only
-    if ( ! ( mode & std::ios::out) || ! opened)
-        return EOF;
-    if (c != EOF) {
-        *pptr() = c;
-        pbump(1);
-    }
-    if ( flush_buffer() == EOF)
-        return EOF;
-    return c;
-}
-
-int gzstreambuf::sync() {
-    // Changed to use flush_buffer() instead of overflow( EOF)
-    // which caused improper behavior with std::endl and flush(),
-    // bug reported by Vincent Ricard.
-    if ( pptr() && pptr() > pbase()) {
-        if ( flush_buffer() == EOF)
-            return -1;
-    }
-    return 0;
-}
-
-// --------------------------------------
-// class gzstreambase:
-// --------------------------------------
-
-gzstreambase::gzstreambase( const char* name, int mode) {
-    init( &buf);
-    open( name, mode);
-}
-
-gzstreambase::~gzstreambase() {
-    buf.close();
-}
-
-void gzstreambase::open( const char* name, int open_mode) {
-    if ( ! buf.open( name, open_mode))
-        clear( rdstate() | std::ios::badbit);
-}
-
-void gzstreambase::close() {
-    if ( buf.is_open())
-        if ( ! buf.close())
-            clear( rdstate() | std::ios::badbit);
-}
-
-#ifdef GZSTREAM_NAMESPACE
-} // namespace GZSTREAM_NAMESPACE
-#endif
-
-// ============================================================================
-// EOF //
diff --git a/decoder/gzstream.h b/decoder/gzstream.h
deleted file mode 100644
index a7effd90..00000000
--- a/decoder/gzstream.h
+++ /dev/null
@@ -1,127 +0,0 @@
-// ============================================================================
-// gzstream, C++ iostream classes wrapping the zlib compression library.
-// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-// ============================================================================
-//
-// File          : gzstream.h
-// Revision      : $Revision: 1.5 $
-// Revision_date : $Date: 2002/04/26 23:30:15 $
-// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
-//
-// Standard streambuf implementation following Nicolai Josuttis, "The
-// Standard C++ Library".
-// ============================================================================
-
-#ifndef GZSTREAM_H
-#define GZSTREAM_H 1
-
-// standard C++ with new header file names and std:: namespace
-#include <iostream>
-#include <fstream>
-#include <zlib.h>
-
-#ifdef GZSTREAM_NAMESPACE
-namespace GZSTREAM_NAMESPACE {
-#endif
-
-// ----------------------------------------------------------------------------
-// Internal classes to implement gzstream. See below for user classes.
-// ----------------------------------------------------------------------------
-
-class gzstreambuf : public std::streambuf {
-private:
-  static const int bufferSize = 47+(1024*256);    // size of data buff
-    // totals 512 bytes under g++ for igzstream at the end.
-
-    gzFile           file;               // file handle for compressed file
-    char             buffer[bufferSize]; // data buffer
-    char             opened;             // open/close state of stream
-    int              mode;               // I/O mode
-
-    int flush_buffer();
-    void handle_gzerror(); // throws exception
-public:
-#if defined(_WIN32) && !defined(CYGWIN) && !defined(EOF)
-	enum {
-		EOF = -1
-	};
-#endif
-    gzstreambuf() : opened(0) {
-        setp( buffer, buffer + (bufferSize-1));
-        setg( buffer + 4,     // beginning of putback area
-              buffer + 4,     // read position
-              buffer + 4);    // end position
-        // ASSERT: both input & output capabilities will not be used together
-    }
-    int is_open() { return opened; }
-    gzstreambuf* open( const char* name, int open_mode);
-    gzstreambuf* close();
-    ~gzstreambuf() { close(); }
-
-    virtual int     overflow( int c = EOF);
-    virtual int     underflow();
-    virtual int     sync();
-};
-
-class gzstreambase : virtual public std::ios {
-protected:
-    gzstreambuf buf;
-public:
-    gzstreambase() { init(&buf); }
-    gzstreambase( const char* name, int open_mode);
-    ~gzstreambase();
-    void open( const char* name, int open_mode);
-    void close();
-    gzstreambuf* rdbuf() { return &buf; }
-};
-
-// ----------------------------------------------------------------------------
-// User classes. Use igzstream and ogzstream analogously to ifstream and
-// ofstream respectively. They read and write files based on the gz*
-// function interface of the zlib. Files are compatible with gzip compression.
-// ----------------------------------------------------------------------------
-
-class igzstream : public gzstreambase, public std::istream {
-public:
-    igzstream() : std::istream( &buf) {}
-    igzstream( const char* name, int open_mode = std::ios::in)
-        : gzstreambase( name, std::ios::in | open_mode), std::istream( &buf) {}
-    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
-    void open( const char* name, int open_mode = std::ios::in) {
-        gzstreambase::open( name, open_mode);
-    }
-};
-
-class ogzstream : public gzstreambase, public std::ostream {
-public:
-    ogzstream() : std::ostream( &buf) {}
-    ogzstream( const char* name, int mode = std::ios::out)
-        : gzstreambase( name, mode), std::ostream( &buf) {}
-    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
-    void open( const char* name, int open_mode = std::ios::out) {
-        gzstreambase::open( name, open_mode);
-    }
-};
-
-#ifdef GZSTREAM_NAMESPACE
-} // namespace GZSTREAM_NAMESPACE
-#endif
-
-#endif // GZSTREAM_H
-// ============================================================================
-// EOF //
-
diff --git a/decoder/hash.h b/decoder/hash.h
deleted file mode 100755
index 3a60a429..00000000
--- a/decoder/hash.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef CDEC_HASH_H
-#define CDEC_HASH_H
-
-#include "murmur_hash.h"
-
-#include "config.h"
-#ifdef HAVE_SPARSEHASH
-# include <google/dense_hash_map>
-# define HASH_MAP google::dense_hash_map
-# define HASH_MAP_RESERVED(h,empty,deleted) do { h.set_empty_key(empty); h.set_deleted_key(deleted); } while(0)
-# define HASH_MAP_EMPTY(h,empty) do { h.set_empty_key(empty); } while(0)
-#else
-# include <tr1/unordered_map>
-# define HASH_MAP std::tr1::unordered_map
-# define HASH_MAP_RESERVED(h,empty,deleted)
-# define HASH_MAP_EMPTY(h,empty)
-#endif
-
-#include <boost/functional/hash.hpp>
-
-// assumes C is POD
-template <class C>
-struct murmur_hash
-{
-  typedef MurmurInt return_type;
-  typedef C /*const&*/ argument_type;
-  return_type operator()(argument_type const& c) const {
-    return MurmurHash((void*)&c,sizeof(c));
-  }
-};
-
-// murmur_hash_array isn't std guaranteed safe (you need to use string::data())
-template <>
-struct murmur_hash<std::string>
-{
-  typedef MurmurInt return_type;
-  typedef std::string /*const&*/ argument_type;
-  return_type operator()(argument_type const& c) const {
-    return MurmurHash(c.data(),c.size());
-  }
-};
-
-// uses begin(),size() assuming contiguous layout and POD
-template <class C>
-struct murmur_hash_array
-{
-  typedef MurmurInt return_type;
-  typedef C /*const&*/ argument_type;
-  return_type operator()(argument_type const& c) const {
-    return MurmurHash(&*c.begin(),c.size()*sizeof(*c.begin()));
-  }
-};
-
-#endif
diff --git a/decoder/have_64_bits.h b/decoder/have_64_bits.h
deleted file mode 100755
index d1e6064f..00000000
--- a/decoder/have_64_bits.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef HAVE_64_BITS_H
-#define HAVE_64_BITS_H
-
-#include <stdint.h>
-
-#undef HAVE_64_BITS
-
-#if INTPTR_MAX == INT32_MAX
-# define HAVE_64_BITS 0
-#elif INTPTR_MAX >= INT64_MAX
-# define HAVE_64_BITS 1
-#else
-# error "couldn't tell if HAVE_64_BITS from INTPTR_MAX INT32_MAX INT64_MAX"
-#endif
-
-
-#endif
diff --git a/decoder/hg.h b/decoder/hg.h
index d5c8e197..e9510997 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -102,6 +102,8 @@ public:
     void copy_info(Edge const& o) {
 #if USE_INFO_EDGE
       set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space).  it's empty if nobody put any info there.
+#else
+      (void) o;
 #endif
     }
     void copy_pod(Edge const& o) {
@@ -142,7 +144,7 @@ public:
 #else
     std::string info() const { return std::string(); }
     void reset_info() {  }
-    void set_info(std::string const& s) {  }
+    void set_info(std::string const& ) {  }
 #endif
     void show(std::ostream &o,unsigned mask=SPAN|RULE) const {
       o<<'{';
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index 52a8565a..1af8261e 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -622,56 +622,3 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) {
   }
 }
 
-namespace B64 {
-
-static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-static const char cd64[]="|$$$}rstuvwxyz{$$$$$$$>?@ABCDEFGHIJKLMNOPQRSTUVW$$$$$$XYZ[\\]^_`abcdefghijklmnopq";
-
-static void encodeblock(const unsigned char* in, ostream* os, int len) {
-  char out[4];
-  out[0] = cb64[ in[0] >> 2 ];
-  out[1] = cb64[ ((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4) ];
-  out[2] = (len > 1 ? cb64[ ((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6) ] : '=');
-  out[3] = (len > 2 ? cb64[ in[2] & 0x3f ] : '=');
-  os->write(out, 4);
-}
-
-void b64encode(const char* data, const size_t size, ostream* out) {
-  size_t cur = 0;
-  while(cur < size) {
-    int len = min(static_cast<size_t>(3), size - cur);
-    encodeblock(reinterpret_cast<const unsigned char*>(&data[cur]), out, len);
-    cur += len;
-  }
-}
-
-static void decodeblock(const unsigned char* in, unsigned char* out) {
-  out[0] = (unsigned char ) (in[0] << 2 | in[1] >> 4);
-  out[1] = (unsigned char ) (in[1] << 4 | in[2] >> 2);
-  out[2] = (unsigned char ) (((in[2] << 6) & 0xc0) | in[3]);
-}
-
-bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize) {
-  size_t cur = 0;
-  size_t ocur = 0;
-  unsigned char in[4];
-  while(cur < insize) {
-    assert(ocur < outsize);
-    for (int i = 0; i < 4; ++i) {
-      unsigned char v = data[cur];
-      v = (unsigned char) ((v < 43 || v > 122) ? '\0' : cd64[ v - 43 ]);
-      if (!v) {
-        cerr << "B64 decode error at offset " << cur << " offending character: " << (int)data[cur] << endl;
-        return false;
-      }
-      v = (unsigned char) ((v == '$') ? '\0' : v - 61);
-      if (v) in[i] = v - 1; else in[i] = 0;
-      ++cur;
-    }
-    decodeblock(in, reinterpret_cast<unsigned char*>(&out[ocur]));
-    ocur += 3;
-  }
-  return true;
-}
-}
-
diff --git a/decoder/hg_io.h b/decoder/hg_io.h
index b6a176ab..082489d8 100644
--- a/decoder/hg_io.h
+++ b/decoder/hg_io.h
@@ -31,9 +31,4 @@ struct HypergraphIO {
   static std::string Escape(const std::string& s);  // PLF helper
 };
 
-namespace B64 {
-  bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize);
-  void b64encode(const char* data, const size_t size, std::ostream* out);
-}
-
 #endif
diff --git a/decoder/int_or_pointer.h b/decoder/int_or_pointer.h
deleted file mode 100755
index 4b6a9e4a..00000000
--- a/decoder/int_or_pointer.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef INT_OR_POINTER_H
-#define INT_OR_POINTER_H
-
-// if you ever wanted to store a discriminated union of pointer/integer without an extra boolean flag, this will do it, assuming your pointers are never odd.
-
-// check lsb for expected tag?
-#ifndef IOP_CHECK_LSB
-# define IOP_CHECK_LSB 1
-#endif
-#if IOP_CHECK_LSB
-# define iop_assert(x) assert(x)
-#else
-# define iop_assert(x)
-#endif
-
-#include <assert.h>
-#include <iostream>
-
-template <class Pointed=void,class Int=size_t>
-struct IntOrPointer {
-  typedef Pointed pointed_type;
-  typedef Int integer_type;
-  typedef Pointed *value_type;
-  typedef IntOrPointer<Pointed,Int> self_type;
-  IntOrPointer(int j) { *this=j; }
-  IntOrPointer(size_t j) { *this=j; }
-  IntOrPointer(value_type v) { *this=v; }
-  bool is_integer() const { return i&1; }
-  bool is_pointer() const { return !(i&1); }
-  value_type & pointer() { return p; }
-  const value_type & pointer() const { iop_assert(is_pointer()); return p; }
-  integer_type integer() const { iop_assert(is_integer()); return i >> 1; }
-  void set_integer(Int j) { i=2*j+1; }
-  void set_pointer(value_type p_) { p=p_;iop_assert(is_pointer()); }
-  void operator=(unsigned j) { i = 2*(integer_type)j+1; }
-  void operator=(int j) { i = 2*(integer_type)j+1; }
-  template <class C>
-  void operator=(C j) { i = 2*(integer_type)j+1; }
-  void operator=(value_type v) { p=v; }
-  IntOrPointer() {}
-  IntOrPointer(const self_type &s) : p(s.p) {}
-  void operator=(const self_type &s) { p=s.p; }
-  template <class C>
-  bool operator ==(C* v) const { return p==v; }
-  template <class C>
-  bool operator ==(const C* v) const { return p==v; }
-  template <class C>
-  bool operator ==(C j) const { return integer() == j; }
-  bool operator ==(self_type s) const { return p==s.p; }
-  bool operator !=(self_type s) const { return p!=s.p; }
-  template <class O> void print(O&o) const
-  {
-    if (is_integer())
-      o << integer();
-    else {
-      o << "0x" << std::hex << (size_t)pointer() << std::dec;
-    }
-  }
-  friend inline std::ostream& operator<<(std::ostream &o,self_type const& s) {
-    s.print(o); return o;
-  }
-protected:
-  union {
-    value_type p; // must be even (guaranteed unless you're pointing at packed chars)
-    integer_type i; // stored as 2*data+1, so only has half the range (one less bit) of a normal integer_type
-  };
-};
-
-
-#endif
diff --git a/decoder/intrusive_refcount.hpp b/decoder/intrusive_refcount.hpp
deleted file mode 100755
index 4a4b0187..00000000
--- a/decoder/intrusive_refcount.hpp
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef GRAEHL__SHARED__INTRUSIVE_REFCOUNT_HPP
-#define GRAEHL__SHARED__INTRUSIVE_REFCOUNT_HPP
-
-#include <boost/intrusive_ptr.hpp>
-#include <boost/noncopyable.hpp>
-#include <boost/detail/atomic_count.hpp>
-#include <cassert>
-
-/** usage:
-    struct mine : public boost::instrusive_refcount<mine> {};
-
-    boost::intrusive_ptr<mine> p(new mine());
-*/
-
-namespace boost {
-// note: the free functions need to be in boost namespace, OR namespace of involved type.  this is the only way to do it.
-
-template <class T>
-class intrusive_refcount;
-
-template <class T>
-class atomic_intrusive_refcount;
-
-template<class T>
-void intrusive_ptr_add_ref(intrusive_refcount<T>* ptr)
-{
-    ++(ptr->refs);
-}
-
-template<class T>
-void intrusive_ptr_release(intrusive_refcount<T>* ptr)
-{
-    if (!--(ptr->refs)) delete static_cast<T*>(ptr);
-}
-
-
-//WARNING: only 2^32 (unsigned) refs allowed.  hope that's ok :)
-template<class T>
-class intrusive_refcount : boost::noncopyable
-{
- protected:
-//    typedef intrusive_refcount<T> pointed_type;
-    friend void intrusive_ptr_add_ref<T>(intrusive_refcount<T>* ptr);
-    friend void intrusive_ptr_release<T>(intrusive_refcount<T>* ptr);
-//    friend class intrusive_ptr<T>;
-
-    intrusive_refcount(): refs(0) {}
-    ~intrusive_refcount() { assert(refs==0); }
-
-private:
-    unsigned refs;
-};
-
-
-template<class T>
-void intrusive_ptr_add_ref(atomic_intrusive_refcount<T>* ptr)
-{
-    ++(ptr->refs);
-}
-
-template<class T>
-void intrusive_ptr_release(atomic_intrusive_refcount<T>* ptr)
-{
-    if(!--(ptr->refs)) delete static_cast<T*>(ptr);
-}
-
-template<class T>
-class atomic_intrusive_refcount : boost::noncopyable
-{
- protected:
-    friend void intrusive_ptr_add_ref<T>(atomic_intrusive_refcount<T>* ptr);
-    friend void intrusive_ptr_release<T>(atomic_intrusive_refcount<T>* ptr);
-
-    atomic_intrusive_refcount(): refs(0) {}
-    ~atomic_intrusive_refcount() { assert(refs==0); }
-
-private:
-    boost::detail::atomic_count refs;
-};
-
-}
-
-
-#endif
diff --git a/decoder/logval.h b/decoder/logval.h
deleted file mode 100644
index 37f14ae5..00000000
--- a/decoder/logval.h
+++ /dev/null
@@ -1,174 +0,0 @@
-#ifndef LOGVAL_H_
-#define LOGVAL_H_
-
-#define LOGVAL_CHECK_NEG false
-
-#include <iostream>
-#include <cstdlib>
-#include <cmath>
-#include <limits>
-
-template <typename T>
-class LogVal {
- public:
-  LogVal() : s_(), v_(-std::numeric_limits<T>::infinity()) {}
-  explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
-  LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
-  LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
-  LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}
-  static LogVal<T> exp(T lnx) { return LogVal(lnx,false); }
-
-  static LogVal<T> One() { return LogVal(1); }
-  static LogVal<T> Zero() { return LogVal(); }
-  static LogVal<T> e() { return LogVal(1,false); }
-  void logeq(const T& v) { s_ = false; v_ = v; }
-
-  LogVal& operator+=(const LogVal& a) {
-    if (a.v_ == -std::numeric_limits<T>::infinity()) return *this;
-    if (a.s_ == s_) {
-      if (a.v_ < v_) {
-        v_ = v_ + log1p(std::exp(a.v_ - v_));
-      } else {
-        v_ = a.v_ + log1p(std::exp(v_ - a.v_));
-      }
-    } else {
-      if (a.v_ < v_) {
-        v_ = v_ + log1p(-std::exp(a.v_ - v_));
-      } else {
-        v_ = a.v_ + log1p(-std::exp(v_ - a.v_));
-        s_ = !s_;
-      }
-    }
-    return *this;
-  }
-
-  LogVal& operator*=(const LogVal& a) {
-    s_ = (s_ != a.s_);
-    v_ += a.v_;
-    return *this;
-  }
-
-  LogVal& operator/=(const LogVal& a) {
-    s_ = (s_ != a.s_);
-    v_ -= a.v_;
-    return *this;
-  }
-
-  LogVal& operator-=(const LogVal& a) {
-    LogVal b = a;
-    b.invert();
-    return *this += b;
-  }
-
-  // LogVal(fabs(log(x)),x.s_)
-  friend LogVal abslog(LogVal x) {
-    if (x.v_<0) x.v_=-x.v_;
-    return x;
-  }
-
-  LogVal& poweq(const T& power) {
-#if LOGVAL_CHECK_NEG
-    if (s_) {
-      std::cerr << "poweq(T) not implemented when s_ is true\n";
-      std::abort();
-    } else
-#endif
-      v_ *= power;
-    return *this;
-  }
-
-  void invert() { s_ = !s_; }
-
-  LogVal pow(const T& power) const {
-    LogVal res = *this;
-    res.poweq(power);
-    return res;
-  }
-
-  LogVal root(const T& root) const {
-    return pow(1/root);
-  }
-
-  operator T() const {
-    if (s_) return -std::exp(v_); else return std::exp(v_);
-  }
-
-  bool s_;
-  T v_;
-};
-
-// copy elision - as opposed to explicit copy of LogVal<T> const& o1, we should be able to construct Logval r=a+(b+c) as a single result in place in r.  todo: return std::move(o1) - C++0x
-template<typename T>
-LogVal<T> operator+(LogVal<T> o1, const LogVal<T>& o2) {
-  o1 += o2;
-  return o1;
-}
-
-template<typename T>
-LogVal<T> operator*(LogVal<T> o1, const LogVal<T>& o2) {
-  o1 *= o2;
-  return o1;
-}
-
-template<typename T>
-LogVal<T> operator/(LogVal<T> o1, const LogVal<T>& o2) {
-  o1 /= o2;
-  return o1;
-}
-
-template<typename T>
-LogVal<T> operator-(LogVal<T> o1, const LogVal<T>& o2) {
-  o1 -= o2;
-  return o1;
-}
-
-template<typename T>
-T log(const LogVal<T>& o) {
-#ifdef LOGVAL_CHECK_NEG
-  if (o.s_) return log(-1.0);
-#endif
-  return o.v_;
-}
-
-template <typename T>
-LogVal<T> pow(const LogVal<T>& b, const T& e) {
-  return b.pow(e);
-}
-
-template <typename T>
-bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) {
-  if (lhs.s_ == rhs.s_) {
-    return (lhs.v_ < rhs.v_);
-  } else {
-    return lhs.s_ > rhs.s_;
-  }
-}
-
-#if 0
-template <typename T>
-bool operator<=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
-  return (lhs.v_ <= rhs.v_);
-}
-
-template <typename T>
-bool operator>(const LogVal<T>& lhs, const LogVal<T>& rhs) {
-  return (lhs.v_ > rhs.v_);
-}
-
-template <typename T>
-bool operator>=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
-  return (lhs.v_ >= rhs.v_);
-}
-#endif
-
-template <typename T>
-bool operator==(const LogVal<T>& lhs, const LogVal<T>& rhs) {
-  return (lhs.v_ == rhs.v_) && (lhs.s_ == rhs.s_);
-}
-
-template <typename T>
-bool operator!=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
-  return !(lhs == rhs);
-}
-
-#endif
diff --git a/decoder/logval_test.cc b/decoder/logval_test.cc
deleted file mode 100644
index 1a23177d..00000000
--- a/decoder/logval_test.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-#include "logval.h"
-
-#include <gtest/gtest.h>
-#include <iostream>
-
-class LogValTest : public testing::Test {
- protected:
-  virtual void SetUp() { }
-  virtual void TearDown() { }
-};
-
-using namespace std;
-
-TEST_F(LogValTest,Order) {
-  LogVal<double> a(-0.3);
-  LogVal<double> b(0.3);
-  LogVal<double> c(2.4);
-  EXPECT_LT(a,b);
-  EXPECT_LT(b,c);
-  EXPECT_LT(a,c);
-  EXPECT_FALSE(b < a);
-  EXPECT_FALSE(c < a);
-  EXPECT_FALSE(c < b);
-  EXPECT_FALSE(c < c);
-  EXPECT_FALSE(b < b);
-  EXPECT_FALSE(a < a);
-}
-
-TEST_F(LogValTest,Invert) {
-  LogVal<double> x(-2.4);
-  LogVal<double> y(2.4);
-  y.invert();
-  EXPECT_FLOAT_EQ(x,y);
-}
-
-TEST_F(LogValTest,Minus) {
-  LogVal<double> x(12);
-  LogVal<double> y(2);
-  LogVal<double> z1 = x - y;
-  LogVal<double> z2 = x;
-  z2 -= y;
-  EXPECT_FLOAT_EQ(z1, z2);
-  EXPECT_FLOAT_EQ(z1, 10.0);
-  EXPECT_FLOAT_EQ(y - x, -10.0);
-}
-
-TEST_F(LogValTest,TestOps) {
-  LogVal<double> x(-12.12);
-  LogVal<double> y(x);
-  cerr << x << endl;
-  cerr << (x*y) << endl;
-  cerr << (x*y + x) << endl;
-  cerr << (x + x*y) << endl;
-  cerr << log1p(-0.5) << endl;
-  LogVal<double> aa(0.2);
-  LogVal<double> bb(-0.3);
-  cerr << (aa + bb) << endl;
-  cerr << (bb + aa) << endl;
-  EXPECT_FLOAT_EQ((aa + bb), (bb + aa));
-  EXPECT_FLOAT_EQ((aa + bb), -0.1);
-}
-
-TEST_F(LogValTest,TestSizes) {
-  cerr << sizeof(LogVal<double>) << endl;
-  cerr << sizeof(LogVal<float>) << endl;
-  cerr << sizeof(void*) << endl;
-}
-
-int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
-
diff --git a/decoder/murmur_hash.h b/decoder/murmur_hash.h
deleted file mode 100755
index 8dbd7807..00000000
--- a/decoder/murmur_hash.h
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef _MURMUR_HASH_H_
-#define _MURMUR_HASH_H_
-
-//NOTE: quite fast, nice collision properties, but endian dependent hash values
-
-#include "have_64_bits.h"
-typedef uintptr_t MurmurInt;
-
-// MurmurHash2, by Austin Appleby
-
-static const uint32_t DEFAULT_SEED=2654435769U;
-
-#if HAVE_64_BITS
-//MurmurInt MurmurHash(void const *key, int len, uint32_t seed=DEFAULT_SEED);
-
-inline uint64_t MurmurHash64( const void * key, int len, unsigned int seed=DEFAULT_SEED )
-{
-  const uint64_t m = 0xc6a4a7935bd1e995;
-  const int r = 47;
-
-  uint64_t h = seed ^ (len * m);
-
-  const uint64_t * data = (const uint64_t *)key;
-  const uint64_t * end = data + (len/8);
-
-  while(data != end)
-  {
-    uint64_t k = *data++;
-
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-
-    h ^= k;
-    h *= m;
-  }
-
-  const unsigned char * data2 = (const unsigned char*)data;
-
-  switch(len & 7)
-  {
-  case 7: h ^= uint64_t(data2[6]) << 48;
-  case 6: h ^= uint64_t(data2[5]) << 40;
-  case 5: h ^= uint64_t(data2[4]) << 32;
-  case 4: h ^= uint64_t(data2[3]) << 24;
-  case 3: h ^= uint64_t(data2[2]) << 16;
-  case 2: h ^= uint64_t(data2[1]) << 8;
-  case 1: h ^= uint64_t(data2[0]);
-    h *= m;
-  };
-
-  h ^= h >> r;
-  h *= m;
-  h ^= h >> r;
-
-  return h;
-}
-
-inline uint32_t MurmurHash32(void const *key, int len, uint32_t seed=DEFAULT_SEED)
-{
-  return (uint32_t) MurmurHash64(key,len,seed);
-}
-
-inline MurmurInt MurmurHash(void const *key, int len, uint32_t seed=DEFAULT_SEED)
-{
-  return MurmurHash64(key,len,seed);
-}
-
-#else
-// 32-bit
-
-// Note - This code makes a few assumptions about how your machine behaves -
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4
-inline uint32_t MurmurHash32 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)
-{
-  // 'm' and 'r' are mixing constants generated offline.
-  // They're not really 'magic', they just happen to work well.
-
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  // Initialize the hash to a 'random' value
-
-  uint32_t h = seed ^ len;
-
-  // Mix 4 bytes at a time into the hash
-
-  const unsigned char * data = (const unsigned char *)key;
-
-  while(len >= 4)
-  {
-    uint32_t k = *(uint32_t *)data;
-
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-
-    h *= m;
-    h ^= k;
-
-    data += 4;
-    len -= 4;
-  }
-
-  // Handle the last few bytes of the input array
-
-  switch(len)
-  {
-  case 3: h ^= data[2] << 16;
-  case 2: h ^= data[1] << 8;
-  case 1: h ^= data[0];
-    h *= m;
-  };
-
-  // Do a few final mixes of the hash to ensure the last few
-  // bytes are well-incorporated.
-
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-
-  return h;
-}
-
-inline MurmurInt MurmurHash ( const void * key, int len, uint32_t seed=DEFAULT_SEED) {
-  return MurmurHash32(key,len,seed);
-}
-
-// 64-bit hash for 32-bit platforms
-
-inline uint64_t MurmurHash64 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)
-{
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  uint32_t h1 = seed ^ len;
-  uint32_t h2 = 0;
-
-  const uint32_t * data = (const uint32_t *)key;
-
-  while(len >= 8)
-  {
-    uint32_t k1 = *data++;
-    k1 *= m; k1 ^= k1 >> r; k1 *= m;
-    h1 *= m; h1 ^= k1;
-    len -= 4;
-
-    uint32_t k2 = *data++;
-    k2 *= m; k2 ^= k2 >> r; k2 *= m;
-    h2 *= m; h2 ^= k2;
-    len -= 4;
-  }
-
-  if(len >= 4)
-  {
-    uint32_t k1 = *data++;
-    k1 *= m; k1 ^= k1 >> r; k1 *= m;
-    h1 *= m; h1 ^= k1;
-    len -= 4;
-  }
-
-  switch(len)
-  {
-  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
-  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
-  case 1: h2 ^= ((unsigned char*)data)[0];
-    h2 *= m;
-  };
-
-  h1 ^= h2 >> 18; h1 *= m;
-  h2 ^= h1 >> 22; h2 *= m;
-  h1 ^= h2 >> 17; h1 *= m;
-  h2 ^= h1 >> 19; h2 *= m;
-
-  uint64_t h = h1;
-
-  h = (h << 32) | h2;
-
-  return h;
-}
-
-#endif
-//32bit
-
-#endif
diff --git a/decoder/null_deleter.h b/decoder/null_deleter.h
deleted file mode 100755
index 082ab453..00000000
--- a/decoder/null_deleter.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef NULL_DELETER_H
-#define NULL_DELETER_H
-
-struct null_deleter {
-    void operator()(void*) const {}
-    void operator()(void const*) const {}
-};
-
-#endif
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index 81a584a7..145c84d1 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -9,7 +9,7 @@
 #include <vector>
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
-#include "../vest/scorer.h"
+#include "scorer.h"
 #include "hg.h"
 #include "ff_factory.h"
 #include "ff_bleu.h"
diff --git a/decoder/phrasebased_translator.cc b/decoder/phrasebased_translator.cc
index 726b3f9a..d65e44d1 100644
--- a/decoder/phrasebased_translator.cc
+++ b/decoder/phrasebased_translator.cc
@@ -68,7 +68,6 @@ struct PhraseBasedTranslatorImpl {
   PhraseBasedTranslatorImpl(const boost::program_options::variables_map& conf) :
       add_pass_through_rules(conf.count("add_pass_through_rules")),
       max_distortion(conf["pb_max_distortion"].as<int>()),
-      kSOURCE_RULE(new TRule("[X] ||| [X,1] ||| [X,1]", true)),
       kCONCAT_RULE(new TRule("[X] ||| [X,1] [X,2] ||| [X,1] [X,2]", true)),
       kNT_TYPE(TD::Convert("X") * -1) {
     assert(max_distortion >= 0);
@@ -141,6 +140,8 @@ struct PhraseBasedTranslatorImpl {
         for (int i = 0; i < phrases.size(); ++i) {
           Hypergraph::Edge* edge = minus_lm_forest->AddEdge(phrases[i], Hypergraph::TailNodeVector());
           edge->feature_values_ = edge->rule_->scores_;
+          edge->i_ = s.i;
+          edge->j_ = s.j;
           minus_lm_forest->ConnectEdgeToHeadNode(edge->id_, phrase_head_index);
         }
         CoverageNodeMap::iterator cit = c.find(s.coverage);
@@ -189,7 +190,6 @@ struct PhraseBasedTranslatorImpl {
 
   const bool add_pass_through_rules;
   const int max_distortion;
-  TRulePtr kSOURCE_RULE;
   const TRulePtr kCONCAT_RULE;
   const WordID kNT_TYPE;
   boost::shared_ptr<FSTNode> fst;
diff --git a/decoder/prob.h b/decoder/prob.h
deleted file mode 100644
index bc297870..00000000
--- a/decoder/prob.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _PROB_H_
-#define _PROB_H_
-
-#include "logval.h"
-
-typedef LogVal<double> prob_t;
-
-#endif
diff --git a/decoder/sampler.h b/decoder/sampler.h
deleted file mode 100644
index 5fef45d0..00000000
--- a/decoder/sampler.h
+++ /dev/null
@@ -1,147 +0,0 @@
-#ifndef SAMPLER_H_
-#define SAMPLER_H_
-
-#include <algorithm>
-#include <functional>
-#include <numeric>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <ctime>
-
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/random/uniform_real.hpp>
-#include <boost/random/variate_generator.hpp>
-#include <boost/random/normal_distribution.hpp>
-#include <boost/random/poisson_distribution.hpp>
-#include <boost/random/uniform_int.hpp>
-
-#include "prob.h"
-
-struct SampleSet;
-
-template <typename RNG>
-struct RandomNumberGenerator {
-  static uint32_t GetTrulyRandomSeed() {
-    uint32_t seed;
-    std::ifstream r("/dev/urandom");
-    if (r) {
-      r.read((char*)&seed,sizeof(uint32_t));
-    }
-    if (r.fail() || !r) {
-      std::cerr << "Warning: could not read from /dev/urandom. Seeding from clock" << std::endl;
-      seed = std::time(NULL);
-    }
-    std::cerr << "Seeding random number sequence to " << seed << std::endl;
-    return seed;
-  }
-
-  RandomNumberGenerator() : m_dist(0,1), m_generator(), m_random(m_generator,m_dist) {
-    uint32_t seed = GetTrulyRandomSeed();
-    m_generator.seed(seed);
-  }
-  explicit RandomNumberGenerator(uint32_t seed) : m_dist(0,1), m_generator(), m_random(m_generator,m_dist) {
-    if (!seed) seed = GetTrulyRandomSeed();
-    m_generator.seed(seed);
-  }
-
-  size_t SelectSample(const prob_t& a, const prob_t& b, double T = 1.0) {
-    if (T == 1.0) {
-      if (this->next() > (a / (a + b))) return 1; else return 0;
-    } else {
-      assert(!"not implemented");
-    }
-  }
-
-  // T is the annealing temperature, if desired
-  size_t SelectSample(const SampleSet& ss, double T = 1.0);
-
-  // draw a value from U(0,1)
-  double next() {return m_random();}
-
-  // draw a value from N(mean,var)
-  double NextNormal(double mean, double var) {
-    return boost::normal_distribution<double>(mean, var)(m_random);
-  }
-
-  // draw a value from a Poisson distribution
-  // lambda must be greater than 0
-  int NextPoisson(int lambda) {
-    return boost::poisson_distribution<int>(lambda)(m_random);
-  }
-
-  bool AcceptMetropolisHastings(const prob_t& p_cur,
-                                const prob_t& p_prev,
-                                const prob_t& q_cur,
-                                const prob_t& q_prev) {
-    const prob_t a = (p_cur / p_prev) * (q_prev / q_cur);
-    if (log(a) >= 0.0) return true;
-    return (prob_t(this->next()) < a);
-  }
-
-  RNG &gen() { return m_generator; }
-  typedef boost::variate_generator<RNG&, boost::uniform_int<> > IntRNG;
-  IntRNG inclusive(int low,int high_incl) {
-    assert(high_incl>=low);
-    return IntRNG(m_generator,boost::uniform_int<>(low,high_incl));
-  }
-
- private:
-  boost::uniform_real<> m_dist;
-  RNG m_generator;
-  boost::variate_generator<RNG&, boost::uniform_real<> > m_random;
-};
-
-typedef RandomNumberGenerator<boost::mt19937> MT19937;
-
-class SampleSet {
- public:
-  const prob_t& operator[](int i) const { return m_scores[i]; }
-  prob_t& operator[](int i) { return m_scores[i]; }
-  bool empty() const { return m_scores.empty(); }
-  void add(const prob_t& s) { m_scores.push_back(s); }
-  void clear() { m_scores.clear(); }
-  size_t size() const { return m_scores.size(); }
-  void resize(int size) { m_scores.resize(size); }
-  std::vector<prob_t> m_scores;
-};
-
-template <typename RNG>
-size_t RandomNumberGenerator<RNG>::SelectSample(const SampleSet& ss, double T) {
-  assert(T > 0.0);
-  assert(ss.m_scores.size() > 0);
-  if (ss.m_scores.size() == 1) return 0;
-  const prob_t annealing_factor(1.0 / T);
-  const bool anneal = (annealing_factor != prob_t::One());
-  prob_t sum = prob_t::Zero();
-  if (anneal) {
-    for (int i = 0; i < ss.m_scores.size(); ++i)
-      sum += ss.m_scores[i].pow(annealing_factor);  // p^(1/T)
-  } else {
-    sum = std::accumulate(ss.m_scores.begin(), ss.m_scores.end(), prob_t::Zero());
-  }
-  //for (size_t i = 0; i < ss.m_scores.size(); ++i) std::cerr << ss.m_scores[i] << ",";
-  //std::cerr << std::endl;
-
-  prob_t random(this->next());    // random number between 0 and 1
-  random *= sum;                  // scale with normalization factor
-  //std::cerr << "Random number " << random << std::endl;
-
-  //now figure out which sample
-  size_t position = 1;
-  sum = ss.m_scores[0];
-  if (anneal) {
-    sum.poweq(annealing_factor);
-    for (; position < ss.m_scores.size() && sum < random; ++position)
-      sum += ss.m_scores[position].pow(annealing_factor);
-  } else {
-    for (; position < ss.m_scores.size() && sum < random; ++position)
-      sum += ss.m_scores[position];
-  }
-  //std::cout << "random: " << random <<  " sample: " << position << std::endl;
-  //std::cerr << "Sample: " << position-1 << std::endl;
-  //exit(1);
-  return position-1;
-}
-
-#endif
diff --git a/decoder/sentence_metadata.h b/decoder/sentence_metadata.h
index 21be9b21..593019c8 100644
--- a/decoder/sentence_metadata.h
+++ b/decoder/sentence_metadata.h
@@ -3,7 +3,7 @@
 
 #include <cassert>
 #include "lattice.h"
-#include "../vest/scorer.h"
+#include "scorer.h"
 
 struct SentenceMetadata {
   SentenceMetadata(int id, const Lattice& ref) :
diff --git a/decoder/small_vector.h b/decoder/small_vector.h
deleted file mode 100644
index 25c52359..00000000
--- a/decoder/small_vector.h
+++ /dev/null
@@ -1,265 +0,0 @@
-#ifndef _SMALL_VECTOR_H_
-#define _SMALL_VECTOR_H_
-
-/* REQUIRES that T is POD (can be memcpy).  won't work (yet) due to union with SMALL_VECTOR_POD==0 - may be possible to handle movable types that have ctor/dtor, by using  explicit allocation, ctor/dtor calls.  but for now JUST USE THIS FOR no-meaningful ctor/dtor POD types.
-
-   stores small element (<=SV_MAX items) vectors inline.  recommend SV_MAX=sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1.  may not work if SV_MAX==0.
- */
-
-#define SMALL_VECTOR_POD 1
-
-#include <streambuf>  // std::max - where to get this?
-#include <cstring>
-#include <cassert>
-#include <stdint.h>
-#include <new>
-#include <stdint.h>
-//sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1
-
-template <class T,int SV_MAX=2>
-class SmallVector {
-//  typedef unsigned short uint16_t;
- public:
-  typedef SmallVector<T,SV_MAX> Self;
-  SmallVector() : size_(0) {}
-
-  typedef T const* const_iterator;
-  typedef T* iterator;
-  typedef T value_type;
-  typedef T &reference;
-  typedef T const& const_reference;
-
-  T *begin() { return size_>SV_MAX?data_.ptr:data_.vals; }
-  T const* begin() const { return const_cast<Self*>(this)->begin(); }
-  T *end() { return begin()+size_; }
-  T const* end() const { return begin()+size_; }
-
-  explicit SmallVector(size_t s) : size_(s) {
-    assert(s < 0xA000);
-    if (s <= SV_MAX) {
-      for (int i = 0; i < s; ++i) new(&data_.vals[i]) T();
-    } else {
-      capacity_ = s;
-      size_ = s;
-      data_.ptr = new T[s]; // TODO: replace this with allocator or ::operator new(sizeof(T)*s) everywhere
-      for (int i = 0; i < size_; ++i) new(&data_.ptr[i]) T();
-    }
-  }
-
-  SmallVector(size_t s, T const& v) : size_(s) {
-    assert(s < 0xA000);
-    if (s <= SV_MAX) {
-      for (int i = 0; i < s; ++i) data_.vals[i] = v;
-    } else {
-      capacity_ = s;
-      size_ = s;
-      data_.ptr = new T[s];
-      for (int i = 0; i < size_; ++i) data_.ptr[i] = v;
-    }
-  }
-
-  SmallVector(const Self& o) : size_(o.size_) {
-    if (size_ <= SV_MAX) {
-      std::memcpy(data_.vals,o.data_.vals,size_*sizeof(T));
-//      for (int i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i];
-    } else {
-      capacity_ = size_ = o.size_;
-      data_.ptr = new T[capacity_];
-      std::memcpy(data_.ptr, o.data_.ptr, size_ * sizeof(T));
-    }
-  }
-
-  const Self& operator=(const Self& o) {
-    if (size_ <= SV_MAX) {
-      if (o.size_ <= SV_MAX) {
-        size_ = o.size_;
-        for (int i = 0; i < SV_MAX; ++i) data_.vals[i] = o.data_.vals[i];
-      } else {
-        capacity_ = size_ = o.size_;
-        data_.ptr = new T[capacity_];
-        std::memcpy(data_.ptr, o.data_.ptr, size_ * sizeof(T));
-      }
-    } else {
-      if (o.size_ <= SV_MAX) {
-        delete[] data_.ptr;
-        size_ = o.size_;
-        for (int i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i];
-      } else {
-        if (capacity_ < o.size_) {
-          delete[] data_.ptr;
-          capacity_ = o.size_;
-          data_.ptr = new T[capacity_];
-        }
-        size_ = o.size_;
-        for (int i = 0; i < size_; ++i)
-          data_.ptr[i] = o.data_.ptr[i];
-      }
-    }
-    return *this;
-  }
-
-  ~SmallVector() {
-    if (size_ <= SV_MAX) {
-      // skip if pod?  yes, we required pod anyway.  no need to destruct
-#if !SMALL_VECTOR_POD
-      for (int i=0;i<size_;++i) data_.vals[i].~T();
-#endif
-    } else
-      delete[] data_.ptr;
-  }
-
-  void clear() {
-    if (size_ > SV_MAX) {
-      delete[] data_.ptr;
-    }
-    size_ = 0;
-  }
-
-  bool empty() const { return size_ == 0; }
-  size_t size() const { return size_; }
-
-  inline void ensure_capacity(uint16_t min_size) {
-    assert(min_size > SV_MAX);
-    if (min_size < capacity_) return;
-    uint16_t new_cap = std::max(static_cast<uint16_t>(capacity_ << 1), min_size);
-    T* tmp = new T[new_cap];
-    std::memcpy(tmp, data_.ptr, capacity_ * sizeof(T));
-    delete[] data_.ptr;
-    data_.ptr = tmp;
-    capacity_ = new_cap;
-  }
-
-private:
-  inline void copy_vals_to_ptr() {
-    capacity_ = SV_MAX * 2;
-    T* tmp = new T[capacity_];
-    for (int i = 0; i < SV_MAX; ++i) tmp[i] = data_.vals[i];
-    data_.ptr = tmp;
-  }
-  inline void ptr_to_small() {
-    assert(size_<=SV_MAX);
-    int *tmp=data_.ptr;
-    for (int i=0;i<size_;++i)
-      data_.vals[i]=tmp[i];
-    delete[] tmp;
-  }
-
-public:
-
-  inline void push_back(T const& v) {
-    if (size_ < SV_MAX) {
-      data_.vals[size_] = v;
-      ++size_;
-      return;
-    } else if (size_ == SV_MAX) {
-      copy_vals_to_ptr();
-    } else if (size_ == capacity_) {
-      ensure_capacity(size_ + 1);
-    }
-    data_.ptr[size_] = v;
-    ++size_;
-  }
-
-  T& back() { return this->operator[](size_ - 1); }
-  const T& back() const { return this->operator[](size_ - 1); }
-  T& front() { return this->operator[](0); }
-  const T& front() const { return this->operator[](0); }
-
-  void pop_back() {
-    assert(size_>0);
-    --size_;
-    if (size_==SV_MAX)
-      ptr_to_small();
-  }
-
-  void compact() {
-    compact(size_);
-  }
-
-  // size must be <= size_ - TODO: test
-  void compact(uint16_t size) {
-    assert(size<=size_);
-    if (size_>SV_MAX) {
-      size_=size;
-      if (size<=SV_MAX)
-        ptr_to_small();
-    } else
-      size_=size;
-  }
-
-  void resize(size_t s, int v = 0) {
-    if (s <= SV_MAX) {
-      if (size_ > SV_MAX) {
-        T *tmp=data_.ptr;
-        for (int i = 0; i < s; ++i) data_.vals[i] = tmp[i];
-        delete[] tmp;
-        size_ = s;
-        return;
-      }
-      if (s <= size_) {
-        size_ = s;
-        return;
-      } else {
-        for (int i = size_; i < s; ++i)
-          data_.vals[i] = v;
-        size_ = s;
-        return;
-      }
-    } else {
-      if (size_ <= SV_MAX)
-        copy_vals_to_ptr();
-      if (s > capacity_)
-        ensure_capacity(s);
-      if (s > size_) {
-        for (int i = size_; i < s; ++i)
-          data_.ptr[i] = v;
-      }
-      size_ = s;
-    }
-  }
-
-  T& operator[](size_t i) {
-    if (size_ <= SV_MAX) return data_.vals[i];
-    return data_.ptr[i];
-  }
-
-  const T& operator[](size_t i) const {
-    if (size_ <= SV_MAX) return data_.vals[i];
-    return data_.ptr[i];
-  }
-
-  bool operator==(const Self& o) const {
-    if (size_ != o.size_) return false;
-    if (size_ <= SV_MAX) {
-      for (size_t i = 0; i < size_; ++i)
-        if (data_.vals[i] != o.data_.vals[i]) return false;
-      return true;
-    } else {
-      for (size_t i = 0; i < size_; ++i)
-        if (data_.ptr[i] != o.data_.ptr[i]) return false;
-      return true;
-    }
-  }
-
-  friend bool operator!=(const Self& a, const Self& b) {
-    return !(a==b);
-  }
-
- private:
-  union StorageType {
-    T vals[SV_MAX];
-    T* ptr;
-  };
-  StorageType data_;
-  uint16_t size_;
-  uint16_t capacity_;  // only defined when size_ > __SV_MAX_STATIC
-};
-
-typedef SmallVector<int,2> SmallVectorInt;
-
-template <class T,int N>
-void memcpy(void *out,SmallVector<T,N> const& v) {
-  std::memcpy(out,v.begin(),v.size()*sizeof(T));
-}
-
-#endif
diff --git a/decoder/small_vector_test.cc b/decoder/small_vector_test.cc
deleted file mode 100644
index d1d8dcab..00000000
--- a/decoder/small_vector_test.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-#include "small_vector.h"
-
-#include <gtest/gtest.h>
-#include <iostream>
-#include <cassert>
-#include <vector>
-
-using namespace std;
-
-class SVTest : public testing::Test {
- protected:
-  virtual void SetUp() { }
-  virtual void TearDown() { }
-};
-
-TEST_F(SVTest, LargerThan2) {
-  SmallVectorInt v;
-  SmallVectorInt v2;
-  v.push_back(0);
-  v.push_back(1);
-  v.push_back(2);
-  assert(v.size() == 3);
-  assert(v[2] == 2);
-  assert(v[1] == 1);
-  assert(v[0] == 0);
-  v2 = v;
-  SmallVectorInt copy(v);
-  assert(copy.size() == 3);
-  assert(copy[0] == 0);
-  assert(copy[1] == 1);
-  assert(copy[2] == 2);
-  assert(copy == v2);
-  copy[1] = 99;
-  assert(copy != v2);
-  assert(v2.size() == 3);
-  assert(v2[2] == 2);
-  assert(v2[1] == 1);
-  assert(v2[0] == 0);
-  v2[0] = -2;
-  v2[1] = -1;
-  v2[2] = 0;
-  assert(v2[2] == 0);
-  assert(v2[1] == -1);
-  assert(v2[0] == -2);
-  SmallVectorInt v3(1,1);
-  assert(v3[0] == 1);
-  v2 = v3;
-  assert(v2.size() == 1);
-  assert(v2[0] == 1);
-  SmallVectorInt v4(10, 1);
-  assert(v4.size() == 10);
-  assert(v4[5] == 1);
-  assert(v4[9] == 1);
-  v4 = v;
-  assert(v4.size() == 3);
-  assert(v4[2] == 2);
-  assert(v4[1] == 1);
-  assert(v4[0] == 0);
-  SmallVectorInt v5(10, 2);
-  assert(v5.size() == 10);
-  assert(v5[7] == 2);
-  assert(v5[0] == 2);
-  assert(v.size() == 3);
-  v = v5;
-  assert(v.size() == 10);
-  assert(v[2] == 2);
-  assert(v[9] == 2);
-  SmallVectorInt cc;
-  for (int i = 0; i < 33; ++i)
-    cc.push_back(i);
-  for (int i = 0; i < 33; ++i)
-    assert(cc[i] == i);
-  cc.resize(20);
-  assert(cc.size() == 20);
-  for (int i = 0; i < 20; ++i)
-    assert(cc[i] == i);
-  cc[0]=-1;
-  cc.resize(1, 999);
-  assert(cc.size() == 1);
-  assert(cc[0] == -1);
-  cc.resize(99, 99);
-  for (int i = 1; i < 99; ++i) {
-    cerr << i << " " << cc[i] << endl;
-    assert(cc[i] == 99);
-  }
-  cc.clear();
-  assert(cc.size() == 0);
-}
-
-TEST_F(SVTest, Small) {
-  SmallVectorInt v;
-  SmallVectorInt v1(1,0);
-  SmallVectorInt v2(2,10);
-  SmallVectorInt v1a(2,0);
-  EXPECT_TRUE(v1 != v1a);
-  EXPECT_TRUE(v1 == v1);
-  EXPECT_EQ(v1[0], 0);
-  EXPECT_EQ(v2[1], 10);
-  EXPECT_EQ(v2[0], 10);
-  ++v2[1];
-  --v2[0];
-  EXPECT_EQ(v2[0], 9);
-  EXPECT_EQ(v2[1], 11);
-  SmallVectorInt v3(v2);
-  assert(v3[0] == 9);
-  assert(v3[1] == 11);
-  assert(!v3.empty());
-  assert(v3.size() == 2);
-  v3.clear();
-  assert(v3.empty());
-  assert(v3.size() == 0);
-  assert(v3 != v2);
-  assert(v2 != v3);
-  v3 = v2;
-  assert(v3 == v2);
-  assert(v2 == v3);
-  assert(v3[0] == 9);
-  assert(v3[1] == 11);
-  assert(!v3.empty());
-  assert(v3.size() == 2);
-  cerr << sizeof(SmallVectorInt) << endl;
-  cerr << sizeof(vector<int>) << endl;
-}
-
-int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
-
diff --git a/decoder/sparse_vector.cc b/decoder/sparse_vector.cc
deleted file mode 100644
index 4035b9ef..00000000
--- a/decoder/sparse_vector.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-#include "sparse_vector.h"
-
-#include <iostream>
-#include <cstring>
-
-#include "hg_io.h"
-
-using namespace std;
-
-namespace B64 {
-
-void Encode(double objective, const SparseVector<double>& v, ostream* out) {
-  const int num_feats = v.num_active();
-  size_t tot_size = 0;
-  const size_t off_objective = tot_size;
-  tot_size += sizeof(double);                   // objective
-  const size_t off_num_feats = tot_size;
-  tot_size += sizeof(int);                      // num_feats
-  const size_t off_data = tot_size;
-  tot_size += sizeof(unsigned char) * num_feats; // lengths of feature names;
-  typedef SparseVector<double>::const_iterator const_iterator;
-  for (const_iterator it = v.begin(); it != v.end(); ++it)
-    tot_size += FD::Convert(it->first).size();   // feature names;
-  tot_size += sizeof(double) * num_feats;        // gradient
-  const size_t off_magic = tot_size;
-  tot_size += 4;                                 // magic
-
-  // size_t b64_size = tot_size * 4 / 3;
-  // cerr << "Sparse vector binary size: " << tot_size << "  (b64 size=" << b64_size << ")\n";
-  char* data = new char[tot_size];
-  *reinterpret_cast<double*>(&data[off_objective]) = objective;
-  *reinterpret_cast<int*>(&data[off_num_feats]) = num_feats;
-  char* cur = &data[off_data];
-  assert(cur - data == off_data);
-  for (const_iterator it = v.begin(); it != v.end(); ++it) {
-    const string& fname = FD::Convert(it->first);
-    *cur++ = static_cast<char>(fname.size());   // name len
-    memcpy(cur, &fname[0], fname.size());
-    cur += fname.size();
-    *reinterpret_cast<double*>(cur) = it->second;
-    cur += sizeof(double);
-  }
-  assert(cur - data == off_magic);
-  *reinterpret_cast<unsigned int*>(cur) = 0xBAABABBAu;
-  cur += sizeof(unsigned int);
-  assert(cur - data == tot_size);
-  b64encode(data, tot_size, out);
-  delete[] data;
-}
-
-bool Decode(double* objective, SparseVector<double>* v, const char* in, size_t size) {
-  v->clear();
-  if (size % 4 != 0) {
-    cerr << "B64 error - line % 4 != 0\n";
-    return false;
-  }
-  const size_t decoded_size = size * 3 / 4 - sizeof(unsigned int);
-  const size_t buf_size = decoded_size + sizeof(unsigned int);
-  if (decoded_size < 6) { cerr << "SparseVector decoding error: too short!\n"; return false; }
-  char* data = new char[buf_size];
-  if (!b64decode(reinterpret_cast<const unsigned char*>(in), size, data, buf_size)) {
-    delete[] data;
-    return false;
-  }
-  size_t cur = 0;
-  *objective = *reinterpret_cast<double*>(data);
-  cur += sizeof(double);
-  const int num_feats = *reinterpret_cast<int*>(&data[cur]);
-  cur += sizeof(int);
-  int fc = 0;
-  while(fc < num_feats && cur < decoded_size) {
-    ++fc;
-    const int fname_len = data[cur++];
-    assert(fname_len > 0);
-    assert(fname_len < 256);
-    string fname(fname_len, '\0');
-    memcpy(&fname[0], &data[cur], fname_len);
-    cur += fname_len;
-    const double val = *reinterpret_cast<double*>(&data[cur]);
-    cur += sizeof(double);
-    int fid = FD::Convert(fname);
-    v->set_value(fid, val);
-  }
-  if(num_feats != fc) {
-    cerr << "Expected " << num_feats << " but only decoded " << fc << "!\n";
-    delete[] data;
-    return false;
-  }
-  if (*reinterpret_cast<unsigned int*>(&data[cur]) != 0xBAABABBAu) {
-    cerr << "SparseVector decodeding error : magic does not match!\n";
-    delete[] data;
-    return false;
-  }
-  delete[] data;
-  return true;
-}
-
-}
diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h
deleted file mode 100644
index 207489c5..00000000
--- a/decoder/sparse_vector.h
+++ /dev/null
@@ -1,512 +0,0 @@
-#ifndef _SPARSE_VECTOR_H_
-#define _SPARSE_VECTOR_H_
-
-//#define SPARSE_VECTOR_HASH
-
-#ifdef SPARSE_VECTOR_HASH
-#include "hash.h"
-# define SPARSE_VECTOR_MAP HASH_MAP
-# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted)
-#else
-# define SPARSE_VECTOR_MAP std::map
-# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted)
-#endif
-/*
-   use SparseVectorList (pair smallvector) for feat funcs / hypergraphs (you rarely need random access; just append a feature to the list)
-*/
-/* hack: index 0 never gets printed because cdyer is creative and efficient. features which have no weight got feature dict id 0, see, and the models all clobered that value.  nobody wants to see it.  except that vlad is also creative and efficient and stored the oracle bleu there. */
-/* NOTE: zero vals may or may not be dropped from map (sparse, but not guaranteed to be so).
-
-   I rely on !v the same as !((bool)v) the same as v==0 and v() same as v(0).
-
-   one exception:
-
-   a local:
-   T sum = 0;
-   is used instead of
-   T sum;
-
-   because T may be a primitive type, and
-
-   T sum();
-
-   is parsed as a function decl :(
-
-   the alternative T sum=T() is also be reasonable.  i've switched to that.
-*/
-
-// this is a modified version of code originally written
-// by Phil Blunsom
-
-#include <iostream>
-#include <map>
-#include <tr1/unordered_map>
-#include <vector>
-#include <valarray>
-
-#include "fdict.h"
-#include "small_vector.h"
-
-template <class T>
-inline T & extend_vector(std::vector<T> &v,int i) {
-  if (i>=v.size())
-    v.resize(i+1);
-  return v[i];
-}
-
-template <typename T>
-class SparseVector {
-  void init_reserved() {
-    SPARSE_VECTOR_MAP_RESERVED(values_,-1,-2);
-  }
-public:
-  T const& get_singleton() const {
-    assert(values_.size()==1);
-    return values_.begin()->second;
-  }
-
-  typedef SparseVector<T> Self;
-  typedef SPARSE_VECTOR_MAP<int, T> MapType;
-  typedef typename MapType::const_iterator const_iterator;
-  SparseVector() {
-    init_reserved();
-  }
-  explicit SparseVector(std::vector<T> const& v) {
-    init_reserved();
-    typename MapType::iterator p=values_.begin();
-    const T z=0;
-    for (unsigned i=0;i<v.size();++i) {
-      T const& t=v[i];
-      if (t!=z)
-        p=values_.insert(p,typename MapType::value_type(i,t)); //hint makes insertion faster
-    }
-  }
-
-
-  void init_vector(std::vector<T> *vp) const {
-    init_vector(*vp);
-  }
-
-  void init_vector(std::vector<T> &v) const {
-    v.clear();
-    for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i)
-      extend_vector(v,i->first)=i->second;
-  }
-
-  void set_new_value(int index, T const& val) {
-    assert(values_.find(index)==values_.end());
-    values_[index]=val;
-  }
-
-
-  // warning: exploits the fact that 0 values are always removed from map.  change this if you change that.
-  bool nonzero(int index) const {
-    typename MapType::const_iterator found = values_.find(index);
-    return found==values_.end() || !found->second;
-  }
-
-
-  T get(int index) const {
-    typename MapType::const_iterator found = values_.find(index);
-    return found==values_.end()?T():found->second;
-  }
-
-  T value(int i) const { return get(i); }
-
-  // same as above but may add a 0 entry.  TODO: check that people relying on no entry use get
-  T & operator[](int index){
-    return values_[index];
-  }
-
-  inline void set_value(int index, const T &value) {
-    values_[index] = value;
-  }
-
-  inline void maybe_add(int index, const T& value) {
-    if (value) add_value(index,value);
-  }
-
-    T& add_value(int index, const T &value) {
-#if 1
-      return values_[index]+=value;
-#else
-      // this is not really going to be any faster, and we already rely on default init = 0 init
-      std::pair<typename MapType::iterator,bool> art=values_.insert(std::make_pair(index,value));
-      T &val=art.first->second;
-      if (!art.second) val += value; // already existed
-      return val;
-#endif
-    }
-
-
-    void store(std::valarray<T>* target) const {
-      (*target) *= 0;
-      for (typename MapType::const_iterator
-              it = values_.begin(); it != values_.end(); ++it) {
-        if (it->first >= target->size()) break;
-        (*target)[it->first] = it->second;
-      }
-    }
-
-    int max_index() const {
-      if (empty()) return 0;
-        typename MapType::const_iterator found =values_.end();
-        --found;
-        return found->first;
-    }
-
-    // dot product with a unit vector of the same length
-    // as the sparse vector
-    T dot() const {
-        T sum = T();
-        for (typename MapType::const_iterator
-                it = values_.begin(); it != values_.end(); ++it)
-            sum += it->second;
-        return sum;
-    }
-
-    template<typename S>
-    S cosine_sim(const SparseVector<S> &vec) const {
-      return dot(vec)/(l2norm()*vec.l2norm());
-    }
-
-  // if values are binary, gives |A intersect B|/|A union B|
-    template<typename S>
-    S tanimoto_coef(const SparseVector<S> &vec) const {
-      S dp=dot(vec);
-      return dp/(l2norm_sq()+vec.l2norm_sq()-dp);
-    }
-
-    template<typename S>
-    S dot(const SparseVector<S> &vec) const {
-        S sum = S();
-        for (typename MapType::const_iterator
-                it = values_.begin(); it != values_.end(); ++it)
-        {
-            typename MapType::const_iterator
-                found = vec.values_.find(it->first);
-            if (found != vec.values_.end())
-                sum += it->second * found->second;
-        }
-        return sum;
-    }
-
-    template<typename S>
-    S dot(const std::vector<S> &vec) const {
-      S sum = S();
-        for (typename MapType::const_iterator
-                it = values_.begin(); it != values_.end(); ++it)
-        {
-            if (it->first < static_cast<int>(vec.size()))
-                sum += it->second * vec[it->first];
-        }
-        return sum;
-    }
-
-    template<typename S>
-    S dot(const S *vec) const {
-        // this is not range checked!
-        S sum = S();
-        for (typename MapType::const_iterator
-                it = values_.begin(); it != values_.end(); ++it)
-            sum += it->second * vec[it->first];
-        std::cout << "dot(*vec) " << sum << std::endl;
-        return sum;
-    }
-
-    T l1norm() const {
-      T sum = T();
-      for (typename MapType::const_iterator
-              it = values_.begin(); it != values_.end(); ++it)
-        sum += fabs(it->second);
-      return sum;
-    }
-
-  T l2norm_sq() const {
-      T sum = T();
-      for (typename MapType::const_iterator
-              it = values_.begin(); it != values_.end(); ++it)
-        sum += it->second * it->second;
-      return sum;
-  }
-
-    T l2norm() const {
-      return sqrt(l2norm_sq());
-    }
-
-  void erase(int key) {
-    values_.erase(key);
-/*    typename MapType::iterator found = values_.find(key);
-    if (found!=values_end())
-    values_.erase(found);*/
-  }
-
-  template <class T2>
-  void set_from(SparseVector<T2> const& other) {
-    for (typename MapType::const_iterator
-           it = other.values_.begin(); it != other.values_.end(); ++it)
-    {
-      values_[it->first]=it->second;
-    }
-  }
-
-    SparseVector<T> &operator+=(const SparseVector<T> &other) {
-        for (typename MapType::const_iterator
-                it = other.values_.begin(); it != other.values_.end(); ++it)
-        {
-//            T v =
-              (values_[it->first] += it->second);
-//            if (!v) values_.erase(it->first);
-        }
-        return *this;
-    }
-
-    SparseVector<T> &operator-=(const SparseVector<T> &other) {
-        for (typename MapType::const_iterator
-                it = other.values_.begin(); it != other.values_.end(); ++it)
-        {
-//            T v =
-          (values_[it->first] -= it->second);
-//            if (!v) values_.erase(it->first);
-        }
-        return *this;
-    }
-
-  friend SparseVector<T> operator -(SparseVector<T> x,SparseVector<T> const& y) {
-    x-=y;
-    return x;
-  }
-  friend SparseVector<T> operator +(SparseVector<T> x,SparseVector<T> const& y) {
-    x+=y;
-    return x;
-  }
-
-private:
-  // DEPRECATED: becuase 0 values are dropped from the map, this doesn't even make sense if you have a fully populated (not really sparse re: what you'll ever use) vector
-    SparseVector<T> &operator-=(T const& x) {
-        for (typename MapType::iterator
-                it = values_.begin(); it != values_.end(); ++it)
-            it->second -= x;
-        return *this;
-    }
-
-    SparseVector<T> &operator+=(T const& x) {
-        for (typename MapType::iterator
-                it = values_.begin(); it != values_.end(); ++it)
-            it->second += x;
-        return *this;
-    }
-public:
-    SparseVector<T> &operator/=(const T &x) {
-        for (typename MapType::iterator
-                it = values_.begin(); it != values_.end(); ++it)
-            it->second /= x;
-        return *this;
-    }
-
-    SparseVector<T> &operator*=(const T& x) {
-        for (typename MapType::iterator
-                it = values_.begin(); it != values_.end(); ++it)
-            it->second *= x;
-        return *this;
-    }
-
-    SparseVector<T> operator+(T const& x) const {
-        SparseVector<T> result = *this;
-        return result += x;
-    }
-
-    SparseVector<T> operator-(T const& x) const {
-        SparseVector<T> result = *this;
-        return result -= x;
-    }
-
-    SparseVector<T> operator/(T const& x) const {
-        SparseVector<T> result = *this;
-        return result /= x;
-    }
-
-    std::ostream &operator<<(std::ostream& out) const {
-      Write(true, &out);
-      return out;
-    }
-
-    void Write(const bool with_semi, std::ostream* os) const {
-        bool first = true;
-        for (typename MapType::const_iterator
-                it = values_.begin(); it != values_.end(); ++it) {
-          // by definition feature id 0 is a dummy value
-          if (!it->first) continue;
-          if (with_semi) {
-            (*os) << (first ? "" : ";")
-	         << FD::Convert(it->first) << '=' << it->second;
-          } else {
-            (*os) << (first ? "" : " ")
-	         << FD::Convert(it->first) << '=' << it->second;
-          }
-          first = false;
-        }
-    }
-
-  bool operator==(Self const & other) const {
-    return size()==other.size() && contains_keys_of(other) && other.contains_i(*this);
-  }
-
-  bool contains(Self const &o) const {
-    return size()>o.size() && contains(o);
-  }
-
-  bool at_equals(int i,T const& val) const {
-    const_iterator it=values_.find(i);
-    if (it==values_.end()) return !val;
-    return it->second==val;
-  }
-
-  bool contains_i(Self const& o) const {
-    for (typename MapType::const_iterator i=o.begin(),e=o.end();i!=e;++i)
-      if (!at_equals(i->first,i->second))
-        return false;
-    return true;
-  }
-
-  bool contains_keys_of(Self const& o) const {
-    for (typename MapType::const_iterator i=o.begin(),e=o.end();i!=e;++i)
-      if (values_.find(i)==values_.end())
-        return false;
-    return true;
-  }
-
-#ifndef SPARSE_VECTOR_HASH
-    bool operator<(const SparseVector<T> &other) const {
-        typename MapType::const_iterator it = values_.begin();
-        typename MapType::const_iterator other_it = other.values_.begin();
-
-        for (; it != values_.end() && other_it != other.values_.end(); ++it, ++other_it)
-        {
-            if (it->first < other_it->first) return true;
-            if (it->first > other_it->first) return false;
-            if (it->second < other_it->second) return true;
-            if (it->second > other_it->second) return false;
-        }
-        return values_.size() < other.values_.size();
-    }
-#endif
-
-  int size() const { return values_.size(); }
-
-    int num_active() const { return values_.size(); }
-    bool empty() const { return values_.empty(); }
-
-    const_iterator begin() const { return values_.begin(); }
-    const_iterator end() const { return values_.end(); }
-
-    void clear() {
-        values_.clear();
-    }
-
-    void swap(SparseVector<T>& other) {
-      values_.swap(other.values_);
-    }
-
-private:
-  MapType values_;
-};
-
-//like a pair but can live in a union, because it lacks default+copy ctors, dtor.
-template <class T>
-struct feature_val {
-  int fid;
-  T val;
-};
-
-template <class T>
-inline feature_val<T> featval(int fid,T const &val) {
-  feature_val<T> f;
-  f.fid=fid;
-  f.val=val;
-  return f;
-}
-
-
-// doesn't support fast indexing directly
-template <class T>
-class SparseVectorList {
-  typedef feature_val<T> Pair;
-  typedef SmallVector<Pair,1> List;
-  typedef typename List::const_iterator const_iterator;
-  SparseVectorList() {  }
-  template <class I>
-  SparseVectorList(I i,I const& end) {
-    int c=0;
-    for (;i<end;++i,++c) {
-      if (*i)
-        p.push_back(featval(c,*i));
-    }
-    p.compact();
-  }
-  explicit SparseVectorList(std::vector<T> const& v) {
-    for (unsigned i=0;i<v.size();++i) {
-      T const& t=v[i];
-      if (t)
-        p.push_back(featval(i,t));
-    }
-    p.compact();
-  }
-  // unlike SparseVector, this doesn't overwrite - but conversion to SparseVector will use last value, which is the same
-  void set_value(int i,T const& val) {
-    p.push_back(Pair(i,val));
-  }
-  void overlay(SparseVector<T> *to) const {
-    for (int i=0;i<p.size();++i)
-      to->set_value(p[i].fid,p[i].val);
-  }
-  void copy_to(SparseVector<T> *to) const {
-    to->clear();
-    overlay(to);
-  }
-  SparseVector<T> sparse() const {
-    SparseVector<T> r;
-    copy_to(r);
-    return r;
-  }
-private:
-  List p;
-};
-
-template <typename T>
-SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) {
-  SparseVector<T> result = a;
-  return result += b;
-}
-
-template <typename T>
-SparseVector<T> operator*(const SparseVector<T>& a, const double& b) {
-  SparseVector<T> result = a;
-  return result *= b;
-}
-
-template <typename T>
-SparseVector<T> operator*(const SparseVector<T>& a, const T& b) {
-  SparseVector<T> result = a;
-  return result *= b;
-}
-
-template <typename T>
-SparseVector<T> operator*(const double& a, const SparseVector<T>& b) {
-  SparseVector<T> result = b;
-  return result *= a;
-}
-
-template <typename T>
-std::ostream &operator<<(std::ostream &out, const SparseVector<T> &vec)
-{
-    return vec.operator<<(out);
-}
-
-namespace B64 {
-  void Encode(double objective, const SparseVector<double>& v, std::ostream* out);
-  // returns false if failed to decode
-  bool Decode(double* objective, SparseVector<double>* v, const char* data, size_t size);
-}
-
-#endif
diff --git a/decoder/static_utoa.h b/decoder/static_utoa.h
deleted file mode 100755
index fe5f6d92..00000000
--- a/decoder/static_utoa.h
+++ /dev/null
@@ -1,115 +0,0 @@
-#ifndef STATIC_UTOA_H
-#define STATIC_UTOA_H
-
-#include "threadlocal.h"
-
-
-#include <string>
-#include <cstring>
-
-#define DIGIT_LOOKUP_TABLE 0
-
-namespace {
-THREADLOCAL char utoa_buf[] = "01234567890123456789"; // to put end of string character at buf[20]
-const unsigned utoa_bufsize=sizeof(utoa_buf);
-const unsigned utoa_bufsizem1=utoa_bufsize-1;
-#ifdef DIGIT_LOOKUP_TABLE
-char digits[] = "0123456789";
-#endif
-}
-
-inline char digit_to_char(int d) {
-  return
-#ifdef DIGIT_LOOKUP_TABLE
-    digits[d];
-#else
-    '0'+d;
-#endif
-}
-
-// returns n in string [return,num); *num=0 yourself before calling if you want a c_str
-inline char *utoa(char *num,unsigned n) {
-  if ( !n ) {
-    *--num='0';
-  } else {
-    unsigned rem;
-    // 3digit lookup table, divide by 1000 faster?
-    while ( n ) {
-#if 1
-      rem = n;
-      n /= 10;
-      rem -= 10*n;		// maybe this is faster than mod because we are already dividing
-#else
-      rem = n%10; // would optimizer combine these together?
-      n   = n/10;
-#endif
-      *--num = digit_to_char(rem);
-    }
-  }
-  return num;
-}
-
-inline char *static_utoa(unsigned n) {
-  return utoa(utoa_buf+utoa_bufsizem1,n);
-}
-
-//returns position of '\0' terminating number written starting at to
-inline char* append_utoa(char *to,unsigned n) {
-  char *s=static_utoa(n);
-  int ns=(utoa_buf+utoa_bufsize)-s;
-  std::memcpy(to,s,ns);
-  return to+ns;
-}
-
-// so named to avoid gcc segfault when named itoa
-inline char *itoa(char *p,int n) {
-  if (n<0) {
-    p=utoa(p,-n); // TODO: check that (unsigned)(-INT_MIN) == 0x1000000 in 2s complement and not == 0
-    *--p='-';
-    return p;
-  } else
-    return utoa(p,n);
-}
-
-inline char *static_itoa(int n) {
-  return itoa(utoa_buf+utoa_bufsizem1,n);
-}
-
-
-inline std::string utos(unsigned n) {
-  const int bufsz=20;
-  char buf[bufsz];
-  char *end=buf+bufsz;
-  char *p=utoa(end,n);
-  return std::string(p,end);
-}
-
-inline std::string itos(int n) {
-  const int bufsz=20;
-  char buf[bufsz];
-  char *end=buf+bufsz;
-  char *p=itoa(end,n);
-  return std::string(p,end);
-}
-
-#ifdef ITOA_SAMPLE
-# include <cstdio>
-# include <sstream>
-# include <iostream>
-using namespace std;
-
-int main(int argc,char *argv[]) {
-  printf("d U d U d U\n");
-  for (int i=1;i<argc;++i) {
-    int n;
-    unsigned un;
-    sscanf(argv[i],"%d",&n);
-    sscanf(argv[i],"%u",&un);
-    printf("%d %u %s",n,un,static_itoa(n));
-    printf(" %s %s %s\n",static_utoa(un),itos(n).c_str(),utos(un).c_str());
-  }
-  return 0;
-}
-#endif
-
-#endif
diff --git a/decoder/stringlib.cc b/decoder/stringlib.cc
deleted file mode 100644
index 3e52ae87..00000000
--- a/decoder/stringlib.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-#include "stringlib.h"
-
-#include <cstring>
-#include <cstdlib>
-#include <cassert>
-#include <iostream>
-#include <map>
-
-#include "lattice.h"
-
-using namespace std;
-
-void ParseTranslatorInput(const string& line, string* input, string* ref) {
-  size_t hint = 0;
-  if (line.find("{\"rules\":") == 0) {
-    hint = line.find("}}");
-    if (hint == string::npos) {
-      cerr << "Syntax error: " << line << endl;
-      abort();
-    }
-    hint += 2;
-  }
-  size_t pos = line.find("|||", hint);
-  if (pos == string::npos) { *input = line; return; }
-  ref->clear();
-  *input = line.substr(0, pos - 1);
-  string rline = line.substr(pos + 4);
-  if (rline.size() > 0) {
-    assert(ref);
-    *ref = rline;
-  }
-}
-
-void ParseTranslatorInputLattice(const string& line, string* input, Lattice* ref) {
-  string sref;
-  ParseTranslatorInput(line, input, &sref);
-  if (sref.size() > 0) {
-    assert(ref);
-    LatticeTools::ConvertTextOrPLF(sref, ref);
-  }
-}
-
-void ProcessAndStripSGML(string* pline, map<string, string>* out) {
-  map<string, string>& meta = *out;
-  string& line = *pline;
-  string lline = LowercaseString(line);
-  if (lline.find("<seg")!=0) return;
-  size_t close = lline.find(">");
-  if (close == string::npos) return; // error
-  size_t end = lline.find("</seg>");
-  string seg = Trim(lline.substr(4, close-4));
-  string text = line.substr(close+1, end - close - 1);
-  for (size_t i = 1; i < seg.size(); i++) {
-    if (seg[i] == '=' && seg[i-1] == ' ') {
-      string less = seg.substr(0, i-1) + seg.substr(i);
-      seg = less; i = 0; continue;
-    }
-    if (seg[i] == '=' && seg[i+1] == ' ') {
-      string less = seg.substr(0, i+1);
-      if (i+2 < seg.size()) less += seg.substr(i+2);
-      seg = less; i = 0; continue;
-    }
-  }
-  line = Trim(text);
-  if (seg == "") return;
-  for (size_t i = 1; i < seg.size(); i++) {
-    if (seg[i] == '=') {
-      string label = seg.substr(0, i);
-      string val = seg.substr(i+1);
-      if (val[0] == '"') {
-        val = val.substr(1);
-        size_t close = val.find('"');
-        if (close == string::npos) {
-          cerr << "SGML parse error: missing \"\n";
-          seg = "";
-          i = 0;
-        } else {
-          seg = val.substr(close+1);
-          val = val.substr(0, close);
-          i = 0;
-        }
-      } else {
-        size_t close = val.find(' ');
-        if (close == string::npos) {
-          seg = "";
-          i = 0;
-        } else {
-          seg = val.substr(close+1);
-          val = val.substr(0, close);
-        }
-      }
-      label = Trim(label);
-      seg = Trim(seg);
-      meta[label] = val;
-    }
-  }
-}
-
diff --git a/decoder/stringlib.h b/decoder/stringlib.h
deleted file mode 100644
index 84e95d44..00000000
--- a/decoder/stringlib.h
+++ /dev/null
@@ -1,267 +0,0 @@
-#ifndef CDEC_STRINGLIB_H_
-#define CDEC_STRINGLIB_H_
-
-//usage: string s=MAKESTRE(1<<" "<<c);
-#define MAKESTR(expr) ((dynamic_cast<ostringstream &>(ostringstream()<<std::dec<<expr)).str())
-// std::dec (or seekp, or another manip) is needed to convert to std::ostream reference.
-
-#ifdef STRINGLIB_DEBUG
-#include <iostream>
-#define SLIBDBG(x) do { std::cerr<<"DBG(stringlib): "<<x<<std::endl; } while(0)
-#else
-#define SLIBDBG(x)
-#endif
-
-#include <map>
-#include <vector>
-#include <cctype>
-#include <cstring>
-#include <string>
-#include <sstream>
-#include <algorithm>
-
-inline std::size_t skip_ws(std::string const& s,std::size_t starting=0,char const* ws=" \t\n\r") {
-  return s.find_first_not_of(ws,starting);
-}
-
-// returns position of end of all non-ws chars before ending, i.e. string(s.begin()+skip_ws(s),s.begin()+trailing_ws(s)) strips both ends
-inline std::size_t trailing_ws(std::string const& s,std::size_t ending=std::string::npos,char const* ws=" \t\n\r") {
-  std::size_t n=s.find_last_not_of(ws,ending);
-  if (n==std::string::npos) return n;
-  else return n+1;
-}
-
-//TEST: if string is all whitespace, make sure that string(a+npos,a+npos) can't segfault (i.e. won't access any memory because begin==end)
-inline std::string strip_ws(std::string const& s) {
-  return std::string(s.begin()+skip_ws(s),s.begin()+trailing_ws(s));
-}
-
-
-inline bool is_single_line(std::string const& line) {
-  return std::count(line.begin(),line.end(),'\n')==0; // but we want to allow terminal newlines/blanks
-}
-
-// is_single_line(strip_ws(line))
-inline bool is_single_line_stripped(std::string const& line) {
-  std::size_t b=skip_ws(line),e=trailing_ws(line);
-  std::size_t n=line.find('\n',b);
-  return n==std::string::npos || n>=e;
-}
-
-struct toupperc {
-  inline char operator()(char c) const {
-    return std::toupper(c);
-  }
-};
-
-inline std::string toupper(std::string s) {
-  std::transform(s.begin(),s.end(),s.begin(),toupperc());
-  return s;
-}
-
-template <class Istr, class Isubstr> inline
-bool match_begin(Istr bstr,Istr estr,Isubstr bsub,Isubstr esub)
-{
-  while (bsub != esub) {
-    if (bstr == estr)
-      return false;
-    if (*bsub++ != *bstr++)
-      return false;
-  }
-  return true;
-}
-
-template <class Istr, class Prefix> inline
-bool match_begin(Istr bstr,Istr estr,Prefix prefix)
-{
-  return match_begin(bstr,estr,prefix.begin(),prefix.end());
-}
-
-template <class Str, class Prefix> inline
-bool match_begin(Str const& str,Prefix const& prefix)
-{
-  return match_begin(str.begin(),str.end(),prefix.begin(),prefix.end());
-}
-
-
-// read line in the form of either:
-//   source
-//   source ||| target
-// source will be returned as a string, target must be a sentence or
-// a lattice (in PLF format) and will be returned as a Lattice object
-void ParseTranslatorInput(const std::string& line, std::string* input, std::string* ref);
-struct Lattice;
-void ParseTranslatorInputLattice(const std::string& line, std::string* input, Lattice* ref);
-
-inline std::string Trim(const std::string& str, const std::string& dropChars = " \t") {
-  std::string res = str;
-  res.erase(str.find_last_not_of(dropChars)+1);
-  return res.erase(0, res.find_first_not_of(dropChars));
-}
-
-inline void Tokenize(const std::string& str, char delimiter, std::vector<std::string>* res) {
-  std::string s = str;
-  int last = 0;
-  res->clear();
-  for (int i=0; i < s.size(); ++i)
-    if (s[i] == delimiter) {
-      s[i]=0;
-      if (last != i) {
-        res->push_back(&s[last]);
-      }
-      last = i + 1;
-    }
-  if (last != s.size())
-    res->push_back(&s[last]);
-}
-
-inline unsigned NTokens(const std::string& str, char delimiter)
-{
-  std::vector<std::string> r;
-  Tokenize(str,delimiter,&r);
-  return r.size();
-}
-
-inline std::string LowercaseString(const std::string& in) {
-  std::string res(in.size(),' ');
-  for (int i = 0; i < in.size(); ++i)
-    res[i] = tolower(in[i]);
-  return res;
-}
-
-inline int CountSubstrings(const std::string& str, const std::string& sub) {
-  size_t p = 0;
-  int res = 0;
-  while (p < str.size()) {
-    p = str.find(sub, p);
-    if (p == std::string::npos) break;
-    ++res;
-    p += sub.size();
-  }
-  return res;
-}
-
-inline int SplitOnWhitespace(const std::string& in, std::vector<std::string>* out) {
-  out->clear();
-  int i = 0;
-  int start = 0;
-  std::string cur;
-  while(i < in.size()) {
-    if (in[i] == ' ' || in[i] == '\t') {
-      if (i - start > 0)
-        out->push_back(in.substr(start, i - start));
-      start = i + 1;
-    }
-    ++i;
-  }
-  if (i > start)
-    out->push_back(in.substr(start, i - start));
-  return out->size();
-}
-
-inline std::vector<std::string> SplitOnWhitespace(std::string const& in)
-{
-  std::vector<std::string> r;
-  SplitOnWhitespace(in,&r);
-  return r;
-}
-
-
-struct mutable_c_str {
-  // because making a copy of a string might not copy its storage, so modifying a c_str() could screw up original (nobody uses cow nowadays because it needs locking under threading)
-  char *p;
-  mutable_c_str(std::string const& s) : p((char *)::operator new(s.size()+1)) {
-    std::memcpy(p,s.data(),s.size());
-    p[s.size()]=0;
-  }
-  ~mutable_c_str() { ::operator delete(p); }
-private:
-  mutable_c_str(mutable_c_str const&);
-};
-
-// ' ' '\t' tokens hardcoded
-//NOTE: you should have stripped endline chars out first.
-inline bool IsWordSep(char c) {
-  return c==' '||c=='\t';
-}
-
-
-template <class F>
-// *end must be 0 (i.e. [p,end] is valid storage, which will be written to with 0 to separate c string tokens
-void VisitTokens(char *p,char *const end,F f) {
-  SLIBDBG("VisitTokens. p="<<p<<" Nleft="<<end-p);
-  if (p==end) return;
-  char *last; // 0 terminated already.  this is ok to mutilate because s is a copy of the string passed in.  well, barring copy on write i guess.
-  while(IsWordSep(*p)) { ++p;if (p==end) return; } // skip init whitespace
-  last=p; // first non-ws char
-  for(;;) {
-    SLIBDBG("Start of word. last="<<last<<" *p="<<*p<<" Nleft="<<end-p);
-    // last==p, pointing at first non-ws char not yet translated into f(word) call
-    for(;;) {// p to end of word
-      ++p;
-      if (p==end) {
-        f(last);
-        SLIBDBG("Returning. word="<<last<<" *p="<<*p<<" Nleft="<<end-p);
-        return;
-      }
-      if (IsWordSep(*p)) break;
-    }
-    *p=0;
-    f(last);
-    SLIBDBG("End of word. word="<<last<<" rest="<<p+1<<" Nleft="<<end-p);
-    for(;;) { // again skip extra whitespace
-      ++p;
-      if (p==end) return;
-      if (!IsWordSep(*p)) break;
-    }
-    last=p;
-  }
-}
-
-template <class F>
-void VisitTokens(char *p,F f) {
-  VisitTokens(p,p+std::strlen(p),f);
-}
-
-
-template <class F>
-void VisitTokens(std::string const& s,F f) {
-  if (0) {
-  std::vector<std::string> ss=SplitOnWhitespace(s);
-  for (int i=0;i<ss.size();++i)
-    f(ss[i]);
-  return;
-  }
-  //FIXME:
-  if (s.empty()) return;
-  mutable_c_str mp(s);
-  SLIBDBG("mp="<<mp.p);
-  VisitTokens(mp.p,mp.p+s.size(),f);
-}
-
-inline void SplitCommandAndParam(const std::string& in, std::string* cmd, std::string* param) {
-  cmd->clear();
-  param->clear();
-  std::vector<std::string> x;
-  SplitOnWhitespace(in, &x);
-  if (x.size() == 0) return;
-  *cmd = x[0];
-  for (int i = 1; i < x.size(); ++i) {
-    if (i > 1) { *param += " "; }
-    *param += x[i];
-  }
-}
-
-void ProcessAndStripSGML(std::string* line, std::map<std::string, std::string>* out);
-
-// given the first character of a UTF8 block, find out how wide it is
-// see http://en.wikipedia.org/wiki/UTF-8 for more info
-inline unsigned int UTF8Len(unsigned char x) {
-  if (x < 0x80) return 1;
-  else if ((x >> 5) == 0x06) return 2;
-  else if ((x >> 4) == 0x0e) return 3;
-  else if ((x >> 3) == 0x1e) return 4;
-  else return 0;
-}
-
-#endif
diff --git a/decoder/stringlib_test.cc b/decoder/stringlib_test.cc
deleted file mode 100755
index f66cdbeb..00000000
--- a/decoder/stringlib_test.cc
+++ /dev/null
@@ -1,17 +0,0 @@
-#define STRINGLIB_DEBUG
-#include "stringlib.h"
-
-using namespace std;
-struct print {
-  template <class S>
-  void operator()(S const& s) const {
-    cout<<s<<endl;
-  }
-};
-
-char p[]=" 1 are u 2 serious?";
-int main(int argc, char *argv[]) {
-  std::string const& w="verylongword";
-  VisitTokens(p,print());
-  VisitTokens(w,print());
-}
diff --git a/decoder/tdict.cc b/decoder/tdict.cc
deleted file mode 100644
index 1f68feae..00000000
--- a/decoder/tdict.cc
+++ /dev/null
@@ -1,154 +0,0 @@
-#define TD_ALLOW_UNDEFINED_WORDIDS 0
-
-// if 1, word ids that are >= end() will give a numeric token name (single per-thread shared buffer), which of course won't be Convert-able back to the id, because it's not added to the dict.  This is a convenience for logging fake token indices.  Any tokens actually added to the dict may cause end() to overlap the range of fake ids you were using - that's up to you to prevent.
-
-#include <stdlib.h>
-#include <cstring>
-#include <sstream>
-#include "Ngram.h"
-#include "dict.h"
-#include "tdict.h"
-#include "Vocab.h"
-#include "stringlib.h"
-#include "threadlocal.h"
-
-using namespace std;
-
-Vocab TD::dict_(0,TD::max_wordid);
-WordID TD::ss=dict_.ssIndex();
-WordID TD::se=dict_.seIndex();
-WordID TD::unk=dict_.unkIndex();
-char const*const TD::ss_str=Vocab_SentStart;
-char const*const TD::se_str=Vocab_SentEnd;
-char const*const TD::unk_str=Vocab_Unknown;
-
-// pre+(i-base)+">" for i in [base,e)
-inline void pad(std::string const& pre,int base,int e) {
-  assert(base<=e);
-  ostringstream o;
-  for (int i=base;i<e;++i) {
-    o.str(pre);
-    o<<(i-base)<<'>';
-    WordID id=TD::Convert(o.str());
-    assert(id==i); // this fails.  why?
-  }
-}
-
-
-namespace {
-struct TD_init {
-  TD_init() {
-    /*
-      // disabled for now since it's breaking trunk
-    assert(TD::Convert(TD::ss_str)==TD::ss);
-    assert(TD::Convert(TD::se_str)==TD::se);
-    assert(TD::Convert(TD::unk_str)==TD::unk);
-    assert(TD::none==Vocab_None);
-    pad("<FILLER",TD::end(),TD::reserved_begin);
-    assert(TD::end()==TD::reserved_begin);
-    int reserved_end=TD::begin();
-    pad("<RESERVED",TD::end(),reserved_end);
-    assert(TD::end()==reserved_end);
-    */
-  }
-};
-
-TD_init td_init;
-}
-
-unsigned int TD::NumWords() {
-  return dict_.numWords();
-}
-WordID TD::end() {
-  return dict_.highIndex();
-}
-
-WordID TD::Convert(const std::string& s) {
-  return dict_.addWord((VocabString)s.c_str());
-}
-
-WordID TD::Convert(char const* s) {
-  return dict_.addWord((VocabString)s);
-}
-
-
-#if TD_ALLOW_UNDEFINED_WORDIDS
-# include "static_utoa.h"
-char undef_prefix[]="UNDEF_";
-static const int undefpre_n=sizeof(undef_prefix)/sizeof(undef_prefix[0]);
-THREADLOCAL char undef_buf[]="UNDEF_________________";
-inline char const* undef_token(WordID w)
-{
-  append_utoa(undef_buf+undefpre_n,w);
-  return undef_buf;
-}
-#endif
-
-const char* TD::Convert(WordID w) {
-#if TD_ALLOW_UNDEFINED_WORDIDS
-  if (w>=dict_.highIndex()) return undef_token(w);
-#endif
-  return dict_.getWord((VocabIndex)w);
-}
-
-
-void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids) {
-  ids->clear();
-  for (vector<string>::const_iterator i = strings.begin(); i != strings.end(); ++i)
-    ids->push_back(TD::Convert(*i));
-}
-
-std::string TD::GetString(const std::vector<WordID>& str) {
-  ostringstream o;
-  for (int i=0;i<str.size();++i) {
-    if (i) o << ' ';
-    o << TD::Convert(str[i]);
-  }
-  return o.str();
-}
-
-std::string TD::GetString(WordID const* i,WordID const* e) {
-  ostringstream o;
-  bool sp=false;
-  for (;i<e;++i,sp=true) {
-    if (sp)
-      o << ' ';
-    o << TD::Convert(*i);
-  }
-  return o.str();
-}
-
-int TD::AppendString(const WordID& w, int pos, int bufsize, char* buffer)
-{
-  const char* word = TD::Convert(w);
-  const char* const end_buf = buffer + bufsize;
-  char* dest = buffer + pos;
-  while(dest < end_buf && *word) {
-    *dest = *word;
-    ++dest;
-    ++word;
-  }
-  return (dest - buffer);
-}
-
-
-namespace {
-struct add_wordids {
-  typedef std::vector<WordID> Ws;
-  Ws *ids;
-  explicit add_wordids(Ws *i) : ids(i) {  }
-  add_wordids(const add_wordids& o) : ids(o.ids) {  }
-  void operator()(char const* s) {
-    ids->push_back(TD::Convert(s));
-  }
-  void operator()(std::string const& s) {
-    ids->push_back(TD::Convert(s));
-  }
-};
-
-}
-
-void TD::ConvertSentence(std::string const& s, std::vector<WordID>* ids) {
-  ids->clear();
-  VisitTokens(s,add_wordids(ids));
-}
diff --git a/decoder/tdict.h b/decoder/tdict.h
deleted file mode 100644
index a7b3ee1c..00000000
--- a/decoder/tdict.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _TDICT_H_
-#define _TDICT_H_
-
-#include <string>
-#include <vector>
-#include "wordid.h"
-#include <assert.h>
-
-class Vocab;
-
-struct TD {
-  /* // disabled for now
-  static const int reserved_begin=10; // allow room for SRI special tokens e.g. unk ss se pause.  tokens until this get "<FILLERi>"
-  static const int n_reserved=10; // 0...n_reserved-1 get token '<RESERVEDi>'
-  static inline WordID reserved(int i) {
-    assert(i>=0 && i<n_reserved);
-    return (WordID)(reserved_begin+i);
-  }
-  static inline WordID begin() {
-    return reserved(n_reserved);
-  }
-  */
-  static const WordID max_wordid=0x7fffffff;
-  static const WordID none=(WordID)-1; // Vocab_None
-  static char const* const ss_str;  //="<s>";
-  static char const* const se_str;  //="</s>";
-  static char const* const unk_str; //="<unk>";
-  static WordID ss,se,unk; // x=Convert(x_str)
-  static WordID end(); // next id to be assigned; [begin,end) give the non-reserved tokens seen so far
-  static Vocab dict_;
-  static void ConvertSentence(std::string const& sent, std::vector<WordID>* ids);
-  static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);
-  static std::string GetString(const std::vector<WordID>& str);
-  static std::string GetString(WordID const* i,WordID const* e);
-  static int AppendString(const WordID& w, int pos, int bufsize, char* buffer);
-  static unsigned int NumWords();
-  static WordID Convert(const std::string& s);
-  static WordID Convert(char const* s);
-  static const char* Convert(WordID w);
-};
-
-struct ToTD {
-  typedef WordID result_type;
-  result_type operator()(std::string const& t) const {
-    return TD::Convert(t);
-  }
-};
-
-
-#endif
diff --git a/decoder/test_data/weights b/decoder/test_data/weights
deleted file mode 100644
index ea70229c..00000000
--- a/decoder/test_data/weights
+++ /dev/null
@@ -1,8 +0,0 @@
-# hiero
-WordPenalty -0.387029
-LanguageModel 0.253195
-PhraseModel_0 0.142926
-PhraseModel_1 0.465119
-PhraseModel_2 0.079503
-CNPosteriorProbability 0.09259
-Inf -inf
diff --git a/decoder/threadlocal.h b/decoder/threadlocal.h
deleted file mode 100755
index d79f5d9d..00000000
--- a/decoder/threadlocal.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef THREADLOCAL_H
-#define THREADLOCAL_H
-
-#ifndef SETLOCAL_SWAP
-# define SETLOCAL_SWAP 0
-#endif
-
-#ifdef BOOST_NO_MT
-
-# define THREADLOCAL
-
-#else
-
-#ifdef _MSC_VER
-
-//FIXME: doesn't work with DLLs ... use TLS apis instead (http://www.boost.org/libs/thread/doc/tss.html)
-# define THREADLOCAL __declspec(thread)
-
-#else
-
-# define THREADLOCAL __thread
-
-#endif
-
-#endif
-
-#include <algorithm> //swap
-
-// naturally, the below are only thread-safe if value is THREADLOCAL
-template <class D>
-struct SaveLocal {
-    D &value;
-    D old_value;
-    SaveLocal(D& val) : value(val), old_value(val) {}
-    ~SaveLocal() {
-#if SETLOCAL_SWAP
-      swap(value,old_value);
-#else
-      value=old_value;
-#endif
-    }
-};
-
-template <class D>
-struct SetLocal {
-    D &value;
-    D old_value;
-    SetLocal(D& val,const D &new_value) : value(val), old_value(
-#if SETLOCAL_SWAP
-      new_value
-#else
-      val
-#endif
-      ) {
-#if SETLOCAL_SWAP
-      swap(value,old_value);
-#else
-      value=new_value;
-#endif
-    }
-    ~SetLocal() {
-#if SETLOCAL_SWAP
-      swap(value,old_value);
-#else
-      value=old_value;
-#endif
-    }
-};
-
-
-#endif
diff --git a/decoder/timing_stats.cc b/decoder/timing_stats.cc
deleted file mode 100644
index fc8e9df1..00000000
--- a/decoder/timing_stats.cc
+++ /dev/null
@@ -1,24 +0,0 @@
-#include "timing_stats.h"
-
-#include <iostream>
-#include "time.h" //cygwin needs
-using namespace std;
-
-map<string, TimerInfo> Timer::stats;
-
-Timer::Timer(const string& timername) : start_t(clock()), cur(stats[timername]) {}
-
-Timer::~Timer() {
-  ++cur.calls;
-  const clock_t end_t = clock();
-  const double elapsed = (end_t - start_t) / 1000000.0;
-  cur.total_time += elapsed;
-}
-
-void Timer::Summarize() {
-  for (map<string, TimerInfo>::iterator it = stats.begin(); it != stats.end(); ++it) {
-    cerr << it->first << ": " << it->second.total_time << " secs (" << it->second.calls << " calls)\n";
-  }
-  stats.clear();
-}
-
diff --git a/decoder/timing_stats.h b/decoder/timing_stats.h
deleted file mode 100644
index 0a9f7656..00000000
--- a/decoder/timing_stats.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _TIMING_STATS_H_
-#define _TIMING_STATS_H_
-
-#include <string>
-#include <map>
-
-struct TimerInfo {
-  int calls;
-  double total_time;
-  TimerInfo() : calls(), total_time() {}
-};
-
-struct Timer {
-  Timer(const std::string& info);
-  ~Timer();
-  static void Summarize();
- private:
-  static std::map<std::string, TimerInfo> stats;
-  clock_t start_t;
-  TimerInfo& cur;
-  Timer(const Timer& other);
-  const Timer& operator=(const Timer& other);
-};
-
-#endif
diff --git a/decoder/weights.cc b/decoder/weights.cc
deleted file mode 100644
index 84647585..00000000
--- a/decoder/weights.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-#include "weights.h"
-
-#include <sstream>
-
-#include "fdict.h"
-#include "filelib.h"
-
-using namespace std;
-
-void Weights::InitFromFile(const std::string& filename, vector<string>* feature_list) {
-  cerr << "Reading weights from " << filename << endl;
-  ReadFile in_file(filename);
-  istream& in = *in_file.stream();
-  assert(in);
-  int weight_count = 0;
-  bool fl = false;
-  while (in) {
-    double val = 0;
-    string buf;
-    getline(in, buf);
-    if (buf.size() == 0) continue;
-    if (buf[0] == '#') continue;
-    for (int i = 0; i < buf.size(); ++i)
-      if (buf[i] == '=') buf[i] = ' ';
-    int start = 0;
-    while(start < buf.size() && buf[start] == ' ') ++start;
-    int end = 0;
-    while(end < buf.size() && buf[end] != ' ') ++end;
-    int fid = FD::Convert(buf.substr(start, end - start));
-    while(end < buf.size() && buf[end] == ' ') ++end;
-    val = strtod(&buf.c_str()[end], NULL);
-    if (isnan(val)) {
-      cerr << FD::Convert(fid) << " has weight NaN!\n";
-      abort();
-    }
-    if (wv_.size() <= fid)
-      wv_.resize(fid + 1);
-    wv_[fid] = val;
-    if (feature_list) { feature_list->push_back(FD::Convert(fid)); }
-    ++weight_count;
-    if (weight_count %   50000 == 0) { cerr << '.' << flush; fl = true; }
-    if (weight_count % 2000000 == 0) { cerr << " [" << weight_count << "]\n"; fl = false; }
-  }
-  if (fl) { cerr << endl; }
-  cerr << "Loaded " << weight_count << " feature weights\n";
-}
-
-void Weights::WriteToFile(const std::string& fname, bool hide_zero_value_features) const {
-  WriteFile out(fname);
-  ostream& o = *out.stream();
-  assert(o);
-  o.precision(17);
-  const int num_feats = FD::NumFeats();
-  for (int i = 1; i < num_feats; ++i) {
-    const double val = (i < wv_.size() ? wv_[i] : 0.0);
-    if (hide_zero_value_features && val == 0.0) continue;
-    o << FD::Convert(i) << ' ' << val << endl;
-  }
-}
-
-void Weights::InitVector(std::vector<double>* w) const {
-  *w = wv_;
-}
-
-void Weights::InitSparseVector(SparseVector<double>* w) const {
-  for (int i = 1; i < wv_.size(); ++i) {
-    const double& weight = wv_[i];
-    if (weight) w->set_value(i, weight);
-  }
-}
-
-void Weights::InitFromVector(const std::vector<double>& w) {
-  wv_ = w;
-  if (wv_.size() > FD::NumFeats())
-    cerr << "WARNING: initializing weight vector has more features than the global feature dictionary!\n";
-  wv_.resize(FD::NumFeats(), 0);
-}
diff --git a/decoder/weights.h b/decoder/weights.h
deleted file mode 100644
index f19aa3ce..00000000
--- a/decoder/weights.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _WEIGHTS_H_
-#define _WEIGHTS_H_
-
-#include <string>
-#include <map>
-#include <vector>
-#include "sparse_vector.h"
-
-class Weights {
- public:
-  Weights() {}
-  void InitFromFile(const std::string& fname, std::vector<std::string>* feature_list = NULL);
-  void WriteToFile(const std::string& fname, bool hide_zero_value_features = true) const;
-  void InitVector(std::vector<double>* w) const;
-  void InitSparseVector(SparseVector<double>* w) const;
-  void InitFromVector(const std::vector<double>& w);
- private:
-  std::vector<double> wv_;
-};
-
-#endif
diff --git a/decoder/weights_test.cc b/decoder/weights_test.cc
deleted file mode 100644
index aa6b3db2..00000000
--- a/decoder/weights_test.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <cassert>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <gtest/gtest.h>
-#include "weights.h"
-#include "tdict.h"
-#include "hg.h"
-
-using namespace std;
-
-class WeightsTest : public testing::Test {
- protected:
-  virtual void SetUp() { }
-  virtual void TearDown() { }
-};
-       
-
-TEST_F(WeightsTest,Load) {
-  Weights w;
-  w.InitFromFile("test_data/weights");
-  w.WriteToFile("-");
-}
-
-int main(int argc, char **argv) {
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/decoder/wordid.h b/decoder/wordid.h
deleted file mode 100644
index fb50bcc1..00000000
--- a/decoder/wordid.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _WORD_ID_H_
-#define _WORD_ID_H_
-
-typedef int WordID;
-
-#endif
diff --git a/extools/Makefile.am b/extools/Makefile.am
index 1e82287d..ee363264 100644
--- a/extools/Makefile.am
+++ b/extools/Makefile.am
@@ -11,20 +11,20 @@ sg_lexer.cc: sg_lexer.l
 	$(LEX) -s -CF -8 -o$@ $<
 
 filter_grammar_SOURCES = filter_grammar.cc extract.cc sentence_pair.cc striped_grammar.cc sg_lexer.cc
-filter_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+filter_grammar_LDADD = $(top_srcdir)/utils/libutils.a -lz
 #filter_grammar_LDFLAGS = -all-static
 
 featurize_grammar_SOURCES = featurize_grammar.cc extract.cc sentence_pair.cc sg_lexer.cc striped_grammar.cc
-featurize_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+featurize_grammar_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 mr_stripe_rule_reduce_SOURCES = mr_stripe_rule_reduce.cc extract.cc sentence_pair.cc striped_grammar.cc sg_lexer.cc
-mr_stripe_rule_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_stripe_rule_reduce_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 extractor_SOURCES = sentence_pair.cc extract.cc extractor.cc striped_grammar.cc
-extractor_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+extractor_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 extractor_monolingual_SOURCES = extractor_monolingual.cc
-extractor_monolingual_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+extractor_monolingual_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils
 
diff --git a/extools/sg_lexer.l b/extools/sg_lexer.l
index 168b835a..d60bd0fc 100644
--- a/extools/sg_lexer.l
+++ b/extools/sg_lexer.l
@@ -1,6 +1,4 @@
 %{
-#include "rule_lexer.h"
-
 #include <string>
 #include <iostream>
 #include <sstream>
@@ -8,7 +6,6 @@
 #include <cassert>
 #include "tdict.h"
 #include "fdict.h"
-#include "trule.h"
 #include "striped_grammar.h"
 
 int lex_line = 0;
diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am
index 688746bb..2b1393ac 100644
--- a/gi/clda/src/Makefile.am
+++ b/gi/clda/src/Makefile.am
@@ -2,5 +2,5 @@ bin_PROGRAMS = clda
 
 clda_SOURCES = clda.cc
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/decoder
-AM_LDFLAGS = $(top_srcdir)/decoder/libcdec.a -lz
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils
+AM_LDFLAGS = $(top_srcdir)/utils/libutils.a -lz
diff --git a/gi/pyp-topics/src/Makefile.am b/gi/pyp-topics/src/Makefile.am
index c22819db..d3f95d0b 100644
--- a/gi/pyp-topics/src/Makefile.am
+++ b/gi/pyp-topics/src/Makefile.am
@@ -4,13 +4,13 @@ contexts_lexer.cc: contexts_lexer.l
 	$(LEX) -s -CF -8 -o$@ $<
 
 pyp_topics_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc pyp-topics.cc train.cc contexts_lexer.cc contexts_corpus.cc
-pyp_topics_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+pyp_topics_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 pyp_contexts_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc pyp-topics.cc contexts_lexer.cc contexts_corpus.cc train-contexts.cc
-pyp_contexts_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+pyp_contexts_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 #mpi_pyp_contexts_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc mpi-pyp-topics.cc contexts_lexer.cc contexts_corpus.cc mpi-train-contexts.cc
-#mpi_pyp_contexts_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+#mpi_pyp_contexts_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I../../../utils
 
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh
index dd721361..b2d235cb 100644
--- a/gi/pyp-topics/src/contexts_corpus.hh
+++ b/gi/pyp-topics/src/contexts_corpus.hh
@@ -10,7 +10,7 @@
 
 #include "corpus.hh"
 #include "contexts_lexer.h"
-#include "../../../decoder/dict.h"
+#include "dict.h"
 
 
 class BackoffGenerator {
diff --git a/gi/pyp-topics/src/contexts_lexer.h b/gi/pyp-topics/src/contexts_lexer.h
index 1b79c6fd..66004990 100644
--- a/gi/pyp-topics/src/contexts_lexer.h
+++ b/gi/pyp-topics/src/contexts_lexer.h
@@ -5,7 +5,7 @@
 #include <vector>
 #include <string>
 
-#include "../../../decoder/dict.h" 
+#include "dict.h" 
 
 struct ContextsLexer {
   typedef std::vector<std::string> Context;
diff --git a/gi/pyp-topics/src/contexts_lexer.l b/gi/pyp-topics/src/contexts_lexer.l
index 7a5d9460..64cd7ca3 100644
--- a/gi/pyp-topics/src/contexts_lexer.l
+++ b/gi/pyp-topics/src/contexts_lexer.l
@@ -101,7 +101,7 @@ INT [\-+]?[0-9]+|inf|[\-+]inf
 
 %%
 
-#include "../../../decoder/filelib.h" 
+#include "filelib.h" 
 
 void ContextsLexer::ReadContexts(std::istream* in, ContextsLexer::ContextsCallback func, void* extra) {
   lex_line = 1;
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
new file mode 100644
index 00000000..7ae14045
--- /dev/null
+++ b/mteval/Makefile.am
@@ -0,0 +1,23 @@
+bin_PROGRAMS = \
+  fast_score \
+  mbr_kbest
+
+if HAVE_GTEST
+noinst_PROGRAMS = \
+  scorer_test
+endif
+
+noinst_LIBRARIES = libmteval.a
+
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc
+
+fast_score_SOURCES = fast_score.cc
+fast_score_LDADD = $(top_srcdir)/utils/libutils.a libmteval.a -lz
+
+mbr_kbest_SOURCES = mbr_kbest.cc
+mbr_kbest_LDADD = $(top_srcdir)/utils/libutils.a libmteval.a -lz
+
+scorer_test_SOURCES = scorer_test.cc
+scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/utils/libutils.a libmteval.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils
diff --git a/mteval/aer_scorer.cc b/mteval/aer_scorer.cc
new file mode 100644
index 00000000..edd4390f
--- /dev/null
+++ b/mteval/aer_scorer.cc
@@ -0,0 +1,135 @@
+#include "aer_scorer.h"
+
+#include <cmath>
+#include <cassert>
+#include <sstream>
+
+#include "tdict.h"
+#include "alignment_pharaoh.h"
+
+using namespace std;
+
+class AERScore : public ScoreBase<AERScore> {
+  friend class AERScorer;
+ public:
+  AERScore() : num_matches(), num_predicted(), num_in_ref() {}
+  AERScore(int m, int p, int r) :
+    num_matches(m), num_predicted(p), num_in_ref(r) {}
+  virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){}
+  virtual void PlusEquals(const Score& delta, const float scale) {
+    const AERScore& other = static_cast<const AERScore&>(delta);
+    num_matches   += scale*other.num_matches;
+    num_predicted += scale*other.num_predicted;
+    num_in_ref    += scale*other.num_in_ref;
+  }
+ virtual void PlusEquals(const Score& delta) {
+    const AERScore& other = static_cast<const AERScore&>(delta);
+    num_matches   += other.num_matches;
+    num_predicted += other.num_predicted;
+    num_in_ref    += other.num_in_ref;
+  }
+
+
+  virtual ScoreP GetZero() const {
+    return ScoreP(new AERScore);
+  }
+  virtual ScoreP GetOne() const {
+    return ScoreP(new AERScore);
+  }
+  virtual void Subtract(const Score& rhs, Score* out) const {
+    AERScore* res = static_cast<AERScore*>(out);
+    const AERScore& other = static_cast<const AERScore&>(rhs);
+    res->num_matches   = num_matches   - other.num_matches;
+    res->num_predicted = num_predicted - other.num_predicted;
+    res->num_in_ref    = num_in_ref    - other.num_in_ref;
+  }
+  float Precision() const {
+    return static_cast<float>(num_matches) / num_predicted;
+  }
+  float Recall() const {
+    return static_cast<float>(num_matches) / num_in_ref;
+  }
+  float ComputePartialScore() const { return 0.0;}
+  virtual float ComputeScore() const {
+    const float prec = Precision();
+    const float rec = Recall();
+    const float f = (2.0 * prec * rec) / (rec + prec);
+    if (isnan(f)) return 1.0f;
+    return 1.0f - f;
+  }
+  virtual bool IsAdditiveIdentity() const {
+    return (num_matches == 0) && (num_predicted == 0) && (num_in_ref == 0);
+  }
+  virtual void ScoreDetails(std::string* out) const {
+    ostringstream os;
+    os << "AER=" << (ComputeScore() * 100.0)
+       << " F=" << (100 - ComputeScore() * 100.0)
+       << " P=" << (Precision() * 100.0) << " R=" << (Recall() * 100.0)
+       << " [" << num_matches << " " << num_predicted << " " << num_in_ref << "]";
+    *out = os.str();
+  }
+  virtual void Encode(std::string*out) const {
+    out->resize(sizeof(int) * 3);
+    *(int *)&(*out)[sizeof(int) * 0] = num_matches;
+    *(int *)&(*out)[sizeof(int) * 1] = num_predicted;
+    *(int *)&(*out)[sizeof(int) * 2] = num_in_ref;
+  }
+ private:
+  int num_matches;
+  int num_predicted;
+  int num_in_ref;
+};
+
+AERScorer::AERScorer(const vector<vector<WordID> >& refs, const string& src) : src_(src) {
+  if (refs.size() != 1) {
+    cerr << "AERScorer can only take a single reference!\n";
+    abort();
+  }
+  ref_ = AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(refs.front()));
+}
+
+// Bounds-checked lookup: a cell lying outside the grid reads as false.
+static inline bool Safe(const Array2D<bool>& a, int i, int j) {
+  const bool outside = i < 0 || j < 0 || i >= a.width() || j >= a.height();
+  if (outside) return false;
+  return a(i,j);
+}
+
+ScoreP AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const {
+  return ScoreP();
+}
+
+ScoreP AERScorer::ScoreCandidate(const vector<WordID>& shyp) const {
+  boost::shared_ptr<Array2D<bool> > hyp =
+    AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(shyp));
+
+  int m = 0;
+  int r = 0;
+  int p = 0;
+  int i_len = ref_->width();
+  int j_len = ref_->height();
+  for (int i = 0; i < i_len; ++i) {
+    for (int j = 0; j < j_len; ++j) {
+      if ((*ref_)(i,j)) {
+        ++r;
+        if (Safe(*hyp, i, j)) ++m;
+      }
+    }
+  }
+  for (int i = 0; i < hyp->width(); ++i)
+    for (int j = 0; j < hyp->height(); ++j)
+      if ((*hyp)(i,j)) ++p;
+
+  return ScoreP(new AERScore(m,p,r));
+}
+
+// Rebuilds a score from the three consecutive native ints (matches, predicted, in-ref) that AERScore::Encode writes.
+ScoreP AERScorer::ScoreFromString(const string& in) {
+  assert(in.size() >= sizeof(int) * 3);  // anything shorter is not an encoded AERScore
+  int counts[3] = {0, 0, 0};
+  in.copy(reinterpret_cast<char*>(counts), sizeof(counts));  // byte-wise copy: no read past the end, no alignment assumption
+  return ScoreP(new AERScore(counts[0], counts[1], counts[2]));
+}
+
+const std::string* AERScorer::GetSource() const { return &src_; }
+
diff --git a/mteval/aer_scorer.h b/mteval/aer_scorer.h
new file mode 100644
index 00000000..6d53d359
--- /dev/null
+++ b/mteval/aer_scorer.h
@@ -0,0 +1,23 @@
+#ifndef _AER_SCORER_
+#define _AER_SCORER_
+
+#include <boost/shared_ptr.hpp>
+
+#include "scorer.h"
+#include "array2d.h"
+
+class AERScorer : public SentenceScorer {
+ public:
+  // when constructing alignment strings from a hypergraph, the source
+  // is necessary.
+  AERScorer(const std::vector<std::vector<WordID> >& refs, const std::string& src = "");
+  ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
+  ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
+  static ScoreP ScoreFromString(const std::string& in);
+  const std::string* GetSource() const;
+ private:
+  std::string src_;
+  boost::shared_ptr<Array2D<bool> > ref_;
+};
+
+#endif
diff --git a/mteval/comb_scorer.cc b/mteval/comb_scorer.cc
new file mode 100644
index 00000000..9fc37868
--- /dev/null
+++ b/mteval/comb_scorer.cc
@@ -0,0 +1,97 @@
+#include "comb_scorer.h"
+
+#include <cstdio>
+
+using namespace std;
+
+class BLEUTERCombinationScore : public ScoreBase<BLEUTERCombinationScore> {
+  friend class BLEUTERCombinationScorer;
+ public:
+  ~BLEUTERCombinationScore();
+  float ComputePartialScore() const { return 0.0;}
+  float ComputeScore() const {
+    return (bleu->ComputeScore() - ter->ComputeScore()) / 2.0f;
+  }
+  void ScoreDetails(string* details) const {
+    char buf[160];
+    sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f",
+      ComputeScore()*100.0f, bleu->ComputeScore()*100.0f, ter->ComputeScore()*100.0f);
+    *details = buf;
+  }
+  void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){}
+
+  void PlusEquals(const Score& delta, const float scale) {
+    bleu->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).bleu, scale);
+    ter->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).ter, scale);
+  }
+  void PlusEquals(const Score& delta) {
+    bleu->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).bleu);
+    ter->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).ter);
+  }
+
+
+
+  ScoreP GetOne() const {
+    BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
+    res->bleu = bleu->GetOne();
+    res->ter = ter->GetOne();
+    return ScoreP(res);
+  }
+  ScoreP GetZero() const {
+    BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
+    res->bleu = bleu->GetZero();
+    res->ter = ter->GetZero();
+    return ScoreP(res);
+  }
+  void Subtract(const Score& rhs, Score* res) const {
+    bleu->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).bleu,
+                   static_cast<BLEUTERCombinationScore*>(res)->bleu.get());
+    ter->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).ter,
+                  static_cast<BLEUTERCombinationScore*>(res)->ter.get());
+  }
+  void Encode(std::string* out) const {
+    string bs, ts;
+    bleu->Encode(&bs);
+    ter->Encode(&ts);
+    out->clear();
+    (*out) += static_cast<char>(bs.size());
+    (*out) += bs;
+    (*out) += ts;
+  }
+  bool IsAdditiveIdentity() const {
+    return bleu->IsAdditiveIdentity() && ter->IsAdditiveIdentity();
+  }
+ private:
+  ScoreP bleu;
+  ScoreP ter;
+};
+
+BLEUTERCombinationScore::~BLEUTERCombinationScore() {
+}
+
+BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> >& refs) {
+  bleu_ = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs);
+  ter_ = SentenceScorer::CreateSentenceScorer(TER, refs);
+}
+
+BLEUTERCombinationScorer::~BLEUTERCombinationScorer() {
+}
+
+ScoreP BLEUTERCombinationScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
+  return ScoreP();
+}
+
+ScoreP BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
+  BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
+  res->bleu = bleu_->ScoreCandidate(hyp);
+  res->ter = ter_->ScoreCandidate(hyp);
+  return ScoreP(res);
+}
+
+ScoreP BLEUTERCombinationScorer::ScoreFromString(const std::string& in) {  // input layout: [length byte][BLEU bytes][TER bytes]
+  const int bss = static_cast<unsigned char>(in[0]);  // plain char may be signed; a BLEU length >= 128 must not turn negative
+  BLEUTERCombinationScore* r = new BLEUTERCombinationScore;
+  r->bleu = SentenceScorer::CreateScoreFromString(IBM_BLEU, in.substr(1, bss));
+  r->ter = SentenceScorer::CreateScoreFromString(TER, in.substr(1 + bss));
+  return ScoreP(r);
+}
diff --git a/mteval/comb_scorer.h b/mteval/comb_scorer.h
new file mode 100644
index 00000000..346be576
--- /dev/null
+++ b/mteval/comb_scorer.h
@@ -0,0 +1,17 @@
+#ifndef _COMB_SCORER_
+#define _COMB_SCORER_
+
+#include "scorer.h"
+
+class BLEUTERCombinationScorer : public SentenceScorer {
+ public:
+  BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs);
+  ~BLEUTERCombinationScorer();
+  ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
+  ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
+  static ScoreP ScoreFromString(const std::string& in);
+ private:
+  ScorerP bleu_,ter_;
+};
+
+#endif
diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc
new file mode 100644
index 00000000..5ee264a6
--- /dev/null
+++ b/mteval/fast_score.cc
@@ -0,0 +1,72 @@
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "filelib.h"
+#include "tdict.h"
+#include "scorer.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
+        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+        ("in_file,i", po::value<string>()->default_value("-"), "Input file")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;
+  if (!conf->count("reference")) {
+    cerr << "Please specify one or more references using -r <REF1.TXT> -r <REF2.TXT> ...\n";
+    flag = true;
+  }
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+// Scores each line of --in_file against the reference with the same index; details go to stderr, the corpus score to stdout.
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string loss_function = conf["loss_function"].as<string>();
+  ScoreType type = ScoreTypeFromString(loss_function);
+  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
+
+  ReadFile rf(conf["in_file"].as<string>());
+  ScoreP acc;
+  istream& in = *rf.stream();
+  int lc = 0;
+  for (; in; ++lc) {
+    string line;
+    getline(in, line);
+    if (line.empty() && !in) break;
+    if (lc >= ds.size()) continue;  // surplus hypothesis: keep counting for the error below, but never index past the references
+    vector<WordID> sent;
+    TD::ConvertSentence(line, &sent);
+    ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
+    if (!acc) { acc = sentscore->GetZero(); }
+    acc->PlusEquals(*sentscore);
+  }
+  if (lc > ds.size()) {
+    cerr << "Too many (" << lc << ") translations in input, expected " << ds.size() << endl;
+    return 1;
+  }
+  if (!acc) { cerr << "No translations found in input, nothing to score\n"; return 1; }  // acc is only set once a line is scored
+  if (lc != ds.size())
+    cerr << "Fewer sentences in hyp (" << lc << ") than refs (" << ds.size() << "): scoring partial set!\n";
+  float score = acc->ComputeScore();
+  string details;
+  acc->ScoreDetails(&details);
+  cerr << details << endl;
+  cout << score << endl;
+  return 0;
+}
diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
new file mode 100644
index 00000000..2867b36b
--- /dev/null
+++ b/mteval/mbr_kbest.cc
@@ -0,0 +1,138 @@
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include "prob.h"
+#include "tdict.h"
+#include "scorer.h"
+#include "filelib.h"
+#include "stringlib.h"
+
+using namespace std;
+
+namespace po = boost::program_options;
+
+// Parses the mbr_kbest command line into *conf; when --help is given, prints the option summary to stderr and exits with status 1.
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)")
+        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function")
+        ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")
+        ("output_list,L", "Show reranked list as output")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+struct LossComparer {
+  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const {
+    return a.second < b.second;
+  }
+};
+
+bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) {
+  static string cache_id;
+  static pair<vector<WordID>, prob_t> cache_pair;
+  list->clear();
+  string cur_id;
+  if (cache_pair.first.size() > 0) {
+    list->push_back(cache_pair);
+    cur_id = cache_id;
+    cache_pair.first.clear();
+  }
+  string line;
+  string tstr;
+  while(*in) {
+    getline(*in, line);
+    if (line.empty()) continue;
+    size_t p1 = line.find(" ||| ");
+    if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
+    size_t p2 = line.find(" ||| ", p1 + 4);
+    if (p2 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
+    size_t p3 = line.rfind(" ||| ");
+    cache_id = line.substr(0, p1);
+    tstr = line.substr(p1 + 5, p2 - p1 - 5);
+    double val = strtod(line.substr(p3 + 5).c_str(), NULL);
+    TD::ConvertSentence(tstr, &cache_pair.first);
+    cache_pair.second.logeq(val);
+    if (cur_id.empty()) cur_id = cache_id;
+    if (cur_id == cache_id) {
+      list->push_back(cache_pair);
+      *sent_id = cur_id;
+      cache_pair.first.clear();
+    } else { break; }
+  }
+  return !list->empty();
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string metric = conf["loss_function"].as<string>();
+  const bool output_list = conf.count("output_list") > 0;
+  const string file = conf["input"].as<string>();
+  const double mbr_scale = conf["scale"].as<double>();
+  cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl;
+
+  ScoreType type = ScoreTypeFromString(metric);
+  vector<pair<vector<WordID>, prob_t> > list;
+  ReadFile rf(file);
+  string sent_id;
+  while(ReadKBestList(rf.stream(), &sent_id, &list)) {
+    vector<prob_t> joints(list.size());
+    const prob_t max_score = pow(list.front().second, mbr_scale);
+    prob_t marginal = prob_t::Zero();
+    for (int i = 0 ; i < list.size(); ++i) {
+      const prob_t joint = pow(list[i].second, mbr_scale) / max_score;
+      joints[i] = joint;
+      // cerr << "list[" << i << "] joint=" << log(joint) << endl;
+      marginal += joint;
+    }
+    int mbr_idx = -1;
+    vector<double> mbr_scores(output_list ? list.size() : 0);
+    double mbr_loss = numeric_limits<double>::max();
+    for (int i = 0 ; i < list.size(); ++i) {
+      vector<vector<WordID> > refs(1, list[i].first);
+      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl;
+      ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs);
+      double wl_acc = 0;
+      for (int j = 0; j < list.size(); ++j) {
+        if (i != j) {
+          ScoreP s = scorer->ScoreCandidate(list[j].first);
+          double loss = 1.0 - s->ComputeScore();
+          if (type == TER || type == AER) loss = 1.0 - loss;
+          double weighted_loss = loss * (joints[j] / marginal);
+          wl_acc += weighted_loss;
+          if ((!output_list) && wl_acc > mbr_loss) break;
+        }
+      }
+      if (output_list) mbr_scores[i] = wl_acc;
+      if (wl_acc < mbr_loss) {
+        mbr_loss = wl_acc;
+        mbr_idx = i;
+      }
+    }
+    // cerr << "ML translation: " << TD::GetString(list[0].first) << endl;
+    cerr << "MBR Best idx: " << mbr_idx << endl;
+    if (output_list) {
+      for (int i = 0; i < list.size(); ++i)
+        list[i].second.logeq(mbr_scores[i]);
+      sort(list.begin(), list.end(), LossComparer());
+      for (int i = 0; i < list.size(); ++i)
+        cout << sent_id << " ||| "
+             << TD::GetString(list[i].first) << " ||| "
+             << log(list[i].second) << endl;
+    } else {
+      cout << TD::GetString(list[mbr_idx].first) << endl;
+    }
+  }
+  return 0;
+}
+
diff --git a/mteval/scorer.cc b/mteval/scorer.cc
new file mode 100644
index 00000000..04eeaa93
--- /dev/null
+++ b/mteval/scorer.cc
@@ -0,0 +1,630 @@
+#include "scorer.h"
+
+#include <boost/lexical_cast.hpp>
+#include <map>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <valarray>
+#include <algorithm>
+
+#include <boost/shared_ptr.hpp>
+
+#include "filelib.h"
+#include "ter.h"
+#include "aer_scorer.h"
+#include "comb_scorer.h"
+#include "tdict.h"
+#include "stringlib.h"
+
+using boost::shared_ptr;
+using namespace std;
+
+void Score::TimesEquals(float scale) {  // base-class fallback for score types that cannot be scaled; `scale` is ignored
+  cerr<<"UNIMPLEMENTED except for BLEU (for MIRA): Score::TimesEquals"<<endl;abort();  // report on stderr, then terminate
+}
+
+// Parse a metric name (matched case-insensitively) into a ScoreType.
+// Names that are not recognized trigger a warning on stderr and map to IBM_BLEU.
+ScoreType ScoreTypeFromString(const string& st) {
+  static const struct { const char* name; ScoreType type; } kNames[] = {
+    { "ser",        SER },
+    { "ter",        TER },
+    { "aer",        AER },
+    { "bleu",       IBM_BLEU },
+    { "ibm_bleu",   IBM_BLEU },
+    { "ibm_bleu_3", IBM_BLEU_3 },
+    { "nist_bleu",  NIST_BLEU },
+    { "koehn_bleu", Koehn_BLEU },
+    { "combi",      BLEU_minus_TER_over_2 },
+  };
+  const string lowered = LowercaseString(st);
+  for (size_t k = 0; k < sizeof(kNames) / sizeof(kNames[0]); ++k)
+    if (lowered == kNames[k].name)
+      return kNames[k].type;
+  cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n";
+  return IBM_BLEU;
+}
+
+static char const* score_names[]={  // printable names, listed in the declaration order of the ScoreType enum
+  "IBM_BLEU", "NIST_BLEU", "Koehn_BLEU", "TER", "BLEU_minus_TER_over_2", "SER", "AER", "IBM_BLEU_3"
+};
+
+std::string StringFromScoreType(ScoreType st) {  // name of st, looked up by its numeric value
+  assert(st>=0 && st<sizeof(score_names)/sizeof(score_names[0]));  // st must be a valid index into score_names
+  return score_names[(int)st];
+}
+
+
+Score::~Score() {}  // out-of-line definitions of the virtual destructors declared in scorer.h
+SentenceScorer::~SentenceScorer() {}
+
+struct length_accum {  // functor for accumulate(): adds one reference's length to the running sum
+  template <class S>
+  float operator()(float sum,S const& ref) const {
+    return sum+ref.size();
+  }
+};
+// Mean length of the sequences in refs; 0 when refs is empty.
+template <class S>
+float avg_reflength(const vector<S>& refs) {  // const reference: avoids copying every reference sentence
+  const size_t n=refs.size();
+  return n?accumulate(refs.begin(),refs.end(),0.,length_accum())/n:0.;
+}
+
+
+float SentenceScorer::ComputeRefLength(const Sentence &hyp) const {  // base-class fallback (the method is virtual in scorer.h)
+  return hyp.size(); // reasonable default? :)  -- the hypothesis's own length; the references are not consulted
+}
+
+const std::string* SentenceScorer::GetSource() const { return NULL; }  // the base scorer carries no source sentence
+
+class SERScore : public ScoreBase<SERScore> {  // exact-match statistics: `correct` matching hypotheses out of `total` scored
+  friend class SERScorer;
+ public:
+  SERScore() : correct(0), total(0) {}
+  float ComputePartialScore() const { return 0.0;}  // partial scoring is not supported here; always 0
+  float ComputeScore() const {
+    return static_cast<float>(correct) / static_cast<float>(total);  // NOTE(review): evaluates 0/0 when total == 0
+  }
+  void ScoreDetails(string* details) const {
+    ostringstream os;
+    os << "SER= " << ComputeScore() << " (" << correct << '/' << total << ')';
+    *details = os.str();
+  }
+  void PlusPartialEquals(const Score& /* delta */, int /* oracle_e_cover */, int /* oracle_f_cover */, int /* src_len */){}  // no-op
+
+  void PlusEquals(const Score& delta, const float scale) {
+    correct += scale*static_cast<const SERScore&>(delta).correct;  // the float product is converted back to int
+    total += scale*static_cast<const SERScore&>(delta).total;
+  }
+  void PlusEquals(const Score& delta) {
+    correct += static_cast<const SERScore&>(delta).correct;  // unchecked downcast: delta must be a SERScore
+    total += static_cast<const SERScore&>(delta).total;
+    }
+  ScoreP GetZero() const { return ScoreP(new SERScore); }
+  ScoreP GetOne() const { return ScoreP(new SERScore); }  // NOTE(review): same all-zero object as GetZero() -- confirm intended
+  void Subtract(const Score& rhs, Score* res) const {  // *res = *this - rhs; res must point to a SERScore
+    SERScore* r = static_cast<SERScore*>(res);
+    r->correct = correct - static_cast<const SERScore&>(rhs).correct;
+    r->total = total - static_cast<const SERScore&>(rhs).total;
+  }
+  void Encode(string* out) const {
+    assert(!"not implemented");  // serialization is unsupported; *out is never written
+  }
+  bool IsAdditiveIdentity() const {
+    return (total == 0 && correct == 0);  // correct is always 0 <= n <= total
+  }
+ private:
+  int correct, total;
+};
+
+std::string SentenceScorer::verbose_desc() const {  // "<desc>,ref0={ <first reference> }"; nothing is printed for ref0 if refs is empty
+  return desc+",ref0={ "+(refs.empty() ? std::string() : TD::GetString(refs[0]))+" }";  // refs[0] on an empty list would be out of bounds
+}
+
+// Exact-match scorer: a hypothesis counts as correct iff it is identical to one of the references.
+class SERScorer : public SentenceScorer {
+ public:
+  SERScorer(const vector<vector<WordID> >& references) : SentenceScorer("SERScorer",references),refs_(references) {}
+  ScoreP ScoreCCandidate(const vector<WordID>& /* hyp */) const { return ScoreP(); }  // unsupported: always a null score
+  ScoreP ScoreCandidate(const vector<WordID>& hyp) const {
+    SERScore* res = new SERScore;
+    res->total = 1;
+    for (int i = 0; i < refs_.size(); ++i)
+      if (refs_[i] == hyp) res->correct = 1;
+    return ScoreP(res);
+  }
+  static ScoreP ScoreFromString(const string& /* data */) {
+    assert(!"Not implemented");
+    return ScoreP();  // reached only with assertions disabled; previously control fell off the end (undefined behavior)
+  }
+ private:
+  vector<vector<WordID> > refs_;
+};
+
+class BLEUScore : public ScoreBase<BLEUScore> {  // BLEU sufficient statistics: per-order n-gram counts plus two lengths
+  friend class BLEUScorerBase;
+ public:
+  BLEUScore(int n) : correct_ngram_hit_counts(float(0),n), hyp_ngram_counts(float(0),n) {  // n entries per array, all zero
+    ref_len = 0;
+    hyp_len = 0; }
+  BLEUScore(int n, int k) :  correct_ngram_hit_counts(float(k),n), hyp_ngram_counts(float(k),n) {  // every count and both lengths set to k
+    ref_len = k;
+    hyp_len = k; }
+  float ComputeScore() const;
+  float ComputePartialScore() const;
+  void ScoreDetails(string* details) const;
+  void TimesEquals(float scale);
+  void PlusEquals(const Score& delta);
+  void PlusEquals(const Score& delta, const float scale);
+  void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len);
+  ScoreP GetZero() const;
+  ScoreP GetOne() const;
+  void Subtract(const Score& rhs, Score* res) const;
+  void Encode(string* out) const;
+  bool IsAdditiveIdentity() const {  // true when hyp_len and every count are zero and ref_len is within 0.1 of zero
+    if (fabs(ref_len) > 0.1f || hyp_len != 0) return false;
+    for (int i = 0; i < correct_ngram_hit_counts.size(); ++i)
+      if (hyp_ngram_counts[i] != 0 ||
+        correct_ngram_hit_counts[i] != 0) return false;
+    return true;
+  }
+ private:
+  int N() const {  // number of n-gram orders tracked (size of the count arrays)
+    return hyp_ngram_counts.size();
+  }
+  float ComputeScore(vector<float>* precs, float* bp) const;
+  float ComputePartialScore(vector<float>* prec, float* bp) const;
+  valarray<float> correct_ngram_hit_counts;  // BLEUScorerBase stores the count for k-grams at index k-1
+  valarray<float> hyp_ngram_counts;
+  float ref_len;
+  float hyp_len;
+};
+
+class BLEUScorerBase : public SentenceScorer {  // n-gram counting shared by the BLEU variants; subclasses supply ComputeRefLength
+ public:
+  BLEUScorerBase(const vector<vector<WordID> >& references,
+                 int n
+             );
+  ScoreP ScoreCandidate(const vector<WordID>& hyp) const;
+  ScoreP ScoreCCandidate(const vector<WordID>& hyp) const;
+  static ScoreP ScoreFromString(const string& in);
+
+  virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0;
+ private:
+  struct NGramCompare {  // lexicographic order on word ids; a proper prefix sorts before its extensions
+    bool operator() (const vector<WordID>& a, const vector<WordID>& b) const {  // bool + const: std::map invokes its comparator on a const object
+      size_t as = a.size();
+      size_t bs = b.size();
+      const size_t s = (as < bs ? as : bs);
+      for (size_t i = 0; i < s; ++i) {
+        int d = a[i] - b[i];
+        if (d < 0) return true;
+        if (d > 0) return false;
+      }
+      return as < bs;
+    }
+  };
+  typedef map<vector<WordID>, pair<int,int>, NGramCompare> NGramCountMap;  // n-gram -> (reference count, hypothesis count)
+  void CountRef(const vector<WordID>& ref) {  // folds the n-gram counts of one reference into ngrams_
+    NGramCountMap tc;  // counts for this reference only
+    vector<WordID> ngram(n_);
+    int s = ref.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (n_ < remaining ? n_ : remaining);  // longest n-gram that can start at position j
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+        ngram.push_back(ref[j + i - 1]);
+        tc[ngram].first++;
+      }
+    }
+    for (NGramCountMap::iterator i = tc.begin(); i != tc.end(); ++i) {
+      pair<int,int>& p = ngrams_[i->first];
+      if (p.first < i->second.first)  // keep the largest count seen in any single reference
+        p = i->second;
+    }
+  }
+
+  void ComputeNgramStats(const vector<WordID>& sent,  // fills *correct / *hyp with per-order counts for sent
+                         valarray<float>* correct,
+                         valarray<float>* hyp,
+                         bool clip_counts)  // true: a match counts only up to the reference count
+    const {
+    assert(correct->size() == n_);
+    assert(hyp->size() == n_);
+    vector<WordID> ngram(n_);
+    (*correct) *= 0;
+    (*hyp) *= 0;
+    int s = sent.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (n_ < remaining ? n_ : remaining);
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+        ngram.push_back(sent[j + i - 1]);
+        pair<int,int>& p = ngrams_[ngram];  // operator[] inserts a (0,0) entry for n-grams not seen in any reference
+        if(clip_counts){
+          if (p.second < p.first) {
+            ++p.second;
+            (*correct)[i-1]++;
+          }}
+        else {
+          ++p.second;
+          (*correct)[i-1]++;
+        }
+        // an n-gram with reference count 0 cannot be extended into a match: count the longer n-grams for hyp only
+        if (!p.first) {
+          for (; i<=k; ++i)
+            (*hyp)[i-1]++;
+        } else {
+          (*hyp)[i-1]++;
+        }
+      }
+    }
+  }
+
+  mutable NGramCountMap ngrams_;  // mutable: the hypothesis counters are updated inside const scoring methods
+  int n_;
+  vector<int> lengths_;
+};
+
+// Rebuilds a BLEUScore from whitespace-separated text: "n ref_len hyp_len" followed by
+// n pairs "correct_count hyp_count" (the layout produced by BLEUScore::Encode).
+ScoreP BLEUScorerBase::ScoreFromString(const string& in) {
+  istringstream is(in);
+  int n = 0;
+  is >> n;
+  if (!is || n < 0) n = 0;  // unreadable or negative order: build an empty score instead of using a garbage size
+  BLEUScore* r = new BLEUScore(n);
+  is >> r->ref_len >> r->hyp_len;
+  for (int i = 0; i < n; ++i)
+    is >> r->correct_ngram_hit_counts[i] >> r->hyp_ngram_counts[i];
+  return ScoreP(r);
+}
+
+// BLEU scorer whose reference length is that of the reference closest in length to the hypothesis.
+class IBM_BLEUScorer : public BLEUScorerBase {
+ public:
+  IBM_BLEUScorer(const vector<vector<WordID> >& references, int n=4)
+      : BLEUScorerBase(references, n), lengths_(references.size()) {
+    for (int r = 0; r < references.size(); ++r)
+      lengths_[r] = references[r].size();
+  }
+  // Length of the reference nearest in length to hyp; on a tie the earlier reference wins.
+  float ComputeRefLength(const vector<WordID>& hyp) const {
+    if (lengths_.size() == 1) return lengths_[0];
+    const int hyp_len = hyp.size();
+    int best_diff = 2000000;
+    int best_len = -1;  // stays -1 when no reference beats the initial best_diff (e.g. no references)
+    for (size_t r = 0; r < lengths_.size(); ++r) {
+      const int diff = abs(lengths_[r] - hyp_len);
+      if (diff < best_diff) { best_diff = diff; best_len = lengths_[r]; }
+    }
+    return best_len;
+  }
+ private:
+  // Lengths of the references, in the order they were given.
+  vector<int> lengths_;
+};
+
+// BLEU scorer whose reference length is always the length of the shortest reference.
+class NIST_BLEUScorer : public BLEUScorerBase {
+ public:
+  NIST_BLEUScorer(const vector<vector<WordID> >& references, int n=4)
+      : BLEUScorerBase(references, n),
+        // Score::GetOne/GetZero construct scorers from an empty list, where references[0] would be out of bounds.
+        shortest_(references.empty() ? 0 : references[0].size()) {
+    for (int i=1; i < references.size(); ++i)
+      if (references[i].size() < shortest_)
+        shortest_ = references[i].size();
+  }
+  float ComputeRefLength(const vector<WordID>& /* hyp */) const { return shortest_; }
+ private:
+  float shortest_;  // length of the shortest reference; 0 when there are none
+};
+
+// BLEU scorer whose reference length is the average length of the references.
+class Koehn_BLEUScorer : public BLEUScorerBase {
+ public:
+  Koehn_BLEUScorer(const vector<vector<WordID> >& references, int n=4)
+      : BLEUScorerBase(references, n), avg_(0) {
+    for (int i=0; i < references.size(); ++i)
+      avg_ += references[i].size();
+    if (!references.empty()) avg_ /= references.size();  // no references: keep 0 rather than computing 0/0 = NaN
+  }
+  float ComputeRefLength(const vector<WordID>& /* hyp */) const {
+    return avg_;
+  }
+ private:
+  float avg_;  // mean reference length; 0 when there are no references
+};
+
+// Factory: builds the scorer for `type` over `refs`; `src` is forwarded only to the AER scorer.
+ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type,
+      const vector<vector<WordID> >& refs,
+      const string& src) {
+  SentenceScorer *scorer = 0;
+  switch (type) {
+    case IBM_BLEU:              scorer = new IBM_BLEUScorer(refs, 4); break;
+    case IBM_BLEU_3:            scorer = new IBM_BLEUScorer(refs, 3); break;
+    case NIST_BLEU:             scorer = new NIST_BLEUScorer(refs, 4); break;
+    case Koehn_BLEU:            scorer = new Koehn_BLEUScorer(refs, 4); break;
+    case AER:                   scorer = new AERScorer(refs, src); break;
+    case TER:                   scorer = new TERScorer(refs); break;
+    case SER:                   scorer = new SERScorer(refs); break;
+    case BLEU_minus_TER_over_2: scorer = new BLEUTERCombinationScorer(refs); break;
+    default:
+      assert(!"Not implemented!");
+  }
+  return ScorerP(scorer);
+}
+
+ScoreP SentenceScorer::GetOne() const {  // scores an empty sentence only to obtain a Score object to call GetOne() on
+  Sentence s;
+  return ScoreCCandidate(s)->GetOne();  // NOTE(review): dereferences the result, but SERScorer::ScoreCCandidate returns a null ScoreP
+}
+
+ScoreP SentenceScorer::GetZero() const {  // scores an empty sentence only to obtain a Score object to call GetZero() on
+  Sentence s;
+  return ScoreCCandidate(s)->GetZero();  // NOTE(review): dereferences the result, but SERScorer::ScoreCCandidate returns a null ScoreP
+}
+
+ScoreP Score::GetOne(ScoreType type) {  // builds a temporary scorer of `type` over an empty reference list
+  std::vector<SentenceScorer::Sentence > refs;
+  return SentenceScorer::CreateSentenceScorer(type,refs)->GetOne();
+}
+
+ScoreP Score::GetZero(ScoreType type) {  // builds a temporary scorer of `type` over an empty reference list
+  std::vector<SentenceScorer::Sentence > refs;
+  return SentenceScorer::CreateSentenceScorer(type,refs)->GetZero();
+}
+
+
+ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) {  // dispatches to the ScoreFromString of the scorer for `type`
+  switch (type) {
+    case IBM_BLEU:
+    case IBM_BLEU_3:
+    case NIST_BLEU:
+    case Koehn_BLEU:
+      return BLEUScorerBase::ScoreFromString(in);
+    case TER:
+      return TERScorer::ScoreFromString(in);
+    case AER:
+      return AERScorer::ScoreFromString(in);
+    case SER:
+      return SERScorer::ScoreFromString(in);
+    case BLEU_minus_TER_over_2:
+      return BLEUTERCombinationScorer::ScoreFromString(in);
+    default: assert(!"Not implemented!");
+  }
+  return ScoreP();  // unknown type with assertions disabled: null score instead of falling off the end (undefined behavior)
+}
+
+void BLEUScore::ScoreDetails(string* details) const {  // writes "BLEU = .., p1|p2|p3|p4 (brev=..)" into *details
+  char buf[2000];
+  vector<float> precs;
+  float bp;
+  const float bleu = ComputeScore(&precs, &bp);
+  // ComputeScore() clears precs and appends one value per n-gram order that has hypothesis counts,
+  // so fewer than four entries may come back; pad with zeros before indexing 0..3 below.
+  if (precs.size() < 4) precs.resize(4, 0.f);
+  sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+       bleu*100.0,
+       precs[0]*100.0,
+       precs[1]*100.0,
+       precs[2]*100.0,
+       precs[3]*100.0,
+       bp);
+  *details = buf;
+}
+
+float BLEUScore::ComputeScore(vector<float>* precs, float* bp) const {  // precs and bp are optional outputs (may be NULL)
+  float log_bleu = 0;
+  if (precs) precs->clear();
+  int count = 0;  // number of n-gram orders that have at least one hypothesis n-gram
+  for (int i = 0; i < N(); ++i) {
+    if (hyp_ngram_counts[i] > 0) {
+      float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]);
+      if (precs) precs->push_back(exp(lprec));
+      log_bleu += lprec;
+      ++count;
+    }
+  }
+  float lbp = 0.0;  // log brevity penalty; non-zero only when the hypothesis is shorter than the reference
+  if (hyp_len < ref_len)
+    lbp = (hyp_len - ref_len) / hyp_len;
+  if (bp) *bp = exp(lbp);
+  if (count == 0) return 0.0f;  // no n-gram statistics at all: the average below would be 0/0 = NaN
+  log_bleu /= static_cast<float>(count);  // mean log precision over the orders counted above
+  return exp(log_bleu + lbp);
+}
+
+
+// Compute the scaled score used for oracle retrieval.
+// precs (optional) receives the per-order precisions; bp (optional) receives the brevity penalty.
+float BLEUScore::ComputePartialScore(vector<float>* precs, float* bp) const {
+  float log_bleu = 0;
+  if (precs) precs->clear();
+  int count = 0;
+  for (int i = 0; i < N(); ++i) {
+    if (hyp_ngram_counts[i] > 0) {
+      float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]);
+      if (precs) precs->push_back(exp(lprec));
+      log_bleu += lprec;
+      ++count;
+    }
+  }
+  float lbp = 0.0;
+  if (hyp_len < ref_len)
+    lbp = (hyp_len - ref_len) / hyp_len;
+  if (bp) *bp = exp(lbp);
+  // With no n-gram statistics at all the average below would be 0/0 = NaN; report a score of 0.
+  if (count == 0) return 0.0f;
+  log_bleu /= static_cast<float>(count);
+  return exp(log_bleu + lbp);
+}
+
+float BLEUScore::ComputePartialScore() const {  // convenience form: no precision or brevity-penalty outputs requested
+  // cerr << "In here first " << endl;
+  return ComputePartialScore(NULL, NULL);
+}
+
+float BLEUScore::ComputeScore() const {  // convenience form: no precision or brevity-penalty outputs requested
+  return ComputeScore(NULL, NULL);
+}
+
+void BLEUScore::Subtract(const Score& rhs, Score* res) const {  // *res = *this - rhs, field by field
+  const BLEUScore& d = static_cast<const BLEUScore&>(rhs);  // unchecked downcasts: rhs and *res must be BLEUScores
+  BLEUScore* o = static_cast<BLEUScore*>(res);
+  o->ref_len = ref_len - d.ref_len;
+  o->hyp_len = hyp_len - d.hyp_len;
+  o->correct_ngram_hit_counts = correct_ngram_hit_counts - d.correct_ngram_hit_counts;
+  o->hyp_ngram_counts = hyp_ngram_counts - d.hyp_ngram_counts;
+}
+
+void BLEUScore::PlusEquals(const Score& delta) {  // adds delta's counts and lengths to this score
+  const BLEUScore& d = static_cast<const BLEUScore&>(delta);  // unchecked downcast: delta must be a BLEUScore
+  correct_ngram_hit_counts += d.correct_ngram_hit_counts;
+  hyp_ngram_counts += d.hyp_ngram_counts;
+  ref_len += d.ref_len;
+  hyp_len += d.hyp_len;
+}
+
+void BLEUScore::TimesEquals(float scale) {  // multiplies every count and both lengths by scale
+  correct_ngram_hit_counts *= scale;
+  hyp_ngram_counts *= scale;
+  ref_len *= scale;
+  hyp_len *= scale;
+}
+
+void BLEUScore::PlusEquals(const Score& delta, const float scale) {  // this += scale * delta; delta itself is not modified
+  const BLEUScore& other = static_cast<const BLEUScore&>(delta);
+  correct_ngram_hit_counts += other.correct_ngram_hit_counts * scale;
+  hyp_ngram_counts += other.hyp_ngram_counts * scale;
+  ref_len += other.ref_len * scale;
+  hyp_len += other.hyp_len * scale;
+}
+
+void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){
+  const BLEUScore& d = static_cast<const BLEUScore&>(delta);  // unchecked downcast: delta must be a BLEUScore
+  correct_ngram_hit_counts += d.correct_ngram_hit_counts;
+  hyp_ngram_counts += d.hyp_ngram_counts;
+  //scale the reference length according to the size of the input sentence covered by this rule
+
+  ref_len *= (float)oracle_f_cover / src_len;  // NOTE(review): divides by src_len without checking for 0
+  ref_len += d.ref_len;
+
+  hyp_len = oracle_e_cover;  // the previous hyp_len is overwritten, not accumulated
+  hyp_len += d.hyp_len;
+}
+
+
+ScoreP BLEUScore::GetZero() const {  // fresh all-zero score with the same number of n-gram orders as this one
+  return ScoreP(new BLEUScore(N()));
+}
+
+ScoreP BLEUScore::GetOne() const {  // fresh score with every count and both lengths set to 1
+  return ScoreP(new BLEUScore(N(),1));
+}
+
+
+void BLEUScore::Encode(string* out) const {  // text form: "n ref_len hyp_len" then n pairs "correct hyp", space separated
+  ostringstream os;
+  const int n = correct_ngram_hit_counts.size();
+  os << n << ' ' << ref_len << ' ' << hyp_len;
+  for (int i = 0; i < n; ++i)
+    os << ' ' << correct_ngram_hit_counts[i] << ' ' << hyp_ngram_counts[i];
+  *out = os.str();  // replaces any previous content of *out
+}
+
+BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references,  // description is "BLEU" followed by n, e.g. "BLEU4"
+                               int n) : SentenceScorer("BLEU"+boost::lexical_cast<string>(n),references),n_(n) {
+  for (vector<vector<WordID> >::const_iterator ci = references.begin();
+       ci != references.end(); ++ci) {
+    lengths_.push_back(ci->size());  // remember each reference's length
+    CountRef(*ci);  // and merge its n-gram counts into ngrams_
+  }
+}
+
+ScoreP BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const {  // BLEU statistics for hyp with clipped match counts
+  BLEUScore* bs = new BLEUScore(n_);
+  for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
+    i->second.second = 0;  // reset the per-hypothesis counters left by the previous call
+  ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts, true);
+  bs->ref_len = ComputeRefLength(hyp);
+  bs->hyp_len = hyp.size();
+  return ScoreP(bs);
+}
+
+ScoreP BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const {  // like ScoreCandidate, but match counts are not clipped
+  BLEUScore* bs = new BLEUScore(n_);
+  for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
+    i->second.second = 0;  // reset the per-hypothesis counters left by the previous call
+  bool clip = false;
+  ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts,clip);
+  bs->ref_len = ComputeRefLength(hyp);
+  bs->hyp_len = hyp.size();
+  return ScoreP(bs);
+}
+
+
+DocScorer::~DocScorer() {  // nothing to release by hand: scorers_ holds shared pointers
+}
+
+void DocScorer::Init(  // reads the reference files line by line and builds one sentence scorer per line
+      const ScoreType type,
+      const vector<string>& ref_files,
+      const string& src_file, bool verbose) {
+  scorers_.clear();
+  // TODO stop using valarray, start using ReadFile
+  cerr << "Loading references (" << ref_files.size() << " files)\n";
+  ReadFile srcrf;
+  if (type == AER && src_file.size() > 0) {  // the source file is opened only for AER
+    cerr << "  (source=" << src_file << ")\n";
+    srcrf.Init(src_file);
+  }
+  std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end());  // one reader per reference file, constructed from its name
+  for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]);  // NOTE(review): Init() after name-construction -- confirm both are needed
+  char buf[64000];  // line buffer; in.getline below reads at most this many bytes per line
+  bool expect_eof = false;  // set once the first reference file is exhausted
+  int line=0;
+  while (ifs[0].get()) {  // NOTE(review): indexes ifs[0] even when ref_files is empty
+    vector<vector<WordID> > refs(ref_files.size());
+    for (int i=0; i < ref_files.size(); ++i) {
+      istream &in=ifs[i].get();
+      if (in.eof()) break;
+      in.getline(buf, 64000);
+      refs[i].clear();
+      if (strlen(buf) == 0) {
+        if (in.eof()) {
+          if (!expect_eof) {
+            assert(i == 0);  // end of input must be detected on the first file
+            expect_eof = true;
+          }
+          break;
+        }
+      } else {
+        TD::ConvertSentence(buf, &refs[i]);
+        assert(!refs[i].empty());
+      }
+      assert(!expect_eof);  // a later file still had data after the first file ended
+    }
+    if (!expect_eof) {
+      string src_line;
+      if (srcrf) {
+        getline(srcrf.get(), src_line);
+        map<string,string> dummy;  // the SGML attributes are parsed but discarded
+        ProcessAndStripSGML(&src_line, &dummy);
+      }
+      scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line)));
+      if (verbose)
+        cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl;
+      ++line;
+    }
+  }
+  cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
+}
+
diff --git a/mteval/scorer.h b/mteval/scorer.h
new file mode 100644
index 00000000..f18c8c7f
--- /dev/null
+++ b/mteval/scorer.h
@@ -0,0 +1,110 @@
+#ifndef SCORER_H_
+#define SCORER_H_
+#include <vector>
+#include <string>
+#include <boost/shared_ptr.hpp>
+//TODO: use intrusive shared_ptr in Score (because there are many of them on ErrorSurfaces)
+#include "wordid.h"
+#include "intrusive_refcount.hpp"
+
+class Score;
+class SentenceScorer;
+typedef boost::intrusive_ptr<Score> ScoreP;
+typedef boost::shared_ptr<SentenceScorer> ScorerP;
+
+class ViterbiEnvelope;
+class ErrorSurface;
+class Hypergraph;  // needed for alignment
+
+//TODO: BLEU N (N separate arg, not part of enum)?
+enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER, AER, IBM_BLEU_3 };
+ScoreType ScoreTypeFromString(const std::string& st);
+std::string StringFromScoreType(ScoreType st);
+
+class Score : public boost::intrusive_refcount<Score> {  // abstract, reference-counted statistics of an evaluation metric
+ public:
+  virtual ~Score();
+  virtual float ComputeScore() const = 0;
+  virtual float ComputePartialScore() const =0;
+  virtual void ScoreDetails(std::string* details) const = 0;
+  std::string ScoreDetails() const {  // by-value convenience form; const because it only calls the const overload above
+    std::string d;
+    ScoreDetails(&d);
+    return d;
+  }
+  virtual void TimesEquals(float scale); // only for bleu; for mira oracle
+  /// same as rhs.TimesEquals(scale);PlusEquals(rhs) except doesn't modify rhs.
+  virtual void PlusEquals(const Score& rhs, const float scale) = 0;
+  virtual void PlusEquals(const Score& rhs) = 0;
+  virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len) = 0;
+  virtual void Subtract(const Score& rhs, Score *res) const = 0;
+  virtual ScoreP GetZero() const = 0;
+  virtual ScoreP GetOne() const = 0;
+  virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta
+                                               // to another score results in no score change
+                                               // under any circumstances
+  virtual void Encode(std::string* out) const = 0;
+  static ScoreP GetZero(ScoreType type);
+  static ScoreP GetOne(ScoreType type);
+  virtual ScoreP Clone() const = 0;
+protected:
+  Score() {  } // we define these explicitly because refcount is noncopyable
+  Score(Score const&) {  }
+};
+
+//TODO: make sure default copy ctors for score types do what we want.
+template <class Derived>
+struct ScoreBase : public Score {  // supplies Clone() for a score class that passes itself as Derived
+  ScoreP Clone() const  {
+    return ScoreP(new Derived(dynamic_cast<Derived const&>(*this)));  // copy-constructs a Derived from *this
+  }
+};
+
+class SentenceScorer {  // scores hypotheses for one sentence against its stored references
+ public:
+  typedef std::vector<WordID> Sentence;
+  typedef std::vector<Sentence> Sentences;
+  std::string desc;  // short label, used by verbose_desc()
+  Sentences refs;  // copy of the references passed to the constructor
+  SentenceScorer(std::string desc="SentenceScorer_unknown", Sentences const& refs=Sentences()) : desc(desc),refs(refs) {  }
+  std::string verbose_desc() const;
+  virtual float ComputeRefLength(const Sentence& hyp) const; // default: hyp.size()
+  virtual ~SentenceScorer();
+  virtual ScoreP GetOne() const;
+  virtual ScoreP GetZero() const;
+  virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0;
+  virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0;
+  virtual const std::string* GetSource() const;  // default: NULL
+  static ScoreP CreateScoreFromString(const ScoreType type, const std::string& in);
+  static ScorerP CreateSentenceScorer(const ScoreType type,
+    const std::vector<Sentence >& refs,
+    const std::string& src = "");
+};
+
+//TODO: should be able to GetOne GetZero without supplying sentence (just type)
+class DocScorer {  // an indexable sequence of sentence scorers, filled by Init()
+ public:
+  ~DocScorer();
+  DocScorer() {  }
+  void Init(const ScoreType type,
+            const std::vector<std::string>& ref_files,
+            const std::string& src_file = "",
+            bool verbose=false
+    );
+  DocScorer(const ScoreType type,  // convenience constructor: forwards all arguments to Init()
+            const std::vector<std::string>& ref_files,
+            const std::string& src_file = "",
+            bool verbose=false
+    )
+  {
+    Init(type,ref_files,src_file,verbose);
+  }
+
+  int size() const { return scorers_.size(); }
+  ScorerP operator[](size_t i) const { return scorers_[i]; }  // no bounds check on i
+ private:
+  std::vector<ScorerP> scorers_;
+};
+
+
+#endif
diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc
new file mode 100644
index 00000000..a07a8c4b
--- /dev/null
+++ b/mteval/scorer_test.cc
@@ -0,0 +1,182 @@
+#include <iostream>
+#include <fstream>
+#include <valarray>
+#include <gtest/gtest.h>
+
+#include "tdict.h"
+#include "scorer.h"
+#include "aer_scorer.h"
+
+using namespace std;
+
+class ScorerTest : public testing::Test {  // fixture: two four-reference sets (refs0, refs1) and two hypotheses
+ protected:
+   virtual void SetUp() {
+     refs0.resize(4);
+     refs1.resize(4);
+     TD::ConvertSentence("export of high-tech products in guangdong in first two months this year reached 3.76 billion us dollars", &refs0[0]);
+     TD::ConvertSentence("guangdong's export of new high technology products amounts to us $ 3.76 billion in first two months of this year", &refs0[1]);
+     TD::ConvertSentence("guangdong exports us $ 3.76 billion worth of high technology products in the first two months of this year", &refs0[2]);
+     TD::ConvertSentence("in the first 2 months this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars .", &refs0[3]);
+     TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter chen ji ) the latest statistics show that from january through february this year , the export of high-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 \% over the same period last year and accounted for 25.5 \% of the total export in the province .", &refs1[0]);
+     TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter : chen ji ) -- latest statistic indicates that guangdong's export of new high technology products amounts to us $ 3.76 billion , up 34.8 \% over corresponding period and accounts for 25.5 \% of the total exports of the province .", &refs1[1]);
+     TD::ConvertSentence("xinhua news agency report of march 16 from guangzhou ( by staff reporter chen ji ) - latest statistics indicate guangdong province exported us $ 3.76 billion worth of high technology products , up 34.8 percent from the same period last year , which account for 25.5 percent of the total exports of the province .", &refs1[2]);
+     TD::ConvertSentence("guangdong , march 16 , ( xinhua ) -- ( chen ji reports ) as the newest statistics shows , in january and feberuary this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 \% than last year , making up 25.5 \% of the province's total .", &refs1[3]);
+     TD::ConvertSentence("one guangdong province will next export us $ 3.76 high-tech product two months first this year 3.76 billion us dollars", &hyp1);
+     TD::ConvertSentence("xinhua news agency , guangzhou , 16th of march ( reporter chen ) -- latest statistics suggest that guangdong exports new advanced technology product totals $ 3.76 million , 34.8 percent last corresponding period and accounts for 25.5 percent of the total export province .", &hyp2);
+   }
+
+   virtual void TearDown() { }
+
+   vector<vector<WordID> > refs0;  // references scored against hyp1 in the tests
+   vector<vector<WordID> > refs1;  // references scored against hyp2 in the tests
+   vector<WordID> hyp1;
+   vector<WordID> hyp2;
+};
+
+TEST_F(ScorerTest, TestCreateFromFiles) {  // only checks that a DocScorer can be constructed from four reference files
+  vector<string> files;
+  files.push_back("test_data/re.txt.0");
+  files.push_back("test_data/re.txt.1");
+  files.push_back("test_data/re.txt.2");
+  files.push_back("test_data/re.txt.3");
+  DocScorer ds(IBM_BLEU, files);
+}
+
+TEST_F(ScorerTest, TestBLEUScorer) {  // sentence BLEU, accumulation, details string, and an Encode/decode round trip
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs0);
+  ScorerP s2 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs1);
+  ScoreP b1 = s1->ScoreCandidate(hyp1);
+  EXPECT_FLOAT_EQ(0.23185077, b1->ComputeScore());
+  ScoreP b2 = s2->ScoreCandidate(hyp2);
+  EXPECT_FLOAT_EQ(0.38101241, b2->ComputeScore());
+  b1->PlusEquals(*b2);  // b1 now holds the statistics of both sentences
+  EXPECT_FLOAT_EQ(0.348854, b1->ComputeScore());
+  EXPECT_FALSE(b1->IsAdditiveIdentity());
+  string details;
+  b1->ScoreDetails(&details);
+  EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details);
+  cerr << details << endl;
+  string enc;
+  b1->Encode(&enc);
+  ScoreP b3 = SentenceScorer::CreateScoreFromString(IBM_BLEU, enc);  // the decoded score must describe itself identically
+  details.clear();
+  cerr << "Encoded BLEU score size: " << enc.size() << endl;
+  b3->ScoreDetails(&details);
+  cerr << details << endl;
+  EXPECT_FALSE(b3->IsAdditiveIdentity());
+  EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details);
+  ScoreP bz = b3->GetZero();
+  EXPECT_TRUE(bz->IsAdditiveIdentity());
+}
+
+TEST_F(ScorerTest, TestTERScorer) {  // TER accumulation, details string, and an Encode/decode round trip
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(TER, refs0);
+  ScorerP s2 = SentenceScorer::CreateSentenceScorer(TER, refs1);
+  string details;
+  ScoreP t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t1->ComputeScore() << endl;
+  ScoreP t2 = s2->ScoreCandidate(hyp2);
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t2->ComputeScore() << endl;
+  t1->PlusEquals(*t2);  // t1 now holds the statistics of both sentences
+  cerr << t1->ComputeScore() << endl;
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  EXPECT_EQ("TER = 44.16,   4|  8| 16|  6 (len=77)", details);
+  string enc;
+  t1->Encode(&enc);
+  ScoreP t3 = SentenceScorer::CreateScoreFromString(TER, enc);  // the decoded score must describe itself identically
+  details.clear();
+  t3->ScoreDetails(&details);
+  EXPECT_EQ("TER = 44.16,   4|  8| 16|  6 (len=77)", details);
+  EXPECT_FALSE(t3->IsAdditiveIdentity());
+  ScoreP tz = t3->GetZero();
+  EXPECT_TRUE(tz->IsAdditiveIdentity());
+}
+
+TEST_F(ScorerTest, TestTERScorerSimple) {  // prints the TER details for a reordered hypothesis; no expectations are checked
+  vector<vector<WordID> > ref(1);
+  TD::ConvertSentence("1 2 3 A B", &ref[0]);
+  vector<WordID> hyp;
+  TD::ConvertSentence("A B 1 2 3", &hyp);
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(TER, ref);
+  string details;
+  ScoreP t1 = s1->ScoreCandidate(hyp);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+}
+
+TEST_F(ScorerTest, TestSERScorerSimple) {  // prints SER details for a non-matching and a matching hypothesis; no expectations are checked
+  vector<vector<WordID> > ref(1);
+  TD::ConvertSentence("A B C D", &ref[0]);
+  vector<WordID> hyp1;  // shadows the fixture member: differs from the reference
+  TD::ConvertSentence("A B C", &hyp1);
+  vector<WordID> hyp2;  // shadows the fixture member: identical to the reference
+  TD::ConvertSentence("A B C D", &hyp2);
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(SER, ref);
+  string details;
+  ScoreP t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  ScoreP t2 = s1->ScoreCandidate(hyp2);
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  t2->PlusEquals(*t1);
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+}
+
+TEST_F(ScorerTest, TestCombiScorer) {  // combined BLEU/TER score: round trip through Encode, then zero + score == score
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(BLEU_minus_TER_over_2, refs0);
+  string details;
+  ScoreP t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t1->ComputeScore() << endl;
+  string enc;
+  t1->Encode(&enc);
+  ScoreP t2 = SentenceScorer::CreateScoreFromString(BLEU_minus_TER_over_2, enc);
+  details.clear();
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  ScoreP cz = t2->GetZero();
+  EXPECT_FALSE(t2->IsAdditiveIdentity());
+  EXPECT_TRUE(cz->IsAdditiveIdentity());
+  cz->PlusEquals(*t2);  // adding t2 to the zero score must reproduce t2's details
+  EXPECT_FALSE(cz->IsAdditiveIdentity());
+  string d2;
+  cz->ScoreDetails(&d2);
+  EXPECT_EQ(d2, details);
+}
+
+TEST_F(ScorerTest, AERTest) {  // AER score details must survive an Encode / CreateScoreFromString round trip
+  vector<vector<WordID> > refs0(1);  // shadows the fixture member
+  TD::ConvertSentence("0-0 2-1 1-2 3-3", &refs0[0]);
+
+  vector<WordID> hyp;
+  TD::ConvertSentence("0-0 1-1", &hyp);
+  AERScorer* as = new AERScorer(refs0);
+  ScoreP x = as->ScoreCandidate(hyp);
+  string details;
+  x->ScoreDetails(&details);
+  cerr << details << endl;
+  string enc;
+  x->Encode(&enc);
+  delete as;  // the scorer is destroyed before the encoded score is decoded
+  cerr << "ENC size: " << enc.size() << endl;
+  ScoreP y = SentenceScorer::CreateScoreFromString(AER, enc);
+  string d2;
+  y->ScoreDetails(&d2);
+  cerr << d2 << endl;
+  EXPECT_EQ(d2, details);
+}
+
+int main(int argc, char **argv) {  // googletest entry point
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
diff --git a/mteval/ter.cc b/mteval/ter.cc
new file mode 100644
index 00000000..cacc5b00
--- /dev/null
+++ b/mteval/ter.cc
@@ -0,0 +1,535 @@
+#include "ter.h"
+
+#include <cstdio>
+#include <cassert>
+#include <iostream>
+#include <limits>
+#include <sstream>
+#include <tr1/unordered_map>
+#include <set>
+#include <valarray>
+#include <boost/functional/hash.hpp>
+#include <stdexcept>
+#include "tdict.h"
+
+const bool ter_use_average_ref_len = true;
+const int ter_short_circuit_long_sentences = -1;
+
+using namespace std;
+using namespace std::tr1;
+
+struct COSTS {  // per-operation edit costs used by the TER computation in this file
+  static const float substitution;
+  static const float deletion;
+  static const float insertion;
+  static const float shift;
+};
+const float COSTS::substitution = 1.0f;  // all four operations cost one unit
+const float COSTS::deletion = 1.0f;
+const float COSTS::insertion = 1.0f;
+const float COSTS::shift = 1.0f;
+
+static const int MAX_SHIFT_SIZE = 10;
+static const int MAX_SHIFT_DIST = 50;
+
+struct Shift {  // a (begin, end, moveto) triple packed into one unsigned int
+  unsigned int d_;  // bits 0-9: begin, bits 10-19: end, bits 20-30: moveto
+  Shift() : d_() {}
+  Shift(int b, int e, int m) : d_() {
+    begin(b);
+    end(e);
+    moveto(m);
+  }
+  inline int begin() const {
+    return d_ & 0x3ff;
+  }
+  inline int end() const {
+    return (d_ >> 10) & 0x3ff;
+  }
+  inline int moveto() const {
+    int m = (d_ >> 20) & 0x7ff;
+    if (m > 1024) { m -= 1024; m *= -1; }  // values above 1024 encode negatives as 1024 + |m|
+    return m;
+  }
+  inline void begin(int b) {
+    d_ &= 0xfffffc00u;  // clear bits 0-9
+    d_ |= (b & 0x3ff);  // only the low 10 bits of b are kept
+  }
+  inline void end(int e) {
+    d_ &= 0xfff003ffu;  // clear bits 10-19
+    d_ |= (e & 0x3ff) << 10;  // only the low 10 bits of e are kept
+  }
+  inline void moveto(int m) {
+    bool neg = (m < 0);
+    if (neg) { m *= -1; m += 1024; }  // store a negative m as 1024 + |m|
+    d_ &= 0xfffff;  // clear everything above bit 19
+    d_ |= (m & 0x7ff) << 20;
+  }
+};
+
+class TERScorerImpl {
+
+ public:
+  enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION };
+
+  // Stores a copy of the reference and the set of distinct words in it.
+  explicit TERScorerImpl(const vector<WordID>& ref) : ref_(ref) {
+    rwexists_.insert(ref.begin(), ref.end());
+  }
+
+  float Calculate(const vector<WordID>& hyp, int* subs, int* ins, int* dels, int* shifts) const {
+    return CalculateAllShifts(hyp, subs, ins, dels, shifts);  // public entry point: forwards unchanged
+  }
+
+  inline int GetRefLength() const {
+    return ref_.size();  // number of words in the stored reference
+  }
+
+ private:
+  vector<WordID> ref_;
+  set<WordID> rwexists_;
+
+  typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap;
+  mutable NgramToIntsMap nmap_;
+
+  static float MinimumEditDistance(  // edit-distance dynamic program between hyp and ref; returns the total cost
+      const vector<WordID>& hyp,
+      const vector<WordID>& ref,
+      vector<TransType>* path) {  // out: one operation per alignment step, first step first
+    vector<vector<TransType> > bmat(hyp.size() + 1, vector<TransType>(ref.size() + 1, MATCH));  // back-pointers
+    vector<vector<float> > cmat(hyp.size() + 1, vector<float>(ref.size() + 1, 0));  // cmat[i][j]: cost of hyp[0..i) vs ref[0..j)
+    for (int i = 0; i <= hyp.size(); ++i)
+      cmat[i][0] = i;  // border cells count one unit per word (the COSTS constants are not consulted here)
+    for (int j = 0; j <= ref.size(); ++j)
+      cmat[0][j] = j;
+    for (int i = 1; i <= hyp.size(); ++i) {
+      const WordID& hw = hyp[i-1];
+      for (int j = 1; j <= ref.size(); ++j) {
+        const WordID& rw = ref[j-1];
+	float& cur_c = cmat[i][j];
+	TransType& cur_b = bmat[i][j];
+
+        if (rw == hw) {  // start from the diagonal move ...
+          cur_c = cmat[i-1][j-1];
+          cur_b = MATCH;
+        } else {
+          cur_c = cmat[i-1][j-1] + COSTS::substitution;
+          cur_b = SUBSTITUTION;
+        }
+	float cwoi = cmat[i-1][j];
+        if (cur_c > cwoi + COSTS::insertion) {  // ... and replace it only when strictly cheaper
+          cur_c = cwoi + COSTS::insertion;
+          cur_b = INSERTION;  // consumes a hyp word only
+        }
+        float cwod = cmat[i][j-1];
+        if (cur_c > cwod + COSTS::deletion) {
+          cur_c = cwod + COSTS::deletion;
+          cur_b = DELETION;  // consumes a ref word only
+        }
+      }
+    }
+
+    // trace back along the best path and record the transition types
+    path->clear();
+    int i = hyp.size();
+    int j = ref.size();
+    while (i > 0 || j > 0) {
+      if (j == 0) {  // reference exhausted: remaining hyp words are insertions
+        --i;
+        path->push_back(INSERTION);
+      } else if (i == 0) {  // hypothesis exhausted: remaining ref words are deletions
+        --j;
+        path->push_back(DELETION);
+      } else {
+        TransType t = bmat[i][j];
+        path->push_back(t);
+        switch (t) {
+          case SUBSTITUTION:
+          case MATCH:
+            --i; --j; break;
+          case INSERTION:
+            --i; break;
+          case DELETION:
+            --j; break;
+        }
+      }
+    }
+    reverse(path->begin(), path->end());  // steps were collected from the end backwards
+    return cmat[hyp.size()][ref.size()];
+  }
+
+  void BuildWordMatches(const vector<WordID>& hyp, NgramToIntsMap* nmap) const {  // maps reference n-grams to their start positions
+    nmap->clear();
+    set<WordID> exists_both;  // words that occur in both hyp and the reference
+    for (int i = 0; i < hyp.size(); ++i)
+      if (rwexists_.find(hyp[i]) != rwexists_.end())
+        exists_both.insert(hyp[i]);
+    for (int start=0; start<ref_.size(); ++start) {
+      if (exists_both.find(ref_[start]) == exists_both.end()) continue;
+      vector<WordID> cp;  // the n-gram ref_[start .. start+len], grown one word per iteration
+      int mlen = min(MAX_SHIFT_SIZE, static_cast<int>(ref_.size() - start));  // n-grams hold at most MAX_SHIFT_SIZE words
+      for (int len=0; len<mlen; ++len) {
+        if (len && exists_both.find(ref_[start + len]) == exists_both.end()) break;  // stop at the first word hyp lacks
+        cp.push_back(ref_[start + len]);
+	(*nmap)[cp].insert(start);
+      }
+    }
+  }
+
+  static void PerformShift(const vector<WordID>& in,  // writes to *out a copy of in with in[start..end] relocated
+    int start, int end, int moveto, vector<WordID>* out) {
+    // cerr << "ps: " << start << " " << end << " " << moveto << endl;
+    out->clear();
+    if (moveto == -1) {  // block goes to the very front
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else if (moveto < start) {  // block goes right after position moveto, which lies before it
+      for (int i = 0; i <= moveto; ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = moveto+1; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else if (moveto > end) {  // block goes right after position moveto, which lies after it
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i <= moveto; ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = moveto+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else {  // start <= moveto <= end: up to (moveto - start) following words are emitted before the block
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; (i < in.size()) && (i <= end + (moveto - start)); ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = (end + (moveto - start))+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    }
+    if (out->size() != in.size()) {  // a shift must keep the word count; dump the inputs before the assert fires
+      cerr << "ps: " << start << " " << end << " " << moveto << endl;
+      cerr << "in=" << TD::GetString(in) << endl;
+      cerr << "out=" << TD::GetString(*out) << endl;
+    }
+    assert(out->size() == in.size());
+    // cerr << "ps: " << TD::GetString(*out) << endl;
+  }
+
+  void GetAllPossibleShifts(const vector<WordID>& hyp,  // collects candidate shifts of hyp spans that also occur in the reference
+      const vector<int>& ralign,
+      const vector<bool>& herr,
+      const vector<bool>& rerr,
+      const int min_size,
+      vector<vector<Shift> >* shifts) const {  // out: (*shifts)[k] receives shifts of spans that are k+1 words long
+    for (int start = 0; start < hyp.size(); ++start) {
+      vector<WordID> cp(1, hyp[start]);
+      NgramToIntsMap::iterator niter = nmap_.find(cp);  // nmap_ is indexed by word sequence (see BuildWordMatches)
+      if (niter == nmap_.end()) continue;
+      bool ok = false;
+      int moveto;
+      for (set<int>::iterator i = niter->second.begin(); i != niter->second.end(); ++i) {
+        moveto = *i;
+        int rm = ralign[moveto];
+        ok = (start != rm &&
+              (rm - start) < MAX_SHIFT_DIST &&
+              (start - rm - 1) < MAX_SHIFT_DIST);  // NOTE(review): strict '<' here, '<=' in the per-span test below -- confirm
+        if (ok) break;
+      }
+      if (!ok) continue;
+      cp.clear();
+      for (int end = start + min_size - 1;
+           ok && end < hyp.size() && end < (start + MAX_SHIFT_SIZE); ++end) {
+        cp.push_back(hyp[end]);
+	vector<Shift>& sshifts = (*shifts)[end - start];
+        ok = false;
+        NgramToIntsMap::iterator niter = nmap_.find(cp);
+        if (niter == nmap_.end()) break;  // this span is not in the map, so stop extending it
+        bool any_herr = false;
+        for (int i = start; i <= end && !any_herr; ++i)
+          any_herr = herr[i];
+        if (!any_herr) {  // span has no hyp-side error: add no shift for it, but keep extending
+          ok = true;
+          continue;
+        }
+        for (set<int>::iterator mi = niter->second.begin();
+             mi != niter->second.end(); ++mi) {
+          int moveto = *mi;
+	  int rm = ralign[moveto];
+	  if (! ((rm != start) &&
+	        ((rm < start) || (rm > end)) &&
+		(rm - start <= MAX_SHIFT_DIST) &&
+		((start - rm - 1) <= MAX_SHIFT_DIST))) continue;
+          ok = true;
+	  bool any_rerr = false;
+	  for (int i = 0; (i <= end - start) && (!any_rerr); ++i)
+            any_rerr = rerr[moveto+i];
+	  if (!any_rerr) continue;  // reference side has no error at this position either
+	  for (int roff = 0; roff <= (end - start); ++roff) {
+	    int rmr = ralign[moveto+roff];
+	    if ((start != rmr) && ((roff == 0) || (rmr != ralign[moveto])))
+	      sshifts.push_back(Shift(start, end, moveto + roff));
+	  }
+        }
+      }
+    }
+  }
+
+  bool CalculateBestShift(const vector<WordID>& cur,  // tries every candidate shift of cur; returns true if one was accepted
+                          const vector<WordID>& hyp,  // not used in this function
+                          float curerr,
+                          const vector<TransType>& path,
+                          vector<WordID>* new_hyp,
+                          float* newerr,
+                          vector<TransType>* new_path) const {
+    vector<bool> herr, rerr;  // per hyp word / per ref word: is it part of an edit?
+    vector<int> ralign;  // per ref word: hyp index reached when it was consumed (-1 before any hyp word)
+    int hpos = -1;
+    for (int i = 0; i < path.size(); ++i) {
+      switch (path[i]) {
+        case MATCH:
+	  ++hpos;
+	  herr.push_back(false);
+	  rerr.push_back(false);
+	  ralign.push_back(hpos);
+          break;
+        case SUBSTITUTION:
+	  ++hpos;
+	  herr.push_back(true);
+	  rerr.push_back(true);
+	  ralign.push_back(hpos);
+          break;
+        case INSERTION:
+	  ++hpos;
+	  herr.push_back(true);
+          break;
+	case DELETION:
+	  rerr.push_back(true);
+	  ralign.push_back(hpos);
+          break;
+      }
+    }
+#if 0
+    cerr << "RALIGN: ";
+    for (int i = 0; i < rerr.size(); ++i)
+      cerr << ralign[i] << " ";
+    cerr << endl;
+    cerr << "RERR: ";
+    for (int i = 0; i < rerr.size(); ++i)
+      cerr << (bool)rerr[i] << " ";
+    cerr << endl;
+    cerr << "HERR: ";
+    for (int i = 0; i < herr.size(); ++i)
+      cerr << (bool)herr[i] << " ";
+    cerr << endl;
+#endif
+
+    vector<vector<Shift> > shifts(MAX_SHIFT_SIZE + 1);
+    GetAllPossibleShifts(cur, ralign, herr, rerr, 1, &shifts);
+    float cur_best_shift_cost = 0;  // stays 0 until a shift has been accepted
+    *newerr = curerr;
+    vector<TransType> cur_best_path;
+    vector<WordID> cur_best_hyp;
+
+    bool res = false;
+    for (int i = shifts.size() - 1; i >=0; --i) {  // longest spans first
+      float curfix = curerr - (cur_best_shift_cost + *newerr);  // improvement achieved so far
+      float maxfix = 2.0f * (1 + i) - COSTS::shift;  // bound used to prune spans of i+1 words
+      if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) break;
+      for (int j = 0; j < shifts[i].size(); ++j) {
+        const Shift& s = shifts[i][j];
+	curfix = curerr - (cur_best_shift_cost + *newerr);
+	maxfix = 2.0f * (1 + i) - COSTS::shift;  // TODO remove?
+        if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) continue;
+	vector<WordID> shifted(cur.size());
+	PerformShift(cur, s.begin(), s.end(), ralign[s.moveto()], &shifted);  // s.moveto() is a ref position; ralign maps it to a hyp position
+	vector<TransType> try_path;
+	float try_cost = MinimumEditDistance(shifted, ref_, &try_path);
+	float gain = (*newerr + cur_best_shift_cost) - (try_cost + COSTS::shift);
+	if (gain > 0.0f || ((cur_best_shift_cost == 0.0f) && (gain == 0.0f))) {  // zero gain is enough only while nothing is accepted yet
+	  *newerr = try_cost;
+	  cur_best_shift_cost = COSTS::shift;
+	  new_path->swap(try_path);
+	  new_hyp->swap(shifted);
+	  res = true;
+	  // cerr << "Found better shift " << s.begin() << "..." << s.end() << " moveto " << s.moveto() << endl;
+	}
+      }
+    }
+
+    return res;
+  }
+
+  // Tallies the substitutions, insertions and deletions on an edit path.
+  // MATCH steps are not counted. The three outputs are reset to zero first
+  // and then incremented through the pointers.
+  static void GetPathStats(const vector<TransType>& path, int* subs, int* ins, int* dels) {
+    *subs = *ins = *dels = 0;
+    for (vector<TransType>::const_iterator step = path.begin(); step != path.end(); ++step) {
+      if (*step == SUBSTITUTION) {
+        ++(*subs);
+      } else if (*step == INSERTION) {
+        ++(*ins);
+      } else if (*step == DELETION) {
+        ++(*dels);
+      }
+    }
+  }
+
+  float CalculateAllShifts(const vector<WordID>& hyp,  // applies accepted shifts until none is found; returns edit distance + shift costs
+      int* subs, int* ins, int* dels, int* shifts) const {
+    BuildWordMatches(hyp, &nmap_);  // rebuilds the mutable member nmap_ on every call
+    vector<TransType> path;
+    float med_cost = MinimumEditDistance(hyp, ref_, &path);
+    float edits = 0;  // accumulated cost of the shifts performed
+    vector<WordID> cur = hyp;
+    *shifts = 0;
+    if (ter_short_circuit_long_sentences < 0 ||
+        ref_.size() < ter_short_circuit_long_sentences) {  // a negative setting never skips the shift search
+      while (true) {
+        vector<WordID> new_hyp;
+        vector<TransType> new_path;
+        float new_med_cost;
+        if (!CalculateBestShift(cur, hyp, med_cost, path, &new_hyp, &new_med_cost, &new_path))
+          break;
+        edits += COSTS::shift;
+        ++(*shifts);
+        med_cost = new_med_cost;
+        path.swap(new_path);
+        cur.swap(new_hyp);
+      }
+    }
+    GetPathStats(path, subs, ins, dels);  // counts come from the final, post-shift path
+    return med_cost + edits;
+  }
+};
+
+// Sufficient statistics for TER: edit counts by type plus the reference length.
+class TERScore : public ScoreBase<TERScore> {
+  friend class TERScorer;
+
+ public:
+  // Indices into stats; kDUMMY_LAST_ENTRY is the number of slots.
+  static const unsigned kINSERTIONS = 0;
+  static const unsigned kDELETIONS = 1;
+  static const unsigned kSUBSTITUTIONS = 2;
+  static const unsigned kSHIFTS = 3;
+  static const unsigned kREF_WORDCOUNT = 4;
+  static const unsigned kDUMMY_LAST_ENTRY = 5;
+
+  TERScore() : stats(0,kDUMMY_LAST_ENTRY) {}  // every count starts at zero
+  float ComputePartialScore() const { return 0.0;}
+  // (insertions + deletions + substitutions + shifts) / reference word count.
+  float ComputeScore() const {
+    float edits = static_cast<float>(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]);
+    return edits / static_cast<float>(stats[kREF_WORDCOUNT]);
+  }
+  void ScoreDetails(string* details) const;
+  void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){}
+  // Adds (scale == 1) or subtracts (scale == -1) delta's counts and returns;
+  // only an unsupported scale reaches the throw.
+  void PlusEquals(const Score& delta, const float scale) {
+    const valarray<int>& d = static_cast<const TERScore&>(delta).stats;
+    if (scale == 1) { stats += d; return; }
+    if (scale == -1) { stats -= d; return; }
+    throw std::runtime_error("TERScore::PlusEquals with scale != +-1");
+  }
+  void PlusEquals(const Score& delta) { stats += static_cast<const TERScore&>(delta).stats; }
+  ScoreP GetZero() const { return ScoreP(new TERScore); }
+  // NOTE(review): builds the same all-zero score as GetZero() -- confirm that is intended.
+  ScoreP GetOne() const { return ScoreP(new TERScore); }
+  void Subtract(const Score& rhs, Score* res) const {
+    static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats;
+  }
+  // Serializes the five counts, space-separated, in index order.
+  void Encode(std::string* out) const {
+    ostringstream os;
+    os << stats[kINSERTIONS] << ' '
+       << stats[kDELETIONS] << ' '
+       << stats[kSUBSTITUTIONS] << ' '
+       << stats[kSHIFTS] << ' '
+       << stats[kREF_WORDCOUNT];
+    *out = os.str();
+  }
+  // True when every count, including the reference length, is zero.
+  bool IsAdditiveIdentity() const {
+    for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i)
+      if (stats[i] != 0) return false;
+    return true;
+  }
+ private:
+  valarray<int> stats;
+};
+
+ScoreP TERScorer::ScoreFromString(const std::string& data) {  // rebuilds a TERScore from whitespace-separated integers
+  istringstream is(data);
+  TERScore* r = new TERScore;
+  is >> r->stats[TERScore::kINSERTIONS]  // field order: insertions, deletions, substitutions, shifts, ref length
+     >> r->stats[TERScore::kDELETIONS]
+     >> r->stats[TERScore::kSUBSTITUTIONS]
+     >> r->stats[TERScore::kSHIFTS]
+     >> r->stats[TERScore::kREF_WORDCOUNT];
+  return ScoreP(r);  // the stream state is not checked after extraction
+}
+
+void TERScore::ScoreDetails(std::string* details) const {  // formats the score and its counts into *details
+  char buf[200];
+  sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)",
+     ComputeScore() * 100.0f,  // shown as a percentage
+     stats[kINSERTIONS],
+     stats[kDELETIONS],
+     stats[kSUBSTITUTIONS],
+     stats[kSHIFTS],
+     stats[kREF_WORDCOUNT]);
+  *details = buf;  // replaces any previous contents of *details
+}
+
+TERScorer::~TERScorer() {  // frees the per-reference implementations allocated in the constructor
+  for (vector<TERScorerImpl*>::iterator i = impl_.begin(); i != impl_.end(); ++i)
+    delete *i;
+}
+
+TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) {  // one TERScorerImpl per reference
+  for (int i = 0; i < refs.size(); ++i)
+    impl_[i] = new TERScorerImpl(refs[i]);  // owned by this object; deleted in the destructor
+}
+
+ScoreP TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const {  // hyp is ignored
+  return ScoreP();  // always returns a default-constructed ScoreP
+}
+
+// Scores hyp against every reference and keeps the edit counts of the
+// lowest-cost one. The stored reference length is the integer average over
+// all references when ter_use_average_ref_len is set.
+ScoreP TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
+  float best_score = numeric_limits<float>::max();
+  TERScore* res = new TERScore;
+  ScoreP owner(res);  // take ownership now so nothing leaks if scoring throws
+  int avg_len = 0;
+  for (int i = 0; i < impl_.size(); ++i)
+    avg_len += impl_[i]->GetRefLength();
+  // With no references the loop below never runs and an all-zero score is
+  // returned; skip the division, which would otherwise divide by zero.
+  if (!impl_.empty()) avg_len /= impl_.size();
+  for (int i = 0; i < impl_.size(); ++i) {
+    int subs, ins, dels, shifts;
+    float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts);
+    if (!(score < best_score)) continue;  // keep the first reference with the strictly lowest cost
+    best_score = score;
+    res->stats[TERScore::kINSERTIONS] = ins;
+    res->stats[TERScore::kDELETIONS] = dels;
+    res->stats[TERScore::kSUBSTITUTIONS] = subs;
+    res->stats[TERScore::kSHIFTS] = shifts;
+    res->stats[TERScore::kREF_WORDCOUNT] =
+        ter_use_average_ref_len ? avg_len : impl_[i]->GetRefLength();
+  }
+  return owner;
+}
diff --git a/mteval/ter.h b/mteval/ter.h
new file mode 100644
index 00000000..43314791
--- /dev/null
+++ b/mteval/ter.h
@@ -0,0 +1,19 @@
+#ifndef _TER_H_
+#define _TER_H_
+
+#include "scorer.h"
+
+class TERScorerImpl;
+
+class TERScorer : public SentenceScorer {  // sentence scorer backed by one TERScorerImpl per reference
+ public:
+  TERScorer(const std::vector<std::vector<WordID> >& references);
+  ~TERScorer();
+  ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
+  ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
+  static ScoreP ScoreFromString(const std::string& data);
+ private:
+  std::vector<TERScorerImpl*> impl_;  // raw pointers to the incomplete type declared above
+};
+
+#endif
diff --git a/mteval/test_data/re.txt.0 b/mteval/test_data/re.txt.0
new file mode 100644
index 00000000..86eff087
--- /dev/null
+++ b/mteval/test_data/re.txt.0
@@ -0,0 +1,5 @@
+erdogan states turkey to reject any pressures to urge it to recognize cyprus
+ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened .
+erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus .
+we will discuss this dossier in the course of membership negotiations . "
+he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . "
diff --git a/mteval/test_data/re.txt.1 b/mteval/test_data/re.txt.1
new file mode 100644
index 00000000..2140f198
--- /dev/null
+++ b/mteval/test_data/re.txt.1
@@ -0,0 +1,5 @@
+erdogan confirms turkey will resist any pressure to recognize cyprus
+ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara .
+erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus .
+we shall discuss this issue in the course of the membership negotiations . "
+he added : " let me be clear - i cannot confine turkey . this is something we do not accept . "
diff --git a/mteval/test_data/re.txt.2 b/mteval/test_data/re.txt.2
new file mode 100644
index 00000000..94e46286
--- /dev/null
+++ b/mteval/test_data/re.txt.2
@@ -0,0 +1,5 @@
+erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus
+ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara .
+erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus .
+we shall discuss this dossier during the negotiations on joining . "
+and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . "
diff --git a/mteval/test_data/re.txt.3 b/mteval/test_data/re.txt.3
new file mode 100644
index 00000000..f87c3308
--- /dev/null
+++ b/mteval/test_data/re.txt.3
@@ -0,0 +1,5 @@
+erdogan stresses that turkey will reject all pressures to force it to recognize cyprus
+ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not .
+erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus .
+we will discuss this file during the negotiations on joining . "
+he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . "
diff --git a/training/Makefile.am b/training/Makefile.am
index 490de774..48b19932 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -14,37 +14,36 @@ noinst_PROGRAMS = \
   optimize_test
 
 atools_SOURCES = atools.cc
-atools_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+atools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 model1_SOURCES = model1.cc
-model1_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 grammar_convert_SOURCES = grammar_convert.cc
-grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 optimize_test_SOURCES = optimize_test.cc optimize.cc
-optimize_test_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+optimize_test_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 collapse_weights_SOURCES = collapse_weights.cc
-collapse_weights_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+collapse_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 lbfgs_test_SOURCES = lbfgs_test.cc
-lbfgs_test_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+lbfgs_test_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 mr_optimize_reduce_SOURCES = mr_optimize_reduce.cc optimize.cc
-mr_optimize_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_optimize_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 mr_em_map_adapter_SOURCES = mr_em_map_adapter.cc
-mr_em_map_adapter_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_em_map_adapter_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 mr_reduce_to_weights_SOURCES = mr_reduce_to_weights.cc
-mr_reduce_to_weights_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_reduce_to_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 mr_em_adapted_reduce_SOURCES = mr_em_adapted_reduce.cc
-mr_em_adapted_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_em_adapted_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 plftools_SOURCES = plftools.cc
-plftools_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
+plftools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval
diff --git a/training/atools.cc b/training/atools.cc
index af62804d..805e3c1d 100644
--- a/training/atools.cc
+++ b/training/atools.cc
@@ -9,6 +9,7 @@
 
 #include "filelib.h"
 #include "aligner.h"
+#include "alignment_pharaoh.h"
 
 namespace po = boost::program_options;
 using namespace std;
@@ -349,9 +350,9 @@ int main(int argc, char **argv) {
     }
     if (line1.empty() && !*in1) break;
     shared_ptr<Array2D<bool> > out(new Array2D<bool>);
-    shared_ptr<Array2D<bool> > a1 = AlignerTools::ReadPharaohAlignmentGrid(line1);
+    shared_ptr<Array2D<bool> > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1);
     if (in2) {
-      shared_ptr<Array2D<bool> > a2 = AlignerTools::ReadPharaohAlignmentGrid(line2);
+      shared_ptr<Array2D<bool> > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2);
       cmd.Apply(*a1, *a2, out.get());
     } else {
       Array2D<bool> dummy;
@@ -359,7 +360,7 @@ int main(int argc, char **argv) {
     }
     
     if (cmd.Result() == 1) {
-      AlignerTools::SerializePharaohFormat(*out, &cout);
+      AlignmentPharaoh::SerializePharaohFormat(*out, &cout);
     }
   }
   if (cmd.Result() == 2)
diff --git a/utils/Makefile.am b/utils/Makefile.am
new file mode 100644
index 00000000..e513febd
--- /dev/null
+++ b/utils/Makefile.am
@@ -0,0 +1,38 @@
+if HAVE_GTEST
+noinst_PROGRAMS = \
+  dict_test \
+  weights_test \
+  logval_test \
+  small_vector_test
+endif
+
+noinst_LIBRARIES = libutils.a
+
+libutils_a_SOURCES = \
+  alignment_pharaoh.cc \
+  b64tools.cc \
+  dict.cc \
+  tdict.cc \
+  fdict.cc \
+  gzstream.cc \
+  filelib.cc \
+  stringlib.cc \
+  sparse_vector.cc \
+  timing_stats.cc \
+  weights.cc
+
+dict_test_SOURCES = dict_test.cc
+dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
+weights_test_SOURCES = weights_test.cc
+weights_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
+logval_test_SOURCES = logval_test.cc
+logval_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
+small_vector_test_SOURCES = small_vector_test.cc
+small_vector_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
+
+AM_LDFLAGS = libutils.a -lz
+
+################################################################
+# do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.
+################################################################
diff --git a/utils/alignment_pharaoh.cc b/utils/alignment_pharaoh.cc
new file mode 100644
index 00000000..890ff565
--- /dev/null
+++ b/utils/alignment_pharaoh.cc
@@ -0,0 +1,77 @@
+#include "utils/alignment_pharaoh.h"
+
+#include <set>
+
+using namespace std;
+
+static bool is_digit(char x) { return x >= '0' && x <= '9'; }  // true only for the characters '0'..'9'
+
+boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(const string& al) {  // parses "x-y x-y ..." into a boolean grid
+  int max_x = 0;
+  int max_y = 0;
+  int i = 0;
+  size_t pos = al.rfind(" ||| ");
+  if (pos != string::npos) { i = pos + 5; }  // only the text after the last " ||| " is parsed
+  while (i < al.size()) {  // pass 1: find the largest x and y so the grid can be sized
+    if (al[i] == '\n' || al[i] == '\r') break;
+    int x = 0;
+    while(i < al.size() && is_digit(al[i])) {
+      x *= 10;
+      x += al[i] - '0';
+      ++i;
+    }
+    if (x > max_x) max_x = x;
+    assert(i < al.size());
+    if(al[i] != '-') {  // anything other than '-' after the first number aborts the process
+      cerr << "BAD ALIGNMENT: " << al << endl;
+      abort();
+    }
+    ++i;
+    int y = 0;
+    while(i < al.size() && is_digit(al[i])) {
+      y *= 10;
+      y += al[i] - '0';
+      ++i;
+    }
+    if (y > max_y) max_y = y;
+    while(i < al.size() && al[i] == ' ') { ++i; }  // only spaces separate the pairs
+  }
+
+  boost::shared_ptr<Array2D<bool> > grid(new Array2D<bool>(max_x + 1, max_y + 1));
+  i = 0;
+  if (pos != string::npos) { i = pos + 5; }
+  while (i < al.size()) {  // pass 2: same scan again, now setting one grid cell per pair
+    if (al[i] == '\n' || al[i] == '\r') break;
+    int x = 0;
+    while(i < al.size() && is_digit(al[i])) {
+      x *= 10;
+      x += al[i] - '0';
+      ++i;
+    }
+    assert(i < al.size());
+    assert(al[i] == '-');  // pass 1 already aborted on malformed input
+    ++i;
+    int y = 0;
+    while(i < al.size() && is_digit(al[i])) {
+      y *= 10;
+      y += al[i] - '0';
+      ++i;
+    }
+    (*grid)(x, y) = true;
+    while(i < al.size() && al[i] == ' ') { ++i; }
+  }
+  // cerr << *grid << endl;
+  return grid;
+}
+
+void AlignmentPharaoh::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {  // writes set cells as "i-j" pairs on one line
+  bool need_space = false;  // false until the first pair is written, so no leading space is emitted
+  for (int i = 0; i < alignment.width(); ++i)
+    for (int j = 0; j < alignment.height(); ++j)
+      if (alignment(i,j)) {
+        if (need_space) (*out) << ' '; else need_space = true;
+        (*out) << i << '-' << j;
+      }
+  (*out) << endl;  // a newline is written even when no cell is set
+}
+
diff --git a/utils/alignment_pharaoh.h b/utils/alignment_pharaoh.h
new file mode 100644
index 00000000..d111c8bf
--- /dev/null
+++ b/utils/alignment_pharaoh.h
@@ -0,0 +1,14 @@
+#ifndef _PHARAOH_ALIGNMENT_H_
+#define _PHARAOH_ALIGNMENT_H_
+
+#include <string>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+#include "array2d.h"
+
+struct AlignmentPharaoh {  // static helpers converting between "x-y" alignment text and Array2D<bool>
+  static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
+  static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);
+};
+
+#endif
diff --git a/utils/array2d.h b/utils/array2d.h
new file mode 100644
index 00000000..e63eda0d
--- /dev/null
+++ b/utils/array2d.h
@@ -0,0 +1,172 @@
+#ifndef ARRAY2D_H_
+#define ARRAY2D_H_
+
+#include <iostream>
+#include <algorithm>
+#include <cassert>
+#include <vector>
+#include <string>
+
+// Dense 2-D array in one std::vector; element (i, j) is stored at i + j * width.
+template<typename T>
+class Array2D {
+ public:
+  typedef typename std::vector<T>::reference reference;
+  typedef typename std::vector<T>::const_reference const_reference;
+  typedef typename std::vector<T>::iterator iterator;
+  typedef typename std::vector<T>::const_iterator const_iterator;
+  Array2D() : width_(0), height_(0) {}
+  Array2D(int w, int h, const T& d = T()) :
+    width_(w), height_(h), data_(w*h, d) {}
+  Array2D(const Array2D& rhs) :
+    width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {}
+  bool empty() const { return data_.empty(); }
+  // Resizes the flat storage in place: elements keep their flat index, not their (i, j).
+  void resize(int w, int h, const T& d = T()) {
+    data_.resize(w * h, d);
+    width_ = w;
+    height_ = h;
+  }
+  const Array2D& operator=(const Array2D& rhs) {
+    data_ = rhs.data_;
+    width_ = rhs.width_;
+    height_ = rhs.height_;
+    return *this;
+  }
+  void fill(const T& v) { data_.assign(data_.size(), v); }
+  int width() const { return width_; }
+  int height() const { return height_; }
+  void clear() { data_.clear(); width_=0; height_=0; }
+  // Element access; the upper bounds of i and j are checked with assert.
+  reference operator()(int i, int j) { return data_[offset(i, j)]; }
+  const_reference operator()(int i, int j) const { return data_[offset(i, j)]; }
+  // [begin_col(j), end_col(j)) spans the width() elements (0, j) .. (width-1, j).
+  iterator begin_col(int j) { return data_.begin() + offset(0,j); }
+  const_iterator begin_col(int j) const { return data_.begin() + offset(0,j); }
+  iterator end_col(int j) { return data_.begin() + offset(0,j) + width_; }
+  const_iterator end_col(int j) const { return data_.begin() + offset(0,j) + width_; }
+  iterator end() { return data_.end(); }
+  const_iterator end() const { return data_.end(); }
+  // Element-wise compound assignment; each operator returns *this.
+  const Array2D<T>& operator*=(const T& x) {
+    std::transform(data_.begin(), data_.end(), data_.begin(),
+        std::bind2nd(std::multiplies<T>(), x));
+    return *this;
+  }
+  const Array2D<T>& operator/=(const T& x) {
+    std::transform(data_.begin(), data_.end(), data_.begin(),
+        std::bind2nd(std::divides<T>(), x));
+    return *this;
+  }
+  // Operands are expected to have equal dimensions; m.data_.size() elements are combined.
+  const Array2D<T>& operator+=(const Array2D<T>& m) {
+    std::transform(m.data_.begin(), m.data_.end(), data_.begin(), data_.begin(), std::plus<T>());
+    return *this;
+  }
+  const Array2D<T>& operator-=(const Array2D<T>& m) {
+    // *this is the first input range so each element becomes (*this) - m;
+    // with m first, std::minus would compute m - (*this).
+    std::transform(data_.begin(), data_.begin() + m.data_.size(), m.data_.begin(),
+        data_.begin(), std::minus<T>());
+    return *this;
+  }
+
+ private:
+  inline int offset(int i, int j) const {
+    assert(i<width_);
+    assert(j<height_);
+    return i + j * width_;
+  }
+
+  int width_;
+  int height_;
+  std::vector<T> data_;
+};
+
+template <typename T>
+Array2D<T> operator*(const Array2D<T>& l, const T& scalar) {  // returns a copy of l after applying *= scalar
+  Array2D<T> res(l);
+  res *= scalar;
+  return res;
+}
+
+template <typename T>
+Array2D<T> operator*(const T& scalar, const Array2D<T>& l) {  // scalar-first form: also a copy of l after *= scalar
+  Array2D<T> res(l);
+  res *= scalar;
+  return res;
+}
+
+template <typename T>
+Array2D<T> operator/(const Array2D<T>& l, const T& scalar) {  // returns a copy of l after applying /= scalar
+  Array2D<T> res(l);
+  res /= scalar;
+  return res;
+}
+
+template <typename T>
+Array2D<T> operator+(const Array2D<T>& l, const Array2D<T>& r) {  // returns a copy of l after applying += r
+  Array2D<T> res(l);
+  res += r;
+  return res;
+}
+
+template <typename T>
+Array2D<T> operator-(const Array2D<T>& l, const Array2D<T>& r) {  // returns a copy of l after applying -= r
+  Array2D<T> res(l);
+  res -= r;  // the result is whatever Array2D::operator-= produces for (l, r)
+  return res;
+}
+
+template <typename T>
+inline std::ostream& operator<<(std::ostream& os, const Array2D<T>& m) {  // generic dump: one text line per i
+  for (int i=0; i<m.width(); ++i) {
+    for (int j=0; j<m.height(); ++j)
+      os << '\t' << m(i,j);  // every value is preceded by a tab
+    os << '\n';
+  }
+  return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const Array2D<bool>& m) {  // boolean grid: '*' for true, '.' for false
+  os << ' ';
+  for (int j=0; j<m.height(); ++j)
+    os << (j%10);  // header ruler: last decimal digit of each j
+  os << "\n";
+  for (int i=0; i<m.width(); ++i) {
+    os << (i%10);  // each line is framed by the last digit of i on both sides
+    for (int j=0; j<m.height(); ++j)
+      os << (m(i,j) ? '*' : '.');
+    os << (i%10) << "\n";
+  }
+  os << ' ';
+  for (int j=0; j<m.height(); ++j)
+    os << (j%10);  // the same ruler again as a footer
+  os << "\n";
+  return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool> >& m) {  // grid whose cells are bit vectors
+  os << ' ';
+  for (int j=0; j<m.height(); ++j)
+    os << (j%10) << "\t";  // header ruler: last digit of each j, tab-separated
+  os << "\n";
+  for (int i=0; i<m.width(); ++i) {
+    os << (i%10);
+    for (int j=0; j<m.height(); ++j) {
+      const std::vector<bool>& ar = m(i,j);
+      for (int k=0; k<ar.size(); ++k)
+        os << (ar[k] ? '*' : '.');  // the bits of consecutive cells are written back to back
+    }
+    os << "\t";  // a single tab follows all cells of the line
+    os << (i%10) << "\n";
+  }
+  os << ' ';
+  for (int j=0; j<m.height(); ++j)
+    os << (j%10) << "\t";
+  os << "\n";
+  return os;
+}
+
+#endif
+
diff --git a/utils/b64tools.cc b/utils/b64tools.cc
new file mode 100644
index 00000000..5512f975
--- /dev/null
+++ b/utils/b64tools.cc
@@ -0,0 +1,59 @@
+#include <iostream>
+#include <cassert>
+
+using namespace std;
+
+namespace B64 {
+
+static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char cd64[]="|$$$}rstuvwxyz{$$$$$$$>?@ABCDEFGHIJKLMNOPQRSTUVW$$$$$$XYZ[\\]^_`abcdefghijklmnopq";
+
+// Writes one 4-character group to *os; `in` must hold 3 readable bytes, the first `len` being data.
+static void encodeblock(const unsigned char* in, std::ostream* os, int len) {
+  char out[4];
+  out[0] = cb64[ in[0] >> 2 ];
+  out[1] = cb64[ ((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4) ];
+  out[2] = (len > 1 ? cb64[ ((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6) ] : '=');
+  out[3] = (len > 2 ? cb64[ in[2] & 0x3f ] : '=');
+  os->write(out, 4);
+}
+
+// Writes the base64 encoding of data[0..size) to *out, '='-padding a short final group.
+void b64encode(const char* data, const size_t size, std::ostream* out) {
+  for (size_t cur = 0; cur < size; cur += 3) {
+    // Zero-filled staging copy: encodeblock reads in[1] even for a 1-byte group, which used to be past the input.
+    unsigned char group[3] = { 0, 0, 0 };
+    const size_t len = (size - cur < 3) ? size - cur : 3;
+    for (size_t k = 0; k < len; ++k) group[k] = static_cast<unsigned char>(data[cur + k]);
+    encodeblock(group, out, static_cast<int>(len));
+  }
+}
+
+static void decodeblock(const unsigned char* in, unsigned char* out) {  // four 6-bit values -> three bytes
+  out[0] = (unsigned char ) (in[0] << 2 | in[1] >> 4);
+  out[1] = (unsigned char ) (in[1] << 4 | in[2] >> 2);
+  out[2] = (unsigned char ) (((in[2] << 6) & 0xc0) | in[3]);
+}
+
+// Decodes insize characters into out, writing at most outsize bytes; false (logged to cerr) on a bad or missing character.
+bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize) {
+  size_t cur = 0, ocur = 0;
+  unsigned char in[4], bytes[3];
+  while(cur < insize) {
+    assert(ocur < outsize);
+    for (int i = 0; i < 4; ++i, ++cur) {
+      if (cur >= insize || data[cur] < 43 || data[cur] > 122) {  // -1 below: the input ended in mid-group
+        std::cerr << "B64 decode error at offset " << cur << " offending character: " << (cur < insize ? (int)data[cur] : -1) << std::endl;
+        return false;
+      }
+      const unsigned char v = cd64[ data[cur] - 43 ];
+      in[i] = (v == '$') ? 0 : v - 62;  // cd64 holds (value + 62); '$' slots, '=' among them, decode as 0
+    }
+    decodeblock(in, bytes);
+    for (int k = 0; k < 3 && ocur < outsize; ++k) out[ocur++] = bytes[k];  // the last group may not fit whole
+  }
+  return true;
+}
+
+}
+
diff --git a/utils/b64tools.h b/utils/b64tools.h
new file mode 100644
index 00000000..c821fc8f
--- /dev/null
+++ b/utils/b64tools.h
@@ -0,0 +1,9 @@
+#ifndef _B64_TOOLS_H_
+#define _B64_TOOLS_H_
+
+namespace B64 {  // NOTE(review): names std::ostream and size_t, yet this header includes nothing itself
+  bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize);  // base64 text -> bytes in out; false on a decode error
+  void b64encode(const char* data, const size_t size, std::ostream* out);  // bytes -> base64 text written to *out
+}
+
+#endif
diff --git a/utils/dict.cc b/utils/dict.cc
new file mode 100644
index 00000000..2d6986c8
--- /dev/null
+++ b/utils/dict.cc
@@ -0,0 +1,27 @@
+#include "dict.h"
+
+#include <string>
+#include <vector>
+
+// Splits str at every occurrence of separator and appends the pieces, empty
+// ones included, to *tokens (which is not cleared first).
+void TokenizeStringSeparator(const std::string& str, const std::string& separator,
+                             std::vector<std::string>* tokens) {
+  std::string::size_type pos = 0;
+  // An empty separator matches at pos without ever advancing it, which used to
+  // loop forever; treat it as "no separator present" and emit str whole.
+  std::string::size_type nextPos = separator.empty() ? std::string::npos : str.find(separator, pos);
+  while (nextPos != std::string::npos) {
+    tokens->push_back(str.substr(pos, nextPos - pos));
+    pos = nextPos + separator.size();
+    nextPos = str.find(separator, pos);
+  }
+  tokens->push_back(str.substr(pos));
+}
+
+
+void Dict::AsVector(const WordID& id, std::vector<std::string>* results) const {
+  const std::string& joined = Convert(id);  // fields are separated by " ||| "
+  results->clear(); TokenizeStringSeparator(joined, " ||| ", results);
+}
+
diff --git a/utils/dict.h b/utils/dict.h
new file mode 100644
index 00000000..348a97e3
--- /dev/null
+++ b/utils/dict.h
@@ -0,0 +1,66 @@
+#ifndef DICT_H_
+#define DICT_H_
+
+
+#include <cassert>
+#include <cstring>
+
+#include <string>
+#include <vector>
+#include "hash.h"
+#include "wordid.h"
+
+// Two-way table between strings and WordIDs. Ids are issued from 1 upward in
+// insertion order; id 0 is reserved and converts to the "<bad0>" placeholder.
+class Dict {
+ typedef
+ HASH_MAP<std::string, WordID, boost::hash<std::string> > Map;
+ public:
+  Dict() : b0_("<bad0>") {
+    HASH_MAP_EMPTY(d_,"<bad1>");
+    words_.reserve(1000);
+  }
+
+  inline int max() const { return words_.size(); }  // number of stored words == largest id issued
+
+  // Id of word; an unseen word gets the next id, or 0 (and is not stored) when frozen.
+  inline WordID Convert(const std::string& word, bool frozen = false) {
+    const Map::iterator hit = d_.find(word);
+    if (hit != d_.end()) return hit->second;
+    if (frozen) return 0;
+    words_.push_back(word);
+    d_[word] = words_.size();
+    return words_.size();
+  }
+
+  // Multi-field convenience: converts the " ||| "-joined form of words.
+  inline WordID Convert(const std::vector<std::string>& words, bool frozen = false)
+  { return Convert(toString(words), frozen); }
+
+  // Concatenates words, placing " ||| " between consecutive entries.
+  static inline std::string toString(const std::vector<std::string>& words) {
+    std::string joined;
+    for (std::vector<std::string>::size_type k = 0; k < words.size(); ++k) {
+      if (k != 0) joined += " ||| ";
+      joined += words[k];
+    }
+    return joined;
+  }
+
+  // String stored for id, which must not exceed max(); id 0 yields the placeholder.
+  inline const std::string& Convert(const WordID& id) const {
+    assert(id <= (int)words_.size());
+    return id == 0 ? b0_ : words_[id-1];
+  }
+
+  void AsVector(const WordID& id, std::vector<std::string>* results) const;
+
+  void clear() { words_.clear(); d_.clear(); }
+
+ private:
+  const std::string b0_;
+  std::vector<std::string> words_;
+  Map d_;
+};
+
+#endif
diff --git a/utils/dict_test.cc b/utils/dict_test.cc
new file mode 100644
index 00000000..2049ec27
--- /dev/null
+++ b/utils/dict_test.cc
@@ -0,0 +1,47 @@
+#include "dict.h"
+
+#include "fdict.h"
+
+#include <iostream>
+#include <gtest/gtest.h>
+#include <cassert>
+
+using namespace std;
+
+class DTest : public testing::Test {  // fixture for the tests below; it carries no state
+ public:
+  DTest() {}
+ protected:
+  virtual void SetUp() { }  // nothing to prepare
+  virtual void TearDown() { }  // nothing to release
+};
+
+// Equal strings must share an id, distinct strings must not, and ids must map back.
+TEST_F(DTest, Convert) {
+  Dict d;
+  const WordID foo_id = d.Convert("foo");
+  const WordID bar_id = d.Convert("bar");
+  const std::string foo_again = "foo";
+  EXPECT_NE(foo_id, bar_id);
+  EXPECT_EQ(foo_id, d.Convert(foo_again));
+  EXPECT_EQ(d.Convert(foo_id), "foo");
+  EXPECT_EQ(d.Convert(bar_id), "bar");
+}
+
+// Feature ids are positive and map back to their name; Escape must rewrite "=" and ";".
+TEST_F(DTest, FDictTest) {
+  const int fid = FD::Convert("First");
+  EXPECT_GT(fid, 0);
+  EXPECT_EQ(FD::Convert(fid), "First");
+  const char* const reserved[] = { "=", ";" };
+  for (int k = 0; k < 2; ++k) {
+    const string escaped = FD::Escape(reserved[k]);
+    cerr << escaped << endl; EXPECT_NE(escaped, reserved[k]);
+  }
+}
+
+int main(int argc, char** argv) {  // test-runner entry point
+  testing::InitGoogleTest(&argc, argv);  // receives argc/argv by address before the tests run
+  return RUN_ALL_TESTS();  // its result becomes the process exit status
+}
+
diff --git a/utils/fdict.cc b/utils/fdict.cc
new file mode 100644
index 00000000..baa0b552
--- /dev/null
+++ b/utils/fdict.cc
@@ -0,0 +1,143 @@
+#include "fdict.h"
+#include "stdlib.h"
+//for malloc (need on cygwin); todo <cstdlib> and std::malloc
+#include <string>
+#include <sstream>
+
+using namespace std;
+
+Dict FD::dict_;  // definition of the static dictionary declared in struct FD
+bool FD::frozen_ = false;  // starts unfrozen; FD::Freeze() sets it to true
+
+std::string FD::Convert(std::vector<WordID> const& v) {  // space-joined names of the ids in v
+    return v.empty() ? std::string() : Convert(&v[0], &v[0] + v.size());  // never dereference end(), nor begin() of an empty vector
+}
+
+// Joins the names of the ids in [b,e) with single spaces.
+std::string FD::Convert(WordID const *b,WordID const* e) {
+  ostringstream joined;
+  const char* sep = "";
+  for (WordID const* cur = b; cur < e; ++cur, sep = " ")
+    joined << sep << FD::Convert(*cur);
+  return joined.str();
+}
+
+// Reads the two hex digits at code[0] and code[1] (either case) and returns
+// their combined value 0..255, or -1 as soon as a non-hex character is met.
+static int HexPairValue(const char * code) {
+  int value = 0;
+  for (int k = 0; k < 2; ++k) {
+    const int c = code[k];
+    int nibble;
+    if (c >= '0' && c <= '9') {
+      nibble = c - '0';
+    } else if (c >= 'A' && c <= 'F') {
+      nibble = c - 'A' + 10;
+    } else if (c >= 'a' && c <= 'f') {
+      nibble = c - 'a' + 10;
+    } else {
+      // bail out at once: code[1] is never examined when code[0] is invalid
+      return -1;
+    }
+    // shift the digits seen so far up one hex place, then append this one
+    value = (value << 4) + nibble;
+  }
+  return value;
+}
+
+// Decodes URL-style escapes from source into dest: '+' becomes a space and
+// %XX becomes the byte with hex value XX; a '%' that is not followed by two
+// hex digits becomes '?'. Every other character is copied unchanged.
+// dest receives at most one byte per source character plus a terminating NUL.
+// Returns the decoded length, not counting the terminator.
+int UrlDecode(const char *source, char *dest)
+{
+  char * const begin = dest;
+
+  for (; *source; ++source) {
+    const char c = *source;
+    if (c == '+') {
+      *dest++ = ' ';
+    } else if (c != '%') {
+      *dest++ = c;
+    } else {
+      // Only parse a hex pair when two further characters really exist;
+      // -1 stands for "no valid pair" in both failure cases.
+      const int value =
+          (source[1] && source[2]) ? HexPairValue(source + 1) : -1;
+      if (value >= 0) {
+        *dest++ = value;
+        // skip the two digits here; the loop increment skips the '%' itself
+        source += 2;
+      } else {
+        *dest++ = '?';
+      }
+    }
+  }
+
+  *dest = 0;
+  return dest - begin;
+}
+
+// Escapes source into dest: ' ' becomes '+', each of "=:;,_|%" becomes %XX
+// (uppercase hex), anything else is copied; dest is then NUL-terminated.
+// Encoding stops early once fewer than 5 of the max bytes are still unused.
+// Returns the number of characters written, not counting the terminator.
+int UrlEncode(const char *source, char *dest, unsigned max) {
+  static const char *digits = "0123456789ABCDEF";
+  char * const start = dest;
+  // Progress is measured from dest itself (the old counter never advanced, so
+  // the limit was dead); adding on the left keeps a small max from wrapping.
+  for (; *source && static_cast<unsigned>(dest - start) + 4 < max; ++source) {
+    const unsigned char ch = (unsigned char)*source;
+    if (ch == ' ') {
+      *dest++ = '+';
+    } else if (strchr("=:;,_| %", ch)) {
+      *dest++ = '%';
+      *dest++ = digits[(ch >> 4) & 0x0F];
+      *dest++ = digits[       ch & 0x0F];
+    } else {
+      *dest++ = *source;
+    }
+  }
+  *dest = 0;
+  // was start - dest, i.e. the length negated; now agrees with UrlDecode
+  return dest - start;
+}
+
+// Returns the URL-decoded copy of encoded (see UrlDecode). The scratch buffer
+// lives on the stack when the size estimate fits in 64 bytes, else on the heap.
+std::string UrlDecodeString(const std::string & encoded) {
+  // generous estimate: the input length, plus 2 per '%', plus 10 of slack
+  size_t needed_length = encoded.length() + 10;
+  for (const char * pch = encoded.c_str(); *pch; pch++)
+    if (*pch == '%') needed_length += 2;
+
+  char stackalloc[64];
+  const bool on_heap = needed_length > sizeof(stackalloc)/sizeof(*stackalloc);
+  char * const buf = on_heap ? (char *)malloc(needed_length) : stackalloc;
+
+  UrlDecode(encoded.c_str(), buf);
+  std::string result(buf);
+  if (on_heap)
+    free(buf);
+  return result;
+}
+
+// Returns the escaped copy of decoded (see UrlEncode). Each input character
+// expands to at most 3 output characters, which the buffer size allows for.
+std::string UrlEncodeString(const std::string & decoded) {
+  const size_t needed_length = decoded.length() * 3 + 3;
+  char stackalloc[64];
+  const bool on_heap = needed_length > sizeof(stackalloc)/sizeof(*stackalloc);
+  char * const buf = on_heap ? (char *)malloc(needed_length) : stackalloc;
+  UrlEncode(decoded.c_str(), buf, needed_length);
+  std::string result(buf);
+  if (on_heap) free(buf);
+  return result;
+}
+
+string FD::Escape(const string& s) {  // thin wrapper: the escaping itself is done by UrlEncodeString
+  return UrlEncodeString(s);
+}
+
diff --git a/utils/fdict.h b/utils/fdict.h
new file mode 100644
index 00000000..f9673023
--- /dev/null
+++ b/utils/fdict.h
@@ -0,0 +1,34 @@
+#ifndef _FDICT_H_
+#define _FDICT_H_
+
+#include <string>
+#include <vector>
+#include "dict.h"
+
+struct FD {  // static facade over one shared Dict of feature names
+  // once the FD is frozen, new features not already in the
+  // dictionary will return 0
+  static void Freeze() {
+    frozen_ = true;  // there is no counterpart here that resets it
+  }
+  static inline int NumFeats() {
+    return dict_.max() + 1;  // +1 counts the reserved id 0 alongside the ids issued
+  }
+  static inline WordID Convert(const std::string& s) {  // name -> id; passes the frozen flag on to Dict
+    return dict_.Convert(s, frozen_);
+  }
+  static inline const std::string& Convert(const WordID& w) {  // id -> name
+    return dict_.Convert(w);
+  }
+  static std::string Convert(WordID const *i,WordID const* e);  // names of the ids in [i,e), space-separated
+  static std::string Convert(std::vector<WordID> const& v);  // same, for a whole vector
+
+  // Escape any string to a form that can be used as the name
+  // of a weight in a weights file
+  static std::string Escape(const std::string& s);
+  static Dict dict_;  // public, so callers can reach the dictionary directly
+ private:
+  static bool frozen_;
+};
+
+#endif
diff --git a/utils/feature_accum.h b/utils/feature_accum.h
new file mode 100755
index 00000000..851b29db
--- /dev/null
+++ b/utils/feature_accum.h
@@ -0,0 +1,129 @@
+#ifndef FEATURE_ACCUM_H
+#define FEATURE_ACCUM_H
+
+#include "ff.h"
+#include "sparse_vector.h"
+#include "value_array.h"
+
+struct SparseFeatureAccumulator : public FeatureVector {  // accumulator that is itself the sparse vector being summed
+  typedef FeatureVector State;
+  SparseFeatureAccumulator() {  }
+  template <class FF>
+  FeatureVector const& describe(FF const& ) { return *this; }  // the argument is unused
+  void Store(FeatureVector *fv) const {  // copies the accumulated values into *fv via set_from
+    fv->set_from(*this);
+  }
+  template <class FF>
+  void Store(FF const& /* ff */,FeatureVector *fv) const {  // same as above; ff is ignored
+    fv->set_from(*this);
+  }
+  template <class FF>
+  void Add(FF const& /* ff */,FeatureVector const& fv) {  // adds a whole vector; ff is ignored
+    (*this)+=fv;
+  }
+  void Add(FeatureVector const& fv) {
+    (*this)+=fv;
+  }
+  /*
+  SparseFeatureAccumulator(FeatureVector const& fv) : State(fv) {}
+  FeatureAccumulator(Features const& fids) {}
+  FeatureAccumulator(Features const& fids,FeatureVector const& fv) : State(fv) {}
+  void Add(Features const& fids,FeatureVector const& fv) {
+    *this += fv;
+  }
+  */
+  void Add(int i,Featval v) {  // adds v to the entry at index i
+    (*this)[i]+=v;
+  }
+  void Add(Features const& fids,int i,Featval v) {  // fids is unused: i indexes the vector directly
+    (*this)[i]+=v;
+  }
+};
+
+struct SingleFeatureAccumulator {  // accumulator holding one running Featval sum
+  typedef Featval State;
+  typedef SingleFeatureAccumulator Self;
+  State v;  // the running sum; value-initialised by the constructor below
+  /*
+  void operator +=(State const& o) {
+    v+=o;
+  }
+  */
+  void operator +=(Self const& s) {
+    v+=s.v;
+  }
+  SingleFeatureAccumulator() : v() {}
+  template <class FF>
+  State const& describe(FF const& ) const { return v; }  // the argument is unused
+
+  template <class FF>
+  void Store(FF const& ff,FeatureVector *fv) const {  // writes the sum under ff.fid_
+    fv->set_value(ff.fid_,v);
+  }
+  void Store(Features const& fids,FeatureVector *fv) const {  // writes the sum under the only id in fids
+    assert(fids.size()==1);
+    fv->set_value(fids[0],v);
+  }
+  /*
+  SingleFeatureAccumulator(Features const& fids) { assert(fids.size()==1); }
+  SingleFeatureAccumulator(Features const& fids,FeatureVector const& fv)
+  {
+    assert(fids.size()==1);
+    v=fv.get_singleton();
+  }
+  */
+
+  template <class FF>
+  void Add(FF const& ff,FeatureVector const& fv) {  // adds fv's value for ff.fid_
+    v+=fv.get(ff.fid_);
+  }
+  void Add(FeatureVector const& fv) {  // adds whatever fv.get_singleton() returns
+    v+=fv.get_singleton();
+  }
+
+  void Add(Features const& fids,FeatureVector const& fv) {  // adds fv's value for fids[0]; the size is not checked here
+    v += fv.get(fids[0]);
+  }
+  void Add(Featval dv) {
+    v+=dv;
+  }
+  void Add(int,Featval dv) {  // the index is ignored: there is only one slot
+    v+=dv;
+  }
+  void Add(FeatureVector const& fids,int i,Featval dv) {  // NOTE(review): fids is a FeatureVector here but Features in the sparse accumulator's matching overload — confirm intent
+    assert(fids.size()==1 && i==0);
+    v+=dv;
+  }
+};
+
+
+#if 0  // everything down to the matching #endif is compiled out
+// omitting this so we can default construct an accum.  might be worth resurrecting in the future
+struct ArrayFeatureAccumulator : public ValueArray<Featval> {
+  typedef ValueArray<Featval> State;
+  template <class Fsa>
+  ArrayFeatureAccumulator(Fsa const& fsa) : State(fsa.features_.size()) { }
+  ArrayFeatureAccumulator(Features const& fids) : State(fids.size()) {  }
+  ArrayFeatureAccumulator(Features const& fids) : State(fids.size()) {  }  // NOTE(review): identical to the constructor on the previous line
+  ArrayFeatureAccumulator(Features const& fids,FeatureVector const& fv) : State(fids.size()) {
+    for (int i=0,e=i<fids.size();i<e;++i)  // NOTE(review): e is initialised from the comparison i<fids.size(), not from the size
+      (*this)[i]=fv.get(i);  // NOTE(review): looks up i itself rather than fids[i] — confirm before re-enabling
+  }
+  State const& describe(Features const& fids) const { return *this; }
+  void Store(Features const& fids,FeatureVector *fv) const {
+    assert(fids.size()==size());
+    for (int i=0,e=i<fids.size();i<e;++i)  // same loop bound as flagged above
+      fv->set_value(fids[i],(*this)[i]);
+  }
+  void Add(Features const& fids,FeatureVector const& fv) {
+    for (int i=0,e=i<fids.size();i<e;++i)  // same loop bound as flagged above
+      (*this)[i]+=fv.get(i);
+  }
+  void Add(FeatureVector const& fids,int i,Featval v) {
+    (*this)[i]+=v;
+  }
+};
+#endif
+
+
+#endif
diff --git a/utils/feature_vector.h b/utils/feature_vector.h
new file mode 100755
index 00000000..be378a6a
--- /dev/null
+++ b/utils/feature_vector.h
@@ -0,0 +1,18 @@
+#ifndef _FEATURE_VECTOR_H_
+#define _FEATURE_VECTOR_H_
+
+#include <vector>
+#include "sparse_vector.h"
+#include "fdict.h"
+
+typedef double Featval;  // scalar type of a single feature value or weight
+typedef SparseVectorList<Featval> FeatureVectorList;
+typedef SparseVector<Featval> FeatureVector;  // same underlying type as WeightVector
+typedef SparseVector<Featval> WeightVector;
+typedef std::vector<Featval> DenseWeightVector;  // dense counterpart, a plain std::vector
+
+inline void sparse_to_dense(WeightVector const& wv,DenseWeightVector *dv) {  // fills *dv from wv by delegating to wv.init_vector
+  wv.init_vector(dv);
+}
+
+#endif
diff --git a/utils/filelib.cc b/utils/filelib.cc
new file mode 100644
index 00000000..79ad2847
--- /dev/null
+++ b/utils/filelib.cc
@@ -0,0 +1,22 @@
+#include "filelib.h"
+
+#include <unistd.h>
+#include <sys/stat.h>
+
+using namespace std;
+
+// True when stat() succeeds on fn, i.e. something of any type exists at that path.
+bool FileExists(const std::string& fn) {
+  struct stat probe;
+  return stat(fn.c_str(), &probe) == 0;
+}
+
+// True only when dir names an existing directory. The file-type field of
+// st_mode is compared as a whole via S_ISDIR: testing the S_IFDIR bit alone can
+// also match other file types whose type code happens to include that bit.
+bool DirectoryExists(const std::string& dir) {
+  struct stat status;
+  // a single stat() whose result is checked, so status is never read after a failure
+  return stat(dir.c_str(), &status) == 0 && S_ISDIR(status.st_mode);
+}
+
diff --git a/utils/filelib.h b/utils/filelib.h
new file mode 100644
index 00000000..b9fef9a7
--- /dev/null
+++ b/utils/filelib.h
@@ -0,0 +1,106 @@
+#ifndef _FILELIB_H_
+#define _FILELIB_H_
+
+#include <cassert>
+#include <string>
+#include <iostream>
+#include <cstdlib>
+#include <boost/shared_ptr.hpp>
+#include <stdexcept>
+#include "gzstream.h"
+#include "null_deleter.h"
+
+bool FileExists(const std::string& file_name);
+bool DirectoryExists(const std::string& dir_name);
+
+// reads from standard in if filename is -
+// uncompresses if file ends with .gz
+// otherwise, reads from a normal file
+
+template <class Stream>
+struct BaseFile {  // shared_ptr-held stream plus the name it was opened under
+  typedef Stream S;
+  typedef boost::shared_ptr<Stream> PS;
+  void Reset() {  // drops this object's reference to the stream
+    ps_.reset();
+  }
+  bool is_null() const { return !ps_; }
+  operator bool() const {  // true while a stream is held
+    return ps_;
+  }
+  S* stream() { return ps_.get(); }
+  S* operator->() { return ps_.get(); } // compat with old ReadFile * -> new Readfile. remove?
+  S &operator *() const { return get(); }
+  S &get() const { return *ps_; }  // no null check is made here
+  bool is_std() {  // "-" is the name the subclasses map to std::cin / std::cout
+    return filename_=="-";
+  }
+  std::string filename_;
+protected:
+  void error(std::string const& reason,std::string const& filename) {  // always throws; note the order: reason first, then filename
+    throw std::runtime_error("File "+filename+" - "+reason);
+  }
+
+  PS ps_;
+  static bool EndsWith(const std::string& f, const std::string& suf) {  // f must be strictly longer than suf to match
+    return (f.size() > suf.size()) && (f.rfind(suf) == f.size() - suf.size());
+  }
+};
+
+// Opens a named input: "-" is std::cin, a name ending in ".gz" is read through
+// igzstream, anything else through std::ifstream. Init() logs to cerr and then
+// throws std::runtime_error (via error()) if the file is missing or cannot be opened.
+class ReadFile : public BaseFile<std::istream> {
+ public:
+  ReadFile() {  }
+  explicit ReadFile(const std::string& filename) { Init(filename); }
+  void Init(const std::string& filename) {
+    filename_=filename;
+    if (is_std()) {
+      ps_=PS(&std::cin,null_deleter());
+    } else {
+      if (!FileExists(filename)) {
+        std::cerr << "File does not exist: " << filename << std::endl;
+        error(" couldn't read nonexistant file.",filename);  // error() takes (reason, filename); they were passed swapped
+        abort();
+      }
+      char const* file=filename_.c_str(); // just in case the gzstream keeps using the filename for longer than the constructor, e.g. inflateReset2.  warning in valgrind that I'm hoping will disappear - it makes no sense.
+      ps_=PS(EndsWith(filename, ".gz") ?
+                static_cast<std::istream*>(new igzstream(file)) :
+             static_cast<std::istream*>(new std::ifstream(file)));
+      if (!*ps_) {
+        std::cerr << "Failed to open " << filename << std::endl;
+        error(" open for reading failed.",filename);  // (reason, filename), as above
+        abort();
+      }
+    }
+  }
+};
+
+// Opens a named output: "-" is std::cout, a name ending in ".gz" is written
+// through ogzstream, anything else through std::ofstream. Init() logs to cerr and
+// throws std::runtime_error (via error()) when the stream cannot be opened.
+class WriteFile : public BaseFile<std::ostream> {
+ public:
+  WriteFile() {}
+  explicit WriteFile(std::string const& filename) { Init(filename); }
+  void Init(const std::string& filename) {
+    filename_=filename;
+    if (is_std()) {
+      ps_=PS(&std::cout,null_deleter());
+    } else {
+      char const* file=filename_.c_str(); // just in case the gzstream keeps using the filename for longer than the constructor, e.g. inflateReset2.  warning in valgrind that I'm hoping will disappear - it makes no sense.
+      ps_=PS(EndsWith(filename, ".gz") ?
+                static_cast<std::ostream*>(new ogzstream(file)) :
+                static_cast<std::ostream*>(new std::ofstream(file)));
+      if (!*ps_) {
+        std::cerr << "Failed to open " << filename << std::endl;
+        error(" open for writing failed.",filename);  // error() takes (reason, filename); they were passed swapped
+        abort();
+      }
+    }
+  }
+  ~WriteFile() { if (ps_) get() << std::flush; }  // flush whatever is still buffered while a stream is held
+};
+
+#endif
diff --git a/utils/gzstream.cc b/utils/gzstream.cc
new file mode 100644
index 00000000..88cd1bd2
--- /dev/null
+++ b/utils/gzstream.cc
@@ -0,0 +1,182 @@
+// ============================================================================
+// gzstream, C++ iostream classes wrapping the zlib compression library.
+// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+// ============================================================================
+//
+// File          : gzstream.C
+// Revision      : $Revision: 1.7 $
+// Revision_date : $Date: 2003/01/08 14:41:27 $
+// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+//
+// Standard streambuf implementation following Nicolai Josuttis, "The
+// Standard C++ Library".
+// ============================================================================
+
+#include <gzstream.h>
+#include <iostream>
+#include <cstring>  // for memcpy
+#include <stdexcept>
+
+#ifdef GZSTREAM_NAMESPACE
+namespace GZSTREAM_NAMESPACE {
+#endif
+
+// ----------------------------------------------------------------------------
+// Internal classes to implement gzstream. See header file for user classes.
+// ----------------------------------------------------------------------------
+
+// --------------------------------------
+// class gzstreambuf:
+// --------------------------------------
+
+gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {  // returns this on success, 0 if already open or the mode is unsupported
+    if ( is_open())
+        return (gzstreambuf*)0;
+    mode = open_mode;
+    // no append nor read/write mode
+    if ((mode & std::ios::ate) || (mode & std::ios::app)
+        || ((mode & std::ios::in) && (mode & std::ios::out)))
+        return (gzstreambuf*)0;
+    const int Nmode=10;
+    char  fmode[Nmode];  // gzopen mode string: 'r' or 'w', then 'b', then NUL fill
+    char* fmodeptr = fmode;
+    if ( mode & std::ios::in)
+        *fmodeptr++ = 'r';
+    else if ( mode & std::ios::out)
+        *fmodeptr++ = 'w';
+    *fmodeptr++ = 'b';
+    while (fmodeptr<fmode+Nmode) // hopefully will help valgrind
+      *fmodeptr++ = '\0';
+    file = gzopen( name, fmode);
+    if (!file) handle_gzerror();  // NOTE(review): enters handle_gzerror() with a null handle — confirm gzerror() tolerates that
+    if (file == 0)  // only reached if handle_gzerror() returns; its body ends in an unconditional throw
+        return (gzstreambuf*)0;
+    opened = 1;
+    return this;
+}
+
+// Flushes and closes the file: this on success, 0 if nothing was open; throws std::runtime_error if gzclose() fails.
+gzstreambuf * gzstreambuf::close() {
+    if ( ! is_open())
+        return (gzstreambuf*)0;
+    sync();
+    opened = 0;
+    // gzclose() deallocates the handle, so gzerror(file, ...) (what handle_gzerror() does) must not follow it.
+    if ( gzclose( file) != Z_OK)
+        throw std::runtime_error("gzstreambuf error: gzclose failed");
+    return this;
+}
+
+void gzstreambuf::handle_gzerror() {  // always throws std::runtime_error describing the failure
+    int errnum = Z_OK;  // initialised, because gzerror() is skipped when there is no handle
+    const char *errmsg = file ? gzerror(file,&errnum) : 0;  // open() gets here with file == 0 after gzopen() fails
+    if (errnum==Z_DATA_ERROR) errmsg="CRC error reading gzip"; else if (!errmsg) errmsg="could not open file";  // never append a null pointer
+    throw std::runtime_error(std::string("gzstreambuf error: ")+errmsg);
+}
+
+int gzstreambuf::underflow() { // used for input buffer only
+    if ( gptr() && ( gptr() < egptr()))  // unread data is still buffered: return it without reading the file
+        return * reinterpret_cast<unsigned char *>( gptr());
+
+    if ( ! (mode & std::ios::in) || ! opened)
+        return EOF;
+    // Josuttis' implementation of inbuf
+    int n_putback = gptr() - eback();  // characters already consumed, kept so they can be put back
+    if ( n_putback > 4)
+        n_putback = 4;  // the first 4 bytes of buffer are reserved as the putback area
+    std::memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback);
+
+    int num = gzread( file, buffer+4, bufferSize-4);
+    if (num <= 0) // ERROR or EOF
+    {
+        if (gzeof(file))
+            return EOF;
+        handle_gzerror();  // throws, so the code below is not reached on this path
+    }
+
+    // reset buffer pointers
+    setg( buffer + (4 - n_putback),   // beginning of putback area
+          buffer + 4,                 // read position
+          buffer + 4 + num);          // end of buffer
+
+    // return next character
+    return * reinterpret_cast<unsigned char *>( gptr());
+}
+
+int gzstreambuf::flush_buffer() {  // writes the pending put area to the file and returns its byte count
+    // Separate the writing of the buffer from overflow() and
+    // sync() operation.
+    int w = pptr() - pbase();  // bytes currently waiting in the put area
+    if ( gzwrite( file, pbase(), w) != w)
+        handle_gzerror();  // a short write is reported by throwing
+    pbump( -w);  // rewind the put pointer to the start of the buffer
+    return w;
+}
+
+int gzstreambuf::overflow( int c) { // used for output buffer only
+    if ( ! ( mode & std::ios::out) || ! opened)
+        return EOF;
+    if (c != EOF) {
+        *pptr() = c;  // the constructor's setp() stops one byte short of buffer's end, leaving room for this
+        pbump(1);
+    }
+    if ( flush_buffer() == EOF)
+        return EOF;
+    return c;
+}
+
+int gzstreambuf::sync() {  // flushes pending output, if any; returns 0, or -1 if flush_buffer() yields EOF
+    // Changed to use flush_buffer() instead of overflow( EOF)
+    // which caused improper behavior with std::endl and flush(),
+    // bug reported by Vincent Ricard.
+    if ( pptr() && pptr() > pbase()) {  // only flush when something has actually been written
+        if ( flush_buffer() == EOF)
+            return -1;
+    }
+    return 0;
+}
+
+// --------------------------------------
+// class gzstreambase:
+// --------------------------------------
+
+gzstreambase::gzstreambase( const char* name, int mode) {  // attaches the embedded buffer, then opens name
+    init( &buf);
+    open( name, mode);  // a failed open sets badbit on this stream (see open below)
+}
+
+gzstreambase::~gzstreambase() {
+    buf.close();  // NOTE(review): close() can reach handle_gzerror(), which throws — confirm that is acceptable in a destructor
+}
+
+void gzstreambase::open( const char* name, int open_mode) {  // opens through the buffer and records failure in the stream state
+    if ( ! buf.open( name, open_mode))
+        clear( rdstate() | std::ios::badbit);  // keep the existing state bits and add badbit
+}
+
+void gzstreambase::close() {  // closes the buffer if open; a failed close sets badbit
+    if ( buf.is_open())
+        if ( ! buf.close())
+            clear( rdstate() | std::ios::badbit);
+}
+
+#ifdef GZSTREAM_NAMESPACE
+} // namespace GZSTREAM_NAMESPACE
+#endif
+
+// ============================================================================
+// EOF //
diff --git a/utils/gzstream.h b/utils/gzstream.h
new file mode 100644
index 00000000..a7effd90
--- /dev/null
+++ b/utils/gzstream.h
@@ -0,0 +1,127 @@
+// ============================================================================
+// gzstream, C++ iostream classes wrapping the zlib compression library.
+// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+// ============================================================================
+//
+// File          : gzstream.h
+// Revision      : $Revision: 1.5 $
+// Revision_date : $Date: 2002/04/26 23:30:15 $
+// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+//
+// Standard streambuf implementation following Nicolai Josuttis, "The
+// Standard C++ Library".
+// ============================================================================
+
+#ifndef GZSTREAM_H
+#define GZSTREAM_H 1
+
+// standard C++ with new header file names and std:: namespace
+#include <iostream>
+#include <fstream>
+#include <zlib.h>
+
+#ifdef GZSTREAM_NAMESPACE
+namespace GZSTREAM_NAMESPACE {
+#endif
+
+// ----------------------------------------------------------------------------
+// Internal classes to implement gzstream. See below for user classes.
+// ----------------------------------------------------------------------------
+
+class gzstreambuf : public std::streambuf {  // streambuf that reads or writes a file through zlib's gz* functions
+private:
+  static const int bufferSize = 47+(1024*256);    // size of data buff
+    // totals 512 bytes under g++ for igzstream at the end. (NOTE(review): looks stale for the size above — confirm)
+
+    gzFile           file;               // file handle for compressed file (not set by the constructor)
+    char             buffer[bufferSize]; // data buffer
+    char             opened;             // open/close state of stream
+    int              mode;               // I/O mode (not set by the constructor; assigned in open())
+
+    int flush_buffer();
+    void handle_gzerror(); // throws exception
+public:
+#if defined(_WIN32) && !defined(CYGWIN) && !defined(EOF)
+	enum {
+		EOF = -1
+	};
+#endif
+    gzstreambuf() : opened(0) {
+        setp( buffer, buffer + (bufferSize-1));  // put area stops one byte short of the array's end
+        setg( buffer + 4,     // beginning of putback area
+              buffer + 4,     // read position
+              buffer + 4);    // end position
+        // ASSERT: both input & output capabilities will not be used together
+    }
+    int is_open() { return opened; }
+    gzstreambuf* open( const char* name, int open_mode);
+    gzstreambuf* close();
+    ~gzstreambuf() { close(); }
+
+    virtual int     overflow( int c = EOF);
+    virtual int     underflow();
+    virtual int     sync();
+};
+
+class gzstreambase : virtual public std::ios {  // owns the gzstreambuf shared by igzstream and ogzstream
+protected:
+    gzstreambuf buf;
+public:
+    gzstreambase() { init(&buf); }  // attaches the buffer without opening a file
+    gzstreambase( const char* name, int open_mode);
+    ~gzstreambase();
+    void open( const char* name, int open_mode);
+    void close();
+    gzstreambuf* rdbuf() { return &buf; }
+};
+
+// ----------------------------------------------------------------------------
+// User classes. Use igzstream and ogzstream analogously to ifstream and
+// ofstream respectively. They read and write files based on the gz*
+// function interface of the zlib. Files are compatible with gzip compression.
+// ----------------------------------------------------------------------------
+
+class igzstream : public gzstreambase, public std::istream {  // input stream over the shared gzstreambuf
+public:
+    igzstream() : std::istream( &buf) {}
+    igzstream( const char* name, int open_mode = std::ios::in)
+        : gzstreambase( name, std::ios::in | open_mode), std::istream( &buf) {}  // std::ios::in is always added to the mode
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    void open( const char* name, int open_mode = std::ios::in) {  // unlike the constructor, passes open_mode through unchanged
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+class ogzstream : public gzstreambase, public std::ostream {  // output stream over the shared gzstreambuf
+public:
+    ogzstream() : std::ostream( &buf) {}
+    ogzstream( const char* name, int mode = std::ios::out)
+        : gzstreambase( name, mode), std::ostream( &buf) {}  // mode is passed through as given
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    void open( const char* name, int open_mode = std::ios::out) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+#ifdef GZSTREAM_NAMESPACE
+} // namespace GZSTREAM_NAMESPACE
+#endif
+
+#endif // GZSTREAM_H
+// ============================================================================
+// EOF //
+
diff --git a/utils/hash.h b/utils/hash.h
new file mode 100755
index 00000000..3a60a429
--- /dev/null
+++ b/utils/hash.h
@@ -0,0 +1,54 @@
+#ifndef CDEC_HASH_H
+#define CDEC_HASH_H
+
+#include "murmur_hash.h"
+
+#include "config.h"
+#ifdef HAVE_SPARSEHASH  // chooses the container behind HASH_MAP; the helper macros set dense_hash_map's special keys
+# include <google/dense_hash_map>
+# define HASH_MAP google::dense_hash_map
+# define HASH_MAP_RESERVED(h,empty,deleted) do { h.set_empty_key(empty); h.set_deleted_key(deleted); } while(0)
+# define HASH_MAP_EMPTY(h,empty) do { h.set_empty_key(empty); } while(0)
+#else  // tr1::unordered_map fallback: both helper macros expand to nothing
+# include <tr1/unordered_map>
+# define HASH_MAP std::tr1::unordered_map
+# define HASH_MAP_RESERVED(h,empty,deleted)
+# define HASH_MAP_EMPTY(h,empty)
+#endif
+
+#include <boost/functional/hash.hpp>
+
+// assumes C is POD: the hash is taken over the sizeof(c) raw bytes of the object
+template <class C>
+struct murmur_hash
+{
+  typedef MurmurInt return_type;
+  typedef C /*const&*/ argument_type;
+  return_type operator()(argument_type const& c) const {
+    return MurmurHash((void*)&c,sizeof(c));  // the cast drops const from &c
+  }
+};
+
+// murmur_hash_array isn't std guaranteed safe (you need to use string::data())
+template <>
+struct murmur_hash<std::string>  // specialisation: hashes the characters, not the string object's own bytes
+{
+  typedef MurmurInt return_type;
+  typedef std::string /*const&*/ argument_type;
+  return_type operator()(argument_type const& c) const {
+    return MurmurHash(c.data(),c.size());
+  }
+};
+
+// uses begin(),size() assuming contiguous layout and POD
+template <class C>
+struct murmur_hash_array
+{
+  typedef MurmurInt return_type;
+  typedef C /*const&*/ argument_type;
+  return_type operator()(argument_type const& c) const {
+    return MurmurHash(&*c.begin(),c.size()*sizeof(*c.begin()));  // NOTE(review): *c.begin() is also evaluated for an empty container — confirm callers never pass one
+  }
+};
+
+#endif
diff --git a/utils/have_64_bits.h b/utils/have_64_bits.h
new file mode 100755
index 00000000..d1e6064f
--- /dev/null
+++ b/utils/have_64_bits.h
@@ -0,0 +1,17 @@
+#ifndef HAVE_64_BITS_H
+#define HAVE_64_BITS_H
+
+#include <stdint.h>
+
+#undef HAVE_64_BITS  // discard any earlier definition so the result below always wins
+
+#if INTPTR_MAX == INT32_MAX  // NOTE(review): in #if an undefined macro counts as 0, so if <stdint.h> leaves these undefined this branch is taken silently — confirm for the targeted toolchains
+# define HAVE_64_BITS 0
+#elif INTPTR_MAX >= INT64_MAX
+# define HAVE_64_BITS 1
+#else
+# error "couldn't tell if HAVE_64_BITS from INTPTR_MAX INT32_MAX INT64_MAX"
+#endif
+
+
+#endif
diff --git a/utils/int_or_pointer.h b/utils/int_or_pointer.h
new file mode 100755
index 00000000..4b6a9e4a
--- /dev/null
+++ b/utils/int_or_pointer.h
@@ -0,0 +1,70 @@
+#ifndef INT_OR_POINTER_H
+#define INT_OR_POINTER_H
+
+// if you ever wanted to store a discriminated union of pointer/integer without an extra boolean flag, this will do it, assuming your pointers are never odd.
+
+// check lsb for expected tag?
+#ifndef IOP_CHECK_LSB
+# define IOP_CHECK_LSB 1
+#endif
+#if IOP_CHECK_LSB
+# define iop_assert(x) assert(x)
+#else
+# define iop_assert(x)
+#endif
+
+#include <assert.h>
+#include <iostream>
+
+template <class Pointed=void,class Int=size_t>
+struct IntOrPointer {  // tagged union: low bit 1 => integer stored as 2*value+1, low bit 0 => pointer
+  typedef Pointed pointed_type;
+  typedef Int integer_type;
+  typedef Pointed *value_type;
+  typedef IntOrPointer<Pointed,Int> self_type;
+  IntOrPointer(int j) { *this=j; }  // the converting constructors delegate to the matching operator=
+  IntOrPointer(size_t j) { *this=j; }
+  IntOrPointer(value_type v) { *this=v; }
+  bool is_integer() const { return i&1; }
+  bool is_pointer() const { return !(i&1); }
+  value_type & pointer() { return p; }  // mutable access; unlike the const overload, no tag check
+  const value_type & pointer() const { iop_assert(is_pointer()); return p; }
+  integer_type integer() const { iop_assert(is_integer()); return i >> 1; }  // undoes the 2*value+1 encoding
+  void set_integer(Int j) { i=2*j+1; }
+  void set_pointer(value_type p_) { p=p_;iop_assert(is_pointer()); }  // asserts the stored pointer is even
+  void operator=(unsigned j) { i = 2*(integer_type)j+1; }
+  void operator=(int j) { i = 2*(integer_type)j+1; }
+  template <class C>
+  void operator=(C j) { i = 2*(integer_type)j+1; }  // any other type is cast to integer_type and stored as an integer
+  void operator=(value_type v) { p=v; }  // no tag check on this path
+  IntOrPointer() {}  // leaves the union uninitialized
+  IntOrPointer(const self_type &s) : p(s.p) {}  // copies through p only; NOTE(review): assumes Int is no wider than a pointer - confirm
+  void operator=(const self_type &s) { p=s.p; }
+  template <class C>
+  bool operator ==(C* v) const { return p==v; }
+  template <class C>
+  bool operator ==(const C* v) const { return p==v; }
+  template <class C>
+  bool operator ==(C j) const { return integer() == j; }  // integer() asserts the integer tag when IOP_CHECK_LSB is on
+  bool operator ==(self_type s) const { return p==s.p; }  // compares the raw stored representation
+  bool operator !=(self_type s) const { return p!=s.p; }
+  template <class O> void print(O&o) const  // decimal for integers, 0x-prefixed hex for pointers
+  {
+    if (is_integer())
+      o << integer();
+    else {
+      o << "0x" << std::hex << (size_t)pointer() << std::dec;
+    }
+  }
+  friend inline std::ostream& operator<<(std::ostream &o,self_type const& s) {
+    s.print(o); return o;
+  }
+protected:
+  union {
+    value_type p; // must be even (guaranteed unless you're pointing at packed chars)
+    integer_type i; // stored as 2*data+1, so only has half the range (one less bit) of a normal integer_type
+  };
+};
+
+
+#endif
diff --git a/utils/intrusive_refcount.hpp b/utils/intrusive_refcount.hpp
new file mode 100755
index 00000000..4a4b0187
--- /dev/null
+++ b/utils/intrusive_refcount.hpp
@@ -0,0 +1,84 @@
+#ifndef GRAEHL__SHARED__INTRUSIVE_REFCOUNT_HPP
+#define GRAEHL__SHARED__INTRUSIVE_REFCOUNT_HPP
+
+#include <boost/intrusive_ptr.hpp>
+#include <boost/noncopyable.hpp>
+#include <boost/detail/atomic_count.hpp>
+#include <cassert>
+
+/** usage:
+    struct mine : public boost::instrusive_refcount<mine> {};
+
+    boost::intrusive_ptr<mine> p(new mine());
+*/
+
+namespace boost {
+// note: the free functions need to be in boost namespace, OR namespace of involved type.  this is the only way to do it.
+
+template <class T>
+class intrusive_refcount;
+
+template <class T>
+class atomic_intrusive_refcount;
+
+template<class T>
+void intrusive_ptr_add_ref(intrusive_refcount<T>* ptr)  // bumps the plain (non-atomic) counter
+{
+    ++(ptr->refs);
+}
+
+template<class T>
+void intrusive_ptr_release(intrusive_refcount<T>* ptr)  // drops one reference; destroys the object when the count hits 0
+{
+    if (!--(ptr->refs)) delete static_cast<T*>(ptr);  // delete as the derived type T, since the base destructor is protected and non-virtual
+}
+
+
+//WARNING: the counter is a plain unsigned, so it wraps once the range of unsigned is exceeded.  hope that's ok :)
+template<class T>
+class intrusive_refcount : boost::noncopyable
+{
+ protected:
+//    typedef intrusive_refcount<T> pointed_type;
+    friend void intrusive_ptr_add_ref<T>(intrusive_refcount<T>* ptr);
+    friend void intrusive_ptr_release<T>(intrusive_refcount<T>* ptr);
+//    friend class intrusive_ptr<T>;
+
+    intrusive_refcount(): refs(0) {}  // starts unreferenced
+    ~intrusive_refcount() { assert(refs==0); }  // destroying while references remain trips the assert
+
+private:
+    unsigned refs;
+};
+
+
+template<class T>
+void intrusive_ptr_add_ref(atomic_intrusive_refcount<T>* ptr)  // same as above, on the atomic_count counter
+{
+    ++(ptr->refs);
+}
+
+template<class T>
+void intrusive_ptr_release(atomic_intrusive_refcount<T>* ptr)  // destroys the object when the atomic count hits 0
+{
+    if(!--(ptr->refs)) delete static_cast<T*>(ptr);
+}
+
+template<class T>
+class atomic_intrusive_refcount : boost::noncopyable  // variant whose counter is a boost::detail::atomic_count
+{
+ protected:
+    friend void intrusive_ptr_add_ref<T>(atomic_intrusive_refcount<T>* ptr);
+    friend void intrusive_ptr_release<T>(atomic_intrusive_refcount<T>* ptr);
+
+    atomic_intrusive_refcount(): refs(0) {}
+    ~atomic_intrusive_refcount() { assert(refs==0); }
+
+private:
+    boost::detail::atomic_count refs;
+};
+
+}
+
+
+#endif
diff --git a/utils/logval.h b/utils/logval.h
new file mode 100644
index 00000000..37f14ae5
--- /dev/null
+++ b/utils/logval.h
@@ -0,0 +1,174 @@
+#ifndef LOGVAL_H_
+#define LOGVAL_H_
+
+#define LOGVAL_CHECK_NEG false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <limits>
+
+template <typename T>
+class LogVal {  // a real number kept as a sign flag s_ (true = negative) plus v_ = log of its magnitude
+ public:
+  LogVal() : s_(), v_(-std::numeric_limits<T>::infinity()) {}  // zero: log-magnitude is -infinity
+  explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}  // s_ is declared before v_, so it is initialized first
+  LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
+  LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
+  LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}  // takes the log-magnitude directly, no log() applied
+  static LogVal<T> exp(T lnx) { return LogVal(lnx,false); }  // the positive value e^lnx
+
+  static LogVal<T> One() { return LogVal(1); }
+  static LogVal<T> Zero() { return LogVal(); }
+  static LogVal<T> e() { return LogVal(1,false); }  // log-magnitude 1, i.e. the constant e
+  void logeq(const T& v) { s_ = false; v_ = v; }  // become the positive value whose log is v
+
+  LogVal& operator+=(const LogVal& a) {  // addition in the log domain; the larger magnitude is factored out before log1p
+    if (a.v_ == -std::numeric_limits<T>::infinity()) return *this;  // adding zero changes nothing
+    if (a.s_ == s_) {
+      if (a.v_ < v_) {
+        v_ = v_ + log1p(std::exp(a.v_ - v_));
+      } else {
+        v_ = a.v_ + log1p(std::exp(v_ - a.v_));
+      }
+    } else {
+      if (a.v_ < v_) {
+        v_ = v_ + log1p(-std::exp(a.v_ - v_));
+      } else {
+        v_ = a.v_ + log1p(-std::exp(v_ - a.v_));
+        s_ = !s_;  // a's magnitude is at least ours, so the result takes a's sign
+      }
+    }
+    return *this;
+  }
+
+  LogVal& operator*=(const LogVal& a) {
+    s_ = (s_ != a.s_);  // product is negative exactly when the signs differ
+    v_ += a.v_;
+    return *this;
+  }
+
+  LogVal& operator/=(const LogVal& a) {
+    s_ = (s_ != a.s_);
+    v_ -= a.v_;
+    return *this;
+  }
+
+  LogVal& operator-=(const LogVal& a) {  // implemented as += of the sign-flipped operand
+    LogVal b = a;
+    b.invert();
+    return *this += b;
+  }
+
+  // LogVal(fabs(log(x)),x.s_)
+  friend LogVal abslog(LogVal x) {
+    if (x.v_<0) x.v_=-x.v_;
+    return x;
+  }
+
+  LogVal& poweq(const T& power) {  // raise to a power in place by scaling the log-magnitude; the sign flag is left as is
+#if LOGVAL_CHECK_NEG
+    if (s_) {
+      std::cerr << "poweq(T) not implemented when s_ is true\n";
+      std::abort();
+    } else
+#endif
+      v_ *= power;
+    return *this;
+  }
+
+  void invert() { s_ = !s_; }  // flips the sign (negation), not a reciprocal
+
+  LogVal pow(const T& power) const {
+    LogVal res = *this;
+    res.poweq(power);
+    return res;
+  }
+
+  LogVal root(const T& root) const {
+    return pow(1/root);
+  }
+
+  operator T() const {  // implicit conversion back to the linear domain
+    if (s_) return -std::exp(v_); else return std::exp(v_);
+  }
+
+  bool s_;
+  T v_;
+};
+
+// The left operand is taken by value on purpose: it is updated in place and returned, so r=a+(b+c) can be built without an extra explicit copy (copy elision).  todo: return std::move(o1) - C++0x
+template<typename T>
+LogVal<T> operator+(LogVal<T> o1, const LogVal<T>& o2) {
+  o1 += o2;
+  return o1;
+}
+
+template<typename T>
+LogVal<T> operator*(LogVal<T> o1, const LogVal<T>& o2) {  // product, delegating to operator*= on the by-value copy
+  o1 *= o2;
+  return o1;
+}
+
+template<typename T>
+LogVal<T> operator/(LogVal<T> o1, const LogVal<T>& o2) {  // quotient, delegating to operator/= on the by-value copy
+  o1 /= o2;
+  return o1;
+}
+
+template<typename T>
+LogVal<T> operator-(LogVal<T> o1, const LogVal<T>& o2) {  // difference, delegating to operator-= on the by-value copy
+  o1 -= o2;
+  return o1;
+}
+
+template<typename T>
+T log(const LogVal<T>& o) {  // natural log of the represented value: simply the stored v_ for non-negative values
+#ifdef LOGVAL_CHECK_NEG
+  if (o.s_) return log(-1.0);  // NOTE(review): LOGVAL_CHECK_NEG is #defined (as false) in this header, so this #ifdef branch is always compiled, unlike the #if test in poweq - confirm intent
+#endif
+  return o.v_;
+}
+
+template <typename T>
+LogVal<T> pow(const LogVal<T>& b, const T& e) {  // free-function spelling of b.pow(e)
+  return b.pow(e);
+}
+
+// Strict ordering by represented value.  s_ == true marks a negative number, so
+// when the signs differ the negative operand is the smaller one; among two
+// negatives the one with the larger log-magnitude v_ is the smaller number.
+template <typename T>
+bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+  if (lhs.s_ != rhs.s_) return lhs.s_;
+  return lhs.s_ ? (rhs.v_ < lhs.v_) : (lhs.v_ < rhs.v_);
+}
+
+#if 0
+template <typename T>
+bool operator<=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+  return (lhs.v_ <= rhs.v_);
+}
+
+template <typename T>
+bool operator>(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+  return (lhs.v_ > rhs.v_);
+}
+
+template <typename T>
+bool operator>=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+  return (lhs.v_ >= rhs.v_);
+}
+#endif
+
+template <typename T>
+bool operator==(const LogVal<T>& lhs, const LogVal<T>& rhs) {  // exact match of both log-magnitude and sign flag, no tolerance
+  return (lhs.v_ == rhs.v_) && (lhs.s_ == rhs.s_);
+}
+
+template <typename T>
+bool operator!=(const LogVal<T>& lhs, const LogVal<T>& rhs) {  // negation of operator==
+  return !(lhs == rhs);
+}
+
+#endif
diff --git a/utils/logval_test.cc b/utils/logval_test.cc
new file mode 100644
index 00000000..1a23177d
--- /dev/null
+++ b/utils/logval_test.cc
@@ -0,0 +1,73 @@
+#include "logval.h"
+
+#include <gtest/gtest.h>
+#include <iostream>
+
+class LogValTest : public testing::Test {  // gtest fixture with no shared state
+ protected:
+  virtual void SetUp() { }
+  virtual void TearDown() { }
+};
+
+using namespace std;
+
+TEST_F(LogValTest,Order) {  // operator< on one negative and two positive values; two negatives are never compared here
+  LogVal<double> a(-0.3);
+  LogVal<double> b(0.3);
+  LogVal<double> c(2.4);
+  EXPECT_LT(a,b);
+  EXPECT_LT(b,c);
+  EXPECT_LT(a,c);
+  EXPECT_FALSE(b < a);
+  EXPECT_FALSE(c < a);
+  EXPECT_FALSE(c < b);
+  EXPECT_FALSE(c < c);
+  EXPECT_FALSE(b < b);
+  EXPECT_FALSE(a < a);
+}
+
+TEST_F(LogValTest,Invert) {  // invert() on 2.4 must give the same value as constructing -2.4
+  LogVal<double> x(-2.4);
+  LogVal<double> y(2.4);
+  y.invert();
+  EXPECT_FLOAT_EQ(x,y);
+}
+
+TEST_F(LogValTest,Minus) {  // binary - and -= agree, including a negative result
+  LogVal<double> x(12);
+  LogVal<double> y(2);
+  LogVal<double> z1 = x - y;
+  LogVal<double> z2 = x;
+  z2 -= y;
+  EXPECT_FLOAT_EQ(z1, z2);
+  EXPECT_FLOAT_EQ(z1, 10.0);
+  EXPECT_FLOAT_EQ(y - x, -10.0);
+}
+
+TEST_F(LogValTest,TestOps) {  // prints a few mixed-sign results, then checks that mixed-sign addition commutes
+  LogVal<double> x(-12.12);
+  LogVal<double> y(x);
+  cerr << x << endl;
+  cerr << (x*y) << endl;
+  cerr << (x*y + x) << endl;
+  cerr << (x + x*y) << endl;
+  cerr << log1p(-0.5) << endl;
+  LogVal<double> aa(0.2);
+  LogVal<double> bb(-0.3);
+  cerr << (aa + bb) << endl;
+  cerr << (bb + aa) << endl;
+  EXPECT_FLOAT_EQ((aa + bb), (bb + aa));
+  EXPECT_FLOAT_EQ((aa + bb), -0.1);
+}
+
+TEST_F(LogValTest,TestSizes) {  // informational only: prints object sizes, asserts nothing
+  cerr << sizeof(LogVal<double>) << endl;
+  cerr << sizeof(LogVal<float>) << endl;
+  cerr << sizeof(void*) << endl;
+}
+
+int main(int argc, char** argv) {  // gtest entry point for the LogVal tests
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
diff --git a/utils/murmur_hash.h b/utils/murmur_hash.h
new file mode 100755
index 00000000..8dbd7807
--- /dev/null
+++ b/utils/murmur_hash.h
@@ -0,0 +1,186 @@
+#ifndef _MURMUR_HASH_H_
+#define _MURMUR_HASH_H_
+
+//NOTE: quite fast, nice collision properties, but endian dependent hash values
+
+#include "have_64_bits.h"
+typedef uintptr_t MurmurInt;
+
+// MurmurHash2, by Austin Appleby
+
+static const uint32_t DEFAULT_SEED=2654435769U;
+
+#if HAVE_64_BITS
+//MurmurInt MurmurHash(void const *key, int len, uint32_t seed=DEFAULT_SEED);
+
+inline uint64_t MurmurHash64( const void * key, int len, unsigned int seed=DEFAULT_SEED )  // 64-bit hash of len bytes at key (HAVE_64_BITS build)
+{
+  const uint64_t m = 0xc6a4a7935bd1e995;
+  const int r = 47;
+
+  uint64_t h = seed ^ (len * m);
+
+  const uint64_t * data = (const uint64_t *)key;  // key is read as 8-byte words; NOTE(review): assumes the platform tolerates unaligned 8-byte loads - confirm
+  const uint64_t * end = data + (len/8);
+
+  while(data != end)
+  {
+    uint64_t k = *data++;
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h ^= k;
+    h *= m;
+  }
+
+  const unsigned char * data2 = (const unsigned char*)data;
+
+  switch(len & 7)  // fold in the 0-7 trailing bytes; every case deliberately falls through to the next
+  {
+  case 7: h ^= uint64_t(data2[6]) << 48;
+  case 6: h ^= uint64_t(data2[5]) << 40;
+  case 5: h ^= uint64_t(data2[4]) << 32;
+  case 4: h ^= uint64_t(data2[3]) << 24;
+  case 3: h ^= uint64_t(data2[2]) << 16;
+  case 2: h ^= uint64_t(data2[1]) << 8;
+  case 1: h ^= uint64_t(data2[0]);
+    h *= m;
+  };
+
+  h ^= h >> r;  // final mixing rounds
+  h *= m;
+  h ^= h >> r;
+
+  return h;
+}
+
+inline uint32_t MurmurHash32(void const *key, int len, uint32_t seed=DEFAULT_SEED)  // on 64-bit builds: the 64-bit hash truncated to its low 32 bits
+{
+  return (uint32_t) MurmurHash64(key,len,seed);
+}
+
+inline MurmurInt MurmurHash(void const *key, int len, uint32_t seed=DEFAULT_SEED)  // pointer-width hash; on 64-bit builds this is MurmurHash64
+{
+  return MurmurHash64(key,len,seed);
+}
+
+#else
+// 32-bit
+
+// Note - This code makes a few assumptions about how your machine behaves -
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+inline uint32_t MurmurHash32 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)  // 32-bit hash of len bytes at key (non-64-bit build)
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  // Initialize the hash to a 'random' value
+
+  uint32_t h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t *)data;  // this is the 4-byte read from an arbitrary address mentioned in assumption 1
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last few bytes of the input array
+
+  switch(len)  // cases deliberately fall through
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+    h *= m;
+  };
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
+
+inline MurmurInt MurmurHash ( const void * key, int len, uint32_t seed=DEFAULT_SEED) {  // pointer-width hash; on non-64-bit builds this is MurmurHash32
+  return MurmurHash32(key,len,seed);
+}
+
+// 64-bit hash for 32-bit platforms: two 32-bit lanes (h1, h2) are mixed separately and concatenated at the end
+
+inline uint64_t MurmurHash64 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h1 = seed ^ len;
+  uint32_t h2 = 0;
+
+  const uint32_t * data = (const uint32_t *)key;  // key is read as 4-byte words; NOTE(review): assumes unaligned 4-byte loads are tolerated - confirm
+
+  while(len >= 8)  // each iteration feeds one word into h1 and the next into h2
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+
+    uint32_t k2 = *data++;
+    k2 *= m; k2 ^= k2 >> r; k2 *= m;
+    h2 *= m; h2 ^= k2;
+    len -= 4;
+  }
+
+  if(len >= 4)  // one remaining whole word goes into h1
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+  }
+
+  switch(len)  // 0-3 trailing bytes go into h2; cases deliberately fall through
+  {
+  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+  case 1: h2 ^= ((unsigned char*)data)[0];
+    h2 *= m;
+  };
+
+  h1 ^= h2 >> 18; h1 *= m;
+  h2 ^= h1 >> 22; h2 *= m;
+  h1 ^= h2 >> 17; h1 *= m;
+  h2 ^= h1 >> 19; h2 *= m;
+
+  uint64_t h = h1;
+
+  h = (h << 32) | h2;  // h1 becomes the high half, h2 the low half
+
+  return h;
+}
+
+#endif
+//32bit
+
+#endif
diff --git a/utils/null_deleter.h b/utils/null_deleter.h
new file mode 100755
index 00000000..082ab453
--- /dev/null
+++ b/utils/null_deleter.h
@@ -0,0 +1,9 @@
+#ifndef NULL_DELETER_H
+#define NULL_DELETER_H
+
+struct null_deleter {  // deleter functor that does nothing when invoked on a pointer
+    void operator()(void*) const {}
+    void operator()(void const*) const {}
+};
+
+#endif
diff --git a/utils/prob.h b/utils/prob.h
new file mode 100644
index 00000000..bc297870
--- /dev/null
+++ b/utils/prob.h
@@ -0,0 +1,8 @@
+#ifndef _PROB_H_
+#define _PROB_H_
+
+#include "logval.h"
+
+typedef LogVal<double> prob_t;
+
+#endif
diff --git a/utils/sampler.h b/utils/sampler.h
new file mode 100644
index 00000000..5fef45d0
--- /dev/null
+++ b/utils/sampler.h
@@ -0,0 +1,147 @@
+#ifndef SAMPLER_H_
+#define SAMPLER_H_
+
+#include <algorithm>
+#include <functional>
+#include <numeric>
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <ctime>
+
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real.hpp>
+#include <boost/random/variate_generator.hpp>
+#include <boost/random/normal_distribution.hpp>
+#include <boost/random/poisson_distribution.hpp>
+#include <boost/random/uniform_int.hpp>
+
+#include "prob.h"
+
+struct SampleSet;
+
+template <typename RNG>
+struct RandomNumberGenerator {  // wraps an RNG engine plus a U(0,1) variate generator and a few sampling helpers
+  static uint32_t GetTrulyRandomSeed() {  // reads 4 bytes from /dev/urandom, falling back to the clock; reports the seed on stderr
+    uint32_t seed;
+    std::ifstream r("/dev/urandom");
+    if (r) {
+      r.read((char*)&seed,sizeof(uint32_t));
+    }
+    if (r.fail() || !r) {
+      std::cerr << "Warning: could not read from /dev/urandom. Seeding from clock" << std::endl;
+      seed = std::time(NULL);
+    }
+    std::cerr << "Seeding random number sequence to " << seed << std::endl;
+    return seed;
+  }
+
+  RandomNumberGenerator() : m_dist(0,1), m_generator(), m_random(m_generator,m_dist) {
+    uint32_t seed = GetTrulyRandomSeed();
+    m_generator.seed(seed);
+  }
+  explicit RandomNumberGenerator(uint32_t seed) : m_dist(0,1), m_generator(), m_random(m_generator,m_dist) {
+    if (!seed) seed = GetTrulyRandomSeed();  // a seed of 0 means "pick a random seed"
+    m_generator.seed(seed);
+  }
+  // two-way draw: returns 1 (i.e. b) with probability b/(a+b), otherwise 0 (i.e. a).  only T == 1 is implemented.
+  size_t SelectSample(const prob_t& a, const prob_t& b, double T = 1.0) {
+    if (T != 1.0) {
+      assert(!"not implemented");
+      std::abort();  // with NDEBUG the assert vanishes; never flow off the end of a non-void function
+    }
+    return (this->next() > (a / (a + b))) ? 1 : 0;
+  }
+
+  // T is the annealing temperature, if desired
+  size_t SelectSample(const SampleSet& ss, double T = 1.0);
+
+  // draw a value from U(0,1)
+  double next() {return m_random();}
+
+  // draw from a normal distribution.  NOTE(review): var is forwarded unchanged as boost::normal_distribution's second constructor argument, which boost documents as sigma (standard deviation), not variance - confirm callers
+  double NextNormal(double mean, double var) {
+    return boost::normal_distribution<double>(mean, var)(m_random);
+  }
+
+  // draw a value from a Poisson distribution
+  // lambda must be greater than 0
+  int NextPoisson(int lambda) {
+    return boost::poisson_distribution<int>(lambda)(m_random);
+  }
+
+  bool AcceptMetropolisHastings(const prob_t& p_cur,
+                                const prob_t& p_prev,
+                                const prob_t& q_cur,
+                                const prob_t& q_prev) {
+    const prob_t a = (p_cur / p_prev) * (q_prev / q_cur);  // acceptance ratio
+    if (log(a) >= 0.0) return true;  // ratio >= 1: always accept
+    return (prob_t(this->next()) < a);  // otherwise accept when a U(0,1) draw falls below the ratio
+  }
+
+  RNG &gen() { return m_generator; }
+  typedef boost::variate_generator<RNG&, boost::uniform_int<> > IntRNG;
+  IntRNG inclusive(int low,int high_incl) {  // integer generator over [low, high_incl], sharing this object's engine by reference
+    assert(high_incl>=low);
+    return IntRNG(m_generator,boost::uniform_int<>(low,high_incl));
+  }
+
+ private:
+  boost::uniform_real<> m_dist;
+  RNG m_generator;
+  boost::variate_generator<RNG&, boost::uniform_real<> > m_random;  // holds a reference to m_generator, so it must stay declared after it
+};
+
+typedef RandomNumberGenerator<boost::mt19937> MT19937;
+
+class SampleSet {  // thin wrapper over a vector of (not necessarily normalized) prob_t scores
+ public:
+  const prob_t& operator[](int i) const { return m_scores[i]; }  // unchecked indexing
+  prob_t& operator[](int i) { return m_scores[i]; }
+  bool empty() const { return m_scores.empty(); }
+  void add(const prob_t& s) { m_scores.push_back(s); }
+  void clear() { m_scores.clear(); }
+  size_t size() const { return m_scores.size(); }
+  void resize(int size) { m_scores.resize(size); }
+  std::vector<prob_t> m_scores;  // public; read directly by RandomNumberGenerator::SelectSample
+};
+
+template <typename RNG>
+size_t RandomNumberGenerator<RNG>::SelectSample(const SampleSet& ss, double T) {  // returns an index into ss, drawn in proportion to score^(1/T)
+  assert(T > 0.0);
+  assert(ss.m_scores.size() > 0);
+  if (ss.m_scores.size() == 1) return 0;  // single candidate: no random draw is consumed
+  const prob_t annealing_factor(1.0 / T);
+  const bool anneal = (annealing_factor != prob_t::One());
+  prob_t sum = prob_t::Zero();
+  if (anneal) {
+    for (int i = 0; i < ss.m_scores.size(); ++i)
+      sum += ss.m_scores[i].pow(annealing_factor);  // p^(1/T)
+  } else {
+    sum = std::accumulate(ss.m_scores.begin(), ss.m_scores.end(), prob_t::Zero());
+  }
+  //for (size_t i = 0; i < ss.m_scores.size(); ++i) std::cerr << ss.m_scores[i] << ",";
+  //std::cerr << std::endl;
+
+  prob_t random(this->next());    // random number between 0 and 1
+  random *= sum;                  // scale with normalization factor
+  //std::cerr << "Random number " << random << std::endl;
+
+  //now figure out which sample: walk the running total until it reaches the scaled draw
+  size_t position = 1;
+  sum = ss.m_scores[0];
+  if (anneal) {
+    sum.poweq(annealing_factor);
+    for (; position < ss.m_scores.size() && sum < random; ++position)
+      sum += ss.m_scores[position].pow(annealing_factor);
+  } else {
+    for (; position < ss.m_scores.size() && sum < random; ++position)
+      sum += ss.m_scores[position];
+  }
+  //std::cout << "random: " << random <<  " sample: " << position << std::endl;
+  //std::cerr << "Sample: " << position-1 << std::endl;
+  //exit(1);
+  return position-1;  // position is one past the last score added to the running total
+}
+
+#endif
diff --git a/utils/small_vector.h b/utils/small_vector.h
new file mode 100644
index 00000000..25c52359
--- /dev/null
+++ b/utils/small_vector.h
@@ -0,0 +1,265 @@
+#ifndef _SMALL_VECTOR_H_
+#define _SMALL_VECTOR_H_
+
+/* REQUIRES that T is POD (can be memcpy).  won't work (yet) due to union with SMALL_VECTOR_POD==0 - may be possible to handle movable types that have ctor/dtor, by using  explicit allocation, ctor/dtor calls.  but for now JUST USE THIS FOR no-meaningful ctor/dtor POD types.
+
+   stores small element (<=SV_MAX items) vectors inline.  recommend SV_MAX=sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1.  may not work if SV_MAX==0.
+ */
+
+#define SMALL_VECTOR_POD 1
+
+#include <streambuf>  // std::max - where to get this?
+#include <cstring>
+#include <cassert>
+#include <stdint.h>
+#include <new>
+#include <stdint.h>
+//sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1
+
+template <class T,int SV_MAX=2>
+class SmallVector {  // vector of POD T: up to SV_MAX elements live inline in data_.vals, larger sizes live in the heap array data_.ptr
+//  typedef unsigned short uint16_t;
+ public:
+  typedef SmallVector<T,SV_MAX> Self;
+  SmallVector() : size_(0) {}
+
+  typedef T const* const_iterator;
+  typedef T* iterator;
+  typedef T value_type;
+  typedef T &reference;
+  typedef T const& const_reference;
+
+  T *begin() { return size_>SV_MAX?data_.ptr:data_.vals; }  // size_ alone decides which union member is active
+  T const* begin() const { return const_cast<Self*>(this)->begin(); }
+  T *end() { return begin()+size_; }
+  T const* end() const { return begin()+size_; }
+
+  explicit SmallVector(size_t s) : size_(s) {  // s value-initialized elements
+    assert(s < 0xA000);
+    if (s <= SV_MAX) {
+      for (int i = 0; i < s; ++i) new(&data_.vals[i]) T();
+    } else {
+      capacity_ = s;
+      size_ = s;
+      data_.ptr = new T[s]; // TODO: replace this with allocator or ::operator new(sizeof(T)*s) everywhere
+      for (int i = 0; i < size_; ++i) new(&data_.ptr[i]) T();
+    }
+  }
+
+  SmallVector(size_t s, T const& v) : size_(s) {  // s copies of v
+    assert(s < 0xA000);
+    if (s <= SV_MAX) {
+      for (int i = 0; i < s; ++i) data_.vals[i] = v;
+    } else {
+      capacity_ = s;
+      size_ = s;
+      data_.ptr = new T[s];
+      for (int i = 0; i < size_; ++i) data_.ptr[i] = v;
+    }
+  }
+
+  SmallVector(const Self& o) : size_(o.size_) {  // deep copy; uses memcpy, hence the POD requirement
+    if (size_ <= SV_MAX) {
+      std::memcpy(data_.vals,o.data_.vals,size_*sizeof(T));
+//      for (int i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i];
+    } else {
+      capacity_ = size_ = o.size_;
+      data_.ptr = new T[capacity_];
+      std::memcpy(data_.ptr, o.data_.ptr, size_ * sizeof(T));
+    }
+  }
+
+  const Self& operator=(const Self& o) {  // handles all four inline/heap combinations of *this and o
+    if (size_ <= SV_MAX) {
+      if (o.size_ <= SV_MAX) {
+        size_ = o.size_;
+        for (int i = 0; i < SV_MAX; ++i) data_.vals[i] = o.data_.vals[i];
+      } else {
+        capacity_ = size_ = o.size_;
+        data_.ptr = new T[capacity_];
+        std::memcpy(data_.ptr, o.data_.ptr, size_ * sizeof(T));
+      }
+    } else {
+      if (o.size_ <= SV_MAX) {
+        delete[] data_.ptr;
+        size_ = o.size_;
+        for (int i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i];
+      } else {
+        if (capacity_ < o.size_) {  // the existing heap buffer is reused when it is big enough
+          delete[] data_.ptr;
+          capacity_ = o.size_;
+          data_.ptr = new T[capacity_];
+        }
+        size_ = o.size_;
+        for (int i = 0; i < size_; ++i)
+          data_.ptr[i] = o.data_.ptr[i];
+      }
+    }
+    return *this;
+  }
+
+  ~SmallVector() {
+    if (size_ <= SV_MAX) {
+      // skip if pod?  yes, we required pod anyway.  no need to destruct
+#if !SMALL_VECTOR_POD
+      for (int i=0;i<size_;++i) data_.vals[i].~T();
+#endif
+    } else
+      delete[] data_.ptr;
+  }
+
+  void clear() {  // releases any heap buffer and returns to the empty inline state
+    if (size_ > SV_MAX) {
+      delete[] data_.ptr;
+    }
+    size_ = 0;
+  }
+
+  bool empty() const { return size_ == 0; }
+  size_t size() const { return size_; }
+
+  inline void ensure_capacity(uint16_t min_size) {  // heap mode only: grow to at least min_size, doubling when that is larger
+    assert(min_size > SV_MAX);
+    if (min_size < capacity_) return;
+    uint16_t new_cap = std::max(static_cast<uint16_t>(capacity_ << 1), min_size);
+    T* tmp = new T[new_cap];
+    std::memcpy(tmp, data_.ptr, capacity_ * sizeof(T));
+    delete[] data_.ptr;
+    data_.ptr = tmp;
+    capacity_ = new_cap;
+  }
+
+private:
+  inline void copy_vals_to_ptr() {  // inline -> heap: moves all SV_MAX inline slots into a new buffer of capacity 2*SV_MAX
+    capacity_ = SV_MAX * 2;
+    T* tmp = new T[capacity_];
+    for (int i = 0; i < SV_MAX; ++i) tmp[i] = data_.vals[i];
+    data_.ptr = tmp;
+  }
+  inline void ptr_to_small() {  // heap -> inline: size_ must already hold the new, small size
+    assert(size_<=SV_MAX);
+    T *tmp=data_.ptr;  // element pointer must be T*, not int*; saved first because vals shares the union with ptr
+    for (int i=0;i<size_;++i)
+      data_.vals[i]=tmp[i];
+    delete[] tmp;
+  }
+
+public:
+
+  inline void push_back(T const& v) {
+    if (size_ < SV_MAX) {
+      data_.vals[size_] = v;
+      ++size_;
+      return;
+    } else if (size_ == SV_MAX) {  // inline storage is full: switch to the heap before appending
+      copy_vals_to_ptr();
+    } else if (size_ == capacity_) {
+      ensure_capacity(size_ + 1);
+    }
+    data_.ptr[size_] = v;
+    ++size_;
+  }
+
+  T& back() { return this->operator[](size_ - 1); }
+  const T& back() const { return this->operator[](size_ - 1); }
+  T& front() { return this->operator[](0); }
+  const T& front() const { return this->operator[](0); }
+
+  void pop_back() {
+    assert(size_>0);
+    --size_;
+    if (size_==SV_MAX)  // just dropped from SV_MAX+1 elements: move back to inline storage
+      ptr_to_small();
+  }
+
+  void compact() {
+    compact(size_);
+  }
+
+  // size must be <= size_ - TODO: test
+  void compact(uint16_t size) {  // shrink to size elements, moving back inline when that fits
+    assert(size<=size_);
+    if (size_>SV_MAX) {
+      size_=size;
+      if (size<=SV_MAX)
+        ptr_to_small();
+    } else
+      size_=size;
+  }
+
+  void resize(size_t s, int v = 0) {  // new elements are assigned from the int v; NOTE(review): fill value is int rather than T - confirm for non-int T
+    if (s <= SV_MAX) {
+      if (size_ > SV_MAX) {
+        T *tmp=data_.ptr;
+        for (int i = 0; i < s; ++i) data_.vals[i] = tmp[i];
+        delete[] tmp;
+        size_ = s;
+        return;
+      }
+      if (s <= size_) {
+        size_ = s;
+        return;
+      } else {
+        for (int i = size_; i < s; ++i)
+          data_.vals[i] = v;
+        size_ = s;
+        return;
+      }
+    } else {
+      if (size_ <= SV_MAX)
+        copy_vals_to_ptr();
+      if (s > capacity_)
+        ensure_capacity(s);
+      if (s > size_) {
+        for (int i = size_; i < s; ++i)
+          data_.ptr[i] = v;
+      }
+      size_ = s;
+    }
+  }
+
+  T& operator[](size_t i) {  // unchecked
+    if (size_ <= SV_MAX) return data_.vals[i];
+    return data_.ptr[i];
+  }
+
+  const T& operator[](size_t i) const {
+    if (size_ <= SV_MAX) return data_.vals[i];
+    return data_.ptr[i];
+  }
+
+  bool operator==(const Self& o) const {  // equal sizes and element-wise equal contents
+    if (size_ != o.size_) return false;
+    if (size_ <= SV_MAX) {
+      for (size_t i = 0; i < size_; ++i)
+        if (data_.vals[i] != o.data_.vals[i]) return false;
+      return true;
+    } else {
+      for (size_t i = 0; i < size_; ++i)
+        if (data_.ptr[i] != o.data_.ptr[i]) return false;
+      return true;
+    }
+  }
+
+  friend bool operator!=(const Self& a, const Self& b) {
+    return !(a==b);
+  }
+
+ private:
+  union StorageType {
+    T vals[SV_MAX];
+    T* ptr;
+  };
+  StorageType data_;
+  uint16_t size_;
+  uint16_t capacity_;  // only defined when size_ > SV_MAX
+};
+
+typedef SmallVector<int,2> SmallVectorInt;
+
+template <class T,int N>
+void memcpy(void *out,SmallVector<T,N> const& v) {  // copies the v.size() elements of v as raw bytes to out
+  std::memcpy(out,v.begin(),v.size()*sizeof(T));
+}
+
+#endif
diff --git a/utils/small_vector_test.cc b/utils/small_vector_test.cc
new file mode 100644
index 00000000..d1d8dcab
--- /dev/null
+++ b/utils/small_vector_test.cc
@@ -0,0 +1,129 @@
+#include "small_vector.h"
+
+#include <gtest/gtest.h>
+#include <iostream>
+#include <cassert>
+#include <vector>
+
+using namespace std;
+
+class SVTest : public testing::Test {  // gtest fixture with no shared state
+ protected:
+  virtual void SetUp() { }
+  virtual void TearDown() { }
+};
+
+TEST_F(SVTest, LargerThan2) {  // heap-backed vectors (more than 2 elements); note the checks are plain assert()s, which vanish under NDEBUG
+  SmallVectorInt v;
+  SmallVectorInt v2;
+  v.push_back(0);
+  v.push_back(1);
+  v.push_back(2);
+  assert(v.size() == 3);
+  assert(v[2] == 2);
+  assert(v[1] == 1);
+  assert(v[0] == 0);
+  v2 = v;
+  SmallVectorInt copy(v);
+  assert(copy.size() == 3);
+  assert(copy[0] == 0);
+  assert(copy[1] == 1);
+  assert(copy[2] == 2);
+  assert(copy == v2);
+  copy[1] = 99;
+  assert(copy != v2);
+  assert(v2.size() == 3);
+  assert(v2[2] == 2);
+  assert(v2[1] == 1);
+  assert(v2[0] == 0);
+  v2[0] = -2;
+  v2[1] = -1;
+  v2[2] = 0;
+  assert(v2[2] == 0);
+  assert(v2[1] == -1);
+  assert(v2[0] == -2);
+  SmallVectorInt v3(1,1);
+  assert(v3[0] == 1);
+  v2 = v3;
+  assert(v2.size() == 1);
+  assert(v2[0] == 1);
+  SmallVectorInt v4(10, 1);
+  assert(v4.size() == 10);
+  assert(v4[5] == 1);
+  assert(v4[9] == 1);
+  v4 = v;
+  assert(v4.size() == 3);
+  assert(v4[2] == 2);
+  assert(v4[1] == 1);
+  assert(v4[0] == 0);
+  SmallVectorInt v5(10, 2);
+  assert(v5.size() == 10);
+  assert(v5[7] == 2);
+  assert(v5[0] == 2);
+  assert(v.size() == 3);
+  v = v5;
+  assert(v.size() == 10);
+  assert(v[2] == 2);
+  assert(v[9] == 2);
+  SmallVectorInt cc;
+  for (int i = 0; i < 33; ++i)
+    cc.push_back(i);
+  for (int i = 0; i < 33; ++i)
+    assert(cc[i] == i);
+  cc.resize(20);
+  assert(cc.size() == 20);
+  for (int i = 0; i < 20; ++i)
+    assert(cc[i] == i);
+  cc[0]=-1;
+  cc.resize(1, 999);
+  assert(cc.size() == 1);
+  assert(cc[0] == -1);
+  cc.resize(99, 99);
+  for (int i = 1; i < 99; ++i) {
+    cerr << i << " " << cc[i] << endl;
+    assert(cc[i] == 99);
+  }
+  cc.clear();
+  assert(cc.size() == 0);
+}
+
+TEST_F(SVTest, Small) {  // vectors that stay within the inline capacity of 2; ends by printing object sizes for comparison
+  SmallVectorInt v;
+  SmallVectorInt v1(1,0);
+  SmallVectorInt v2(2,10);
+  SmallVectorInt v1a(2,0);
+  EXPECT_TRUE(v1 != v1a);
+  EXPECT_TRUE(v1 == v1);
+  EXPECT_EQ(v1[0], 0);
+  EXPECT_EQ(v2[1], 10);
+  EXPECT_EQ(v2[0], 10);
+  ++v2[1];
+  --v2[0];
+  EXPECT_EQ(v2[0], 9);
+  EXPECT_EQ(v2[1], 11);
+  SmallVectorInt v3(v2);
+  assert(v3[0] == 9);
+  assert(v3[1] == 11);
+  assert(!v3.empty());
+  assert(v3.size() == 2);
+  v3.clear();
+  assert(v3.empty());
+  assert(v3.size() == 0);
+  assert(v3 != v2);
+  assert(v2 != v3);
+  v3 = v2;
+  assert(v3 == v2);
+  assert(v2 == v3);
+  assert(v3[0] == 9);
+  assert(v3[1] == 11);
+  assert(!v3.empty());
+  assert(v3.size() == 2);
+  cerr << sizeof(SmallVectorInt) << endl;
+  cerr << sizeof(vector<int>) << endl;
+}
+
+int main(int argc, char** argv) {  // gtest entry point for the SmallVector tests
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
diff --git a/utils/sparse_vector.cc b/utils/sparse_vector.cc
new file mode 100644
index 00000000..6e42a216
--- /dev/null
+++ b/utils/sparse_vector.cc
@@ -0,0 +1,98 @@
+#include "sparse_vector.h"
+
+#include <iostream>
+#include <cstring>
+
+#include "b64tools.h"
+
+using namespace std;
+
+namespace B64 {
+
+// Serializes `objective` and every (feature name, value) pair of `v` into a
+// binary record and writes the base64 encoding of that record to `out`.
+//
+// Record layout (host byte order, no padding):
+//   double        objective
+//   int           number of features
+//   per feature:  1 byte name length, the name bytes (no terminator),
+//                 double value
+//   unsigned int  magic marker 0xBAABABBA
+//
+// Feature names must be shorter than 256 bytes because the length is stored
+// in a single byte.
+void Encode(double objective, const SparseVector<double>& v, ostream* out) {
+  typedef SparseVector<double>::const_iterator const_iterator;
+  const int num_feats = v.num_active();
+  const unsigned int magic = 0xBAABABBAu;
+
+  // first pass: size of the whole record
+  size_t tot_size = sizeof(double) + sizeof(int) + sizeof(magic);
+  for (const_iterator it = v.begin(); it != v.end(); ++it) {
+    const string& fname = FD::Convert(it->first);
+    assert(fname.size() < 256);   // would not fit the one-byte length field
+    tot_size += 1 + fname.size() + sizeof(double);
+  }
+
+  // second pass: fill the buffer.  memcpy is used for the multi-byte fields
+  // because they generally sit at unaligned offsets.
+  vector<char> data(tot_size);
+  char* cur = &data[0];
+  memcpy(cur, &objective, sizeof(objective));
+  cur += sizeof(objective);
+  memcpy(cur, &num_feats, sizeof(num_feats));
+  cur += sizeof(num_feats);
+  for (const_iterator it = v.begin(); it != v.end(); ++it) {
+    const string& fname = FD::Convert(it->first);
+    const double val = it->second;
+    *cur++ = static_cast<char>(static_cast<unsigned char>(fname.size()));   // name len
+    memcpy(cur, fname.data(), fname.size());
+    cur += fname.size();
+    memcpy(cur, &val, sizeof(val));
+    cur += sizeof(val);
+  }
+  memcpy(cur, &magic, sizeof(magic));
+  cur += sizeof(magic);
+  assert(static_cast<size_t>(cur - &data[0]) == tot_size);
+  b64encode(&data[0], tot_size, out);
+}
+
+// Inverse of Encode: base64-decodes the `size` characters at `in` and fills
+// `*objective` and `*v` from the binary record.
+//
+// Returns false (after printing a diagnostic to stderr, except when
+// b64decode itself fails) if the input length is not a multiple of 4, the
+// record is too short or truncated, or the trailing magic marker is wrong.
+// `*v` is cleared on entry and may hold a partial result on failure.
+bool Decode(double* objective, SparseVector<double>* v, const char* in, size_t size) {
+  v->clear();
+  if (size % 4 != 0) {
+    cerr << "B64 error - line % 4 != 0\n";
+    return false;
+  }
+  const size_t header_size = sizeof(double) + sizeof(int);
+  const size_t magic_size = sizeof(unsigned int);
+  const size_t buf_size = size * 3 / 4;
+  // checked before any subtraction so that short input cannot wrap around
+  if (buf_size < header_size + magic_size) { cerr << "SparseVector decoding error: too short!\n"; return false; }
+  vector<char> data(buf_size);
+  if (!b64decode(reinterpret_cast<const unsigned char*>(in), size, &data[0], buf_size))
+    return false;
+
+  // fields are copied out with memcpy because they are not aligned
+  size_t cur = 0;
+  memcpy(objective, &data[cur], sizeof(double));
+  cur += sizeof(double);
+  int num_feats = 0;
+  memcpy(&num_feats, &data[cur], sizeof(int));
+  cur += sizeof(int);
+
+  // feature records may only occupy bytes before this offset; the rest is
+  // reserved for the magic marker
+  const size_t payload_end = buf_size - magic_size;
+  int fc = 0;
+  while (fc < num_feats && cur < payload_end) {
+    // the length byte is unsigned: names of 128..255 bytes are legal
+    const size_t fname_len = static_cast<unsigned char>(data[cur++]);
+    assert(fname_len > 0);
+    if (fname_len + sizeof(double) > payload_end - cur) break;  // truncated record
+    const string fname(&data[cur], fname_len);
+    cur += fname_len;
+    double val;
+    memcpy(&val, &data[cur], sizeof(double));
+    cur += sizeof(double);
+    v->set_value(FD::Convert(fname), val);
+    ++fc;
+  }
+  if (num_feats != fc) {
+    cerr << "Expected " << num_feats << " but only decoded " << fc << "!\n";
+    return false;
+  }
+  // cur <= payload_end here, so the marker read stays inside the buffer
+  unsigned int magic = 0;
+  memcpy(&magic, &data[cur], magic_size);
+  if (magic != 0xBAABABBAu) {
+    cerr << "SparseVector decodeding error : magic does not match!\n";
+    return false;
+  }
+  return true;
+}
+
+}
diff --git a/utils/sparse_vector.h b/utils/sparse_vector.h
new file mode 100644
index 00000000..207489c5
--- /dev/null
+++ b/utils/sparse_vector.h
@@ -0,0 +1,512 @@
+#ifndef _SPARSE_VECTOR_H_
+#define _SPARSE_VECTOR_H_
+
+//#define SPARSE_VECTOR_HASH
+
+#ifdef SPARSE_VECTOR_HASH
+#include "hash.h"
+# define SPARSE_VECTOR_MAP HASH_MAP
+# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted)
+#else
+# define SPARSE_VECTOR_MAP std::map
+# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted)
+#endif
+/*
+   use SparseVectorList (pair smallvector) for feat funcs / hypergraphs (you rarely need random access; just append a feature to the list)
+*/
+/* hack: index 0 never gets printed because cdyer is creative and efficient. features which have no weight got feature dict id 0, see, and the models all clobbered that value.  nobody wants to see it.  except that vlad is also creative and efficient and stored the oracle bleu there. */
+/* NOTE: zero vals may or may not be dropped from map (sparse, but not guaranteed to be so).
+
+   I rely on !v the same as !((bool)v) the same as v==0 and v() same as v(0).
+
+   one exception:
+
+   a local:
+   T sum = 0;
+   is used instead of
+   T sum;
+
+   because T may be a primitive type, and
+
+   T sum();
+
+   is parsed as a function decl :(
+
+   the alternative T sum=T() is also reasonable.  i've switched to that.
+*/
+
+// this is a modified version of code originally written
+// by Phil Blunsom
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <map>
+#include <tr1/unordered_map>
+#include <valarray>
+#include <vector>
+
+#include "fdict.h"
+#include "small_vector.h"
+
+// Returns a reference to v[i], first growing v to i+1 elements when i lies at
+// or beyond its current end.
+template <class T>
+inline T & extend_vector(std::vector<T> &v,int i) {
+  const bool too_short = i>=v.size();
+  if (too_short) {
+    v.resize(i+1);
+  }
+  T &slot = v[i];
+  return slot;
+}
+
+// Sparse vector mapping int feature ids to values of type T, stored in a
+// SPARSE_VECTOR_MAP (std::map unless SPARSE_VECTOR_HASH is defined).
+// Entries whose value is zero may or may not be physically present, so
+// size()/num_active() count stored entries rather than nonzero ones.
+template <typename T>
+class SparseVector {
+  // lets the mixed-type dot() look inside a SparseVector of another value type
+  template <typename U> friend class SparseVector;
+
+  // forwards the keys -1/-2 to SPARSE_VECTOR_MAP_RESERVED; that macro expands
+  // to nothing when std::map is the backing store
+  void init_reserved() {
+    SPARSE_VECTOR_MAP_RESERVED(values_,-1,-2);
+  }
+public:
+  typedef SparseVector<T> Self;
+  typedef SPARSE_VECTOR_MAP<int, T> MapType;
+  typedef typename MapType::const_iterator const_iterator;
+
+  // the value of the only stored entry; requires exactly one entry
+  T const& get_singleton() const {
+    assert(values_.size()==1);
+    return values_.begin()->second;
+  }
+
+  SparseVector() {
+    init_reserved();
+  }
+
+  // sparse copy of the dense vector v: position i becomes key i, zeros are skipped
+  explicit SparseVector(std::vector<T> const& v) {
+    init_reserved();
+    typename MapType::iterator p=values_.begin();
+    const T z=0;
+    for (unsigned i=0;i<v.size();++i) {
+      T const& t=v[i];
+      if (t!=z)
+        p=values_.insert(p,typename MapType::value_type(i,t)); //hint makes insertion faster
+    }
+  }
+
+  void init_vector(std::vector<T> *vp) const {
+    init_vector(*vp);
+  }
+
+  // overwrites v with a dense copy of this vector (v is grown as needed)
+  void init_vector(std::vector<T> &v) const {
+    v.clear();
+    for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i)
+      extend_vector(v,i->first)=i->second;
+  }
+
+  // stores val under index, which must not have an entry yet
+  void set_new_value(int index, T const& val) {
+    assert(values_.find(index)==values_.end());
+    values_[index]=val;
+  }
+
+  // true iff an entry for index is stored and its value is not zero.
+  // (the previous implementation returned the exact opposite.)
+  bool nonzero(int index) const {
+    typename MapType::const_iterator found = values_.find(index);
+    return !(found==values_.end() || !found->second);
+  }
+
+  // value at index, or T() when no entry is stored; never inserts
+  T get(int index) const {
+    typename MapType::const_iterator found = values_.find(index);
+    return found==values_.end()?T():found->second;
+  }
+
+  T value(int i) const { return get(i); }
+
+  // same as above but may add a 0 entry.  TODO: check that people relying on no entry use get
+  T & operator[](int index){
+    return values_[index];
+  }
+
+  inline void set_value(int index, const T &value) {
+    values_[index] = value;
+  }
+
+  // add_value, but skipped entirely when value is zero (no entry is created)
+  inline void maybe_add(int index, const T& value) {
+    if (value) add_value(index,value);
+  }
+
+  // adds value to the entry at index (creating it from T() if absent) and
+  // returns a reference to the stored sum
+  T& add_value(int index, const T &value) {
+#if 1
+    return values_[index]+=value;
+#else
+    // this is not really going to be any faster, and we already rely on default init = 0 init
+    std::pair<typename MapType::iterator,bool> art=values_.insert(std::make_pair(index,value));
+    T &val=art.first->second;
+    if (!art.second) val += value; // already existed
+    return val;
+#endif
+  }
+
+  // overwrites *target with a dense copy of this vector; keys outside
+  // [0,target->size()) are ignored.  target is zeroed by assignment rather
+  // than by multiplying with 0, so stale NaN/inf values do not survive, and
+  // out-of-range keys are skipped (not treated as end of data) so the result
+  // does not depend on the map's iteration order.
+  void store(std::valarray<T>* target) const {
+    *target = T();
+    for (const_iterator it = values_.begin(); it != values_.end(); ++it) {
+      if (it->first < 0 || static_cast<std::size_t>(it->first) >= target->size()) continue;
+      (*target)[it->first] = it->second;
+    }
+  }
+
+  // key of the last entry in iteration order (0 when empty)
+  // NOTE(review): this is the largest key only for an ordered map - confirm
+  // before enabling SPARSE_VECTOR_HASH.
+  int max_index() const {
+    if (empty()) return 0;
+    typename MapType::const_iterator found =values_.end();
+    --found;
+    return found->first;
+  }
+
+  // dot product with a unit vector of the same length
+  // as the sparse vector
+  T dot() const {
+    T sum = T();
+    for (const_iterator it = values_.begin(); it != values_.end(); ++it)
+      sum += it->second;
+    return sum;
+  }
+
+  template<typename S>
+  S cosine_sim(const SparseVector<S> &vec) const {
+    return dot(vec)/(l2norm()*vec.l2norm());
+  }
+
+  // if values are binary, gives |A intersect B|/|A union B|
+  template<typename S>
+  S tanimoto_coef(const SparseVector<S> &vec) const {
+    S dp=dot(vec);
+    return dp/(l2norm_sq()+vec.l2norm_sq()-dp);
+  }
+
+  // dot product with another sparse vector, possibly of a different value type
+  template<typename S>
+  S dot(const SparseVector<S> &vec) const {
+    typedef typename SparseVector<S>::MapType OtherMap;
+    S sum = S();
+    for (const_iterator it = values_.begin(); it != values_.end(); ++it) {
+      typename OtherMap::const_iterator found = vec.values_.find(it->first);
+      if (found != vec.values_.end())
+        sum += it->second * found->second;
+    }
+    return sum;
+  }
+
+  // dot product with a dense vector; keys at or beyond vec.size() contribute nothing
+  template<typename S>
+  S dot(const std::vector<S> &vec) const {
+    S sum = S();
+    for (const_iterator it = values_.begin(); it != values_.end(); ++it) {
+      if (it->first < static_cast<int>(vec.size()))
+        sum += it->second * vec[it->first];
+    }
+    return sum;
+  }
+
+  // dot product with a dense array.  (a leftover debug print to std::cout
+  // was removed from this function.)
+  template<typename S>
+  S dot(const S *vec) const {
+    // this is not range checked!
+    S sum = S();
+    for (const_iterator it = values_.begin(); it != values_.end(); ++it)
+      sum += it->second * vec[it->first];
+    return sum;
+  }
+
+  T l1norm() const {
+    using std::fabs;  // <cmath> overloads for builtin types; ADL still applies to class types
+    T sum = T();
+    for (const_iterator it = values_.begin(); it != values_.end(); ++it)
+      sum += fabs(it->second);
+    return sum;
+  }
+
+  T l2norm_sq() const {
+    T sum = T();
+    for (const_iterator it = values_.begin(); it != values_.end(); ++it)
+      sum += it->second * it->second;
+    return sum;
+  }
+
+  T l2norm() const {
+    using std::sqrt;  // see l1norm
+    return sqrt(l2norm_sq());
+  }
+
+  void erase(int key) {
+    values_.erase(key);
+  }
+
+  // copies every entry of other into this vector, overwriting entries with
+  // the same key; entries only present here are kept.  iterates through
+  // other's public interface so that T2 may differ from T.
+  template <class T2>
+  void set_from(SparseVector<T2> const& other) {
+    for (typename SparseVector<T2>::const_iterator
+           it = other.begin(); it != other.end(); ++it)
+    {
+      values_[it->first]=it->second;
+    }
+  }
+
+  // element-wise sum; entries that become zero are NOT removed
+  SparseVector<T> &operator+=(const SparseVector<T> &other) {
+    for (const_iterator it = other.values_.begin(); it != other.values_.end(); ++it)
+      values_[it->first] += it->second;
+    return *this;
+  }
+
+  // element-wise difference; entries that become zero are NOT removed
+  SparseVector<T> &operator-=(const SparseVector<T> &other) {
+    for (const_iterator it = other.values_.begin(); it != other.values_.end(); ++it)
+      values_[it->first] -= it->second;
+    return *this;
+  }
+
+  friend SparseVector<T> operator -(SparseVector<T> x,SparseVector<T> const& y) {
+    x-=y;
+    return x;
+  }
+  friend SparseVector<T> operator +(SparseVector<T> x,SparseVector<T> const& y) {
+    x+=y;
+    return x;
+  }
+
+private:
+  // DEPRECATED: because 0 values are dropped from the map, this doesn't even make sense if you have a fully populated (not really sparse re: what you'll ever use) vector
+  SparseVector<T> &operator-=(T const& x) {
+    for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it)
+      it->second -= x;
+    return *this;
+  }
+
+  SparseVector<T> &operator+=(T const& x) {
+    for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it)
+      it->second += x;
+    return *this;
+  }
+public:
+  SparseVector<T> &operator/=(const T &x) {
+    for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it)
+      it->second /= x;
+    return *this;
+  }
+
+  SparseVector<T> &operator*=(const T& x) {
+    for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it)
+      it->second *= x;
+    return *this;
+  }
+
+  // the scalar forms below only touch entries that are actually stored
+  SparseVector<T> operator+(T const& x) const {
+    SparseVector<T> result = *this;
+    return result += x;
+  }
+
+  SparseVector<T> operator-(T const& x) const {
+    SparseVector<T> result = *this;
+    return result -= x;
+  }
+
+  SparseVector<T> operator/(T const& x) const {
+    SparseVector<T> result = *this;
+    return result /= x;
+  }
+
+  // member form used by the free operator<<: writes "name=value;name=value"
+  std::ostream &operator<<(std::ostream& out) const {
+    Write(true, &out);
+    return out;
+  }
+
+  // writes "name=value" for every stored entry except feature id 0, joined
+  // by ';' when with_semi is true and by ' ' otherwise
+  void Write(const bool with_semi, std::ostream* os) const {
+    const char* const sep = with_semi ? ";" : " ";
+    bool first = true;
+    for (const_iterator it = values_.begin(); it != values_.end(); ++it) {
+      // by definition feature id 0 is a dummy value
+      if (!it->first) continue;
+      (*os) << (first ? "" : sep)
+            << FD::Convert(it->first) << '=' << it->second;
+      first = false;
+    }
+  }
+
+  bool operator==(Self const & other) const {
+    return size()==other.size() && contains_keys_of(other) && other.contains_i(*this);
+  }
+
+  // true iff this vector stores more entries than o and agrees with o on
+  // every entry of o (see contains_i).  the previous body called itself and
+  // never terminated once the size test passed.
+  bool contains(Self const &o) const {
+    return size()>o.size() && contains_i(o);
+  }
+
+  // true iff the value at i equals val; a missing entry counts as zero
+  bool at_equals(int i,T const& val) const {
+    const_iterator it=values_.find(i);
+    if (it==values_.end()) return !val;
+    return it->second==val;
+  }
+
+  // true iff every entry of o has an equal value here (missing == zero)
+  bool contains_i(Self const& o) const {
+    for (const_iterator i=o.begin(),e=o.end();i!=e;++i)
+      if (!at_equals(i->first,i->second))
+        return false;
+    return true;
+  }
+
+  // true iff every key stored in o is also stored here
+  bool contains_keys_of(Self const& o) const {
+    for (const_iterator i=o.begin(),e=o.end();i!=e;++i)
+      if (values_.find(i->first)==values_.end())
+        return false;
+    return true;
+  }
+
+#ifndef SPARSE_VECTOR_HASH
+  // lexicographic order over the (key,value) sequence; needs an ordered map
+  bool operator<(const SparseVector<T> &other) const {
+    const_iterator it = values_.begin();
+    const_iterator other_it = other.values_.begin();
+
+    for (; it != values_.end() && other_it != other.values_.end(); ++it, ++other_it)
+    {
+      if (it->first < other_it->first) return true;
+      if (it->first > other_it->first) return false;
+      if (it->second < other_it->second) return true;
+      if (it->second > other_it->second) return false;
+    }
+    return values_.size() < other.values_.size();
+  }
+#endif
+
+  // number of stored entries (explicit zeros included)
+  int size() const { return values_.size(); }
+
+  int num_active() const { return values_.size(); }
+  bool empty() const { return values_.empty(); }
+
+  const_iterator begin() const { return values_.begin(); }
+  const_iterator end() const { return values_.end(); }
+
+  void clear() {
+    values_.clear();
+  }
+
+  void swap(SparseVector<T>& other) {
+    values_.swap(other.values_);
+  }
+
+private:
+  MapType values_;
+};
+
+//like a pair but can live in a union, because it lacks default+copy ctors, dtor.
+// Having no constructor at all, values are built with the featval() helper.
+template <class T>
+struct feature_val {
+  int fid;  // feature id
+  T val;    // value attached to that feature
+};
+
+// Builds a feature_val<T> holding the given feature id and value.
+template <class T>
+inline feature_val<T> featval(int fid,T const &val) {
+  feature_val<T> made;
+  made.val=val;
+  made.fid=fid;
+  return made;
+}
+
+
+// Append-only list of (feature id, value) pairs kept in a SmallVector.
+// doesn't support fast indexing directly
+//
+// Changes from the previous revision, which could not be used at all:
+// the interface is now public, set_value builds its pair with featval()
+// (feature_val has no constructor), and sparse() passes a pointer to copy_to.
+template <class T>
+class SparseVectorList {
+public:
+  typedef feature_val<T> Pair;
+  typedef SmallVector<Pair,1> List;
+  typedef typename List::const_iterator const_iterator;
+  SparseVectorList() {  }
+  // from a dense range [i,end): position c in the range becomes feature id c;
+  // zero elements are skipped
+  template <class I>
+  SparseVectorList(I i,I const& end) {
+    int c=0;
+    for (;i<end;++i,++c) {
+      if (*i)
+        p.push_back(featval(c,*i));
+    }
+    p.compact();
+  }
+  // from a dense vector: index i becomes feature id i; zero elements are skipped
+  explicit SparseVectorList(std::vector<T> const& v) {
+    for (unsigned i=0;i<v.size();++i) {
+      T const& t=v[i];
+      if (t)
+        p.push_back(featval(i,t));
+    }
+    p.compact();
+  }
+  // unlike SparseVector, this doesn't overwrite - but conversion to SparseVector will use last value, which is the same
+  void set_value(int i,T const& val) {
+    p.push_back(featval(i,val));
+  }
+  // writes every pair into *to with set_value, in list order, keeping
+  // whatever other entries *to already has
+  void overlay(SparseVector<T> *to) const {
+    for (int i=0;i<p.size();++i)
+      to->set_value(p[i].fid,p[i].val);
+  }
+  // like overlay, but *to is cleared first
+  void copy_to(SparseVector<T> *to) const {
+    to->clear();
+    overlay(to);
+  }
+  // this list as a freshly built SparseVector
+  SparseVector<T> sparse() const {
+    SparseVector<T> r;
+    copy_to(&r);
+    return r;
+  }
+private:
+  List p;
+};
+
+// element-wise sum of a and b as a new vector
+template <typename T>
+SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) {
+  SparseVector<T> sum(a);
+  sum += b;
+  return sum;
+}
+
+// copy of a with every stored value multiplied by the double b
+template <typename T>
+SparseVector<T> operator*(const SparseVector<T>& a, const double& b) {
+  SparseVector<T> scaled(a);
+  scaled *= b;
+  return scaled;
+}
+
+// copy of a with every stored value multiplied by b
+template <typename T>
+SparseVector<T> operator*(const SparseVector<T>& a, const T& b) {
+  SparseVector<T> scaled(a);
+  scaled *= b;
+  return scaled;
+}
+
+// scalar-on-the-left form: copy of b with every stored value multiplied by a
+template <typename T>
+SparseVector<T> operator*(const double& a, const SparseVector<T>& b) {
+  SparseVector<T> scaled(b);
+  scaled *= a;
+  return scaled;
+}
+
+// stream insertion: delegates to the vector's member operator<<
+template <typename T>
+std::ostream &operator<<(std::ostream &out, const SparseVector<T> &vec)
+{
+    std::ostream &same_stream = vec.operator<<(out);
+    return same_stream;
+}
+
+namespace B64 {
+  void Encode(double objective, const SparseVector<double>& v, std::ostream* out);
+  // returns false if failed to decode
+  bool Decode(double* objective, SparseVector<double>* v, const char* data, size_t size);
+}
+
+#endif
diff --git a/utils/static_utoa.h b/utils/static_utoa.h
new file mode 100755
index 00000000..fe5f6d92
--- /dev/null
+++ b/utils/static_utoa.h
@@ -0,0 +1,115 @@
+#ifndef STATIC_UTOA_H
+#define STATIC_UTOA_H
+
+#include "threadlocal.h"
+
+
+#include <string>
+#include <cstring>
+
+#define DIGIT_LOOKUP_TABLE 0
+
+// Scratch buffer and constants shared by the static_* conversion helpers.
+// NOTE(review): this is an anonymous namespace inside a header, so each
+// translation unit including the file gets its own copy of these objects.
+namespace {
+THREADLOCAL char utoa_buf[] = "01234567890123456789"; // to put end of string character at buf[20]
+const unsigned utoa_bufsize=sizeof(utoa_buf);        // includes the terminating '\0'
+const unsigned utoa_bufsizem1=utoa_bufsize-1;        // offset of the terminating '\0'
+// NOTE(review): DIGIT_LOOKUP_TABLE is #defined (to 0) earlier in this file, so
+// this #ifdef is always taken; an #if test may have been intended - confirm.
+#ifdef DIGIT_LOOKUP_TABLE
+char digits[] = "0123456789";
+#endif
+}
+
+// Maps a digit value d to its character.  No range check is performed;
+// callers are expected to pass 0..9.  Uses the `digits` table when
+// DIGIT_LOOKUP_TABLE is defined, plain '0'+d arithmetic otherwise.
+inline char digit_to_char(int d) {
+  return
+#ifdef DIGIT_LOOKUP_TABLE
+    digits[d];
+#else
+    '0'+d;
+#endif
+}
+
+// Writes the decimal digits of n backwards, ending just before num, and
+// returns a pointer to the first digit, so the text occupies [return,num).
+// No terminator is written: set *num=0 yourself before calling if you want a
+// c_str.
+inline char *utoa(char *num,unsigned n) {
+  char *out=num;
+  do {
+    const unsigned quotient=n/10;
+    // remainder from the quotient we already have instead of a second division
+    *--out=digit_to_char(n-10*quotient);
+    n=quotient;
+  } while (n);
+  return out;
+}
+
+// Formats n at the tail of the shared utoa_buf and returns a pointer to the
+// first digit; the text ends at the buffer's own terminating '\0'.  The next
+// static_* call overwrites the result.
+inline char *static_utoa(unsigned n) {
+  char *const terminator=utoa_buf+utoa_bufsizem1;
+  return utoa(terminator,n);
+}
+
+//returns position of '\0' terminating number written starting at to
+// The digits of n and their terminating '\0' are copied to `to`.  The
+// returned pointer addresses that '\0', so a following append continues the
+// string.  The previous code returned one past the terminator, which
+// contradicted the contract above and left a '\0' embedded when chaining.
+inline char* append_utoa(char *to,unsigned n) {
+  char *s=static_utoa(n);
+  int ns=(utoa_buf+utoa_bufsize)-s;  // digit count + 1 for the terminator
+  std::memcpy(to,s,ns);
+  return to+ns-1;
+}
+
+// so named to avoid gcc segfault when named itoa
+// Signed counterpart of utoa: writes n (with a leading '-' when negative)
+// backwards, ending just before p, and returns a pointer to the first
+// character written.
+inline char *itoa(char *p,int n) {
+  if (n>=0)
+    return utoa(p,n);
+  // negate in unsigned arithmetic: plain -n overflows (undefined behavior)
+  // when n is INT_MIN, whereas 0u-(unsigned)n is well defined and yields the
+  // magnitude
+  p=utoa(p,0u-static_cast<unsigned>(n));
+  *--p='-';
+  return p;
+}
+
+// Signed counterpart of static_utoa: formats n at the tail of the shared
+// utoa_buf and returns a pointer to its first character.
+inline char *static_itoa(int n) {
+  char *const terminator=utoa_buf+utoa_bufsizem1;
+  return itoa(terminator,n);
+}
+
+
+// Decimal text of n as a std::string, formatted in a local buffer.
+inline std::string utos(unsigned n) {
+  char scratch[20];
+  char *const stop=scratch+sizeof(scratch);
+  char *const start=utoa(stop,n);
+  return std::string(start,stop);
+}
+
+// Decimal text of n (with '-' when negative) as a std::string, formatted in a
+// local buffer.
+inline std::string itos(int n) {
+  char scratch[20];
+  char *const stop=scratch+sizeof(scratch);
+  char *const start=itoa(stop,n);
+  return std::string(start,stop);
+}
+
+#ifdef ITOA_SAMPLE
+# include <cstdio>
+# include <sstream>
+# include <iostream>
+using namespace std;
+
+// Sample driver (only built with ITOA_SAMPLE): parses each argument once as
+// int and once as unsigned, then prints both through printf and through the
+// static_*/ *tos helpers so the columns can be compared by eye.
+int main(int argc,char *argv[]) {
+  printf("d U d U d U\n");
+  for (int i=1;i<argc;++i) {
+    int n;
+    unsigned un;
+    // NOTE(review): sscanf results are not checked; n/un stay uninitialized
+    // for non-numeric arguments
+    sscanf(argv[i],"%d",&n);
+    sscanf(argv[i],"%u",&un);
+    // two printf calls: static_itoa and static_utoa share one buffer, so they
+    // must not be evaluated within the same call
+    printf("%d %u %s",n,un,static_itoa(n));
+    printf(" %s %s %s\n",static_utoa(un),itos(n).c_str(),utos(un).c_str());
+  }
+  return 0;
+}
+#endif
+
+#endif
diff --git a/utils/stringlib.cc b/utils/stringlib.cc
new file mode 100644
index 00000000..7aaee9f0
--- /dev/null
+++ b/utils/stringlib.cc
@@ -0,0 +1,87 @@
+#include "stringlib.h"
+
+#include <cstring>
+#include <cstdlib>
+#include <cassert>
+#include <iostream>
+#include <map>
+
+using namespace std;
+
+// Splits a translator input line of the form
+//   source
+//   source ||| reference
+// into *input and *ref.
+//
+// A line starting with {"rules": is searched for "|||" only after its first
+// "}}"; if no "}}" exists the line is reported on stderr and the process
+// aborts.  Without a "|||" the whole line becomes *input and *ref is left
+// untouched.  Otherwise one blank (space or tab) directly before and after
+// the "|||" is dropped, *ref is cleared and, when any reference text follows,
+// set to it.  ref may be null only if no reference text is present.
+void ParseTranslatorInput(const std::string& line, std::string* input, std::string* ref) {
+  std::size_t hint = 0;
+  if (line.find("{\"rules\":") == 0) {
+    hint = line.find("}}");
+    if (hint == std::string::npos) {
+      std::cerr << "Syntax error: " << line << std::endl;
+      std::abort();
+    }
+    hint += 2;
+  }
+  const std::size_t pos = line.find("|||", hint);
+  if (pos == std::string::npos) { *input = line; return; }
+  if (ref) ref->clear();
+
+  // source side: everything before the separator, minus one trailing blank.
+  // (blindly using pos-1 wrapped around when the line started with "|||")
+  std::size_t src_end = pos;
+  if (src_end > 0 && (line[src_end - 1] == ' ' || line[src_end - 1] == '\t')) --src_end;
+  *input = line.substr(0, src_end);
+
+  // reference side: everything after the separator, minus one leading blank.
+  // (blindly using pos+4 threw std::out_of_range when the line ended in "|||")
+  std::size_t ref_begin = pos + 3;
+  if (ref_begin < line.size() && (line[ref_begin] == ' ' || line[ref_begin] == '\t')) ++ref_begin;
+  if (ref_begin < line.size()) {
+    assert(ref);
+    ref->assign(line, ref_begin, std::string::npos);
+  }
+}
+
+// If *pline starts with a <seg ...> tag (case-insensitive), replaces *pline
+// by the trimmed text between the tag and </seg> (or the end of the line when
+// there is no </seg>) and stores the tag's attributes in *out as
+// label -> value.  Lines that do not start with "<seg", or lack the closing
+// '>', are left untouched.
+//
+// Attributes are parsed from the lowercased copy of the line, so both labels
+// and values end up lowercased.  Values may be double-quoted or run up to the
+// next space.
+void ProcessAndStripSGML(string* pline, map<string, string>* out) {
+  map<string, string>& meta = *out;
+  string& line = *pline;
+  string lline = LowercaseString(line);
+  if (lline.find("<seg")!=0) return;
+  size_t close = lline.find(">");
+  if (close == string::npos) return; // error
+  size_t end = lline.find("</seg>");
+  // attribute text between "<seg" and '>'
+  string seg = Trim(lline.substr(4, close-4));
+  // segment text, taken from the original (not lowercased) line
+  string text = line.substr(close+1, end - close - 1);
+  // normalize `label = value` to `label=value`: drop one space next to an '='
+  // and rescan from the start until none is left
+  for (size_t i = 1; i < seg.size(); i++) {
+    if (seg[i] == '=' && seg[i-1] == ' ') {
+      string less = seg.substr(0, i-1) + seg.substr(i);
+      seg = less; i = 0; continue;
+    }
+    if (seg[i] == '=' && seg[i+1] == ' ') {
+      string less = seg.substr(0, i+1);
+      if (i+2 < seg.size()) less += seg.substr(i+2);
+      seg = less; i = 0; continue;
+    }
+  }
+  line = Trim(text);
+  if (seg == "") return;
+  // peel attributes off the front of seg one at a time; after each one seg
+  // holds the unparsed remainder and scanning restarts at its beginning
+  for (size_t i = 1; i < seg.size(); i++) {
+    if (seg[i] == '=') {
+      string label = seg.substr(0, i);
+      string val = seg.substr(i+1);
+      if (val[0] == '"') {
+        val = val.substr(1);
+        size_t close = val.find('"');
+        if (close == string::npos) {
+          cerr << "SGML parse error: missing \"\n";
+          seg = "";
+          i = 0;
+        } else {
+          seg = val.substr(close+1);
+          val = val.substr(0, close);
+          i = 0;
+        }
+      } else {
+        size_t close = val.find(' ');
+        if (close == string::npos) {
+          seg = "";
+          i = 0;
+        } else {
+          seg = val.substr(close+1);
+          val = val.substr(0, close);
+          // restart in the remainder like every other branch; without this
+          // the scan resumed past the next '=' and e.g. `id=12 a=b` lost `a`
+          i = 0;
+        }
+      }
+      label = Trim(label);
+      seg = Trim(seg);
+      meta[label] = val;
+    }
+  }
+}
+
diff --git a/utils/stringlib.h b/utils/stringlib.h
new file mode 100644
index 00000000..84e95d44
--- /dev/null
+++ b/utils/stringlib.h
@@ -0,0 +1,267 @@
+#ifndef CDEC_STRINGLIB_H_
+#define CDEC_STRINGLIB_H_
+
+//usage: string s=MAKESTR(1<<" "<<c);
+#define MAKESTR(expr) ((dynamic_cast<ostringstream &>(ostringstream()<<std::dec<<expr)).str())
+// std::dec (or seekp, or another manip) is needed to convert to std::ostream reference.
+
+#ifdef STRINGLIB_DEBUG
+#include <iostream>
+#define SLIBDBG(x) do { std::cerr<<"DBG(stringlib): "<<x<<std::endl; } while(0)
+#else
+#define SLIBDBG(x)
+#endif
+
+#include <map>
+#include <vector>
+#include <cctype>
+#include <cstring>
+#include <string>
+#include <sstream>
+#include <algorithm>
+
+// Position of the first character at or after `starting` that is not in
+// `ws`, or std::string::npos when there is none.
+inline std::size_t skip_ws(std::string const& s,std::size_t starting=0,char const* ws=" \t\n\r") {
+  const std::size_t first_kept=s.find_first_not_of(ws,starting);
+  return first_kept;
+}
+
+// One past the last character at or before `ending` that is not in `ws`
+// (npos when there is none), so
+// string(s.begin()+skip_ws(s),s.begin()+trailing_ws(s)) strips both ends.
+inline std::size_t trailing_ws(std::string const& s,std::size_t ending=std::string::npos,char const* ws=" \t\n\r") {
+  const std::size_t last_kept=s.find_last_not_of(ws,ending);
+  return last_kept==std::string::npos ? last_kept : last_kept+1;
+}
+
+// Copy of s without leading and trailing whitespace (" \t\n\r").
+// An empty or all-whitespace string yields "".  That case is handled up
+// front because both helpers return npos for it, and the previous
+// s.begin()+npos iterator arithmetic was undefined.
+inline std::string strip_ws(std::string const& s) {
+  const std::size_t b=skip_ws(s);
+  if (b==std::string::npos) return std::string();
+  // a non-whitespace char exists, so trailing_ws(s) is a valid offset > b
+  return std::string(s,b,trailing_ws(s)-b);
+}
+
+
+// True iff line contains no '\n' at all.
+// but we want to allow terminal newlines/blanks - see is_single_line_stripped
+inline bool is_single_line(std::string const& line) {
+  return line.find('\n')==std::string::npos;
+}
+
+// is_single_line(strip_ws(line)): true iff no '\n' occurs between the first
+// and the last non-whitespace character of line.
+inline bool is_single_line_stripped(std::string const& line) {
+  const std::size_t first_nl=line.find('\n',skip_ws(line));
+  if (first_nl==std::string::npos) return true;
+  return first_nl>=trailing_ws(line);
+}
+
+// Function object that upper-cases one char (per the current C locale).
+struct toupperc {
+  inline char operator()(char c) const {
+    // std::toupper requires a value representable as unsigned char (or EOF);
+    // passing a negative plain char is undefined behavior
+    return static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
+  }
+};
+
+// Upper-cased copy of s (each char mapped through toupperc).
+inline std::string toupper(std::string s) {
+  const toupperc up=toupperc();
+  for (std::string::iterator c=s.begin();c!=s.end();++c)
+    *c=up(*c);
+  return s;
+}
+
+// True iff the range [bsub,esub) is a prefix of the range [bstr,estr).
+template <class Istr, class Isubstr> inline
+bool match_begin(Istr bstr,Istr estr,Isubstr bsub,Isubstr esub)
+{
+  for (;bsub != esub;++bsub,++bstr) {
+    // ran out of string, or found a mismatch, before the prefix ended
+    if (bstr == estr || *bsub != *bstr)
+      return false;
+  }
+  return true;
+}
+
+// Iterator-range string, container prefix: true iff prefix starts [bstr,estr).
+template <class Istr, class Prefix> inline
+bool match_begin(Istr bstr,Istr estr,Prefix prefix)
+{
+  const bool starts_with=match_begin(bstr,estr,prefix.begin(),prefix.end());
+  return starts_with;
+}
+
+// Container string, container prefix: true iff str starts with prefix.
+template <class Str, class Prefix> inline
+bool match_begin(Str const& str,Prefix const& prefix)
+{
+  const bool starts_with=match_begin(str.begin(),str.end(),prefix.begin(),prefix.end());
+  return starts_with;
+}
+
+
+// read line in the form of either:
+//   source
+//   source ||| target
+// source will be returned as a string, target must be a sentence or
+// a lattice (in PLF format) and will be returned as a Lattice object
+void ParseTranslatorInput(const std::string& line, std::string* input, std::string* ref);
+struct Lattice;
+void ParseTranslatorInputLattice(const std::string& line, std::string* input, Lattice* ref);
+
+// Copy of str with every leading and trailing character that occurs in
+// dropChars removed; "" when nothing else remains.
+inline std::string Trim(const std::string& str, const std::string& dropChars = " \t") {
+  const std::size_t last_kept = str.find_last_not_of(dropChars);
+  if (last_kept == std::string::npos) return std::string();
+  const std::size_t first_kept = str.find_first_not_of(dropChars);
+  return str.substr(first_kept, last_kept - first_kept + 1);
+}
+
+// Splits str at every occurrence of delimiter and stores the non-empty
+// pieces in *res (previous contents are discarded).
+//
+// Works on the input in place: no scratch copy, positions are size_t
+// throughout, and a token containing an embedded '\0' is kept whole instead
+// of being cut off at it.
+inline void Tokenize(const std::string& str, char delimiter, std::vector<std::string>* res) {
+  res->clear();
+  std::size_t last = 0;  // start of the token currently being scanned
+  for (std::size_t i = 0; i < str.size(); ++i) {
+    if (str[i] != delimiter) continue;
+    if (last != i)
+      res->push_back(str.substr(last, i - last));
+    last = i + 1;
+  }
+  if (last != str.size())
+    res->push_back(str.substr(last));
+}
+
+// Number of non-empty delimiter-separated tokens in str (see Tokenize).
+inline unsigned NTokens(const std::string& str, char delimiter)
+{
+  std::vector<std::string> tokens;
+  Tokenize(str,delimiter,&tokens);
+  const unsigned count=tokens.size();
+  return count;
+}
+
+// Lower-cased copy of `in` (per the current C locale, byte by byte).
+inline std::string LowercaseString(const std::string& in) {
+  std::string res(in);
+  for (std::size_t i = 0; i < res.size(); ++i) {
+    // tolower requires a value representable as unsigned char (or EOF);
+    // passing a negative plain char is undefined behavior
+    res[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(res[i])));
+  }
+  return res;
+}
+
+// Number of non-overlapping occurrences of sub in str, scanning left to
+// right.  An empty sub yields 0: it matches at every position without
+// consuming anything, which previously made the loop spin forever.
+inline int CountSubstrings(const std::string& str, const std::string& sub) {
+  if (sub.empty()) return 0;
+  int res = 0;
+  for (std::size_t p = str.find(sub); p != std::string::npos;
+       p = str.find(sub, p + sub.size()))
+    ++res;
+  return res;
+}
+
+// Splits `in` at spaces and tabs, stores the non-empty pieces in *out
+// (previous contents are discarded) and returns how many there are.
+// Positions are size_t throughout and the unused scratch string is gone.
+inline int SplitOnWhitespace(const std::string& in, std::vector<std::string>* out) {
+  out->clear();
+  std::size_t start = 0;  // start of the token currently being scanned
+  for (std::size_t i = 0; i < in.size(); ++i) {
+    if (in[i] != ' ' && in[i] != '\t') continue;
+    if (i > start)
+      out->push_back(in.substr(start, i - start));
+    start = i + 1;
+  }
+  if (in.size() > start)
+    out->push_back(in.substr(start));
+  return static_cast<int>(out->size());
+}
+
+// Convenience form returning the space/tab separated tokens of `in` by value.
+inline std::vector<std::string> SplitOnWhitespace(std::string const& in)
+{
+  std::vector<std::string> tokens;
+  SplitOnWhitespace(in,&tokens);
+  return tokens;
+}
+
+
+// Owns a private, writable, 0-terminated copy of a string's characters.
+struct mutable_c_str {
+  // because making a copy of a string might not copy its storage, so modifying a c_str() could screw up original (nobody uses cow nowadays because it needs locking under threading)
+  char *p;
+  mutable_c_str(std::string const& s) : p((char *)::operator new(s.size()+1)) {
+    std::memcpy(p,s.data(),s.size());
+    p[s.size()]=0;
+  }
+  ~mutable_c_str() { ::operator delete(p); }
+private:
+  // non-copyable: a copy would share p and free it twice.  Assignment is
+  // disabled together with copy construction for the same reason.
+  mutable_c_str(mutable_c_str const&);
+  mutable_c_str& operator=(mutable_c_str const&);
+};
+
+// True for the two hardcoded token separators, ' ' and '\t'.
+//NOTE: you should have stripped endline chars out first.
+inline bool IsWordSep(char c) {
+  switch (c) {
+    case ' ':
+    case '\t':
+      return true;
+    default:
+      return false;
+  }
+}
+
+
+// Calls f(token) once for every space/tab separated token in [p,end), in
+// order, passing each token as a 0-terminated char*.  The buffer is modified
+// in place: the separator following a token is overwritten with 0.
+template <class F>
+// *end must be 0 (i.e. [p,end] is valid storage), which will be written to with 0 to separate c string tokens
+void VisitTokens(char *p,char *const end,F f) {
+  SLIBDBG("VisitTokens. p="<<p<<" Nleft="<<end-p);
+  if (p==end) return;
+  char *last; // 0 terminated already.  this is ok to mutilate because s is a copy of the string passed in.  well, barring copy on write i guess.
+  while(IsWordSep(*p)) { ++p;if (p==end) return; } // skip init whitespace
+  last=p; // first non-ws char
+  for(;;) {
+    SLIBDBG("Start of word. last="<<last<<" *p="<<*p<<" Nleft="<<end-p);
+    // last==p, pointing at first non-ws char not yet translated into f(word) call
+    for(;;) {// p to end of word
+      ++p;
+      if (p==end) {
+        // final token: it runs up to end, where the caller's 0 terminates it
+        f(last);
+        SLIBDBG("Returning. word="<<last<<" *p="<<*p<<" Nleft="<<end-p);
+        return;
+      }
+      if (IsWordSep(*p)) break;
+    }
+    // p is on the separator right after the token: terminate the token there
+    *p=0;
+    f(last);
+    SLIBDBG("End of word. word="<<last<<" rest="<<p+1<<" Nleft="<<end-p);
+    for(;;) { // again skip extra whitespace
+      ++p;
+      if (p==end) return;
+      if (!IsWordSep(*p)) break;
+    }
+    last=p;
+  }
+}
+
+// 0-terminated form: visits the tokens of the C string p, which is modified
+// in place.
+template <class F>
+void VisitTokens(char *p,F f) {
+  char *const terminator=p+std::strlen(p);
+  VisitTokens(p,terminator,f);
+}
+
+
+// std::string form: calls f once per space/tab separated token of s, each
+// passed as a 0-terminated char* into a private writable copy, so s itself
+// is never modified.  (An unreachable `if (0)` alternative built on
+// SplitOnWhitespace was removed; it also forced F to accept std::string.)
+template <class F>
+void VisitTokens(std::string const& s,F f) {
+  if (s.empty()) return;
+  mutable_c_str mp(s);
+  SLIBDBG("mp="<<mp.p);
+  VisitTokens(mp.p,mp.p+s.size(),f);
+}
+
+// Splits `in` on spaces/tabs: the first token goes to *cmd, the remaining
+// tokens, re-joined with single spaces, to *param.  Both outputs are cleared
+// first and stay empty when `in` holds no token.
+inline void SplitCommandAndParam(const std::string& in, std::string* cmd, std::string* param) {
+  cmd->clear();
+  param->clear();
+  std::vector<std::string> words;
+  SplitOnWhitespace(in, &words);
+  if (words.empty()) return;
+  *cmd = words[0];
+  for (std::size_t w = 1; w < words.size(); ++w) {
+    if (w != 1) *param += " ";
+    *param += words[w];
+  }
+}
+
+void ProcessAndStripSGML(std::string* line, std::map<std::string, std::string>* out);
+
+// given the first character of a UTF8 block, find out how wide it is
+// see http://en.wikipedia.org/wiki/UTF-8 for more info
+// Returns 1-4 according to the lead byte's bit pattern, or 0 when x is not a
+// lead byte recognized here.
+inline unsigned int UTF8Len(unsigned char x) {
+  if (x < 0x80) return 1;            // 0xxxxxxx
+  if ((x & 0xE0) == 0xC0) return 2;  // 110xxxxx
+  if ((x & 0xF0) == 0xE0) return 3;  // 1110xxxx
+  if ((x & 0xF8) == 0xF0) return 4;  // 11110xxx
+  return 0;
+}
+
+#endif
diff --git a/utils/stringlib_test.cc b/utils/stringlib_test.cc
new file mode 100755
index 00000000..f66cdbeb
--- /dev/null
+++ b/utils/stringlib_test.cc
@@ -0,0 +1,17 @@
+#define STRINGLIB_DEBUG
+#include "stringlib.h"
+
+using namespace std;
+struct print { // visitor passed to VisitTokens in main: writes each token it receives to cout
+  template <class S>
+  void operator()(S const& s) const {
+    cout<<s<<endl; // one token per line
+  }
+};
+
+char p[]=" 1 are u 2 serious?"; // writable array (starts with a space); used with the char* overload
+int main(int argc, char *argv[]) {
+  std::string const& w="verylongword"; // const reference bound to a temporary std::string
+  VisitTokens(p,print()); // char* overload
+  VisitTokens(w,print()); // std::string const& overload
+}
diff --git a/utils/tdict.cc b/utils/tdict.cc
new file mode 100644
index 00000000..1f68feae
--- /dev/null
+++ b/utils/tdict.cc
@@ -0,0 +1,154 @@
+#define TD_ALLOW_UNDEFINED_WORDIDS 0
+
+// if 1, word ids that are >= end() will give a numeric token name (single per-thread shared buffer), which of course won't be Convert-able back to the id, because it's not added to the dict.  This is a convenience for logging fake token indices.  Any tokens actually added to the dict may cause end() to overlap the range of fake ids you were using - that's up to you to prevent.
+
+#include <stdlib.h>
+#include <cstring>
+#include <sstream>
+#include "Ngram.h"
+#include "dict.h"
+#include "tdict.h"
+#include "Vocab.h"
+#include "stringlib.h"
+#include "threadlocal.h"
+
+using namespace std;
+
+Vocab TD::dict_(0,TD::max_wordid);
+WordID TD::ss=dict_.ssIndex();
+WordID TD::se=dict_.seIndex();
+WordID TD::unk=dict_.unkIndex();
+char const*const TD::ss_str=Vocab_SentStart;
+char const*const TD::se_str=Vocab_SentEnd;
+char const*const TD::unk_str=Vocab_Unknown;
+
+// Registers the tokens pre+(i-base)+">" for i in [base,e) with TD::Convert and asserts that each one receives id i.
+inline void pad(std::string const& pre,int base,int e) {
+  assert(base<=e);
+  ostringstream o;
+  for (int i=base;i<e;++i) {
+    o.str(std::string()); // reset to an empty buffer; o.str(pre) left the put position at 0, so the suffix overwrote the start of pre
+    o<<pre<<(i-base)<<'>'; // write the prefix through the stream so the index and '>' land after it
+    WordID id=TD::Convert(o.str());
+    assert(id==i); // this fails.  why?
+  }
+}
+
+
+namespace {
+struct TD_init { // hook whose constructor runs when the file-local object td_init below is initialized
+  TD_init() {
+    /*
+      // disabled for now since it's breaking trunk
+    assert(TD::Convert(TD::ss_str)==TD::ss);
+    assert(TD::Convert(TD::se_str)==TD::se);
+    assert(TD::Convert(TD::unk_str)==TD::unk);
+    assert(TD::none==Vocab_None);
+    pad("<FILLER",TD::end(),TD::reserved_begin);
+    assert(TD::end()==TD::reserved_begin);
+    int reserved_end=TD::begin();
+    pad("<RESERVED",TD::end(),reserved_end);
+    assert(TD::end()==reserved_end);
+    */
+  } // the entire body is commented out, so constructing td_init currently does nothing
+};
+
+TD_init td_init;
+}
+
+unsigned int TD::NumWords() { // forwards to dict_.numWords() on the shared Vocab
+  return dict_.numWords();
+}
+WordID TD::end() { // forwards to dict_.highIndex(); tdict.h describes the result as the next id to be assigned
+  return dict_.highIndex();
+}
+
+WordID TD::Convert(const std::string& s) { // token text -> id via dict_.addWord (NOTE(review): presumably returns the existing id for a known word - confirm against Vocab)
+  return dict_.addWord((VocabString)s.c_str());
+}
+
+WordID TD::Convert(char const* s) { // C-string variant of the std::string overload; same dict_.addWord call
+  return dict_.addWord((VocabString)s);
+}
+
+
+#if TD_ALLOW_UNDEFINED_WORDIDS
+# include "static_utoa.h"
+char undef_prefix[]="UNDEF_";
+static const int undefpre_n=sizeof(undef_prefix)/sizeof(undef_prefix[0]); // NOTE(review): sizeof counts the terminating NUL, so this is 7, one more than the 6 visible characters
+THREADLOCAL char undef_buf[]="UNDEF_________________"; // scratch buffer reused by every undef_token call
+inline char const* undef_token(WordID w)
+{
+  append_utoa(undef_buf+undefpre_n,w); // append_utoa comes from static_utoa.h; presumably writes w's decimal digits at that offset - confirm
+  return undef_buf; // points into the shared buffer, so a later call replaces the text
+}
+#endif
+
+const char* TD::Convert(WordID w) { // id -> token text via dict_.getWord
+#if TD_ALLOW_UNDEFINED_WORDIDS
+  if (w>=dict_.highIndex()) return undef_token(w); // ids at or beyond highIndex() get a synthesized name instead of a lookup
+#endif
+  return dict_.getWord((VocabIndex)w);
+}
+
+
+void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids) { // *ids becomes the id of each string, in order
+  ids->clear(); // previous contents of *ids are discarded
+  for (vector<string>::const_iterator i = strings.begin(); i != strings.end(); ++i)
+    ids->push_back(TD::Convert(*i));
+}
+
+std::string TD::GetString(const std::vector<WordID>& str) { // token texts of str joined by single spaces
+  ostringstream o;
+  for (int i=0;i<str.size();++i) {
+    if (i) o << ' '; // separator before every token except the first
+    o << TD::Convert(str[i]);
+  }
+  return o.str();
+}
+
+std::string TD::GetString(WordID const* i,WordID const* e) { // same as the vector overload, for the pointer range [i,e)
+  ostringstream o;
+  bool sp=false; // becomes true once the first token has been written
+  for (;i<e;++i,sp=true) {
+    if (sp)
+      o << ' ';
+    o << TD::Convert(*i);
+  }
+  return o.str();
+}
+
+int TD::AppendString(const WordID& w, int pos, int bufsize, char* buffer) // copies w's text into buffer starting at offset pos
+{
+  const char* word = TD::Convert(w);
+  const char* const end_buf = buffer + bufsize;
+  char* dest = buffer + pos;
+  while(dest < end_buf && *word) { // stop at the end of the buffer or the end of the word, whichever comes first
+    *dest = *word;
+    ++dest;
+    ++word;
+  }
+  return (dest - buffer); // offset just past the last character written; no terminating NUL is stored
+}
+
+
+namespace {
+struct add_wordids { // token visitor: converts each token it is called with to a WordID and appends it to *ids
+  typedef std::vector<WordID> Ws;
+  Ws *ids; // destination vector; only the pointer is stored and copied
+  explicit add_wordids(Ws *i) : ids(i) {  }
+  add_wordids(const add_wordids& o) : ids(o.ids) {  }
+  void operator()(char const* s) { // C-string token
+    ids->push_back(TD::Convert(s));
+  }
+  void operator()(std::string const& s) { // std::string token
+    ids->push_back(TD::Convert(s));
+  }
+};
+
+}
+
+void TD::ConvertSentence(std::string const& s, std::vector<WordID>* ids) { // *ids becomes the ids of the tokens VisitTokens finds in s
+  ids->clear(); // previous contents of *ids are discarded
+  VisitTokens(s,add_wordids(ids));
+}
diff --git a/utils/tdict.h b/utils/tdict.h
new file mode 100644
index 00000000..a7b3ee1c
--- /dev/null
+++ b/utils/tdict.h
@@ -0,0 +1,50 @@
+#ifndef _TDICT_H_
+#define _TDICT_H_
+
+#include <string>
+#include <vector>
+#include "wordid.h"
+#include <assert.h>
+
+class Vocab;
+
+struct TD { // all-static interface to one shared Vocab (dict_): conversion between token strings and WordIDs
+  /* // disabled for now
+  static const int reserved_begin=10; // allow room for SRI special tokens e.g. unk ss se pause.  tokens until this get "<FILLERi>"
+  static const int n_reserved=10; // 0...n_reserved-1 get token '<RESERVEDi>'
+  static inline WordID reserved(int i) {
+    assert(i>=0 && i<n_reserved);
+    return (WordID)(reserved_begin+i);
+  }
+  static inline WordID begin() {
+    return reserved(n_reserved);
+  }
+  */
+  static const WordID max_wordid=0x7fffffff;
+  static const WordID none=(WordID)-1; // Vocab_None
+  static char const* const ss_str;  //="<s>";
+  static char const* const se_str;  //="</s>";
+  static char const* const unk_str; //="<unk>";
+  static WordID ss,se,unk; // x=Convert(x_str)
+  static WordID end(); // next id to be assigned; [begin,end) give the non-reserved tokens seen so far
+  static Vocab dict_; // the shared dictionary behind every function below
+  static void ConvertSentence(std::string const& sent, std::vector<WordID>* ids); // tokenizes sent and replaces *ids with the token ids
+  static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids); // replaces *ids with one id per string
+  static std::string GetString(const std::vector<WordID>& str); // token texts joined by single spaces
+  static std::string GetString(WordID const* i,WordID const* e); // same, for the range [i,e)
+  static int AppendString(const WordID& w, int pos, int bufsize, char* buffer); // copies w's text to buffer+pos; returns the offset after the copy
+  static unsigned int NumWords();
+  static WordID Convert(const std::string& s); // string -> id
+  static WordID Convert(char const* s); // C string -> id
+  static const char* Convert(WordID w); // id -> string
+};
+
+struct ToTD { // function object wrapping TD::Convert(std::string const&)
+  typedef WordID result_type;
+  result_type operator()(std::string const& t) const {
+    return TD::Convert(t);
+  }
+};
+
+
+#endif
diff --git a/utils/test_data/weights b/utils/test_data/weights
new file mode 100644
index 00000000..ea70229c
--- /dev/null
+++ b/utils/test_data/weights
@@ -0,0 +1,8 @@
+# hiero
+WordPenalty -0.387029
+LanguageModel 0.253195
+PhraseModel_0 0.142926
+PhraseModel_1 0.465119
+PhraseModel_2 0.079503
+CNPosteriorProbability 0.09259
+Inf -inf
diff --git a/utils/threadlocal.h b/utils/threadlocal.h
new file mode 100755
index 00000000..d79f5d9d
--- /dev/null
+++ b/utils/threadlocal.h
@@ -0,0 +1,71 @@
+#ifndef THREADLOCAL_H
+#define THREADLOCAL_H
+
+#ifndef SETLOCAL_SWAP
+# define SETLOCAL_SWAP 0
+#endif
+
+#ifdef BOOST_NO_MT
+
+# define THREADLOCAL
+
+#else
+
+#ifdef _MSC_VER
+
+//FIXME: doesn't work with DLLs ... use TLS apis instead (http://www.boost.org/libs/thread/doc/tss.html)
+# define THREADLOCAL __declspec(thread)
+
+#else
+
+# define THREADLOCAL __thread
+
+#endif
+
+#endif
+
+#include <algorithm> //swap
+
+// naturally, the below are only thread-safe if value is THREADLOCAL
+template <class D> // scope guard: remembers a variable's value at construction and puts it back at destruction
+struct SaveLocal {
+    D &value;     // the guarded variable
+    D old_value;  // copy of its value taken by the constructor
+    SaveLocal(D& val) : value(val), old_value(val) {}
+    ~SaveLocal() {
+#if SETLOCAL_SWAP
+      using std::swap; swap(value,old_value); // std::swap fallback: an unqualified swap alone does not compile for built-in D
+#else
+      value=old_value;
+#endif
+    }
+};
+
+template <class D> // scope guard: assigns new_value to a variable at construction and restores the previous value at destruction
+struct SetLocal {
+    D &value;     // the guarded variable
+    D old_value;  // holds the previous value once the constructor has finished
+    SetLocal(D& val,const D &new_value) : value(val), old_value(
+#if SETLOCAL_SWAP
+      new_value // swap mode: start from a copy of the new value, exchanged with the variable in the body
+#else
+      val // assignment mode: snapshot the current value first
+#endif
+      ) {
+#if SETLOCAL_SWAP
+      using std::swap; swap(value,old_value); // std::swap fallback: an unqualified swap alone does not compile for built-in D
+#else
+      value=new_value;
+#endif
+    }
+    ~SetLocal() {
+#if SETLOCAL_SWAP
+      using std::swap; swap(value,old_value); // exchange back so the variable holds its previous value again
+#else
+      value=old_value;
+#endif
+    }
+};
+
+
+#endif
diff --git a/utils/timing_stats.cc b/utils/timing_stats.cc
new file mode 100644
index 00000000..fc8e9df1
--- /dev/null
+++ b/utils/timing_stats.cc
@@ -0,0 +1,24 @@
+#include "timing_stats.h"
+
+#include <iostream>
+#include "time.h" //cygwin needs
+using namespace std;
+
+map<string, TimerInfo> Timer::stats;
+
+Timer::Timer(const string& timername) : start_t(clock()), cur(stats[timername]) {} // records the start clock; stats[] creates the entry on first use and cur refers to it
+
+Timer::~Timer() { // adds one call and the clock() time elapsed since construction to this timer's stats entry
+  ++cur.calls;
+  const clock_t end_t = clock();
+  const double elapsed = static_cast<double>(end_t - start_t) / CLOCKS_PER_SEC; // clock() counts CLOCKS_PER_SEC ticks per second, which is not 1000000 on every platform
+  cur.total_time += elapsed;
+}
+
+void Timer::Summarize() { // prints one line per timer name to cerr, then discards all accumulated statistics
+  for (map<string, TimerInfo>::iterator it = stats.begin(); it != stats.end(); ++it) {
+    cerr << it->first << ": " << it->second.total_time << " secs (" << it->second.calls << " calls)\n";
+  }
+  stats.clear(); // NOTE(review): a Timer that is still alive keeps its reference (cur) into the cleared map
+}
+
diff --git a/utils/timing_stats.h b/utils/timing_stats.h
new file mode 100644
index 00000000..0a9f7656
--- /dev/null
+++ b/utils/timing_stats.h
@@ -0,0 +1,25 @@
+#ifndef _TIMING_STATS_H_
+#define _TIMING_STATS_H_
+
+#include <string>
+#include <map>
+
+struct TimerInfo { // accumulated statistics for one timer name
+  int calls; // incremented once per Timer destruction
+  double total_time; // sum of the elapsed times added by Timer destructors
+  TimerInfo() : calls(), total_time() {} // both members start at zero
+};
+
+struct Timer { // scoped timer: the constructor records clock(), the destructor adds the elapsed time to the entry named info
+  Timer(const std::string& info);
+  ~Timer();
+  static void Summarize(); // prints every entry to cerr and clears the table
+ private:
+  static std::map<std::string, TimerInfo> stats; // one entry per timer name, shared by all Timer objects
+  clock_t start_t; // NOTE(review): this header does not itself include <ctime>/<time.h> for clock_t
+  TimerInfo& cur; // the stats entry this timer updates
+  Timer(const Timer& other); // private and not defined in timing_stats.cc: copying is not supported
+  const Timer& operator=(const Timer& other); // likewise for assignment
+};
+
+#endif
diff --git a/utils/weights.cc b/utils/weights.cc
new file mode 100644
index 00000000..84647585
--- /dev/null
+++ b/utils/weights.cc
@@ -0,0 +1,77 @@
+#include "weights.h"
+
+#include <sstream>
+
+#include "fdict.h"
+#include "filelib.h"
+
+using namespace std;
+
+void Weights::InitFromFile(const std::string& filename, vector<string>* feature_list) { // reads "name value" / "name=value" lines into wv_; feature_list (optional) receives each name read
+  cerr << "Reading weights from " << filename << endl;
+  ReadFile in_file(filename);
+  istream& in = *in_file.stream();
+  assert(in);
+  int weight_count = 0;
+  bool fl = false; // true while progress dots are pending a closing newline
+  while (in) {
+    double val = 0;
+    string buf;
+    getline(in, buf);
+    if (buf.size() == 0) continue; // blank line (also what a failed final read leaves behind)
+    if (buf[0] == '#') continue; // comment line
+    for (int i = 0; i < buf.size(); ++i)
+      if (buf[i] == '=') buf[i] = ' '; // treat '=' as a separator
+    int start = 0;
+    while(start < buf.size() && buf[start] == ' ') ++start; // skip leading spaces
+    int end = start; // scan the name from its first character; starting at 0 made end-start negative on indented lines
+    while(end < buf.size() && buf[end] != ' ') ++end; // end of the feature name
+    int fid = FD::Convert(buf.substr(start, end - start));
+    while(end < buf.size() && buf[end] == ' ') ++end; // skip the gap before the value
+    val = strtod(&buf.c_str()[end], NULL);
+    if (isnan(val)) {
+      cerr << FD::Convert(fid) << " has weight NaN!\n";
+      abort();
+    }
+    if (wv_.size() <= fid)
+      wv_.resize(fid + 1); // grow so that index fid exists
+    wv_[fid] = val;
+    if (feature_list) { feature_list->push_back(FD::Convert(fid)); }
+    ++weight_count;
+    if (weight_count %   50000 == 0) { cerr << '.' << flush; fl = true; } // progress dot every 50000 weights
+    if (weight_count % 2000000 == 0) { cerr << " [" << weight_count << "]\n"; fl = false; } // running total every 2000000
+  }
+  if (fl) { cerr << endl; }
+  cerr << "Loaded " << weight_count << " feature weights\n";
+}
+
+void Weights::WriteToFile(const std::string& fname, bool hide_zero_value_features) const { // writes one "name value" line per feature id in [1, FD::NumFeats())
+  WriteFile out(fname);
+  ostream& o = *out.stream();
+  assert(o);
+  o.precision(17); // print values with 17 significant digits
+  const int num_feats = FD::NumFeats();
+  for (int i = 1; i < num_feats; ++i) { // id 0 is never written
+    const double val = (i < wv_.size() ? wv_[i] : 0.0); // ids beyond the stored vector count as 0
+    if (hide_zero_value_features && val == 0.0) continue; // optionally omit zero weights
+    o << FD::Convert(i) << ' ' << val << endl;
+  }
+}
+
+void Weights::InitVector(std::vector<double>* w) const { // copies the stored dense weight vector into *w
+  *w = wv_;
+}
+
+void Weights::InitSparseVector(SparseVector<double>* w) const { // sets the nonzero weights (ids >= 1) in *w; *w is not cleared first
+  for (int i = 1; i < wv_.size(); ++i) { // index 0 is skipped
+    const double& weight = wv_[i];
+    if (weight) w->set_value(i, weight); // zero weights are left out
+  }
+}
+
+void Weights::InitFromVector(const std::vector<double>& w) { // replaces the stored weights with w, resized to FD::NumFeats() entries
+  wv_ = w;
+  if (wv_.size() > FD::NumFeats())
+    cerr << "WARNING: initializing weight vector has more features than the global feature dictionary!\n";
+  wv_.resize(FD::NumFeats(), 0); // pads with 0, or drops the entries at and beyond FD::NumFeats()
+}
diff --git a/utils/weights.h b/utils/weights.h
new file mode 100644
index 00000000..f19aa3ce
--- /dev/null
+++ b/utils/weights.h
@@ -0,0 +1,21 @@
+#ifndef _WEIGHTS_H_
+#define _WEIGHTS_H_
+
+#include <string>
+#include <map>
+#include <vector>
+#include "sparse_vector.h"
+
+class Weights { // dense feature-weight vector with text-file load/save
+ public:
+  Weights() {}
+  void InitFromFile(const std::string& fname, std::vector<std::string>* feature_list = NULL); // reads weights from fname; optionally collects the feature names read
+  void WriteToFile(const std::string& fname, bool hide_zero_value_features = true) const; // writes "name value" lines, omitting zero weights by default
+  void InitVector(std::vector<double>* w) const; // copies the weights into *w
+  void InitSparseVector(SparseVector<double>* w) const; // sets the nonzero weights in *w
+  void InitFromVector(const std::vector<double>& w); // replaces the weights with w
+ private:
+  std::vector<double> wv_; // weights indexed by the feature id that FD::Convert returns
+};
+
+#endif
diff --git a/utils/weights_test.cc b/utils/weights_test.cc
new file mode 100644
index 00000000..8a4c26ef
--- /dev/null
+++ b/utils/weights_test.cc
@@ -0,0 +1,27 @@
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <gtest/gtest.h>
+#include "weights.h"
+#include "tdict.h"
+
+using namespace std;
+
+class WeightsTest : public testing::Test { // fixture for the TEST_F cases in this file
+ protected:
+  virtual void SetUp() { } // no per-test setup
+  virtual void TearDown() { } // no per-test cleanup
+};
+       
+
+TEST_F(WeightsTest,Load) { // loads the sample weights file and writes it back out; the test contains no assertions
+  Weights w;
+  w.InitFromFile("test_data/weights"); // relative path, so it depends on the working directory
+  w.WriteToFile("-"); // NOTE(review): "-" presumably selects standard output in WriteFile - confirm
+}
+
+int main(int argc, char **argv) { // googletest entry point
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS(); // its result becomes the process exit status
+}
diff --git a/utils/wordid.h b/utils/wordid.h
new file mode 100644
index 00000000..fb50bcc1
--- /dev/null
+++ b/utils/wordid.h
@@ -0,0 +1,6 @@
+#ifndef _WORD_ID_H_
+#define _WORD_ID_H_
+
+typedef int WordID; // integer id type for vocabulary words
+
+#endif
diff --git a/vest/Makefile.am b/vest/Makefile.am
index abdc8146..b869672b 100644
--- a/vest/Makefile.am
+++ b/vest/Makefile.am
@@ -1,15 +1,12 @@
 bin_PROGRAMS = \
-  mbr_kbest \
   mr_vest_map \
   mr_vest_reduce \
   mr_vest_generate_mapper_input \
-  fast_score \
   sentserver \
   sentclient
 
 if HAVE_GTEST
 noinst_PROGRAMS = \
-  scorer_test \
   lo_test
 endif
 
@@ -17,25 +14,16 @@ sentserver_SOURCES = sentserver.c
 
 sentclient_SOURCES = sentclient.c
 
-mbr_kbest_SOURCES = mbr_kbest.cc ter.cc comb_scorer.cc aer_scorer.cc scorer.cc viterbi_envelope.cc
-mbr_kbest_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc
+mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
 
-fast_score_SOURCES = fast_score.cc ter.cc comb_scorer.cc aer_scorer.cc scorer.cc viterbi_envelope.cc
-fast_score_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_vest_map_SOURCES = viterbi_envelope.cc ces.cc error_surface.cc mr_vest_map.cc line_optimizer.cc
+mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
 
-mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc $(top_srcdir)/decoder/timing_stats.cc
-mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_vest_reduce_SOURCES = error_surface.cc ces.cc mr_vest_reduce.cc line_optimizer.cc viterbi_envelope.cc
+mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
 
-mr_vest_map_SOURCES = viterbi_envelope.cc error_surface.cc aer_scorer.cc mr_vest_map.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc
-mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+lo_test_SOURCES = lo_test.cc ces.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc
+lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
 
-mr_vest_reduce_SOURCES = error_surface.cc aer_scorer.cc mr_vest_reduce.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc viterbi_envelope.cc
-mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
-
-scorer_test_SOURCES = aer_scorer.cc scorer_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc
-scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz
-
-lo_test_SOURCES = lo_test.cc scorer.cc ter.cc aer_scorer.cc comb_scorer.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc
-lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/vest/aer_scorer.cc b/vest/aer_scorer.cc
deleted file mode 100644
index 25b58b5e..00000000
--- a/vest/aer_scorer.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-#include "aer_scorer.h"
-
-#include <cmath>
-#include <cassert>
-#include <sstream>
-
-#include "tdict.h"
-#include "aligner.h"
-
-using namespace std;
-
-class AERScore : public ScoreBase<AERScore> {
-  friend class AERScorer;
- public:
-  AERScore() : num_matches(), num_predicted(), num_in_ref() {}
-  AERScore(int m, int p, int r) :
-    num_matches(m), num_predicted(p), num_in_ref(r) {}
-  virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){}
-  virtual void PlusEquals(const Score& delta, const float scale) {
-    const AERScore& other = static_cast<const AERScore&>(delta);
-    num_matches   += scale*other.num_matches;
-    num_predicted += scale*other.num_predicted;
-    num_in_ref    += scale*other.num_in_ref;
-  }
- virtual void PlusEquals(const Score& delta) {
-    const AERScore& other = static_cast<const AERScore&>(delta);
-    num_matches   += other.num_matches;
-    num_predicted += other.num_predicted;
-    num_in_ref    += other.num_in_ref;
-  }
-
-
-  virtual ScoreP GetZero() const {
-    return ScoreP(new AERScore);
-  }
-  virtual ScoreP GetOne() const {
-    return ScoreP(new AERScore);
-  }
-  virtual void Subtract(const Score& rhs, Score* out) const {
-    AERScore* res = static_cast<AERScore*>(out);
-    const AERScore& other = static_cast<const AERScore&>(rhs);
-    res->num_matches   = num_matches   - other.num_matches;
-    res->num_predicted = num_predicted - other.num_predicted;
-    res->num_in_ref    = num_in_ref    - other.num_in_ref;
-  }
-  float Precision() const {
-    return static_cast<float>(num_matches) / num_predicted;
-  }
-  float Recall() const {
-    return static_cast<float>(num_matches) / num_in_ref;
-  }
-  float ComputePartialScore() const { return 0.0;}
-  virtual float ComputeScore() const {
-    const float prec = Precision();
-    const float rec = Recall();
-    const float f = (2.0 * prec * rec) / (rec + prec);
-    if (isnan(f)) return 1.0f;
-    return 1.0f - f;
-  }
-  virtual bool IsAdditiveIdentity() const {
-    return (num_matches == 0) && (num_predicted == 0) && (num_in_ref == 0);
-  }
-  virtual void ScoreDetails(std::string* out) const {
-    ostringstream os;
-    os << "AER=" << (ComputeScore() * 100.0)
-       << " F=" << (100 - ComputeScore() * 100.0)
-       << " P=" << (Precision() * 100.0) << " R=" << (Recall() * 100.0)
-       << " [" << num_matches << " " << num_predicted << " " << num_in_ref << "]";
-    *out = os.str();
-  }
-  virtual void Encode(std::string*out) const {
-    out->resize(sizeof(int) * 3);
-    *(int *)&(*out)[sizeof(int) * 0] = num_matches;
-    *(int *)&(*out)[sizeof(int) * 1] = num_predicted;
-    *(int *)&(*out)[sizeof(int) * 2] = num_in_ref;
-  }
- private:
-  int num_matches;
-  int num_predicted;
-  int num_in_ref;
-};
-
-AERScorer::AERScorer(const vector<vector<WordID> >& refs, const string& src) : src_(src) {
-  if (refs.size() != 1) {
-    cerr << "AERScorer can only take a single reference!\n";
-    abort();
-  }
-  ref_ = AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(refs.front()));
-}
-
-static inline bool Safe(const Array2D<bool>& a, int i, int j) {
-  if (i >= 0 && j >= 0 && i < a.width() && j < a.height())
-    return a(i,j);
-  else
-    return false;
-}
-
-ScoreP AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const {
-  return ScoreP();
-}
-
-ScoreP AERScorer::ScoreCandidate(const vector<WordID>& shyp) const {
-  boost::shared_ptr<Array2D<bool> > hyp =
-    AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(shyp));
-
-  int m = 0;
-  int r = 0;
-  int p = 0;
-  int i_len = ref_->width();
-  int j_len = ref_->height();
-  for (int i = 0; i < i_len; ++i) {
-    for (int j = 0; j < j_len; ++j) {
-      if ((*ref_)(i,j)) {
-        ++r;
-        if (Safe(*hyp, i, j)) ++m;
-      }
-    }
-  }
-  for (int i = 0; i < hyp->width(); ++i)
-    for (int j = 0; j < hyp->height(); ++j)
-      if ((*hyp)(i,j)) ++p;
-
-  return ScoreP(new AERScore(m,p,r));
-}
-
-ScoreP AERScorer::ScoreFromString(const string& in) {
-  AERScore* res = new AERScore;
-  res->num_matches   = *(const int *)&in[sizeof(int) * 0];
-  res->num_predicted = *(const int *)&in[sizeof(int) * 1];
-  res->num_in_ref    = *(const int *)&in[sizeof(int) * 2];
-  return ScoreP(res);
-}
-
-const std::string* AERScorer::GetSource() const { return &src_; }
-
diff --git a/vest/aer_scorer.h b/vest/aer_scorer.h
deleted file mode 100644
index 6d53d359..00000000
--- a/vest/aer_scorer.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _AER_SCORER_
-#define _AER_SCORER_
-
-#include <boost/shared_ptr.hpp>
-
-#include "scorer.h"
-#include "array2d.h"
-
-class AERScorer : public SentenceScorer {
- public:
-  // when constructing alignment strings from a hypergraph, the source
-  // is necessary.
-  AERScorer(const std::vector<std::vector<WordID> >& refs, const std::string& src = "");
-  ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
-  ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
-  static ScoreP ScoreFromString(const std::string& in);
-  const std::string* GetSource() const;
- private:
-  std::string src_;
-  boost::shared_ptr<Array2D<bool> > ref_;
-};
-
-#endif
diff --git a/vest/comb_scorer.cc b/vest/comb_scorer.cc
deleted file mode 100644
index 9fc37868..00000000
--- a/vest/comb_scorer.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-#include "comb_scorer.h"
-
-#include <cstdio>
-
-using namespace std;
-
-class BLEUTERCombinationScore : public ScoreBase<BLEUTERCombinationScore> {
-  friend class BLEUTERCombinationScorer;
- public:
-  ~BLEUTERCombinationScore();
-  float ComputePartialScore() const { return 0.0;}
-  float ComputeScore() const {
-    return (bleu->ComputeScore() - ter->ComputeScore()) / 2.0f;
-  }
-  void ScoreDetails(string* details) const {
-    char buf[160];
-    sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f",
-      ComputeScore()*100.0f, bleu->ComputeScore()*100.0f, ter->ComputeScore()*100.0f);
-    *details = buf;
-  }
-  void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){}
-
-  void PlusEquals(const Score& delta, const float scale) {
-    bleu->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).bleu, scale);
-    ter->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).ter, scale);
-  }
-  void PlusEquals(const Score& delta) {
-    bleu->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).bleu);
-    ter->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).ter);
-  }
-
-
-
-  ScoreP GetOne() const {
-    BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
-    res->bleu = bleu->GetOne();
-    res->ter = ter->GetOne();
-    return ScoreP(res);
-  }
-  ScoreP GetZero() const {
-    BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
-    res->bleu = bleu->GetZero();
-    res->ter = ter->GetZero();
-    return ScoreP(res);
-  }
-  void Subtract(const Score& rhs, Score* res) const {
-    bleu->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).bleu,
-                   static_cast<BLEUTERCombinationScore*>(res)->bleu.get());
-    ter->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).ter,
-                  static_cast<BLEUTERCombinationScore*>(res)->ter.get());
-  }
-  void Encode(std::string* out) const {
-    string bs, ts;
-    bleu->Encode(&bs);
-    ter->Encode(&ts);
-    out->clear();
-    (*out) += static_cast<char>(bs.size());
-    (*out) += bs;
-    (*out) += ts;
-  }
-  bool IsAdditiveIdentity() const {
-    return bleu->IsAdditiveIdentity() && ter->IsAdditiveIdentity();
-  }
- private:
-  ScoreP bleu;
-  ScoreP ter;
-};
-
-BLEUTERCombinationScore::~BLEUTERCombinationScore() {
-}
-
-BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> >& refs) {
-  bleu_ = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs);
-  ter_ = SentenceScorer::CreateSentenceScorer(TER, refs);
-}
-
-BLEUTERCombinationScorer::~BLEUTERCombinationScorer() {
-}
-
-ScoreP BLEUTERCombinationScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
-  return ScoreP();
-}
-
-ScoreP BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
-  BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
-  res->bleu = bleu_->ScoreCandidate(hyp);
-  res->ter = ter_->ScoreCandidate(hyp);
-  return ScoreP(res);
-}
-
-ScoreP BLEUTERCombinationScorer::ScoreFromString(const std::string& in) {
-  int bss = in[0];
-  BLEUTERCombinationScore* r = new BLEUTERCombinationScore;
-  r->bleu = SentenceScorer::CreateScoreFromString(IBM_BLEU, in.substr(1, bss));
-  r->ter = SentenceScorer::CreateScoreFromString(TER, in.substr(1 + bss));
-  return ScoreP(r);
-}
diff --git a/vest/comb_scorer.h b/vest/comb_scorer.h
deleted file mode 100644
index 346be576..00000000
--- a/vest/comb_scorer.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _COMB_SCORER_
-#define _COMB_SCORER_
-
-#include "scorer.h"
-
-class BLEUTERCombinationScorer : public SentenceScorer {
- public:
-  BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs);
-  ~BLEUTERCombinationScorer();
-  ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
-  ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
-  static ScoreP ScoreFromString(const std::string& in);
- private:
-  ScorerP bleu_,ter_;
-};
-
-#endif
diff --git a/vest/fast_score.cc b/vest/fast_score.cc
deleted file mode 100644
index 5ee264a6..00000000
--- a/vest/fast_score.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-#include <iostream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "filelib.h"
-#include "tdict.h"
-#include "scorer.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
-        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
-        ("in_file,i", po::value<string>()->default_value("-"), "Input file")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = false;
-  if (!conf->count("reference")) {
-    cerr << "Please specify one or more references using -r <REF1.TXT> -r <REF2.TXT> ...\n";
-    flag = true;
-  }
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
-  cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
-
-  ReadFile rf(conf["in_file"].as<string>());
-  ScoreP acc;
-  istream& in = *rf.stream();
-  int lc = 0;
-  while(in) {
-    string line;
-    getline(in, line);
-    if (line.empty() && !in) break;
-    vector<WordID> sent;
-    TD::ConvertSentence(line, &sent);
-    ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
-    if (!acc) { acc = sentscore->GetZero(); }
-    acc->PlusEquals(*sentscore);
-    ++lc;
-  }
-  assert(lc > 0);
-  if (lc > ds.size()) {
-    cerr << "Too many (" << lc << ") translations in input, expected " << ds.size() << endl;
-    return 1;
-  }
-  if (lc != ds.size())
-    cerr << "Fewer sentences in hyp (" << lc << ") than refs ("
-         << ds.size() << "): scoring partial set!\n";
-  float score = acc->ComputeScore();
-  string details;
-  acc->ScoreDetails(&details);
-  cerr << details << endl;
-  cout << score << endl;
-  return 0;
-}
diff --git a/vest/lo_test.cc b/vest/lo_test.cc
index 577113bb..9200eb34 100644
--- a/vest/lo_test.cc
+++ b/vest/lo_test.cc
@@ -5,6 +5,7 @@
 #include <boost/shared_ptr.hpp>
 #include <gtest/gtest.h>
 
+#include "ces.h"
 #include "fdict.h"
 #include "hg.h"
 #include "kbest.h"
@@ -166,8 +167,8 @@ TEST_F(OptTest, TestS1) {
   envs[1] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg2, NULL, wf);
 
   vector<ErrorSurface> es(2);
-  scorer1->ComputeErrorSurface(envs[0], &es[0], IBM_BLEU, hg);
-  scorer2->ComputeErrorSurface(envs[1], &es[1], IBM_BLEU, hg2);
+  ComputeErrorSurface(*scorer1, envs[0], &es[0], IBM_BLEU, hg);
+  ComputeErrorSurface(*scorer2, envs[1], &es[1], IBM_BLEU, hg2);
   cerr << envs[0].size() << " " << envs[1].size() << endl;
   cerr << es[0].size() << " " << es[1].size() << endl;
   envs.clear();
diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc
index b3acc5dd..1506a99f 100644
--- a/vest/mr_vest_map.cc
+++ b/vest/mr_vest_map.cc
@@ -6,6 +6,7 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "ces.h"
 #include "filelib.h"
 #include "stringlib.h"
 #include "sparse_vector.h"
@@ -13,7 +14,7 @@
 #include "viterbi_envelope.h"
 #include "inside_outside.h"
 #include "error_surface.h"
-#include "hg.h"
+#include "b64tools.h"
 #include "hg_io.h"
 
 using namespace std;
@@ -90,7 +91,7 @@ int main(int argc, char** argv) {
     ViterbiEnvelopeWeightFunction wf(origin, axis);
     ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
     ErrorSurface es;
-    ds[sent_id]->ComputeErrorSurface(ve, &es, type, hg);
+    ComputeErrorSurface(*ds[sent_id], ve, &es, type, hg);
     //cerr << "Viterbi envelope has " << ve.size() << " segments\n";
     // cerr << "Error surface has " << es.size() << " segments\n";
     string val;
diff --git a/vest/mr_vest_reduce.cc b/vest/mr_vest_reduce.cc
index 5efcc19a..3df52020 100644
--- a/vest/mr_vest_reduce.cc
+++ b/vest/mr_vest_reduce.cc
@@ -9,7 +9,7 @@
 #include "sparse_vector.h"
 #include "error_surface.h"
 #include "line_optimizer.h"
-#include "hg_io.h"
+#include "b64tools.h"
 
 using namespace std;
 namespace po = boost::program_options;
diff --git a/vest/scorer.cc b/vest/scorer.cc
deleted file mode 100644
index 70fdef34..00000000
--- a/vest/scorer.cc
+++ /dev/null
@@ -1,708 +0,0 @@
-#include "scorer.h"
-
-#include <boost/lexical_cast.hpp>
-#include <map>
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <cstdio>
-#include <valarray>
-#include <algorithm>
-
-#include <boost/shared_ptr.hpp>
-
-#include "filelib.h"
-#include "aligner.h"
-#include "viterbi_envelope.h"
-#include "error_surface.h"
-#include "ter.h"
-#include "aer_scorer.h"
-#include "comb_scorer.h"
-#include "tdict.h"
-#include "stringlib.h"
-#include "lattice.h"
-
-
-using boost::shared_ptr;
-using namespace std;
-
-const bool minimize_segments = true;    // if adjacent segments have equal scores, merge them
-
-void Score::TimesEquals(float scale) {
-  cerr<<"UNIMPLEMENTED except for BLEU (for MIRA): Score::TimesEquals"<<endl;abort();
-}
-
-ScoreType ScoreTypeFromString(const string& st) {
-  const string sl = LowercaseString(st);
-  if (sl == "ser")
-    return SER;
-  if (sl == "ter")
-    return TER;
-  if (sl == "aer")
-    return AER;
-  if (sl == "bleu" || sl == "ibm_bleu")
-    return IBM_BLEU;
-  if (sl == "ibm_bleu_3")
-    return IBM_BLEU_3;
-  if (sl == "nist_bleu")
-    return NIST_BLEU;
-  if (sl == "koehn_bleu")
-    return Koehn_BLEU;
-  if (sl == "combi")
-    return BLEU_minus_TER_over_2;
-  cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n";
-  return IBM_BLEU;
-}
-
-static char const* score_names[]={
-  "IBM_BLEU", "NIST_BLEU", "Koehn_BLEU", "TER", "BLEU_minus_TER_over_2", "SER", "AER", "IBM_BLEU_3"
-};
-
-std::string StringFromScoreType(ScoreType st) {
-  assert(st>=0 && st<sizeof(score_names)/sizeof(score_names[0]));
-  return score_names[(int)st];
-}
-
-
-Score::~Score() {}
-SentenceScorer::~SentenceScorer() {}
-
-struct length_accum {
-  template <class S>
-  float operator()(float sum,S const& ref) const {
-    return sum+ref.size();
-  }
-};
-
-template <class S>
-float avg_reflength(vector<S> refs) {
-  unsigned n=refs.size();
-  return n?accumulate(refs.begin(),refs.end(),0.,length_accum())/n:0.;
-}
-
-
-float SentenceScorer::ComputeRefLength(const Sentence &hyp) const {
-  return hyp.size(); // reasonable default? :)
-}
-
-const std::string* SentenceScorer::GetSource() const { return NULL; }
-
-class SERScore : public ScoreBase<SERScore> {
-  friend class SERScorer;
- public:
-  SERScore() : correct(0), total(0) {}
-  float ComputePartialScore() const { return 0.0;}
-  float ComputeScore() const {
-    return static_cast<float>(correct) / static_cast<float>(total);
-  }
-  void ScoreDetails(string* details) const {
-    ostringstream os;
-    os << "SER= " << ComputeScore() << " (" << correct << '/' << total << ')';
-    *details = os.str();
-  }
-  void PlusPartialEquals(const Score& /* delta */, int /* oracle_e_cover */, int /* oracle_f_cover */, int /* src_len */){}
-
-  void PlusEquals(const Score& delta, const float scale) {
-    correct += scale*static_cast<const SERScore&>(delta).correct;
-    total += scale*static_cast<const SERScore&>(delta).total;
-  }
-  void PlusEquals(const Score& delta) {
-    correct += static_cast<const SERScore&>(delta).correct;
-    total += static_cast<const SERScore&>(delta).total;
-    }
-  ScoreP GetZero() const { return ScoreP(new SERScore); }
-  ScoreP GetOne() const { return ScoreP(new SERScore); }
-  void Subtract(const Score& rhs, Score* res) const {
-    SERScore* r = static_cast<SERScore*>(res);
-    r->correct = correct - static_cast<const SERScore&>(rhs).correct;
-    r->total = total - static_cast<const SERScore&>(rhs).total;
-  }
-  void Encode(string* out) const {
-    assert(!"not implemented");
-  }
-  bool IsAdditiveIdentity() const {
-    return (total == 0 && correct == 0);  // correct is always 0 <= n <= total
-  }
- private:
-  int correct, total;
-};
-
-std::string SentenceScorer::verbose_desc() const {
-  return desc+",ref0={ "+TD::GetString(refs[0])+" }";
-}
-
-class SERScorer : public SentenceScorer {
- public:
-  SERScorer(const vector<vector<WordID> >& references) : SentenceScorer("SERScorer",references),refs_(references) {}
-  ScoreP ScoreCCandidate(const vector<WordID>& /* hyp */) const {
-    return ScoreP();
-  }
-  ScoreP ScoreCandidate(const vector<WordID>& hyp) const {
-    SERScore* res = new SERScore;
-    res->total = 1;
-    for (int i = 0; i < refs_.size(); ++i)
-      if (refs_[i] == hyp) res->correct = 1;
-    return ScoreP(res);
-  }
-  static ScoreP ScoreFromString(const string& data) {
-    assert(!"Not implemented");
-  }
- private:
-  vector<vector<WordID> > refs_;
-};
-
-class BLEUScore : public ScoreBase<BLEUScore> {
-  friend class BLEUScorerBase;
- public:
-  BLEUScore(int n) : correct_ngram_hit_counts(float(0),n), hyp_ngram_counts(float(0),n) {
-    ref_len = 0;
-    hyp_len = 0; }
-  BLEUScore(int n, int k) :  correct_ngram_hit_counts(float(k),n), hyp_ngram_counts(float(k),n) {
-    ref_len = k;
-    hyp_len = k; }
-  float ComputeScore() const;
-  float ComputePartialScore() const;
-  void ScoreDetails(string* details) const;
-  void TimesEquals(float scale);
-  void PlusEquals(const Score& delta);
-  void PlusEquals(const Score& delta, const float scale);
-  void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len);
-  ScoreP GetZero() const;
-  ScoreP GetOne() const;
-  void Subtract(const Score& rhs, Score* res) const;
-  void Encode(string* out) const;
-  bool IsAdditiveIdentity() const {
-    if (fabs(ref_len) > 0.1f || hyp_len != 0) return false;
-    for (int i = 0; i < correct_ngram_hit_counts.size(); ++i)
-      if (hyp_ngram_counts[i] != 0 ||
-        correct_ngram_hit_counts[i] != 0) return false;
-    return true;
-  }
- private:
-  int N() const {
-    return hyp_ngram_counts.size();
-  }
-  float ComputeScore(vector<float>* precs, float* bp) const;
-  float ComputePartialScore(vector<float>* prec, float* bp) const;
-  valarray<float> correct_ngram_hit_counts;
-  valarray<float> hyp_ngram_counts;
-  float ref_len;
-  float hyp_len;
-};
-
-class BLEUScorerBase : public SentenceScorer {
- public:
-  BLEUScorerBase(const vector<vector<WordID> >& references,
-                 int n
-             );
-  ScoreP ScoreCandidate(const vector<WordID>& hyp) const;
-  ScoreP ScoreCCandidate(const vector<WordID>& hyp) const;
-  static ScoreP ScoreFromString(const string& in);
-
-  virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0;
- private:
-  struct NGramCompare {
-    int operator() (const vector<WordID>& a, const vector<WordID>& b) {
-      size_t as = a.size();
-      size_t bs = b.size();
-      const size_t s = (as < bs ? as : bs);
-      for (size_t i = 0; i < s; ++i) {
-         int d = a[i] - b[i];
-         if (d < 0) return true;
-	 if (d > 0) return false;
-      }
-      return as < bs;
-    }
-  };
-  typedef map<vector<WordID>, pair<int,int>, NGramCompare> NGramCountMap;
-  void CountRef(const vector<WordID>& ref) {
-    NGramCountMap tc;
-    vector<WordID> ngram(n_);
-    int s = ref.size();
-    for (int j=0; j<s; ++j) {
-      int remaining = s-j;
-      int k = (n_ < remaining ? n_ : remaining);
-      ngram.clear();
-      for (int i=1; i<=k; ++i) {
-	ngram.push_back(ref[j + i - 1]);
-        tc[ngram].first++;
-      }
-    }
-    for (NGramCountMap::iterator i = tc.begin(); i != tc.end(); ++i) {
-      pair<int,int>& p = ngrams_[i->first];
-      if (p.first < i->second.first)
-        p = i->second;
-    }
-  }
-
-  void ComputeNgramStats(const vector<WordID>& sent,
-			 valarray<float>* correct,
-			 valarray<float>* hyp,
-			 bool clip_counts)
-    const {
-    assert(correct->size() == n_);
-    assert(hyp->size() == n_);
-    vector<WordID> ngram(n_);
-    (*correct) *= 0;
-    (*hyp) *= 0;
-    int s = sent.size();
-    for (int j=0; j<s; ++j) {
-      int remaining = s-j;
-      int k = (n_ < remaining ? n_ : remaining);
-      ngram.clear();
-      for (int i=1; i<=k; ++i) {
-	ngram.push_back(sent[j + i - 1]);
-        pair<int,int>& p = ngrams_[ngram];
-	if(clip_counts){
-	  if (p.second < p.first) {
-	    ++p.second;
-	    (*correct)[i-1]++;
-	  }}
-	else {
-	  ++p.second;
-	  (*correct)[i-1]++;
-	}
-	// if the 1 gram isn't found, don't try to match don't need to match any 2- 3- .. grams:
-	if (!p.first) {
-	  for (; i<=k; ++i)
-	    (*hyp)[i-1]++;
-	} else {
-          (*hyp)[i-1]++;
-        }
-      }
-    }
-  }
-
-  mutable NGramCountMap ngrams_;
-  int n_;
-  vector<int> lengths_;
-};
-
-ScoreP BLEUScorerBase::ScoreFromString(const string& in) {
-  istringstream is(in);
-  int n;
-  is >> n;
-  BLEUScore* r = new BLEUScore(n);
-  is >> r->ref_len >> r->hyp_len;
-
-  for (int i = 0; i < n; ++i) {
-    is >> r->correct_ngram_hit_counts[i];
-    is >> r->hyp_ngram_counts[i];
-  }
-  return ScoreP(r);
-}
-
-class IBM_BLEUScorer : public BLEUScorerBase {
- public:
-    IBM_BLEUScorer(const vector<vector<WordID> >& references,
-		   int n=4) : BLEUScorerBase(references, n), lengths_(references.size()) {
-   for (int i=0; i < references.size(); ++i)
-     lengths_[i] = references[i].size();
- }
-  float ComputeRefLength(const vector<WordID>& hyp) const {
-    if (lengths_.size() == 1) return lengths_[0];
-    int bestd = 2000000;
-    int hl = hyp.size();
-    int bl = -1;
-    for (vector<int>::const_iterator ci = lengths_.begin(); ci != lengths_.end(); ++ci) {
-      int cl = *ci;
-      if (abs(cl - hl) < bestd) {
-        bestd = abs(cl - hl);
-        bl = cl;
-      }
-    }
-    return bl;
-  }
- private:
-  vector<int> lengths_;
-};
-
-class NIST_BLEUScorer : public BLEUScorerBase {
- public:
-    NIST_BLEUScorer(const vector<vector<WordID> >& references,
-                    int n=4) : BLEUScorerBase(references, n),
-		    shortest_(references[0].size()) {
-   for (int i=1; i < references.size(); ++i)
-     if (references[i].size() < shortest_)
-       shortest_ = references[i].size();
- }
-  float ComputeRefLength(const vector<WordID>& /* hyp */) const {
-    return shortest_;
-  }
- private:
-  float shortest_;
-};
-
-class Koehn_BLEUScorer : public BLEUScorerBase {
- public:
-    Koehn_BLEUScorer(const vector<vector<WordID> >& references,
-                     int n=4) : BLEUScorerBase(references, n),
-                     avg_(0) {
-   for (int i=0; i < references.size(); ++i)
-     avg_ += references[i].size();
-   avg_ /= references.size();
- }
-  float ComputeRefLength(const vector<WordID>& /* hyp */) const {
-    return avg_;
-  }
- private:
-  float avg_;
-};
-
-ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type,
-      const vector<vector<WordID> >& refs,
-      const string& src)
-{
-  SentenceScorer *r=0;
-  switch (type) {
-  case IBM_BLEU: r = new IBM_BLEUScorer(refs, 4);break;
-  case IBM_BLEU_3 : r = new IBM_BLEUScorer(refs,3);break;
-    case NIST_BLEU: r = new NIST_BLEUScorer(refs, 4);break;
-    case Koehn_BLEU: r = new Koehn_BLEUScorer(refs, 4);break;
-    case AER: r = new AERScorer(refs, src);break;
-    case TER: r = new TERScorer(refs);break;
-    case SER: r = new SERScorer(refs);break;
-    case BLEU_minus_TER_over_2: r = new BLEUTERCombinationScorer(refs);break;
-    default:
-      assert(!"Not implemented!");
-  }
-  return ScorerP(r);
-}
-
-ScoreP SentenceScorer::GetOne() const {
-  Sentence s;
-  return ScoreCCandidate(s)->GetOne();
-}
-
-ScoreP SentenceScorer::GetZero() const {
-  Sentence s;
-  return ScoreCCandidate(s)->GetZero();
-}
-
-ScoreP Score::GetOne(ScoreType type) {
-  std::vector<SentenceScorer::Sentence > refs;
-  return SentenceScorer::CreateSentenceScorer(type,refs)->GetOne();
-}
-
-ScoreP Score::GetZero(ScoreType type) {
-  std::vector<SentenceScorer::Sentence > refs;
-  return SentenceScorer::CreateSentenceScorer(type,refs)->GetZero();
-}
-
-
-ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) {
-  switch (type) {
-    case IBM_BLEU:
-  case IBM_BLEU_3:
-    case NIST_BLEU:
-    case Koehn_BLEU:
-      return BLEUScorerBase::ScoreFromString(in);
-    case TER:
-      return TERScorer::ScoreFromString(in);
-    case AER:
-      return AERScorer::ScoreFromString(in);
-    case SER:
-      return SERScorer::ScoreFromString(in);
-    case BLEU_minus_TER_over_2:
-      return BLEUTERCombinationScorer::ScoreFromString(in);
-    default:
-      assert(!"Not implemented!");
-  }
-}
-
-void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* env, const ScoreType type, const Hypergraph& hg) const {
-  vector<WordID> prev_trans;
-  const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
-  env->resize(ienv.size());
-  ScoreP prev_score;
-  int j = 0;
-  for (int i = 0; i < ienv.size(); ++i) {
-    const Segment& seg = *ienv[i];
-    vector<WordID> trans;
-    if (type == AER) {
-      vector<bool> edges(hg.edges_.size(), false);
-      seg.CollectEdgesUsed(&edges);  // get the set of edges in the viterbi
-                                     // alignment
-      ostringstream os;
-      const string* psrc = this->GetSource();
-      if (psrc == NULL) {
-        cerr << "AER scoring in VEST requires source, but it is missing!\n";
-        abort();
-      }
-      size_t pos = psrc->rfind(" ||| ");
-      if (pos == string::npos) {
-        cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl;
-        abort();
-      }
-      Lattice src;
-      Lattice ref;
-      LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src);
-      LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref);
-      AlignerTools::WriteAlignment(src, ref, hg, &os, true, &edges);
-      string tstr = os.str();
-      TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans);
-    } else {
-      seg.ConstructTranslation(&trans);
-    }
-    // cerr << "Scoring: " << TD::GetString(trans) << endl;
-    if (trans == prev_trans) {
-      if (!minimize_segments) {
-        assert(prev_score); // if this fails, it means
-	                    // the decoder can generate null translations
-        ErrorSegment& out = (*env)[j];
-        out.delta = prev_score->GetZero();
-        out.x = seg.x;
-	++j;
-      }
-      // cerr << "Identical translation, skipping scoring\n";
-    } else {
-      ScoreP score = ScoreCandidate(trans);
-      // cerr << "score= " << score->ComputeScore() << "\n";
-      ScoreP cur_delta_p = score->GetZero();
-      Score* cur_delta = cur_delta_p.get();
-      // just record the score diffs
-      if (!prev_score)
-        prev_score = score->GetZero();
-
-      score->Subtract(*prev_score, cur_delta);
-      prev_trans.swap(trans);
-      prev_score = score;
-      if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) {
-        ErrorSegment& out = (*env)[j];
-        out.delta = cur_delta_p;
-        out.x = seg.x;
-        ++j;
-      }
-    }
-  }
-  // cerr << " In segments: " << ienv.size() << endl;
-  // cerr << "Out segments: " << j << endl;
-  assert(j > 0);
-  env->resize(j);
-}
-
-void BLEUScore::ScoreDetails(string* details) const {
-  char buf[2000];
-  vector<float> precs(max(N(),4));
-  float bp;
-  float bleu = ComputeScore(&precs, &bp);
-  for (int i=N();i<4;++i)
-    precs[i]=0.;
-  char *bufn;
-  bufn=buf+sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
-       bleu*100.0,
-       precs[0]*100.0,
-       precs[1]*100.0,
-       precs[2]*100.0,
-       precs[3]*100.0,
-       bp);
-  *details = buf;
-}
-
-float BLEUScore::ComputeScore(vector<float>* precs, float* bp) const {
-  float log_bleu = 0;
-  if (precs) precs->clear();
-  int count = 0;
-  for (int i = 0; i < N(); ++i) {
-    if (hyp_ngram_counts[i] > 0) {
-      float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]);
-      if (precs) precs->push_back(exp(lprec));
-      log_bleu += lprec;
-      ++count;
-    }
-  }
-  log_bleu /= static_cast<float>(count);
-  float lbp = 0.0;
-  if (hyp_len < ref_len)
-    lbp = (hyp_len - ref_len) / hyp_len;
-  log_bleu += lbp;
-  if (bp) *bp = exp(lbp);
-  return exp(log_bleu);
-}
-
-
-//comptue scaled score for oracle retrieval
-float BLEUScore::ComputePartialScore(vector<float>* precs, float* bp) const {
-  // cerr << "Then here " << endl;
-  float log_bleu = 0;
-  if (precs) precs->clear();
-  int count = 0;
-  for (int i = 0; i < N(); ++i) {
-    //  cerr << "In CPS " << hyp_ngram_counts[i] << " " << correct_ngram_hit_counts[i] << endl;
-    if (hyp_ngram_counts[i] > 0) {
-      float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]);
-      if (precs) precs->push_back(exp(lprec));
-      log_bleu += lprec;
-      ++count;
-    }
-  }
-  log_bleu /= static_cast<float>(count);
-  float lbp = 0.0;
-  if (hyp_len < ref_len)
-    lbp = (hyp_len - ref_len) / hyp_len;
-  log_bleu += lbp;
-  if (bp) *bp = exp(lbp);
-  return exp(log_bleu);
-}
-
-float BLEUScore::ComputePartialScore() const {
-  // cerr << "In here first " << endl;
-  return ComputePartialScore(NULL, NULL);
-}
-
-float BLEUScore::ComputeScore() const {
-  return ComputeScore(NULL, NULL);
-}
-
-void BLEUScore::Subtract(const Score& rhs, Score* res) const {
-  const BLEUScore& d = static_cast<const BLEUScore&>(rhs);
-  BLEUScore* o = static_cast<BLEUScore*>(res);
-  o->ref_len = ref_len - d.ref_len;
-  o->hyp_len = hyp_len - d.hyp_len;
-  o->correct_ngram_hit_counts = correct_ngram_hit_counts - d.correct_ngram_hit_counts;
-  o->hyp_ngram_counts = hyp_ngram_counts - d.hyp_ngram_counts;
-}
-
-void BLEUScore::PlusEquals(const Score& delta) {
-  const BLEUScore& d = static_cast<const BLEUScore&>(delta);
-  correct_ngram_hit_counts += d.correct_ngram_hit_counts;
-  hyp_ngram_counts += d.hyp_ngram_counts;
-  ref_len += d.ref_len;
-  hyp_len += d.hyp_len;
-}
-
-void BLEUScore::TimesEquals(float scale) {
-  correct_ngram_hit_counts *= scale;
-  hyp_ngram_counts *= scale;
-  ref_len *= scale;
-  hyp_len *= scale;
-}
-
-void BLEUScore::PlusEquals(const Score& delta, const float scale) {
-  const BLEUScore& d = static_cast<const BLEUScore&>(delta);
-  correct_ngram_hit_counts = correct_ngram_hit_counts + (d.correct_ngram_hit_counts * scale);
-  hyp_ngram_counts = hyp_ngram_counts + (d.hyp_ngram_counts * scale);
-  ref_len = ref_len + (d.ref_len * scale);
-  hyp_len = hyp_len + (d.hyp_len * scale);
-}
-
-void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){
-  const BLEUScore& d = static_cast<const BLEUScore&>(delta);
-  correct_ngram_hit_counts += d.correct_ngram_hit_counts;
-  hyp_ngram_counts += d.hyp_ngram_counts;
-  //scale the reference length according to the size of the input sentence covered by this rule
-
-  ref_len *= (float)oracle_f_cover / src_len;
-  ref_len += d.ref_len;
-
-  hyp_len = oracle_e_cover;
-  hyp_len += d.hyp_len;
-}
-
-
-ScoreP BLEUScore::GetZero() const {
-  return ScoreP(new BLEUScore(N()));
-}
-
-ScoreP BLEUScore::GetOne() const {
-  return ScoreP(new BLEUScore(N(),1));
-}
-
-
-void BLEUScore::Encode(string* out) const {
-  ostringstream os;
-  const int n = correct_ngram_hit_counts.size();
-  os << n << ' ' << ref_len << ' ' << hyp_len;
-  for (int i = 0; i < n; ++i)
-    os << ' ' << correct_ngram_hit_counts[i] << ' ' << hyp_ngram_counts[i];
-  *out = os.str();
-}
-
-BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references,
-                               int n) : SentenceScorer("BLEU"+boost::lexical_cast<string>(n),references),n_(n) {
-  for (vector<vector<WordID> >::const_iterator ci = references.begin();
-       ci != references.end(); ++ci) {
-    lengths_.push_back(ci->size());
-    CountRef(*ci);
-  }
-}
-
-ScoreP BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const {
-  BLEUScore* bs = new BLEUScore(n_);
-  for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
-    i->second.second = 0;
-  ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts, true);
-  bs->ref_len = ComputeRefLength(hyp);
-  bs->hyp_len = hyp.size();
-  return ScoreP(bs);
-}
-
-ScoreP BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const {
-  BLEUScore* bs = new BLEUScore(n_);
-  for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
-    i->second.second = 0;
-  bool clip = false;
-  ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts,clip);
-  bs->ref_len = ComputeRefLength(hyp);
-  bs->hyp_len = hyp.size();
-  return ScoreP(bs);
-}
-
-
-DocScorer::~DocScorer() {
-}
-
-void DocScorer::Init(
-      const ScoreType type,
-      const vector<string>& ref_files,
-      const string& src_file, bool verbose) {
-  scorers_.clear();
-  // TODO stop using valarray, start using ReadFile
-  cerr << "Loading references (" << ref_files.size() << " files)\n";
-  ReadFile srcrf;
-  if (type == AER && src_file.size() > 0) {
-    cerr << "  (source=" << src_file << ")\n";
-    srcrf.Init(src_file);
-  }
-  std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end());
-  for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]);
-  char buf[64000];
-  bool expect_eof = false;
-  int line=0;
-  while (ifs[0].get()) {
-    vector<vector<WordID> > refs(ref_files.size());
-    for (int i=0; i < ref_files.size(); ++i) {
-      istream &in=ifs[i].get();
-      if (in.eof()) break;
-      in.getline(buf, 64000);
-      refs[i].clear();
-      if (strlen(buf) == 0) {
-        if (in.eof()) {
-          if (!expect_eof) {
-            assert(i == 0);
-            expect_eof = true;
-          }
-          break;
-        }
-      } else {
-        TD::ConvertSentence(buf, &refs[i]);
-        assert(!refs[i].empty());
-      }
-      assert(!expect_eof);
-    }
-    if (!expect_eof) {
-      string src_line;
-      if (srcrf) {
-        getline(srcrf.get(), src_line);
-        map<string,string> dummy;
-        ProcessAndStripSGML(&src_line, &dummy);
-      }
-      scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line)));
-      if (verbose)
-        cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl;
-      ++line;
-    }
-  }
-  cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
-}
-
diff --git a/vest/scorer.h b/vest/scorer.h
deleted file mode 100644
index 0c8b380f..00000000
--- a/vest/scorer.h
+++ /dev/null
@@ -1,111 +0,0 @@
-#ifndef SCORER_H_
-#define SCORER_H_
-#include <vector>
-#include <string>
-#include <boost/shared_ptr.hpp>
-//TODO: use intrusive shared_ptr in Score (because there are many of them on ErrorSurfaces)
-#include "wordid.h"
-#include "intrusive_refcount.hpp"
-
-class Score;
-class SentenceScorer;
-typedef boost::intrusive_ptr<Score> ScoreP;
-typedef boost::shared_ptr<SentenceScorer> ScorerP;
-
-class ViterbiEnvelope;
-class ErrorSurface;
-class Hypergraph;  // needed for alignment
-
-//TODO: BLEU N (N separate arg, not part of enum)?
-enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER, AER, IBM_BLEU_3 };
-ScoreType ScoreTypeFromString(const std::string& st);
-std::string StringFromScoreType(ScoreType st);
-
-class Score : public boost::intrusive_refcount<Score> {
- public:
-  virtual ~Score();
-  virtual float ComputeScore() const = 0;
-  virtual float ComputePartialScore() const =0;
-  virtual void ScoreDetails(std::string* details) const = 0;
-  std::string ScoreDetails() {
-    std::string d;
-    ScoreDetails(&d);
-    return d;
-  }
-  virtual void TimesEquals(float scale); // only for bleu; for mira oracle
-  /// same as rhs.TimesEquals(scale);PlusEquals(rhs) except doesn't modify rhs.
-  virtual void PlusEquals(const Score& rhs, const float scale) = 0;
-  virtual void PlusEquals(const Score& rhs) = 0;
-  virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len) = 0;
-  virtual void Subtract(const Score& rhs, Score *res) const = 0;
-  virtual ScoreP GetZero() const = 0;
-  virtual ScoreP GetOne() const = 0;
-  virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta
-                                      // to another score results in no score change
-				      // under any circumstances
-  virtual void Encode(std::string* out) const = 0;
-  static ScoreP GetZero(ScoreType type);
-  static ScoreP GetOne(ScoreType type);
-  virtual ScoreP Clone() const = 0;
-protected:
-  Score() {  } // we define these explicitly because refcount is noncopyable
-  Score(Score const& o) {  }
-};
-
-//TODO: make sure default copy ctors for score types do what we want.
-template <class Derived>
-struct ScoreBase : public Score {
-  ScoreP Clone() const  {
-    return ScoreP(new Derived(dynamic_cast<Derived const&>(*this)));
-  }
-};
-
-class SentenceScorer {
- public:
-  typedef std::vector<WordID> Sentence;
-  typedef std::vector<Sentence> Sentences;
-  std::string desc;
-  Sentences refs;
-  SentenceScorer(std::string desc="SentenceScorer_unknown", Sentences const& refs=Sentences()) : desc(desc),refs(refs) {  }
-  std::string verbose_desc() const;
-  virtual float ComputeRefLength(const Sentence& hyp) const; // default: avg of refs.length
-  virtual ~SentenceScorer();
-  virtual ScoreP GetOne() const;
-  virtual ScoreP GetZero() const;
-  void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es, const ScoreType type, const Hypergraph& hg) const;
-  virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0;
-  virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0;
-  virtual const std::string* GetSource() const;
-  static ScoreP CreateScoreFromString(const ScoreType type, const std::string& in);
-  static ScorerP CreateSentenceScorer(const ScoreType type,
-    const std::vector<Sentence >& refs,
-    const std::string& src = "");
-};
-
-//TODO: should be able to GetOne GetZero without supplying sentence (just type)
-class DocScorer {
- public:
-  ~DocScorer();
-  DocScorer() {  }
-  void Init(const ScoreType type,
-            const std::vector<std::string>& ref_files,
-            const std::string& src_file = "",
-            bool verbose=false
-    );
-  DocScorer(const ScoreType type,
-            const std::vector<std::string>& ref_files,
-            const std::string& src_file = "",
-            bool verbose=false
-    )
-  {
-    Init(type,ref_files,src_file,verbose);
-  }
-
-  int size() const { return scorers_.size(); }
-  ScorerP operator[](size_t i) const { return scorers_[i]; }
- private:
-  std::vector<ScorerP> scorers_;
-};
-
-
-#endif
diff --git a/vest/ter.cc b/vest/ter.cc
deleted file mode 100644
index cacc5b00..00000000
--- a/vest/ter.cc
+++ /dev/null
@@ -1,535 +0,0 @@
-#include "ter.h"
-
-#include <cstdio>
-#include <cassert>
-#include <iostream>
-#include <limits>
-#include <sstream>
-#include <tr1/unordered_map>
-#include <set>
-#include <valarray>
-#include <boost/functional/hash.hpp>
-#include <stdexcept>
-#include "tdict.h"
-
-const bool ter_use_average_ref_len = true;
-const int ter_short_circuit_long_sentences = -1;
-
-using namespace std;
-using namespace std::tr1;
-
-struct COSTS {
-  static const float substitution;
-  static const float deletion;
-  static const float insertion;
-  static const float shift;
-};
-const float COSTS::substitution = 1.0f;
-const float COSTS::deletion = 1.0f;
-const float COSTS::insertion = 1.0f;
-const float COSTS::shift = 1.0f;
-
-static const int MAX_SHIFT_SIZE = 10;
-static const int MAX_SHIFT_DIST = 50;
-
-struct Shift {
-  unsigned int d_;
-  Shift() : d_() {}
-  Shift(int b, int e, int m) : d_() {
-    begin(b);
-    end(e);
-    moveto(m);
-  }
-  inline int begin() const {
-    return d_ & 0x3ff;
-  }
-  inline int end() const {
-    return (d_ >> 10) & 0x3ff;
-  }
-  inline int moveto() const {
-    int m = (d_ >> 20) & 0x7ff;
-    if (m > 1024) { m -= 1024; m *= -1; }
-    return m;
-  }
-  inline void begin(int b) {
-    d_ &= 0xfffffc00u;
-    d_ |= (b & 0x3ff);
-  }
-  inline void end(int e) {
-    d_ &= 0xfff003ffu;
-    d_ |= (e & 0x3ff) << 10;
-  }
-  inline void moveto(int m) {
-    bool neg = (m < 0);
-    if (neg) { m *= -1; m += 1024; }
-    d_ &= 0xfffff;
-    d_ |= (m & 0x7ff) << 20;
-  }
-};
-
-class TERScorerImpl {
-
- public:
-  enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION };
-
-  explicit TERScorerImpl(const vector<WordID>& ref) : ref_(ref) {
-    for (int i = 0; i < ref.size(); ++i)
-      rwexists_.insert(ref[i]);
-  }
-
-  float Calculate(const vector<WordID>& hyp, int* subs, int* ins, int* dels, int* shifts) const {
-    return CalculateAllShifts(hyp, subs, ins, dels, shifts);
-  }
-
-  inline int GetRefLength() const {
-    return ref_.size();
-  }
-
- private:
-  vector<WordID> ref_;
-  set<WordID> rwexists_;
-
-  typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap;
-  mutable NgramToIntsMap nmap_;
-
-  static float MinimumEditDistance(
-      const vector<WordID>& hyp,
-      const vector<WordID>& ref,
-      vector<TransType>* path) {
-    vector<vector<TransType> > bmat(hyp.size() + 1, vector<TransType>(ref.size() + 1, MATCH));
-    vector<vector<float> > cmat(hyp.size() + 1, vector<float>(ref.size() + 1, 0));
-    for (int i = 0; i <= hyp.size(); ++i)
-      cmat[i][0] = i;
-    for (int j = 0; j <= ref.size(); ++j)
-      cmat[0][j] = j;
-    for (int i = 1; i <= hyp.size(); ++i) {
-      const WordID& hw = hyp[i-1];
-      for (int j = 1; j <= ref.size(); ++j) {
-        const WordID& rw = ref[j-1];
-	float& cur_c = cmat[i][j];
-	TransType& cur_b = bmat[i][j];
-
-        if (rw == hw) {
-          cur_c = cmat[i-1][j-1];
-          cur_b = MATCH;
-        } else {
-          cur_c = cmat[i-1][j-1] + COSTS::substitution;
-          cur_b = SUBSTITUTION;
-        }
-	float cwoi = cmat[i-1][j];
-        if (cur_c > cwoi + COSTS::insertion) {
-          cur_c = cwoi + COSTS::insertion;
-          cur_b = INSERTION;
-        }
-        float cwod = cmat[i][j-1];
-        if (cur_c > cwod + COSTS::deletion) {
-          cur_c = cwod + COSTS::deletion;
-          cur_b = DELETION;
-        }
-      }
-    }
-
-    // trace back along the best path and record the transition types
-    path->clear();
-    int i = hyp.size();
-    int j = ref.size();
-    while (i > 0 || j > 0) {
-      if (j == 0) {
-        --i;
-        path->push_back(INSERTION);
-      } else if (i == 0) {
-        --j;
-        path->push_back(DELETION);
-      } else {
-        TransType t = bmat[i][j];
-        path->push_back(t);
-        switch (t) {
-          case SUBSTITUTION:
-          case MATCH:
-            --i; --j; break;
-          case INSERTION:
-            --i; break;
-          case DELETION:
-            --j; break;
-        }
-      }
-    }
-    reverse(path->begin(), path->end());
-    return cmat[hyp.size()][ref.size()];
-  }
-
-  void BuildWordMatches(const vector<WordID>& hyp, NgramToIntsMap* nmap) const {
-    nmap->clear();
-    set<WordID> exists_both;
-    for (int i = 0; i < hyp.size(); ++i)
-      if (rwexists_.find(hyp[i]) != rwexists_.end())
-        exists_both.insert(hyp[i]);
-    for (int start=0; start<ref_.size(); ++start) {
-      if (exists_both.find(ref_[start]) == exists_both.end()) continue;
-      vector<WordID> cp;
-      int mlen = min(MAX_SHIFT_SIZE, static_cast<int>(ref_.size() - start));
-      for (int len=0; len<mlen; ++len) {
-        if (len && exists_both.find(ref_[start + len]) == exists_both.end()) break;
-        cp.push_back(ref_[start + len]);
-	(*nmap)[cp].insert(start);
-      }
-    }
-  }
-
-  static void PerformShift(const vector<WordID>& in,
-    int start, int end, int moveto, vector<WordID>* out) {
-    // cerr << "ps: " << start << " " << end << " " << moveto << endl;
-    out->clear();
-    if (moveto == -1) {
-      for (int i = start; i <= end; ++i)
-       out->push_back(in[i]);
-      for (int i = 0; i < start; ++i)
-       out->push_back(in[i]);
-      for (int i = end+1; i < in.size(); ++i)
-       out->push_back(in[i]);
-    } else if (moveto < start) {
-      for (int i = 0; i <= moveto; ++i)
-       out->push_back(in[i]);
-      for (int i = start; i <= end; ++i)
-       out->push_back(in[i]);
-      for (int i = moveto+1; i < start; ++i)
-       out->push_back(in[i]);
-      for (int i = end+1; i < in.size(); ++i)
-       out->push_back(in[i]);
-    } else if (moveto > end) {
-      for (int i = 0; i < start; ++i)
-       out->push_back(in[i]);
-      for (int i = end+1; i <= moveto; ++i)
-       out->push_back(in[i]);
-      for (int i = start; i <= end; ++i)
-       out->push_back(in[i]);
-      for (int i = moveto+1; i < in.size(); ++i)
-       out->push_back(in[i]);
-    } else {
-      for (int i = 0; i < start; ++i)
-       out->push_back(in[i]);
-      for (int i = end+1; (i < in.size()) && (i <= end + (moveto - start)); ++i)
-       out->push_back(in[i]);
-      for (int i = start; i <= end; ++i)
-       out->push_back(in[i]);
-      for (int i = (end + (moveto - start))+1; i < in.size(); ++i)
-       out->push_back(in[i]);
-    }
-    if (out->size() != in.size()) {
-      cerr << "ps: " << start << " " << end << " " << moveto << endl;
-      cerr << "in=" << TD::GetString(in) << endl;
-      cerr << "out=" << TD::GetString(*out) << endl;
-    }
-    assert(out->size() == in.size());
-    // cerr << "ps: " << TD::GetString(*out) << endl;
-  }
-
-  void GetAllPossibleShifts(const vector<WordID>& hyp,
-      const vector<int>& ralign,
-      const vector<bool>& herr,
-      const vector<bool>& rerr,
-      const int min_size,
-      vector<vector<Shift> >* shifts) const {
-    for (int start = 0; start < hyp.size(); ++start) {
-      vector<WordID> cp(1, hyp[start]);
-      NgramToIntsMap::iterator niter = nmap_.find(cp);
-      if (niter == nmap_.end()) continue;
-      bool ok = false;
-      int moveto;
-      for (set<int>::iterator i = niter->second.begin(); i != niter->second.end(); ++i) {
-        moveto = *i;
-        int rm = ralign[moveto];
-        ok = (start != rm &&
-              (rm - start) < MAX_SHIFT_DIST &&
-              (start - rm - 1) < MAX_SHIFT_DIST);
-        if (ok) break;
-      }
-      if (!ok) continue;
-      cp.clear();
-      for (int end = start + min_size - 1;
-           ok && end < hyp.size() && end < (start + MAX_SHIFT_SIZE); ++end) {
-        cp.push_back(hyp[end]);
-	vector<Shift>& sshifts = (*shifts)[end - start];
-        ok = false;
-        NgramToIntsMap::iterator niter = nmap_.find(cp);
-        if (niter == nmap_.end()) break;
-        bool any_herr = false;
-        for (int i = start; i <= end && !any_herr; ++i)
-          any_herr = herr[i];
-        if (!any_herr) {
-          ok = true;
-          continue;
-        }
-        for (set<int>::iterator mi = niter->second.begin();
-             mi != niter->second.end(); ++mi) {
-          int moveto = *mi;
-	  int rm = ralign[moveto];
-	  if (! ((rm != start) &&
-	        ((rm < start) || (rm > end)) &&
-		(rm - start <= MAX_SHIFT_DIST) &&
-		((start - rm - 1) <= MAX_SHIFT_DIST))) continue;
-          ok = true;
-	  bool any_rerr = false;
-	  for (int i = 0; (i <= end - start) && (!any_rerr); ++i)
-            any_rerr = rerr[moveto+i];
-	  if (!any_rerr) continue;
-	  for (int roff = 0; roff <= (end - start); ++roff) {
-	    int rmr = ralign[moveto+roff];
-	    if ((start != rmr) && ((roff == 0) || (rmr != ralign[moveto])))
-	      sshifts.push_back(Shift(start, end, moveto + roff));
-	  }
-        }
-      }
-    }
-  }
-
-  bool CalculateBestShift(const vector<WordID>& cur,
-                          const vector<WordID>& hyp,
-                          float curerr,
-                          const vector<TransType>& path,
-                          vector<WordID>* new_hyp,
-                          float* newerr,
-                          vector<TransType>* new_path) const {
-    vector<bool> herr, rerr;
-    vector<int> ralign;
-    int hpos = -1;
-    for (int i = 0; i < path.size(); ++i) {
-      switch (path[i]) {
-        case MATCH:
-	  ++hpos;
-	  herr.push_back(false);
-	  rerr.push_back(false);
-	  ralign.push_back(hpos);
-          break;
-        case SUBSTITUTION:
-	  ++hpos;
-	  herr.push_back(true);
-	  rerr.push_back(true);
-	  ralign.push_back(hpos);
-          break;
-        case INSERTION:
-	  ++hpos;
-	  herr.push_back(true);
-          break;
-	case DELETION:
-	  rerr.push_back(true);
-	  ralign.push_back(hpos);
-          break;
-      }
-    }
-#if 0
-    cerr << "RALIGN: ";
-    for (int i = 0; i < rerr.size(); ++i)
-      cerr << ralign[i] << " ";
-    cerr << endl;
-    cerr << "RERR: ";
-    for (int i = 0; i < rerr.size(); ++i)
-      cerr << (bool)rerr[i] << " ";
-    cerr << endl;
-    cerr << "HERR: ";
-    for (int i = 0; i < herr.size(); ++i)
-      cerr << (bool)herr[i] << " ";
-    cerr << endl;
-#endif
-
-    vector<vector<Shift> > shifts(MAX_SHIFT_SIZE + 1);
-    GetAllPossibleShifts(cur, ralign, herr, rerr, 1, &shifts);
-    float cur_best_shift_cost = 0;
-    *newerr = curerr;
-    vector<TransType> cur_best_path;
-    vector<WordID> cur_best_hyp;
-
-    bool res = false;
-    for (int i = shifts.size() - 1; i >=0; --i) {
-      float curfix = curerr - (cur_best_shift_cost + *newerr);
-      float maxfix = 2.0f * (1 + i) - COSTS::shift;
-      if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) break;
-      for (int j = 0; j < shifts[i].size(); ++j) {
-        const Shift& s = shifts[i][j];
-	curfix = curerr - (cur_best_shift_cost + *newerr);
-	maxfix = 2.0f * (1 + i) - COSTS::shift;  // TODO remove?
-        if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) continue;
-	vector<WordID> shifted(cur.size());
-	PerformShift(cur, s.begin(), s.end(), ralign[s.moveto()], &shifted);
-	vector<TransType> try_path;
-	float try_cost = MinimumEditDistance(shifted, ref_, &try_path);
-	float gain = (*newerr + cur_best_shift_cost) - (try_cost + COSTS::shift);
-	if (gain > 0.0f || ((cur_best_shift_cost == 0.0f) && (gain == 0.0f))) {
-	  *newerr = try_cost;
-	  cur_best_shift_cost = COSTS::shift;
-	  new_path->swap(try_path);
-	  new_hyp->swap(shifted);
-	  res = true;
-	  // cerr << "Found better shift " << s.begin() << "..." << s.end() << " moveto " << s.moveto() << endl;
-	}
-      }
-    }
-
-    return res;
-  }
-
-  static void GetPathStats(const vector<TransType>& path, int* subs, int* ins, int* dels) {
-    *subs = *ins = *dels = 0;
-    for (int i = 0; i < path.size(); ++i) {
-      switch (path[i]) {
-        case SUBSTITUTION:
-	  ++(*subs);
-        case MATCH:
-          break;
-        case INSERTION:
-          ++(*ins); break;
-	case DELETION:
-          ++(*dels); break;
-      }
-    }
-  }
-
-  float CalculateAllShifts(const vector<WordID>& hyp,
-      int* subs, int* ins, int* dels, int* shifts) const {
-    BuildWordMatches(hyp, &nmap_);
-    vector<TransType> path;
-    float med_cost = MinimumEditDistance(hyp, ref_, &path);
-    float edits = 0;
-    vector<WordID> cur = hyp;
-    *shifts = 0;
-    if (ter_short_circuit_long_sentences < 0 ||
-        ref_.size() < ter_short_circuit_long_sentences) {
-      while (true) {
-        vector<WordID> new_hyp;
-        vector<TransType> new_path;
-        float new_med_cost;
-        if (!CalculateBestShift(cur, hyp, med_cost, path, &new_hyp, &new_med_cost, &new_path))
-          break;
-        edits += COSTS::shift;
-        ++(*shifts);
-        med_cost = new_med_cost;
-        path.swap(new_path);
-        cur.swap(new_hyp);
-      }
-    }
-    GetPathStats(path, subs, ins, dels);
-    return med_cost + edits;
-  }
-};
-
-class TERScore : public ScoreBase<TERScore> {
-  friend class TERScorer;
-
- public:
-  static const unsigned kINSERTIONS = 0;
-  static const unsigned kDELETIONS = 1;
-  static const unsigned kSUBSTITUTIONS = 2;
-  static const unsigned kSHIFTS = 3;
-  static const unsigned kREF_WORDCOUNT = 4;
-  static const unsigned kDUMMY_LAST_ENTRY = 5;
-
- TERScore() : stats(0,kDUMMY_LAST_ENTRY) {}
-  float ComputePartialScore() const { return 0.0;}
-  float ComputeScore() const {
-    float edits = static_cast<float>(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]);
-    return edits / static_cast<float>(stats[kREF_WORDCOUNT]);
-  }
-  void ScoreDetails(string* details) const;
-  void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){}
-  void PlusEquals(const Score& delta, const float scale) {
-    if (scale==1)
-      stats += static_cast<const TERScore&>(delta).stats;
-    if (scale==-1)
-      stats -= static_cast<const TERScore&>(delta).stats;
-    throw std::runtime_error("TERScore::PlusEquals with scale != +-1");
- }
-  void PlusEquals(const Score& delta) {
-    stats += static_cast<const TERScore&>(delta).stats;
-  }
-
-  ScoreP GetZero() const {
-    return ScoreP(new TERScore);
-  }
-  ScoreP GetOne() const {
-    return ScoreP(new TERScore);
-  }
-  void Subtract(const Score& rhs, Score* res) const {
-    static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats;
-  }
-  void Encode(std::string* out) const {
-    ostringstream os;
-    os << stats[kINSERTIONS] << ' '
-       << stats[kDELETIONS] << ' '
-       << stats[kSUBSTITUTIONS] << ' '
-       << stats[kSHIFTS] << ' '
-       << stats[kREF_WORDCOUNT];
-    *out = os.str();
-  }
-  bool IsAdditiveIdentity() const {
-    for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i)
-      if (stats[i] != 0) return false;
-    return true;
-  }
- private:
-  valarray<int> stats;
-};
-
-ScoreP TERScorer::ScoreFromString(const std::string& data) {
-  istringstream is(data);
-  TERScore* r = new TERScore;
-  is >> r->stats[TERScore::kINSERTIONS]
-     >> r->stats[TERScore::kDELETIONS]
-     >> r->stats[TERScore::kSUBSTITUTIONS]
-     >> r->stats[TERScore::kSHIFTS]
-     >> r->stats[TERScore::kREF_WORDCOUNT];
-  return ScoreP(r);
-}
-
-void TERScore::ScoreDetails(std::string* details) const {
-  char buf[200];
-  sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)",
-     ComputeScore() * 100.0f,
-     stats[kINSERTIONS],
-     stats[kDELETIONS],
-     stats[kSUBSTITUTIONS],
-     stats[kSHIFTS],
-     stats[kREF_WORDCOUNT]);
-  *details = buf;
-}
-
-TERScorer::~TERScorer() {
-  for (vector<TERScorerImpl*>::iterator i = impl_.begin(); i != impl_.end(); ++i)
-    delete *i;
-}
-
-TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) {
-  for (int i = 0; i < refs.size(); ++i)
-    impl_[i] = new TERScorerImpl(refs[i]);
-}
-
-ScoreP TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
-  return ScoreP();
-}
-
-ScoreP TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
-  float best_score = numeric_limits<float>::max();
-  TERScore* res = new TERScore;
-  int avg_len = 0;
-  for (int i = 0; i < impl_.size(); ++i)
-    avg_len += impl_[i]->GetRefLength();
-  avg_len /= impl_.size();
-  for (int i = 0; i < impl_.size(); ++i) {
-    int subs, ins, dels, shifts;
-    float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts);
-    // cerr << "Component TER cost: " << score << endl;
-    if (score < best_score) {
-      res->stats[TERScore::kINSERTIONS] = ins;
-      res->stats[TERScore::kDELETIONS] = dels;
-      res->stats[TERScore::kSUBSTITUTIONS] = subs;
-      res->stats[TERScore::kSHIFTS] = shifts;
-      if (ter_use_average_ref_len) {
-        res->stats[TERScore::kREF_WORDCOUNT] = avg_len;
-      } else {
-        res->stats[TERScore::kREF_WORDCOUNT] = impl_[i]->GetRefLength();
-      }
-
-      best_score = score;
-    }
-  }
-  return ScoreP(res);
-}
diff --git a/vest/ter.h b/vest/ter.h
deleted file mode 100644
index 43314791..00000000
--- a/vest/ter.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _TER_H_
-#define _TER_H_
-
-#include "scorer.h"
-
-class TERScorerImpl;
-
-class TERScorer : public SentenceScorer {
- public:
-  TERScorer(const std::vector<std::vector<WordID> >& references);
-  ~TERScorer();
-  ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
-  ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
-  static ScoreP ScoreFromString(const std::string& data);
- private:
-  std::vector<TERScorerImpl*> impl_;
-};
-
-#endif
-- 
cgit v1.2.3