89 files changed, 772 insertions, 333 deletions
diff --git a/Makefile.am b/Makefile.am
index e82e2352..98c2561e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = decoder training vest extools gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
+SUBDIRS = utils mteval decoder training vest extools gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
 AUTOMAKE_OPTIONS = foreign
 
 ACLOCAL_AMFLAGS = -I m4
diff --git a/configure.ac b/configure.ac
index e627c1cc..302eebed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,4 +76,4 @@ then
   AM_CONDITIONAL([RAND_LM], true)
 fi
 
-AC_OUTPUT(Makefile extools/Makefile decoder/Makefile training/Makefile vest/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile)
+AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile training/Makefile vest/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile)
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 68a7d765..f514b340 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -2,24 +2,16 @@ bin_PROGRAMS = cdec
 
 if HAVE_GTEST
 noinst_PROGRAMS = \
-  dict_test \
-  weights_test \
   trule_test \
   hg_test \
   ff_test \
-  logval_test \
   parser_test \
-  grammar_test \
-  small_vector_test
+  grammar_test
 endif
 
-cdec_SOURCES = cdec.cc forest_writer.cc maxtrans_blunsom.cc cdec_ff.cc timing_stats.cc
-small_vector_test_SOURCES = small_vector_test.cc
-small_vector_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
+cdec_SOURCES = cdec.cc forest_writer.cc maxtrans_blunsom.cc cdec_ff.cc
 parser_test_SOURCES = parser_test.cc
 parser_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
-dict_test_SOURCES = dict_test.cc
-dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
 ff_test_SOURCES = ff_test.cc
 ff_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
 grammar_test_SOURCES = grammar_test.cc
@@ -28,15 +20,12 @@ hg_test_SOURCES = hg_test.cc
 hg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
 trule_test_SOURCES = trule_test.cc
 trule_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
-weights_test_SOURCES = weights_test.cc
-weights_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
-logval_test_SOURCES = logval_test.cc
-logval_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
 
-LDADD = libcdec.a
+LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I..
-AM_LDFLAGS = -lz
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils
+
+AM_LDFLAGS = ../utils/libutils.a -lz
 
 rule_lexer.cc: rule_lexer.l
 	$(LEX) -s -CF -8 -o$@ $<
@@ -49,7 +38,6 @@ libcdec_a_SOURCES = \
   rule_lexer.cc \
   fst_translator.cc \
   csplit.cc \
-  dict.cc \
   translator.cc \
   scfg_translator.cc \
   hg.cc \
@@ -58,17 +46,10 @@ libcdec_a_SOURCES = \
   viterbi.cc \
   lattice.cc \
   aligner.cc \
-  gzstream.cc \
   apply_models.cc \
   earley_composer.cc \
   phrasetable_fst.cc \
-  sparse_vector.cc \
   trule.cc \
-  filelib.cc \
-  stringlib.cc \
-  fdict.cc \
-  tdict.cc \
-  weights.cc \
   ttables.cc \
   ff.cc \
   ff_lm.cc \
@@ -78,12 +59,6 @@ libcdec_a_SOURCES = \
   ff_tagger.cc \
   ff_bleu.cc \
   ff_factory.cc \
-  ../vest/scorer.cc \
-  ../vest/ter.cc \
-  ../vest/aer_scorer.cc \
-  ../vest/comb_scorer.cc \
-  ../vest/error_surface.cc \
-  ../vest/viterbi_envelope.cc \
   freqdict.cc \
   lexalign.cc \
   lextrans.cc \
diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index b089f52e..92431be4 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -5,81 +5,11 @@
 #include "sentence_metadata.h"
 #include "inside_outside.h"
 #include "viterbi.h"
+#include "alignment_pharaoh.h"
 #include <set>
 
 using namespace std;
 
-static bool is_digit(char x) { return x >= '0' && x <= '9'; }
-
-boost::shared_ptr<Array2D<bool> > AlignerTools::ReadPharaohAlignmentGrid(const string& al) {
-  int max_x = 0;
-  int max_y = 0;
-  int i = 0;
-  size_t pos = al.rfind(" ||| ");
-  if (pos != string::npos) { i = pos + 5; }
-  while (i < al.size()) {
-    if (al[i] == '\n' || al[i] == '\r') break;
-    int x = 0;
-    while(i < al.size() && is_digit(al[i])) {
-      x *= 10;
-      x += al[i] - '0';
-      ++i;
-    }
-    if (x > max_x) max_x = x;
-    assert(i < al.size());
-    if(al[i] != '-') {
-      cerr << "BAD ALIGNMENT: " << al << endl;
-      abort();
-    }
-    ++i;
-    int y = 0;
-    while(i < al.size() && is_digit(al[i])) {
-      y *= 10;
-      y += al[i] - '0';
-      ++i;
-    }
-    if (y > max_y) max_y = y;
-    while(i < al.size() && al[i] == ' ') { ++i; }
-  }
-
-  boost::shared_ptr<Array2D<bool> > grid(new Array2D<bool>(max_x + 1, max_y + 1));
-  i = 0;
-  if (pos != string::npos) { i = pos + 5; }
-  while (i < al.size()) {
-    if (al[i] == '\n' || al[i] == '\r') break;
-    int x = 0;
-    while(i < al.size() && is_digit(al[i])) {
-      x *= 10;
-      x += al[i] - '0';
-      ++i;
-    }
-    assert(i < al.size());
-    assert(al[i] == '-');
-    ++i;
-    int y = 0;
-    while(i < al.size() && is_digit(al[i])) {
-      y *= 10;
-      y += al[i] - '0';
-      ++i;
-    }
-    (*grid)(x, y) = true;
-    while(i < al.size() && al[i] == ' ') { ++i; }
-  }
-  // cerr << *grid << endl;
-  return grid;
-}
-
-void AlignerTools::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {
-  bool need_space = false;
-  for (int i = 0; i < alignment.width(); ++i)
-    for (int j = 0; j < alignment.height(); ++j)
-      if (alignment(i,j)) {
-        if (need_space) (*out) << ' '; else need_space = true;
-        (*out) << i << '-' << j;
-      }
-  (*out) << endl;
-}
-
 // used with lexical models since they may not fully generate the
 // source string
 void SourceEdgeCoveragesUsingParseIndices(const Hypergraph& g,
@@ -317,6 +247,6 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
     cerr << grid << endl;
   }
   (*out) << TD::GetString(src_sent) << " ||| " << TD::GetString(trg_sent) << " ||| ";
-  SerializePharaohFormat(grid, out);
+  AlignmentPharaoh::SerializePharaohFormat(grid, out);
 };
 
diff --git a/decoder/aligner.h b/decoder/aligner.h
index cd159119..a088ba6c 100644
--- a/decoder/aligner.h
+++ b/decoder/aligner.h
@@ -10,8 +10,6 @@ class Hypergraph;
 class SentenceMetadata;
 
 struct AlignerTools {
-  static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
-  static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);
 
   // assumption: g contains derivations of input/ref and
   // ONLY input/ref.
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 8c4a25e0..3633febd 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -34,7 +34,7 @@
 #include "inside_outside.h"
 #include "exp_semiring.h"
 #include "sentence_metadata.h"
-#include "../vest/scorer.h"
+#include "scorer.h"
 #include "apply_fsa_models.h"
 #include "program_options.h"
 #include "cfg_options.h"
@@ -59,6 +59,15 @@ void ShowBanner() {
   cerr << "cdec v1.0 (c) 2009-2010 by Chris Dyer\n";
 }
 
+// Runs ParseTranslatorInput on `line`; a non-empty reference string is converted into *ref (which must then be non-null).
+void ParseTranslatorInputLattice(const string& line, string* input, Lattice* ref) {
+  string ref_text;
+  ParseTranslatorInput(line, input, &ref_text);
+  if (ref_text.empty()) return;
+  assert(ref);
+  LatticeTools::ConvertTextOrPLF(ref_text, ref);
+}
+
 void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
   for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
     trg->set_value(it->first, it->second);
diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc
index 77989331..aa4e6d85 100644
--- a/decoder/ff_bleu.cc
+++ b/decoder/ff_bleu.cc
@@ -18,7 +18,7 @@ char const* bleu_usage_verbose="Uses feature id 0!  Make sure there are no other
 #include "hg.h"
 #include "stringlib.h"
 #include "sentence_metadata.h"
-#include "../vest/scorer.h"
+#include "scorer.h"
 
 using namespace std;
 
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index f3e65cb7..a9929253 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -728,7 +728,7 @@ LanguageModelRandLM::LanguageModelRandLM(const string& param) :
       filename = argv[0];
     }
   }
-  set_order(order);
+//  set_order(order);
   int cache_MB = 200; // increase cache size
   randlm::RandLM* rlm = randlm::RandLM::initRandLM(filename, order, cache_MB);
   assert(rlm != NULL);
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index 0ba2bf92..087bff0c 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -5,6 +5,7 @@
 #include <string>
 #include <cmath>
 
+#include "alignment_pharaoh.h"
 #include "stringlib.h"
 #include "sentence_metadata.h"
 #include "hg.h"
@@ -354,7 +355,7 @@ AlignerResults::AlignerResults(const std::string& param) :
     getline(in, line);
     if (!in) break;
     ++lc;
-    is_aligned_.push_back(AlignerTools::ReadPharaohAlignmentGrid(line));
+    is_aligned_.push_back(AlignmentPharaoh::ReadPharaohAlignmentGrid(line));
   }
   cerr << "  Loaded " << lc << " refs\n";
 }
diff --git a/decoder/hg.h b/decoder/hg.h
index d5c8e197..e9510997 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -102,6 +102,8 @@ public:
     void copy_info(Edge const& o) {
 #if USE_INFO_EDGE
       set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space).  it's empty if nobody put any info there.
+#else
+      (void) o;
 #endif
     }
     void copy_pod(Edge const& o) {
@@ -142,7 +144,7 @@ public:
 #else
     std::string info() const { return std::string(); }
     void reset_info() {  }
-    void set_info(std::string const& s) {  }
+    void set_info(std::string const& ) {  }
 #endif
     void show(std::ostream &o,unsigned mask=SPAN|RULE) const {
       o<<'{';
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index 52a8565a..1af8261e 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -622,56 +622,3 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) {
   }
 }
 
-namespace B64 {
-
-static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-static const char cd64[]="|$$$}rstuvwxyz{$$$$$$$>?@ABCDEFGHIJKLMNOPQRSTUVW$$$$$$XYZ[\\]^_`abcdefghijklmnopq";
-
-static void encodeblock(const unsigned char* in, ostream* os, int len) {
-  char out[4];
-  out[0] = cb64[ in[0] >> 2 ];
-  out[1] = cb64[ ((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4) ];
-  out[2] = (len > 1 ? cb64[ ((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6) ] : '=');
-  out[3] = (len > 2 ? cb64[ in[2] & 0x3f ] : '=');
-  os->write(out, 4);
-}
-
-void b64encode(const char* data, const size_t size, ostream* out) {
-  size_t cur = 0;
-  while(cur < size) {
-    int len = min(static_cast<size_t>(3), size - cur);
-    encodeblock(reinterpret_cast<const unsigned char*>(&data[cur]), out, len);
-    cur += len;
-  }
-}
-
-static void decodeblock(const unsigned char* in, unsigned char* out) {
-  out[0] = (unsigned char ) (in[0] << 2 | in[1] >> 4);
-  out[1] = (unsigned char ) (in[1] << 4 | in[2] >> 2);
-  out[2] = (unsigned char ) (((in[2] << 6) & 0xc0) | in[3]);
-}
-
-bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize) {
-  size_t cur = 0;
-  size_t ocur = 0;
-  unsigned char in[4];
-  while(cur < insize) {
-    assert(ocur < outsize);
-    for (int i = 0; i < 4; ++i) {
-      unsigned char v = data[cur];
-      v = (unsigned char) ((v < 43 || v > 122) ? '\0' : cd64[ v - 43 ]);
-      if (!v) {
-        cerr << "B64 decode error at offset " << cur << " offending character: " << (int)data[cur] << endl;
-        return false;
-      }
-      v = (unsigned char) ((v == '$') ? '\0' : v - 61);
-      if (v) in[i] = v - 1; else in[i] = 0;
-      ++cur;
-    }
-    decodeblock(in, reinterpret_cast<unsigned char*>(&out[ocur]));
-    ocur += 3;
-  }
-  return true;
-}
-}
-
diff --git a/decoder/hg_io.h b/decoder/hg_io.h
index b6a176ab..082489d8 100644
--- a/decoder/hg_io.h
+++ b/decoder/hg_io.h
@@ -31,9 +31,4 @@ struct HypergraphIO {
   static std::string Escape(const std::string& s);  // PLF helper
 };
 
-namespace B64 {
-  bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize);
-  void b64encode(const char* data, const size_t size, std::ostream* out);
-}
-
 #endif
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index 81a584a7..145c84d1 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -9,7 +9,7 @@
 #include <vector>
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
-#include "../vest/scorer.h"
+#include "scorer.h"
 #include "hg.h"
 #include "ff_factory.h"
 #include "ff_bleu.h"
diff --git a/decoder/phrasebased_translator.cc b/decoder/phrasebased_translator.cc
index 726b3f9a..d65e44d1 100644
--- a/decoder/phrasebased_translator.cc
+++ b/decoder/phrasebased_translator.cc
@@ -68,7 +68,6 @@ struct PhraseBasedTranslatorImpl {
   PhraseBasedTranslatorImpl(const boost::program_options::variables_map& conf) :
       add_pass_through_rules(conf.count("add_pass_through_rules")),
       max_distortion(conf["pb_max_distortion"].as<int>()),
-      kSOURCE_RULE(new TRule("[X] ||| [X,1] ||| [X,1]", true)),
       kCONCAT_RULE(new TRule("[X] ||| [X,1] [X,2] ||| [X,1] [X,2]", true)),
       kNT_TYPE(TD::Convert("X") * -1) {
     assert(max_distortion >= 0);
@@ -141,6 +140,8 @@ struct PhraseBasedTranslatorImpl {
         for (int i = 0; i < phrases.size(); ++i) {
           Hypergraph::Edge* edge = minus_lm_forest->AddEdge(phrases[i], Hypergraph::TailNodeVector());
           edge->feature_values_ = edge->rule_->scores_;
+          edge->i_ = s.i;
+          edge->j_ = s.j;
           minus_lm_forest->ConnectEdgeToHeadNode(edge->id_, phrase_head_index);
         }
         CoverageNodeMap::iterator cit = c.find(s.coverage);
@@ -189,7 +190,6 @@ struct PhraseBasedTranslatorImpl {
 
   const bool add_pass_through_rules;
   const int max_distortion;
-  TRulePtr kSOURCE_RULE;
   const TRulePtr kCONCAT_RULE;
   const WordID kNT_TYPE;
   boost::shared_ptr<FSTNode> fst;
diff --git a/decoder/sentence_metadata.h b/decoder/sentence_metadata.h
index 21be9b21..593019c8 100644
--- a/decoder/sentence_metadata.h
+++ b/decoder/sentence_metadata.h
@@ -3,7 +3,7 @@
 
 #include <cassert>
 #include "lattice.h"
-#include "../vest/scorer.h"
+#include "scorer.h"
 
 struct SentenceMetadata {
   SentenceMetadata(int id, const Lattice& ref) :
diff --git a/extools/Makefile.am b/extools/Makefile.am
index 1e82287d..ee363264 100644
--- a/extools/Makefile.am
+++ b/extools/Makefile.am
@@ -11,20 +11,20 @@ sg_lexer.cc: sg_lexer.l
 	$(LEX) -s -CF -8 -o$@ $<
 
 filter_grammar_SOURCES = filter_grammar.cc extract.cc sentence_pair.cc striped_grammar.cc sg_lexer.cc
-filter_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+filter_grammar_LDADD = $(top_srcdir)/utils/libutils.a -lz
 #filter_grammar_LDFLAGS = -all-static
 
 featurize_grammar_SOURCES = featurize_grammar.cc extract.cc sentence_pair.cc sg_lexer.cc striped_grammar.cc
-featurize_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+featurize_grammar_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 mr_stripe_rule_reduce_SOURCES = mr_stripe_rule_reduce.cc extract.cc sentence_pair.cc striped_grammar.cc sg_lexer.cc
-mr_stripe_rule_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_stripe_rule_reduce_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 extractor_SOURCES = sentence_pair.cc extract.cc extractor.cc striped_grammar.cc
-extractor_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+extractor_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 extractor_monolingual_SOURCES = extractor_monolingual.cc
-extractor_monolingual_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+extractor_monolingual_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils
 
diff --git a/extools/sg_lexer.l b/extools/sg_lexer.l
index 168b835a..d60bd0fc 100644
--- a/extools/sg_lexer.l
+++ b/extools/sg_lexer.l
@@ -1,6 +1,4 @@
 %{
-#include "rule_lexer.h"
-
 #include <string>
 #include <iostream>
 #include <sstream>
@@ -8,7 +6,6 @@
 #include <cassert>
 #include "tdict.h"
 #include "fdict.h"
-#include "trule.h"
 #include "striped_grammar.h"
 
 int lex_line = 0;
diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am
index 688746bb..2b1393ac 100644
--- a/gi/clda/src/Makefile.am
+++ b/gi/clda/src/Makefile.am
@@ -2,5 +2,5 @@ bin_PROGRAMS = clda
 
 clda_SOURCES = clda.cc
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/decoder
-AM_LDFLAGS = $(top_srcdir)/decoder/libcdec.a -lz
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils
+AM_LDFLAGS = $(top_srcdir)/utils/libutils.a -lz
diff --git a/gi/pyp-topics/src/Makefile.am b/gi/pyp-topics/src/Makefile.am
index c22819db..d3f95d0b 100644
--- a/gi/pyp-topics/src/Makefile.am
+++ b/gi/pyp-topics/src/Makefile.am
@@ -4,13 +4,13 @@ contexts_lexer.cc: contexts_lexer.l
 	$(LEX) -s -CF -8 -o$@ $<
 
 pyp_topics_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc pyp-topics.cc train.cc contexts_lexer.cc contexts_corpus.cc
-pyp_topics_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+pyp_topics_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 pyp_contexts_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc pyp-topics.cc contexts_lexer.cc contexts_corpus.cc train-contexts.cc
-pyp_contexts_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+pyp_contexts_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
 #mpi_pyp_contexts_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc mpi-pyp-topics.cc contexts_lexer.cc contexts_corpus.cc mpi-train-contexts.cc
-#mpi_pyp_contexts_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+#mpi_pyp_contexts_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I../../../utils
 
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh
index dd721361..b2d235cb 100644
--- a/gi/pyp-topics/src/contexts_corpus.hh
+++ b/gi/pyp-topics/src/contexts_corpus.hh
@@ -10,7 +10,7 @@
 
 #include "corpus.hh"
 #include "contexts_lexer.h"
-#include "../../../decoder/dict.h"
+#include "dict.h"
 
 
 class BackoffGenerator {
diff --git a/gi/pyp-topics/src/contexts_lexer.h b/gi/pyp-topics/src/contexts_lexer.h
index 1b79c6fd..66004990 100644
--- a/gi/pyp-topics/src/contexts_lexer.h
+++ b/gi/pyp-topics/src/contexts_lexer.h
@@ -5,7 +5,7 @@
 #include <vector>
 #include <string>
 
-#include "../../../decoder/dict.h" 
+#include "dict.h" 
 
 struct ContextsLexer {
   typedef std::vector<std::string> Context;
diff --git a/gi/pyp-topics/src/contexts_lexer.l b/gi/pyp-topics/src/contexts_lexer.l
index 7a5d9460..64cd7ca3 100644
--- a/gi/pyp-topics/src/contexts_lexer.l
+++ b/gi/pyp-topics/src/contexts_lexer.l
@@ -101,7 +101,7 @@ INT [\-+]?[0-9]+|inf|[\-+]inf
 
 %%
 
-#include "../../../decoder/filelib.h" 
+#include "filelib.h" 
 
 void ContextsLexer::ReadContexts(std::istream* in, ContextsLexer::ContextsCallback func, void* extra) {
   lex_line = 1;
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
new file mode 100644
index 00000000..7ae14045
--- /dev/null
+++ b/mteval/Makefile.am
@@ -0,0 +1,23 @@
+bin_PROGRAMS = \
+  fast_score \
+  mbr_kbest
+
+if HAVE_GTEST
+noinst_PROGRAMS = \
+  scorer_test
+endif
+
+noinst_LIBRARIES = libmteval.a
+
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc
+# Static link order: libmteval.a uses symbols from libutils.a, so it has to be listed first.
+fast_score_SOURCES = fast_score.cc
+fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+mbr_kbest_SOURCES = mbr_kbest.cc
+mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+scorer_test_SOURCES = scorer_test.cc
+scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils
diff --git a/vest/aer_scorer.cc b/mteval/aer_scorer.cc
index 25b58b5e..edd4390f 100644
--- a/vest/aer_scorer.cc
+++ b/mteval/aer_scorer.cc
@@ -5,7 +5,7 @@
 #include <sstream>
 
 #include "tdict.h"
-#include "aligner.h"
+#include "alignment_pharaoh.h"
 
 using namespace std;
 
@@ -85,7 +85,7 @@ AERScorer::AERScorer(const vector<vector<WordID> >& refs, const string& src) : s
     cerr << "AERScorer can only take a single reference!\n";
     abort();
   }
-  ref_ = AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(refs.front()));
+  ref_ = AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(refs.front()));
 }
 
 static inline bool Safe(const Array2D<bool>& a, int i, int j) {
@@ -101,7 +101,7 @@ ScoreP AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const {
 
 ScoreP AERScorer::ScoreCandidate(const vector<WordID>& shyp) const {
   boost::shared_ptr<Array2D<bool> > hyp =
-    AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(shyp));
+    AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(shyp));
 
   int m = 0;
   int r = 0;
diff --git a/vest/aer_scorer.h b/mteval/aer_scorer.h
index 6d53d359..6d53d359 100644
--- a/vest/aer_scorer.h
+++ b/mteval/aer_scorer.h
diff --git a/vest/comb_scorer.cc b/mteval/comb_scorer.cc
index 9fc37868..9fc37868 100644
--- a/vest/comb_scorer.cc
+++ b/mteval/comb_scorer.cc
diff --git a/vest/comb_scorer.h b/mteval/comb_scorer.h
index 346be576..346be576 100644
--- a/vest/comb_scorer.h
+++ b/mteval/comb_scorer.h
diff --git a/vest/fast_score.cc b/mteval/fast_score.cc
index 5ee264a6..5ee264a6 100644
--- a/vest/fast_score.cc
+++ b/mteval/fast_score.cc
diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
new file mode 100644
index 00000000..2867b36b
--- /dev/null
+++ b/mteval/mbr_kbest.cc
@@ -0,0 +1,138 @@
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include "prob.h"
+#include "tdict.h"
+#include "scorer.h"
+#include "filelib.h"
+#include "stringlib.h"
+
+using namespace std;
+
+namespace po = boost::program_options;
+
+// Parses argv into *conf; when --help/-h is given, prints the option summary to stderr and exits with status 1.
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)")
+        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function")
+        ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")
+        ("output_list,L", "Show reranked list as output")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+struct LossComparer {  // strict weak order on (hypothesis, score) pairs: ascending by score viewed as a double
+  template <class P> bool operator()(const P& a, const P& b) const {  // generic, so no temporary pair (and vector copy) per compare
+    return static_cast<double>(a.second) < static_cast<double>(b.second);
+  }
+};
+
+// Reads the next run of same-id "id ||| hyp ||| ... ||| score" lines into *list; NOT reentrant (one-line look-ahead lives in statics).
+bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) {
+  static string cache_id;
+  static pair<vector<WordID>, prob_t> cache_pair;
+  list->clear();
+  string cur_id;
+  if (cache_pair.first.size() > 0) {
+    list->push_back(cache_pair);
+    cur_id = cache_id;
+    *sent_id = cur_id;  // fix: report the id even when this look-ahead entry ends up being the whole list
+    cache_pair.first.clear();
+  }
+  string line, tstr;
+  while(*in) {
+    getline(*in, line);
+    if (line.empty()) continue;
+    const size_t p1 = line.find(" ||| ");
+    const size_t p2 = (p1 == string::npos) ? p1 : line.find(" ||| ", p1 + 4);  // +4: separators may share a space
+    if (p2 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
+    const size_t p3 = line.rfind(" ||| ");
+    cache_id = line.substr(0, p1);
+    tstr = (p2 > p1 + 5) ? line.substr(p1 + 5, p2 - p1 - 5) : string();  // fix: length no longer wraps for "||| |||"
+    const double val = strtod(line.substr(p3 + 5).c_str(), NULL);
+    TD::ConvertSentence(tstr, &cache_pair.first);
+    cache_pair.second.logeq(val);
+    if (cur_id.empty()) cur_id = cache_id;
+    if (cur_id == cache_id) {
+      list->push_back(cache_pair);
+      *sent_id = cur_id;
+      cache_pair.first.clear();
+    } else { break; }
+  }
+  return !list->empty();
+}
+
+int main(int argc, char** argv) {  // per k-best list: print the entry with the smallest accumulated weighted loss, or with -L the whole re-scored list
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string metric = conf["loss_function"].as<string>();
+  const bool output_list = conf.count("output_list") > 0;
+  const string file = conf["input"].as<string>();
+  const double mbr_scale = conf["scale"].as<double>();
+  cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl;
+
+  ScoreType type = ScoreTypeFromString(metric);
+  vector<pair<vector<WordID>, prob_t> > list;
+  ReadFile rf(file);
+  string sent_id;
+  while(ReadKBestList(rf.stream(), &sent_id, &list)) {
+    vector<prob_t> joints(list.size());
+    const prob_t max_score = pow(list.front().second, mbr_scale);  // scaled score of the first entry; common divisor below
+    prob_t marginal = prob_t::Zero();
+    for (int i = 0 ; i < list.size(); ++i) {
+      const prob_t joint = pow(list[i].second, mbr_scale) / max_score;
+      joints[i] = joint;
+      // cerr << "list[" << i << "] joint=" << log(joint) << endl;
+      marginal += joint;  // running sum of the rescaled scores, used to normalise the weights
+    }
+    int mbr_idx = -1;
+    vector<double> mbr_scores(output_list ? list.size() : 0);  // only filled when the full list is printed
+    double mbr_loss = numeric_limits<double>::max();  // smallest accumulated loss seen so far
+    for (int i = 0 ; i < list.size(); ++i) {
+      vector<vector<WordID> > refs(1, list[i].first);  // entry i acts as the single reference for this scorer
+      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl;
+      ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs);
+      double wl_acc = 0;
+      for (int j = 0; j < list.size(); ++j) {
+        if (i != j) {
+          ScoreP s = scorer->ScoreCandidate(list[j].first);
+          double loss = 1.0 - s->ComputeScore();
+          if (type == TER || type == AER) loss = 1.0 - loss;  // for TER/AER this undoes the line above: loss == ComputeScore()
+          double weighted_loss = loss * (joints[j] / marginal);
+          wl_acc += weighted_loss;
+          if ((!output_list) && wl_acc > mbr_loss) break;  // entry i is already worse than the best so far (presumably losses are non-negative); not with -L
+        }
+      }
+      if (output_list) mbr_scores[i] = wl_acc;
+      if (wl_acc < mbr_loss) {
+        mbr_loss = wl_acc;
+        mbr_idx = i;
+      }
+    }
+    // cerr << "ML translation: " << TD::GetString(list[0].first) << endl;
+    cerr << "MBR Best idx: " << mbr_idx << endl;  // index into the list as read, i.e. before the sort below
+    if (output_list) {
+      for (int i = 0; i < list.size(); ++i)
+        list[i].second.logeq(mbr_scores[i]);  // replace each entry's score by its accumulated loss (read back with log() below)
+      sort(list.begin(), list.end(), LossComparer());  // LossComparer orders by ascending .second
+      for (int i = 0; i < list.size(); ++i)
+        cout << sent_id << " ||| "
+             << TD::GetString(list[i].first) << " ||| "
+             << log(list[i].second) << endl;
+    } else {
+      cout << TD::GetString(list[mbr_idx].first) << endl;
+    }
+  }
+  return 0;
+}
+
diff --git a/vest/scorer.cc b/mteval/scorer.cc
index 70fdef34..04eeaa93 100644
--- a/vest/scorer.cc
+++ b/mteval/scorer.cc
@@ -12,22 +12,15 @@
 #include <boost/shared_ptr.hpp>
 
 #include "filelib.h"
-#include "aligner.h"
-#include "viterbi_envelope.h"
-#include "error_surface.h"
 #include "ter.h"
 #include "aer_scorer.h"
 #include "comb_scorer.h"
 #include "tdict.h"
 #include "stringlib.h"
-#include "lattice.h"
-
 
 using boost::shared_ptr;
 using namespace std;
 
-const bool minimize_segments = true;    // if adjacent segments have equal scores, merge them
-
 void Score::TimesEquals(float scale) {
   cerr<<"UNIMPLEMENTED except for BLEU (for MIRA): Score::TimesEquals"<<endl;abort();
 }
@@ -410,77 +403,6 @@ ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string&
   }
 }
 
-void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* env, const ScoreType type, const Hypergraph& hg) const {
-  vector<WordID> prev_trans;
-  const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
-  env->resize(ienv.size());
-  ScoreP prev_score;
-  int j = 0;
-  for (int i = 0; i < ienv.size(); ++i) {
-    const Segment& seg = *ienv[i];
-    vector<WordID> trans;
-    if (type == AER) {
-      vector<bool> edges(hg.edges_.size(), false);
-      seg.CollectEdgesUsed(&edges);  // get the set of edges in the viterbi
-                                     // alignment
-      ostringstream os;
-      const string* psrc = this->GetSource();
-      if (psrc == NULL) {
-        cerr << "AER scoring in VEST requires source, but it is missing!\n";
-        abort();
-      }
-      size_t pos = psrc->rfind(" ||| ");
-      if (pos == string::npos) {
-        cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl;
-        abort();
-      }
-      Lattice src;
-      Lattice ref;
-      LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src);
-      LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref);
-      AlignerTools::WriteAlignment(src, ref, hg, &os, true, &edges);
-      string tstr = os.str();
-      TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans);
-    } else {
-      seg.ConstructTranslation(&trans);
-    }
-    // cerr << "Scoring: " << TD::GetString(trans) << endl;
-    if (trans == prev_trans) {
-      if (!minimize_segments) {
-        assert(prev_score); // if this fails, it means
-	                    // the decoder can generate null translations
-        ErrorSegment& out = (*env)[j];
-        out.delta = prev_score->GetZero();
-        out.x = seg.x;
-	++j;
-      }
-      // cerr << "Identical translation, skipping scoring\n";
-    } else {
-      ScoreP score = ScoreCandidate(trans);
-      // cerr << "score= " << score->ComputeScore() << "\n";
-      ScoreP cur_delta_p = score->GetZero();
-      Score* cur_delta = cur_delta_p.get();
-      // just record the score diffs
-      if (!prev_score)
-        prev_score = score->GetZero();
-
-      score->Subtract(*prev_score, cur_delta);
-      prev_trans.swap(trans);
-      prev_score = score;
-      if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) {
-        ErrorSegment& out = (*env)[j];
-        out.delta = cur_delta_p;
-        out.x = seg.x;
-        ++j;
-      }
-    }
-  }
-  // cerr << " In segments: " << ienv.size() << endl;
-  // cerr << "Out segments: " << j << endl;
-  assert(j > 0);
-  env->resize(j);
-}
-
 void BLEUScore::ScoreDetails(string* details) const {
   char buf[2000];
   vector<float> precs(max(N(),4));
diff --git a/vest/scorer.h b/mteval/scorer.h
index 0c8b380f..f18c8c7f 100644
--- a/vest/scorer.h
+++ b/mteval/scorer.h
@@ -49,7 +49,7 @@ class Score : public boost::intrusive_refcount<Score> {
   virtual ScoreP Clone() const = 0;
 protected:
   Score() {  } // we define these explicitly because refcount is noncopyable
-  Score(Score const& o) {  }
+  Score(Score const&) {  }
 };
 
 //TODO: make sure default copy ctors for score types do what we want.
@@ -72,7 +72,6 @@ class SentenceScorer {
   virtual ~SentenceScorer();
   virtual ScoreP GetOne() const;
   virtual ScoreP GetZero() const;
-  void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es, const ScoreType type, const Hypergraph& hg) const;
   virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0;
   virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0;
   virtual const std::string* GetSource() const;
diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc
new file mode 100644
index 00000000..a07a8c4b
--- /dev/null
+++ b/mteval/scorer_test.cc
@@ -0,0 +1,182 @@
+#include <iostream>
+#include <fstream>
+#include <valarray>
+#include <gtest/gtest.h>
+
+#include "tdict.h"
+#include "scorer.h"
+#include "aer_scorer.h"
+
+using namespace std;
+
+class ScorerTest : public testing::Test {  // fixture: four references per segment (refs0, refs1) and one hypothesis each
+ protected:
+   virtual void SetUp() {  // converts every sentence to WordIDs with TD::ConvertSentence
+     refs0.resize(4);
+     refs1.resize(4);  // literals below use a plain '%': backslash-percent is not a valid C++ escape sequence
+     TD::ConvertSentence("export of high-tech products in guangdong in first two months this year reached 3.76 billion us dollars", &refs0[0]);
+     TD::ConvertSentence("guangdong's export of new high technology products amounts to us $ 3.76 billion in first two months of this year", &refs0[1]);
+     TD::ConvertSentence("guangdong exports us $ 3.76 billion worth of high technology products in the first two months of this year", &refs0[2]);
+     TD::ConvertSentence("in the first 2 months this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars .", &refs0[3]);
+     TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter chen ji ) the latest statistics show that from january through february this year , the export of high-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 % over the same period last year and accounted for 25.5 % of the total export in the province .", &refs1[0]);
+     TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter : chen ji ) -- latest statistic indicates that guangdong's export of new high technology products amounts to us $ 3.76 billion , up 34.8 % over corresponding period and accounts for 25.5 % of the total exports of the province .", &refs1[1]);
+     TD::ConvertSentence("xinhua news agency report of march 16 from guangzhou ( by staff reporter chen ji ) - latest statistics indicate guangdong province exported us $ 3.76 billion worth of high technology products , up 34.8 percent from the same period last year , which account for 25.5 percent of the total exports of the province .", &refs1[2]);
+     TD::ConvertSentence("guangdong , march 16 , ( xinhua ) -- ( chen ji reports ) as the newest statistics shows , in january and feberuary this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 % than last year , making up 25.5 % of the province's total .", &refs1[3]);
+     TD::ConvertSentence("one guangdong province will next export us $ 3.76 high-tech product two months first this year 3.76 billion us dollars", &hyp1);
+     TD::ConvertSentence("xinhua news agency , guangzhou , 16th of march ( reporter chen ) -- latest statistics suggest that guangdong exports new advanced technology product totals $ 3.76 million , 34.8 percent last corresponding period and accounts for 25.5 percent of the total export province .", &hyp2);
+   }
+
+   virtual void TearDown() { }
+
+   vector<vector<WordID> > refs0;  // references scored against hyp1
+   vector<vector<WordID> > refs1;  // references scored against hyp2
+   vector<WordID> hyp1;
+   vector<WordID> hyp2;
+};
+
+TEST_F(ScorerTest, TestCreateFromFiles) {
+  // No assertions: the test only checks that DocScorer construction completes.
+  static const char* const kRefFiles[] = {
+    "test_data/re.txt.0", "test_data/re.txt.1",
+    "test_data/re.txt.2", "test_data/re.txt.3" };
+  vector<string> files(kRefFiles, kRefFiles + 4);
+  DocScorer ds(IBM_BLEU, files);
+}
+
+TEST_F(ScorerTest, TestBLEUScorer) {  // IBM_BLEU: per-segment scores, accumulation, encode/decode round trip
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs0);
+  ScorerP s2 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs1);
+  ScoreP b1 = s1->ScoreCandidate(hyp1);
+  EXPECT_FLOAT_EQ(0.23185077, b1->ComputeScore());  // hard-coded expected score for hyp1 against refs0
+  ScoreP b2 = s2->ScoreCandidate(hyp2);
+  EXPECT_FLOAT_EQ(0.38101241, b2->ComputeScore());  // hard-coded expected score for hyp2 against refs1
+  b1->PlusEquals(*b2);  // from here on b1 is the combination of both segments
+  EXPECT_FLOAT_EQ(0.348854, b1->ComputeScore());
+  EXPECT_FALSE(b1->IsAdditiveIdentity());
+  string details;
+  b1->ScoreDetails(&details);
+  EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details);
+  cerr << details << endl;
+  string enc;
+  b1->Encode(&enc);  // serialize the combined score ...
+  ScoreP b3 = SentenceScorer::CreateScoreFromString(IBM_BLEU, enc);  // ... and rebuild it from the encoding
+  details.clear();
+  cerr << "Encoded BLEU score size: " << enc.size() << endl;
+  b3->ScoreDetails(&details);
+  cerr << details << endl;
+  EXPECT_FALSE(b3->IsAdditiveIdentity());
+  EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details);  // round trip must give the same details string
+  ScoreP bz = b3->GetZero();
+  EXPECT_TRUE(bz->IsAdditiveIdentity());  // GetZero() is expected to report itself as the additive identity
+}
+
+TEST_F(ScorerTest, TestTERScorer) {  // TER: accumulation of two segments and encode/decode round trip
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(TER, refs0);
+  ScorerP s2 = SentenceScorer::CreateSentenceScorer(TER, refs1);
+  string details;  // reused below without clear(); the EXPECT_EQ relies on ScoreDetails replacing its contents
+  ScoreP t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t1->ComputeScore() << endl;  // per-segment values are only printed, not asserted
+  ScoreP t2 = s2->ScoreCandidate(hyp2);
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t2->ComputeScore() << endl;
+  t1->PlusEquals(*t2);  // from here on t1 is the combination of both segments
+  cerr << t1->ComputeScore() << endl;
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  EXPECT_EQ("TER = 44.16,   4|  8| 16|  6 (len=77)", details);
+  string enc;
+  t1->Encode(&enc);
+  ScoreP t3 = SentenceScorer::CreateScoreFromString(TER, enc);  // rebuild the combined score from its encoding
+  details.clear();
+  t3->ScoreDetails(&details);
+  EXPECT_EQ("TER = 44.16,   4|  8| 16|  6 (len=77)", details);  // round trip must give the same details string
+  EXPECT_FALSE(t3->IsAdditiveIdentity());
+  ScoreP tz = t3->GetZero();
+  EXPECT_TRUE(tz->IsAdditiveIdentity());
+}
+
+TEST_F(ScorerTest, TestTERScorerSimple) {  // smoke test: prints TER details, asserts nothing
+  vector<vector<WordID> > ref(1);
+  TD::ConvertSentence("1 2 3 A B", &ref[0]);
+  vector<WordID> hyp;
+  TD::ConvertSentence("A B 1 2 3", &hyp);  // same tokens as the reference with the two halves swapped
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(TER, ref);
+  string details;
+  ScoreP t1 = s1->ScoreCandidate(hyp);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+}
+
+TEST_F(ScorerTest, TestSERScorerSimple) {  // smoke test: prints SER details, asserts nothing
+  vector<vector<WordID> > ref(1);
+  TD::ConvertSentence("A B C D", &ref[0]);
+  vector<WordID> hyp1;  // local: shadows the fixture member hyp1
+  TD::ConvertSentence("A B C", &hyp1);  // differs from the reference (last token missing)
+  vector<WordID> hyp2;  // local: shadows the fixture member hyp2
+  TD::ConvertSentence("A B C D", &hyp2);  // identical to the reference
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(SER, ref);
+  string details;
+  ScoreP t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  ScoreP t2 = s1->ScoreCandidate(hyp2);
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  t2->PlusEquals(*t1);  // combine both sentence scores and print the result
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+}
+
+TEST_F(ScorerTest, TestCombiScorer) {  // BLEU_minus_TER_over_2: encode/decode round trip and zero + score == score
+  ScorerP s1 = SentenceScorer::CreateSentenceScorer(BLEU_minus_TER_over_2, refs0);
+  string details;
+  ScoreP t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t1->ComputeScore() << endl;
+  string enc;
+  t1->Encode(&enc);
+  ScoreP t2 = SentenceScorer::CreateScoreFromString(BLEU_minus_TER_over_2, enc);  // rebuilt from the encoding
+  details.clear();
+  t2->ScoreDetails(&details);  // details now describes the decoded score
+  cerr << "DETAILS: " << details << endl;
+  ScoreP cz = t2->GetZero();
+  EXPECT_FALSE(t2->IsAdditiveIdentity());
+  EXPECT_TRUE(cz->IsAdditiveIdentity());
+  cz->PlusEquals(*t2);  // adding a score to the zero score ...
+  EXPECT_FALSE(cz->IsAdditiveIdentity());
+  string d2;
+  cz->ScoreDetails(&d2);
+  EXPECT_EQ(d2, details);  // ... must give the same details as the score itself
+}
+
+TEST_F(ScorerTest, AERTest) {
+  // AER: a score must survive an Encode / CreateScoreFromString round trip.
+  vector<vector<WordID> > refs(1);  // named refs so the fixture's refs0 is not shadowed
+  TD::ConvertSentence("0-0 2-1 1-2 3-3", &refs[0]);
+
+  vector<WordID> hyp;
+  TD::ConvertSentence("0-0 1-1", &hyp);
+  AERScorer as(refs);  // automatic storage: no new/delete pair that could leak
+  ScoreP x = as.ScoreCandidate(hyp);
+  string details;
+  x->ScoreDetails(&details);
+  cerr << details << endl;
+  string enc;
+  x->Encode(&enc);
+  cerr << "ENC size: " << enc.size() << endl;
+  ScoreP y = SentenceScorer::CreateScoreFromString(AER, enc);  // rebuilt from the encoding alone
+  string d2;
+  y->ScoreDetails(&d2);
+  cerr << d2 << endl;
+  EXPECT_EQ(d2, details);
+}
+
+int main(int argc, char **argv) {  // gtest driver for the scorer tests
+  testing::InitGoogleTest(&argc, argv);  // receives argc/argv before the tests run
+  return RUN_ALL_TESTS();  // the RUN_ALL_TESTS() result becomes the exit status
+}
+
diff --git a/vest/ter.cc b/mteval/ter.cc
index cacc5b00..cacc5b00 100644
--- a/vest/ter.cc
+++ b/mteval/ter.cc
diff --git a/vest/ter.h b/mteval/ter.h
index 43314791..43314791 100644
--- a/vest/ter.h
+++ b/mteval/ter.h
diff --git a/mteval/test_data/re.txt.0 b/mteval/test_data/re.txt.0
new file mode 100644
index 00000000..86eff087
--- /dev/null
+++ b/mteval/test_data/re.txt.0
@@ -0,0 +1,5 @@
+erdogan states turkey to reject any pressures to urge it to recognize cyprus
+ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened .
+erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus .
+we will discuss this dossier in the course of membership negotiations . "
+he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . "
diff --git a/mteval/test_data/re.txt.1 b/mteval/test_data/re.txt.1
new file mode 100644
index 00000000..2140f198
--- /dev/null
+++ b/mteval/test_data/re.txt.1
@@ -0,0 +1,5 @@
+erdogan confirms turkey will resist any pressure to recognize cyprus
+ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara .
+erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus .
+we shall discuss this issue in the course of the membership negotiations . "
+he added : " let me be clear - i cannot confine turkey . this is something we do not accept . "
diff --git a/mteval/test_data/re.txt.2 b/mteval/test_data/re.txt.2
new file mode 100644
index 00000000..94e46286
--- /dev/null
+++ b/mteval/test_data/re.txt.2
@@ -0,0 +1,5 @@
+erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus
+ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara .
+erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus .
+we shall discuss this dossier during the negotiations on joining . "
+and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . "
diff --git a/mteval/test_data/re.txt.3 b/mteval/test_data/re.txt.3
new file mode 100644
index 00000000..f87c3308
--- /dev/null
+++ b/mteval/test_data/re.txt.3
@@ -0,0 +1,5 @@
+erdogan stresses that turkey will reject all pressures to force it to recognize cyprus
+ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not .
+erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus .
+we will discuss this file during the negotiations on joining . "
+he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . "
diff --git a/training/Makefile.am b/training/Makefile.am
index 490de774..48b19932 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -14,37 +14,36 @@ noinst_PROGRAMS = \
   optimize_test
 
 atools_SOURCES = atools.cc
-atools_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+atools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 model1_SOURCES = model1.cc
-model1_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 grammar_convert_SOURCES = grammar_convert.cc
-grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 optimize_test_SOURCES = optimize_test.cc optimize.cc
-optimize_test_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+optimize_test_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 collapse_weights_SOURCES = collapse_weights.cc
-collapse_weights_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+collapse_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 lbfgs_test_SOURCES = lbfgs_test.cc
-lbfgs_test_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+lbfgs_test_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 mr_optimize_reduce_SOURCES = mr_optimize_reduce.cc optimize.cc
-mr_optimize_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_optimize_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 mr_em_map_adapter_SOURCES = mr_em_map_adapter.cc
-mr_em_map_adapter_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_em_map_adapter_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 mr_reduce_to_weights_SOURCES = mr_reduce_to_weights.cc
-mr_reduce_to_weights_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_reduce_to_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 mr_em_adapted_reduce_SOURCES = mr_em_adapted_reduce.cc
-mr_em_adapted_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_em_adapted_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 plftools_SOURCES = plftools.cc
-plftools_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
+plftools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval
diff --git a/training/atools.cc b/training/atools.cc
index af62804d..805e3c1d 100644
--- a/training/atools.cc
+++ b/training/atools.cc
@@ -9,6 +9,7 @@
 
 #include "filelib.h"
 #include "aligner.h"
+#include "alignment_pharaoh.h"
 
 namespace po = boost::program_options;
 using namespace std;
@@ -349,9 +350,9 @@ int main(int argc, char **argv) {
     }
     if (line1.empty() && !*in1) break;
     shared_ptr<Array2D<bool> > out(new Array2D<bool>);
-    shared_ptr<Array2D<bool> > a1 = AlignerTools::ReadPharaohAlignmentGrid(line1);
+    shared_ptr<Array2D<bool> > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1);
     if (in2) {
-      shared_ptr<Array2D<bool> > a2 = AlignerTools::ReadPharaohAlignmentGrid(line2);
+      shared_ptr<Array2D<bool> > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2);
       cmd.Apply(*a1, *a2, out.get());
     } else {
       Array2D<bool> dummy;
@@ -359,7 +360,7 @@ int main(int argc, char **argv) {
     }
     
     if (cmd.Result() == 1) {
-      AlignerTools::SerializePharaohFormat(*out, &cout);
+      AlignmentPharaoh::SerializePharaohFormat(*out, &cout);
     }
   }
   if (cmd.Result() == 2)
diff --git a/utils/Makefile.am b/utils/Makefile.am
new file mode 100644
index 00000000..e513febd
--- /dev/null
+++ b/utils/Makefile.am
@@ -0,0 +1,38 @@
+if HAVE_GTEST
+noinst_PROGRAMS = \
+  dict_test \
+  weights_test \
+  logval_test \
+  small_vector_test
+endif
+
+noinst_LIBRARIES = libutils.a
+
+libutils_a_SOURCES = \
+  alignment_pharaoh.cc \
+  b64tools.cc \
+  dict.cc \
+  tdict.cc \
+  fdict.cc \
+  gzstream.cc \
+  filelib.cc \
+  stringlib.cc \
+  sparse_vector.cc \
+  timing_stats.cc \
+  weights.cc
+
+# Libraries belong in *_LDADD, not AM_LDFLAGS: LDFLAGS precede the objects on
+# the link line, so a static libutils.a listed there resolves no symbols.
+dict_test_SOURCES = dict_test.cc
+dict_test_LDADD = libutils.a $(GTEST_LDFLAGS) $(GTEST_LIBS) -lz
+weights_test_SOURCES = weights_test.cc
+weights_test_LDADD = libutils.a $(GTEST_LDFLAGS) $(GTEST_LIBS) -lz
+logval_test_SOURCES = logval_test.cc
+logval_test_LDADD = libutils.a $(GTEST_LDFLAGS) $(GTEST_LIBS) -lz
+small_vector_test_SOURCES = small_vector_test.cc
+small_vector_test_LDADD = libutils.a $(GTEST_LDFLAGS) $(GTEST_LIBS) -lz
+
+################################################################
+# do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.
+################################################################
diff --git a/utils/alignment_pharaoh.cc b/utils/alignment_pharaoh.cc
new file mode 100644
index 00000000..890ff565
--- /dev/null
+++ b/utils/alignment_pharaoh.cc
@@ -0,0 +1,77 @@
+#include "utils/alignment_pharaoh.h"
+
+#include <set>
+
+using namespace std;
+
+static bool is_digit(char x) { return !(x < '0' || x > '9'); }  // true only for the characters '0'..'9'
+
+// Parses a run of decimal digits in `al` starting at *i and advances *i past
+// them. Yields 0 when no digit is present, as the previous inline loops did.
+// The value is accumulated in an int with no overflow check.
+static int ParseIndex(const string& al, size_t* i) {
+  int value = 0;
+  for (; *i < al.size() && is_digit(al[*i]); ++*i)
+    value = value * 10 + (al[*i] - '0');
+  return value;
+}
+
+// Reads one "x-y" link starting at *i, stores the two indices in *x and *y,
+// and skips the spaces that follow it. A missing '-' is fatal: the offending
+// line is printed and the process aborts (previously a debug build hit a bare
+// assert first when the line ended right after x).
+static void ParseLink(const string& al, size_t* i, int* x, int* y) {
+  *x = ParseIndex(al, i);
+  if (*i >= al.size() || al[*i] != '-') {
+    cerr << "BAD ALIGNMENT: " << al << endl;
+    abort();
+  }
+  ++*i;
+  *y = ParseIndex(al, i);
+  while (*i < al.size() && al[*i] == ' ') ++*i;
+}
+
+// Builds a boolean grid from a Pharaoh-format alignment line such as
+// "0-0 1-2 2-1". When the line contains " ||| ", only the text after the last
+// separator is parsed; parsing stops at the first '\n' or '\r'. An empty x or
+// y (e.g. "-3") reads as index 0.
+//
+// Returns a grid constructed as Array2D<bool>(max x + 1, max y + 1) in which
+// cell (x, y) is true for every link read. Malformed input aborts the process.
+boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(const string& al) {
+  // Links start after the last " ||| " separator, or at offset 0 without one.
+  const size_t sep = al.rfind(" ||| ");
+  const size_t start = (sep == string::npos) ? 0 : sep + 5;
+  // Scratch storage for the link currently being parsed.
+  int x = 0;
+  int y = 0;
+
+  // Pass 1: validate the line and find the largest index on each side.
+  int max_x = 0;
+  int max_y = 0;
+  for (size_t i = start; i < al.size() && al[i] != '\n' && al[i] != '\r'; ) {
+    ParseLink(al, &i, &x, &y);
+    if (x > max_x) max_x = x;
+    if (y > max_y) max_y = y;
+  }
+
+  // Pass 2: the dimensions are known, so allocate once and mark each link.
+  boost::shared_ptr<Array2D<bool> > grid(new Array2D<bool>(max_x + 1, max_y + 1));
+  for (size_t i = start; i < al.size() && al[i] != '\n' && al[i] != '\r'; ) {
+    ParseLink(al, &i, &x, &y);
+    (*grid)(x, y) = true;
+  }
+  return grid;
+}
+
+void AlignmentPharaoh::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {
+  bool first = true;  // no separator is written before the first pair
+  for (int x = 0; x < alignment.width(); ++x)
+    for (int y = 0; y < alignment.height(); ++y) {
+      if (!alignment(x, y)) continue;
+      if (first) first = false; else (*out) << ' ';
+      (*out) << x << '-' << y;  // one "x-y" pair per set cell
+    }
+  (*out) << endl;  // always ends the line, even when no cell is set
+}
+
diff --git a/utils/alignment_pharaoh.h b/utils/alignment_pharaoh.h
new file mode 100644
index 00000000..d111c8bf
--- /dev/null
+++ b/utils/alignment_pharaoh.h
@@ -0,0 +1,14 @@
+#ifndef _PHARAOH_ALIGNMENT_H_
+#define _PHARAOH_ALIGNMENT_H_
+
+#include <string>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+#include "array2d.h"
+
+struct AlignmentPharaoh {  // static helpers for alignments written as space-separated "x-y" pairs
+  static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);  // text line -> grid with (x, y) set per pair
+  static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);  // grid -> one line of "x-y" pairs on *out
+};
+
+#endif
diff --git a/decoder/array2d.h b/utils/array2d.h
index e63eda0d..e63eda0d 100644
--- a/decoder/array2d.h
+++ b/utils/array2d.h
diff --git a/utils/b64tools.cc b/utils/b64tools.cc
new file mode 100644
index 00000000..5512f975
--- /dev/null
+++ b/utils/b64tools.cc
@@ -0,0 +1,59 @@
+#include <iostream>
+#include <cassert>
+
+using namespace std;
+
+namespace B64 {
+static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char cd64[]="|$$$}rstuvwxyz{$$$$$$$>?@ABCDEFGHIJKLMNOPQRSTUVW$$$$$$XYZ[\\]^_`abcdefghijklmnopq";
+
+// Writes four base64 chars for the first `len` (1..3) bytes of `in`; bytes past `len` are never read.
+static void encodeblock(const unsigned char* in, ostream* os, int len) {
+  const unsigned b1 = (len > 1) ? in[1] : 0, b2 = (len > 2) ? in[2] : 0;
+  char out[4];
+  out[0] = cb64[ in[0] >> 2 ];
+  out[1] = cb64[ ((in[0] & 0x03) << 4) | ((b1 & 0xf0) >> 4) ];
+  out[2] = (len > 1 ? cb64[ ((b1 & 0x0f) << 2) | ((b2 & 0xc0) >> 6) ] : '=');
+  out[3] = (len > 2 ? cb64[ b2 & 0x3f ] : '=');
+  os->write(out, 4);
+}
+
+// Writes the base64 encoding of data[0..size) to *out; a short final block is padded with '='.
+void b64encode(const char* data, const size_t size, ostream* out) {
+  const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data);
+  for (size_t cur = 0; cur < size; cur += 3)
+    encodeblock(bytes + cur, out, size - cur < 3 ? static_cast<int>(size - cur) : 3);
+}
+
+// Packs the four 6-bit values in[0..3] into the three bytes out[0..2].
+static void decodeblock(const unsigned char* in, unsigned char* out) {
+  out[0] = (unsigned char ) (in[0] << 2 | in[1] >> 4);
+  out[1] = (unsigned char ) (in[1] << 4 | in[2] >> 2);
+  out[2] = (unsigned char ) (((in[2] << 6) & 0xc0) | in[3]);
+}
+
+// Decodes `insize` base64 chars from `data` into out[0..outsize). Returns false (after logging to
+// cerr) on an invalid char or input that ends mid-quartet; only bytes that fit in `out` are stored.
+bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize) {
+  size_t ocur = 0;
+  unsigned char in[4], block[3];
+  for (size_t cur = 0; cur < insize; ) {
+    assert(ocur < outsize);
+    for (int i = 0; i < 4; ++i, ++cur) {
+      const unsigned char raw = (cur < insize) ? data[cur] : 0;  // truncated input fails below instead of overreading
+      unsigned char v = (unsigned char) ((raw < 43 || raw > 122) ? '\0' : cd64[ raw - 43 ]);
+      if (!v) {
+        cerr << "B64 decode error at offset " << cur << " offending character: " << (int)raw << endl;
+        return false;
+      }
+      v = (unsigned char) ((v == '$') ? '\0' : v - 61);
+      in[i] = v ? v - 1 : 0;
+    }
+    decodeblock(in, block);
+    for (int k = 0; k < 3 && ocur < outsize; ++k) out[ocur++] = static_cast<char>(block[k]);
+  }
+  return true;
+}
+
+}
+
diff --git a/utils/b64tools.h b/utils/b64tools.h
new file mode 100644
index 00000000..c821fc8f
--- /dev/null
+++ b/utils/b64tools.h
@@ -0,0 +1,9 @@
+#ifndef _B64_TOOLS_H_
+#define _B64_TOOLS_H_
+#include <cstddef>  // std::size_t
+#include <iosfwd>   // std::ostream; keeps this header usable without a prior <iostream>
+namespace B64 {  // base64 helpers, defined in b64tools.cc
+  bool b64decode(const unsigned char* data, const std::size_t insize, char* out, const std::size_t outsize);  // false on bad input
+  void b64encode(const char* data, const std::size_t size, std::ostream* out);  // writes the encoding of data[0..size) to *out
+}
+#endif
diff --git a/decoder/dict.cc b/utils/dict.cc
index 2d6986c8..2d6986c8 100644
--- a/decoder/dict.cc
+++ b/utils/dict.cc
diff --git a/decoder/dict.h b/utils/dict.h
index 348a97e3..348a97e3 100644
--- a/decoder/dict.h
+++ b/utils/dict.h
diff --git a/decoder/dict_test.cc b/utils/dict_test.cc
index 694877fa..2049ec27 100644
--- a/decoder/dict_test.cc
+++ b/utils/dict_test.cc
@@ -5,9 +5,6 @@
 #include <iostream>
 #include <gtest/gtest.h>
 #include <cassert>
-#include "filelib.h"
-
-#include "tdict.h"
 
 using namespace std;
 
diff --git a/decoder/fdict.cc b/utils/fdict.cc
index baa0b552..baa0b552 100644
--- a/decoder/fdict.cc
+++ b/utils/fdict.cc
diff --git a/decoder/fdict.h b/utils/fdict.h
index f9673023..f9673023 100644
--- a/decoder/fdict.h
+++ b/utils/fdict.h
diff --git a/utils/feature_accum.h b/utils/feature_accum.h
new file mode 100755
index 00000000..851b29db
--- /dev/null
+++ b/utils/feature_accum.h
@@ -0,0 +1,129 @@
+#ifndef FEATURE_ACCUM_H
+#define FEATURE_ACCUM_H
+
+#include "ff.h"
+#include "sparse_vector.h"
+#include "value_array.h"
+
+struct SparseFeatureAccumulator : public FeatureVector {  // accumulator that is itself a FeatureVector
+  typedef FeatureVector State;
+  SparseFeatureAccumulator() {  }
+  template <class FF>
+  FeatureVector const& describe(FF const& ) { return *this; }  // the accumulated vector; the argument is ignored
+  void Store(FeatureVector *fv) const {  // copies the accumulated features into *fv via set_from
+    fv->set_from(*this);
+  }
+  template <class FF>
+  void Store(FF const& /* ff */,FeatureVector *fv) const {  // same as Store(fv); ff is ignored
+    fv->set_from(*this);
+  }
+  template <class FF>
+  void Add(FF const& /* ff */,FeatureVector const& fv) {  // same as Add(fv); ff is ignored
+    (*this)+=fv;
+  }
+  void Add(FeatureVector const& fv) {  // adds fv into the accumulator with operator+=
+    (*this)+=fv;
+  }
+  /*
+  SparseFeatureAccumulator(FeatureVector const& fv) : State(fv) {}
+  FeatureAccumulator(Features const& fids) {}
+  FeatureAccumulator(Features const& fids,FeatureVector const& fv) : State(fv) {}
+  void Add(Features const& fids,FeatureVector const& fv) {
+    *this += fv;
+  }
+  */
+  void Add(int i,Featval v) {  // adds v to entry i
+    (*this)[i]+=v;
+  }
+  void Add(Features const& /* fids */,int i,Featval v) {  // fids is unused (unnamed to silence -W); NOTE(review): i indexes *this directly, not fids[i] - confirm
+    (*this)[i]+=v;
+  }
+};
+
+struct SingleFeatureAccumulator {  // accumulator holding one running Featval
+  typedef Featval State;
+  typedef SingleFeatureAccumulator Self;
+  State v;  // the running total; value-initialized by the constructor
+  /*
+  void operator +=(State const& o) {
+    v+=o;
+  }
+  */
+  void operator +=(Self const& s) {  // adds another accumulator's total
+    v+=s.v;
+  }
+  SingleFeatureAccumulator() : v() {}
+  template <class FF>
+  State const& describe(FF const& ) const { return v; }  // the running total; the argument is ignored
+
+  template <class FF>
+  void Store(FF const& ff,FeatureVector *fv) const {  // writes the total to *fv under feature id ff.fid_
+    fv->set_value(ff.fid_,v);
+  }
+  void Store(Features const& fids,FeatureVector *fv) const {  // writes the total under fids[0]; fids must hold exactly one id
+    assert(fids.size()==1);
+    fv->set_value(fids[0],v);
+  }
+  /*
+  SingleFeatureAccumulator(Features const& fids) { assert(fids.size()==1); }
+  SingleFeatureAccumulator(Features const& fids,FeatureVector const& fv)
+  {
+    assert(fids.size()==1);
+    v=fv.get_singleton();
+  }
+  */
+
+  template <class FF>
+  void Add(FF const& ff,FeatureVector const& fv) {  // adds fv's value for feature id ff.fid_
+    v+=fv.get(ff.fid_);
+  }
+  void Add(FeatureVector const& fv) {  // adds fv.get_singleton()
+    v+=fv.get_singleton();
+  }
+
+  void Add(Features const& fids,FeatureVector const& fv) {  // adds fv's value for fids[0]; the size of fids is not checked here
+    v += fv.get(fids[0]);
+  }
+  void Add(Featval dv) {  // adds dv directly
+    v+=dv;
+  }
+  void Add(int,Featval dv) {  // the int argument is ignored
+    v+=dv;
+  }
+  void Add(FeatureVector const& fids,int i,Featval dv) {  // NOTE(review): fids is a FeatureVector here but Features in SparseFeatureAccumulator - confirm intent
+    assert(fids.size()==1 && i==0);  // fids and i are only read by this assert
+    v+=dv;
+  }
+};
+
+
+#if 0
+// omitting this so we can default construct an accum.  might be worth resurrecting in the future
+struct ArrayFeatureAccumulator : public ValueArray<Featval> {
+  typedef ValueArray<Featval> State;
+  template <class Fsa>
+  ArrayFeatureAccumulator(Fsa const& fsa) : State(fsa.features_.size()) { }
+  ArrayFeatureAccumulator(Features const& fids) : State(fids.size()) {  }
+  // (a second, identical declaration of the constructor above was removed: it could not compile)
+  ArrayFeatureAccumulator(Features const& fids,FeatureVector const& fv) : State(fids.size()) {
+    for (int i=0,e=fids.size();i<e;++i)  // was e=i<fids.size(), a bool, so at most one iteration ran
+      (*this)[i]=fv.get(i);  // NOTE(review): Store pairs slot i with fids[i]; fv.get(fids[i]) may be intended - confirm
+  }
+  State const& describe(Features const& fids) const { return *this; }
+  void Store(Features const& fids,FeatureVector *fv) const {  // writes slot i to *fv under feature id fids[i]
+    assert(fids.size()==size());
+    for (int i=0,e=fids.size();i<e;++i)
+      fv->set_value(fids[i],(*this)[i]);
+  }
+  void Add(Features const& fids,FeatureVector const& fv) {
+    for (int i=0,e=fids.size();i<e;++i)
+      (*this)[i]+=fv.get(i);  // NOTE(review): same index question as in the constructor above
+  }
+  void Add(FeatureVector const& fids,int i,Featval v) {
+    (*this)[i]+=v;
+  }
+};
+#endif
+
+
+#endif
diff --git a/decoder/feature_vector.h b/utils/feature_vector.h
index be378a6a..be378a6a 100755
--- a/decoder/feature_vector.h
+++ b/utils/feature_vector.h
diff --git a/decoder/filelib.cc b/utils/filelib.cc
index 79ad2847..79ad2847 100644
--- a/decoder/filelib.cc
+++ b/utils/filelib.cc
diff --git a/decoder/filelib.h b/utils/filelib.h
index b9fef9a7..b9fef9a7 100644
--- a/decoder/filelib.h
+++ b/utils/filelib.h
diff --git a/decoder/gzstream.cc b/utils/gzstream.cc
index 88cd1bd2..88cd1bd2 100644
--- a/decoder/gzstream.cc
+++ b/utils/gzstream.cc
diff --git a/decoder/gzstream.h b/utils/gzstream.h
index a7effd90..a7effd90 100644
--- a/decoder/gzstream.h
+++ b/utils/gzstream.h
diff --git a/decoder/hash.h b/utils/hash.h
index 3a60a429..3a60a429 100755
--- a/decoder/hash.h
+++ b/utils/hash.h
diff --git a/decoder/have_64_bits.h b/utils/have_64_bits.h
index d1e6064f..d1e6064f 100755
--- a/decoder/have_64_bits.h
+++ b/utils/have_64_bits.h
diff --git a/decoder/int_or_pointer.h b/utils/int_or_pointer.h
index 4b6a9e4a..4b6a9e4a 100755
--- a/decoder/int_or_pointer.h
+++ b/utils/int_or_pointer.h
diff --git a/decoder/intrusive_refcount.hpp b/utils/intrusive_refcount.hpp
index 4a4b0187..4a4b0187 100755
--- a/decoder/intrusive_refcount.hpp
+++ b/utils/intrusive_refcount.hpp
diff --git a/decoder/logval.h b/utils/logval.h
index 37f14ae5..37f14ae5 100644
--- a/decoder/logval.h
+++ b/utils/logval.h
diff --git a/decoder/logval_test.cc b/utils/logval_test.cc
index 1a23177d..1a23177d 100644
--- a/decoder/logval_test.cc
+++ b/utils/logval_test.cc
diff --git a/decoder/murmur_hash.h b/utils/murmur_hash.h
index 8dbd7807..8dbd7807 100755
--- a/decoder/murmur_hash.h
+++ b/utils/murmur_hash.h
diff --git a/decoder/null_deleter.h b/utils/null_deleter.h
index 082ab453..082ab453 100755
--- a/decoder/null_deleter.h
+++ b/utils/null_deleter.h
diff --git a/decoder/prob.h b/utils/prob.h
index bc297870..bc297870 100644
--- a/decoder/prob.h
+++ b/utils/prob.h
diff --git a/decoder/sampler.h b/utils/sampler.h
index 5fef45d0..5fef45d0 100644
--- a/decoder/sampler.h
+++ b/utils/sampler.h
diff --git a/decoder/small_vector.h b/utils/small_vector.h
index 25c52359..25c52359 100644
--- a/decoder/small_vector.h
+++ b/utils/small_vector.h
diff --git a/decoder/small_vector_test.cc b/utils/small_vector_test.cc
index d1d8dcab..d1d8dcab 100644
--- a/decoder/small_vector_test.cc
+++ b/utils/small_vector_test.cc
diff --git a/decoder/sparse_vector.cc b/utils/sparse_vector.cc
index 4035b9ef..6e42a216 100644
--- a/decoder/sparse_vector.cc
+++ b/utils/sparse_vector.cc
@@ -3,7 +3,7 @@
 #include <iostream>
 #include <cstring>
 
-#include "hg_io.h"
+#include "b64tools.h"
 
 using namespace std;
 
diff --git a/decoder/sparse_vector.h b/utils/sparse_vector.h
index 207489c5..207489c5 100644
--- a/decoder/sparse_vector.h
+++ b/utils/sparse_vector.h
diff --git a/decoder/static_utoa.h b/utils/static_utoa.h
index fe5f6d92..fe5f6d92 100755
--- a/decoder/static_utoa.h
+++ b/utils/static_utoa.h
diff --git a/decoder/stringlib.cc b/utils/stringlib.cc
index 3e52ae87..7aaee9f0 100644
--- a/decoder/stringlib.cc
+++ b/utils/stringlib.cc
@@ -6,8 +6,6 @@
 #include <iostream>
 #include <map>
 
-#include "lattice.h"
-
 using namespace std;
 
 void ParseTranslatorInput(const string& line, string* input, string* ref) {
@@ -31,15 +29,6 @@ void ParseTranslatorInput(const string& line, string* input, string* ref) {
   }
 }
 
-void ParseTranslatorInputLattice(const string& line, string* input, Lattice* ref) {
-  string sref;
-  ParseTranslatorInput(line, input, &sref);
-  if (sref.size() > 0) {
-    assert(ref);
-    LatticeTools::ConvertTextOrPLF(sref, ref);
-  }
-}
-
 void ProcessAndStripSGML(string* pline, map<string, string>* out) {
   map<string, string>& meta = *out;
   string& line = *pline;
diff --git a/decoder/stringlib.h b/utils/stringlib.h
index 84e95d44..84e95d44 100644
--- a/decoder/stringlib.h
+++ b/utils/stringlib.h
diff --git a/decoder/stringlib_test.cc b/utils/stringlib_test.cc
index f66cdbeb..f66cdbeb 100755
--- a/decoder/stringlib_test.cc
+++ b/utils/stringlib_test.cc
diff --git a/decoder/tdict.cc b/utils/tdict.cc
index 1f68feae..1f68feae 100644
--- a/decoder/tdict.cc
+++ b/utils/tdict.cc
diff --git a/decoder/tdict.h b/utils/tdict.h
index a7b3ee1c..a7b3ee1c 100644
--- a/decoder/tdict.h
+++ b/utils/tdict.h
diff --git a/decoder/test_data/weights b/utils/test_data/weights
index ea70229c..ea70229c 100644
--- a/decoder/test_data/weights
+++ b/utils/test_data/weights
diff --git a/decoder/threadlocal.h b/utils/threadlocal.h
index d79f5d9d..d79f5d9d 100755
--- a/decoder/threadlocal.h
+++ b/utils/threadlocal.h
diff --git a/decoder/timing_stats.cc b/utils/timing_stats.cc
index fc8e9df1..fc8e9df1 100644
--- a/decoder/timing_stats.cc
+++ b/utils/timing_stats.cc
diff --git a/decoder/timing_stats.h b/utils/timing_stats.h
index 0a9f7656..0a9f7656 100644
--- a/decoder/timing_stats.h
+++ b/utils/timing_stats.h
diff --git a/decoder/weights.cc b/utils/weights.cc
index 84647585..84647585 100644
--- a/decoder/weights.cc
+++ b/utils/weights.cc
diff --git a/decoder/weights.h b/utils/weights.h
index f19aa3ce..f19aa3ce 100644
--- a/decoder/weights.h
+++ b/utils/weights.h
diff --git a/decoder/weights_test.cc b/utils/weights_test.cc
index aa6b3db2..8a4c26ef 100644
--- a/decoder/weights_test.cc
+++ b/utils/weights_test.cc
@@ -5,7 +5,6 @@
 #include <gtest/gtest.h>
 #include "weights.h"
 #include "tdict.h"
-#include "hg.h"
 
 using namespace std;
 
diff --git a/decoder/wordid.h b/utils/wordid.h
index fb50bcc1..fb50bcc1 100644
--- a/decoder/wordid.h
+++ b/utils/wordid.h
diff --git a/vest/Makefile.am b/vest/Makefile.am
index abdc8146..b869672b 100644
--- a/vest/Makefile.am
+++ b/vest/Makefile.am
@@ -1,15 +1,12 @@
 bin_PROGRAMS = \
-  mbr_kbest \
   mr_vest_map \
   mr_vest_reduce \
   mr_vest_generate_mapper_input \
-  fast_score \
   sentserver \
   sentclient
 
 if HAVE_GTEST
 noinst_PROGRAMS = \
-  scorer_test \
   lo_test
 endif
 
@@ -17,25 +14,16 @@ sentserver_SOURCES = sentserver.c
 
 sentclient_SOURCES = sentclient.c
 
-mbr_kbest_SOURCES = mbr_kbest.cc ter.cc comb_scorer.cc aer_scorer.cc scorer.cc viterbi_envelope.cc
-mbr_kbest_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc
+mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
 
-fast_score_SOURCES = fast_score.cc ter.cc comb_scorer.cc aer_scorer.cc scorer.cc viterbi_envelope.cc
-fast_score_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_vest_map_SOURCES = viterbi_envelope.cc ces.cc error_surface.cc mr_vest_map.cc line_optimizer.cc
+mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
 
-mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc $(top_srcdir)/decoder/timing_stats.cc
-mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+mr_vest_reduce_SOURCES = error_surface.cc ces.cc mr_vest_reduce.cc line_optimizer.cc viterbi_envelope.cc
+mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
 
-mr_vest_map_SOURCES = viterbi_envelope.cc error_surface.cc aer_scorer.cc mr_vest_map.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc
-mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+lo_test_SOURCES = lo_test.cc ces.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc
+lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
 
-mr_vest_reduce_SOURCES = error_surface.cc aer_scorer.cc mr_vest_reduce.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc viterbi_envelope.cc
-mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
-
-scorer_test_SOURCES = aer_scorer.cc scorer_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc
-scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz
-
-lo_test_SOURCES = lo_test.cc scorer.cc ter.cc aer_scorer.cc comb_scorer.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc
-lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/vest/lo_test.cc b/vest/lo_test.cc
index 577113bb..9200eb34 100644
--- a/vest/lo_test.cc
+++ b/vest/lo_test.cc
@@ -5,6 +5,7 @@
 #include <boost/shared_ptr.hpp>
 #include <gtest/gtest.h>
 
+#include "ces.h"
 #include "fdict.h"
 #include "hg.h"
 #include "kbest.h"
@@ -166,8 +167,8 @@ TEST_F(OptTest, TestS1) {
   envs[1] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg2, NULL, wf);
 
   vector<ErrorSurface> es(2);
-  scorer1->ComputeErrorSurface(envs[0], &es[0], IBM_BLEU, hg);
-  scorer2->ComputeErrorSurface(envs[1], &es[1], IBM_BLEU, hg2);
+  ComputeErrorSurface(*scorer1, envs[0], &es[0], IBM_BLEU, hg);
+  ComputeErrorSurface(*scorer2, envs[1], &es[1], IBM_BLEU, hg2);
   cerr << envs[0].size() << " " << envs[1].size() << endl;
   cerr << es[0].size() << " " << es[1].size() << endl;
   envs.clear();
diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc
index b3acc5dd..1506a99f 100644
--- a/vest/mr_vest_map.cc
+++ b/vest/mr_vest_map.cc
@@ -6,6 +6,7 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "ces.h"
 #include "filelib.h"
 #include "stringlib.h"
 #include "sparse_vector.h"
@@ -13,7 +14,7 @@
 #include "viterbi_envelope.h"
 #include "inside_outside.h"
 #include "error_surface.h"
-#include "hg.h"
+#include "b64tools.h"
 #include "hg_io.h"
 
 using namespace std;
@@ -90,7 +91,7 @@ int main(int argc, char** argv) {
     ViterbiEnvelopeWeightFunction wf(origin, axis);
     ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
     ErrorSurface es;
-    ds[sent_id]->ComputeErrorSurface(ve, &es, type, hg);
+    ComputeErrorSurface(*ds[sent_id], ve, &es, type, hg);
     //cerr << "Viterbi envelope has " << ve.size() << " segments\n";
     // cerr << "Error surface has " << es.size() << " segments\n";
     string val;
diff --git a/vest/mr_vest_reduce.cc b/vest/mr_vest_reduce.cc
index 5efcc19a..3df52020 100644
--- a/vest/mr_vest_reduce.cc
+++ b/vest/mr_vest_reduce.cc
@@ -9,7 +9,7 @@
 #include "sparse_vector.h"
 #include "error_surface.h"
 #include "line_optimizer.h"
-#include "hg_io.h"
+#include "b64tools.h"
 
 using namespace std;
 namespace po = boost::program_options;