39 files changed, 3431 insertions, 0 deletions
diff --git a/vest/Makefile.am b/vest/Makefile.am
new file mode 100644
index 00000000..6815ae75
--- /dev/null
+++ b/vest/Makefile.am
@@ -0,0 +1,35 @@
+bin_PROGRAMS = \
+  mbr_kbest \
+  mr_vest_map \
+  mr_vest_reduce \
+  mr_vest_generate_mapper_input \
+  fast_score
+
+if HAVE_GTEST
+noinst_PROGRAMS = \
+  scorer_test \
+  lo_test
+endif
+
+mbr_kbest_SOURCES = mbr_kbest.cc ter.cc comb_scorer.cc aer_scorer.cc scorer.cc viterbi_envelope.cc
+mbr_kbest_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+
+fast_score_SOURCES = fast_score.cc ter.cc comb_scorer.cc aer_scorer.cc scorer.cc viterbi_envelope.cc
+fast_score_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+
+mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc
+mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+
+mr_vest_map_SOURCES = viterbi_envelope.cc error_surface.cc aer_scorer.cc mr_vest_map.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc
+mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+
+mr_vest_reduce_SOURCES = error_surface.cc aer_scorer.cc mr_vest_reduce.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc viterbi_envelope.cc
+mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
+
+scorer_test_SOURCES = aer_scorer.cc scorer_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc
+scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz
+
+lo_test_SOURCES = lo_test.cc scorer.cc ter.cc aer_scorer.cc comb_scorer.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc
+lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
diff --git a/vest/aer_scorer.cc b/vest/aer_scorer.cc
new file mode 100644
index 00000000..9c8a783a
--- /dev/null
+++ b/vest/aer_scorer.cc
@@ -0,0 +1,118 @@
+#include "aer_scorer.h"
+
+#include <cmath>
+#include <cassert>
+#include <sstream>
+
+#include "tdict.h"
+#include "aligner.h"
+
+using namespace std;
+
+class AERScore : public Score {
+  friend class AERScorer;
+ public:
+  AERScore() : num_matches(), num_predicted(), num_in_ref() {}
+  AERScore(int m, int p, int r) :
+    num_matches(m), num_predicted(p), num_in_ref(r) {}
+  virtual void PlusEquals(const Score& delta) {
+    const AERScore& other = static_cast<const AERScore&>(delta);
+    num_matches   += other.num_matches;
+    num_predicted += other.num_predicted;
+    num_in_ref    += other.num_in_ref;
+  }
+  virtual Score* GetZero() const {
+    return new AERScore;
+  }
+  virtual void Subtract(const Score& rhs, Score* out) const {
+    AERScore* res = static_cast<AERScore*>(out);
+    const AERScore& other = static_cast<const AERScore&>(rhs);
+    res->num_matches   = num_matches   - other.num_matches;
+    res->num_predicted = num_predicted - other.num_predicted;
+    res->num_in_ref    = num_in_ref    - other.num_in_ref;
+  }
+  float Precision() const {
+    return static_cast<float>(num_matches) / num_predicted;
+  }
+  float Recall() const {
+    return static_cast<float>(num_matches) / num_in_ref;
+  }
+  virtual float ComputeScore() const {
+    const float prec = Precision();
+    const float rec = Recall();
+    const float f = (2.0 * prec * rec) / (rec + prec);
+    if (isnan(f)) return 1.0f;
+    return 1.0f - f;
+  }
+  virtual bool IsAdditiveIdentity() const {
+    return (num_matches == 0) && (num_predicted == 0) && (num_in_ref == 0);
+  }
+  virtual void ScoreDetails(std::string* out) const {
+    ostringstream os;
+    os << "AER=" << (ComputeScore() * 100.0)
+       << " F=" << (100 - ComputeScore() * 100.0)
+       << " P=" << (Precision() * 100.0) << " R=" << (Recall() * 100.0)
+       << " [" << num_matches << " " << num_predicted << " " << num_in_ref << "]";
+    *out = os.str();
+  }
+  virtual void Encode(std::string*out) const {
+    out->resize(sizeof(int) * 3);
+    *(int *)&(*out)[sizeof(int) * 0] = num_matches;
+    *(int *)&(*out)[sizeof(int) * 1] = num_predicted;
+    *(int *)&(*out)[sizeof(int) * 2] = num_in_ref;
+  }
+ private:
+  int num_matches;
+  int num_predicted;
+  int num_in_ref;
+};
+
+AERScorer::AERScorer(const vector<vector<WordID> >& refs, const string& src) : src_(src) {
+  if (refs.size() != 1) {
+    cerr << "AERScorer can only take a single reference!\n";
+    abort();
+  }
+  ref_ = AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(refs.front()));
+}
+
+static inline bool Safe(const Array2D<bool>& a, int i, int j) {
+  if (i >= 0 && j >= 0 && i < a.width() && j < a.height())
+    return a(i,j);
+  else
+    return false;
+}
+
+Score* AERScorer::ScoreCandidate(const vector<WordID>& shyp) const {
+  boost::shared_ptr<Array2D<bool> > hyp =
+    AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(shyp));
+
+  int m = 0;
+  int r = 0;
+  int p = 0;
+  int i_len = ref_->width();
+  int j_len = ref_->height();
+  for (int i = 0; i < i_len; ++i) {
+    for (int j = 0; j < j_len; ++j) {
+      if ((*ref_)(i,j)) {
+        ++r;
+        if (Safe(*hyp, i, j)) ++m;
+      }
+    }
+  }
+  for (int i = 0; i < hyp->width(); ++i)
+    for (int j = 0; j < hyp->height(); ++j)
+      if ((*hyp)(i,j)) ++p;
+
+  return new AERScore(m,p,r);
+}
+
+Score* AERScorer::ScoreFromString(const string& in) {
+  AERScore* res = new AERScore;
+  res->num_matches   = *(const int *)&in[sizeof(int) * 0];
+  res->num_predicted = *(const int *)&in[sizeof(int) * 1];
+  res->num_in_ref    = *(const int *)&in[sizeof(int) * 2];
+  return res;
+}
+
+const std::string* AERScorer::GetSource() const { return &src_; }
+
diff --git a/vest/aer_scorer.h b/vest/aer_scorer.h
new file mode 100644
index 00000000..a0afea3b
--- /dev/null
+++ b/vest/aer_scorer.h
@@ -0,0 +1,22 @@
+#ifndef _AER_SCORER_
+#define _AER_SCORER_
+
+#include <boost/shared_ptr.hpp>
+
+#include "scorer.h"
+#include "array2d.h"
+
+class AERScorer : public SentenceScorer {
+ public:
+  // when constructing alignment strings from a hypergraph, the source
+  // is necessary.
+  AERScorer(const std::vector<std::vector<WordID> >& refs, const std::string& src = "");
+  Score* ScoreCandidate(const std::vector<WordID>& hyp) const;
+  static Score* ScoreFromString(const std::string& in);
+  const std::string* GetSource() const;
+ private:
+  std::string src_;
+  boost::shared_ptr<Array2D<bool> > ref_;
+};
+
+#endif
diff --git a/vest/comb_scorer.cc b/vest/comb_scorer.cc
new file mode 100644
index 00000000..7b2187f4
--- /dev/null
+++ b/vest/comb_scorer.cc
@@ -0,0 +1,81 @@
+#include "comb_scorer.h"
+
+#include <cstdio>
+
+using namespace std;
+
+class BLEUTERCombinationScore : public Score {
+  friend class BLEUTERCombinationScorer;
+ public:
+  ~BLEUTERCombinationScore();
+  float ComputeScore() const {
+    return (bleu->ComputeScore() - ter->ComputeScore()) / 2.0f;
+  }
+  void ScoreDetails(string* details) const {
+    char buf[160];
+    sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f", 
+      ComputeScore()*100.0f, bleu->ComputeScore()*100.0f, ter->ComputeScore()*100.0f);
+    *details = buf;
+  }
+  void PlusEquals(const Score& delta) {
+    bleu->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).bleu);
+    ter->PlusEquals(*static_cast<const BLEUTERCombinationScore&>(delta).ter);
+  }
+  Score* GetZero() const {
+    BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
+    res->bleu = bleu->GetZero();
+    res->ter = ter->GetZero();
+    return res;
+  }
+  void Subtract(const Score& rhs, Score* res) const {
+    bleu->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).bleu,
+                   static_cast<BLEUTERCombinationScore*>(res)->bleu);
+    ter->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).ter,
+                   static_cast<BLEUTERCombinationScore*>(res)->ter);
+  }
+  void Encode(std::string* out) const {
+    string bs, ts;
+    bleu->Encode(&bs);
+    ter->Encode(&ts);
+    out->clear();
+    (*out) += static_cast<char>(bs.size());
+    (*out) += bs;
+    (*out) += ts;
+  }
+  bool IsAdditiveIdentity() const {
+    return bleu->IsAdditiveIdentity() && ter->IsAdditiveIdentity();
+  }
+ private:
+  Score* bleu;
+  Score* ter;
+};
+
+BLEUTERCombinationScore::~BLEUTERCombinationScore() {
+  delete bleu;
+  delete ter;
+}
+
+BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> >& refs) {
+  bleu_ = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs);
+  ter_ = SentenceScorer::CreateSentenceScorer(TER, refs);
+}
+
+BLEUTERCombinationScorer::~BLEUTERCombinationScorer() {
+  delete bleu_;
+  delete ter_;
+}
+
+Score* BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
+  BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
+  res->bleu = bleu_->ScoreCandidate(hyp);
+  res->ter = ter_->ScoreCandidate(hyp);
+  return res;
+}
+
+Score* BLEUTERCombinationScorer::ScoreFromString(const std::string& in) {
+  int bss = in[0];
+  BLEUTERCombinationScore* r = new BLEUTERCombinationScore;
+  r->bleu = SentenceScorer::CreateScoreFromString(IBM_BLEU, in.substr(1, bss));
+  r->ter = SentenceScorer::CreateScoreFromString(TER, in.substr(1 + bss));
+  return r;
+}
diff --git a/vest/comb_scorer.h b/vest/comb_scorer.h
new file mode 100644
index 00000000..70b1ec75
--- /dev/null
+++ b/vest/comb_scorer.h
@@ -0,0 +1,17 @@
+#ifndef _COMB_SCORER_
+#define _COMB_SCORER_
+
+#include "scorer.h"
+
+class BLEUTERCombinationScorer : public SentenceScorer {
+ public:
+  BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs);
+  ~BLEUTERCombinationScorer();
+  Score* ScoreCandidate(const std::vector<WordID>& hyp) const;
+  static Score* ScoreFromString(const std::string& in);
+ private:
+  SentenceScorer* bleu_;
+  SentenceScorer* ter_;
+};
+
+#endif
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
new file mode 100755
index 00000000..dca2f06b
--- /dev/null
+++ b/vest/dist-vest.pl
@@ -0,0 +1,619 @@
+#!/usr/bin/env perl
+
+use strict;
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
+use Getopt::Long;
+use IPC::Open2;
+use strict;
+use POSIX ":sys_wait_h";
+
+# Default settings
+my $srcFile;
+my $refFiles;
+my $bin_dir = $SCRIPT_DIR;
+die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
+my $FAST_SCORE="$bin_dir/fast_score";
+die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
+my $MAPINPUT = "$bin_dir/mr_vest_generate_mapper_input";
+my $MAPPER = "$bin_dir/mr_vest_map";
+my $REDUCER = "$bin_dir/mr_vest_reduce";
+my $SCORER = $FAST_SCORE;
+die "Can't find $MAPPER" unless -x $MAPPER;
+my $cdec = "$bin_dir/../decoder/cdec";
+die "Can't find decoder in $cdec" unless -x $cdec;
+my $decoder = $cdec;
+my $DISCARD_FORESTS = 0;
+my $lines_per_mapper = 400;
+my $rand_directions = 15;
+my $iteration = 1;
+my $run_local = 0;
+my $best_weights;
+my $max_iterations = 15;
+my $optimization_iters = 6;
+my $num_rand_points = 20;
+my $mert_nodes = join(" ", grep(/^c\d\d$/, split(/\n/, `pbsnodes -a`))); # "1 1 1 1 1" fails due to file staging conflicts
+my $decode_nodes = "1 1 1 1 1 1 1 1 1 1 1 1 1 1 1";  # start 15 jobs
+my $pmem = "3g";
+my $disable_clean = 0;
+my %seen_weights;
+my $normalize;
+my $help = 0;
+my $epsilon = 0.0001;
+my $interval = 5;
+my $dryrun = 0;
+my $ranges;
+my $last_score = -10000000;
+my $restart = 0;
+my $metric = "ibm_bleu";
+my $dir;
+my $iniFile;
+my $weights;
+my $initialWeights;
+my $decoderOpt;
+
+# Process command-line options
+Getopt::Long::Configure("no_auto_abbrev");
+if (GetOptions(
+	"decoder=s" => \$decoderOpt,
+	"decode-nodes=s" => \$decode_nodes,
+	"dont-clean" => \$disable_clean,
+	"dry-run" => \$dryrun,
+	"epsilon" => \$epsilon,
+	"help" => \$help,
+	"interval" => \$interval,
+	"iteration=i" => \$iteration,
+	"local" => \$run_local,
+	"max-iterations=i" => \$max_iterations,
+	"mert-nodes=s" => \$mert_nodes,
+	"normalize=s" => \$normalize,
+	"pmem=s" => \$pmem,
+	"ranges=s" => \$ranges,
+	"rand-directions=i" => \$rand_directions,
+	"ref-files=s" => \$refFiles,
+	"metric=s" => \$metric,
+	"restart" => \$restart,
+	"source-file=s" => \$srcFile,
+	"weights=s" => \$initialWeights,
+	"workdir=s" => \$dir
+) == 0 || @ARGV!=1 || $help) {
+	print_help();
+	exit;
+}
+
+if ($metric =~ /^(combi|ter)$/i) {
+  $lines_per_mapper = 40;
+}
+
+($iniFile) = @ARGV;
+
+
+sub write_config;
+sub enseg;
+sub print_help;
+
+my $nodelist;
+my $host =`hostname`; chomp $host;
+my $bleu;
+my $interval_count = 0;
+my $logfile;
+my $projected_score;
+
+# used in sorting scores
+my $DIR_FLAG = '-r';
+if ($metric =~ /^ter$|^aer$/i) {
+  $DIR_FLAG = '';
+}
+
+my $refs_comma_sep = get_comma_sep_refs($refFiles);
+
+unless ($dir){ 
+	$dir = "vest";
+}
+unless ($dir =~ /^\//){  # convert relative path to absolute path
+	my $basedir = `pwd`;
+	chomp $basedir;
+	$dir = "$basedir/$dir";
+}
+if ($restart){
+	$iniFile = `ls $dir/*.ini`; chomp $iniFile;
+	unless (-e $iniFile){
+		die "ERROR: Could not find ini file in $dir to restart\n";
+	}
+	$logfile = "$dir/mert.log";
+	open(LOGFILE, ">>$logfile");
+	print LOGFILE "RESTARTING STOPPED OPTIMIZATION\n\n";
+
+	# figure out best weights so far and iteration number
+	open(LOG, "$dir/mert.log");
+	my $wi = 0;
+	while (my $line = <LOG>){
+		chomp $line;
+		if ($line =~ /ITERATION (\d+)/) {
+			$iteration = $1;
+		}
+	}
+
+	$iteration = $wi + 1;
+}
+
+if ($decoderOpt){ $decoder = $decoderOpt; }
+
+
+# Initializations and helper functions
+srand;
+
+my @childpids = ();
+my @cleanupcmds = ();
+
+sub cleanup {
+	print STDERR "Cleanup...\n";
+	for my $pid (@childpids){ `kill $pid`; }
+	for my $cmd (@cleanupcmds){`$cmd`; }
+	exit 1;
+};
+$SIG{INT} = "cleanup";
+$SIG{TERM} = "cleanup"; 
+$SIG{HUP} = "cleanup";
+
+my $decoderBase = `basename $decoder`; chomp $decoderBase;
+my $newIniFile = "$dir/$decoderBase.ini";
+my $parallelize = '/chomes/redpony/svn-trunk/sa-utils/parallelize.pl';
+my $inputFileName = "$dir/input";
+my $user = $ENV{"USER"};
+
+# process ini file
+-e $iniFile || die "Error: could not open $iniFile for reading\n";
+open(INI, $iniFile);
+
+if ($dryrun){
+	write_config(*STDERR);
+	exit 0;
+} else {
+	if (-e $dir){
+		unless($restart){
+			die "ERROR: working dir $dir already exists\n\n";
+		}
+	} else {
+		mkdir $dir;
+		mkdir "$dir/hgs";
+		unless (-e $initialWeights) {
+			print STDERR "Please specify an initial weights file with --initial-weights\n";
+			print_help();
+			exit;
+		}
+		`cp $initialWeights $dir/weights.0`;
+		die "Can't find weights.0" unless (-e "$dir/weights.0");
+	}
+	unless($restart){
+		$logfile = "$dir/mert.log";
+		open(LOGFILE, ">$logfile");
+	}
+	write_config(*LOGFILE);
+}
+
+
+# Generate initial files and values
+unless ($restart){ `cp $iniFile $newIniFile`; }
+$iniFile = $newIniFile;
+
+my $newsrc = "$dir/dev.input";
+unless($restart){ enseg($srcFile, $newsrc); }
+$srcFile = $newsrc;
+my $devSize = 0;
+open F, "<$srcFile" or die "Can't read $srcFile: $!";
+while(<F>) { $devSize++; }
+close F;
+
+unless($best_weights){ $best_weights = $weights; }
+unless($projected_score){ $projected_score = 0.0; }
+$seen_weights{$weights} = 1;
+
+my $random_seed = int(time / 1000);
+my $lastWeightsFile;
+my $lastPScore = 0;
+# main optimization loop
+while (1){
+	print LOGFILE "\n\nITERATION $iteration\n==========\n";
+
+	# iteration-specific files
+	my $runFile="$dir/run.raw.$iteration";
+	my $onebestFile="$dir/1best.$iteration";
+	my $logdir="$dir/logs.$iteration";
+	my $decoderLog="$logdir/decoder.sentserver.log.$iteration";
+	my $scorerLog="$logdir/scorer.log.$iteration";
+	`mkdir -p $logdir`;
+
+	#decode
+	print LOGFILE "DECODE\n";
+	print LOGFILE `date`;
+	my $im1 = $iteration - 1;
+	my $weightsFile="$dir/weights.$im1";
+	my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile -O $dir/hgs";
+	my $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -n \"$decode_nodes\" -- ";
+        if ($run_local) { $pcmd = "cat $srcFile |"; }
+        my $cmd = $pcmd . "$decoder_cmd 2> $decoderLog 1> $runFile";
+	print LOGFILE "COMMAND:\n$cmd\n";
+	my $result = 0;
+	$result = system($cmd);
+	unless ($result == 0){
+		cleanup();
+		print LOGFILE "ERROR: Parallel decoder returned non-zero exit code $result\n";
+		die;
+	}
+	my $dec_score = `cat $runFile | $SCORER $refs_comma_sep -l $metric`;
+	chomp $dec_score;
+	print LOGFILE "DECODER SCORE: $dec_score\n";
+
+	# save space
+	`gzip $runFile`;
+	`gzip $decoderLog`;
+
+	if ($iteration > $max_iterations){
+		print LOGFILE "\nREACHED STOPPING CRITERION: Maximum iterations\n";
+		last;
+	}
+	
+	# run optimizer
+	print LOGFILE "\nUNION FORESTS\n";
+	print LOGFILE `date`;
+	my $mergeLog="$logdir/prune-merge.log.$iteration";
+	if ($DISCARD_FORESTS) {
+		`rm -f $dir/hgs/*gz`;
+	}
+
+	my $score = 0;
+	my $icc = 0;
+	my $inweights="$dir/weights.$im1";
+	for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) {
+		print LOGFILE "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n";
+		print LOGFILE `date`;
+		$icc++;
+		$cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter";
+		print LOGFILE "COMMAND:\n$cmd\n";
+		$result = system($cmd);
+		unless ($result == 0){
+			cleanup();
+			print LOGFILE "ERROR: mapinput command returned non-zero exit code $result\n";
+			die;
+		}
+
+	        `mkdir $dir/splag.$im1`;
+		$cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput.";
+		print LOGFILE "COMMAND:\n$cmd\n";
+		$result = system($cmd);
+		unless ($result == 0){
+			cleanup();
+			print LOGFILE "ERROR: split command returned non-zero exit code $result\n";
+			die;
+		}
+		opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!";
+		my @shards = grep { /^mapinput\./ } readdir(DIR);
+		closedir DIR;
+		die "No shards!" unless scalar @shards > 0;
+		my $joblist = "";
+		my $nmappers = 0;
+		my @mapoutputs = ();
+		@cleanupcmds = ();
+		my %o2i = ();
+		my $first_shard = 1;
+		for my $shard (@shards) {
+			my $mapoutput = $shard;
+			my $client_name = $shard;
+			$client_name =~ s/mapinput.//;
+			$client_name = "vest.$client_name";
+			$mapoutput =~ s/mapinput/mapoutput/;
+			push @mapoutputs, "$dir/splag.$im1/$mapoutput";
+			$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
+			my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
+			if ($run_local) {
+				print LOGFILE "COMMAND:\n$script\n";
+				$result = system($script);
+				unless ($result == 0){
+					cleanup();
+					print LOGFILE "ERROR: mapper returned non-zero exit code $result\n";
+					die;
+				}
+			} else {
+				my $todo = "qsub -q batch -l pmem=3000mb,walltime=5:00:00 -N $client_name -o /dev/null -e $logdir/$client_name.ER";
+				local(*QOUT, *QIN);
+				open2(\*QOUT, \*QIN, $todo);
+				print QIN $script;
+				if ($first_shard) { print LOGFILE "$script\n"; $first_shard=0; }
+				close QIN;
+				$nmappers++;
+				while (my $jobid=<QOUT>){
+	  				chomp $jobid;
+		  			push(@cleanupcmds, "`qdel $jobid 2> /dev/null`");
+					$jobid =~ s/^(\d+)(.*?)$/\1/g;
+					print STDERR "short job id $jobid\n";
+					if ($joblist == "") { $joblist = $jobid; }
+					else {$joblist = $joblist . "\|" . $jobid; }
+				}
+				close QOUT;
+			}
+		}
+		if ($run_local) {
+		} else {
+			print LOGFILE "Launched $nmappers mappers.\n";
+			print LOGFILE "Waiting for mappers to complete...\n";
+			while ($nmappers > 0) {
+			  sleep 5;
+			  my @livejobs = grep(/$joblist/, split(/\n/, `qstat`));
+			  $nmappers = scalar @livejobs;
+			}
+			print LOGFILE "All mappers complete.\n";
+		}
+		my $tol = 0;
+		my $til = 0;
+		for my $mo (@mapoutputs) {
+		  my $olines = get_lines($mo);
+		  my $ilines = get_lines($o2i{$mo});
+		  $tol += $olines;
+		  $til += $ilines;
+		  die "$mo: output lines ($olines) doesn't match input lines ($ilines)" unless $olines==$ilines;
+		}
+		print LOGFILE "Results for $tol/$til lines\n";
+		print LOGFILE "\nSORTING AND RUNNING FMERT REDUCER\n";
+		print LOGFILE `date`;
+		$cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -l $metric > $dir/redoutput.$im1";
+		print LOGFILE "COMMAND:\n$cmd\n";
+		$result = system($cmd);
+		unless ($result == 0){
+			cleanup();
+			print LOGFILE "ERROR: reducer command returned non-zero exit code $result\n";
+			die;
+		}
+		$cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1";
+		my $best=`$cmd`; chomp $best;
+		print LOGFILE "$best\n";
+		my ($oa, $x, $xscore) = split /\|/, $best;
+		$score = $xscore;
+		print LOGFILE "PROJECTED SCORE: $score\n";
+		if (abs($x) < $epsilon) {
+			print LOGFILE "\nOPTIMIZER: no score improvement: abs($x) < $epsilon\n";
+			last;
+		}
+                my $psd = $score - $last_score;
+                $last_score = $score;
+		if (abs($psd) < $epsilon) {
+			print LOGFILE "\nOPTIMIZER: no score improvement: abs($psd) < $epsilon\n";
+			last;
+		}
+		my ($origin, $axis) = split /\s+/, $oa;
+
+		my %ori = convert($origin);
+		my %axi = convert($axis);
+
+		my $finalFile="$dir/weights.$im1-$opt_iter";
+		open W, ">$finalFile" or die "Can't write: $finalFile: $!";
+                my $norm = 0;
+		for my $k (sort keys %ori) {
+			my $dd = $ori{$k} + $axi{$k} * $x;
+                        $norm += $dd * $dd;
+		}
+                $norm = sqrt($norm);
+		$norm = 1;
+		for my $k (sort keys %ori) {
+			my $v = ($ori{$k} + $axi{$k} * $x) / $norm;
+			print W "$k $v\n";
+		}
+		`rm -rf $dir/splag.$im1`;
+		$inweights = $finalFile;
+	}
+	$lastWeightsFile = "$dir/weights.$iteration";
+	`cp $inweights $lastWeightsFile`;
+	if ($icc < 2) {
+		print LOGFILE "\nREACHED STOPPING CRITERION: score change too little\n";
+		last;
+	}
+	$lastPScore = $score;
+	$iteration++;
+	print LOGFILE "\n==========\n";
+}
+
+print LOGFILE "\nFINAL WEIGHTS: $dir/$lastWeightsFile\n(Use -w <this file> with hiero)\n\n";
+
+sub normalize_weights {
+  my ($rfn, $rpts, $feat) = @_;
+  my @feat_names = @$rfn;
+  my @pts = @$rpts;
+  my $z = 1.0;
+  for (my $i=0; $i < scalar @feat_names; $i++) {
+    if ($feat_names[$i] eq $feat) {
+      $z = $pts[$i];
+      last;
+    }
+  }
+  for (my $i=0; $i < scalar @feat_names; $i++) {
+    $pts[$i] /= $z;
+  }
+  print LOGFILE " NORM WEIGHTS: @pts\n";
+  return @pts;
+}
+
+sub get_lines {
+  my $fn = shift @_;
+  open FL, "<$fn" or die "Couldn't read $fn: $!";
+  my $lc = 0;
+  while(<FL>) { $lc++; }
+  return $lc;
+}
+
+sub get_comma_sep_refs {
+  my ($p) = @_;
+  my $o = `echo $p`;
+  chomp $o;
+  my @files = split /\s+/, $o;
+  return "-r " . join(' -r ', @files);
+}
+
+sub read_weights_file {
+  my ($file) = @_;
+  open F, "<$file" or die "Couldn't read $file: $!";
+  my @r = ();
+  my $pm = -1;
+  while(<F>) {
+    next if /^#/;
+    next if /^\s*$/;
+    chomp;
+    if (/^(.+)\s+(.+)$/) {
+      my $m = $1;
+      my $w = $2;
+      die "Weights out of order: $m <= $pm" unless $m > $pm;
+      push @r, $w;
+    } else {
+      warn "Unexpected feature name in weight file: $_";
+    }
+  }
+  close F;
+  return join ' ', @r;
+}
+
+# subs
+sub write_config {
+	my $fh = shift;
+	my $cleanup = "yes";
+	if ($disable_clean) {$cleanup = "no";}
+
+	print $fh "\n";
+	print $fh "DECODER:          $decoder\n";
+	print $fh "INI FILE:         $iniFile\n";
+	print $fh "WORKING DIR:      $dir\n";
+	print $fh "SOURCE (DEV):     $srcFile\n";
+	print $fh "REFS (DEV):       $refFiles\n";
+	print $fh "EVAL METRIC:      $metric\n";
+	print $fh "START ITERATION:  $iteration\n";
+	print $fh "MAX ITERATIONS:   $max_iterations\n";
+	print $fh "MERT NODES:       $mert_nodes\n";
+	print $fh "DECODE NODES:     $decode_nodes\n";
+	print $fh "HEAD NODE:        $host\n";
+	print $fh "PMEM (DECODING):  $pmem\n";
+	print $fh "CLEANUP:          $cleanup\n";
+	print $fh "INITIAL WEIGHTS:  $initialWeights\n";
+
+	if ($restart){
+		print $fh "PROJECTED BLEU:   $projected_score\n";
+		print $fh "BEST WEIGHTS:     $best_weights\n";
+	}
+}
+
+sub update_weights_file {
+  my ($neww, $rfn, $rpts) = @_;
+  my @feats = @$rfn;
+  my @pts = @$rpts;
+  my $num_feats = scalar @feats;
+  my $num_pts = scalar @pts;
+  die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts;
+  open G, ">$neww" or die;
+  for (my $i = 0; $i < $num_feats; $i++) {
+    my $f = $feats[$i];
+    my $lambda = $pts[$i];
+    print G "$f $lambda\n";
+  }
+  close G;
+}
+
+sub enseg {
+	my $src = shift;
+	my $newsrc = shift;
+	open(SRC, $src);
+	open(NEWSRC, ">$newsrc");
+	my $i=0;
+	while (my $line=<SRC>){
+		chomp $line;
+		print NEWSRC "<seg id=\"$i\">$line</seg>\n";
+		$i++;
+	}
+	close SRC;
+	close NEWSRC;
+}
+
+sub print_help {
+
+	my $executable = `basename $0`; chomp $executable;
+    print << "Help";
+
+Usage: $executable [options] <ini file>
+       $executable --restart <work dir>
+
+	$executable [options] <ini file> 
+		Runs a complete MERT optimization and test set decoding, using 
+		the decoder configuration in ini file.  Note that many of the
+		options have default values that are inferred automatically 
+		based on certain conventions.  For details, refer to descriptions
+		of the options --decoder, --weights, and --workdir. 
+
+	$executable --restart <work dir>
+		Continues an optimization run that was stopped for some reason,
+		using configuration information found in the working directory
+		left behind by the stopped run.
+
+Options:
+
+	--local
+		Run the decoder and optimizer locally.
+
+	--decoder <decoder path>
+		Decoder binary to use.
+
+	--help
+		Print this message and exit.
+
+	--iteration <I> 
+		Starting iteration number.  If not specified, defaults to 1.
+
+	--max-iterations <M> 
+		Maximum number of iterations to run.  If not specified, defaults
+		to 10.
+
+	--pmem <N>
+		Amount of physical memory requested for parallel decoding jobs.
+
+	--ref-files <files> 
+		Dev set ref files.  This option takes only a single string argument. 
+		To use multiple files (including file globbing), this argument should 
+		be quoted.
+
+	--metric <method>
+		Metric to optimize.
+		Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
+
+	--normalize <feature-name>
+		After each iteration, rescale all feature weights such that feature-
+		name has a weight of 1.0.
+
+	--rand-directions <num>
+		MERT will attempt to optimize along all of the principle directions,
+		set this parameter to explore other directions. Defaults to 5.
+
+	--source-file <file> 
+		Dev set source file.
+
+	--weights <file> 
+		A file specifying initial feature weights.  The format is
+		FeatureName_1 value1
+		FeatureName_2 value2
+
+	--workdir <dir> 
+		Directory for intermediate and output files.  If not specified, the
+		name is derived from the ini filename.  Assuming that the ini 
+		filename begins with the decoder name and ends with ini, the default
+		name of the working directory is inferred from the middle part of 
+		the filename.  E.g. an ini file named decoder.foo.ini would have
+		a default working directory name foo.
+
+Help
+}
+
+sub convert {
+  my ($str) = @_;
+  my @ps = split /;/, $str;
+  my %dict = ();
+  for my $p (@ps) {
+    my ($k, $v) = split /=/, $p;
+    $dict{$k} = $v;
+  }
+  return %dict;
+}
+
+
diff --git a/vest/error_surface.cc b/vest/error_surface.cc
new file mode 100644
index 00000000..4e0af35c
--- /dev/null
+++ b/vest/error_surface.cc
@@ -0,0 +1,46 @@
+#include "error_surface.h"
+
+#include <cassert>
+#include <sstream>
+
+using namespace std;
+
+ErrorSurface::~ErrorSurface() {
+  for (ErrorSurface::iterator i = begin(); i != end(); ++i)
+    //delete i->delta;
+    ;
+}
+
+void ErrorSurface::Serialize(std::string* out) const {
+  const int segments = this->size();
+  ostringstream os(ios::binary);
+  os.write((const char*)&segments,sizeof(segments));
+  for (int i = 0; i < segments; ++i) {
+    const ErrorSegment& cur = (*this)[i];
+    string senc;
+    cur.delta->Encode(&senc);
+    assert(senc.size() < 256);
+    unsigned char len = senc.size();
+    os.write((const char*)&cur.x, sizeof(cur.x));
+    os.write((const char*)&len, sizeof(len));
+    os.write((const char*)&senc[0], len);
+  }
+  *out = os.str();
+}
+
+void ErrorSurface::Deserialize(ScoreType type, const std::string& in) {
+  istringstream is(in, ios::binary); 
+  int segments;
+  is.read((char*)&segments, sizeof(segments));
+  this->resize(segments);
+  for (int i = 0; i < segments; ++i) {
+    ErrorSegment& cur = (*this)[i];
+    unsigned char len;
+    is.read((char*)&cur.x, sizeof(cur.x));
+    is.read((char*)&len, sizeof(len));
+    string senc(len, '\0'); assert(senc.size() == len);
+    is.read((char*)&senc[0], len);
+    cur.delta = SentenceScorer::CreateScoreFromString(type, senc);
+  }
+}
+
diff --git a/vest/error_surface.h b/vest/error_surface.h
new file mode 100644
index 00000000..a8734f54
--- /dev/null
+++ b/vest/error_surface.h
@@ -0,0 +1,24 @@
+#ifndef _ERROR_SURFACE_H_
+#define _ERROR_SURFACE_H_
+
+#include <vector>
+#include <string>
+
+#include "scorer.h"
+
+class Score;
+
+struct ErrorSegment {
+  double x;
+  Score* delta;
+  ErrorSegment() : x(0), delta(NULL) {}
+};
+
+class ErrorSurface : public std::vector<ErrorSegment> {
+ public:
+  ~ErrorSurface();
+  void Serialize(std::string* out) const;
+  void Deserialize(ScoreType type, const std::string& in);
+};
+
+#endif
diff --git a/vest/fast_score.cc b/vest/fast_score.cc
new file mode 100644
index 00000000..cf743b4f
--- /dev/null
+++ b/vest/fast_score.cc
@@ -0,0 +1,74 @@
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "filelib.h"
+#include "tdict.h"
+#include "scorer.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
+        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+        ("in_file,i", po::value<string>()->default_value("-"), "Input file")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;
+  if (!conf->count("reference")) {
+    cerr << "Please specify one or more references using -r <REF1.TXT> -r <REF2.TXT> ...\n";
+    flag = true;
+  }
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string loss_function = conf["loss_function"].as<string>();
+  ScoreType type = ScoreTypeFromString(loss_function);
+  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
+
+  ReadFile rf(conf["in_file"].as<string>());
+  Score* acc = NULL;
+  istream& in = *rf.stream();
+  int lc = 0;
+  while(in) {
+    string line;
+    getline(in, line);
+    if (line.empty()) continue;
+    vector<WordID> sent;
+    TD::ConvertSentence(line, &sent);
+    Score* sentscore = ds[lc]->ScoreCandidate(sent);
+    if (!acc) { acc = sentscore->GetZero(); }
+    acc->PlusEquals(*sentscore);
+    delete sentscore;
+    ++lc;
+  }
+  assert(lc > 0);
+  if (lc > ds.size()) {
+    cerr << "Too many (" << lc << ") translations in input, expected " << ds.size() << endl;
+    return 1;
+  }
+  if (lc != ds.size())
+    cerr << "Fewer sentences in hyp (" << lc << ") than refs ("
+         << ds.size() << "): scoring partial set!\n";
+  float score = acc->ComputeScore();
+  string details;
+  acc->ScoreDetails(&details);
+  delete acc;
+  cerr << details << endl;
+  cout << score << endl;
+  return 0;
+}
diff --git a/vest/line_optimizer.cc b/vest/line_optimizer.cc
new file mode 100644
index 00000000..b6410c35
--- /dev/null
+++ b/vest/line_optimizer.cc
@@ -0,0 +1,103 @@
+#include "line_optimizer.h"
+
+#include <limits>
+#include <algorithm>
+
+#include "sparse_vector.h"
+#include "scorer.h"
+
+using namespace std;
+
+typedef ErrorSurface::const_iterator ErrorIter;
+
+// sort by increasing x-ints
+struct IntervalComp {
+  bool operator() (const ErrorIter& a, const ErrorIter& b) const {
+    return a->x < b->x;
+  }
+};	
+
+double LineOptimizer::LineOptimize(
+    const vector<ErrorSurface>& surfaces,
+    const LineOptimizer::ScoreType type,
+    float* best_score,
+    const double epsilon) {
+  // cerr << "MIN=" << MINIMIZE_SCORE << " MAX=" << MAXIMIZE_SCORE << "  MINE=" << type << endl;
+  vector<ErrorIter> all_ints;
+  for (vector<ErrorSurface>::const_iterator i = surfaces.begin();
+       i != surfaces.end(); ++i) {
+    const ErrorSurface& surface = *i;
+    for (ErrorIter j = surface.begin(); j != surface.end(); ++j)
+      all_ints.push_back(j);
+  }
+  sort(all_ints.begin(), all_ints.end(), IntervalComp());
+  double last_boundary = all_ints.front()->x;
+  Score* acc = all_ints.front()->delta->GetZero();
+  float& cur_best_score = *best_score;
+  cur_best_score = (type == MAXIMIZE_SCORE ?
+    -numeric_limits<float>::max() : numeric_limits<float>::max());
+  bool left_edge = true;
+  double pos = numeric_limits<double>::quiet_NaN();
+  for (vector<ErrorIter>::iterator i = all_ints.begin();
+       i != all_ints.end(); ++i) {
+    const ErrorSegment& seg = **i;
+    assert(seg.delta);
+    if (seg.x - last_boundary > epsilon) {
+      float sco = acc->ComputeScore();
+      if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
+          (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
+        cur_best_score = sco;
+	if (left_edge) {
+	  pos = seg.x - 0.1;
+	  left_edge = false;
+	} else {
+	  pos = last_boundary + (seg.x - last_boundary) / 2;
+	}
+	// cerr << "NEW BEST: " << pos << "  (score=" << cur_best_score << ")\n";
+      }
+      // string xx; acc->ScoreDetails(&xx); cerr << "---- " << xx;
+      // cerr << "---- s=" << sco << "\n";
+      last_boundary = seg.x;
+    }
+    // cerr << "x-boundary=" << seg.x << "\n";
+    acc->PlusEquals(*seg.delta);
+  }
+  float sco = acc->ComputeScore();
+  if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
+      (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
+    cur_best_score = sco;
+    if (left_edge) {
+      pos = 0;
+    } else {
+      pos = last_boundary + 1000.0;
+    }
+  }
+  delete acc;
+  return pos;
+}
+
+void LineOptimizer::RandomUnitVector(const vector<int>& features_to_optimize,
+                                     SparseVector<double>* axis,
+                                     RandomNumberGenerator<boost::mt19937>* rng) {
+  axis->clear();
+  for (int i = 0; i < features_to_optimize.size(); ++i)
+    axis->set_value(features_to_optimize[i], rng->next() - 0.5);
+  (*axis) /= axis->l2norm();
+}
+
+void LineOptimizer::CreateOptimizationDirections(
+     const vector<int>& features_to_optimize,
+     int additional_random_directions,
+     RandomNumberGenerator<boost::mt19937>* rng,
+     vector<SparseVector<double> >* dirs) {
+  const int num_directions = features_to_optimize.size() + additional_random_directions;
+  dirs->resize(num_directions);
+  for (int i = 0; i < num_directions; ++i) {
+    SparseVector<double>& axis = (*dirs)[i];
+    if (i < features_to_optimize.size())
+      axis.set_value(features_to_optimize[i], 1.0);
+    else
+      RandomUnitVector(features_to_optimize, &axis, rng);
+  }
+  cerr << "Generated " << num_directions << " total axes to optimize along.\n";
+}
diff --git a/vest/line_optimizer.h b/vest/line_optimizer.h
new file mode 100644
index 00000000..43164360
--- /dev/null
+++ b/vest/line_optimizer.h
@@ -0,0 +1,44 @@
+#ifndef LINE_OPTIMIZER_H_
+#define LINE_OPTIMIZER_H_
+
+#include <vector>
+
+#include "error_surface.h"
+#include "sampler.h"
+
+template <typename T> class SparseVector;
+class Weights;
+
+struct LineOptimizer {
+
+  // use MINIMIZE_SCORE for things like TER, WER
+  // MAXIMIZE_SCORE for things like BLEU
+  enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE };
+
+  // merge all the error surfaces together into a global
+  // error surface and find (the middle of) the best segment
+  static double LineOptimize(
+     const std::vector<ErrorSurface>& envs,
+     const LineOptimizer::ScoreType type,
+     float* best_score,
+     const double epsilon = 1.0/65536.0);
+
+  // return a random vector of length 1 where all dimensions
+  // not listed in dimensions will be 0.
+  static void RandomUnitVector(const std::vector<int>& dimensions,
+                               SparseVector<double>* axis,
+                               RandomNumberGenerator<boost::mt19937>* rng);
+
+  // generate a list of directions to optimize; the list will
+  // contain the orthogonal vectors corresponding to the dimensions in
+  // primary and then additional_random_directions directions in those
+  // dimensions as well.  All vectors will be length 1.
+  static void CreateOptimizationDirections(
+     const std::vector<int>& primary,
+     int additional_random_directions,
+     RandomNumberGenerator<boost::mt19937>* rng,
+     std::vector<SparseVector<double> >* dirs);
+
+};
+
+#endif
diff --git a/vest/lo_test.cc b/vest/lo_test.cc
new file mode 100644
index 00000000..f937daac
--- /dev/null
+++ b/vest/lo_test.cc
@@ -0,0 +1,201 @@
+#include <cmath>
+#include <iostream>
+#include <fstream>
+
+#include <boost/shared_ptr.hpp>
+#include <gtest/gtest.h>
+
+#include "fdict.h"
+#include "hg.h"
+#include "kbest.h"
+#include "hg_io.h"
+#include "filelib.h"
+#include "inside_outside.h"
+#include "viterbi.h"
+#include "viterbi_envelope.h"
+#include "line_optimizer.h"
+#include "scorer.h"
+
+using namespace std;
+using boost::shared_ptr;
+
+class OptTest : public testing::Test {
+ protected:
+   virtual void SetUp() { }
+   virtual void TearDown() { }
+};
+
+const char* ref11 = "australia reopens embassy in manila";
+const char* ref12 = "( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack .";
+const char* ref21 = "australia reopened manila embassy";
+const char* ref22 = "( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack .";
+const char* ref31 = "australia to reopen embassy in manila";
+const char* ref32 = "( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so - called confirmed terrorist attack threats .";
+const char* ref41 = "australia to re - open its embassy to manila";
+const char* ref42 = "( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so - called \" clear \" threat of terrorist attack 7 weeks ago .";
+
+TEST_F(OptTest, TestCheckNaN) {
+  double x = 0;
+  double y = 0;
+  double z = x / y;
+  EXPECT_EQ(true, isnan(z));
+}
+
+TEST_F(OptTest,TestViterbiEnvelope) {
+  shared_ptr<Segment> a1(new Segment(-1, 0));
+  shared_ptr<Segment> b1(new Segment(1, 0));
+  shared_ptr<Segment> a2(new Segment(-1, 1));
+  shared_ptr<Segment> b2(new Segment(1, -1));
+  vector<shared_ptr<Segment> > sa; sa.push_back(a1); sa.push_back(b1);
+  vector<shared_ptr<Segment> > sb; sb.push_back(a2); sb.push_back(b2);
+  ViterbiEnvelope a(sa);
+  cerr << a << endl;
+  ViterbiEnvelope b(sb);
+  ViterbiEnvelope c = a;
+  c *= b;
+  cerr << a << " (*) " << b << " = " << c << endl;
+  EXPECT_EQ(3, c.size());
+}
+
+TEST_F(OptTest,TestViterbiEnvelopeInside) {
+  const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}";
+  Hypergraph hg;
+  istringstream instr(json);
+  HypergraphIO::ReadFromJSON(&instr, &hg);
+  SparseVector<double> wts;
+  wts.set_value(FD::Convert("f1"), 0.4);
+  wts.set_value(FD::Convert("f2"), 1.0);
+  hg.Reweight(wts);
+  vector<pair<vector<WordID>, prob_t> > list;
+  std::vector<SparseVector<double> > features;
+  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, 10);
+  for (int i = 0; i < 10; ++i) {
+    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+      kbest.LazyKthBest(hg.nodes_.size() - 1, i);
+    if (!d) break;
+    cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
+  }
+  SparseVector<double> dir; dir.set_value(FD::Convert("f1"), 1.0);
+  ViterbiEnvelopeWeightFunction wf(wts, dir);
+  ViterbiEnvelope env = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
+  cerr << env << endl;
+  const vector<boost::shared_ptr<Segment> >& segs = env.GetSortedSegs();
+  dir *= segs[1]->x;
+  wts += dir;
+  hg.Reweight(wts);
+  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest2(hg, 10);
+  for (int i = 0; i < 10; ++i) {
+    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+      kbest2.LazyKthBest(hg.nodes_.size() - 1, i);
+    if (!d) break;
+    cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
+  }
+  for (int i = 0; i < segs.size(); ++i) {
+    cerr << "seg=" << i << endl;
+    vector<WordID> trans;
+    segs[i]->ConstructTranslation(&trans);
+    cerr << TD::GetString(trans) << endl;
+  }
+}
+
+TEST_F(OptTest, TestS1) { 
+  int fPhraseModel_0 = FD::Convert("PhraseModel_0");
+  int fPhraseModel_1 = FD::Convert("PhraseModel_1");
+  int fPhraseModel_2 = FD::Convert("PhraseModel_2");
+  int fLanguageModel = FD::Convert("LanguageModel");
+  int fWordPenalty = FD::Convert("WordPenalty");
+  int fPassThrough = FD::Convert("PassThrough");
+  SparseVector<double> wts;
+  wts.set_value(fWordPenalty, 4.25);
+  wts.set_value(fLanguageModel, -1.1165);
+  wts.set_value(fPhraseModel_0, -0.96);
+  wts.set_value(fPhraseModel_1, -0.65);
+  wts.set_value(fPhraseModel_2, -0.77);
+  wts.set_value(fPassThrough, -10.0);
+
+  vector<int> to_optimize;
+  to_optimize.push_back(fWordPenalty);
+  to_optimize.push_back(fLanguageModel);
+  to_optimize.push_back(fPhraseModel_0);
+  to_optimize.push_back(fPhraseModel_1);
+  to_optimize.push_back(fPhraseModel_2);
+
+  Hypergraph hg;
+  ReadFile rf("./test_data/0.json.gz");
+  HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+  hg.Reweight(wts);
+
+  Hypergraph hg2;
+  ReadFile rf2("./test_data/1.json.gz");
+  HypergraphIO::ReadFromJSON(rf2.stream(), &hg2);
+  hg2.Reweight(wts);
+
+  vector<vector<WordID> > refs1(4);
+  TD::ConvertSentence(ref11, &refs1[0]);
+  TD::ConvertSentence(ref21, &refs1[1]);
+  TD::ConvertSentence(ref31, &refs1[2]);
+  TD::ConvertSentence(ref41, &refs1[3]);
+  vector<vector<WordID> > refs2(4);
+  TD::ConvertSentence(ref12, &refs2[0]);
+  TD::ConvertSentence(ref22, &refs2[1]);
+  TD::ConvertSentence(ref32, &refs2[2]);
+  TD::ConvertSentence(ref42, &refs2[3]);
+  ScoreType type = ScoreTypeFromString("ibm_bleu");
+  SentenceScorer* scorer1 = SentenceScorer::CreateSentenceScorer(type, refs1);
+  SentenceScorer* scorer2 = SentenceScorer::CreateSentenceScorer(type, refs2);
+  vector<ViterbiEnvelope> envs(2);
+  
+  RandomNumberGenerator<boost::mt19937> rng;
+
+  vector<SparseVector<double> > axes;
+  LineOptimizer::CreateOptimizationDirections(
+     to_optimize,
+     10,
+     &rng,
+     &axes);
+  assert(axes.size() == 10 + to_optimize.size());
+  for (int i = 0; i < axes.size(); ++i)
+    cerr << axes[i] << endl;
+  const SparseVector<double>& axis = axes[0];
+
+  cerr << "Computing Viterbi envelope using inside algorithm...\n";
+  cerr << "axis: " << axis << endl;
+  clock_t t_start=clock();
+  ViterbiEnvelopeWeightFunction wf(wts, axis);
+  envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
+  envs[1] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg2, NULL, wf);
+
+  vector<ErrorSurface> es(2);
+  scorer1->ComputeErrorSurface(envs[0], &es[0], IBM_BLEU, hg);
+  scorer2->ComputeErrorSurface(envs[1], &es[1], IBM_BLEU, hg2);
+  cerr << envs[0].size() << " " << envs[1].size() << endl;
+  cerr << es[0].size() << " " << es[1].size() << endl;
+  envs.clear();
+  clock_t t_env=clock();
+  float score;
+  double m = LineOptimizer::LineOptimize(es, LineOptimizer::MAXIMIZE_SCORE, &score);
+  clock_t t_opt=clock();
+  cerr << "line optimizer returned: " << m << " (SCORE=" << score << ")\n";
+  EXPECT_FLOAT_EQ(0.48719698, score);
+  SparseVector<double> res = axis;
+  res *= m;
+  res += wts;
+  cerr << "res: " << res << endl;
+  cerr << "ENVELOPE PROCESSING=" << (static_cast<double>(t_env - t_start) / 1000.0) << endl;
+  cerr << "  LINE OPTIMIZATION=" << (static_cast<double>(t_opt - t_env) / 1000.0) << endl;
+  hg.Reweight(res);
+  hg2.Reweight(res);
+  vector<WordID> t1,t2;
+  ViterbiESentence(hg, &t1);
+  ViterbiESentence(hg2, &t2);
+  cerr << TD::GetString(t1) << endl;
+  cerr << TD::GetString(t2) << endl;
+  delete scorer1;
+  delete scorer2;
+}
+
+int main(int argc, char **argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
diff --git a/vest/mbr_kbest.cc b/vest/mbr_kbest.cc
new file mode 100644
index 00000000..5d70b4e2
--- /dev/null
+++ b/vest/mbr_kbest.cc
@@ -0,0 +1,140 @@
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include "prob.h"
+#include "tdict.h"
+#include "scorer.h"
+#include "filelib.h"
+#include "stringlib.h"
+
+using namespace std;
+
+namespace po = boost::program_options;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)")
+        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function")
+        ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")
+        ("output_list,L", "Show reranked list as output")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+struct LossComparer {
+  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const {
+    return a.second < b.second;
+  }
+};
+
+bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) {
+  static string cache_id;
+  static pair<vector<WordID>, prob_t> cache_pair;
+  list->clear();
+  string cur_id;
+  if (cache_pair.first.size() > 0) {
+    list->push_back(cache_pair);
+    cur_id = cache_id;
+    cache_pair.first.clear();
+  }
+  string line;
+  string tstr;
+  while(*in) {
+    getline(*in, line);
+    if (line.empty()) continue;
+    size_t p1 = line.find(" ||| ");
+    if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
+    size_t p2 = line.find(" ||| ", p1 + 4);
+    if (p2 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
+    size_t p3 = line.rfind(" ||| ");
+    cache_id = line.substr(0, p1);
+    tstr = line.substr(p1 + 5, p2 - p1 - 5);
+    double val = strtod(line.substr(p3 + 5).c_str(), NULL);
+    TD::ConvertSentence(tstr, &cache_pair.first);
+    cache_pair.second.logeq(val);
+    if (cur_id.empty()) cur_id = cache_id;
+    if (cur_id == cache_id) {
+      list->push_back(cache_pair);
+      *sent_id = cur_id;
+      cache_pair.first.clear();
+    } else { break; }
+  }
+  return !list->empty();
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string metric = conf["loss_function"].as<string>();
+  const bool output_list = conf.count("output_list") > 0;
+  const string file = conf["input"].as<string>();
+  const double mbr_scale = conf["scale"].as<double>();
+  cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl;
+
+  ScoreType type = ScoreTypeFromString(metric);
+  vector<pair<vector<WordID>, prob_t> > list;
+  ReadFile rf(file);
+  string sent_id;
+  while(ReadKBestList(rf.stream(), &sent_id, &list)) {
+    vector<prob_t> joints(list.size());
+    const prob_t max_score = pow(list.front().second, mbr_scale);
+    prob_t marginal = prob_t::Zero();
+    for (int i = 0 ; i < list.size(); ++i) {
+      const prob_t joint = pow(list[i].second, mbr_scale) / max_score;
+      joints[i] = joint;
+      // cerr << "list[" << i << "] joint=" << log(joint) << endl;
+      marginal += joint;
+    }
+    int mbr_idx = -1;
+    vector<double> mbr_scores(output_list ? list.size() : 0);
+    double mbr_loss = numeric_limits<double>::max();
+    for (int i = 0 ; i < list.size(); ++i) {
+      vector<vector<WordID> > refs(1, list[i].first);
+      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl;
+      SentenceScorer* scorer = SentenceScorer::CreateSentenceScorer(type, refs);
+      double wl_acc = 0;
+      for (int j = 0; j < list.size(); ++j) {
+        if (i != j) {
+          Score* s = scorer->ScoreCandidate(list[j].first);
+          double loss = 1.0 - s->ComputeScore();
+          if (type == TER || type == AER) loss = 1.0 - loss;
+          delete s;
+          double weighted_loss = loss * (joints[j] / marginal);
+          wl_acc += weighted_loss;
+          if ((!output_list) && wl_acc > mbr_loss) break;
+        }
+      }
+      if (output_list) mbr_scores[i] = wl_acc;
+      if (wl_acc < mbr_loss) {
+        mbr_loss = wl_acc;
+        mbr_idx = i;
+      }
+      delete scorer;
+    }
+    // cerr << "ML translation: " << TD::GetString(list[0].first) << endl;
+    cerr << "MBR Best idx: " << mbr_idx << endl;
+    if (output_list) {
+      for (int i = 0; i < list.size(); ++i)
+        list[i].second.logeq(mbr_scores[i]);
+      sort(list.begin(), list.end(), LossComparer());
+      for (int i = 0; i < list.size(); ++i)
+        cout << sent_id << " ||| "
+             << TD::GetString(list[i].first) << " ||| "
+             << log(list[i].second) << endl;
+    } else {
+      cout << TD::GetString(list[mbr_idx].first) << endl;
+    }
+  }
+  return 0;
+}
+
diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc
new file mode 100644
index 00000000..c96a61e4
--- /dev/null
+++ b/vest/mr_vest_generate_mapper_input.cc
@@ -0,0 +1,72 @@
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "filelib.h"
+#include "weights.h"
+#include "line_optimizer.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("dev_set_size,s",po::value<unsigned int>(),"[REQD] Development set size (# of parallel sentences)")
+        ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository")
+        ("weights,w",po::value<string>(),"[REQD] Current feature weights file")
+        ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
+        ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;
+  if (conf->count("dev_set_size") == 0) {
+    cerr << "Please specify the size of the development set using -d N\n";
+    flag = true;
+  }
+  if (conf->count("weights") == 0) {
+    cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n";
+    flag = true;
+  }
+  if (conf->count("forest_repository") == 0) {
+    cerr << "Please specify the forest repository location using -r <DIR>\n";
+    flag = true;
+  }
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char** argv) {
+  RandomNumberGenerator<boost::mt19937> rng;
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  Weights weights;
+  vector<string> features;
+  weights.InitFromFile(conf["weights"].as<string>(), &features);
+  const string forest_repository = conf["forest_repository"].as<string>();
+  assert(DirectoryExists(forest_repository));
+  SparseVector<double> origin;
+  weights.InitSparseVector(&origin);
+  if (conf.count("optimize_feature") > 0)
+    features=conf["optimize_feature"].as<vector<string> >();
+  vector<SparseVector<double> > axes;
+  vector<int> fids(features.size());
+  for (int i = 0; i < features.size(); ++i)
+    fids[i] = FD::Convert(features[i]);
+  LineOptimizer::CreateOptimizationDirections(
+     fids,
+     conf["random_directions"].as<unsigned int>(),
+     &rng,
+     &axes);
+  int dev_set_size = conf["dev_set_size"].as<unsigned int>();
+  for (int i = 0; i < dev_set_size; ++i)
+    for (int j = 0; j < axes.size(); ++j)
+      cout << forest_repository << '/' << i << ".json.gz " << i << ' ' << origin << ' ' << axes[j] << endl;
+  return 0;
+}
diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc
new file mode 100644
index 00000000..d1ba159f
--- /dev/null
+++ b/vest/mr_vest_map.cc
@@ -0,0 +1,99 @@
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "filelib.h"
+#include "stringlib.h"
+#include "sparse_vector.h"
+#include "scorer.h"
+#include "viterbi_envelope.h"
+#include "inside_outside.h"
+#include "error_surface.h"
+#include "hg.h"
+#include "hg_io.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)")
+        ("source,s",po::value<string>(), "Source file (ignored, except for AER)")
+        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Loss function being optimized")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;
+  if (!conf->count("reference")) {
+    cerr << "Please specify one or more references using -r <REF.TXT>\n";
+    flag = true;
+  }
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+bool ReadSparseVectorString(const string& s, SparseVector<double>* v) {
+  vector<string> fields;
+  Tokenize(s, ';', &fields);
+  if (fields.empty()) return false;
+  for (int i = 0; i < fields.size(); ++i) {
+    vector<string> pair(2);
+    Tokenize(fields[i], '=', &pair);
+    if (pair.size() != 2) {
+      cerr << "Error parsing vector string: " << fields[i] << endl;
+      return false;
+    }
+    v->set_value(FD::Convert(pair[0]), atof(pair[1].c_str()));
+  }
+  return true;
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string loss_function = conf["loss_function"].as<string>();
+  ScoreType type = ScoreTypeFromString(loss_function);
+  DocScorer ds(type, conf["reference"].as<vector<string> >(), conf["source"].as<string>());
+  cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
+  Hypergraph hg;
+  string last_file;
+  while(cin) {
+    string line;
+    getline(cin, line);
+    if (line.empty()) continue;
+    istringstream is(line);
+    int sent_id;
+    string file, s_origin, s_axis;
+    is >> file >> sent_id >> s_origin >> s_axis;
+    SparseVector<double> origin;
+    assert(ReadSparseVectorString(s_origin, &origin));
+    SparseVector<double> axis;
+    assert(ReadSparseVectorString(s_axis, &axis));
+    // cerr << "File: " << file << "\nAxis: " << axis << "\n   X: " << origin << endl;
+    if (last_file != file) {
+      last_file = file;
+      ReadFile rf(file);
+      HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+    }
+    ViterbiEnvelopeWeightFunction wf(origin, axis);
+    ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
+    ErrorSurface es;
+    ds[sent_id]->ComputeErrorSurface(ve, &es, type, hg);
+    //cerr << "Viterbi envelope has " << ve.size() << " segments\n";
+    // cerr << "Error surface has " << es.size() << " segments\n";
+    string val;
+    es.Serialize(&val);
+    cout << 'M' << ' ' << s_origin << ' ' << s_axis << '\t';
+    B64::b64encode(val.c_str(), val.size(), &cout);
+    cout << endl;
+  }
+  return 0;
+}
diff --git a/vest/mr_vest_reduce.cc b/vest/mr_vest_reduce.cc
new file mode 100644
index 00000000..5efcc19a
--- /dev/null
+++ b/vest/mr_vest_reduce.cc
@@ -0,0 +1,81 @@
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "sparse_vector.h"
+#include "error_surface.h"
+#include "line_optimizer.h"
+#include "hg_io.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("loss_function,l",po::value<string>(), "Loss function being optimized")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = conf->count("loss_function") == 0;
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string loss_function = conf["loss_function"].as<string>();
+  ScoreType type = ScoreTypeFromString(loss_function);
+  LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
+  if (type == TER || type == AER) {
+    opt_type = LineOptimizer::MINIMIZE_SCORE;
+  }
+  string last_key;
+  vector<ErrorSurface> esv;
+  while(cin) {
+    string line;
+    getline(cin, line);
+    if (line.empty()) continue;
+    size_t ks = line.find("\t");
+    assert(string::npos != ks);
+    assert(ks > 2);
+    string key = line.substr(2, ks - 2);
+    string val = line.substr(ks + 1);
+    if (key != last_key) {
+      if (!last_key.empty()) {
+	float score;
+        double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
+	cout << last_key << "|" << x << "|" << score << endl;
+      }
+      last_key = key;
+      esv.clear();
+    }
+    if (val.size() % 4 != 0) {
+      cerr << "B64 encoding error 1! Skipping.\n";
+      continue;
+    }
+    string encoded(val.size() / 4 * 3, '\0');
+    if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) {
+      cerr << "B64 encoding error 2! Skipping.\n";
+      continue;
+    }
+    esv.push_back(ErrorSurface());
+    esv.back().Deserialize(type, encoded);
+  }
+  if (!esv.empty()) {
+    // cerr << "ESV=" << esv.size() << endl;
+    // for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; }
+    float score;
+    double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
+    cout << last_key << "|" << x << "|" << score << endl;
+  }
+  return 0;
+}
diff --git a/vest/scorer.cc b/vest/scorer.cc
new file mode 100644
index 00000000..53a17917
--- /dev/null
+++ b/vest/scorer.cc
@@ -0,0 +1,534 @@
+#include "scorer.h"
+
+#include <map>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <valarray>
+
+#include <boost/shared_ptr.hpp>
+
+#include "filelib.h"
+#include "aligner.h"
+#include "viterbi_envelope.h"
+#include "error_surface.h"
+#include "ter.h"
+#include "aer_scorer.h"
+#include "comb_scorer.h"
+#include "tdict.h"
+#include "stringlib.h"
+#include "lattice.h"
+
+using boost::shared_ptr;
+using namespace std;
+
+const bool minimize_segments = true;    // if adjacent segments have equal scores, merge them
+
+ScoreType ScoreTypeFromString(const string& st) {
+  const string sl = LowercaseString(st);
+  if (sl == "ser")
+    return SER;
+  if (sl == "ter")
+    return TER;
+  if (sl == "aer")
+    return AER;
+  if (sl == "bleu" || sl == "ibm_bleu")
+    return IBM_BLEU;
+  if (sl == "nist_bleu")
+    return NIST_BLEU;
+  if (sl == "koehn_bleu")
+    return Koehn_BLEU;
+  if (sl == "combi")
+    return BLEU_minus_TER_over_2;
+  cerr << "Don't understand score type '" << sl << "', defaulting to ibm_bleu.\n";
+  return IBM_BLEU;
+}
+
+Score::~Score() {}
+SentenceScorer::~SentenceScorer() {}
+const std::string* SentenceScorer::GetSource() const { return NULL; }
+
+class SERScore : public Score {
+  friend class SERScorer;
+ public:
+  SERScore() : correct(0), total(0) {}
+  float ComputeScore() const {
+    return static_cast<float>(correct) / static_cast<float>(total);
+  }
+  void ScoreDetails(string* details) const {
+    ostringstream os;
+    os << "SER= " << ComputeScore() << " (" << correct << '/' << total << ')';
+    *details = os.str();
+  }
+  void PlusEquals(const Score& delta) {
+    correct += static_cast<const SERScore&>(delta).correct;
+    total += static_cast<const SERScore&>(delta).total;
+  }
+  Score* GetZero() const { return new SERScore; }
+  void Subtract(const Score& rhs, Score* res) const {
+    SERScore* r = static_cast<SERScore*>(res);
+    r->correct = correct - static_cast<const SERScore&>(rhs).correct;
+    r->total = total - static_cast<const SERScore&>(rhs).total;
+  }
+  void Encode(string* out) const {
+    assert(!"not implemented");
+  }
+  bool IsAdditiveIdentity() const {
+    return (total == 0 && correct == 0);  // correct is always 0 <= n <= total
+  }
+ private:
+  int correct, total;
+};
+
+class SERScorer : public SentenceScorer {
+ public:
+  SERScorer(const vector<vector<WordID> >& references) : refs_(references) {}
+  Score* ScoreCandidate(const vector<WordID>& hyp) const {
+    SERScore* res = new SERScore;
+    res->total = 1;
+    for (int i = 0; i < refs_.size(); ++i)
+      if (refs_[i] == hyp) res->correct = 1;
+    return res;
+  }
+  static Score* ScoreFromString(const string& data) {
+    assert(!"Not implemented");
+  }
+ private:
+  vector<vector<WordID> > refs_;
+};
+
+class BLEUScore : public Score {
+  friend class BLEUScorerBase;
+ public:
+  BLEUScore(int n) : correct_ngram_hit_counts(0,n), hyp_ngram_counts(0,n) {
+    ref_len = 0;
+    hyp_len = 0; }
+  float ComputeScore() const;
+  void ScoreDetails(string* details) const;
+  void PlusEquals(const Score& delta);
+  Score* GetZero() const;
+  void Subtract(const Score& rhs, Score* res) const;
+  void Encode(string* out) const;
+  bool IsAdditiveIdentity() const {
+    if (fabs(ref_len) > 0.1f || hyp_len != 0) return false;
+    for (int i = 0; i < correct_ngram_hit_counts.size(); ++i)
+      if (hyp_ngram_counts[i] != 0 ||
+        correct_ngram_hit_counts[i] != 0) return false;
+    return true;
+  }
+ private:
+  float ComputeScore(vector<float>* precs, float* bp) const;
+  valarray<int> correct_ngram_hit_counts;
+  valarray<int> hyp_ngram_counts;
+  float ref_len;
+  int hyp_len;
+};
+
+class BLEUScorerBase : public SentenceScorer {
+ public:
+  BLEUScorerBase(const vector<vector<WordID> >& references,
+             int n
+             );
+  Score* ScoreCandidate(const vector<WordID>& hyp) const;
+  static Score* ScoreFromString(const string& in);
+
+ protected:
+  virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0;
+ private:
+  struct NGramCompare {
+    int operator() (const vector<WordID>& a, const vector<WordID>& b) {
+      size_t as = a.size();
+      size_t bs = b.size();
+      const size_t s = (as < bs ? as : bs);
+      for (size_t i = 0; i < s; ++i) {
+         int d = a[i] - b[i];
+         if (d < 0) return true;
+	 if (d > 0) return false;
+      }
+      return as < bs;
+    }
+  };
+  typedef map<vector<WordID>, pair<int,int>, NGramCompare> NGramCountMap;
+  void CountRef(const vector<WordID>& ref) {
+    NGramCountMap tc;
+    vector<WordID> ngram(n_);
+    int s = ref.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (n_ < remaining ? n_ : remaining);
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+	ngram.push_back(ref[j + i - 1]);
+        tc[ngram].first++;
+      }
+    }
+    for (NGramCountMap::iterator i = tc.begin(); i != tc.end(); ++i) {
+      pair<int,int>& p = ngrams_[i->first];
+      if (p.first < i->second.first)
+        p = i->second;
+    }
+  }
+
+  void ComputeNgramStats(const vector<WordID>& sent,
+       valarray<int>* correct,
+       valarray<int>* hyp) const {
+    assert(correct->size() == n_);
+    assert(hyp->size() == n_);
+    vector<WordID> ngram(n_);
+    (*correct) *= 0;
+    (*hyp) *= 0;
+    int s = sent.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (n_ < remaining ? n_ : remaining);
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+	ngram.push_back(sent[j + i - 1]);
+        pair<int,int>& p = ngrams_[ngram];
+        if (p.second < p.first) {
+          ++p.second;
+          (*correct)[i-1]++;
+        }
+	// if the 1 gram isn't found, don't try to match don't need to match any 2- 3- .. grams:
+	if (!p.first) {
+	  for (; i<=k; ++i)
+	    (*hyp)[i-1]++;
+	} else {
+          (*hyp)[i-1]++;
+        }
+      }
+    }
+  }
+
+  mutable NGramCountMap ngrams_;
+  int n_;
+  vector<int> lengths_;
+};
+
+Score* BLEUScorerBase::ScoreFromString(const string& in) {
+  istringstream is(in);
+  int n;
+  is >> n;
+  BLEUScore* r = new BLEUScore(n);
+  is >> r->ref_len >> r->hyp_len;
+
+  for (int i = 0; i < n; ++i) {
+    is >> r->correct_ngram_hit_counts[i];
+    is >> r->hyp_ngram_counts[i];
+  }
+  return r;
+}
+
+class IBM_BLEUScorer : public BLEUScorerBase {
+ public:
+    IBM_BLEUScorer(const vector<vector<WordID> >& references,
+		   int n=4) : BLEUScorerBase(references, n), lengths_(references.size()) {
+   for (int i=0; i < references.size(); ++i)
+     lengths_[i] = references[i].size();
+ }   
+ protected:
+  float ComputeRefLength(const vector<WordID>& hyp) const {
+    if (lengths_.size() == 1) return lengths_[0];
+    int bestd = 2000000;
+    int hl = hyp.size();
+    int bl = -1;
+    for (vector<int>::const_iterator ci = lengths_.begin(); ci != lengths_.end(); ++ci) {
+      int cl = *ci;
+      if (abs(cl - hl) < bestd) {
+        bestd = abs(cl - hl);
+        bl = cl;
+      }
+    }
+    return bl;
+  }
+ private:
+  vector<int> lengths_;
+};
+
+class NIST_BLEUScorer : public BLEUScorerBase {
+ public:
+    NIST_BLEUScorer(const vector<vector<WordID> >& references,
+                    int n=4) : BLEUScorerBase(references, n),
+		    shortest_(references[0].size()) {
+   for (int i=1; i < references.size(); ++i)
+     if (references[i].size() < shortest_)
+       shortest_ = references[i].size();
+ }
+ protected:
+  float ComputeRefLength(const vector<WordID>& hyp) const {
+    return shortest_;
+  }
+ private:
+  float shortest_;
+};
+
+class Koehn_BLEUScorer : public BLEUScorerBase {
+ public:
+    Koehn_BLEUScorer(const vector<vector<WordID> >& references,
+                     int n=4) : BLEUScorerBase(references, n),
+                     avg_(0) {
+   for (int i=0; i < references.size(); ++i)
+     avg_ += references[i].size();
+   avg_ /= references.size();
+ }
+ protected:
+  float ComputeRefLength(const vector<WordID>& hyp) const {
+    return avg_;
+  }
+ private:
+  float avg_;
+};
+
+SentenceScorer* SentenceScorer::CreateSentenceScorer(const ScoreType type,
+      const vector<vector<WordID> >& refs,
+      const string& src) {
+  switch (type) {
+    case IBM_BLEU: return new IBM_BLEUScorer(refs, 4);
+    case NIST_BLEU: return new NIST_BLEUScorer(refs, 4);
+    case Koehn_BLEU: return new Koehn_BLEUScorer(refs, 4);
+    case AER: return new AERScorer(refs, src);
+    case TER: return new TERScorer(refs);
+    case SER: return new SERScorer(refs);
+    case BLEU_minus_TER_over_2: return new BLEUTERCombinationScorer(refs);
+    default:
+      assert(!"Not implemented!");
+  }
+}
+
+Score* SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) {
+  switch (type) {
+    case IBM_BLEU:
+    case NIST_BLEU:
+    case Koehn_BLEU:
+      return BLEUScorerBase::ScoreFromString(in);
+    case TER:
+      return TERScorer::ScoreFromString(in);
+    case AER:
+      return AERScorer::ScoreFromString(in);
+    case SER:
+      return SERScorer::ScoreFromString(in);
+    case BLEU_minus_TER_over_2:
+      return BLEUTERCombinationScorer::ScoreFromString(in);
+    default:
+      assert(!"Not implemented!");
+  }
+}
+
+void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* env, const ScoreType type, const Hypergraph& hg) const {
+  vector<WordID> prev_trans;
+  const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
+  env->resize(ienv.size());
+  Score* prev_score = NULL;
+  int j = 0;
+  for (int i = 0; i < ienv.size(); ++i) {
+    const Segment& seg = *ienv[i];
+    vector<WordID> trans;
+    if (type == AER) {
+      vector<bool> edges(hg.edges_.size(), false);
+      seg.CollectEdgesUsed(&edges);  // get the set of edges in the viterbi
+                                     // alignment
+      ostringstream os;
+      const string* psrc = this->GetSource();
+      if (psrc == NULL) {
+        cerr << "AER scoring in VEST requires source, but it is missing!\n";
+        abort();
+      }
+      size_t pos = psrc->rfind(" ||| ");
+      if (pos == string::npos) {
+        cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl;
+        abort();
+      }
+      Lattice src;
+      Lattice ref;
+      LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src);
+      LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref);
+      AlignerTools::WriteAlignment(src, ref, hg, &os, true, &edges);
+      string tstr = os.str();
+      TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans);
+    } else {
+      seg.ConstructTranslation(&trans);
+    }
+    // cerr << "Scoring: " << TD::GetString(trans) << endl;
+    if (trans == prev_trans) {
+      if (!minimize_segments) {
+        assert(prev_score); // if this fails, it means
+	                    // the decoder can generate null translations
+        ErrorSegment& out = (*env)[j];
+        out.delta = prev_score->GetZero();
+        out.x = seg.x;
+	++j;
+      }
+      // cerr << "Identical translation, skipping scoring\n";
+    } else {
+      Score* score = ScoreCandidate(trans);
+      // cerr << "score= " << score->ComputeScore() << "\n";
+      Score* cur_delta = score->GetZero();
+      // just record the score diffs
+      if (!prev_score)
+        prev_score = score->GetZero();
+
+      score->Subtract(*prev_score, cur_delta);
+      delete prev_score;
+      prev_trans.swap(trans);
+      prev_score = score;
+      if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) {
+        ErrorSegment& out = (*env)[j];
+	out.delta = cur_delta;
+        out.x = seg.x;
+	++j;
+      }
+    }
+  }
+  delete prev_score;
+  // cerr << " In segments: " << ienv.size() << endl;
+  // cerr << "Out segments: " << j << endl;
+  assert(j > 0);
+  env->resize(j);
+}
+
+void BLEUScore::ScoreDetails(string* details) const {
+  char buf[2000];
+  vector<float> precs(4);
+  float bp;
+  float bleu = ComputeScore(&precs, &bp);
+  sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+       bleu*100.0,
+       precs[0]*100.0,
+       precs[1]*100.0,
+       precs[2]*100.0,
+       precs[3]*100.0,
+       bp);
+  *details = buf;
+}
+
+float BLEUScore::ComputeScore(vector<float>* precs, float* bp) const {
+  float log_bleu = 0;
+  if (precs) precs->clear();
+  int count = 0;
+  for (int i = 0; i < hyp_ngram_counts.size(); ++i) {
+    if (hyp_ngram_counts[i] > 0) {
+      float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]);
+      if (precs) precs->push_back(exp(lprec));
+      log_bleu += lprec;
+      ++count;
+    }
+  }
+  log_bleu /= static_cast<float>(count);
+  float lbp = 0.0;
+  if (hyp_len < ref_len)
+    lbp = (hyp_len - ref_len) / hyp_len;
+  log_bleu += lbp;
+  if (bp) *bp = exp(lbp);
+  return exp(log_bleu);
+}
+
+float BLEUScore::ComputeScore() const {
+  return ComputeScore(NULL, NULL);
+}
+
+void BLEUScore::Subtract(const Score& rhs, Score* res) const {
+  const BLEUScore& d = static_cast<const BLEUScore&>(rhs);
+  BLEUScore* o = static_cast<BLEUScore*>(res);
+  o->ref_len = ref_len - d.ref_len;
+  o->hyp_len = hyp_len - d.hyp_len;
+  o->correct_ngram_hit_counts = correct_ngram_hit_counts - d.correct_ngram_hit_counts;
+  o->hyp_ngram_counts = hyp_ngram_counts - d.hyp_ngram_counts;
+}
+
+void BLEUScore::PlusEquals(const Score& delta) {
+  const BLEUScore& d = static_cast<const BLEUScore&>(delta);
+  correct_ngram_hit_counts += d.correct_ngram_hit_counts;
+  hyp_ngram_counts += d.hyp_ngram_counts;
+  ref_len += d.ref_len;
+  hyp_len += d.hyp_len;
+}
+
+Score* BLEUScore::GetZero() const {
+  return new BLEUScore(hyp_ngram_counts.size());
+}
+
+void BLEUScore::Encode(string* out) const {
+  ostringstream os;
+  const int n = correct_ngram_hit_counts.size();
+  os << n << ' ' << ref_len << ' ' << hyp_len;
+  for (int i = 0; i < n; ++i)
+    os << ' ' << correct_ngram_hit_counts[i] << ' ' << hyp_ngram_counts[i];
+  *out = os.str();
+}
+
+BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references,
+                       int n) : n_(n) {
+  for (vector<vector<WordID> >::const_iterator ci = references.begin();
+       ci != references.end(); ++ci) {
+    lengths_.push_back(ci->size());
+    CountRef(*ci);
+  }
+}
+ 
+Score* BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const {
+  BLEUScore* bs = new BLEUScore(n_);
+  for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
+    i->second.second = 0;
+  ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts);
+  bs->ref_len = ComputeRefLength(hyp);
+  bs->hyp_len = hyp.size();
+  return bs;
+}
+
+DocScorer::~DocScorer() {
+  for (int i=0; i < scorers_.size(); ++i)
+    delete scorers_[i];
+}
+
+DocScorer::DocScorer(
+      const ScoreType type,
+      const vector<string>& ref_files,
+      const string& src_file) {
+  // TODO stop using valarray, start using ReadFile
+  cerr << "Loading references (" << ref_files.size() << " files)\n";
+  shared_ptr<ReadFile> srcrf;
+  if (type == AER && src_file.size() > 0) {
+    cerr << "  (source=" << src_file << ")\n";
+    srcrf.reset(new ReadFile(src_file));
+  }
+  valarray<ifstream> ifs(ref_files.size());
+  for (int i=0; i < ref_files.size(); ++i) {
+     ifs[i].open(ref_files[i].c_str());
+     assert(ifs[i].good());
+  }
+  char buf[64000];
+  bool expect_eof = false;
+  while (!ifs[0].eof()) {
+    vector<vector<WordID> > refs(ref_files.size());
+    for (int i=0; i < ref_files.size(); ++i) {
+      if (ifs[i].eof()) break;
+      ifs[i].getline(buf, 64000);
+      refs[i].clear();
+      if (strlen(buf) == 0) {
+        if (ifs[i].eof()) {
+	  if (!expect_eof) {
+	    assert(i == 0);
+	    expect_eof = true;
+	  } 
+          break;
+	}
+      } else {
+        TD::ConvertSentence(buf, &refs[i]);
+        assert(!refs[i].empty());
+      }
+      assert(!expect_eof);
+    }
+    if (!expect_eof) {
+      string src_line;
+      if (srcrf) {
+        getline(*srcrf->stream(), src_line);
+        map<string,string> dummy;
+        ProcessAndStripSGML(&src_line, &dummy);
+      }
+      scorers_.push_back(SentenceScorer::CreateSentenceScorer(type, refs, src_line));
+    }
+  }
+  cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
+}
+
diff --git a/vest/scorer.h b/vest/scorer.h
new file mode 100644
index 00000000..83d4db4c
--- /dev/null
+++ b/vest/scorer.h
@@ -0,0 +1,55 @@
+#ifndef SCORER_H_
+#define SCORER_H_
+
+#include <vector>
+#include <string>
+
+#include "wordid.h"
+
+class ViterbiEnvelope;
+class ErrorSurface;
+class Hypergraph;  // needed for alignment
+
+enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER, AER };
+ScoreType ScoreTypeFromString(const std::string& st);
+
+class Score {
+ public:
+  virtual ~Score();
+  virtual float ComputeScore() const = 0;
+  virtual void ScoreDetails(std::string* details) const = 0;
+  virtual void PlusEquals(const Score& rhs) = 0;
+  virtual void Subtract(const Score& rhs, Score* res) const = 0;
+  virtual Score* GetZero() const = 0;
+  virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta
+                                      // to another score results in no score change
+				      // under any circumstances
+  virtual void Encode(std::string* out) const = 0;
+};
+
+class SentenceScorer {
+ public:
+  virtual ~SentenceScorer();
+  void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es, const ScoreType type, const Hypergraph& hg) const;
+  virtual Score* ScoreCandidate(const std::vector<WordID>& hyp) const = 0;
+  virtual const std::string* GetSource() const;
+  static Score* CreateScoreFromString(const ScoreType type, const std::string& in);
+  static SentenceScorer* CreateSentenceScorer(const ScoreType type,
+    const std::vector<std::vector<WordID> >& refs,
+    const std::string& src = "");
+};
+
+class DocScorer {
+ public:
+  ~DocScorer();
+  DocScorer(
+    const ScoreType type,
+    const std::vector<std::string>& ref_files,
+    const std::string& src_file = "");
+  int size() const { return scorers_.size(); }
+  const SentenceScorer* operator[](size_t i) const { return scorers_[i]; }
+ private:
+  std::vector<SentenceScorer*> scorers_;
+};
+
+#endif
diff --git a/vest/scorer_test.cc b/vest/scorer_test.cc
new file mode 100644
index 00000000..74875ab8
--- /dev/null
+++ b/vest/scorer_test.cc
@@ -0,0 +1,203 @@
+#include <iostream>
+#include <fstream>
+#include <valarray>
+#include <gtest/gtest.h>
+
+#include "tdict.h"
+#include "scorer.h"
+#include "aer_scorer.h"
+
+using namespace std;
+
+class ScorerTest : public testing::Test {
+ protected:
+   virtual void SetUp() {
+     refs0.resize(4);
+     refs1.resize(4);
+     TD::ConvertSentence("export of high-tech products in guangdong in first two months this year reached 3.76 billion us dollars", &refs0[0]);
+     TD::ConvertSentence("guangdong's export of new high technology products amounts to us $ 3.76 billion in first two months of this year", &refs0[1]);
+     TD::ConvertSentence("guangdong exports us $ 3.76 billion worth of high technology products in the first two months of this year", &refs0[2]);
+     TD::ConvertSentence("in the first 2 months this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars .", &refs0[3]);
+     TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter chen ji ) the latest statistics show that from january through february this year , the export of high-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 \% over the same period last year and accounted for 25.5 \% of the total export in the province .", &refs1[0]);
+     TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter : chen ji ) -- latest statistic indicates that guangdong's export of new high technology products amounts to us $ 3.76 billion , up 34.8 \% over corresponding period and accounts for 25.5 \% of the total exports of the province .", &refs1[1]);
+     TD::ConvertSentence("xinhua news agency report of march 16 from guangzhou ( by staff reporter chen ji ) - latest statistics indicate guangdong province exported us $ 3.76 billion worth of high technology products , up 34.8 percent from the same period last year , which account for 25.5 percent of the total exports of the province .", &refs1[2]);
+     TD::ConvertSentence("guangdong , march 16 , ( xinhua ) -- ( chen ji reports ) as the newest statistics shows , in january and feberuary this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 \% than last year , making up 25.5 \% of the province's total .", &refs1[3]);
+     TD::ConvertSentence("one guangdong province will next export us $ 3.76 high-tech product two months first this year 3.76 billion us dollars", &hyp1);
+     TD::ConvertSentence("xinhua news agency , guangzhou , 16th of march ( reporter chen ) -- latest statistics suggest that guangdong exports new advanced technology product totals $ 3.76 million , 34.8 percent last corresponding period and accounts for 25.5 percent of the total export province .", &hyp2);
+   }
+
+   virtual void TearDown() { }
+
+   vector<vector<WordID> > refs0;
+   vector<vector<WordID> > refs1;
+   vector<WordID> hyp1;
+   vector<WordID> hyp2;
+};
+
+TEST_F(ScorerTest, TestCreateFromFiles) {
+  vector<string> files;
+  files.push_back("test_data/re.txt.0");
+  files.push_back("test_data/re.txt.1");
+  files.push_back("test_data/re.txt.2");
+  files.push_back("test_data/re.txt.3");
+  DocScorer ds(IBM_BLEU, files);
+}
+
+TEST_F(ScorerTest, TestBLEUScorer) {
+  SentenceScorer* s1 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs0);
+  SentenceScorer* s2 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs1);
+  Score* b1 = s1->ScoreCandidate(hyp1);
+  EXPECT_FLOAT_EQ(0.23185077, b1->ComputeScore());
+  Score* b2 = s2->ScoreCandidate(hyp2);
+  EXPECT_FLOAT_EQ(0.38101241, b2->ComputeScore());
+  b1->PlusEquals(*b2);
+  EXPECT_FLOAT_EQ(0.348854, b1->ComputeScore());
+  EXPECT_FALSE(b1->IsAdditiveIdentity());
+  string details;
+  b1->ScoreDetails(&details);
+  EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details);
+  cerr << details << endl;
+  string enc;
+  b1->Encode(&enc);
+  Score* b3 = SentenceScorer::CreateScoreFromString(IBM_BLEU, enc);
+  details.clear();
+  cerr << "Encoded BLEU score size: " << enc.size() << endl;
+  b3->ScoreDetails(&details);
+  cerr << details << endl;
+  EXPECT_FALSE(b3->IsAdditiveIdentity());
+  EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details);
+  Score* bz = b3->GetZero();
+  EXPECT_TRUE(bz->IsAdditiveIdentity());
+  delete bz;
+  delete b1;
+  delete s1;
+  delete b2;
+  delete s2;
+}
+
+TEST_F(ScorerTest, TestTERScorer) {
+  SentenceScorer* s1 = SentenceScorer::CreateSentenceScorer(TER, refs0);
+  SentenceScorer* s2 = SentenceScorer::CreateSentenceScorer(TER, refs1);
+  string details;
+  Score* t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t1->ComputeScore() << endl;
+  Score* t2 = s2->ScoreCandidate(hyp2);
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t2->ComputeScore() << endl;
+  t1->PlusEquals(*t2);
+  cerr << t1->ComputeScore() << endl;
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  EXPECT_EQ("TER = 44.16,   4|  8| 16|  6 (len=77)", details);
+  string enc;
+  t1->Encode(&enc);
+  Score* t3 = SentenceScorer::CreateScoreFromString(TER, enc);
+  details.clear();
+  t3->ScoreDetails(&details);
+  EXPECT_EQ("TER = 44.16,   4|  8| 16|  6 (len=77)", details);
+  EXPECT_FALSE(t3->IsAdditiveIdentity());
+  Score* tz = t3->GetZero();
+  EXPECT_TRUE(tz->IsAdditiveIdentity());
+  delete tz;
+  delete t3;
+  delete t1;
+  delete s1;
+  delete t2;
+  delete s2;
+}
+
+TEST_F(ScorerTest, TestTERScorerSimple) {
+  vector<vector<WordID> > ref(1);
+  TD::ConvertSentence("1 2 3 A B", &ref[0]);
+  vector<WordID> hyp;
+  TD::ConvertSentence("A B 1 2 3", &hyp);
+  SentenceScorer* s1 = SentenceScorer::CreateSentenceScorer(TER, ref);
+  string details;
+  Score* t1 = s1->ScoreCandidate(hyp);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  delete t1;
+  delete s1;
+}
+
+TEST_F(ScorerTest, TestSERScorerSimple) {
+  vector<vector<WordID> > ref(1);
+  TD::ConvertSentence("A B C D", &ref[0]);
+  vector<WordID> hyp1;
+  TD::ConvertSentence("A B C", &hyp1);
+  vector<WordID> hyp2;
+  TD::ConvertSentence("A B C D", &hyp2);
+  SentenceScorer* s1 = SentenceScorer::CreateSentenceScorer(SER, ref);
+  string details;
+  Score* t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  Score* t2 = s1->ScoreCandidate(hyp2);
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  t2->PlusEquals(*t1);
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  delete t1;
+  delete t2;
+  delete s1;
+}
+
+TEST_F(ScorerTest, TestCombiScorer) {
+  SentenceScorer* s1 = SentenceScorer::CreateSentenceScorer(BLEU_minus_TER_over_2, refs0);
+  string details;
+  Score* t1 = s1->ScoreCandidate(hyp1);
+  t1->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  cerr << t1->ComputeScore() << endl;
+  string enc;
+  t1->Encode(&enc);
+  Score* t2 = SentenceScorer::CreateScoreFromString(BLEU_minus_TER_over_2, enc);
+  details.clear();
+  t2->ScoreDetails(&details);
+  cerr << "DETAILS: " << details << endl;
+  Score* cz = t2->GetZero();
+  EXPECT_FALSE(t2->IsAdditiveIdentity());
+  EXPECT_TRUE(cz->IsAdditiveIdentity());
+  cz->PlusEquals(*t2);
+  EXPECT_FALSE(cz->IsAdditiveIdentity());
+  string d2;
+  cz->ScoreDetails(&d2);
+  EXPECT_EQ(d2, details); 
+  delete cz;
+  delete t2;
+  delete t1;
+}
+
+TEST_F(ScorerTest, AERTest) {
+  vector<vector<WordID> > refs0(1);
+  TD::ConvertSentence("0-0 2-1 1-2 3-3", &refs0[0]);
+
+  vector<WordID> hyp;
+  TD::ConvertSentence("0-0 1-1", &hyp);
+  AERScorer* as = new AERScorer(refs0);
+  Score* x = as->ScoreCandidate(hyp);
+  string details;
+  x->ScoreDetails(&details);
+  cerr << details << endl;
+  string enc;
+  x->Encode(&enc);
+  delete x;
+  delete as;
+  cerr << "ENC size: " << enc.size() << endl;
+  Score* y = SentenceScorer::CreateScoreFromString(AER, enc);
+  string d2;
+  y->ScoreDetails(&d2);
+  cerr << d2 << endl;
+  delete y;
+  EXPECT_EQ(d2, details);
+}
+
+int main(int argc, char **argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
diff --git a/vest/ter.cc b/vest/ter.cc
new file mode 100644
index 00000000..ef66f3b7
--- /dev/null
+++ b/vest/ter.cc
@@ -0,0 +1,518 @@
+#include "ter.h"
+
+#include <cstdio>
+#include <cassert>
+#include <iostream>
+#include <limits>
+#include <sstream>
+#include <tr1/unordered_map>
+#include <set>
+#include <valarray>
+#include <boost/functional/hash.hpp>
+
+#include "tdict.h"
+
+const bool ter_use_average_ref_len = true;
+const int ter_short_circuit_long_sentences = -1;
+
+using namespace std;
+using namespace std::tr1;
+
+struct COSTS {
+  static const float substitution;
+  static const float deletion;
+  static const float insertion;
+  static const float shift;
+};
+const float COSTS::substitution = 1.0f;
+const float COSTS::deletion = 1.0f;
+const float COSTS::insertion = 1.0f;
+const float COSTS::shift = 1.0f;
+
+static const int MAX_SHIFT_SIZE = 10;
+static const int MAX_SHIFT_DIST = 50;
+
+struct Shift {
+  unsigned int d_;
+  Shift() : d_() {}
+  Shift(int b, int e, int m) : d_() {
+    begin(b);
+    end(e);
+    moveto(m);
+  }
+  inline int begin() const {
+    return d_ & 0x3ff;
+  }
+  inline int end() const {
+    return (d_ >> 10) & 0x3ff;
+  }
+  inline int moveto() const {
+    int m = (d_ >> 20) & 0x7ff;
+    if (m > 1024) { m -= 1024; m *= -1; }
+    return m;
+  }
+  inline void begin(int b) {
+    d_ &= 0xfffffc00u;
+    d_ |= (b & 0x3ff);
+  }
+  inline void end(int e) {
+    d_ &= 0xfff003ffu;
+    d_ |= (e & 0x3ff) << 10;
+  }
+  inline void moveto(int m) {
+    bool neg = (m < 0);
+    if (neg) { m *= -1; m += 1024; }
+    d_ &= 0xfffff;
+    d_ |= (m & 0x7ff) << 20;
+  }
+};
+
+class TERScorerImpl {
+
+ public:
+  enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION };
+
+  explicit TERScorerImpl(const vector<WordID>& ref) : ref_(ref) {
+    for (int i = 0; i < ref.size(); ++i)
+      rwexists_.insert(ref[i]);
+  }
+
+  float Calculate(const vector<WordID>& hyp, int* subs, int* ins, int* dels, int* shifts) const {
+    return CalculateAllShifts(hyp, subs, ins, dels, shifts);
+  }
+
+  inline int GetRefLength() const {
+    return ref_.size();
+  }
+
+ private:
+  vector<WordID> ref_;
+  set<WordID> rwexists_;
+
+  typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap;
+  mutable NgramToIntsMap nmap_;
+ 
+  static float MinimumEditDistance(
+      const vector<WordID>& hyp,
+      const vector<WordID>& ref,
+      vector<TransType>* path) {
+    vector<vector<TransType> > bmat(hyp.size() + 1, vector<TransType>(ref.size() + 1, MATCH));
+    vector<vector<float> > cmat(hyp.size() + 1, vector<float>(ref.size() + 1, 0));
+    for (int i = 0; i <= hyp.size(); ++i)
+      cmat[i][0] = i;
+    for (int j = 0; j <= ref.size(); ++j)
+      cmat[0][j] = j;
+    for (int i = 1; i <= hyp.size(); ++i) {
+      const WordID& hw = hyp[i-1];
+      for (int j = 1; j <= ref.size(); ++j) {
+        const WordID& rw = ref[j-1];
+	float& cur_c = cmat[i][j];
+	TransType& cur_b = bmat[i][j];
+
+        if (rw == hw) {
+          cur_c = cmat[i-1][j-1];
+          cur_b = MATCH;
+        } else {
+          cur_c = cmat[i-1][j-1] + COSTS::substitution;
+          cur_b = SUBSTITUTION;
+        }
+	float cwoi = cmat[i-1][j];
+        if (cur_c > cwoi + COSTS::insertion) {
+          cur_c = cwoi + COSTS::insertion;
+          cur_b = INSERTION;
+        }
+        float cwod = cmat[i][j-1];
+        if (cur_c > cwod + COSTS::deletion) {
+          cur_c = cwod + COSTS::deletion;
+          cur_b = DELETION;
+        }
+      }
+    }
+    
+    // trace back along the best path and record the transition types
+    path->clear();
+    int i = hyp.size();
+    int j = ref.size();
+    while (i > 0 || j > 0) {
+      if (j == 0) {
+        --i;
+        path->push_back(INSERTION);
+      } else if (i == 0) {
+        --j;
+        path->push_back(DELETION);
+      } else {
+        TransType t = bmat[i][j];
+        path->push_back(t);
+        switch (t) {
+          case SUBSTITUTION:
+          case MATCH:
+            --i; --j; break;
+          case INSERTION:
+            --i; break;
+          case DELETION:
+            --j; break;
+        }
+      }
+    }
+    reverse(path->begin(), path->end());
+    return cmat[hyp.size()][ref.size()];
+  }
+
+  void BuildWordMatches(const vector<WordID>& hyp, NgramToIntsMap* nmap) const {
+    nmap->clear();
+    set<WordID> exists_both;
+    for (int i = 0; i < hyp.size(); ++i)
+      if (rwexists_.find(hyp[i]) != rwexists_.end())
+        exists_both.insert(hyp[i]);
+    for (int start=0; start<ref_.size(); ++start) {
+      if (exists_both.find(ref_[start]) == exists_both.end()) continue;
+      vector<WordID> cp;
+      int mlen = min(MAX_SHIFT_SIZE, static_cast<int>(ref_.size() - start));
+      for (int len=0; len<mlen; ++len) {
+        if (len && exists_both.find(ref_[start + len]) == exists_both.end()) break;
+        cp.push_back(ref_[start + len]);
+	(*nmap)[cp].insert(start);
+      }
+    }
+  }
+
+  static void PerformShift(const vector<WordID>& in,
+    int start, int end, int moveto, vector<WordID>* out) {
+    // cerr << "ps: " << start << " " << end << " " << moveto << endl;
+    out->clear();
+    if (moveto == -1) {
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else if (moveto < start) {
+      for (int i = 0; i <= moveto; ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = moveto+1; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else if (moveto > end) {
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i <= moveto; ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = moveto+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else {
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; (i < in.size()) && (i <= end + (moveto - start)); ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = (end + (moveto - start))+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    }
+    if (out->size() != in.size()) {
+      cerr << "ps: " << start << " " << end << " " << moveto << endl;
+      cerr << "in=" << TD::GetString(in) << endl;
+      cerr << "out=" << TD::GetString(*out) << endl;
+    }
+    assert(out->size() == in.size()); 
+    // cerr << "ps: " << TD::GetString(*out) << endl;
+  }
+
+  void GetAllPossibleShifts(const vector<WordID>& hyp,
+      const vector<int>& ralign,
+      const vector<bool>& herr,
+      const vector<bool>& rerr,
+      const int min_size,
+      vector<vector<Shift> >* shifts) const {
+    for (int start = 0; start < hyp.size(); ++start) {
+      vector<WordID> cp(1, hyp[start]);
+      NgramToIntsMap::iterator niter = nmap_.find(cp);
+      if (niter == nmap_.end()) continue;
+      bool ok = false;
+      int moveto;
+      for (set<int>::iterator i = niter->second.begin(); i != niter->second.end(); ++i) {
+        moveto = *i;
+        int rm = ralign[moveto];
+        ok = (start != rm &&
+              (rm - start) < MAX_SHIFT_DIST &&
+              (start - rm - 1) < MAX_SHIFT_DIST);
+        if (ok) break;
+      }
+      if (!ok) continue;
+      cp.clear();
+      for (int end = start + min_size - 1;
+           ok && end < hyp.size() && end < (start + MAX_SHIFT_SIZE); ++end) {
+        cp.push_back(hyp[end]);
+	vector<Shift>& sshifts = (*shifts)[end - start];
+        ok = false;
+        NgramToIntsMap::iterator niter = nmap_.find(cp);
+        if (niter == nmap_.end()) break;
+        bool any_herr = false;
+        for (int i = start; i <= end && !any_herr; ++i)
+          any_herr = herr[i];
+        if (!any_herr) {
+          ok = true;
+          continue;
+        }
+        for (set<int>::iterator mi = niter->second.begin();
+             mi != niter->second.end(); ++mi) {
+          int moveto = *mi;
+	  int rm = ralign[moveto];
+	  if (! ((rm != start) &&
+	        ((rm < start) || (rm > end)) &&
+		(rm - start <= MAX_SHIFT_DIST) &&
+		((start - rm - 1) <= MAX_SHIFT_DIST))) continue;
+          ok = true;
+	  bool any_rerr = false;
+	  for (int i = 0; (i <= end - start) && (!any_rerr); ++i)
+            any_rerr = rerr[moveto+i];
+	  if (!any_rerr) continue;
+	  for (int roff = 0; roff <= (end - start); ++roff) {
+	    int rmr = ralign[moveto+roff];
+	    if ((start != rmr) && ((roff == 0) || (rmr != ralign[moveto])))
+	      sshifts.push_back(Shift(start, end, moveto + roff));
+	  }
+        }
+      }
+    }
+  }
+
+  bool CalculateBestShift(const vector<WordID>& cur,
+                          const vector<WordID>& hyp,
+                          float curerr,
+                          const vector<TransType>& path,
+                          vector<WordID>* new_hyp,
+                          float* newerr,
+                          vector<TransType>* new_path) const {
+    vector<bool> herr, rerr;
+    vector<int> ralign;
+    int hpos = -1;
+    for (int i = 0; i < path.size(); ++i) {
+      switch (path[i]) {
+        case MATCH:
+	  ++hpos;
+	  herr.push_back(false);
+	  rerr.push_back(false);
+	  ralign.push_back(hpos);
+          break;
+        case SUBSTITUTION:
+	  ++hpos;
+	  herr.push_back(true);
+	  rerr.push_back(true);
+	  ralign.push_back(hpos);
+          break;
+        case INSERTION:
+	  ++hpos;
+	  herr.push_back(true);
+          break;
+	case DELETION:
+	  rerr.push_back(true);
+	  ralign.push_back(hpos);
+          break;
+      }
+    }
+#if 0
+    cerr << "RALIGN: ";
+    for (int i = 0; i < rerr.size(); ++i)
+      cerr << ralign[i] << " ";
+    cerr << endl;
+    cerr << "RERR: ";
+    for (int i = 0; i < rerr.size(); ++i)
+      cerr << (bool)rerr[i] << " ";
+    cerr << endl;
+    cerr << "HERR: ";
+    for (int i = 0; i < herr.size(); ++i)
+      cerr << (bool)herr[i] << " ";
+    cerr << endl;
+#endif
+
+    vector<vector<Shift> > shifts(MAX_SHIFT_SIZE + 1);
+    GetAllPossibleShifts(cur, ralign, herr, rerr, 1, &shifts);
+    float cur_best_shift_cost = 0;
+    *newerr = curerr;
+    vector<TransType> cur_best_path;
+    vector<WordID> cur_best_hyp;
+    
+    bool res = false;
+    for (int i = shifts.size() - 1; i >=0; --i) {
+      float curfix = curerr - (cur_best_shift_cost + *newerr);
+      float maxfix = 2.0f * (1 + i) - COSTS::shift;
+      if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) break;
+      for (int j = 0; j < shifts[i].size(); ++j) {
+        const Shift& s = shifts[i][j];
+	curfix = curerr - (cur_best_shift_cost + *newerr);
+	maxfix = 2.0f * (1 + i) - COSTS::shift;  // TODO remove?
+        if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) continue;
+	vector<WordID> shifted(cur.size());
+	PerformShift(cur, s.begin(), s.end(), ralign[s.moveto()], &shifted);
+	vector<TransType> try_path;
+	float try_cost = MinimumEditDistance(shifted, ref_, &try_path);
+	float gain = (*newerr + cur_best_shift_cost) - (try_cost + COSTS::shift);
+	if (gain > 0.0f || ((cur_best_shift_cost == 0.0f) && (gain == 0.0f))) {
+	  *newerr = try_cost;
+	  cur_best_shift_cost = COSTS::shift;
+	  new_path->swap(try_path);
+	  new_hyp->swap(shifted);
+	  res = true;
+	  // cerr << "Found better shift " << s.begin() << "..." << s.end() << " moveto " << s.moveto() << endl;
+	}
+      }
+    }
+
+    return res;
+  }
+
+  static void GetPathStats(const vector<TransType>& path, int* subs, int* ins, int* dels) {
+    *subs = *ins = *dels = 0;
+    for (int i = 0; i < path.size(); ++i) {
+      switch (path[i]) {
+        case SUBSTITUTION:
+	  ++(*subs);
+        case MATCH:
+          break;
+        case INSERTION:
+          ++(*ins); break;
+	case DELETION:
+          ++(*dels); break;
+      }
+    }
+  }
+
+  float CalculateAllShifts(const vector<WordID>& hyp,
+      int* subs, int* ins, int* dels, int* shifts) const {
+    BuildWordMatches(hyp, &nmap_);
+    vector<TransType> path;
+    float med_cost = MinimumEditDistance(hyp, ref_, &path);
+    float edits = 0;
+    vector<WordID> cur = hyp;
+    *shifts = 0;
+    if (ter_short_circuit_long_sentences < 0 ||
+        ref_.size() < ter_short_circuit_long_sentences) {
+      while (true) {
+        vector<WordID> new_hyp;
+        vector<TransType> new_path;
+        float new_med_cost;
+        if (!CalculateBestShift(cur, hyp, med_cost, path, &new_hyp, &new_med_cost, &new_path))
+          break;
+        edits += COSTS::shift;
+        ++(*shifts);
+        med_cost = new_med_cost;
+        path.swap(new_path);
+        cur.swap(new_hyp);
+      }
+    }
+    GetPathStats(path, subs, ins, dels);
+    return med_cost + edits;
+  }
+};
+
+class TERScore : public Score {
+  friend class TERScorer;
+
+ public:
+  static const unsigned kINSERTIONS = 0;
+  static const unsigned kDELETIONS = 1;
+  static const unsigned kSUBSTITUTIONS = 2;
+  static const unsigned kSHIFTS = 3;
+  static const unsigned kREF_WORDCOUNT = 4;
+  static const unsigned kDUMMY_LAST_ENTRY = 5;
+
+ TERScore() : stats(0,kDUMMY_LAST_ENTRY) {}
+  float ComputeScore() const {
+    float edits = static_cast<float>(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]);
+    return edits / static_cast<float>(stats[kREF_WORDCOUNT]);
+  }
+  void ScoreDetails(string* details) const;
+  void PlusEquals(const Score& delta) {
+    stats += static_cast<const TERScore&>(delta).stats;
+  }
+  Score* GetZero() const {
+    return new TERScore;
+  }
+  void Subtract(const Score& rhs, Score* res) const {
+    static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats;
+  }
+  void Encode(std::string* out) const {
+    ostringstream os;
+    os << stats[kINSERTIONS] << ' '
+       << stats[kDELETIONS] << ' '
+       << stats[kSUBSTITUTIONS] << ' '
+       << stats[kSHIFTS] << ' '
+       << stats[kREF_WORDCOUNT];
+    *out = os.str();
+  }
+  bool IsAdditiveIdentity() const {
+    for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i)
+      if (stats[i] != 0) return false;
+    return true;
+  }
+ private:
+  valarray<int> stats;
+};
+
+Score* TERScorer::ScoreFromString(const std::string& data) {
+  istringstream is(data);
+  TERScore* r = new TERScore;
+  is >> r->stats[TERScore::kINSERTIONS]
+     >> r->stats[TERScore::kDELETIONS]
+     >> r->stats[TERScore::kSUBSTITUTIONS]
+     >> r->stats[TERScore::kSHIFTS]
+     >> r->stats[TERScore::kREF_WORDCOUNT];
+  return r;
+}
+
+void TERScore::ScoreDetails(std::string* details) const {
+  char buf[200];
+  sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)",
+     ComputeScore() * 100.0f,  
+     stats[kINSERTIONS],
+     stats[kDELETIONS],
+     stats[kSUBSTITUTIONS],
+     stats[kSHIFTS],
+     stats[kREF_WORDCOUNT]);
+  *details = buf;
+}
+
+TERScorer::~TERScorer() {
+  for (vector<TERScorerImpl*>::iterator i = impl_.begin(); i != impl_.end(); ++i)
+    delete *i;
+}
+
+TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) {
+  for (int i = 0; i < refs.size(); ++i)
+    impl_[i] = new TERScorerImpl(refs[i]);
+}
+
+Score* TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
+  float best_score = numeric_limits<float>::max();
+  TERScore* res = new TERScore;
+  int avg_len = 0;
+  for (int i = 0; i < impl_.size(); ++i)
+    avg_len += impl_[i]->GetRefLength();
+  avg_len /= impl_.size();
+  for (int i = 0; i < impl_.size(); ++i) {
+    int subs, ins, dels, shifts;
+    float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts);
+    // cerr << "Component TER cost: " << score << endl;
+    if (score < best_score) {
+      res->stats[TERScore::kINSERTIONS] = ins;
+      res->stats[TERScore::kDELETIONS] = dels;
+      res->stats[TERScore::kSUBSTITUTIONS] = subs;
+      res->stats[TERScore::kSHIFTS] = shifts;
+      if (ter_use_average_ref_len) {
+        res->stats[TERScore::kREF_WORDCOUNT] = avg_len;
+      } else {
+        res->stats[TERScore::kREF_WORDCOUNT] = impl_[i]->GetRefLength();
+      }
+
+      best_score = score;
+    }
+  }
+  return res;
+}
diff --git a/vest/ter.h b/vest/ter.h
new file mode 100644
index 00000000..fe4ba36c
--- /dev/null
+++ b/vest/ter.h
@@ -0,0 +1,18 @@
+#ifndef _TER_H_
+#define _TER_H_
+
+#include "scorer.h"
+
+class TERScorerImpl;
+
+class TERScorer : public SentenceScorer {
+ public:
+  TERScorer(const std::vector<std::vector<WordID> >& references);
+  ~TERScorer();
+  Score* ScoreCandidate(const std::vector<WordID>& hyp) const;
+  static Score* ScoreFromString(const std::string& data);
+ private:
+  std::vector<TERScorerImpl*> impl_;
+};
+
+#endif
diff --git a/vest/test_aer/README b/vest/test_aer/README
new file mode 100644
index 00000000..819b2e32
--- /dev/null
+++ b/vest/test_aer/README
@@ -0,0 +1,8 @@
+To run the test:
+
+../dist-vest.pl --local --metric aer cdec.ini --source-file corpus.src --ref-files=ref.0 --weights weights
+
+This will optimize the parameters of the tiny lexical translation model
+so as to minimize the AER of the Viterbi alignment on the development
+set in corpus.src according to the reference alignments in ref.0.
+
diff --git a/vest/test_aer/cdec.ini b/vest/test_aer/cdec.ini
new file mode 100644
index 00000000..08187848
--- /dev/null
+++ b/vest/test_aer/cdec.ini
@@ -0,0 +1,3 @@
+formalism=lextrans
+grammar=grammar
+aligner=true
diff --git a/vest/test_aer/corpus.src b/vest/test_aer/corpus.src
new file mode 100644
index 00000000..31b23971
--- /dev/null
+++ b/vest/test_aer/corpus.src
@@ -0,0 +1,3 @@
+el gato negro ||| the black cat
+el gato ||| the cat
+el libro ||| the book
diff --git a/vest/test_aer/grammar b/vest/test_aer/grammar
new file mode 100644
index 00000000..9d857824
--- /dev/null
+++ b/vest/test_aer/grammar
@@ -0,0 +1,12 @@
+el ||| cat ||| F1=1
+el ||| the ||| F2=1
+el ||| black ||| F3=1
+el ||| book ||| F11=1
+gato ||| cat ||| F4=1 NN=1
+gato ||| black ||| F5=1
+gato ||| the ||| F6=1
+negro ||| the ||| F7=1
+negro ||| cat ||| F8=1
+negro ||| black ||| F9=1
+libro ||| the ||| F10=1
+libro ||| book ||| F12=1 NN=1
diff --git a/vest/test_aer/ref.0 b/vest/test_aer/ref.0
new file mode 100644
index 00000000..734a9c5b
--- /dev/null
+++ b/vest/test_aer/ref.0
@@ -0,0 +1,3 @@
+0-0 1-2 2-1
+0-0 1-1
+0-0 1-1
diff --git a/vest/test_aer/weights b/vest/test_aer/weights
new file mode 100644
index 00000000..afc9282e
--- /dev/null
+++ b/vest/test_aer/weights
@@ -0,0 +1,13 @@
+F1 0.1
+F2 -.5980815
+F3 0.24235
+F4 0.625
+F5 0.4514
+F6 0.112316
+F7 -0.123415
+F8 -0.25390285
+F9 -0.23852
+F10 0.646
+F11 0.413141
+F12 0.343216
+NN -0.1215
diff --git a/vest/test_data/0.json.gz b/vest/test_data/0.json.gz
new file mode 100644
index 00000000..30f8dd77
--- /dev/null
+++ b/vest/test_data/0.json.gz
diff --git a/vest/test_data/1.json.gz b/vest/test_data/1.json.gz
new file mode 100644
index 00000000..c82cc179
--- /dev/null
+++ b/vest/test_data/1.json.gz
diff --git a/vest/test_data/c2e.txt.0 b/vest/test_data/c2e.txt.0
new file mode 100644
index 00000000..12c4abe9
--- /dev/null
+++ b/vest/test_data/c2e.txt.0
@@ -0,0 +1,2 @@
+australia reopens embassy in manila
+( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack .
diff --git a/vest/test_data/c2e.txt.1 b/vest/test_data/c2e.txt.1
new file mode 100644
index 00000000..4ac12df1
--- /dev/null
+++ b/vest/test_data/c2e.txt.1
@@ -0,0 +1,2 @@
+australia reopened manila embassy
+( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack .
diff --git a/vest/test_data/c2e.txt.2 b/vest/test_data/c2e.txt.2
new file mode 100644
index 00000000..2f67b72f
--- /dev/null
+++ b/vest/test_data/c2e.txt.2
@@ -0,0 +1,2 @@
+australia to reopen embassy in manila
+( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so-called confirmed terrorist attack threats .
diff --git a/vest/test_data/c2e.txt.3 b/vest/test_data/c2e.txt.3
new file mode 100644
index 00000000..5483cef6
--- /dev/null
+++ b/vest/test_data/c2e.txt.3
@@ -0,0 +1,2 @@
+australia to re - open its embassy to manila
+( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so-called " clear " threat of terrorist attack 7 weeks ago .
diff --git a/vest/test_data/re.txt.0 b/vest/test_data/re.txt.0
new file mode 100644
index 00000000..86eff087
--- /dev/null
+++ b/vest/test_data/re.txt.0
@@ -0,0 +1,5 @@
+erdogan states turkey to reject any pressures to urge it to recognize cyprus
+ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened .
+erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus .
+we will discuss this dossier in the course of membership negotiations . "
+he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . "
diff --git a/vest/test_data/re.txt.1 b/vest/test_data/re.txt.1
new file mode 100644
index 00000000..2140f198
--- /dev/null
+++ b/vest/test_data/re.txt.1
@@ -0,0 +1,5 @@
+erdogan confirms turkey will resist any pressure to recognize cyprus
+ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara .
+erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus .
+we shall discuss this issue in the course of the membership negotiations . "
+he added : " let me be clear - i cannot confine turkey . this is something we do not accept . "
diff --git a/vest/test_data/re.txt.2 b/vest/test_data/re.txt.2
new file mode 100644
index 00000000..94e46286
--- /dev/null
+++ b/vest/test_data/re.txt.2
@@ -0,0 +1,5 @@
+erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus
+ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara .
+erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus .
+we shall discuss this dossier during the negotiations on joining . "
+and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . "
diff --git a/vest/test_data/re.txt.3 b/vest/test_data/re.txt.3
new file mode 100644
index 00000000..f87c3308
--- /dev/null
+++ b/vest/test_data/re.txt.3
@@ -0,0 +1,5 @@
+erdogan stresses that turkey will reject all pressures to force it to recognize cyprus
+ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not .
+erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus .
+we will discuss this file during the negotiations on joining . "
+he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . "
diff --git a/vest/viterbi_envelope.cc b/vest/viterbi_envelope.cc
new file mode 100644
index 00000000..5c24c018
--- /dev/null
+++ b/vest/viterbi_envelope.cc
@@ -0,0 +1,176 @@
+#include "viterbi_envelope.h"
+
+#include <cassert>
+#include <limits>
+
+using namespace std;
+using boost::shared_ptr;
+
+ostream& operator<<(ostream& os, const ViterbiEnvelope& env) {
+  os << '<';
+  const vector<shared_ptr<Segment> >& segs = env.GetSortedSegs();
+  for (int i = 0; i < segs.size(); ++i)
+    os << (i==0 ? "" : "|") << "x=" << segs[i]->x << ",b=" << segs[i]->b << ",m=" << segs[i]->m << ",p1=" << segs[i]->p1 << ",p2=" << segs[i]->p2;
+  return os << '>';
+}
+
+ViterbiEnvelope::ViterbiEnvelope(int i) {
+  if (i == 0) {
+    // do nothing - <>
+  } else if (i == 1) {
+    segs.push_back(shared_ptr<Segment>(new Segment(0, 0, 0, shared_ptr<Segment>(), shared_ptr<Segment>())));
+    assert(this->IsMultiplicativeIdentity());
+  } else {
+    cerr << "Only can create ViterbiEnvelope semiring 0 and 1 with this constructor!\n";
+    abort();
+  }
+}
+
+struct SlopeCompare {
+  bool operator() (const shared_ptr<Segment>& a, const shared_ptr<Segment>& b) const {
+    return a->m < b->m;
+  }
+};
+
+const ViterbiEnvelope& ViterbiEnvelope::operator+=(const ViterbiEnvelope& other) {
+  if (!other.is_sorted) other.Sort();
+  if (segs.empty()) {
+    segs = other.segs;
+    return *this;
+  }
+  is_sorted = false;
+  int j = segs.size();
+  segs.resize(segs.size() + other.segs.size());
+  for (int i = 0; i < other.segs.size(); ++i)
+    segs[j++] = other.segs[i];
+  assert(j == segs.size());
+  return *this;
+}
+
+void ViterbiEnvelope::Sort() const {
+  sort(segs.begin(), segs.end(), SlopeCompare());
+  const int k = segs.size();
+  int j = 0;
+  for (int i = 0; i < k; ++i) {
+    Segment l = *segs[i];
+    l.x = kMinusInfinity;
+    // cerr << "m=" << l.m << endl;
+    if (0 < j) {
+      if (segs[j-1]->m == l.m) {   // lines are parallel
+        if (l.b <= segs[j-1]->b) continue;
+        --j;
+      }
+      while(0 < j) {
+        l.x = (l.b - segs[j-1]->b) / (segs[j-1]->m - l.m);
+        if (segs[j-1]->x < l.x) break;
+        --j;
+      }
+      if (0 == j) l.x = kMinusInfinity;
+    }
+    *segs[j++] = l;
+  }
+  segs.resize(j);
+  is_sorted = true;
+}
+
+const ViterbiEnvelope& ViterbiEnvelope::operator*=(const ViterbiEnvelope& other) {
+  if (other.IsMultiplicativeIdentity()) { return *this; }
+  if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; }
+
+  if (!is_sorted) Sort();
+  if (!other.is_sorted) other.Sort();
+
+  if (this->IsEdgeEnvelope()) {
+//    if (other.size() > 1)
+//      cerr << *this << " (TIMES) " << other << endl;
+    shared_ptr<Segment> edge_parent = segs[0];
+    const double& edge_b = edge_parent->b;
+    const double& edge_m = edge_parent->m;
+    segs.clear();
+    for (int i = 0; i < other.segs.size(); ++i) {
+      const Segment& seg = *other.segs[i];
+      const double m = seg.m + edge_m;
+      const double b = seg.b + edge_b;
+      const double& x = seg.x;       // x's don't change with *
+      segs.push_back(shared_ptr<Segment>(new Segment(x, m, b, edge_parent, other.segs[i])));
+      assert(segs.back()->p1->edge);
+    }
+//    if (other.size() > 1)
+//      cerr << " = " << *this << endl;
+  } else {
+    vector<shared_ptr<Segment> > new_segs;
+    int this_i = 0;
+    int other_i = 0;
+    const int this_size  = segs.size();
+    const int other_size = other.segs.size();
+    double cur_x = kMinusInfinity;   // moves from left to right across the
+                                     // real numbers, stopping for all inter-
+                                     // sections
+    double this_next_val  = (1 < this_size  ? segs[1]->x       : kPlusInfinity);
+    double other_next_val = (1 < other_size ? other.segs[1]->x : kPlusInfinity);
+    while (this_i < this_size && other_i < other_size) {
+      const Segment& this_seg = *segs[this_i];
+      const Segment& other_seg= *other.segs[other_i];
+      const double m = this_seg.m + other_seg.m;
+      const double b = this_seg.b + other_seg.b;
+ 
+      new_segs.push_back(shared_ptr<Segment>(new Segment(cur_x, m, b, segs[this_i], other.segs[other_i])));
+      int comp = 0;
+      if (this_next_val < other_next_val) comp = -1; else
+        if (this_next_val > other_next_val) comp = 1;
+      if (0 == comp) {  // the next values are equal, advance both indices
+        ++this_i;
+	++other_i;
+        cur_x = this_next_val;  // could be other_next_val (they're equal!)
+        this_next_val  = (this_i+1  < this_size  ? segs[this_i+1]->x        : kPlusInfinity);
+        other_next_val = (other_i+1 < other_size ? other.segs[other_i+1]->x : kPlusInfinity);
+      } else {  // advance the i with the lower x, update cur_x
+        if (-1 == comp) {
+          ++this_i;
+          cur_x = this_next_val;
+          this_next_val =  (this_i+1  < this_size  ? segs[this_i+1]->x        : kPlusInfinity);
+        } else {
+          ++other_i;
+          cur_x = other_next_val;
+          other_next_val = (other_i+1 < other_size ? other.segs[other_i+1]->x : kPlusInfinity);
+        }
+      }
+    }
+    segs.swap(new_segs);
+  }
+  //cerr << "Multiply: result=" << (*this) << endl;
+  return *this;
+}
+
+// recursively construct translation
+void Segment::ConstructTranslation(vector<WordID>* trans) const {
+  const Segment* cur = this;
+  vector<vector<WordID> > ant_trans;
+  while(!cur->edge) {
+    ant_trans.resize(ant_trans.size() + 1);
+    cur->p2->ConstructTranslation(&ant_trans.back());
+    cur = cur->p1.get();
+  }
+  size_t ant_size = ant_trans.size();
+  vector<const vector<WordID>*> pants(ant_size);
+  --ant_size;
+  for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i];
+  cur->edge->rule_->ESubstitute(pants, trans);
+}
+
+void Segment::CollectEdgesUsed(std::vector<bool>* edges_used) const {
+  if (edge) {
+    assert(edge->id_ < edges_used->size());
+    (*edges_used)[edge->id_] = true;
+  }
+  if (p1) p1->CollectEdgesUsed(edges_used);
+  if (p2) p2->CollectEdgesUsed(edges_used);
+}
+
+ViterbiEnvelope ViterbiEnvelopeWeightFunction::operator()(const Hypergraph::Edge& e) const {
+  const double m = direction.dot(e.feature_values_);
+  const double b = origin.dot(e.feature_values_);
+  Segment* seg = new Segment(m, b, e);
+  return ViterbiEnvelope(1, seg);
+}
+
diff --git a/vest/viterbi_envelope.h b/vest/viterbi_envelope.h
new file mode 100644
index 00000000..1689a00e
--- /dev/null
+++ b/vest/viterbi_envelope.h
@@ -0,0 +1,81 @@
+#ifndef _VITERBI_ENVELOPE_H_
+#define _VITERBI_ENVELOPE_H_
+
+#include <vector>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+
+#include "hg.h"
+#include "sparse_vector.h"
+
+static const double kMinusInfinity = -std::numeric_limits<double>::infinity();
+static const double kPlusInfinity = std::numeric_limits<double>::infinity();
+
+struct Segment {
+  Segment() : x(), m(), b(), edge() {}
+  Segment(double _m, double _b) :
+    x(kMinusInfinity), m(_m), b(_b), edge() {}
+  Segment(double _x, double _m, double _b, const boost::shared_ptr<Segment>& p1_, const boost::shared_ptr<Segment>& p2_) :
+    x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {}
+  Segment(double _m, double _b, const Hypergraph::Edge& edge) :
+    x(kMinusInfinity), m(_m), b(_b), edge(&edge) {}
+
+  double x;                   // x intersection with previous segment in env, or -inf if none
+  double m;                   // this line's slope
+  double b;                   // intercept with y-axis
+
+  // we keep a pointer to the "parents" of this segment so we can reconstruct
+  // the Viterbi translation corresponding to this segment
+  boost::shared_ptr<Segment> p1;
+  boost::shared_ptr<Segment> p2;
+
+  // only Segments created from an edge using the ViterbiEnvelopeWeightFunction
+  // have rules
+  // TRulePtr rule;
+  const Hypergraph::Edge* edge;
+
+  // recursively recover the Viterbi translation that will result from setting
+  // the weights to origin + axis * x, where x is any value from this->x up
+  // until the next largest x in the containing ViterbiEnvelope
+  void ConstructTranslation(std::vector<WordID>* trans) const;
+  void CollectEdgesUsed(std::vector<bool>* edges_used) const;
+};
+
+// this is the semiring value type,
+// it defines constructors for 0, 1, and the operations + and *
+struct ViterbiEnvelope {
+  // create semiring zero
+  ViterbiEnvelope() : is_sorted(true) {}  // zero
+  // for debugging:
+  ViterbiEnvelope(const std::vector<boost::shared_ptr<Segment> >& s) : segs(s) { Sort(); }
+  // create semiring 1 or 0
+  explicit ViterbiEnvelope(int i);
+  ViterbiEnvelope(int n, Segment* seg) : is_sorted(true), segs(n, boost::shared_ptr<Segment>(seg)) {}
+  const ViterbiEnvelope& operator+=(const ViterbiEnvelope& other);
+  const ViterbiEnvelope& operator*=(const ViterbiEnvelope& other);
+  bool IsMultiplicativeIdentity() const {
+    return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->edge); }
+  const std::vector<boost::shared_ptr<Segment> >& GetSortedSegs() const {
+    if (!is_sorted) Sort();
+    return segs;
+  }
+  size_t size() const { return segs.size(); }
+
+ private:
+  bool IsEdgeEnvelope() const {
+    return segs.size() == 1 && segs[0]->edge; }
+  void Sort() const;
+  mutable bool is_sorted;
+  mutable std::vector<boost::shared_ptr<Segment> > segs;
+};
+std::ostream& operator<<(std::ostream& os, const ViterbiEnvelope& env);
+
+struct ViterbiEnvelopeWeightFunction {
+  ViterbiEnvelopeWeightFunction(const SparseVector<double>& ori,
+                                const SparseVector<double>& dir) : origin(ori), direction(dir) {}
+  ViterbiEnvelope operator()(const Hypergraph::Edge& e) const;
+  const SparseVector<double> origin;
+  const SparseVector<double> direction;
+};
+
+#endif