author    Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>  2012-11-18 13:35:42 -0500
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>  2012-11-18 13:35:42 -0500
commit    1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 (patch)
tree      33e5f3aa5abff1f41314cf8f6afbd2c2c40e4bfd
parent    7c4665949fb93fb3de402e4ce1d19bef67850d05 (diff)
major restructure of the training code
-rw-r--r-- .gitignore | 28
-rw-r--r-- Makefile.am | 7
-rw-r--r-- configure.ac | 32
-rw-r--r-- dpmert/README.shared-mem | 9
-rw-r--r-- minrisk/Makefile.am | 6
-rw-r--r-- pro/README.shared-mem | 9
-rw-r--r-- training/Makefile.am | 100
-rwxr-xr-x training/add-model1-features-to-scfg.pl | 93
-rw-r--r-- training/collapse_weights.cc | 110
-rw-r--r-- training/crf/Makefile.am | 27
-rw-r--r-- training/crf/cllh_observer.cc (renamed from training/cllh_observer.cc) | 0
-rw-r--r-- training/crf/cllh_observer.h (renamed from training/cllh_observer.h) | 0
-rw-r--r-- training/crf/mpi_batch_optimize.cc (renamed from training/mpi_batch_optimize.cc) | 0
-rw-r--r-- training/crf/mpi_compute_cllh.cc (renamed from training/mpi_compute_cllh.cc) | 0
-rw-r--r-- training/crf/mpi_extract_features.cc (renamed from training/mpi_extract_features.cc) | 0
-rw-r--r-- training/crf/mpi_extract_reachable.cc (renamed from training/mpi_extract_reachable.cc) | 0
-rw-r--r-- training/crf/mpi_flex_optimize.cc (renamed from training/mpi_flex_optimize.cc) | 0
-rw-r--r-- training/crf/mpi_online_optimize.cc (renamed from training/mpi_online_optimize.cc) | 0
-rwxr-xr-x training/dep-reorder/conll2reordering-forest.pl | 65
-rw-r--r-- training/dep-reorder/george.conll | 4
-rwxr-xr-x training/dep-reorder/scripts/conll2simplecfg.pl | 57
-rw-r--r-- training/dpmert/Makefile.am (renamed from dpmert/Makefile.am) | 10
-rw-r--r-- training/dpmert/ces.cc (renamed from dpmert/ces.cc) | 0
-rw-r--r-- training/dpmert/ces.h (renamed from dpmert/ces.h) | 0
-rwxr-xr-x training/dpmert/divide_refs.py (renamed from dpmert/divide_refs.py) | 0
-rwxr-xr-x training/dpmert/dpmert.pl (renamed from dpmert/dpmert.pl) | 17
-rw-r--r-- training/dpmert/error_surface.cc (renamed from dpmert/error_surface.cc) | 0
-rw-r--r-- training/dpmert/error_surface.h (renamed from dpmert/error_surface.h) | 0
-rwxr-xr-x training/dpmert/line_mediator.pl (renamed from dpmert/line_mediator.pl) | 0
-rw-r--r-- training/dpmert/line_optimizer.cc (renamed from dpmert/line_optimizer.cc) | 0
-rw-r--r-- training/dpmert/line_optimizer.h (renamed from dpmert/line_optimizer.h) | 0
-rw-r--r-- training/dpmert/lo_test.cc (renamed from dpmert/lo_test.cc) | 0
-rw-r--r-- training/dpmert/mert_geometry.cc (renamed from dpmert/mert_geometry.cc) | 0
-rw-r--r-- training/dpmert/mert_geometry.h (renamed from dpmert/mert_geometry.h) | 0
-rw-r--r-- training/dpmert/mr_dpmert_generate_mapper_input.cc (renamed from dpmert/mr_dpmert_generate_mapper_input.cc) | 0
-rw-r--r-- training/dpmert/mr_dpmert_map.cc (renamed from dpmert/mr_dpmert_map.cc) | 0
-rw-r--r-- training/dpmert/mr_dpmert_reduce.cc (renamed from dpmert/mr_dpmert_reduce.cc) | 0
-rw-r--r-- training/dpmert/test_aer/README (renamed from dpmert/test_aer/README) | 0
-rw-r--r-- training/dpmert/test_aer/cdec.ini (renamed from dpmert/test_aer/cdec.ini) | 0
-rw-r--r-- training/dpmert/test_aer/corpus.src (renamed from dpmert/test_aer/corpus.src) | 0
-rw-r--r-- training/dpmert/test_aer/grammar (renamed from dpmert/test_aer/grammar) | 0
-rw-r--r-- training/dpmert/test_aer/ref.0 (renamed from dpmert/test_aer/ref.0) | 0
-rw-r--r-- training/dpmert/test_aer/weights (renamed from dpmert/test_aer/weights) | 0
-rw-r--r-- training/dpmert/test_data/0.json.gz (renamed from dpmert/test_data/0.json.gz) | bin 13709 -> 13709 bytes
-rw-r--r-- training/dpmert/test_data/1.json.gz (renamed from dpmert/test_data/1.json.gz) | bin 204803 -> 204803 bytes
-rw-r--r-- training/dpmert/test_data/c2e.txt.0 (renamed from dpmert/test_data/c2e.txt.0) | 0
-rw-r--r-- training/dpmert/test_data/c2e.txt.1 (renamed from dpmert/test_data/c2e.txt.1) | 0
-rw-r--r-- training/dpmert/test_data/c2e.txt.2 (renamed from dpmert/test_data/c2e.txt.2) | 0
-rw-r--r-- training/dpmert/test_data/c2e.txt.3 (renamed from dpmert/test_data/c2e.txt.3) | 0
-rw-r--r-- training/dpmert/test_data/re.txt.0 (renamed from dpmert/test_data/re.txt.0) | 0
-rw-r--r-- training/dpmert/test_data/re.txt.1 (renamed from dpmert/test_data/re.txt.1) | 0
-rw-r--r-- training/dpmert/test_data/re.txt.2 (renamed from dpmert/test_data/re.txt.2) | 0
-rw-r--r-- training/dpmert/test_data/re.txt.3 (renamed from dpmert/test_data/re.txt.3) | 0
-rw-r--r-- training/dtrain/Makefile.am (renamed from dtrain/Makefile.am) | 2
-rw-r--r-- training/dtrain/README.md (renamed from dtrain/README.md) | 0
-rw-r--r-- training/dtrain/dtrain.cc (renamed from dtrain/dtrain.cc) | 0
-rw-r--r-- training/dtrain/dtrain.h (renamed from dtrain/dtrain.h) | 0
-rwxr-xr-x training/dtrain/hstreaming/avg.rb (renamed from dtrain/hstreaming/avg.rb) | 0
-rw-r--r-- training/dtrain/hstreaming/cdec.ini (renamed from dtrain/hstreaming/cdec.ini) | 0
-rw-r--r-- training/dtrain/hstreaming/dtrain.ini (renamed from dtrain/hstreaming/dtrain.ini) | 0
-rwxr-xr-x training/dtrain/hstreaming/dtrain.sh (renamed from dtrain/hstreaming/dtrain.sh) | 0
-rwxr-xr-x training/dtrain/hstreaming/hadoop-streaming-job.sh (renamed from dtrain/hstreaming/hadoop-streaming-job.sh) | 0
-rwxr-xr-x training/dtrain/hstreaming/lplp.rb (renamed from dtrain/hstreaming/lplp.rb) | 0
-rw-r--r-- training/dtrain/hstreaming/red-test (renamed from dtrain/hstreaming/red-test) | 0
-rw-r--r-- training/dtrain/kbestget.h (renamed from dtrain/kbestget.h) | 0
-rw-r--r-- training/dtrain/ksampler.h (renamed from dtrain/ksampler.h) | 0
-rw-r--r-- training/dtrain/pairsampling.h (renamed from dtrain/pairsampling.h) | 0
-rwxr-xr-x training/dtrain/parallelize.rb (renamed from dtrain/parallelize.rb) | 0
-rw-r--r-- training/dtrain/parallelize/test/cdec.ini (renamed from dtrain/parallelize/test/cdec.ini) | 0
-rw-r--r-- training/dtrain/parallelize/test/dtrain.ini (renamed from dtrain/parallelize/test/dtrain.ini) | 0
-rw-r--r-- training/dtrain/parallelize/test/in (renamed from dtrain/parallelize/test/in) | 0
-rw-r--r-- training/dtrain/parallelize/test/refs (renamed from dtrain/parallelize/test/refs) | 0
-rw-r--r-- training/dtrain/score.cc (renamed from dtrain/score.cc) | 0
-rw-r--r-- training/dtrain/score.h (renamed from dtrain/score.h) | 0
-rw-r--r-- training/dtrain/test/example/README (renamed from dtrain/test/example/README) | 0
-rw-r--r-- training/dtrain/test/example/cdec.ini (renamed from dtrain/test/example/cdec.ini) | 0
-rw-r--r-- training/dtrain/test/example/dtrain.ini (renamed from dtrain/test/example/dtrain.ini) | 0
-rw-r--r-- training/dtrain/test/example/expected-output (renamed from dtrain/test/example/expected-output) | 0
-rw-r--r-- training/dtrain/test/parallelize/cdec.ini (renamed from dtrain/test/parallelize/cdec.ini) | 0
-rw-r--r-- training/dtrain/test/parallelize/dtrain.ini (renamed from dtrain/test/parallelize/dtrain.ini) | 0
-rw-r--r-- training/dtrain/test/parallelize/in (renamed from dtrain/test/parallelize/in) | 0
-rw-r--r-- training/dtrain/test/parallelize/refs (renamed from dtrain/test/parallelize/refs) | 0
-rw-r--r-- training/dtrain/test/toy/cdec.ini (renamed from dtrain/test/toy/cdec.ini) | 0
-rw-r--r-- training/dtrain/test/toy/dtrain.ini (renamed from dtrain/test/toy/dtrain.ini) | 0
-rw-r--r-- training/dtrain/test/toy/input (renamed from dtrain/test/toy/input) | 0
-rw-r--r-- training/feature_expectations.cc | 232
-rw-r--r-- training/lbl_model.cc | 421
-rw-r--r-- training/minrisk/Makefile.am | 6
-rwxr-xr-x training/minrisk/minrisk.pl (renamed from minrisk/minrisk.pl) | 20
-rwxr-xr-x training/minrisk/minrisk_generate_input.pl (renamed from minrisk/minrisk_generate_input.pl) | 0
-rw-r--r-- training/minrisk/minrisk_optimize.cc (renamed from minrisk/minrisk_optimize.cc) | 0
-rw-r--r-- training/mira/Makefile.am (renamed from mira/Makefile.am) | 2
-rw-r--r-- training/mira/kbest_mira.cc (renamed from mira/kbest_mira.cc) | 0
-rw-r--r-- training/mpi_em_optimize.cc | 389
-rw-r--r-- training/mr_em_adapted_reduce.cc | 173
-rw-r--r-- training/mr_em_map_adapter.cc | 160
-rw-r--r-- training/mr_optimize_reduce.cc | 231
-rw-r--r-- training/mr_reduce_to_weights.cc | 109
-rw-r--r-- training/pro/Makefile.am (renamed from pro/Makefile.am) | 4
-rwxr-xr-x training/pro/mr_pro_generate_mapper_input.pl (renamed from pro/mr_pro_generate_mapper_input.pl) | 0
-rw-r--r-- training/pro/mr_pro_map.cc (renamed from pro/mr_pro_map.cc) | 0
-rw-r--r-- training/pro/mr_pro_reduce.cc (renamed from pro/mr_pro_reduce.cc) | 0
-rwxr-xr-x training/pro/pro.pl (renamed from pro/pro.pl) | 20
-rw-r--r-- training/rampion/Makefile.am (renamed from rampion/Makefile.am) | 4
-rwxr-xr-x training/rampion/rampion.pl (renamed from rampion/rampion.pl) | 20
-rw-r--r-- training/rampion/rampion_cccp.cc (renamed from rampion/rampion_cccp.cc) | 0
-rwxr-xr-x training/rampion/rampion_generate_input.pl (renamed from rampion/rampion_generate_input.pl) | 0
-rw-r--r-- training/utils/candidate_set.cc (renamed from training/candidate_set.cc) | 0
-rw-r--r-- training/utils/candidate_set.h (renamed from training/candidate_set.h) | 0
-rwxr-xr-x training/utils/decode-and-evaluate.pl (renamed from dpmert/decode-and-evaluate.pl) | 8
-rw-r--r-- training/utils/entropy.cc (renamed from training/entropy.cc) | 0
-rw-r--r-- training/utils/entropy.h (renamed from training/entropy.h) | 0
-rw-r--r-- training/utils/grammar_convert.cc (renamed from training/grammar_convert.cc) | 0
-rw-r--r-- training/utils/lbfgs.h (renamed from training/lbfgs.h) | 0
-rw-r--r-- training/utils/lbfgs_test.cc (renamed from training/lbfgs_test.cc) | 0
-rwxr-xr-x training/utils/libcall.pl (renamed from dpmert/libcall.pl) | 0
-rw-r--r-- training/utils/online_optimizer.cc (renamed from training/online_optimizer.cc) | 0
-rw-r--r-- training/utils/online_optimizer.h (renamed from training/online_optimizer.h) | 0
-rw-r--r-- training/utils/optimize.cc (renamed from training/optimize.cc) | 0
-rw-r--r-- training/utils/optimize.h (renamed from training/optimize.h) | 0
-rw-r--r-- training/utils/optimize_test.cc (renamed from training/optimize_test.cc) | 0
-rwxr-xr-x training/utils/parallelize.pl (renamed from dpmert/parallelize.pl) | 2
-rw-r--r-- training/utils/risk.cc (renamed from training/risk.cc) | 0
-rw-r--r-- training/utils/risk.h (renamed from training/risk.h) | 0
-rw-r--r-- training/utils/sentclient.c (renamed from dpmert/sentclient.c) | 0
-rw-r--r-- training/utils/sentserver.c (renamed from dpmert/sentserver.c) | 0
-rw-r--r-- training/utils/sentserver.h (renamed from dpmert/sentserver.h) | 0
-rw-r--r-- word-aligner/Makefile.am | 6
-rw-r--r-- word-aligner/fast_align.cc (renamed from training/fast_align.cc) | 0
-rw-r--r-- word-aligner/makefiles/makefile.grammars | 2
-rwxr-xr-x word-aligner/paste-parallel-files.pl | 35
-rw-r--r-- word-aligner/ttables.cc (renamed from training/ttables.cc) | 0
-rw-r--r-- word-aligner/ttables.h (renamed from training/ttables.h) | 0
133 files changed, 149 insertions(+), 2271 deletions(-)
diff --git a/.gitignore b/.gitignore
index aa2e64eb..c6023822 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+example_extff/ff_example.lo
+example_extff/libff_example.la
+mteval/meteor_jar.cc
*.a
*.aux
*.bbl
@@ -176,4 +179,27 @@ utils/reconstruct_weights
utils/small_vector_test
utils/ts
utils/weights_test
-utils/unigram_pyp_lm
+training/crf/mpi_batch_optimize
+training/crf/mpi_compute_cllh
+training/crf/mpi_extract_features
+training/crf/mpi_extract_reachable
+training/crf/mpi_flex_optimize
+training/crf/mpi_online_optimize
+training/dpmert/lo_test
+training/dpmert/mr_dpmert_generate_mapper_input
+training/dpmert/mr_dpmert_map
+training/dpmert/mr_dpmert_reduce
+training/dpmert/sentclient
+training/dpmert/sentserver
+training/dtrain/dtrain
+training/minrisk/minrisk_optimize
+training/mira/kbest_mira
+training/pro/mr_pro_map
+training/pro/mr_pro_reduce
+training/rampion/rampion_cccp
+training/utils/Makefile.am
+training/utils/lbfgs_test
+training/utils/optimize_test
+training/utils/sentclient
+training/utils/sentserver
+word-aligner/fast_align
diff --git a/Makefile.am b/Makefile.am
index 7ca7268a..dbf604a1 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -10,12 +10,7 @@ SUBDIRS = \
decoder \
training \
training/liblbfgs \
- mira \
- dtrain \
- dpmert \
- pro \
- rampion \
- minrisk \
+ word-aligner \
example_extff
#gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
diff --git a/configure.ac b/configure.ac
index 09fc5c5b..366112a3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -82,26 +82,34 @@ AC_PROG_INSTALL
CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H"
+# core cdec stuff
AC_CONFIG_FILES([Makefile])
AC_CONFIG_FILES([utils/Makefile])
AC_CONFIG_FILES([mteval/Makefile])
+AC_CONFIG_FILES([mteval/meteor_jar.cc])
AC_CONFIG_FILES([decoder/Makefile])
-AC_CONFIG_FILES([training/Makefile])
-AC_CONFIG_FILES([training/liblbfgs/Makefile])
-AC_CONFIG_FILES([dpmert/Makefile])
-AC_CONFIG_FILES([pro/Makefile])
-AC_CONFIG_FILES([rampion/Makefile])
-AC_CONFIG_FILES([minrisk/Makefile])
+AC_CONFIG_FILES([python/setup.py])
+AC_CONFIG_FILES([word-aligner/Makefile])
+
+# KenLM stuff
AC_CONFIG_FILES([klm/util/Makefile])
AC_CONFIG_FILES([klm/lm/Makefile])
AC_CONFIG_FILES([klm/search/Makefile])
-AC_CONFIG_FILES([mira/Makefile])
-AC_CONFIG_FILES([dtrain/Makefile])
-AC_CONFIG_FILES([example_extff/Makefile])
-AC_CONFIG_FILES([mteval/meteor_jar.cc])
-
-AC_CONFIG_FILES([python/setup.py])
+# training stuff
+AC_CONFIG_FILES([training/Makefile])
+AC_CONFIG_FILES([training/utils/Makefile])
+AC_CONFIG_FILES([training/liblbfgs/Makefile])
+AC_CONFIG_FILES([training/crf/Makefile])
+AC_CONFIG_FILES([training/dpmert/Makefile])
+AC_CONFIG_FILES([training/pro/Makefile])
+AC_CONFIG_FILES([training/rampion/Makefile])
+AC_CONFIG_FILES([training/minrisk/Makefile])
+AC_CONFIG_FILES([training/mira/Makefile])
+AC_CONFIG_FILES([training/dtrain/Makefile])
+
+# external feature function example code
+AC_CONFIG_FILES([example_extff/Makefile])
AC_OUTPUT
diff --git a/dpmert/README.shared-mem b/dpmert/README.shared-mem
deleted file mode 100644
index 7728efc0..00000000
--- a/dpmert/README.shared-mem
+++ /dev/null
@@ -1,9 +0,0 @@
-If you want to run dist-vest.pl on a very large shared memory machine, do the
-following:
-
- ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini
-
-This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the
-decoder must load grammars, language models, etc., J should be smaller than I, but this will depend
-on the system you are running on and the complexity of the models used for decoding.
-
diff --git a/minrisk/Makefile.am b/minrisk/Makefile.am
deleted file mode 100644
index a24f047c..00000000
--- a/minrisk/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-bin_PROGRAMS = minrisk_optimize
-
-minrisk_optimize_SOURCES = minrisk_optimize.cc
-minrisk_optimize_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a -lz
-
-AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training
diff --git a/pro/README.shared-mem b/pro/README.shared-mem
deleted file mode 100644
index 7728efc0..00000000
--- a/pro/README.shared-mem
+++ /dev/null
@@ -1,9 +0,0 @@
-If you want to run dist-vest.pl on a very large shared memory machine, do the
-following:
-
- ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini
-
-This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the
-decoder must load grammars, language models, etc., J should be smaller than I, but this will depend
-on the system you are running on and the complexity of the models used for decoding.
-
diff --git a/training/Makefile.am b/training/Makefile.am
index f9c25391..e95e045f 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -1,91 +1,11 @@
-bin_PROGRAMS = \
- fast_align \
- lbl_model \
- test_ngram \
- mr_em_map_adapter \
- mr_em_adapted_reduce \
- mr_reduce_to_weights \
- mr_optimize_reduce \
- grammar_convert \
- plftools \
- collapse_weights \
- mpi_extract_reachable \
- mpi_extract_features \
- mpi_online_optimize \
- mpi_flex_optimize \
- mpi_batch_optimize \
- mpi_compute_cllh \
- augment_grammar
+SUBDIRS = \
+ liblbfgs \
+ utils \
+ crf \
+ minrisk \
+ dpmert \
+ pro \
+ dtrain \
+ mira \
+ rampion
-noinst_PROGRAMS = \
- lbfgs_test \
- optimize_test
-
-TESTS = lbfgs_test optimize_test
-
-noinst_LIBRARIES = libtraining.a
-libtraining_a_SOURCES = \
- candidate_set.cc \
- entropy.cc \
- optimize.cc \
- online_optimizer.cc \
- risk.cc
-
-mpi_online_optimize_SOURCES = mpi_online_optimize.cc
-mpi_online_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc
-mpi_flex_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc
-mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_extract_features_SOURCES = mpi_extract_features.cc
-mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc
-mpi_batch_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc
-mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-augment_grammar_SOURCES = augment_grammar.cc
-augment_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-test_ngram_SOURCES = test_ngram.cc
-test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-fast_align_SOURCES = fast_align.cc ttables.cc
-fast_align_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-lbl_model_SOURCES = lbl_model.cc
-lbl_model_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-grammar_convert_SOURCES = grammar_convert.cc
-grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-optimize_test_SOURCES = optimize_test.cc
-optimize_test_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-collapse_weights_SOURCES = collapse_weights.cc
-collapse_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-lbfgs_test_SOURCES = lbfgs_test.cc
-lbfgs_test_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_optimize_reduce_SOURCES = mr_optimize_reduce.cc
-mr_optimize_reduce_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_em_map_adapter_SOURCES = mr_em_map_adapter.cc
-mr_em_map_adapter_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_reduce_to_weights_SOURCES = mr_reduce_to_weights.cc
-mr_reduce_to_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_em_adapted_reduce_SOURCES = mr_em_adapted_reduce.cc
-mr_em_adapted_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-plftools_SOURCES = plftools.cc
-plftools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I../klm
diff --git a/training/add-model1-features-to-scfg.pl b/training/add-model1-features-to-scfg.pl
deleted file mode 100755
index a0074317..00000000
--- a/training/add-model1-features-to-scfg.pl
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/perl -w
-
-# [X] ||| so [X,1] die [X,2] der ||| as [X,1] existing [X,2] the ||| 2.47712135315 2.53182387352 5.07100057602 ||| 0-0 2-2 4-4
-# [X] ||| so [X,1] die [X,2] der ||| this [X,1] the [X,2] of ||| 2.47712135315 3.19828724861 2.38270020485 ||| 0-0 2-2 4-4
-# [X] ||| so [X,1] die [X,2] der ||| as [X,1] the [X,2] the ||| 2.47712135315 2.53182387352 1.48463630676 ||| 0-0 2-2 4-4
-# [X] ||| so [X,1] die [X,2] der ||| is [X,1] the [X,2] of the ||| 2.47712135315 3.45197868347 2.64251494408 ||| 0-0 2-2 4-4 4-5
-
-die "Usage: $0 model1.f-e model1.e-f < grammar.scfg\n (use trianing/model1 to extract the model files)\n" unless scalar @ARGV == 2;
-
-my $fm1 = shift @ARGV;
-die unless $fm1;
-my $frm1 = shift @ARGV;
-die unless $frm1;
-open M1,"<$fm1" or die;
-open RM1,"<$frm1" or die;
-print STDERR "Loading Model 1 probs from $fm1...\n";
-my %m1;
-while(<M1>) {
- chomp;
- my ($f, $e, $lp) = split /\s+/;
- $m1{$e}->{$f} = exp($lp);
-}
-close M1;
-
-print STDERR "Loading Inverse Model 1 probs from $frm1...\n";
-my %rm1;
-while(<RM1>) {
- chomp;
- my ($e, $f, $lp) = split /\s+/;
- $rm1{$f}->{$e} = exp($lp);
-}
-close RM1;
-
-my @label = qw( EGivenF LexFGivenE LexEGivenF );
-while(<>) {
- chomp;
- my ($l, $f, $e, $sscores, $al) = split / \|\|\| /;
- my @scores = split /\s+/, $sscores;
- unless ($sscores =~ /=/) {
- for (my $i=0; $i<3; $i++) { $scores[$i] = "$label[$i]=$scores[$i]"; }
- }
- push @scores, "RuleCount=1";
- my @fs = split /\s+/, $f;
- my @es = split /\s+/, $e;
- my $flen = scalar @fs;
- my $elen = scalar @es;
- my $pgen = 0;
- my $nongen = 0;
- for (my $i =0; $i < $flen; $i++) {
- my $ftot = 0;
- next if ($fs[$i] =~ /\[X/);
- my $cr = $rm1{$fs[$i]};
- for (my $j=0; $j <= $elen; $j++) {
- my $ej = '<eps>';
- if ($j < $elen) { $ej = $es[$j]; }
- my $p = $cr->{$ej};
- if (defined $p) { $ftot += $p; }
- }
- if ($ftot == 0) { $nongen = 1; last; }
- $pgen += log($ftot) - log($elen);
- }
- my $bad = 0;
- my $good = 0;
- unless ($nongen) { push @scores, "RGood=1"; $good++; } else { push @scores, "RBad=1"; $bad++; }
-
- $nongen = 0;
- $pgen = 0;
- for (my $i =0; $i < $elen; $i++) {
- my $etot = 0;
- next if ($es[$i] =~ /\[X/);
- my $cr = $m1{$es[$i]};
-# print STDERR "$es[$i]\n";
- for (my $j=0; $j <= $flen; $j++) {
- my $fj = '<eps>';
- if ($j < $flen) { $fj = $fs[$j]; }
- my $p = $cr->{$fj};
-# print STDERR " $fs[$j] : $p\n";
- if (defined $p) { $etot += $p; }
- }
- if ($etot == 0) { $nongen = 1; last; }
- $pgen += log($etot) - log($flen);
- }
- unless ($nongen) {
- push @scores, "FGood=1";
- if ($good) { push @scores, "BothGood=1"; } else { push @scores, "SusDel=1"; }
- } else {
- push @scores, "FBad=1";
- if ($bad) { push @scores, "BothBad=1"; } else { push @scores, "SusHall=1"; }
- }
- print "$l ||| $f ||| $e ||| @scores";
- if (defined $al) { print " ||| $al\n"; } else { print "\n"; }
-}
-
diff --git a/training/collapse_weights.cc b/training/collapse_weights.cc
deleted file mode 100644
index c03eb031..00000000
--- a/training/collapse_weights.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-char const* NOTES =
- "ZF_and_E means unnormalized scaled features.\n"
- "For grammars with one nonterminal: F_and_E is joint,\n"
- "F_given_E and E_given_F are conditional.\n"
- "TODO: group rules by root nonterminal and then normalize.\n";
-
-
-#include <iostream>
-#include <fstream>
-#include <tr1/unordered_map>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <boost/functional/hash.hpp>
-
-#include "prob.h"
-#include "filelib.h"
-#include "trule.h"
-#include "weights.h"
-
-namespace po = boost::program_options;
-using namespace std;
-
-typedef std::tr1::unordered_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > > MarginalMap;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("grammar,g", po::value<string>(), "Grammar file")
- ("weights,w", po::value<string>(), "Weights file")
- ("unnormalized,u", "Always include ZF_and_E unnormalized score (default: only if sum was >1)")
- ;
- po::options_description clo("Command line options");
- clo.add_options()
- ("config,c", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- const string cfg = (*conf)["config"].as<string>();
- cerr << "Configuration file: " << cfg << endl;
- ifstream config(cfg.c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || !conf->count("grammar") || !conf->count("weights")) {
- cerr << dcmdline_options << endl;
- cerr << NOTES << endl;
- exit(1);
- }
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- const string wfile = conf["weights"].as<string>();
- const string gfile = conf["grammar"].as<string>();
- vector<weight_t> w;
- Weights::InitFromFile(wfile, &w);
- MarginalMap e_tots;
- MarginalMap f_tots;
- prob_t tot;
- {
- ReadFile rf(gfile);
- assert(*rf.stream());
- istream& in = *rf.stream();
- cerr << "Computing marginals...\n";
- int lc = 0;
- while(in) {
- string line;
- getline(in, line);
- ++lc;
- if (line.empty()) continue;
- TRule tr(line, true);
- if (tr.GetFeatureValues().empty())
- cerr << "Line " << lc << ": empty features - may introduce bias\n";
- prob_t prob;
- prob.logeq(tr.GetFeatureValues().dot(w));
- e_tots[tr.e_] += prob;
- f_tots[tr.f_] += prob;
- tot += prob;
- }
- }
- bool normalized = (fabs(log(tot)) < 0.001);
- cerr << "Total: " << tot << (normalized ? " [normalized]" : " [scaled]") << endl;
- ReadFile rf(gfile);
- istream&in = *rf.stream();
- while(in) {
- string line;
- getline(in, line);
- if (line.empty()) continue;
- TRule tr(line, true);
- const double lp = tr.GetFeatureValues().dot(w);
- if (std::isinf(lp)) { continue; }
- tr.scores_.clear();
-
- cout << tr.AsString() << " ||| F_and_E=" << lp - log(tot);
- if (!normalized || conf.count("unnormalized")) {
- cout << ";ZF_and_E=" << lp;
- }
- cout << ";F_given_E=" << lp - log(e_tots[tr.e_])
- << ";E_given_F=" << lp - log(f_tots[tr.f_]) << endl;
- }
- return 0;
-}
-
diff --git a/training/crf/Makefile.am b/training/crf/Makefile.am
new file mode 100644
index 00000000..d203df25
--- /dev/null
+++ b/training/crf/Makefile.am
@@ -0,0 +1,27 @@
+bin_PROGRAMS = \
+ mpi_batch_optimize \
+ mpi_compute_cllh \
+ mpi_extract_features \
+ mpi_extract_reachable \
+ mpi_flex_optimize \
+ mpi_online_optimize
+
+mpi_online_optimize_SOURCES = mpi_online_optimize.cc
+mpi_online_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+
+mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc
+mpi_flex_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+
+mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc
+mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+
+mpi_extract_features_SOURCES = mpi_extract_features.cc
+mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+
+mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc
+mpi_batch_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+
+mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc
+mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+
+AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/training -I$(top_srcdir)/training/utils -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/cllh_observer.cc b/training/crf/cllh_observer.cc
index 4ec2fa65..4ec2fa65 100644
--- a/training/cllh_observer.cc
+++ b/training/crf/cllh_observer.cc
diff --git a/training/cllh_observer.h b/training/crf/cllh_observer.h
index 0de47331..0de47331 100644
--- a/training/cllh_observer.h
+++ b/training/crf/cllh_observer.h
diff --git a/training/mpi_batch_optimize.cc b/training/crf/mpi_batch_optimize.cc
index 2eff07e4..2eff07e4 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/crf/mpi_batch_optimize.cc
diff --git a/training/mpi_compute_cllh.cc b/training/crf/mpi_compute_cllh.cc
index 066389d0..066389d0 100644
--- a/training/mpi_compute_cllh.cc
+++ b/training/crf/mpi_compute_cllh.cc
diff --git a/training/mpi_extract_features.cc b/training/crf/mpi_extract_features.cc
index 6750aa15..6750aa15 100644
--- a/training/mpi_extract_features.cc
+++ b/training/crf/mpi_extract_features.cc
diff --git a/training/mpi_extract_reachable.cc b/training/crf/mpi_extract_reachable.cc
index 2a7c2b9d..2a7c2b9d 100644
--- a/training/mpi_extract_reachable.cc
+++ b/training/crf/mpi_extract_reachable.cc
diff --git a/training/mpi_flex_optimize.cc b/training/crf/mpi_flex_optimize.cc
index b52decdc..b52decdc 100644
--- a/training/mpi_flex_optimize.cc
+++ b/training/crf/mpi_flex_optimize.cc
diff --git a/training/mpi_online_optimize.cc b/training/crf/mpi_online_optimize.cc
index d6968848..d6968848 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/crf/mpi_online_optimize.cc
diff --git a/training/dep-reorder/conll2reordering-forest.pl b/training/dep-reorder/conll2reordering-forest.pl
deleted file mode 100755
index 3cd226be..00000000
--- a/training/dep-reorder/conll2reordering-forest.pl
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
-my $FIRST_CONV = "$script_dir/scripts/conll2simplecfg.pl";
-my $CDEC = "$script_dir/../../decoder/cdec";
-
-our $tfile1 = "grammar1.$$";
-our $tfile2 = "text.$$";
-
-die "Usage: $0 parses.conll\n" unless scalar @ARGV == 1;
-open C, "<$ARGV[0]" or die "Can't read $ARGV[0]: $!";
-
-END { unlink $tfile1; unlink "$tfile1.cfg"; unlink $tfile2; }
-
-my $first = 1;
-open T, ">$tfile1" or die "Can't write $tfile1: $!";
-my $lc = 0;
-my $flag = 0;
-my @words = ();
-while(<C>) {
- print T;
- chomp;
- if (/^$/) {
- if ($first) { $first = undef; } else { if ($flag) { print "\n"; $flag = 0; } }
- $first = undef;
- close T;
- open SO, ">$tfile2" or die "Can't write $tfile2: $!";
- print SO "@words\n";
- close SO;
- @words=();
- `$FIRST_CONV < $tfile1 > $tfile1.cfg`;
- if ($? != 0) {
- die "Error code: $?";
- }
- my $cfg = `$CDEC -n -S 10000 -f scfg -g $tfile1.cfg -i $tfile2 --show_cfg_search_space 2>/dev/null`;
- if ($? != 0) {
- die "Error code: $?";
- }
- my @rules = split /\n/, $cfg;
- shift @rules; # get rid of output
- for my $rule (@rules) {
- my ($lhs, $f, $e, $feats) = split / \|\|\| /, $rule;
- $f =~ s/,\d\]/\]/g;
- $feats = 'TOP=1' unless $feats;
- if ($lhs =~ /\[Goal_\d+\]/) { $lhs = '[S]'; }
- print "$lhs ||| $f ||| $feats\n";
- if ($e eq '[1] [2]') {
- my ($a, $b) = split /\s+/, $f;
- $feats =~ s/=1$//;
- my ($x, $y) = split /_/, $feats;
- print "$lhs ||| $b $a ||| ${y}_$x=1\n";
- }
- $flag = 1;
- }
- open T, ">$tfile1" or die "Can't write $tfile1: $!";
- $lc = -1;
- } else {
- my ($ind, $word, @dmmy) = split /\s+/;
- push @words, $word;
- }
- $lc++;
-}
-close T;
-
diff --git a/training/dep-reorder/george.conll b/training/dep-reorder/george.conll
deleted file mode 100644
index 7eebb360..00000000
--- a/training/dep-reorder/george.conll
+++ /dev/null
@@ -1,4 +0,0 @@
-1 George _ GEORGE _ _ 2 X _ _
-2 hates _ HATES _ _ 0 X _ _
-3 broccoli _ BROC _ _ 2 X _ _
-
diff --git a/training/dep-reorder/scripts/conll2simplecfg.pl b/training/dep-reorder/scripts/conll2simplecfg.pl
deleted file mode 100755
index b101347a..00000000
--- a/training/dep-reorder/scripts/conll2simplecfg.pl
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-# 1 在 _ 10 _ _ 4 X _ _
-# 2 门厅 _ 3 _ _ 1 X _ _
-# 3 下面 _ 23 _ _ 4 X _ _
-# 4 。 _ 45 _ _ 0 X _ _
-
-my @ldeps;
-my @rdeps;
-@ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; }
-@rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; }
-my $rootcat = 0;
-my @cats = ('S');
-my $len = 0;
-my @noposcats = ('S');
-while(<>) {
- chomp;
- if (/^\s*$/) {
- write_cfg($len);
- $len = 0;
- @cats=('S');
- @noposcats = ('S');
- @ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; }
- @rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; }
- next;
- }
- $len++;
- my ($pos, $word, $d1, $xcat, $d2, $d3, $headpos, $deptype) = split /\s+/;
- my $cat = "C$xcat";
- my $catpos = $cat . "_$pos";
- push @cats, $catpos;
- push @noposcats, $cat;
- print "[$catpos] ||| $word ||| $word ||| Word=1\n";
- if ($headpos == 0) { $rootcat = $pos; }
- if ($pos < $headpos) {
- push @{$ldeps[$headpos]}, $pos;
- } else {
- push @{$rdeps[$headpos]}, $pos;
- }
-}
-
-sub write_cfg {
- my $len = shift;
- for (my $i = 1; $i <= $len; $i++) {
- my @lds = @{$ldeps[$i]};
- for my $ld (@lds) {
- print "[$cats[$i]] ||| [$cats[$ld],1] [$cats[$i],2] ||| [1] [2] ||| $noposcats[$ld]_$noposcats[$i]=1\n";
- }
- my @rds = @{$rdeps[$i]};
- for my $rd (@rds) {
- print "[$cats[$i]] ||| [$cats[$i],1] [$cats[$rd],2] ||| [1] [2] ||| $noposcats[$i]_$noposcats[$rd]=1\n";
- }
- }
- print "[S] ||| [$cats[$rootcat],1] ||| [1] ||| TOP=1\n";
-}
-
diff --git a/dpmert/Makefile.am b/training/dpmert/Makefile.am
index 00768271..ff318bef 100644
--- a/dpmert/Makefile.am
+++ b/training/dpmert/Makefile.am
@@ -1,20 +1,12 @@
bin_PROGRAMS = \
mr_dpmert_map \
mr_dpmert_reduce \
- mr_dpmert_generate_mapper_input \
- sentserver \
- sentclient
+ mr_dpmert_generate_mapper_input
noinst_PROGRAMS = \
lo_test
TESTS = lo_test
-sentserver_SOURCES = sentserver.c
-sentserver_LDFLAGS = -pthread
-
-sentclient_SOURCES = sentclient.c
-sentclient_LDFLAGS = -pthread
-
mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc
mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/dpmert/ces.cc b/training/dpmert/ces.cc
index 157b2d17..157b2d17 100644
--- a/dpmert/ces.cc
+++ b/training/dpmert/ces.cc
diff --git a/dpmert/ces.h b/training/dpmert/ces.h
index e4fa2080..e4fa2080 100644
--- a/dpmert/ces.h
+++ b/training/dpmert/ces.h
diff --git a/dpmert/divide_refs.py b/training/dpmert/divide_refs.py
index b478f918..b478f918 100755
--- a/dpmert/divide_refs.py
+++ b/training/dpmert/divide_refs.py
diff --git a/dpmert/dpmert.pl b/training/dpmert/dpmert.pl
index c4f98870..559420f5 100755
--- a/dpmert/dpmert.pl
+++ b/training/dpmert/dpmert.pl
@@ -2,7 +2,7 @@
use strict;
my @ORIG_ARGV=@ARGV;
use Cwd qw(getcwd);
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; }
# Skip local config (used for distributing jobs) if we're running in local-only mode
use LocalConfig;
@@ -17,21 +17,22 @@ my $srcFile; # deprecated
my $refFiles; # deprecated
my $default_jobs = env_default_jobs();
my $bin_dir = $SCRIPT_DIR;
+my $util_dir = "$SCRIPT_DIR/../utils";
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
-my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+my $FAST_SCORE="$bin_dir/../../mteval/fast_score";
die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
my $MAPINPUT = "$bin_dir/mr_dpmert_generate_mapper_input";
my $MAPPER = "$bin_dir/mr_dpmert_map";
my $REDUCER = "$bin_dir/mr_dpmert_reduce";
-my $parallelize = "$bin_dir/parallelize.pl";
-my $libcall = "$bin_dir/libcall.pl";
-my $sentserver = "$bin_dir/sentserver";
-my $sentclient = "$bin_dir/sentclient";
-my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
+my $parallelize = "$util_dir/parallelize.pl";
+my $libcall = "$util_dir/libcall.pl";
+my $sentserver = "$util_dir/sentserver";
+my $sentclient = "$util_dir/sentclient";
+my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm";
my $SCORER = $FAST_SCORE;
die "Can't find $MAPPER" unless -x $MAPPER;
-my $cdec = "$bin_dir/../decoder/cdec";
+my $cdec = "$bin_dir/../../decoder/cdec";
die "Can't find decoder in $cdec" unless -x $cdec;
die "Can't find $parallelize" unless -x $parallelize;
die "Can't find $libcall" unless -e $libcall;
diff --git a/dpmert/error_surface.cc b/training/dpmert/error_surface.cc
index 515b67f8..515b67f8 100644
--- a/dpmert/error_surface.cc
+++ b/training/dpmert/error_surface.cc
diff --git a/dpmert/error_surface.h b/training/dpmert/error_surface.h
index bb65847b..bb65847b 100644
--- a/dpmert/error_surface.h
+++ b/training/dpmert/error_surface.h
diff --git a/dpmert/line_mediator.pl b/training/dpmert/line_mediator.pl
index bc2bb24c..bc2bb24c 100755
--- a/dpmert/line_mediator.pl
+++ b/training/dpmert/line_mediator.pl
diff --git a/dpmert/line_optimizer.cc b/training/dpmert/line_optimizer.cc
index 9cf33502..9cf33502 100644
--- a/dpmert/line_optimizer.cc
+++ b/training/dpmert/line_optimizer.cc
diff --git a/dpmert/line_optimizer.h b/training/dpmert/line_optimizer.h
index 83819f41..83819f41 100644
--- a/dpmert/line_optimizer.h
+++ b/training/dpmert/line_optimizer.h
diff --git a/dpmert/lo_test.cc b/training/dpmert/lo_test.cc
index 95a08d3d..95a08d3d 100644
--- a/dpmert/lo_test.cc
+++ b/training/dpmert/lo_test.cc
diff --git a/dpmert/mert_geometry.cc b/training/dpmert/mert_geometry.cc
index d6973658..d6973658 100644
--- a/dpmert/mert_geometry.cc
+++ b/training/dpmert/mert_geometry.cc
diff --git a/dpmert/mert_geometry.h b/training/dpmert/mert_geometry.h
index a8b6959e..a8b6959e 100644
--- a/dpmert/mert_geometry.h
+++ b/training/dpmert/mert_geometry.h
diff --git a/dpmert/mr_dpmert_generate_mapper_input.cc b/training/dpmert/mr_dpmert_generate_mapper_input.cc
index 199cd23a..199cd23a 100644
--- a/dpmert/mr_dpmert_generate_mapper_input.cc
+++ b/training/dpmert/mr_dpmert_generate_mapper_input.cc
diff --git a/dpmert/mr_dpmert_map.cc b/training/dpmert/mr_dpmert_map.cc
index d1efcf96..d1efcf96 100644
--- a/dpmert/mr_dpmert_map.cc
+++ b/training/dpmert/mr_dpmert_map.cc
diff --git a/dpmert/mr_dpmert_reduce.cc b/training/dpmert/mr_dpmert_reduce.cc
index 31512a03..31512a03 100644
--- a/dpmert/mr_dpmert_reduce.cc
+++ b/training/dpmert/mr_dpmert_reduce.cc
diff --git a/dpmert/test_aer/README b/training/dpmert/test_aer/README
index 819b2e32..819b2e32 100644
--- a/dpmert/test_aer/README
+++ b/training/dpmert/test_aer/README
diff --git a/dpmert/test_aer/cdec.ini b/training/dpmert/test_aer/cdec.ini
index 08187848..08187848 100644
--- a/dpmert/test_aer/cdec.ini
+++ b/training/dpmert/test_aer/cdec.ini
diff --git a/dpmert/test_aer/corpus.src b/training/dpmert/test_aer/corpus.src
index 31b23971..31b23971 100644
--- a/dpmert/test_aer/corpus.src
+++ b/training/dpmert/test_aer/corpus.src
diff --git a/dpmert/test_aer/grammar b/training/dpmert/test_aer/grammar
index 9d857824..9d857824 100644
--- a/dpmert/test_aer/grammar
+++ b/training/dpmert/test_aer/grammar
diff --git a/dpmert/test_aer/ref.0 b/training/dpmert/test_aer/ref.0
index 734a9c5b..734a9c5b 100644
--- a/dpmert/test_aer/ref.0
+++ b/training/dpmert/test_aer/ref.0
diff --git a/dpmert/test_aer/weights b/training/dpmert/test_aer/weights
index afc9282e..afc9282e 100644
--- a/dpmert/test_aer/weights
+++ b/training/dpmert/test_aer/weights
diff --git a/dpmert/test_data/0.json.gz b/training/dpmert/test_data/0.json.gz
index 30f8dd77..30f8dd77 100644
--- a/dpmert/test_data/0.json.gz
+++ b/training/dpmert/test_data/0.json.gz
Binary files differ
diff --git a/dpmert/test_data/1.json.gz b/training/dpmert/test_data/1.json.gz
index c82cc179..c82cc179 100644
--- a/dpmert/test_data/1.json.gz
+++ b/training/dpmert/test_data/1.json.gz
Binary files differ
diff --git a/dpmert/test_data/c2e.txt.0 b/training/dpmert/test_data/c2e.txt.0
index 12c4abe9..12c4abe9 100644
--- a/dpmert/test_data/c2e.txt.0
+++ b/training/dpmert/test_data/c2e.txt.0
diff --git a/dpmert/test_data/c2e.txt.1 b/training/dpmert/test_data/c2e.txt.1
index 4ac12df1..4ac12df1 100644
--- a/dpmert/test_data/c2e.txt.1
+++ b/training/dpmert/test_data/c2e.txt.1
diff --git a/dpmert/test_data/c2e.txt.2 b/training/dpmert/test_data/c2e.txt.2
index 2f67b72f..2f67b72f 100644
--- a/dpmert/test_data/c2e.txt.2
+++ b/training/dpmert/test_data/c2e.txt.2
diff --git a/dpmert/test_data/c2e.txt.3 b/training/dpmert/test_data/c2e.txt.3
index 5483cef6..5483cef6 100644
--- a/dpmert/test_data/c2e.txt.3
+++ b/training/dpmert/test_data/c2e.txt.3
diff --git a/dpmert/test_data/re.txt.0 b/training/dpmert/test_data/re.txt.0
index 86eff087..86eff087 100644
--- a/dpmert/test_data/re.txt.0
+++ b/training/dpmert/test_data/re.txt.0
diff --git a/dpmert/test_data/re.txt.1 b/training/dpmert/test_data/re.txt.1
index 2140f198..2140f198 100644
--- a/dpmert/test_data/re.txt.1
+++ b/training/dpmert/test_data/re.txt.1
diff --git a/dpmert/test_data/re.txt.2 b/training/dpmert/test_data/re.txt.2
index 94e46286..94e46286 100644
--- a/dpmert/test_data/re.txt.2
+++ b/training/dpmert/test_data/re.txt.2
diff --git a/dpmert/test_data/re.txt.3 b/training/dpmert/test_data/re.txt.3
index f87c3308..f87c3308 100644
--- a/dpmert/test_data/re.txt.3
+++ b/training/dpmert/test_data/re.txt.3
diff --git a/dtrain/Makefile.am b/training/dtrain/Makefile.am
index ca9581f5..5b48e756 100644
--- a/dtrain/Makefile.am
+++ b/training/dtrain/Makefile.am
@@ -1,7 +1,7 @@
bin_PROGRAMS = dtrain
dtrain_SOURCES = dtrain.cc score.cc
-dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/dtrain/README.md b/training/dtrain/README.md
index 7edabbf1..7edabbf1 100644
--- a/dtrain/README.md
+++ b/training/dtrain/README.md
diff --git a/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 18286668..18286668 100644
--- a/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
diff --git a/dtrain/dtrain.h b/training/dtrain/dtrain.h
index 4b6f415c..4b6f415c 100644
--- a/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
diff --git a/dtrain/hstreaming/avg.rb b/training/dtrain/hstreaming/avg.rb
index 2599c732..2599c732 100755
--- a/dtrain/hstreaming/avg.rb
+++ b/training/dtrain/hstreaming/avg.rb
diff --git a/dtrain/hstreaming/cdec.ini b/training/dtrain/hstreaming/cdec.ini
index d4f5cecd..d4f5cecd 100644
--- a/dtrain/hstreaming/cdec.ini
+++ b/training/dtrain/hstreaming/cdec.ini
diff --git a/dtrain/hstreaming/dtrain.ini b/training/dtrain/hstreaming/dtrain.ini
index a2c219a1..a2c219a1 100644
--- a/dtrain/hstreaming/dtrain.ini
+++ b/training/dtrain/hstreaming/dtrain.ini
diff --git a/dtrain/hstreaming/dtrain.sh b/training/dtrain/hstreaming/dtrain.sh
index 877ff94c..877ff94c 100755
--- a/dtrain/hstreaming/dtrain.sh
+++ b/training/dtrain/hstreaming/dtrain.sh
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/training/dtrain/hstreaming/hadoop-streaming-job.sh
index 92419956..92419956 100755
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ b/training/dtrain/hstreaming/hadoop-streaming-job.sh
diff --git a/dtrain/hstreaming/lplp.rb b/training/dtrain/hstreaming/lplp.rb
index f0cd58c5..f0cd58c5 100755
--- a/dtrain/hstreaming/lplp.rb
+++ b/training/dtrain/hstreaming/lplp.rb
diff --git a/dtrain/hstreaming/red-test b/training/dtrain/hstreaming/red-test
index 2623d697..2623d697 100644
--- a/dtrain/hstreaming/red-test
+++ b/training/dtrain/hstreaming/red-test
diff --git a/dtrain/kbestget.h b/training/dtrain/kbestget.h
index dd8882e1..dd8882e1 100644
--- a/dtrain/kbestget.h
+++ b/training/dtrain/kbestget.h
diff --git a/dtrain/ksampler.h b/training/dtrain/ksampler.h
index bc2f56cd..bc2f56cd 100644
--- a/dtrain/ksampler.h
+++ b/training/dtrain/ksampler.h
diff --git a/dtrain/pairsampling.h b/training/dtrain/pairsampling.h
index 84be1efb..84be1efb 100644
--- a/dtrain/pairsampling.h
+++ b/training/dtrain/pairsampling.h
diff --git a/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 1d277ff6..1d277ff6 100755
--- a/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
diff --git a/dtrain/parallelize/test/cdec.ini b/training/dtrain/parallelize/test/cdec.ini
index 72e99dc5..72e99dc5 100644
--- a/dtrain/parallelize/test/cdec.ini
+++ b/training/dtrain/parallelize/test/cdec.ini
diff --git a/dtrain/parallelize/test/dtrain.ini b/training/dtrain/parallelize/test/dtrain.ini
index 03f9d240..03f9d240 100644
--- a/dtrain/parallelize/test/dtrain.ini
+++ b/training/dtrain/parallelize/test/dtrain.ini
diff --git a/dtrain/parallelize/test/in b/training/dtrain/parallelize/test/in
index a312809f..a312809f 100644
--- a/dtrain/parallelize/test/in
+++ b/training/dtrain/parallelize/test/in
diff --git a/dtrain/parallelize/test/refs b/training/dtrain/parallelize/test/refs
index 4d3128cb..4d3128cb 100644
--- a/dtrain/parallelize/test/refs
+++ b/training/dtrain/parallelize/test/refs
diff --git a/dtrain/score.cc b/training/dtrain/score.cc
index 34fc86a9..34fc86a9 100644
--- a/dtrain/score.cc
+++ b/training/dtrain/score.cc
diff --git a/dtrain/score.h b/training/dtrain/score.h
index f317c903..f317c903 100644
--- a/dtrain/score.h
+++ b/training/dtrain/score.h
diff --git a/dtrain/test/example/README b/training/dtrain/test/example/README
index 6937b11b..6937b11b 100644
--- a/dtrain/test/example/README
+++ b/training/dtrain/test/example/README
diff --git a/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini
index d5955f0e..d5955f0e 100644
--- a/dtrain/test/example/cdec.ini
+++ b/training/dtrain/test/example/cdec.ini
diff --git a/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini
index 72d50ca1..72d50ca1 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/training/dtrain/test/example/dtrain.ini
diff --git a/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output
index 05326763..05326763 100644
--- a/dtrain/test/example/expected-output
+++ b/training/dtrain/test/example/expected-output
diff --git a/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini
index 72e99dc5..72e99dc5 100644
--- a/dtrain/test/parallelize/cdec.ini
+++ b/training/dtrain/test/parallelize/cdec.ini
diff --git a/dtrain/test/parallelize/dtrain.ini b/training/dtrain/test/parallelize/dtrain.ini
index 03f9d240..03f9d240 100644
--- a/dtrain/test/parallelize/dtrain.ini
+++ b/training/dtrain/test/parallelize/dtrain.ini
diff --git a/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in
index a312809f..a312809f 100644
--- a/dtrain/test/parallelize/in
+++ b/training/dtrain/test/parallelize/in
diff --git a/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs
index 4d3128cb..4d3128cb 100644
--- a/dtrain/test/parallelize/refs
+++ b/training/dtrain/test/parallelize/refs
diff --git a/dtrain/test/toy/cdec.ini b/training/dtrain/test/toy/cdec.ini
index 98b02d44..98b02d44 100644
--- a/dtrain/test/toy/cdec.ini
+++ b/training/dtrain/test/toy/cdec.ini
diff --git a/dtrain/test/toy/dtrain.ini b/training/dtrain/test/toy/dtrain.ini
index a091732f..a091732f 100644
--- a/dtrain/test/toy/dtrain.ini
+++ b/training/dtrain/test/toy/dtrain.ini
diff --git a/dtrain/test/toy/input b/training/dtrain/test/toy/input
index 4d10a9ea..4d10a9ea 100644
--- a/dtrain/test/toy/input
+++ b/training/dtrain/test/toy/input
diff --git a/training/feature_expectations.cc b/training/feature_expectations.cc
deleted file mode 100644
index f1a85495..00000000
--- a/training/feature_expectations.cc
+++ /dev/null
@@ -1,232 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <cassert>
-#include <cmath>
-#include <tr1/memory>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "verbose.h"
-#include "hg.h"
-#include "prob.h"
-#include "inside_outside.h"
-#include "ff_register.h"
-#include "decoder.h"
-#include "filelib.h"
-#include "online_optimizer.h"
-#include "fdict.h"
-#include "weights.h"
-#include "sparse_vector.h"
-#include "sampler.h"
-
-#ifdef HAVE_MPI
-#include <boost/mpi/timer.hpp>
-#include <boost/mpi.hpp>
-namespace mpi = boost::mpi;
-#endif
-
-using namespace std;
-namespace po = boost::program_options;
-
-struct FComp {
- const vector<double>& w_;
- FComp(const vector<double>& w) : w_(w) {}
- bool operator()(int a, int b) const {
- return fabs(w_[a]) > fabs(w_[b]);
- }
-};
-
-void ShowFeatures(const vector<double>& w) {
- vector<int> fnums(w.size());
- for (int i = 0; i < w.size(); ++i)
- fnums[i] = i;
- sort(fnums.begin(), fnums.end(), FComp(w));
- for (vector<int>::iterator i = fnums.begin(); i != fnums.end(); ++i) {
- if (w[*i]) cout << FD::Convert(*i) << ' ' << w[*i] << endl;
- }
-}
-
-void ReadConfig(const string& ini, vector<string>* out) {
- ReadFile rf(ini);
- istream& in = *rf.stream();
- while(in) {
- string line;
- getline(in, line);
- if (!in) continue;
- out->push_back(line);
- }
-}
-
-void StoreConfig(const vector<string>& cfg, istringstream* o) {
- ostringstream os;
- for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; }
- o->str(os.str());
-}
-
-bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("input,i",po::value<string>(),"Corpus of source language sentences")
- ("weights,w",po::value<string>(),"Input feature weights file")
- ("decoder_config,c",po::value<string>(), "cdec.ini file");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || !conf->count("input") || !conf->count("decoder_config")) {
- cerr << dcmdline_options << endl;
- return false;
- }
- return true;
-}
-
-void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c, vector<int>* order) {
- ReadFile rf(fname);
- istream& in = *rf.stream();
- string line;
- int id = 0;
- while(in) {
- getline(in, line);
- if (!in) break;
- if (id % size == rank) {
- c->push_back(line);
- order->push_back(id);
- }
- ++id;
- }
-}
-
-static const double kMINUS_EPSILON = -1e-6;
-
-struct TrainingObserver : public DecoderObserver {
- void Reset() {
- acc_exp.clear();
- total_complete = 0;
- }
-
- virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
- cur_model_exp.clear();
- state = 1;
- }
-
- // compute model expectations, denominator of objective
- virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
- assert(state == 1);
- state = 2;
- const prob_t z = InsideOutside<prob_t,
- EdgeProb,
- SparseVector<prob_t>,
- EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp);
- cur_model_exp /= z;
- acc_exp += cur_model_exp;
- }
-
- virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) {
- cerr << "IGNORING ALIGNMENT FOREST!\n";
- }
-
- virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) {
- if (state == 2) {
- ++total_complete;
- }
- }
-
- void GetExpectations(SparseVector<double>* g) const {
- g->clear();
- for (SparseVector<prob_t>::const_iterator it = acc_exp.begin(); it != acc_exp.end(); ++it)
- g->set_value(it->first, it->second);
- }
-
- int total_complete;
- SparseVector<prob_t> cur_model_exp;
- SparseVector<prob_t> acc_exp;
- int state;
-};
-
-#ifdef HAVE_MPI
-namespace boost { namespace mpi {
- template<>
- struct is_commutative<std::plus<SparseVector<double> >, SparseVector<double> >
- : mpl::true_ { };
-} } // end namespace boost::mpi
-#endif
-
-int main(int argc, char** argv) {
-#ifdef HAVE_MPI
- mpi::environment env(argc, argv);
- mpi::communicator world;
- const int size = world.size();
- const int rank = world.rank();
-#else
- const int size = 1;
- const int rank = 0;
-#endif
- if (size > 1) SetSilent(true); // turn off verbose decoder output
- register_feature_functions();
-
- po::variables_map conf;
- if (!InitCommandLine(argc, argv, &conf))
- return 1;
-
- // load initial weights
- Weights weights;
- if (conf.count("weights"))
- weights.InitFromFile(conf["weights"].as<string>());
-
- vector<string> corpus;
- vector<int> ids;
- ReadTrainingCorpus(conf["input"].as<string>(), rank, size, &corpus, &ids);
- assert(corpus.size() > 0);
-
- vector<string> cdec_ini;
- ReadConfig(conf["decoder_config"].as<string>(), &cdec_ini);
- istringstream ini;
- StoreConfig(cdec_ini, &ini);
- Decoder decoder(&ini);
- if (decoder.GetConf()["input"].as<string>() != "-") {
- cerr << "cdec.ini must not set an input file\n";
- return 1;
- }
-
- SparseVector<double> x;
- weights.InitSparseVector(&x);
- TrainingObserver observer;
-
- weights.InitFromVector(x);
- vector<double> lambdas;
- weights.InitVector(&lambdas);
- decoder.SetWeights(lambdas);
- observer.Reset();
- for (unsigned i = 0; i < corpus.size(); ++i) {
- int id = ids[i];
- decoder.SetId(id);
- decoder.Decode(corpus[i], &observer);
- }
- SparseVector<double> local_exps, exps;
- observer.GetExpectations(&local_exps);
-#ifdef HAVE_MPI
- reduce(world, local_exps, exps, std::plus<SparseVector<double> >(), 0);
-#else
- exps.swap(local_exps);
-#endif
-
- weights.InitFromVector(exps);
- weights.InitVector(&lambdas);
- ShowFeatures(lambdas);
-
- return 0;
-}
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
deleted file mode 100644
index a46ce33c..00000000
--- a/training/lbl_model.cc
+++ /dev/null
@@ -1,421 +0,0 @@
-#include <iostream>
-
-#include "config.h"
-#ifndef HAVE_EIGEN
- int main() { std::cerr << "Please rebuild with --with-eigen PATH\n"; return 1; }
-#else
-
-#include <cstdlib>
-#include <algorithm>
-#include <cmath>
-#include <set>
-#include <cstring> // memset
-#include <ctime>
-
-#ifdef HAVE_MPI
-#include <boost/mpi/timer.hpp>
-#include <boost/mpi.hpp>
-#include <boost/archive/text_oarchive.hpp>
-namespace mpi = boost::mpi;
-#endif
-#include <boost/math/special_functions/fpclassify.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <Eigen/Dense>
-
-#include "corpus_tools.h"
-#include "optimize.h"
-#include "array2d.h"
-#include "m.h"
-#include "lattice.h"
-#include "stringlib.h"
-#include "filelib.h"
-#include "tdict.h"
-
-namespace po = boost::program_options;
-using namespace std;
-
-#define kDIMENSIONS 10
-typedef Eigen::Matrix<double, kDIMENSIONS, 1> RVector;
-typedef Eigen::Matrix<double, 1, kDIMENSIONS> RTVector;
-typedef Eigen::Matrix<double, kDIMENSIONS, kDIMENSIONS> TMatrix;
-vector<RVector> r_src, r_trg;
-
-#ifdef HAVE_MPI
-namespace boost {
-namespace serialization {
-
-template<class Archive>
-void serialize(Archive & ar, RVector & v, const unsigned int version) {
- for (unsigned i = 0; i < kDIMENSIONS; ++i)
- ar & v[i];
-}
-
-} // namespace serialization
-} // namespace boost
-#endif
-
-bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("input,i",po::value<string>(),"Input file")
- ("iterations,I",po::value<unsigned>()->default_value(1000),"Number of iterations of training")
- ("regularization_strength,C",po::value<double>()->default_value(0.1),"L2 regularization strength (0 for no regularization)")
- ("eta", po::value<double>()->default_value(0.1f), "Eta for SGD")
- ("source_embeddings,f", po::value<string>(), "File containing source embeddings (if unset, random vectors will be used)")
- ("target_embeddings,e", po::value<string>(), "File containing target embeddings (if unset, random vectors will be used)")
- ("random_seed,s", po::value<unsigned>(), "Random seed")
- ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)")
- ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (argc < 2 || conf->count("help")) {
- cerr << "Usage " << argv[0] << " [OPTIONS] -i corpus.fr-en\n";
- cerr << dcmdline_options << endl;
- return false;
- }
- return true;
-}
-
-void Normalize(RVector* v) {
- double norm = v->norm();
- assert(norm > 0.0f);
- *v /= norm;
-}
-
-void Flatten(const TMatrix& m, vector<double>* v) {
- unsigned c = 0;
- v->resize(kDIMENSIONS * kDIMENSIONS);
- for (unsigned i = 0; i < kDIMENSIONS; ++i)
- for (unsigned j = 0; j < kDIMENSIONS; ++j) {
- assert(boost::math::isfinite(m(i, j)));
- (*v)[c++] = m(i,j);
- }
-}
-
-void Unflatten(const vector<double>& v, TMatrix* m) {
- unsigned c = 0;
- for (unsigned i = 0; i < kDIMENSIONS; ++i)
- for (unsigned j = 0; j < kDIMENSIONS; ++j) {
- assert(boost::math::isfinite(v[c]));
- (*m)(i, j) = v[c++];
- }
-}
-
-double ApplyRegularization(const double C,
- const vector<double>& weights,
- vector<double>* g) {
- assert(weights.size() == g->size());
- double reg = 0;
- for (size_t i = 0; i < weights.size(); ++i) {
- const double& w_i = weights[i];
- double& g_i = (*g)[i];
- reg += C * w_i * w_i;
- g_i += 2 * C * w_i;
- }
- return reg;
-}
-
-void LoadEmbeddings(const string& filename, vector<RVector>* pv) {
- vector<RVector>& v = *pv;
- cerr << "Reading embeddings from " << filename << " ...\n";
- ReadFile rf(filename);
- istream& in = *rf.stream();
- string line;
- unsigned lc = 0;
- while(getline(in, line)) {
- ++lc;
- size_t cur = line.find(' ');
- if (cur == string::npos || cur == 0) {
- cerr << "Parse error reading line " << lc << ":\n" << line << endl;
- abort();
- }
- WordID w = TD::Convert(line.substr(0, cur));
- if (w >= v.size()) continue;
- RVector& curv = v[w];
- line[cur] = 0;
- size_t start = cur + 1;
- cur = start + 1;
- size_t c = 0;
- while(cur < line.size()) {
- if (line[cur] == ' ') {
- line[cur] = 0;
- curv[c++] = strtod(&line[start], NULL);
- start = cur + 1;
- cur = start;
- if (c == kDIMENSIONS) break;
- }
- ++cur;
- }
- if (c < kDIMENSIONS && cur != start) {
- if (cur < line.size()) line[cur] = 0;
- curv[c++] = strtod(&line[start], NULL);
- }
- if (c != kDIMENSIONS) {
- static bool first = true;
- if (first) {
- cerr << " read " << c << " dimensions from embedding file, but built with " << kDIMENSIONS << " (filling in with random values)\n";
- first = false;
- }
- for (; c < kDIMENSIONS; ++c) curv[c] = rand();
- }
- if (c == kDIMENSIONS && cur != line.size()) {
- static bool first = true;
- if (first) {
- cerr << " embedding file contains more dimensions than configured with, truncating.\n";
- first = false;
- }
- }
- }
-}
-
-int main(int argc, char** argv) {
-#ifdef HAVE_MPI
- std::cerr << "**MPI enabled.\n";
- mpi::environment env(argc, argv);
- mpi::communicator world;
- const int size = world.size();
- const int rank = world.rank();
-#else
- std::cerr << "**MPI disabled.\n";
- const int rank = 0;
- const int size = 1;
-#endif
- po::variables_map conf;
- if (!InitCommandLine(argc, argv, &conf)) return 1;
- const string fname = conf["input"].as<string>();
- const double reg_strength = conf["regularization_strength"].as<double>();
-  const bool has_l2 = reg_strength > 0.0;
-  assert(reg_strength >= 0.0);
- const int ITERATIONS = conf["iterations"].as<unsigned>();
- const double eta = conf["eta"].as<double>();
- const double diagonal_tension = conf["diagonal_tension"].as<double>();
- bool SGD = false;
- if (diagonal_tension < 0.0) {
- cerr << "Invalid value for diagonal_tension: must be >= 0\n";
- return 1;
- }
- string testset;
- if (conf.count("testset")) testset = conf["testset"].as<string>();
-
- unsigned lc = 0;
- vector<double> unnormed_a_i;
- bool flag = false;
- vector<vector<WordID> > srcs, trgs;
- vector<WordID> vocab_e;
- {
- set<WordID> svocab_e, svocab_f;
- CorpusTools::ReadFromFile(fname, &srcs, NULL, &trgs, &svocab_e, rank, size);
- copy(svocab_e.begin(), svocab_e.end(), back_inserter(vocab_e));
- }
- cerr << "Number of target word types: " << vocab_e.size() << endl;
-  const double num_examples = srcs.size(); // lc has not been counted yet at this point
-
- boost::shared_ptr<LBFGSOptimizer> lbfgs;
- if (rank == 0)
- lbfgs.reset(new LBFGSOptimizer(kDIMENSIONS * kDIMENSIONS, 100));
- r_trg.resize(TD::NumWords() + 1);
- r_src.resize(TD::NumWords() + 1);
- vector<set<unsigned> > trg_pos(TD::NumWords() + 1);
-
- if (conf.count("random_seed")) {
- srand(conf["random_seed"].as<unsigned>());
- } else {
- unsigned seed = time(NULL) + rank * 100;
- cerr << "Random seed: " << seed << endl;
- srand(seed);
- }
-
- TMatrix t = TMatrix::Zero();
- if (rank == 0) {
- t = TMatrix::Random() / 50.0;
- for (unsigned i = 1; i < r_trg.size(); ++i) {
- r_trg[i] = RVector::Random();
- r_src[i] = RVector::Random();
- }
- if (conf.count("source_embeddings"))
- LoadEmbeddings(conf["source_embeddings"].as<string>(), &r_src);
- if (conf.count("target_embeddings"))
- LoadEmbeddings(conf["target_embeddings"].as<string>(), &r_trg);
- }
-
- // do optimization
- TMatrix g = TMatrix::Zero();
- vector<TMatrix> exp_src;
- vector<double> z_src;
- vector<double> flat_g, flat_t, rcv_grad;
- Flatten(t, &flat_t);
- bool converged = false;
-#ifdef HAVE_MPI
- mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
- mpi::broadcast(world, r_trg, 0);
- mpi::broadcast(world, r_src, 0);
-#endif
- cerr << "rank=" << rank << ": " << r_trg[0][4] << endl;
- for (int iter = 0; !converged && iter < ITERATIONS; ++iter) {
- if (rank == 0) cerr << "ITERATION " << (iter + 1) << endl;
- Unflatten(flat_t, &t);
- double likelihood = 0;
- double denom = 0.0;
- lc = 0;
- flag = false;
- g *= 0;
- for (unsigned i = 0; i < srcs.size(); ++i) {
- const vector<WordID>& src = srcs[i];
- const vector<WordID>& trg = trgs[i];
- ++lc;
- if (rank == 0 && lc % 1000 == 0) { cerr << '.'; flag = true; }
-      if (rank == 0 && lc % 50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
- denom += trg.size();
-
- exp_src.clear(); exp_src.resize(src.size(), TMatrix::Zero());
- z_src.clear(); z_src.resize(src.size(), 0.0);
- Array2D<TMatrix> exp_refs(src.size(), trg.size(), TMatrix::Zero());
- Array2D<double> z_refs(src.size(), trg.size(), 0.0);
- for (unsigned j = 0; j < trg.size(); ++j)
- trg_pos[trg[j]].insert(j);
-
- for (unsigned i = 0; i < src.size(); ++i) {
- const RVector& r_s = r_src[src[i]];
- const RTVector pred = r_s.transpose() * t;
- TMatrix& exp_m = exp_src[i];
- double& z = z_src[i];
- for (unsigned k = 0; k < vocab_e.size(); ++k) {
- const WordID v_k = vocab_e[k];
- const RVector& r_t = r_trg[v_k];
- const double dot_prod = pred * r_t;
- const double u = exp(dot_prod);
- z += u;
- const TMatrix v = r_s * r_t.transpose() * u;
- exp_m += v;
- set<unsigned>& ref_locs = trg_pos[v_k];
- if (!ref_locs.empty()) {
- for (set<unsigned>::iterator it = ref_locs.begin(); it != ref_locs.end(); ++it) {
- TMatrix& exp_ref_ij = exp_refs(i, *it);
- double& z_ref_ij = z_refs(i, *it);
- z_ref_ij += u;
- exp_ref_ij += v;
- }
- }
- }
- }
- for (unsigned j = 0; j < trg.size(); ++j)
- trg_pos[trg[j]].clear();
-
- // model expectations for a single target generation with
- // uniform alignment prior
- // TODO: when using a non-uniform alignment, m_exp will be
- // a function of j (below)
- double m_z = 0;
- TMatrix m_exp = TMatrix::Zero();
- for (unsigned i = 0; i < src.size(); ++i) {
- m_exp += exp_src[i];
- m_z += z_src[i];
- }
- m_exp /= m_z;
-
- Array2D<bool> al(src.size(), trg.size(), false);
- for (unsigned j = 0; j < trg.size(); ++j) {
- double ref_z = 0;
- TMatrix ref_exp = TMatrix::Zero();
- int max_i = 0;
- double max_s = -9999999;
- for (unsigned i = 0; i < src.size(); ++i) {
- ref_exp += exp_refs(i, j);
- ref_z += z_refs(i, j);
- if (log(z_refs(i, j)) > max_s) {
- max_s = log(z_refs(i, j));
- max_i = i;
- }
- // TODO handle alignment prob
- }
- if (ref_z <= 0) {
- cerr << "TRG=" << TD::Convert(trg[j]) << endl;
- cerr << " LINE=" << lc << " (RANK=" << rank << "/" << size << ")" << endl;
- cerr << " REF_EXP=\n" << ref_exp << endl;
- cerr << " M_EXP=\n" << m_exp << endl;
- abort();
- }
- al(max_i, j) = true;
- ref_exp /= ref_z;
- g += m_exp - ref_exp;
- likelihood += log(ref_z) - log(m_z);
- if (SGD) {
- t -= g * eta / num_examples;
- g *= 0;
- }
- }
-
- if (rank == 0 && (iter == (ITERATIONS - 1) || lc < 12)) { cerr << al << endl; }
- }
- if (flag && rank == 0) { cerr << endl; }
-
- double obj = 0;
- if (!SGD) {
- Flatten(g, &flat_g);
- obj = -likelihood;
-#ifdef HAVE_MPI
- rcv_grad.resize(flat_g.size(), 0.0);
- mpi::reduce(world, &flat_g[0], flat_g.size(), &rcv_grad[0], plus<double>(), 0);
- swap(flat_g, rcv_grad);
- rcv_grad.clear();
-
- double to = 0;
- mpi::reduce(world, obj, to, plus<double>(), 0);
- obj = to;
- double tlh = 0;
- mpi::reduce(world, likelihood, tlh, plus<double>(), 0);
- likelihood = tlh;
- double td = 0;
- mpi::reduce(world, denom, td, plus<double>(), 0);
- denom = td;
-#endif
- }
-
- if (rank == 0) {
- double gn = 0;
- for (unsigned i = 0; i < flat_g.size(); ++i)
- gn += flat_g[i]*flat_g[i];
- const double base2_likelihood = likelihood / log(2);
- cerr << " log_e likelihood: " << likelihood << endl;
- cerr << " log_2 likelihood: " << base2_likelihood << endl;
- cerr << " cross entropy: " << (-base2_likelihood / denom) << endl;
- cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
- cerr << " gradient norm: " << sqrt(gn) << endl;
- if (!SGD) {
- if (has_l2) {
- const double r = ApplyRegularization(reg_strength,
- flat_t,
- &flat_g);
- obj += r;
- cerr << " regularization: " << r << endl;
- }
- lbfgs->Optimize(obj, flat_g, &flat_t);
- converged = (lbfgs->HasConverged());
- }
- }
-#ifdef HAVE_MPI
- mpi::broadcast(world, &flat_t[0], flat_t.size(), 0);
- mpi::broadcast(world, converged, 0);
-#endif
- }
- if (rank == 0)
- cerr << "TRANSLATION MATRIX:" << endl << t << endl;
- return 0;
-}
-
-#endif
-
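
The deleted lbl_model.cc trains a log-bilinear lexical translation model: a source embedding r_f is mapped through the interaction matrix T and scored against every target embedding, with a softmax over the target vocabulary. A minimal Eigen sketch of that scoring step, assuming a fixed dimension of 10 as in kDIMENSIONS above; names are illustrative, and like the original it does no max-subtraction for numerical stability:

    // p(e | f) = exp(r_f^T T r_e) / sum_k exp(r_f^T T r_e_k)
    #include <cmath>
    #include <vector>
    #include <Eigen/Dense>

    typedef Eigen::Matrix<double, 10, 1> RVec;
    typedef Eigen::Matrix<double, 10, 10> TMat;
    typedef std::vector<RVec, Eigen::aligned_allocator<RVec> > RVecList;

    std::vector<double> TranslationProbs(const RVec& r_f, const RVecList& r_e,
                                         const TMat& t) {
      const Eigen::Matrix<double, 1, 10> pred = r_f.transpose() * t;  // as in pred above
      std::vector<double> p(r_e.size());
      double z = 0;
      for (size_t k = 0; k < r_e.size(); ++k) {
        p[k] = std::exp((pred * r_e[k]).value());  // u = exp(dot product)
        z += p[k];
      }
      for (size_t k = 0; k < p.size(); ++k) p[k] /= z;  // normalize by z
      return p;
    }
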
diff --git a/training/minrisk/Makefile.am b/training/minrisk/Makefile.am
new file mode 100644
index 00000000..a15e821e
--- /dev/null
+++ b/training/minrisk/Makefile.am
@@ -0,0 +1,6 @@
+bin_PROGRAMS = minrisk_optimize
+
+minrisk_optimize_SOURCES = minrisk_optimize.cc
+minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a -lz
+
+AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils
diff --git a/minrisk/minrisk.pl b/training/minrisk/minrisk.pl
index d05b9595..0f8bacd0 100755
--- a/minrisk/minrisk.pl
+++ b/training/minrisk/minrisk.pl
@@ -2,7 +2,7 @@
use strict;
my @ORIG_ARGV=@ARGV;
use Cwd qw(getcwd);
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; }
# Skip local config (used for distributing jobs) if we're running in local-only mode
use LocalConfig;
@@ -12,27 +12,27 @@ use POSIX ":sys_wait_h";
my $QSUB_CMD = qsub_args(mert_memory());
my $default_jobs = env_default_jobs();
-my $VEST_DIR="$SCRIPT_DIR/../dpmert";
-require "$VEST_DIR/libcall.pl";
+my $UTILS_DIR="$SCRIPT_DIR/../utils";
+require "$UTILS_DIR/libcall.pl";
# Default settings
my $srcFile;
my $refFiles;
my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
-my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+my $FAST_SCORE="$bin_dir/../../mteval/fast_score";
die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
my $MAPINPUT = "$bin_dir/minrisk_generate_input.pl";
my $MAPPER = "$bin_dir/minrisk_optimize";
-my $parallelize = "$VEST_DIR/parallelize.pl";
-my $libcall = "$VEST_DIR/libcall.pl";
-my $sentserver = "$VEST_DIR/sentserver";
-my $sentclient = "$VEST_DIR/sentclient";
-my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
+my $parallelize = "$UTILS_DIR/parallelize.pl";
+my $libcall = "$UTILS_DIR/libcall.pl";
+my $sentserver = "$UTILS_DIR/sentserver";
+my $sentclient = "$UTILS_DIR/sentclient";
+my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm";
my $SCORER = $FAST_SCORE;
die "Can't find $MAPPER" unless -x $MAPPER;
-my $cdec = "$bin_dir/../decoder/cdec";
+my $cdec = "$bin_dir/../../decoder/cdec";
die "Can't find decoder in $cdec" unless -x $cdec;
die "Can't find $parallelize" unless -x $parallelize;
die "Can't find $libcall" unless -e $libcall;
diff --git a/minrisk/minrisk_generate_input.pl b/training/minrisk/minrisk_generate_input.pl
index b30fc4fd..b30fc4fd 100755
--- a/minrisk/minrisk_generate_input.pl
+++ b/training/minrisk/minrisk_generate_input.pl
diff --git a/minrisk/minrisk_optimize.cc b/training/minrisk/minrisk_optimize.cc
index da8b5260..da8b5260 100644
--- a/minrisk/minrisk_optimize.cc
+++ b/training/minrisk/minrisk_optimize.cc
diff --git a/mira/Makefile.am b/training/mira/Makefile.am
index 3f8f17cd..ae609ede 100644
--- a/mira/Makefile.am
+++ b/training/mira/Makefile.am
@@ -1,6 +1,6 @@
bin_PROGRAMS = kbest_mira
kbest_mira_SOURCES = kbest_mira.cc
-kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/mira/kbest_mira.cc b/training/mira/kbest_mira.cc
index 8b7993dd..8b7993dd 100644
--- a/mira/kbest_mira.cc
+++ b/training/mira/kbest_mira.cc
diff --git a/training/mpi_em_optimize.cc b/training/mpi_em_optimize.cc
deleted file mode 100644
index 48683b15..00000000
--- a/training/mpi_em_optimize.cc
+++ /dev/null
@@ -1,389 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <vector>
-#include <cassert>
-#include <cmath>
-
-#ifdef HAVE_MPI
-#include <mpi.h>
-#endif
-
-#include <boost/shared_ptr.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "verbose.h"
-#include "hg.h"
-#include "prob.h"
-#include "inside_outside.h"
-#include "ff_register.h"
-#include "decoder.h"
-#include "filelib.h"
-#include "optimize.h"
-#include "fdict.h"
-#include "weights.h"
-#include "sparse_vector.h"
-
-using namespace std;
-using boost::shared_ptr;
-namespace po = boost::program_options;
-
-void SanityCheck(const vector<double>& w) {
- for (int i = 0; i < w.size(); ++i) {
- assert(!isnan(w[i]));
- assert(!isinf(w[i]));
- }
-}
-
-struct FComp {
- const vector<double>& w_;
- FComp(const vector<double>& w) : w_(w) {}
- bool operator()(int a, int b) const {
- return fabs(w_[a]) > fabs(w_[b]);
- }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
- vector<int> fnums(w.size());
- for (int i = 0; i < w.size(); ++i)
- fnums[i] = i;
- vector<int>::iterator mid = fnums.begin();
- mid += (w.size() > 10 ? 10 : w.size());
- partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
- cerr << "TOP FEATURES:";
- for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
- cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
- }
- cerr << endl;
-}
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("input_weights,w",po::value<string>(),"Input feature weights file")
- ("training_data,t",po::value<string>(),"Training data")
- ("decoder_config,c",po::value<string>(),"Decoder configuration file")
- ("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || !(conf->count("training_data")) || !conf->count("decoder_config")) {
- cerr << dcmdline_options << endl;
-#ifdef HAVE_MPI
- MPI::Finalize();
-#endif
- exit(1);
- }
-}
-
-void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c) {
- ReadFile rf(fname);
- istream& in = *rf.stream();
- string line;
- int lc = 0;
- while(in) {
- getline(in, line);
- if (!in) break;
- if (lc % size == rank) c->push_back(line);
- ++lc;
- }
-}
-
-static const double kMINUS_EPSILON = -1e-6;
-
-struct TrainingObserver : public DecoderObserver {
- void Reset() {
- total_complete = 0;
- cur_obj = 0;
- tot_obj = 0;
- tot.clear();
- }
-
- void SetLocalGradientAndObjective(SparseVector<double>* g, double* o) const {
- *o = tot_obj;
- *g = tot;
- }
-
- virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
- cur_obj = 0;
- state = 1;
- }
-
- void ExtractExpectedCounts(Hypergraph* hg) {
- vector<prob_t> posts;
- cur.clear();
- const prob_t z = hg->ComputeEdgePosteriors(1.0, &posts);
- cur_obj = log(z);
- for (int i = 0; i < posts.size(); ++i) {
- const SparseVector<double>& efeats = hg->edges_[i].feature_values_;
- const double post = static_cast<double>(posts[i] / z);
- for (SparseVector<double>::const_iterator j = efeats.begin(); j != efeats.end(); ++j)
- cur.add_value(j->first, post);
- }
- }
-
- // compute model expectations, denominator of objective
- virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
- assert(state == 1);
- state = 2;
- ExtractExpectedCounts(hg);
- }
-
-  // replace the translation-forest counts with those from the alignment
-  // (constrained) forest, which is what EM training needs
- virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) {
- assert(state == 2);
- state = 3;
- ExtractExpectedCounts(hg);
- }
-
- virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) {
- ++total_complete;
- tot_obj += cur_obj;
- tot += cur;
- }
-
- int total_complete;
- double cur_obj;
- double tot_obj;
- SparseVector<double> cur, tot;
- int state;
-};
-
-void ReadConfig(const string& ini, vector<string>* out) {
- ReadFile rf(ini);
- istream& in = *rf.stream();
- while(in) {
- string line;
- getline(in, line);
- if (!in) continue;
- out->push_back(line);
- }
-}
-
-void StoreConfig(const vector<string>& cfg, istringstream* o) {
- ostringstream os;
- for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; }
- o->str(os.str());
-}
-
-struct OptimizableMultinomialFamily {
- struct CPD {
- CPD() : z() {}
- double z;
- map<WordID, double> c2counts;
- };
- map<WordID, CPD> counts;
- double Value(WordID conditioning, WordID generated) const {
- map<WordID, CPD>::const_iterator it = counts.find(conditioning);
- assert(it != counts.end());
- map<WordID,double>::const_iterator r = it->second.c2counts.find(generated);
- if (r == it->second.c2counts.end()) return 0;
- return r->second;
- }
- void Increment(WordID conditioning, WordID generated, double count) {
- CPD& cc = counts[conditioning];
- cc.z += count;
- cc.c2counts[generated] += count;
- }
- void Optimize() {
- for (map<WordID, CPD>::iterator i = counts.begin(); i != counts.end(); ++i) {
- CPD& cpd = i->second;
- for (map<WordID, double>::iterator j = cpd.c2counts.begin(); j != cpd.c2counts.end(); ++j) {
- j->second /= cpd.z;
- // cerr << "P(" << TD::Convert(j->first) << " | " << TD::Convert(i->first) << " ) = " << j->second << endl;
- }
- }
- }
- void Clear() {
- counts.clear();
- }
-};
-
-struct CountManager {
- CountManager(size_t num_types) : oms_(num_types) {}
- virtual ~CountManager();
- virtual void AddCounts(const SparseVector<double>& c) = 0;
- void Optimize(SparseVector<double>* weights) {
- for (int i = 0; i < oms_.size(); ++i) {
- oms_[i].Optimize();
- }
- GetOptimalValues(weights);
- for (int i = 0; i < oms_.size(); ++i) {
- oms_[i].Clear();
- }
- }
- virtual void GetOptimalValues(SparseVector<double>* wv) const = 0;
- vector<OptimizableMultinomialFamily> oms_;
-};
-CountManager::~CountManager() {}
-
-struct TaggerCountManager : public CountManager {
-  // 0 = transitions, 1 = emissions
- TaggerCountManager() : CountManager(2) {}
- void AddCounts(const SparseVector<double>& c);
- void GetOptimalValues(SparseVector<double>* wv) const {
- for (set<int>::const_iterator it = fids_.begin(); it != fids_.end(); ++it) {
- int ftype;
- WordID cond, gen;
- bool is_optimized = TaggerCountManager::GetFeature(*it, &ftype, &cond, &gen);
- assert(is_optimized);
- wv->set_value(*it, log(oms_[ftype].Value(cond, gen)));
- }
- }
- // Id:0:a=1 Bi:a_b=1 Bi:b_c=1 Bi:c_d=1 Uni:a=1 Uni:b=1 Uni:c=1 Uni:d=1 Id:1:b=1 Bi:BOS_a=1 Id:2:c=1
- static bool GetFeature(const int fid, int* feature_type, WordID* cond, WordID* gen) {
- const string& feat = FD::Convert(fid);
- if (feat.size() > 5 && feat[0] == 'I' && feat[1] == 'd' && feat[2] == ':') {
- // emission
- const size_t p = feat.rfind(':');
- assert(p != string::npos);
- *cond = TD::Convert(feat.substr(p+1));
- *gen = TD::Convert(feat.substr(3, p - 3));
- *feature_type = 1;
- return true;
- } else if (feat[0] == 'B' && feat.size() > 5 && feat[2] == ':' && feat[1] == 'i') {
- // transition
- const size_t p = feat.rfind('_');
- assert(p != string::npos);
- *gen = TD::Convert(feat.substr(p+1));
- *cond = TD::Convert(feat.substr(3, p - 3));
- *feature_type = 0;
- return true;
- } else if (feat[0] == 'U' && feat.size() > 4 && feat[1] == 'n' && feat[2] == 'i' && feat[3] == ':') {
- // ignore
- return false;
- } else {
- cerr << "Don't know how to deal with feature of type: " << feat << endl;
- abort();
- }
- }
- set<int> fids_;
-};
-
-void TaggerCountManager::AddCounts(const SparseVector<double>& c) {
- for (SparseVector<double>::const_iterator it = c.begin(); it != c.end(); ++it) {
- const double& val = it->second;
- int ftype;
- WordID cond, gen;
- if (GetFeature(it->first, &ftype, &cond, &gen)) {
- oms_[ftype].Increment(cond, gen, val);
- fids_.insert(it->first);
- }
- }
-}
-
-int main(int argc, char** argv) {
-#ifdef HAVE_MPI
- MPI::Init(argc, argv);
- const int size = MPI::COMM_WORLD.Get_size();
- const int rank = MPI::COMM_WORLD.Get_rank();
-#else
- const int size = 1;
- const int rank = 0;
-#endif
- SetSilent(true); // turn off verbose decoder output
- register_feature_functions();
-
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
-
- TaggerCountManager tcm;
-
- // load cdec.ini and set up decoder
- vector<string> cdec_ini;
- ReadConfig(conf["decoder_config"].as<string>(), &cdec_ini);
- istringstream ini;
- StoreConfig(cdec_ini, &ini);
- if (rank == 0) cerr << "Loading grammar...\n";
- Decoder* decoder = new Decoder(&ini);
- if (decoder->GetConf()["input"].as<string>() != "-") {
- cerr << "cdec.ini must not set an input file\n";
-#ifdef HAVE_MPI
- MPI::COMM_WORLD.Abort(1);
-#endif
- }
- if (rank == 0) cerr << "Done loading grammar!\n";
- Weights w;
- if (conf.count("input_weights"))
- w.InitFromFile(conf["input_weights"].as<string>());
-
- double objective = 0;
- bool converged = false;
-
- vector<double> lambdas;
- w.InitVector(&lambdas);
- vector<string> corpus;
- ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus);
- assert(corpus.size() > 0);
-
- int iteration = 0;
- TrainingObserver observer;
- while (!converged) {
- ++iteration;
- observer.Reset();
- if (rank == 0) {
- cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n";
- }
- decoder->SetWeights(lambdas);
- for (int i = 0; i < corpus.size(); ++i)
- decoder->Decode(corpus[i], &observer);
-
- SparseVector<double> x;
- observer.SetLocalGradientAndObjective(&x, &objective);
- cerr << "COUNTS = " << x << endl;
- cerr << " OBJ = " << objective << endl;
- tcm.AddCounts(x);
-
-#if 0
-#ifdef HAVE_MPI
- MPI::COMM_WORLD.Reduce(const_cast<double*>(&gradient.data()[0]), &rcv_grad[0], num_feats, MPI::DOUBLE, MPI::SUM, 0);
- MPI::COMM_WORLD.Reduce(&objective, &to, 1, MPI::DOUBLE, MPI::SUM, 0);
- swap(gradient, rcv_grad);
- objective = to;
-#endif
-#endif
-
- if (rank == 0) {
- SparseVector<double> wsv;
- tcm.Optimize(&wsv);
-
- w.InitFromVector(wsv);
- w.InitVector(&lambdas);
-
- ShowLargestFeatures(lambdas);
-
- converged = iteration > 100;
- if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; }
-
- string fname = "weights.cur.gz";
- if (converged) { fname = "weights.final.gz"; }
- ostringstream vv;
- vv << "Objective = " << objective << " (ITERATION=" << iteration << ")";
- const string svv = vv.str();
- w.WriteToFile(fname, true, &svv);
- } // rank == 0
- int cint = converged;
-#ifdef HAVE_MPI
-    MPI::COMM_WORLD.Bcast(const_cast<double*>(&lambdas.data()[0]), lambdas.size(), MPI::DOUBLE, 0);
- MPI::COMM_WORLD.Bcast(&cint, 1, MPI::INT, 0);
- MPI::COMM_WORLD.Barrier();
-#endif
- converged = cint;
- }
-#ifdef HAVE_MPI
- MPI::Finalize();
-#endif
- return 0;
-}
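
The M-step carried out by OptimizableMultinomialFamily::Optimize() above is plain count normalization within each conditional distribution. A compact sketch of the same update over string-keyed tables (illustrative types, not the WordID-keyed cdec structures):

    // p(gen | cond) = count(cond, gen) / sum_g count(cond, g)
    #include <map>
    #include <string>

    typedef std::map<std::string, double> Dist;  // generated event -> count

    void NormalizeFamilies(std::map<std::string, Dist>* families) {
      for (std::map<std::string, Dist>::iterator f = families->begin();
           f != families->end(); ++f) {
        double z = 0;
        for (Dist::const_iterator it = f->second.begin(); it != f->second.end(); ++it)
          z += it->second;
        for (Dist::iterator it = f->second.begin(); it != f->second.end(); ++it)
          it->second /= z;  // assumes z > 0, since expected counts are positive
      }
    }
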
diff --git a/training/mr_em_adapted_reduce.cc b/training/mr_em_adapted_reduce.cc
deleted file mode 100644
index f65b5440..00000000
--- a/training/mr_em_adapted_reduce.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <cassert>
-#include <cmath>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "filelib.h"
-#include "fdict.h"
-#include "weights.h"
-#include "sparse_vector.h"
-#include "m.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("optimization_method,m", po::value<string>()->default_value("em"), "Optimization method (em, vb)")
- ("input_format,f",po::value<string>()->default_value("b64"),"Encoding of the input (b64 or text)");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help")) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-double NoZero(const double& x) {
- if (x) return x;
- return 1e-35;
-}
-
-void Maximize(const bool use_vb,
- const double& alpha,
- const int total_event_types,
- SparseVector<double>* pc) {
- const SparseVector<double>& counts = *pc;
-
- if (use_vb)
- assert(total_event_types >= counts.size());
-
- double tot = 0;
- for (SparseVector<double>::const_iterator it = counts.begin();
- it != counts.end(); ++it)
- tot += it->second;
-// cerr << " = " << tot << endl;
- assert(tot > 0.0);
- double ltot = log(tot);
- if (use_vb)
- ltot = Md::digamma(tot + total_event_types * alpha);
- for (SparseVector<double>::const_iterator it = counts.begin();
- it != counts.end(); ++it) {
- if (use_vb) {
- pc->set_value(it->first, NoZero(Md::digamma(it->second + alpha) - ltot));
- } else {
- pc->set_value(it->first, NoZero(log(it->second) - ltot));
- }
- }
-#if 0
- if (counts.size() < 50) {
- for (SparseVector<double>::const_iterator it = counts.begin();
- it != counts.end(); ++it) {
- cerr << " p(" << FD::Convert(it->first) << ")=" << exp(it->second);
- }
- cerr << endl;
- }
-#endif
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
-
- const bool use_b64 = conf["input_format"].as<string>() == "b64";
- const bool use_vb = conf["optimization_method"].as<string>() == "vb";
- const double alpha = 1e-09;
- if (use_vb)
- cerr << "Using variational Bayes, make sure alphas are set\n";
-
- const string s_obj = "**OBJ**";
- // E-step
- string cur_key = "";
- SparseVector<double> acc;
- double logprob = 0;
- while(cin) {
- string line;
- getline(cin, line);
- if (line.empty()) continue;
- int feat;
- double val;
- size_t i = line.find("\t");
- const string key = line.substr(0, i);
- assert(i != string::npos);
- ++i;
- if (key != cur_key) {
- if (cur_key.size() > 0) {
- // TODO shouldn't be num_active, should be total number
- // of events
- Maximize(use_vb, alpha, acc.size(), &acc);
- cout << cur_key << '\t';
- if (use_b64)
- B64::Encode(0.0, acc, &cout);
- else
- cout << acc;
- cout << endl;
- acc.clear();
- }
- cur_key = key;
- }
- if (use_b64) {
- SparseVector<double> g;
- double obj;
- if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) {
- cerr << "B64 decoder returned error, skipping!\n";
- continue;
- }
- logprob += obj;
- acc += g;
- } else { // text encoding - your counts will not be accurate!
- while (i < line.size()) {
- size_t start = i;
-        while (i < line.size() && line[i] != '=') ++i;
- if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; }
- string fname = line.substr(start, i - start);
- if (fname == s_obj) {
- feat = -1;
- } else {
- feat = FD::Convert(line.substr(start, i - start));
- }
- ++i;
- start = i;
-        while (i < line.size() && line[i] != ';') ++i;
- if (i - start == 0) continue;
- val = atof(line.substr(start, i - start).c_str());
- ++i;
- if (feat == -1) {
- logprob += val;
- } else {
- acc.add_value(feat, val);
- }
- }
- }
- }
- // TODO shouldn't be num_active, should be total number
- // of events
- Maximize(use_vb, alpha, acc.size(), &acc);
- cout << cur_key << '\t';
- if (use_b64)
- B64::Encode(0.0, acc, &cout);
- else
- cout << acc;
- cout << endl << flush;
-
- cerr << "LOGPROB: " << logprob << endl;
-
- return 0;
-}
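
Maximize() above implements two M-step variants: plain EM sets log p(x) = log c(x) - log(sum of counts), while the variational-Bayes variant replaces log and division with digamma terms under a symmetric Dirichlet prior alpha over N event types. A sketch of both updates, substituting boost::math::digamma for the Md::digamma used above:

    #include <cmath>
    #include <map>
    #include <string>
    #include <boost/math/special_functions/digamma.hpp>

    typedef std::map<std::string, double> Counts;

    // Returns log-probabilities, as emitted by the reducer above.
    Counts MStep(const Counts& counts, bool use_vb, double alpha, int num_event_types) {
      double tot = 0;
      for (Counts::const_iterator it = counts.begin(); it != counts.end(); ++it)
        tot += it->second;
      const double ltot = use_vb
          ? boost::math::digamma(tot + num_event_types * alpha)
          : std::log(tot);
      Counts logp;
      for (Counts::const_iterator it = counts.begin(); it != counts.end(); ++it)
        logp[it->first] = use_vb
            ? boost::math::digamma(it->second + alpha) - ltot
            : std::log(it->second) - ltot;
      return logp;
    }
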
diff --git a/training/mr_em_map_adapter.cc b/training/mr_em_map_adapter.cc
deleted file mode 100644
index ead4598d..00000000
--- a/training/mr_em_map_adapter.cc
+++ /dev/null
@@ -1,160 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <cassert>
-#include <cmath>
-
-#include <boost/utility.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include "boost/tuple/tuple.hpp"
-
-#include "fdict.h"
-#include "sparse_vector.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-// useful for EM models parameterized by a bunch of multinomials
-// this converts event counts (returned from cdec as feature expectations)
-// into different keys and values (which are lists of all the events,
-// conditioned on the key) for summing and normalization by a reducer
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("buffer_size,b", po::value<int>()->default_value(1), "Buffer size (in # of counts) before emitting counts")
- ("format,f",po::value<string>()->default_value("b64"), "Encoding of the input (b64 or text)");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help")) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-struct EventMapper {
- int Map(int fid) {
- int& cv = map_[fid];
- if (!cv) {
- cv = GetConditioningVariable(fid);
- }
- return cv;
- }
- void Clear() { map_.clear(); }
- protected:
- virtual int GetConditioningVariable(int fid) const = 0;
- private:
- map<int, int> map_;
-};
-
-struct LexAlignEventMapper : public EventMapper {
- protected:
- virtual int GetConditioningVariable(int fid) const {
- const string& str = FD::Convert(fid);
- size_t pos = str.rfind("_");
- if (pos == string::npos || pos == 0 || pos >= str.size() - 1) {
- cerr << "Bad feature for EM adapter: " << str << endl;
- abort();
- }
- return FD::Convert(str.substr(0, pos));
- }
-};
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
-
- const bool use_b64 = conf["format"].as<string>() == "b64";
- const int buffer_size = conf["buffer_size"].as<int>();
-
- const string s_obj = "**OBJ**";
- // 0<TAB>**OBJ**=12.2;Feat1=2.3;Feat2=-0.2;
- // 0<TAB>**OBJ**=1.1;Feat1=1.0;
-
- EventMapper* event_mapper = new LexAlignEventMapper;
- map<int, SparseVector<double> > counts;
- size_t total = 0;
- while(cin) {
- string line;
- getline(cin, line);
- if (line.empty()) continue;
- int feat;
- double val;
- size_t i = line.find("\t");
- assert(i != string::npos);
- ++i;
- SparseVector<double> g;
- double obj = 0;
- if (use_b64) {
- if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) {
- cerr << "B64 decoder returned error, skipping!\n";
- continue;
- }
- } else { // text encoding - your counts will not be accurate!
- while (i < line.size()) {
- size_t start = i;
-        while (i < line.size() && line[i] != '=') ++i;
- if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; }
- string fname = line.substr(start, i - start);
- if (fname == s_obj) {
- feat = -1;
- } else {
- feat = FD::Convert(line.substr(start, i - start));
- }
- ++i;
- start = i;
-        while (i < line.size() && line[i] != ';') ++i;
- if (i - start == 0) continue;
- val = atof(line.substr(start, i - start).c_str());
- ++i;
- if (feat == -1) {
- obj = val;
- } else {
- g.set_value(feat, val);
- }
- }
- }
- //cerr << "OBJ: " << obj << endl;
- const SparseVector<double>& cg = g;
- for (SparseVector<double>::const_iterator it = cg.begin(); it != cg.end(); ++it) {
- const int cond_var = event_mapper->Map(it->first);
- SparseVector<double>& cond_counts = counts[cond_var];
- int delta = cond_counts.size();
- cond_counts.add_value(it->first, it->second);
- delta = cond_counts.size() - delta;
- total += delta;
- }
- if (total > buffer_size) {
- for (map<int, SparseVector<double> >::iterator it = counts.begin();
- it != counts.end(); ++it) {
- const SparseVector<double>& cc = it->second;
- cout << FD::Convert(it->first) << '\t';
- if (use_b64) {
- B64::Encode(0.0, cc, &cout);
- } else {
- abort();
- }
- cout << endl;
- }
- cout << flush;
- total = 0;
- counts.clear();
- }
- }
-
- return 0;
-}
-
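
LexAlignEventMapper above groups every event feature under its conditioning variable by splitting the feature name at the last underscore. A sketch of that rule in isolation; the example name in the comment is hypothetical:

    // e.g. "LexFE_chat_cat" -> "LexFE_chat"
    #include <string>

    std::string ConditioningKey(const std::string& feat) {
      const size_t pos = feat.rfind('_');
      if (pos == std::string::npos || pos == 0 || pos >= feat.size() - 1)
        return std::string();  // malformed name; the original aborts here instead
      return feat.substr(0, pos);
    }
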
diff --git a/training/mr_optimize_reduce.cc b/training/mr_optimize_reduce.cc
deleted file mode 100644
index d490192f..00000000
--- a/training/mr_optimize_reduce.cc
+++ /dev/null
@@ -1,231 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <cassert>
-#include <cmath>
-
-#include <boost/shared_ptr.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "optimize.h"
-#include "fdict.h"
-#include "weights.h"
-#include "sparse_vector.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void SanityCheck(const vector<double>& w) {
- for (int i = 0; i < w.size(); ++i) {
- assert(!std::isnan(w[i]));
- assert(!std::isinf(w[i]));
- }
-}
-
-struct FComp {
- const vector<double>& w_;
- FComp(const vector<double>& w) : w_(w) {}
- bool operator()(int a, int b) const {
- return fabs(w_[a]) > fabs(w_[b]);
- }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
- vector<int> fnums(w.size());
- for (int i = 0; i < w.size(); ++i)
- fnums[i] = i;
- vector<int>::iterator mid = fnums.begin();
- mid += (w.size() > 10 ? 10 : w.size());
- partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
- cerr << "TOP FEATURES:";
- for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
- cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
- }
- cerr << endl;
-}
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("input_weights,i",po::value<string>(),"Input feature weights file")
- ("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file")
- ("optimization_method,m", po::value<string>()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)")
- ("state,s",po::value<string>(),"Read (and write if output_state is not set) optimizer state from this state file. In the first iteration, the file should not exist.")
- ("input_format,f",po::value<string>()->default_value("b64"),"Encoding of the input (b64 or text)")
- ("output_state,S", po::value<string>(), "Output state file (optional override)")
- ("correction_buffers,M", po::value<int>()->default_value(10), "Number of gradients for LBFGS to maintain in memory")
- ("eta,e", po::value<double>()->default_value(0.1), "Learning rate for SGD (eta)")
- ("gaussian_prior,p","Use a Gaussian prior on the weights")
- ("means,u", po::value<string>(), "File containing the means for Gaussian prior")
- ("sigma_squared", po::value<double>()->default_value(1.0), "Sigma squared term for spherical Gaussian prior");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || !conf->count("input_weights") || !conf->count("state")) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
-
- const bool use_b64 = conf["input_format"].as<string>() == "b64";
-
- vector<weight_t> lambdas;
- Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas);
- const string s_obj = "**OBJ**";
- int num_feats = FD::NumFeats();
- cerr << "Number of features: " << num_feats << endl;
- const bool gaussian_prior = conf.count("gaussian_prior");
- vector<weight_t> means(num_feats, 0);
- if (conf.count("means")) {
- if (!gaussian_prior) {
- cerr << "Don't use --means without --gaussian_prior!\n";
- exit(1);
- }
- Weights::InitFromFile(conf["means"].as<string>(), &means);
- }
- boost::shared_ptr<BatchOptimizer> o;
- const string omethod = conf["optimization_method"].as<string>();
- if (omethod == "rprop")
- o.reset(new RPropOptimizer(num_feats)); // TODO add configuration
- else
- o.reset(new LBFGSOptimizer(num_feats, conf["correction_buffers"].as<int>()));
- cerr << "Optimizer: " << o->Name() << endl;
- string state_file = conf["state"].as<string>();
- {
- ifstream in(state_file.c_str(), ios::binary);
- if (in)
- o->Load(&in);
- else
- cerr << "No state file found, assuming ITERATION 1\n";
- }
-
- double objective = 0;
- vector<double> gradient(num_feats, 0);
- // 0<TAB>**OBJ**=12.2;Feat1=2.3;Feat2=-0.2;
- // 0<TAB>**OBJ**=1.1;Feat1=1.0;
- int total_lines = 0; // TODO - this should be a count of the
- // training instances!!
- while(cin) {
- string line;
- getline(cin, line);
- if (line.empty()) continue;
- ++total_lines;
- int feat;
- double val;
- size_t i = line.find("\t");
- assert(i != string::npos);
- ++i;
- if (use_b64) {
- SparseVector<double> g;
- double obj;
- if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) {
- cerr << "B64 decoder returned error, skipping gradient!\n";
- cerr << " START: " << line.substr(0,line.size() > 200 ? 200 : line.size()) << endl;
- if (line.size() > 200)
- cerr << " END: " << line.substr(line.size() - 200, 200) << endl;
- cout << "-1\tRESTART\n";
- exit(99);
- }
- objective += obj;
- const SparseVector<double>& cg = g;
- for (SparseVector<double>::const_iterator it = cg.begin(); it != cg.end(); ++it) {
- if (it->first >= num_feats) {
- cerr << "Unexpected feature in gradient: " << FD::Convert(it->first) << endl;
- abort();
- }
- gradient[it->first] -= it->second;
- }
- } else { // text encoding - your gradients will not be accurate!
- while (i < line.size()) {
- size_t start = i;
-        while (i < line.size() && line[i] != '=') ++i;
- if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; }
- string fname = line.substr(start, i - start);
- if (fname == s_obj) {
- feat = -1;
- } else {
- feat = FD::Convert(line.substr(start, i - start));
- if (feat >= num_feats) {
- cerr << "Unexpected feature in gradient: " << line.substr(start, i - start) << endl;
- abort();
- }
- }
- ++i;
- start = i;
-        while (i < line.size() && line[i] != ';') ++i;
- if (i - start == 0) continue;
- val = atof(line.substr(start, i - start).c_str());
- ++i;
- if (feat == -1) {
- objective += val;
- } else {
- gradient[feat] -= val;
- }
- }
- }
- }
-
- if (gaussian_prior) {
- const double sigsq = conf["sigma_squared"].as<double>();
- double norm = 0;
- for (int k = 1; k < lambdas.size(); ++k) {
- const double& lambda_k = lambdas[k];
- if (lambda_k) {
- const double param = (lambda_k - means[k]);
- norm += param * param;
- gradient[k] += param / sigsq;
- }
- }
- const double reg = norm / (2.0 * sigsq);
- cerr << "REGULARIZATION TERM: " << reg << endl;
- objective += reg;
- }
- cerr << "EVALUATION #" << o->EvaluationCount() << " OBJECTIVE: " << objective << endl;
- double gnorm = 0;
- for (int i = 0; i < gradient.size(); ++i)
- gnorm += gradient[i] * gradient[i];
- cerr << " GNORM=" << sqrt(gnorm) << endl;
- vector<double> old = lambdas;
- int c = 0;
- while (old == lambdas) {
- ++c;
- if (c > 1) { cerr << "Same lambdas, repeating optimization\n"; }
- o->Optimize(objective, gradient, &lambdas);
- assert(c < 5);
- }
- old.clear();
- SanityCheck(lambdas);
- ShowLargestFeatures(lambdas);
- Weights::WriteToFile(conf["output_weights"].as<string>(), lambdas, false);
-
- const bool conv = o->HasConverged();
- if (conv) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; }
-
- if (conf.count("output_state"))
- state_file = conf["output_state"].as<string>();
- ofstream out(state_file.c_str(), ios::binary);
- cerr << "Writing state to: " << state_file << endl;
- o->Save(&out);
- out.close();
-
- cout << o->EvaluationCount() << "\t" << conv << endl;
- return 0;
-}
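
The Gaussian-prior block above adds ||lambda - mu||^2 / (2 sigma^2) to the objective and (lambda_k - mu_k) / sigma^2 to each gradient component. A self-contained sketch of that penalty; unlike the original, this version does not skip exactly-zero weights:

    #include <vector>

    // Returns the regularization term to add to the objective and
    // accumulates the penalty gradient into *gradient.
    double AddGaussianPrior(const std::vector<double>& lambdas,
                            const std::vector<double>& means,
                            const double sigma_squared,
                            std::vector<double>* gradient) {
      double norm = 0;
      for (size_t k = 1; k < lambdas.size(); ++k) {  // k = 0 is reserved, as above
        const double param = lambdas[k] - means[k];
        norm += param * param;
        (*gradient)[k] += param / sigma_squared;
      }
      return norm / (2.0 * sigma_squared);
    }
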
diff --git a/training/mr_reduce_to_weights.cc b/training/mr_reduce_to_weights.cc
deleted file mode 100644
index 16b47720..00000000
--- a/training/mr_reduce_to_weights.cc
+++ /dev/null
@@ -1,109 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <cassert>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "filelib.h"
-#include "fdict.h"
-#include "weights.h"
-#include "sparse_vector.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("input_format,f",po::value<string>()->default_value("b64"),"Encoding of the input (b64 or text)")
- ("input,i",po::value<string>()->default_value("-"),"Read file from")
- ("output,o",po::value<string>()->default_value("-"),"Write weights to");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help")) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-void WriteWeights(const SparseVector<double>& weights, ostream* out) {
- for (SparseVector<double>::const_iterator it = weights.begin();
- it != weights.end(); ++it) {
- (*out) << FD::Convert(it->first) << " " << it->second << endl;
- }
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
-
- const bool use_b64 = conf["input_format"].as<string>() == "b64";
-
- const string s_obj = "**OBJ**";
- // E-step
- ReadFile rf(conf["input"].as<string>());
- istream* in = rf.stream();
- assert(*in);
- WriteFile wf(conf["output"].as<string>());
- ostream* out = wf.stream();
- out->precision(17);
- while(*in) {
- string line;
- getline(*in, line);
- if (line.empty()) continue;
- int feat;
- double val;
- size_t i = line.find("\t");
- assert(i != string::npos);
- ++i;
- if (use_b64) {
- SparseVector<double> g;
- double obj;
- if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) {
- cerr << "B64 decoder returned error, skipping!\n";
- continue;
- }
- WriteWeights(g, out);
- } else { // text encoding - your counts will not be accurate!
- SparseVector<double> weights;
- while (i < line.size()) {
- size_t start = i;
-        while (i < line.size() && line[i] != '=') ++i;
- if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; }
- string fname = line.substr(start, i - start);
- if (fname == s_obj) {
- feat = -1;
- } else {
- feat = FD::Convert(line.substr(start, i - start));
- }
- ++i;
- start = i;
-        while (i < line.size() && line[i] != ';') ++i;
- if (i - start == 0) continue;
- val = atof(line.substr(start, i - start).c_str());
- ++i;
- if (feat != -1) {
- weights.set_value(feat, val);
- }
- }
- WriteWeights(weights, out);
- }
- }
-
- return 0;
-}
diff --git a/pro/Makefile.am b/training/pro/Makefile.am
index 1e9d46b0..1916b6b2 100644
--- a/pro/Makefile.am
+++ b/training/pro/Makefile.am
@@ -3,9 +3,9 @@ bin_PROGRAMS = \
mr_pro_reduce
mr_pro_map_SOURCES = mr_pro_map.cc
-mr_pro_map_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
mr_pro_reduce_SOURCES = mr_pro_reduce.cc
mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils -I$(top_srcdir)/training
diff --git a/pro/mr_pro_generate_mapper_input.pl b/training/pro/mr_pro_generate_mapper_input.pl
index b30fc4fd..b30fc4fd 100755
--- a/pro/mr_pro_generate_mapper_input.pl
+++ b/training/pro/mr_pro_generate_mapper_input.pl
diff --git a/pro/mr_pro_map.cc b/training/pro/mr_pro_map.cc
index eef40b8a..eef40b8a 100644
--- a/pro/mr_pro_map.cc
+++ b/training/pro/mr_pro_map.cc
diff --git a/pro/mr_pro_reduce.cc b/training/pro/mr_pro_reduce.cc
index 5ef9b470..5ef9b470 100644
--- a/pro/mr_pro_reduce.cc
+++ b/training/pro/mr_pro_reduce.cc
diff --git a/pro/pro.pl b/training/pro/pro.pl
index 891b7e4c..3b30c379 100755
--- a/pro/pro.pl
+++ b/training/pro/pro.pl
@@ -3,7 +3,7 @@ use strict;
use File::Basename qw(basename);
my @ORIG_ARGV=@ARGV;
use Cwd qw(getcwd);
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; }
# Skip local config (used for distributing jobs) if we're running in local-only mode
use LocalConfig;
@@ -13,28 +13,28 @@ use POSIX ":sys_wait_h";
my $QSUB_CMD = qsub_args(mert_memory());
my $default_jobs = env_default_jobs();
-my $VEST_DIR="$SCRIPT_DIR/../dpmert";
-require "$VEST_DIR/libcall.pl";
+my $UTILS_DIR="$SCRIPT_DIR/../utils";
+require "$UTILS_DIR/libcall.pl";
# Default settings
my $srcFile;
my $refFiles;
my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
-my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+my $FAST_SCORE="$bin_dir/../../mteval/fast_score";
die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
my $MAPINPUT = "$bin_dir/mr_pro_generate_mapper_input.pl";
my $MAPPER = "$bin_dir/mr_pro_map";
my $REDUCER = "$bin_dir/mr_pro_reduce";
-my $parallelize = "$VEST_DIR/parallelize.pl";
-my $libcall = "$VEST_DIR/libcall.pl";
-my $sentserver = "$VEST_DIR/sentserver";
-my $sentclient = "$VEST_DIR/sentclient";
-my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
+my $parallelize = "$UTILS_DIR/parallelize.pl";
+my $libcall = "$UTILS_DIR/libcall.pl";
+my $sentserver = "$UTILS_DIR/sentserver";
+my $sentclient = "$UTILS_DIR/sentclient";
+my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm";
my $SCORER = $FAST_SCORE;
die "Can't find $MAPPER" unless -x $MAPPER;
-my $cdec = "$bin_dir/../decoder/cdec";
+my $cdec = "$bin_dir/../../decoder/cdec";
die "Can't find decoder in $cdec" unless -x $cdec;
die "Can't find $parallelize" unless -x $parallelize;
die "Can't find $libcall" unless -e $libcall;
diff --git a/rampion/Makefile.am b/training/rampion/Makefile.am
index f4dbb7cc..1633d0f7 100644
--- a/rampion/Makefile.am
+++ b/training/rampion/Makefile.am
@@ -1,6 +1,6 @@
bin_PROGRAMS = rampion_cccp
rampion_cccp_SOURCES = rampion_cccp.cc
-rampion_cccp_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training
+AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils
diff --git a/rampion/rampion.pl b/training/rampion/rampion.pl
index 55f7b3f1..ae084db6 100755
--- a/rampion/rampion.pl
+++ b/training/rampion/rampion.pl
@@ -2,7 +2,7 @@
use strict;
my @ORIG_ARGV=@ARGV;
use Cwd qw(getcwd);
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; }
# Skip local config (used for distributing jobs) if we're running in local-only mode
use LocalConfig;
@@ -12,27 +12,27 @@ use POSIX ":sys_wait_h";
my $QSUB_CMD = qsub_args(mert_memory());
my $default_jobs = env_default_jobs();
-my $VEST_DIR="$SCRIPT_DIR/../dpmert";
-require "$VEST_DIR/libcall.pl";
+my $UTILS_DIR="$SCRIPT_DIR/../utils";
+require "$UTILS_DIR/libcall.pl";
# Default settings
my $srcFile;
my $refFiles;
my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
-my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+my $FAST_SCORE="$bin_dir/../../mteval/fast_score";
die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
my $MAPINPUT = "$bin_dir/rampion_generate_input.pl";
my $MAPPER = "$bin_dir/rampion_cccp";
-my $parallelize = "$VEST_DIR/parallelize.pl";
-my $libcall = "$VEST_DIR/libcall.pl";
-my $sentserver = "$VEST_DIR/sentserver";
-my $sentclient = "$VEST_DIR/sentclient";
-my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
+my $parallelize = "$UTILS_DIR/parallelize.pl";
+my $libcall = "$UTILS_DIR/libcall.pl";
+my $sentserver = "$UTILS_DIR/sentserver";
+my $sentclient = "$UTILS_DIR/sentclient";
+my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm";
my $SCORER = $FAST_SCORE;
die "Can't find $MAPPER" unless -x $MAPPER;
-my $cdec = "$bin_dir/../decoder/cdec";
+my $cdec = "$bin_dir/../../decoder/cdec";
die "Can't find decoder in $cdec" unless -x $cdec;
die "Can't find $parallelize" unless -x $parallelize;
die "Can't find $libcall" unless -e $libcall;
diff --git a/rampion/rampion_cccp.cc b/training/rampion/rampion_cccp.cc
index 1e36dc51..1e36dc51 100644
--- a/rampion/rampion_cccp.cc
+++ b/training/rampion/rampion_cccp.cc
diff --git a/rampion/rampion_generate_input.pl b/training/rampion/rampion_generate_input.pl
index b30fc4fd..b30fc4fd 100755
--- a/rampion/rampion_generate_input.pl
+++ b/training/rampion/rampion_generate_input.pl
diff --git a/training/candidate_set.cc b/training/utils/candidate_set.cc
index 087efec3..087efec3 100644
--- a/training/candidate_set.cc
+++ b/training/utils/candidate_set.cc
diff --git a/training/candidate_set.h b/training/utils/candidate_set.h
index 9d326ed0..9d326ed0 100644
--- a/training/candidate_set.h
+++ b/training/utils/candidate_set.h
diff --git a/dpmert/decode-and-evaluate.pl b/training/utils/decode-and-evaluate.pl
index fe765d00..1a332c08 100755
--- a/dpmert/decode-and-evaluate.pl
+++ b/training/utils/decode-and-evaluate.pl
@@ -2,7 +2,7 @@
use strict;
my @ORIG_ARGV=@ARGV;
use Cwd qw(getcwd);
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; }
# Skip local config (used for distributing jobs) if we're running in local-only mode
use LocalConfig;
@@ -16,16 +16,16 @@ require "libcall.pl";
my $default_jobs = env_default_jobs();
my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
-my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+my $FAST_SCORE="$bin_dir/../../mteval/fast_score";
die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
my $parallelize = "$bin_dir/parallelize.pl";
my $libcall = "$bin_dir/libcall.pl";
my $sentserver = "$bin_dir/sentserver";
my $sentclient = "$bin_dir/sentclient";
-my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
+my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm";
my $SCORER = $FAST_SCORE;
-my $cdec = "$bin_dir/../decoder/cdec";
+my $cdec = "$bin_dir/../../decoder/cdec";
die "Can't find decoder in $cdec" unless -x $cdec;
die "Can't find $parallelize" unless -x $parallelize;
die "Can't find $libcall" unless -e $libcall;
diff --git a/training/entropy.cc b/training/utils/entropy.cc
index 4fdbe2be..4fdbe2be 100644
--- a/training/entropy.cc
+++ b/training/utils/entropy.cc
diff --git a/training/entropy.h b/training/utils/entropy.h
index 796589ca..796589ca 100644
--- a/training/entropy.h
+++ b/training/utils/entropy.h
diff --git a/training/grammar_convert.cc b/training/utils/grammar_convert.cc
index 607a7cb9..607a7cb9 100644
--- a/training/grammar_convert.cc
+++ b/training/utils/grammar_convert.cc
diff --git a/training/lbfgs.h b/training/utils/lbfgs.h
index e8baecab..e8baecab 100644
--- a/training/lbfgs.h
+++ b/training/utils/lbfgs.h
diff --git a/training/lbfgs_test.cc b/training/utils/lbfgs_test.cc
index 9678e788..9678e788 100644
--- a/training/lbfgs_test.cc
+++ b/training/utils/lbfgs_test.cc
diff --git a/dpmert/libcall.pl b/training/utils/libcall.pl
index c7d0f128..c7d0f128 100644
--- a/dpmert/libcall.pl
+++ b/training/utils/libcall.pl
diff --git a/training/online_optimizer.cc b/training/utils/online_optimizer.cc
index 3ed95452..3ed95452 100644
--- a/training/online_optimizer.cc
+++ b/training/utils/online_optimizer.cc
diff --git a/training/online_optimizer.h b/training/utils/online_optimizer.h
index 28d89344..28d89344 100644
--- a/training/online_optimizer.h
+++ b/training/utils/online_optimizer.h
diff --git a/training/optimize.cc b/training/utils/optimize.cc
index 41ac90d8..41ac90d8 100644
--- a/training/optimize.cc
+++ b/training/utils/optimize.cc
diff --git a/training/optimize.h b/training/utils/optimize.h
index 07943b44..07943b44 100644
--- a/training/optimize.h
+++ b/training/utils/optimize.h
diff --git a/training/optimize_test.cc b/training/utils/optimize_test.cc
index bff2ca03..bff2ca03 100644
--- a/training/optimize_test.cc
+++ b/training/utils/optimize_test.cc
diff --git a/dpmert/parallelize.pl b/training/utils/parallelize.pl
index d2ebaeea..4197e0e5 100755
--- a/dpmert/parallelize.pl
+++ b/training/utils/parallelize.pl
@@ -18,7 +18,7 @@
#ANNOYANCE: if the input is shorter than -j n lines, or within the last few lines, this repeatedly sleeps; sleep time cut down from 60s to 15s
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; }
use LocalConfig;
use Cwd qw/ abs_path cwd getcwd /;
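(Aside: the one-line BEGIN block shared by these scripts is dense, and the only change in it here is the extra "../" — the scripts now live one directory deeper, under training/utils/ rather than dpmert/. Unpacked purely for readability, the idiom is the following; this reformatting is illustrative and not part of the commit:

    my $SCRIPT_DIR;
    BEGIN {
        use Cwd qw(abs_path);
        use File::Basename;
        # Resolve the directory holding this script at compile time.
        $SCRIPT_DIR = dirname(abs_path($0));
        # Add it, plus the shared environment/ directory two levels up,
        # to the module search path so "use LocalConfig;" below resolves.
        push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment";
    }
    use LocalConfig;

Because @INC is extended inside BEGIN, the path is in place before the "use LocalConfig;" statement is compiled.)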
diff --git a/training/risk.cc b/training/utils/risk.cc
index d5a12cfd..d5a12cfd 100644
--- a/training/risk.cc
+++ b/training/utils/risk.cc
diff --git a/training/risk.h b/training/utils/risk.h
index 2e8db0fb..2e8db0fb 100644
--- a/training/risk.h
+++ b/training/utils/risk.h
diff --git a/dpmert/sentclient.c b/training/utils/sentclient.c
index 91d994ab..91d994ab 100644
--- a/dpmert/sentclient.c
+++ b/training/utils/sentclient.c
diff --git a/dpmert/sentserver.c b/training/utils/sentserver.c
index c20b4fa6..c20b4fa6 100644
--- a/dpmert/sentserver.c
+++ b/training/utils/sentserver.c
diff --git a/dpmert/sentserver.h b/training/utils/sentserver.h
index cd17a546..cd17a546 100644
--- a/dpmert/sentserver.h
+++ b/training/utils/sentserver.h
diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am
new file mode 100644
index 00000000..280d3ae7
--- /dev/null
+++ b/word-aligner/Makefile.am
@@ -0,0 +1,6 @@
+bin_PROGRAMS = fast_align
+
+fast_align_SOURCES = fast_align.cc ttables.cc
+fast_align_LDADD = $(top_srcdir)/utils/libutils.a -lz
+
+AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training
diff --git a/training/fast_align.cc b/word-aligner/fast_align.cc
index 7492d26f..7492d26f 100644
--- a/training/fast_align.cc
+++ b/word-aligner/fast_align.cc
diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars
index 08ff33e1..ce3e1638 100644
--- a/word-aligner/makefiles/makefile.grammars
+++ b/word-aligner/makefiles/makefile.grammars
@@ -16,7 +16,7 @@ STEM_E = $(SCRIPT_DIR)/stemmers/$(E_LANG).pl
CLASSIFY = $(SUPPORT_DIR)/classify.pl
MAKE_LEX_GRAMMAR = $(SUPPORT_DIR)/make_lex_grammar.pl
-MODEL1 = $(TRAINING_DIR)/fast_align
+MODEL1 = $(SCRIPT_DIR)/fast_align
MERGE_CORPUS = $(SUPPORT_DIR)/merge_corpus.pl
e.voc: corpus.e
diff --git a/word-aligner/paste-parallel-files.pl b/word-aligner/paste-parallel-files.pl
deleted file mode 100755
index ce53b325..00000000
--- a/word-aligner/paste-parallel-files.pl
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my @fs = ();
-for my $file (@ARGV) {
- my $fh;
- open $fh, "<$file" or die "Can't open $file for reading: $!";
- push @fs, $fh;
-}
-my $num = scalar @fs;
-die "Usage: $0 file1.txt file2.txt [...]\n" unless $num > 1;
-
-my $first = $fs[0];
-while(<$first>) {
- chomp;
- my @out = ();
- push @out, $_;
- for (my $i=1; $i < $num; $i++) {
- my $f = $fs[$i];
- my $line = <$f>;
- die "Mismatched number of lines!" unless defined $line;
- chomp $line;
- push @out, $line;
- }
- print join(' ||| ', @out) . "\n";
-}
-
-for my $fh (@fs) {
- my $x=<$fh>;
- die "Mismatched number of lines!" if defined $x;
- close $fh;
-}
-
-exit 0;
-
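(Aside: the removed helper zipped N parallel files line-by-line into the " ||| "-separated format used for parallel corpora elsewhere in the tree. The commit ships no replacement; should one still be needed, a roughly equivalent stand-in could look like the sketch below — a hypothetical script, not part of this tree:

    #!/usr/bin/perl -w
    # Hypothetical stand-in for the deleted paste-parallel-files.pl:
    # join line i of every input file with ' ||| '; die on length mismatch.
    use strict;
    my @fhs = map { open my $fh, '<', $_ or die "Can't open $_: $!"; $fh } @ARGV;
    die "Usage: $0 file1.txt file2.txt [...]\n" unless @fhs > 1;
    while (defined(my $line = readline $fhs[0])) {
      chomp $line;
      my @cols = ($line);
      for my $fh (@fhs[1 .. $#fhs]) {
        defined(my $next = readline $fh) or die "Mismatched number of lines!\n";
        chomp $next;
        push @cols, $next;
      }
      print join(' ||| ', @cols), "\n";
    }
    # Any remaining file that is longer than the first is also a mismatch.
    for my $fh (@fhs) {
      die "Mismatched number of lines!\n" if defined readline $fh;
      close $fh;
    }

Invoked as, e.g., "paste-parallel-files.pl corpus.e corpus.f > corpus.e-f" with hypothetical file names, it prints one "e ||| f" line per input line pair.)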
diff --git a/training/ttables.cc b/word-aligner/ttables.cc
index 45bf14c5..45bf14c5 100644
--- a/training/ttables.cc
+++ b/word-aligner/ttables.cc
diff --git a/training/ttables.h b/word-aligner/ttables.h
index 9baa13ca..9baa13ca 100644
--- a/training/ttables.h
+++ b/word-aligner/ttables.h