Diffstat (limited to 'training')
156 files changed, 11101 insertions, 2574 deletions
diff --git a/training/Jamfile b/training/Jamfile
deleted file mode 100644
index 073451fa..00000000
--- a/training/Jamfile
+++ /dev/null
@@ -1,25 +0,0 @@
-import testing ;
-import option ;
-
-lib training :
-  ..//utils
-  ..//mteval
-  ..//decoder
-  ../klm/lm//kenlm
-  ..//boost_program_options
-  ttables.cc
-  : <include>.
-  : :
-  <library>..//decoder
-  <library>../klm/lm//kenlm
-  <library>..//utils
-  <library>..//mteval
-  <library>..//boost_program_options
-  ;
-
-exe model1 : model1.cc : <include>../decoder ;
-
-# // all_tests [ glob *_test.cc ] : ..//decoder : <testing.arg>$(TOP)/decoder/test_data ;
-
-alias programs : model1 ;
-
diff --git a/training/Makefile.am b/training/Makefile.am
index 5254333a..e95e045f 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -1,91 +1,11 @@
-bin_PROGRAMS = \
-  fast_align \
-  lbl_model \
-  test_ngram \
-  mr_em_map_adapter \
-  mr_em_adapted_reduce \
-  mr_reduce_to_weights \
-  mr_optimize_reduce \
-  grammar_convert \
-  plftools \
-  collapse_weights \
-  mpi_extract_reachable \
-  mpi_extract_features \
-  mpi_online_optimize \
-  mpi_flex_optimize \
-  mpi_batch_optimize \
-  mpi_compute_cllh \
-  augment_grammar
+SUBDIRS = \
+  liblbfgs \
+  utils \
+  crf \
+  minrisk \
+  dpmert \
+  pro \
+  dtrain \
+  mira \
+  rampion
-noinst_PROGRAMS = \
-  lbfgs_test \
-  optimize_test
-
-TESTS = lbfgs_test optimize_test
-
-noinst_LIBRARIES = libtraining.a
-libtraining_a_SOURCES = \
-  candidate_set.cc \
-  entropy.cc \
-  optimize.cc \
-  online_optimizer.cc \
-  risk.cc
-
-mpi_online_optimize_SOURCES = mpi_online_optimize.cc
-mpi_online_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc
-mpi_flex_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc
-mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_extract_features_SOURCES = mpi_extract_features.cc
-mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc
-mpi_batch_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc
-mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-augment_grammar_SOURCES = augment_grammar.cc
-augment_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-test_ngram_SOURCES = test_ngram.cc
-test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-fast_align_SOURCES = fast_align.cc ttables.cc
-fast_align_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-lbl_model_SOURCES = lbl_model.cc
-lbl_model_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-grammar_convert_SOURCES = grammar_convert.cc
-grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-optimize_test_SOURCES = optimize_test.cc
-optimize_test_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-collapse_weights_SOURCES = collapse_weights.cc
-collapse_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-lbfgs_test_SOURCES = lbfgs_test.cc
-lbfgs_test_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_optimize_reduce_SOURCES = mr_optimize_reduce.cc
-mr_optimize_reduce_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_em_map_adapter_SOURCES = mr_em_map_adapter.cc
-mr_em_map_adapter_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_reduce_to_weights_SOURCES = mr_reduce_to_weights.cc
-mr_reduce_to_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_em_adapted_reduce_SOURCES = mr_em_adapted_reduce.cc
-mr_em_adapted_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-plftools_SOURCES = plftools.cc
-plftools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I../klm
diff --git a/training/add-model1-features-to-scfg.pl b/training/add-model1-features-to-scfg.pl
deleted file mode 100755
index a0074317..00000000
--- a/training/add-model1-features-to-scfg.pl
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/perl -w
-
-# [X] ||| so [X,1] die [X,2] der ||| as [X,1] existing [X,2] the ||| 2.47712135315 2.53182387352 5.07100057602 ||| 0-0 2-2 4-4
-# [X] ||| so [X,1] die [X,2] der ||| this [X,1] the [X,2] of ||| 2.47712135315 3.19828724861 2.38270020485 ||| 0-0 2-2 4-4
-# [X] ||| so [X,1] die [X,2] der ||| as [X,1] the [X,2] the ||| 2.47712135315 2.53182387352 1.48463630676 ||| 0-0 2-2 4-4
-# [X] ||| so [X,1] die [X,2] der ||| is [X,1] the [X,2] of the ||| 2.47712135315 3.45197868347 2.64251494408 ||| 0-0 2-2 4-4 4-5
-
-die "Usage: $0 model1.f-e model1.e-f < grammar.scfg\n  (use training/model1 to extract the model files)\n" unless scalar @ARGV == 2;
-
-my $fm1 = shift @ARGV;
-die unless $fm1;
-my $frm1 = shift @ARGV;
-die unless $frm1;
-open M1,"<$fm1" or die;
-open RM1,"<$frm1" or die;
-print STDERR "Loading Model 1 probs from $fm1...\n";
-my %m1;
-while(<M1>) {
-  chomp;
-  my ($f, $e, $lp) = split /\s+/;
-  $m1{$e}->{$f} = exp($lp);
-}
-close M1;
-
-print STDERR "Loading Inverse Model 1 probs from $frm1...\n";
-my %rm1;
-while(<RM1>) {
-  chomp;
-  my ($e, $f, $lp) = split /\s+/;
-  $rm1{$f}->{$e} = exp($lp);
-}
-close RM1;
-
-my @label = qw( EGivenF LexFGivenE LexEGivenF );
-while(<>) {
-  chomp;
-  my ($l, $f, $e, $sscores, $al) = split / \|\|\| /;
-  my @scores = split /\s+/, $sscores;
-  unless ($sscores =~ /=/) {
-    for (my $i=0; $i<3; $i++) { $scores[$i] = "$label[$i]=$scores[$i]"; }
-  }
-  push @scores, "RuleCount=1";
-  my @fs = split /\s+/, $f;
-  my @es = split /\s+/, $e;
-  my $flen = scalar @fs;
-  my $elen = scalar @es;
-  my $pgen = 0;
-  my $nongen = 0;
-  for (my $i =0; $i < $flen; $i++) {
-    my $ftot = 0;
-    next if ($fs[$i] =~ /\[X/);
-    my $cr = $rm1{$fs[$i]};
-    for (my $j=0; $j <= $elen; $j++) {
-      my $ej = '<eps>';
-      if ($j < $elen) { $ej = $es[$j]; }
-      my $p = $cr->{$ej};
-      if (defined $p) { $ftot += $p; }
-    }
-    if ($ftot == 0) { $nongen = 1; last; }
-    $pgen += log($ftot) - log($elen);
-  }
-  my $bad = 0;
-  my $good = 0;
-  unless ($nongen) { push @scores, "RGood=1"; $good++; } else { push @scores, "RBad=1"; $bad++; }
-
-  $nongen = 0;
-  $pgen = 0;
-  for (my $i =0; $i < $elen; $i++) {
-    my $etot = 0;
-    next if ($es[$i] =~ /\[X/);
-    my $cr = $m1{$es[$i]};
-#    print STDERR "$es[$i]\n";
-    for (my $j=0; $j <= $flen; $j++) {
-      my $fj = '<eps>';
-      if ($j < $flen) { $fj = $fs[$j]; }
-      my $p = $cr->{$fj};
-#      print STDERR "  $fs[$j] : $p\n";
-      if (defined $p) { $etot += $p; }
-    }
-    if ($etot == 0) { $nongen = 1; last; }
-    $pgen += log($etot) - log($flen);
-  }
-  unless ($nongen) {
-    push @scores, "FGood=1";
-    if ($good) { push @scores, "BothGood=1"; } else { push @scores, "SusDel=1"; }
-  } else {
-    push @scores, "FBad=1";
-    if ($bad) { push @scores, "BothBad=1"; } else { push @scores, "SusHall=1"; }
-  }
-  print "$l ||| $f ||| $e ||| @scores";
-  if (defined $al) { print " ||| $al\n"; } else { print "\n"; }
-}
-
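The deleted script's generability test is easier to read as a formula. A hedged restatement of what each loop accumulates, in standard IBM Model 1 notation with $e_0 = \langle\mathrm{eps}\rangle$ the empty word (this is a reading of the code above, not part of the patch):

    \[
      \log p_{\mathrm{gen}}(\mathbf{f}\mid\mathbf{e}) \;=\;
      \sum_{i\,:\,f_i\ \mathrm{terminal}}
      \Bigl( \log \sum_{j=0}^{|\mathbf{e}|} p_{\mathrm{M1}}(f_i \mid e_j)
             \;-\; \log |\mathbf{e}| \Bigr)
    \]

If any inner sum is zero the rule is flagged ungeneratable (RBad=1, resp. FBad=1 for the reverse direction); otherwise RGood=1 / FGood=1. Note that $pgen itself is computed but never emitted as a feature; only its zero test is used.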
diff --git a/training/collapse_weights.cc b/training/collapse_weights.cc
deleted file mode 100644
index dc480f6c..00000000
--- a/training/collapse_weights.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-char const* NOTES =
-  "ZF_and_E means unnormalized scaled features.\n"
-  "For grammars with one nonterminal: F_and_E is joint,\n"
-  "F_given_E and E_given_F are conditional.\n"
-  "TODO: group rules by root nonterminal and then normalize.\n";
-
-
-#include <iostream>
-#include <fstream>
-#include <tr1/unordered_map>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <boost/functional/hash.hpp>
-
-#include "prob.h"
-#include "filelib.h"
-#include "trule.h"
-#include "weights.h"
-
-namespace po = boost::program_options;
-using namespace std;
-
-typedef std::tr1::unordered_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > > MarginalMap;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("grammar,g", po::value<string>(), "Grammar file")
-        ("weights,w", po::value<string>(), "Weights file")
-        ("unnormalized,u", "Always include ZF_and_E unnormalized score (default: only if sum was >1)")
-        ;
-  po::options_description clo("Command line options");
-  clo.add_options()
-        ("config,c", po::value<string>(), "Configuration file")
-        ("help,h", "Print this help message and exit");
-  po::options_description dconfig_options, dcmdline_options;
-  dconfig_options.add(opts);
-  dcmdline_options.add(opts).add(clo);
-
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("config")) {
-    const string cfg = (*conf)["config"].as<string>();
-    cerr << "Configuration file: " << cfg << endl;
-    ifstream config(cfg.c_str());
-    po::store(po::parse_config_file(config, dconfig_options), *conf);
-  }
-  po::notify(*conf);
-
-  if (conf->count("help") || !conf->count("grammar") || !conf->count("weights")) {
-    cerr << dcmdline_options << endl;
-    cerr << NOTES << endl;
-    exit(1);
-  }
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  const string wfile = conf["weights"].as<string>();
-  const string gfile = conf["grammar"].as<string>();
-  vector<weight_t> w;
-  Weights::InitFromFile(wfile, &w);
-  MarginalMap e_tots;
-  MarginalMap f_tots;
-  prob_t tot;
-  {
-    ReadFile rf(gfile);
-    assert(*rf.stream());
-    istream& in = *rf.stream();
-    cerr << "Computing marginals...\n";
-    int lc = 0;
-    while(in) {
-      string line;
-      getline(in, line);
-      ++lc;
-      if (line.empty()) continue;
-      TRule tr(line, true);
-      if (tr.GetFeatureValues().empty())
-        cerr << "Line " << lc << ": empty features - may introduce bias\n";
-      prob_t prob;
-      prob.logeq(tr.GetFeatureValues().dot(w));
-      e_tots[tr.e_] += prob;
-      f_tots[tr.f_] += prob;
-      tot += prob;
-    }
-  }
-  bool normalized = (fabs(log(tot)) < 0.001);
-  cerr << "Total: " << tot << (normalized ? " [normalized]" : " [scaled]") << endl;
-  ReadFile rf(gfile);
-  istream&in = *rf.stream();
-  while(in) {
-    string line;
-    getline(in, line);
-    if (line.empty()) continue;
-    TRule tr(line, true);
-    const double lp = tr.GetFeatureValues().dot(w);
-    if (isinf(lp)) { continue; }
-    tr.scores_.clear();
-
-    cout << tr.AsString() << " ||| F_and_E=" << lp - log(tot);
-    if (!normalized || conf.count("unnormalized")) {
-      cout << ";ZF_and_E=" << lp;
-    }
-    cout << ";F_given_E=" << lp - log(e_tots[tr.e_])
-         << ";E_given_F=" << lp - log(f_tots[tr.f_]) << endl;
-  }
-  return 0;
-}
-
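In terms of the NOTES string above, with $s(r)=\mathbf{w}\cdot\boldsymbol\phi(r)$ the unnormalized log-score of rule $r$, the deleted program writes (again a restatement of the code, not part of the patch):

    \[
      \mathrm{F\_and\_E}(r) = s(r) - \log Z, \qquad Z = \sum_{r'} e^{s(r')},
    \]
    \[
      \mathrm{F\_given\_E}(r) = s(r) - \log\!\!\sum_{r':\,e(r')=e(r)}\!\! e^{s(r')}, \qquad
      \mathrm{E\_given\_F}(r) = s(r) - \log\!\!\sum_{r':\,f(r')=f(r)}\!\! e^{s(r')}.
    \]

So F_and_E is the joint log-probability, the other two are the conditionals keyed on the rule's target and source sides (the e_tots / f_tots marginals), and ZF_and_E is $s(r)$ itself.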
diff --git a/training/crf/Makefile.am b/training/crf/Makefile.am
new file mode 100644
index 00000000..4a8c30fd
--- /dev/null
+++ b/training/crf/Makefile.am
@@ -0,0 +1,31 @@
+bin_PROGRAMS = \
+  mpi_batch_optimize \
+  mpi_compute_cllh \
+  mpi_extract_features \
+  mpi_extract_reachable \
+  mpi_flex_optimize \
+  mpi_online_optimize \
+  mpi_baum_welch
+
+mpi_baum_welch_SOURCES = mpi_baum_welch.cc
+mpi_baum_welch_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a -lz
+
+mpi_online_optimize_SOURCES = mpi_online_optimize.cc
+mpi_online_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a -lz
+
+mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc
+mpi_flex_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a -lz
+
+mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc
+mpi_extract_reachable_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a -lz
+
+mpi_extract_features_SOURCES = mpi_extract_features.cc
+mpi_extract_features_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a -lz
+
+mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc cllh_observer.h
+mpi_batch_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a -lz
+
+mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc cllh_observer.h
+mpi_compute_cllh_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a -lz
+
+AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/training -I$(top_srcdir)/training/utils -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/crf/baum_welch_example/README.md b/training/crf/baum_welch_example/README.md
new file mode 100644
index 00000000..97525da5
--- /dev/null
+++ b/training/crf/baum_welch_example/README.md
@@ -0,0 +1,32 @@
+Here's how to do Baum-Welch training with `cdec`.
+
+## Set the tags you want.
+
+First, set the number of tags you want in tagset.txt (these
+can be any symbols, listed one after another, separated
+by whitespace), e.g.:
+
+    C1 C2 C3 C4
+
+## Extract the parameter feature names
+
+    ../mpi_extract_features -c cdec.ini -t train.txt
+
+If you have compiled with MPI, you can use `mpirun`:
+
+    mpirun -np 8 ../mpi_extract_features -c cdec.ini -t train.txt
+
+## Randomly initialize the weights file
+
+    sort -u features.* | ./random_init.pl > weights.init
+
+## Run training
+
+    ../mpi_baum_welch -c cdec.ini -t train.txt -w weights.init -n 50
+
+Again, if you have compiled with MPI, you can use `mpirun`:
+
+    mpirun -np 8 ../mpi_baum_welch -c cdec.ini -t train.txt -w weights.init -n 50
+
+The `-n` flag indicates how many iterations to run for.
+
diff --git a/training/crf/baum_welch_example/cdec.ini b/training/crf/baum_welch_example/cdec.ini
new file mode 100644
index 00000000..61203da7
--- /dev/null
+++ b/training/crf/baum_welch_example/cdec.ini
@@ -0,0 +1,5 @@
+feature_function=Tagger_BigramIndicator
+feature_function=LexicalPairIndicator
+formalism=tagger
+tagger_tagset=tagset.txt
+intersection_strategy=full
diff --git a/training/crf/baum_welch_example/random_init.pl b/training/crf/baum_welch_example/random_init.pl
new file mode 100755
index 00000000..98467ed1
--- /dev/null
+++ b/training/crf/baum_welch_example/random_init.pl
@@ -0,0 +1,9 @@
+#!/usr/bin/perl -w
+while(<>) {
+  chomp;
+  my ($a,$b,@d) =split /\s+/;
+  die "Bad input" if scalar @d > 0;
+  $r = -rand() * rand() - 0.5;
+  $r = 0 if $a =~ /^Uni:/;
+  print "$a $r\n";
+}
diff --git a/training/crf/baum_welch_example/tagset.txt b/training/crf/baum_welch_example/tagset.txt
new file mode 100644
index 00000000..93a48451
--- /dev/null
+++ b/training/crf/baum_welch_example/tagset.txt
@@ -0,0 +1 @@
+1 2 3 4
diff --git a/training/crf/baum_welch_example/train.txt b/training/crf/baum_welch_example/train.txt
new file mode 100644
index 00000000..e9c3455e
--- /dev/null
+++ b/training/crf/baum_welch_example/train.txt
@@ -0,0 +1,2000 @@
+t h e +t o +o f +i n +a n d +a +s a i d +f o r +o n +t h a t +w i t h +w a s +i s +b y +a t +h e +a s +f r o m +i t +h a s +b e +h i s +h a v e +w i l l +a n +a r e +w e r e +b u t +n o t +w h o +a f t e r +h a d +y e a r +i t s +t w o +t h i s +w h i c h +t h e y +t h e i r +g o v e r n m e n t +b e e n +w e +p e r c e n t +w o u l d +n e w +i +a l s o +u p +m o r e +o n e +p e o p l e +f i r s t +l a s t +a b o u t +c h i n a +p r e s i d e n t +o v e r +m i l l i o n +o r +o u t +w o r l d +w h e n +a l l +o t h e r +m i n i s t e r +t h r e e +t h a n +u n i t e d +t h e r e +a g a i n s t +i n t o +c o u n t r y +s o m e +p o l i c e +n o +t i m e +y e a r s +s t a t e +w e d n e s d a y +t u e s d a y +t h u r s d a y +s t a t e s +m o n d a y +u s +c o u l d +i f +f r i d a y +s i n c e +b i l l i o n +s h e +f o r e i g n +o f f i c i a l s +d a y +i n t e r n a t i o n a l +h e r +b e t w e e n +o n l y +b e f o r e +s o u t h +w h i l e +d u r i n g
+n a t i o n a l +t o l d +s e c o n d +g r o u p +f o u r +d o w n +c i t y +p a r t y +t h e m +s e c u r i t y +d o +m a d e +d o l l a r s +p o i n t s +u n d e r +m i l i t a r y +b e c a u s e +w e e k +c o u n t r i e s +c a n +c h i n e s e +o f f +s u n d a y +m o s t +s o +h i m +e c o n o m i c +f o r m e r +i r a q +f i v e +s a t u r d a y +a c c o r d i n g +d i d +n o w +o f f i c i a l +m a y +n e w s +w a r +a n y +w h e r e +t e a m +m e e t i n g +k i l l e d +b a n k +s h o u l d +j u s t +r e p o r t e d +m a n y +n e x t +w h a t +c o m p a n y +i n c l u d i n g +b a c k +m o n t h +r e p o r t +o u r +p r i m e +m a r k e t +s t i l l +b e i n g +c o u r t +t r a d e +h e r e +p e a c e +h i g h +o l d +s e t +t h r o u g h +y o u +i s r a e l +t a l k s +e n d +t a k e +e x p e c t e d +p o l i t i c a l +s i x +s u c h +b o t h +m a k e +h o m e +l o c a l +j a p a n +r u s s i a +s a y i n g +g e n e r a l +t o p +a n o t h e r +e u r o p e a n +n o r t h +h e l d +t h i r d +m a j o r +s t a t e m e n t +w e l l +a m e r i c a n +i s r a e l i +t a i w a n +l e a d e r +c a p i t a l +l o n g +o i l +t h o s e +c a l l e d +p a r t +s p o k e s m a n +w o r k +d e v e l o p m e n t +a d d e d +s a y s +w o n +m e m b e r s +l e f t +c h i e f +g a m e +l i k e +t h e n +h e l p +s a y +p a l e s t i n i a n +v e r y +c u p +p u b l i c +f r a n c e +c e n t r a l +l e a d e r s +w i n +b u s h +m i n i s t r y +m o n t h s +g e t +w a y +d a y s +r e g i o n +s u p p o r t +t r o o p s +a g e n c y +f o r c e s +e a r l i e r +e v e n +n a t i o n s +v i s i t +g a m e s +e u +f i n a l +a m o n g +h o u s e +s e v e r a l +e a r l y +l e d +d l r s +l a t e r +w o m e n +k o n g +h o n g +p r e s s +p o w e r +t o d a y +o p e n +i n d e x +o f f i c e +f o l l o w i n g +a r o u n d +b a s e d +c o n f e r e n c e +b r i t i s h +c o u n c i l +u n i o n +t o o k +c a m e +w e s t +r u n +h o w e v e r +e a s t +l a t e +s e a s o n +g o o d +c l o s e +g e r m a n y +l e a d +p a s t +d e f e n s e +p l a c e +n u m b e r +a r m y +r u s s i a n +l a w +i n d i a +m e n +f i n a n c i a l +e c o n o m y +l e a s t +s e c r e t a r y +s a m e +y o r k +f o u n d +g o i n g +r i g h t +g o +m y +o p p o s i t i o n +f o r c e +a g r e e m e n t +e l e c t i o n +h o w +b u s i n e s s +f r e n c h +a u t h o r i t i e s +p l a y +m u c h +r i g h t s +t i m e s +c o m m i t t e e +r o u n d +p r o v i n c e +k o r e a +h a l f +a t t a c k +p r i c e s +s t o c k +h i t +p l a n +a r e a +c o o p e r a t i o n +s e v e n +n e a r +e x c h a n g e +u s e d +n u c l e a r +p a k i s t a n +b e i j i n g +a n n o u n c e d +a i r +a f r i c a +c e n t e r +a g o +t h e s e +d e c i s i o n +a t t a c k s +w i t h o u t +m a t c h +m a r c h +n a t i o n +h e a d +t o t a l +c o m p a n i e s +m a n +d e a l +w a s h i n g t o n +r e c e n t +c a s e +f i r e +n i g h t +a u s t r a l i a +a f r i c a n +u n t i l +i r a n +e l e c t i o n s +s o u t h e r n +l e a g u e +p u t +e a c h +m e m b e r +c h i l d r e n +h e a l t h +p c +p a r l i a m e n t +l o s t +t h i n k +d e a t h +m u s t +e i g h t +w o r k e r s +u s e +b r i t a i n +w a n t +s y s t e m +r e c o r d +d e p a r t m e n t +p r o g r a m +e f f o r t s +g r o w t h +r e s u l t s +i r a q i +i s s u e +b e s t +w h e t h e r +h u m a n +n o r t h e r n +c o n t r o l +f a r +f u r t h e r +a l r e a d y +s h a r e s +r e l a t i o n s +m e e t +s o l d i e r s +s e e +f r e e +c o m e +j a p 
a n e s e +m o n e y +d o l l a r +r e p o r t s +d i r e c t o r +s h a r e +g i v e +j u n e +c o m m i s s i o n +l a r g e s t +i n d u s t r y +c o n t i n u e +s t a r t +c a m p a i g n +l e a d i n g +q u a r t e r +i n f o r m a t i o n +v i c t o r y +r o s e +o t h e r s +a n t i +r e t u r n +f a m i l y +i s s u e s +s h o t +p o l i c y +e u r o p e +m e d i a +p l a n s +b o r d e r +n e e d +p e r +a r e a s +j u l y +v i o l e n c e +d e s p i t e +d o e s +s t r o n g +c h a i r m a n +s e r v i c e +v o t e +a s k e d +f o o d +f e d e r a l +w e n t +t a k e n +d u e +m o v e +o w n +b e g a n +i t a l y +g r o u p s +p o s s i b l e +l i f e +c l e a r +r a t e +f e l l +c r i s i s +r e b e l s +p o i n t +d e m o c r a t i c +w a t e r +a h e a d +i n v e s t m e n t +i n c r e a s e +s h o w +p l a y e r s +r e l e a s e d +g e r m a n +t o w n +n e a r l y +p r o c e s s +m i l e s +f e w +d i e d +a d m i n i s t r a t i o n +a p r i l +w e e k s +l e v e l +k e y +a s i a +s a l e s +w e a p o n s +c l o s e d +b e h i n d +m i n u t e s +a g r e e d +p r e s i d e n t i a l +g r e a t +m a k i n g +t o o +p r i c e +j o h n +g o t +f o u r t h +a g a i n +k i l o m e t e r s +s i t u a t i o n +m a i n +w h i t e +r e p o r t e r s +h o u r s +s e n i o r +a s i a n +r e p u b l i c +a w a y +g l o b a l +f i g h t i n g +s e r i e s +b e t t e r +n e w s p a p e r +m e +c l i n t o n +a r r e s t e d +h i g h e r +k n o w +f u t u r e +s c o r e d +g o l d +n a t o +m o r n i n g +n e v e r +b e a t +w i t h i n +r n +c u r r e n t +p e r i o d +b e c o m e +w e s t e r n +i m p o r t a n t +l o n d o n +a u s t r a l i a n +s p a i n +e n e r g y +a i d +a c c u s e d +o l y m p i c +n i n e +a c r o s s +f a c e +o r g a n i z a t i o n +g o a l +t a k i n g +i n j u r e d +s p e c i a l +r a c e +d a i l y +s u m m i t +s i d e +l i n e +o r d e r +u n i v e r s i t y +a f g h a n i s t a n +p l a y e d +b i g +c a r +t r y i n g +e n g l a n d +q u o t e d +d e +a l o n g +i s l a m i c +o u t s i d e +t r a d i n g +e d s +c u t +a c t i o n +p r o b l e m s +v i c e +w o r k i n g +y e n +b u i l d i n g +s i g n e d +k n o w n +c h a n g e +c h a r g e s +s m a l l +l o w e r +a l t h o u g h +s e n t +c o n g r e s s +h o s p i t a l +h o l d +m i g h t +u n +e v e r y +g i v e n +d e p u t y +i n t e r e s t +i s l a n d +s c h o o l +d r u g +k i l l i n g +r u l i n g +t o u r +o p e n i n g +t e r m +f u l l +c l r +l i t t l e +m a r k e t s +c o a c h +j a n u a r y +s c h e d u l e d +k e e p +t u r k e y +p r e v i o u s +e x e c u t i v e +g a s +m e t +j o i n t +t r i a l +b o a r d +p r o d u c t i o n +i n d o n e s i a +s e r v i c e s +l i k e l y +t h o u s a n d s +i n d i a n +p o s t +a r a b +c e n t s +h o p e +s i n g a p o r e +p a l e s t i n i a n s +p a r t i e s +g a v e +b i l l +d e a d +r o l e +s e p t e m b e r +t e l e v i s i o n +c o m m u n i t y +r e g i o n a l +a d d i n g +a m e r i c a +o n c e +y u a n +t e s t +s t o c k s +w h o s e +p a y +p r i v a t e +l a t e s t +i n v e s t o r s +f r o n t +c a n a d a +r e l e a s e +r e c e i v e d +m e a n w h i l e +l e s s +t h a i l a n d +l a n d +c h a m p i o n +r e a c h e d +u r g e d +d e c e m b e r +a s s o c i a t i o n +f i g h t +s i d e s +s t a r t e d +l a r g e +y e t +m i d d l e +c a l l +p r e s s u r e +e n d e d +s o c i a l +p r o j e c t +l o w +h a r d +c l u b +p r e m i e r +t e c h n o l o g y +f a i l e d +t o u r n a m e n t +r e a l +p 
r o v i d e +g a z a +m i n u t e +a f f a i r s +m i n i s t e r s +p r o d u c t s +r e s e a r c h +s e e n +g e o r g e +e v e n t +s t o p +i n v e s t i g a t i o n +a i r p o r t +m e x i c o +t i t l e +t o k y o +e a s t e r n +b i g g e s t +y o u n g +d e m a n d +t h o u g h +a r m e d +s a n +o p e n e d +m e a s u r e s +n o v e m b e r +a v e r a g e +m a r k +o c t o b e r +k o r e a n +r a d i o +b o d y +s e c t o r +c a b i n e t +g m t +a s s o c i a t e d +a p +c i v i l +t e r r o r i s m +s h o w e d +p r i s o n +s i t e +p r o b l e m +s e s s i o n +b r a z i l +m u s l i m +c o a l i t i o n +b a g h d a d +b i d +s t r e e t +c o m i n g +b e l i e v e +m a l a y s i a +s t u d e n t s +d e c i d e d +f i e l d +r e d +n e g o t i a t i o n s +w i n n i n g +o p e r a t i o n +c r o s s +s o o n +p l a n n e d +a b l e +t i e s +t a x +j u s t i c e +d o m e s t i c +d a v i d +i n c l u d e +n a m e +b o m b +t r a i n i n g +j u d g e +v i c t i m s +m e d i c a l +c o n d i t i o n +f i n d +r e m a i n +i s s u e d +f i n a n c e +l o t +l a b o r +b t +e n o u g h +i m m e d i a t e l y +s h o r t +l o s s +a n n u a l +m o v e d +r e b e l +s t r i k e +r o a d +r e c e n t l y +i t a l i a n +c o n s t r u c t i o n +t r y +a u g u s t +e x p r e s s e d +m i l i t a n t s +t o g e t h e r +w a n t e d +r a t e s +f u n d +f o r w a r d +m i s s i o n +d i s c u s s +r e s u l t +c a l l s +k o s o v o +o p e r a t i o n s +c a s e s +z e a l a n d +s o u r c e s +i n c r e a s e d +l e g a l +b a n k s +i n v o l v e d +o f f i c e r s +l e a v e +m e t e r s +w a r n e d +h a v i n g +r e a c h +b r i n g +h i s t o r y +d i s t r i c t +j o b +a l l o w e d +a r r i v e d +t o w a r d +c l a i m e d +e g y p t +t e a m s +a l l o w +a l m o s t +f e b r u a r y +s e r i o u s +p o o r +c o n t i n u e d +s t e p +i n t e r v i e w +e d u c a t i o n +n o n +r e a l l y +s t a r +l e e +r e s i d e n t s +b a n +s o c c e r +n e e d e d +p a r i s +i n d u s t r i a l +p l a y e r +m o s c o w +s t a t i o n +o f f e r +h u n d r e d s +t a l i b a n +w o m a n +m a n a g e m e n t +l e b a n o n +n o t e d +c h e n +p o s i t i o n +f i n i s h e d +c o s t +e x p e r t s +e v e r +m o v e m e n t +t e r r o r i s t +p l a n e +b l a c k +d i f f e r e n t +b e l i e v e d +p l a y i n g +c a u s e d +h o p e s +c o n d i t i o n s +b r o u g h t +f o r c e d +l a u n c h e d +w e e k e n d +m i c h a e l +s e a +r i s e +d e t a i l s +s p o r t s +e t h n i c +s t a f f +c h a n c e +g o a l s +b u d g e t +h a n d +b a s e +s e c o n d s +s r i +s p e a k i n g +o f f i c e r +m a j o r i t y +w a n t s +c h a r g e d +s h a n g h a i +v i e t n a m +x i n h u a +c o m m e n t +d r o p p e d +t u r n e d +p r o t e s t +r e f o r m +s u s p e c t e d +a m i d +t r i e d +c i t i e s +g r o u n d +t u r k i s h +s t a g e +e f f o r t +s +c o m m u n i s t +a n a l y s t s +h a m a s +p r o j e c t s +c o n t r a c t +i n d e p e n d e n c e +l o o k i n g +a m +s i g n +f o l l o w e d +r e m a i n s +c o m p a r e d +u s i n g +h e a v y +a f t e r n o o n +s t r a i g h t +l o o k +f a l l +r e a d y +e u r o +c h a r g e +w o u n d e d +p r o g r e s s +p a c i f i c +d e n i e d +h o u r +c a r e e r +c o n f i r m e d +t h a i +r u l e +c o u r s e +w i f e +e x p o r t s +b e c a m e +a m e r i c a n s +e m e r g e n c y +a r a f a t +r e f u s e d +l i s t +a l l e g e d +c h a m p i o n s h i p +p o p u l a t i o n +n e e d s +c o m p e t i 
t i o n +o r d e r e d +s a f e t y +a u t h o r i t y +i l l e g a l +t v +d o n e +e v i d e n c e +s t a y +f i f t h +s e e k i n g +s t u d y +l i v e +r u n s +c o a s t +s a u d i +h e l p e d +a c t i v i t i e s +m a n a g e r +w o r t h +k i n g +g r o w i n g +r u n n i n g +f i r e d +i n c l u d e d +p a u l +w a l l +r e t u r n e d +c o n f l i c t +m y a n m a r +d e m o c r a c y +p r o +f o r m +a l w a y s +a m b a s s a d o r +m a t c h e s +t h i n g s +m a i n l a n d +s a w +d i s e a s e +r e l a t e d +f u n d s +i n d e p e n d e n t +t o n s +a p p r o v e d +e m b a s s y +c u r r e n c y +b r e a k +s e n a t e +c o n c e r n s +f i g u r e s +j o i n +r e s o l u t i o n +o f t e n +c o n f i d e n c e +e s p e c i a l l y +w i n n e r +c a r r i e d +i m p r o v e +s w e d e n +z i m b a b w e +t h r e a t +c u r r e n t l y +s i n g l e +h i m s e l f +l i v i n g +r e f u g e e s +a i m e d +c o u n t y +c a n n o t +a r m s +b u i l d +g e t t i n g +a p p e a r e d +d i f f i c u l t +s p a n i s h +r i v e r +m i s s i n g +e s t i m a t e d +s o m e t h i n g +p r o p o s e d +c e r e m o n y +i n s t e a d +b r o k e +c h u r c h +o l y m p i c s +s p a c e +p r o f i t +v i l l a g e +l i g h t +p e r f o r m a n c e +d e l e g a t i o n +t r i p +o v e r a l l +p a r t s +a c t +c o r r u p t i o n +d i v i s i o n +s i m i l a r +p o s i t i v e +c a m p +g r a n d +p o r t +s u p p o r t e r s +r e p u b l i c a n +b e g i n +j o n e s +p a r k +b i l a t e r a l +c l o u d y +d i p l o m a t i c +p r e s e n t +l o s +a r g e n t i n a +t r a v e l +s p e e c h +a t t e n t i o n +n e t +j o b s +a r r e s t +p r o s e c u t o r s +i n f l a t i o n +n a m e d +j o r d a n +s o n +g o v e r n m e n t s +r u l e s +p r o t e c t i o n +k e n y a +h o m e s +l i v e s +s e r b +s a n c t i o n s +a t t e m p t +e x p o r t +m e a n s +n i g e r i a +r e m a i n e d +t u r n +c r i m e s +c o n c e r n +e n v i r o n m e n t +p l a n t +l e t t e r +v a l u e +r e s p o n s e +a s s e m b l y +p r o p o s a l +h o l d i n g +b o m b i n g +e n s u r e +a f g h a n +r e s o u r c e s +f a m i l i e s +r e s t +i n s i d e +t h r o u g h o u t +m a t t e r +c a u s e +l a w m a k e r s +i i +f u e l +c a l i f o r n i a +e g y p t i a n +o w n e d +s u i c i d e +c z e c h +c a r e +a t t o r n e y +c l a i m s +v o t e r s +n e t w o r k +b a l l +p h i l i p p i n e +f o o t b a l l +s p o k e s w o m a n +i n c i d e n t +p r e v e n t +w h y +d e v e l o p i n g +c i v i l i a n s +e n g l i s h +o b a m a +i n t e r n e t +r i c e +s a d d a m +y o u r +u p d a t e s +l e t +d o i n g +a i r c r a f t +f l i g h t +a n g e l e s +i n t e l l i g e n c e +p h i l i p p i n e s +f a t h e r +c r e d i t +a l l i a n c e +t e r m s +r a i s e d +i r a n i a n +c h a n g e s +s y r i a +v a r i o u s +i n d o n e s i a n +l i +i r e l a n d +l e a v i n g +d e c l i n e d +c o m m o n +i n j u r y +t r e a t m e n t +a v a i l a b l e +c h a m p i o n s +e l e c t e d +s u m m e r +d a t a +o v e r s e a s +p a i d +c e n t u r y +n o t h i n g +f i r m +r e l i g i o u s +s w i t z e r l a n d +o f f e r e d +c h a m p i o n s h i p s +t h o u g h t +c a n d i d a t e +c o n s i d e r e d +r i s k +c r i m e +g o v e r n o r +f i l m +r a l l y +f l o r i d a +t e r r o r +d o u b l e +e q u i p m e n t +j e r u s a l e m +c a r r y i n g +p e r s o n +f e e l +t e r r i t o r y +a l +c o m m e r c i a l +u k r a i n e +b o d i e s +p r o t e s t s 
+n e t h e r l a n d s +f i n i s h +a c c e s s +t a r g e t +a u s t r i a +s o u r c e +r e p r e s e n t a t i v e s +s p e n t +j e w i s h +p o t e n t i a l +r i s i n g +t r e a t y +c a n a d i a n +a g e +c a +s p e n d i n g +n e c e s s a r y +r a i n +z o n e +c a r s +p r o m o t e +n a t u r a l +d a m a g e +f o c u s +w e a t h e r +p o l i c i e s +p r o t e c t +a i d s +c o +g i v i n g +b c +b a c k e d +l a n k a +a p p e a l +r e j e c t e d +f a n s +b a d +s o u t h e a s t +r i v a l +p l a n n i n g +b o s n i a +c o m e s +b u y +s o v i e t +h o t e l +d u t c h +q u e s t i o n +t a i p e i +b o o s t +c o s t s +i n s t i t u t e +s o c i e t y +s h o o t i n g +t h e m s e l v e s +e v e n t s +k i n d +p a p e r +w o r k e d +c o n s t i t u t i o n +u r g e n t +s e t t l e m e n t +e a r n i n g s +j o s e +m o t h e r +a c c i d e n t +f a c t +d r o p +r a n g e +h a n d s +s e e k +h u g e +l a w y e r +s t a r t i n g +h e a r t +c o m m a n d e r +t o u r i s m +p a s s e n g e r s +s u s p e c t s +h i g h e s t +p o p u l a r +s t a b i l i t y +s u p r e m e +b u s +r o b e r t +b a t t l e +p r o g r a m s +c u b a +w i n s +d r u g s +s u r v e y +h o s t +m u r d e r +d a t e +g u l f +w i l l i a m s +s e n d +s u f f e r e d +p e n a l t y +k e p t +s t a d i u m +c i t i z e n s +f i g u r e +h e a d q u a r t e r s +g u a r d +p u b l i s h e d +s t a n d +t e n n i s +c r e a t e +b e g i n n i n g +e v e n i n g +p h o n e +f o o t +r u l e d +c a s h +s o l d +c h i c a g o +p o l a n d +d e m o c r a t s +r e f o r m s +b o s n i a n +s u r e +c h i l d +m a y o r +a t t e n d +l e a d e r s h i p +e m p l o y e e s +t e l e p h o n e +l o s s e s +b o r n +a s s i s t a n c e +t h i n g +t r a i n +s u p p l y +e i t h e r +b u i l t +l a u n c h +c r u d e +m o v i n g +g r e e c e +t r a c k +r a i s e +d r i v e +r e s p o n s i b i l i t y +f e d e r a t i o n +c o l o m b i a +g r e e n +c o n c e r n e d +c a n d i d a t e s +n e w s p a p e r s +r e v i e w +i n t e r i o r +d e b t +w h o l e +t e x a s +m o s t l y +r e l i e f +f a r m e r s +g o o d s +p a k i s t a n i +d e g r e e s +s e l l +d e t a i n e d +s w i s s +c r i m i n a l +d e c a d e s +m i s s i l e +a b o v e +d r a w +p a s s e d +e x p l o s i o n +m a k e s +l a w s +b a n g l a d e s h +t a l k +m a d r i d +m a s s +c o n v i c t e d +i t e m s +m e d a l +s u c c e s s +s e a t s +q u i c k l y +c a l l i n g +k i m +t r a f f i c +d i r e c t +o r g a n i z a t i o n s +l e v e l s +s e r v e +a d d r e s s +s t r e s s e d +s t a n d i n g +w a n g +d e c l a r e d +j a m e s +c a p t a i n +t h r e a t e n e d +p r o m i s e d +s u d a n +v a n +p a s s +e n v i r o n m e n t a l +r a t h e r +w o r s t +p o u n d s +b l u e +s i x t h +m e t e r +i n c l u d e s +m u s i c +r e d u c e +t a k e s +v o t e s +r e s c u e +c o m p l e t e d +s e a r c h +i n n i n g s +v e h i c l e s +c l a i m +t r a n s p o r t +a v o i d +i n c o m e +p o l l +a f f e c t e d +g e o r g i a +g a i n e d +w o +r e +v i s i t i n g +r e s p o n s i b l e +e f f e c t +p o l l s +h e a r i n g +l o s i n g +e s t a b l i s h e d +f a i r +g i a n t +c h a l l e n g e +f e e t +p r o p e r t y +t e s t s +l e g +a g r i c u l t u r e +l o n g e r +d e a t h s +s q u a r e +p a r t i c u l a r l y +d i s p u t e +b +e n t e r p r i s e s +v o l u m e +c a r r y +m i d +s e p a r a t e +i d e n t i f i e d +i t s e l f +h e a d e d +a n o n y m i t y +p a r l i a m e 
n t a r y +c r a s h +r e m a i n i n g +j o u r n a l i s t s +i n c r e a s i n g +s t a t i s t i c s +d e s c r i b e d +b u r e a u +i n j u r i e s +p r o v i d e d +j o i n e d +i m m e d i a t e +d e b a t e +i m p a c t +m e s s a g e +m e e t i n g s +r e q u e s t +s c h o o l s +o c c u r r e d +r e m a r k s +c o m m i t t e d +p r o t e s t e r s +t o u g h +s p o k e +s t r i p +f a c e s +c r o w d +s h o w s +w a r n i n g +s t o r y +q u a l i t y +p e t e r +f r e e d o m +d e v e l o p +m a r t i n +p e r s o n a l +s e r b i a +a n y t h i n g +b l a m e d +i n t e r e s t s +n e i g h b o r i n g +d o c t o r s +f l i g h t s +s h i p +r e g i m e +b l a i r +u n i t +a g e n c i e s +a f p +s u g g e s t e d +l a c k +s e l l i n g +a n n a n +y u g o s l a v i a +l a +c o n s u m e r +s u s p e n d e d +s t o p p e d +c o m m e n t s +c o m p u t e r +c o n s i d e r +a i r l i n e s +l e b a n e s e +p r e p a r e d +d i a l o g u e +e x p e c t +t w i c e +p u t i n +a l l e g a t i o n s +b r o w n +a c c e p t +a p p r o v a l +w i d e +n e a r b y +s y s t e m s +v i e w +p u s h +p r o b a b l y +e v e r y t h i n g +d r a f t +t r a d i t i o n a l +s t a t u s +s t r u c k +s e i z e d +p a r t l y +s t a n d a r d +h u s s e i n +p o v e r t y +d o z e n s +r e g i o n s +c r i c k e t +l o a n s +e +b o o k +b a s i s +a n n o u n c e m e n t +r u r a l +s e r b s +a d d i t i o n +g r e e k +c o m p l e t e +r o o m +g r e a t e r +a l l e g e d l y +f i n a l s +f a c i n g +l i m i t e d +c u t s +r i c h a r d +b u s i n e s s e s +l i n k e d +p e a c e f u l +c r e w +t o u r i s t s +m a i n l y +p r i s o n e r s +p o w e r f u l +c r o a t i a +f i l e d +k u w a i t +f o r u m +r e s e r v e +m i l a n +b l a s t +a n n i v e r s a r y +a t t e n d e d +e n d i n g +d e v e l o p e d +c e r t a i n +b e l o w +f e l t +p r o v i n c i a l +c y p r u s +c r i t i c i z e d +o p p o r t u n i t y +s m i t h +p o l i t i c s +s e l f +h u m a n i t a r i a n +r e a s o n +l a w y e r s +r e v e n u e +d o c u m e n t s +w r o t e +q u e s t i o n s +n o r w a y +d o w +p a n e l +f e a r +s e n t e n c e d +b a n n e d +c i v i l i a n +c u l t u r a l +p e r s o n n e l +b e l g i u m +a b u +c a p a c i t y +a m o u n t +s e c u r i t i e s +b l o o d +s i g n i f i c a n t +e x p e r i e n c e +a s e a n +h o u s i n g +j o h n s o n +p h o t o s +r o y a l +i m p o r t s +a d d i t i o n a l +y e l t s i n +c d y +h e a r d +t h o m a s +b a n k i n g +l e a d s +v i s i t e d +f e a r s +u g a n d a +d r i v e r +c o n t r o l l e d +d e m a n d s +i n s t i t u t i o n s +a l i +c h r i s t i a n +s t o r m +f o r e c a s t +g r a f +f i g h t e r s +s t r e e t s +r e s p e c t +s p o t +w e b +m i s s e d +s c i e n c e +h e a d s +h i t s +m a s s i v e +c u l t u r e +c o u p l e +v e n e z u e l a +r e p o r t e d l y +i n s u r a n c e +s p r e a d +s o l u t i o n +p l a c e d +s e r v e d +f a c i l i t i e s +s t r a t e g y +t e c h n i c a l +s t e p s +d e e p +h o p e d +d e c i d e +s a l e +j a i l +d i s c u s s e d +s a v e +n e p a l +a r a b i a +e n v o y +a t t a c k e d +w a y s +r e c e i v e +h a p p y +h a l l +g u i l t y +p r a c t i c e +l o v e +e u r o s +o p e r a t i n g +c h a n g e d +b o s t o n +d e c a d e +d e f i c i t +p r o d u c t +l i n e s +p a t i e n t s +f r i e n d s +s y d n e y +a c c o r d +t i e d +s p e e d +w o r d s +t i e +s c o r e +c o n d u c t e d +c r i t i c i s m +m u s l i m s +b 
r o t h e r +c l a s s +r o m a n i a +h e l p i n g +f a s t +h a p p e n e d +d e f e n d i n g +n a v y +w i t n e s s e s +f u l l y +s u s p e c t +i s l a n d s +m a i n t a i n +p r e s e n c e +j a k a r t a +p a c k a g e +y a r d s +g a i n +a c c o u n t +s q u a d +s h a r o n +w i n g +a c t i o n s +a t h e n s +s t r a t e g i c +s t r e n g t h e n +f r i e n d l y +d e s t r o y e d +a p p a r e n t l y +c o n s e r v a t i v e +g a i n s +f a i l u r e +f u t u r e s +s h o t s +r e l a t i o n s h i p +c o m m i s s i o n e r +m a l a y s i a n +r e q u i r e d +a t l a n t a +a g r e e +d e f e a t +s t r i k e r +a d v a n c e d +b r a z i l i a n +a s s e t s +h o u s e s +s u p p l i e s +s a f e +m i l l i o n s +s o u g h t +f r e s h +v i d e o +p r o s e c u t o r +p u l l e d +v e h i c l e +t o l l +p a r e n t s +c e a s e +a c t i v i s t s +o r g a n i z e d +e n t e r e d +s h i i t e +l a n g u a g e +a b b a s +b i n +p r e v i o u s l y +c l o s i n g +w o r k s +t e r r o r i s t s +t o n y +c o v e r +f o l l o w +l e g i s l a t i v e +r i c h +c l a s h e s +i m p o s e d +r a n +m c c a i n +s u c c e s s f u l +s e v e n t h +s c o r i n g +c a u g h t +a p p o i n t e d +a l l i e s +a d m i t t e d +w o r l d w i d e +o r d e r s +d e m a n d e d +c r e a t e d +r a n k e d +m i l i t a n t +i n v e s t i g a t o r s +s h o w i n g +p o s s i b i l i t y +s e a t +d a u g h t e r +s i t e s +s h o r t l y +c o m m e r c e +n e t a n y a h u +a d v a n c e +a i r l i n e +f i r m s +a b r o a d +f o u n d a t i o n +c o m m i t m e n t +p l e d g e d +k i l l +r e p r e s e n t a t i v e +n o r t h w e s t +s c e n e +b e a t i n g +i m p r o v e d +r e s u m e +w h o m +s l i g h t l y +v o t i n g +b o m b i n g s +s e r i o u s l y +s e t t i n g +c a r l o s +e f f e c t i v e +h k +r e g u l a r +j i a n g +p r i n c e +d e c l i n e +b a y +n o r t h e a s t +s o l d i e r +r e a c h i n g +a g r e e m e n t s +m i k e +h u r t +c r i t i c a l +i d e a +m i l o s e v i c +f i s c a l +t a r g e t s +a g r i c u l t u r a l +m u s h a r r a f +d e s i g n e d +o v e r n i g h t +b o y +d o z e n +p r o d u c e +c a l m +s t a n d a r d s +l e g i s l a t i o n +s e n t e n c e +w i t h d r a w a l +s e e d e d +c o m p o s i t e +t r a d e d +w i n t e r +d a v i s +t r u s t +c l i m a t e +i n d u s t r i e s +p r o f i t s +v o t e d +c a m b o d i a +s y r i a n +s i g n s +l o a n +s t e e l +e l e c t r i c i t y +t e h r a n +c i t i n g +h u s b a n d +b i t +c o m b a t +h a n d e d +f e s t i v a l +i m f +p r e s i d e n c y +c a p t u r e d +s t u d e n t +f i n e +s t a t i o n s +s i l v e r +c h a v e z +i n t e r +m o m e n t +t a b l e +c o u p +p o p e +p r o v i n c e s +a h m e d +b u i l d i n g s +o u t p u t +l i b e r a t i o n +m o n e t a r y +c l o s e r +c o l l e g e +f l u +a d v a n t a g e +a s s i s t a n t +g o n e +s e c r e t +x +c a t h o l i c +n a m e s +l i s t e d +f i n a l l y +c a n c e r +p r o d u c e d +m e a s u r e +f l e d +l a r g e l y +d e f e a t e d +c o n g o +b a s i c +j e a n +l o s e +p r i z e +b a n g k o k +a s k +f r a n c i s c o +r e g i s t e r e d +d i s a s t e r +g o l f +i n d i v i d u a l +c o n t i n u e s +w t o +i n i t i a l +a n y o n e +q u a k e +f a c e d +s c i e n t i s t s +m o b i l e +p o s i t i o n s +f i e l d s +r e c o v e r y +m u s e u m +n u m b e r s +d e n m a r k +m a n i l a +h o l d s +c e n t +e x +e s t a b l i s h +w i d e l y +o f f i c e s 
+i n s i s t e d +u n i t s +k a s h m i r +r e f e r e n d u m +l o c a t e d +u p o n +a l l o w i n g +s c a l e +o p p o s e d +w a t c h +i n d i c a t e d +p a r t n e r +e a r t h q u a k e +s c a n d a l +e v e r y o n e +a p p r o a c h +t r u c k +i m p o r t a n c e +t h r e a t s +p o r t u g a l +s e x +r e c o r d s +s u p e r +s t o o d +c o n t a c t +m a t e r i a l s +v i o l e n t +p l a c e s +a n a l y s t +a d d s +a l o n e +g o e s +m o v i e +e x p e c t s +a r t +s e o u l +m e x i c a n +y e s t e r d a y +p l a n e s +n i n t h +o n l i n e +h e l i c o p t e r +i m m i g r a t i o n +p a r t n e r s +i n f r a s t r u c t u r e +b o a t +v i s i t s +n o r m a l +s t a k e +g u e r r i l l a s +m a c a o +w i l l i n g +s u n +a w a r d +t e l l +s o u t h w e s t +s p o r t +e n t e r +r e s o l v e +c h a n c e s +m i a m i +e l +e n t i r e
diff --git a/training/cllh_observer.cc b/training/crf/cllh_observer.cc
index 58232769..4ec2fa65 100644
--- a/training/cllh_observer.cc
+++ b/training/crf/cllh_observer.cc
@@ -45,7 +45,7 @@ void ConditionalLikelihoodObserver::NotifyAlignmentForest(const SentenceMetadata
     cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl;
     exit(1);
   }
-  assert(!isnan(log_ref_z));
+  assert(!std::isnan(log_ref_z));
   acc_obj += (cur_obj - log_ref_z);
   trg_words += smeta.GetReference().size();
 }
diff --git a/training/cllh_observer.h b/training/crf/cllh_observer.h
index 0de47331..0de47331 100644
--- a/training/cllh_observer.h
+++ b/training/crf/cllh_observer.h
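The isnan → std::isnan change above (repeated in the two optimizer files that follow) is a portability fix: in C++11, <cmath> guarantees only std::isnan, and the C99 isnan macro may be removed by the header, so the unqualified name can fail to compile on some standard libraries. A minimal illustration of the portable form, not part of the patch:

    #include <cassert>
    #include <cmath>  // C++11: declares std::isnan; the C macro may be gone

    int main() {
      double log_ref_z = 0.0;          // stand-in for the inside score checked above
      assert(!std::isnan(log_ref_z));  // qualified call compiles everywhere
      return 0;
    }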
diff --git a/training/mpi_batch_optimize.cc b/training/crf/mpi_batch_optimize.cc
index 6432f4a2..2eff07e4 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/crf/mpi_batch_optimize.cc
@@ -142,7 +142,7 @@ struct TrainingObserver : public DecoderObserver {
       cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl;
       exit(1);
     }
-    assert(!isnan(log_ref_z));
+    assert(!std::isnan(log_ref_z));
     ref_exp -= cur_model_exp;
     acc_grad -= ref_exp;
     acc_obj += (cur_obj - log_ref_z);
diff --git a/training/crf/mpi_baum_welch.cc b/training/crf/mpi_baum_welch.cc
new file mode 100644
index 00000000..d69b1769
--- /dev/null
+++ b/training/crf/mpi_baum_welch.cc
@@ -0,0 +1,316 @@
+#include <sstream>
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include <cmath>
+
+#include "config.h"
+#ifdef HAVE_MPI
+#include <boost/mpi/timer.hpp>
+#include <boost/mpi.hpp>
+namespace mpi = boost::mpi;
+#endif
+
+#include <boost/unordered_map.hpp>
+#include <boost/functional/hash.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "sentence_metadata.h"
+#include "verbose.h"
+#include "hg.h"
+#include "prob.h"
+#include "inside_outside.h"
+#include "ff_register.h"
+#include "decoder.h"
+#include "filelib.h"
+#include "stringlib.h"
+#include "fdict.h"
+#include "weights.h"
+#include "sparse_vector.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+    ("input_weights,w",po::value<string>(),"Input feature weights file")
+    ("iterations,n",po::value<unsigned>()->default_value(50), "Number of training iterations")
+    ("training_data,t",po::value<string>(),"Training data")
+    ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+  po::options_description clo("Command line options");
+  clo.add_options()
+    ("config", po::value<string>(), "Configuration file")
+    ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+
+  if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data")) || !conf->count("decoder_config")) {
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c) {
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  int lc = 0;
+  while(in) {
+    getline(in, line);
+    if (!in) break;
+    if (lc % size == rank) c->push_back(line);
+    ++lc;
+  }
+}
+
+static const double kMINUS_EPSILON = -1e-6;
+
+struct TrainingObserver : public DecoderObserver {
+  void Reset() {
+    acc_grad.clear();
+    acc_obj = 0;
+    total_complete = 0;
+    trg_words = 0;
+  }
+
+  void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
+    *o = acc_obj;
+    for (SparseVector<double>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
+      (*g)[it->first] = it->second;
+  }
+
+  virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
+    state = 1;
+  }
+
+  // compute model expectations, denominator of objective
+  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
+    assert(state == 1);
+    trg_words += smeta.GetSourceLength();
+    state = 2;
+    SparseVector<prob_t> exps;
+    const prob_t z = InsideOutside<prob_t,
+                                   EdgeProb,
+                                   SparseVector<prob_t>,
+                                   EdgeFeaturesAndProbWeightFunction>(*hg, &exps);
+    exps /= z;
+    for (SparseVector<prob_t>::iterator it = exps.begin(); it != exps.end(); ++it)
+      acc_grad.add_value(it->first, it->second.as_float());
+
+    acc_obj += log(z);
+  }
+
+  // compute "empirical" expectations, numerator of objective
+  virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) {
+    cerr << "Shouldn't get an alignment forest!\n";
+    abort();
+  }
+
+  virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) {
+    ++total_complete;
+  }
+
+  int total_complete;
+  SparseVector<double> acc_grad;
+  double acc_obj;
+  unsigned trg_words;
+  int state;
+};
+
+void ReadConfig(const string& ini, vector<string>* out) {
+  ReadFile rf(ini);
+  istream& in = *rf.stream();
+  while(in) {
+    string line;
+    getline(in, line);
+    if (!in) continue;
+    out->push_back(line);
+  }
+}
+
+void StoreConfig(const vector<string>& cfg, istringstream* o) {
+  ostringstream os;
+  for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; }
+  o->str(os.str());
+}
+
+#if 0
+template <typename T>
+struct VectorPlus : public binary_function<vector<T>, vector<T>, vector<T> > {
+  vector<T> operator()(const vector<int>& a, const vector<int>& b) const {
+    assert(a.size() == b.size());
+    vector<T> v(a.size());
+    transform(a.begin(), a.end(), b.begin(), v.begin(), plus<T>());
+    return v;
+  }
+};
+#endif
+
+int main(int argc, char** argv) {
+#ifdef HAVE_MPI
+  mpi::environment env(argc, argv);
+  mpi::communicator world;
+  const int size = world.size();
+  const int rank = world.rank();
+#else
+  const int size = 1;
+  const int rank = 0;
+#endif
+  SetSilent(true);  // turn off verbose decoder output
+  register_feature_functions();
+
+  po::variables_map conf;
+  if (!InitCommandLine(argc, argv, &conf)) return 1;
+  const unsigned iterations = conf["iterations"].as<unsigned>();
+
+  // load cdec.ini and set up decoder
+  vector<string> cdec_ini;
+  ReadConfig(conf["decoder_config"].as<string>(), &cdec_ini);
+  istringstream ini;
+  StoreConfig(cdec_ini, &ini);
+  Decoder* decoder = new Decoder(&ini);
+  if (decoder->GetConf()["input"].as<string>() != "-") {
+    cerr << "cdec.ini must not set an input file\n";
+    return 1;
+  }
+
+  // load initial weights
+  if (rank == 0) { cerr << "Loading weights...\n"; }
+  vector<weight_t>& lambdas = decoder->CurrentWeightVector();
+  Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas);
+  if (rank == 0) { cerr << "Done loading weights.\n"; }
+
+  // freeze feature set (should be optional?)
+  const bool freeze_feature_set = true;
+  if (freeze_feature_set) FD::Freeze();
+
+  const int num_feats = FD::NumFeats();
+  if (rank == 0) cerr << "Number of features: " << num_feats << endl;
+  lambdas.resize(num_feats);
+
+  vector<double> gradient(num_feats, 0.0);
+  vector<double> rcv_grad;
+  rcv_grad.clear();
+  bool converged = false;
+
+  vector<string> corpus, test_corpus;
+  ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus);
+  assert(corpus.size() > 0);
+  if (conf.count("test_data"))
+    ReadTrainingCorpus(conf["test_data"].as<string>(), rank, size, &test_corpus);
+
+  // build map from feature id to the accumulator that should normalize
+  boost::unordered_map<std::string, boost::unordered_map<int, double>, boost::hash<std::string> > ccs;
+  vector<boost::unordered_map<int, double>* > cpd_to_acc;
+  if (rank == 0) {
+    cpd_to_acc.resize(num_feats);
+    for (unsigned f = 1; f < num_feats; ++f) {
+      string normalizer;
+      //0 ||| 7 9 ||| Bi:BOS_7=1 Bi:7_9=1 Bi:9_EOS=1 Id:a:7=1 Uni:7=1 Id:b:9=1 Uni:9=1 ||| 0
+      const string& fstr = FD::Convert(f);
+      if (fstr.find("Bi:") == 0) {
+        size_t pos = fstr.rfind('_');
+        if (pos < fstr.size())
+          normalizer = fstr.substr(0, pos);
+      } else if (fstr.find("Id:") == 0) {
+        size_t pos = fstr.rfind(':');
+        if (pos < fstr.size()) {
+          normalizer = "Emit:";
+          normalizer += fstr.substr(pos);
+        }
+      }
+      if (normalizer.size() > 0) {
+        boost::unordered_map<int, double>& acc = ccs[normalizer];
+        cpd_to_acc[f] = &acc;
+      }
+    }
+  }
+
+  TrainingObserver observer;
+  int iteration = 0;
+  while (!converged) {
+    ++iteration;
+    observer.Reset();
+#ifdef HAVE_MPI
+    mpi::timer timer;
+    world.barrier();
+#endif
+    if (rank == 0) {
+      cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n";
+      cerr << "  Testset size: " << test_corpus.size() << " sentences / proc)\n";
+      for(boost::unordered_map<string, boost::unordered_map<int,double>, boost::hash<string> >::iterator it = ccs.begin(); it != ccs.end(); ++it)
+        it->second.clear();
+    }
+    for (int i = 0; i < corpus.size(); ++i)
+      decoder->Decode(corpus[i], &observer);
+    cerr << " process " << rank << '/' << size << " done\n";
+    fill(gradient.begin(), gradient.end(), 0);
+    double objective = 0;
+    observer.SetLocalGradientAndObjective(&gradient, &objective);
+
+    unsigned total_words = 0;
+#ifdef HAVE_MPI
+    double to = 0;
+    rcv_grad.resize(num_feats, 0.0);
+    mpi::reduce(world, &gradient[0], gradient.size(), &rcv_grad[0], plus<double>(), 0);
+    swap(gradient, rcv_grad);
+    rcv_grad.clear();
+
+    reduce(world, observer.trg_words, total_words, std::plus<unsigned>(), 0);
+    mpi::reduce(world, objective, to, plus<double>(), 0);
+    objective = to;
+#else
+    total_words = observer.trg_words;
+#endif
+    if (rank == 0) {  // run optimizer only on rank=0 node
+      cerr << "TRAINING CORPUS: ln p(x)=" << objective << "\t log_2 p(x) = " << (objective/log(2)) << "\t cross entropy = " << (objective/log(2) / total_words) << "\t ppl = " << pow(2, (-objective/log(2) / total_words)) << endl;
+      for (unsigned f = 1; f < num_feats; ++f) {
+        boost::unordered_map<int, double>* m = cpd_to_acc[f];
+        if (m && gradient[f]) {
+          (*m)[f] += gradient[f];
+        }
+        for(boost::unordered_map<string, boost::unordered_map<int,double>, boost::hash<string> >::iterator it = ccs.begin(); it != ccs.end(); ++it) {
+          const boost::unordered_map<int,double>& ccs = it->second;
+          double z = 0;
+          for (boost::unordered_map<int,double>::const_iterator ci = ccs.begin(); ci != ccs.end(); ++ci)
+            z += ci->second + 1e-09;
+          double lz = log(z);
+          for (boost::unordered_map<int,double>::const_iterator ci = ccs.begin(); ci != ccs.end(); ++ci)
+            lambdas[ci->first] = log(ci->second + 1e-09) - lz;
+        }
+      }
+      Weights::SanityCheck(lambdas);
+      Weights::ShowLargestFeatures(lambdas);
+
+      converged = (iteration == iterations);
+
+      string fname = "weights.cur.gz";
+      if (converged) { fname = "weights.final.gz"; }
+      ostringstream vv;
+      vv << "Objective = " << objective << " (eval count=" << iteration << ")";
+      const string svv = vv.str();
+      Weights::WriteToFile(fname, lambdas, true, &svv);
+    }  // rank == 0
+    int cint = converged;
+#ifdef HAVE_MPI
+    mpi::broadcast(world, &lambdas[0], lambdas.size(), 0);
+    mpi::broadcast(world, cint, 0);
+    if (rank == 0) { cerr << "  ELAPSED TIME THIS ITERATION=" << timer.elapsed() << endl; }
+#endif
+    converged = cint;
+  }
+  return 0;
+}
+
diff --git a/training/mpi_compute_cllh.cc b/training/crf/mpi_compute_cllh.cc
index 066389d0..066389d0 100644
--- a/training/mpi_compute_cllh.cc
+++ b/training/crf/mpi_compute_cllh.cc
diff --git a/training/mpi_extract_features.cc b/training/crf/mpi_extract_features.cc
index 6750aa15..6750aa15 100644
--- a/training/mpi_extract_features.cc
+++ b/training/crf/mpi_extract_features.cc
diff --git a/training/mpi_extract_reachable.cc b/training/crf/mpi_extract_reachable.cc
index 2a7c2b9d..2a7c2b9d 100644
--- a/training/mpi_extract_reachable.cc
+++ b/training/crf/mpi_extract_reachable.cc
diff --git a/training/mpi_flex_optimize.cc b/training/crf/mpi_flex_optimize.cc
index b52decdc..b52decdc 100644
--- a/training/mpi_flex_optimize.cc
+++ b/training/crf/mpi_flex_optimize.cc
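The M-step in mpi_baum_welch.cc above renormalizes the accumulated expected counts within each normalizer class: features Bi:x_y share the class "Bi:x" (transitions out of tag x) and Id:w:t features share "Emit::t" (emissions from tag t), and each weight is set to λ_f = log(c_f + 1e-9) − log Σ c over its class. A self-contained sketch of that update on a hypothetical class, with toy counts rather than the program's own data structures:

    #include <cmath>
    #include <iostream>
    #include <map>
    #include <string>

    int main() {
      // Expected counts for one hypothetical normalizer class: transitions
      // out of tag 7, i.e. the features sharing the "Bi:7" accumulator.
      std::map<std::string, double> counts = {
          {"Bi:7_7", 2.5}, {"Bi:7_9", 7.0}, {"Bi:7_EOS", 0.5}};
      double z = 0;
      for (const auto& kv : counts) z += kv.second + 1e-9;  // smoothed class total
      const double lz = std::log(z);
      for (const auto& kv : counts)  // new weight = log conditional probability
        std::cout << kv.first << ' ' << (std::log(kv.second + 1e-9) - lz) << '\n';
      return 0;
    }

Exponentiated, the weights within a class sum to one, which is why the training-log line can report ln p(x) together with a per-word cross-entropy and perplexity: ppl = 2^(−ln p(x) / (N ln 2)) for N words, exactly the pow(2, -objective/log(2)/total_words) expression in the code.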
log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl; exit(1); } - assert(!isnan(log_ref_z)); + assert(!std::isnan(log_ref_z)); ref_exp -= cur_model_exp; acc_grad += ref_exp; acc_obj += (cur_obj - log_ref_z); @@ -304,6 +306,9 @@ int main(int argc, char** argv) { int write_weights_every_ith = 100; // TODO configure int titer = -1; + unsigned timeout = 0; + if (conf.count("max_walltime")) timeout = 60 * conf["max_walltime"].as<unsigned>(); + const time_t start_time = time(NULL); for (int ai = 0; ai < agenda.size(); ++ai) { const string& cur_config = agenda[ai].first; const unsigned max_iteration = agenda[ai].second; @@ -330,15 +335,20 @@ int main(int argc, char** argv) { if (rank == 0) { converged = (iter == max_iteration); Weights::SanityCheck(lambdas); - Weights::ShowLargestFeatures(lambdas); + static int cc = 0; ++cc; if (cc > 1) { Weights::ShowLargestFeatures(lambdas); } string fname = "weights.cur.gz"; if (iter % write_weights_every_ith == 0) { ostringstream o; o << "weights.epoch_" << (ai+1) << '.' << iter << ".gz"; fname = o.str(); } + const time_t cur_time = time(NULL); + if (timeout) { + if ((cur_time - start_time) > timeout) converged = true; + } if (converged && ((ai+1)==agenda.size())) { fname = "weights.final.gz"; } ostringstream vv; - vv << "total iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast<double>(corpus.size())) << " eta=" << lr->eta(titer); + double minutes = (cur_time - start_time) / 60.0; + vv << "total walltime=" << minutes << "min iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast<double>(corpus.size())) << " eta=" << lr->eta(titer); const string svv = vv.str(); cerr << svv << endl; Weights::WriteToFile(fname, lambdas, true, &svv); diff --git a/training/dep-reorder/conll2reordering-forest.pl b/training/dep-reorder/conll2reordering-forest.pl deleted file mode 100755 index 3cd226be..00000000 --- a/training/dep-reorder/conll2reordering-forest.pl +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; } -my $FIRST_CONV = "$script_dir/scripts/conll2simplecfg.pl"; -my $CDEC = "$script_dir/../../decoder/cdec"; - -our $tfile1 = "grammar1.$$"; -our $tfile2 = "text.$$"; - -die "Usage: $0 parses.conll\n" unless scalar @ARGV == 1; -open C, "<$ARGV[0]" or die "Can't read $ARGV[0]: $!"; - -END { unlink $tfile1; unlink "$tfile1.cfg"; unlink $tfile2; } - -my $first = 1; -open T, ">$tfile1" or die "Can't write $tfile1: $!"; -my $lc = 0; -my $flag = 0; -my @words = (); -while(<C>) { - print T; - chomp; - if (/^$/) { - if ($first) { $first = undef; } else { if ($flag) { print "\n"; $flag = 0; } } - $first = undef; - close T; - open SO, ">$tfile2" or die "Can't write $tfile2: $!"; - print SO "@words\n"; - close SO; - @words=(); - `$FIRST_CONV < $tfile1 > $tfile1.cfg`; - if ($? != 0) { - die "Error code: $?"; - } - my $cfg = `$CDEC -n -S 10000 -f scfg -g $tfile1.cfg -i $tfile2 --show_cfg_search_space 2>/dev/null`; - if ($? 
!= 0) { - die "Error code: $?"; - } - my @rules = split /\n/, $cfg; - shift @rules; # get rid of output - for my $rule (@rules) { - my ($lhs, $f, $e, $feats) = split / \|\|\| /, $rule; - $f =~ s/,\d\]/\]/g; - $feats = 'TOP=1' unless $feats; - if ($lhs =~ /\[Goal_\d+\]/) { $lhs = '[S]'; } - print "$lhs ||| $f ||| $feats\n"; - if ($e eq '[1] [2]') { - my ($a, $b) = split /\s+/, $f; - $feats =~ s/=1$//; - my ($x, $y) = split /_/, $feats; - print "$lhs ||| $b $a ||| ${y}_$x=1\n"; - } - $flag = 1; - } - open T, ">$tfile1" or die "Can't write $tfile1: $!"; - $lc = -1; - } else { - my ($ind, $word, @dmmy) = split /\s+/; - push @words, $word; - } - $lc++; -} -close T; - diff --git a/training/dep-reorder/george.conll b/training/dep-reorder/george.conll deleted file mode 100644 index 7eebb360..00000000 --- a/training/dep-reorder/george.conll +++ /dev/null @@ -1,4 +0,0 @@ -1 George _ GEORGE _ _ 2 X _ _ -2 hates _ HATES _ _ 0 X _ _ -3 broccoli _ BROC _ _ 2 X _ _ - diff --git a/training/dep-reorder/scripts/conll2simplecfg.pl b/training/dep-reorder/scripts/conll2simplecfg.pl deleted file mode 100755 index b101347a..00000000 --- a/training/dep-reorder/scripts/conll2simplecfg.pl +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -# 1 在 _ 10 _ _ 4 X _ _ -# 2 门厅 _ 3 _ _ 1 X _ _ -# 3 下面 _ 23 _ _ 4 X _ _ -# 4 。 _ 45 _ _ 0 X _ _ - -my @ldeps; -my @rdeps; -@ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; } -@rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; } -my $rootcat = 0; -my @cats = ('S'); -my $len = 0; -my @noposcats = ('S'); -while(<>) { - chomp; - if (/^\s*$/) { - write_cfg($len); - $len = 0; - @cats=('S'); - @noposcats = ('S'); - @ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; } - @rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; } - next; - } - $len++; - my ($pos, $word, $d1, $xcat, $d2, $d3, $headpos, $deptype) = split /\s+/; - my $cat = "C$xcat"; - my $catpos = $cat . 
"_$pos"; - push @cats, $catpos; - push @noposcats, $cat; - print "[$catpos] ||| $word ||| $word ||| Word=1\n"; - if ($headpos == 0) { $rootcat = $pos; } - if ($pos < $headpos) { - push @{$ldeps[$headpos]}, $pos; - } else { - push @{$rdeps[$headpos]}, $pos; - } -} - -sub write_cfg { - my $len = shift; - for (my $i = 1; $i <= $len; $i++) { - my @lds = @{$ldeps[$i]}; - for my $ld (@lds) { - print "[$cats[$i]] ||| [$cats[$ld],1] [$cats[$i],2] ||| [1] [2] ||| $noposcats[$ld]_$noposcats[$i]=1\n"; - } - my @rds = @{$rdeps[$i]}; - for my $rd (@rds) { - print "[$cats[$i]] ||| [$cats[$i],1] [$cats[$rd],2] ||| [1] [2] ||| $noposcats[$i]_$noposcats[$rd]=1\n"; - } - } - print "[S] ||| [$cats[$rootcat],1] ||| [1] ||| TOP=1\n"; -} - diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am new file mode 100644 index 00000000..b85bb275 --- /dev/null +++ b/training/dpmert/Makefile.am @@ -0,0 +1,27 @@ +bin_PROGRAMS = \ + mr_dpmert_map \ + mr_dpmert_reduce \ + mr_dpmert_generate_mapper_input + +noinst_PROGRAMS = \ + lo_test +TESTS = lo_test + +mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc +mr_dpmert_generate_mapper_input_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a + +# nbest2hg_SOURCES = nbest2hg.cc +# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst + +mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h +mr_dpmert_map_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a + +mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc ces.h error_surface.h line_optimizer.h mert_geometry.h +mr_dpmert_reduce_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a + +lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h +lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a + +EXTRA_DIST = test_data dpmert.pl + +AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/training/dpmert/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dpmert/ces.cc b/training/dpmert/ces.cc new file mode 100644 index 00000000..157b2d17 --- /dev/null +++ b/training/dpmert/ces.cc @@ -0,0 +1,90 @@ +#include "ces.h" + +#include <vector> +#include <sstream> +#include <boost/shared_ptr.hpp> + +// TODO, if AER is to be optimized again, we will need this +// #include "aligner.h" +#include "lattice.h" +#include "mert_geometry.h" +#include "error_surface.h" +#include "ns.h" + +using namespace std; + +const bool minimize_segments = true; // if adjacent segments have equal scores, merge them + +void ComputeErrorSurface(const SegmentEvaluator& ss, + const ConvexHull& ve, + ErrorSurface* env, + const EvaluationMetric* metric, + const Hypergraph& hg) { + vector<WordID> prev_trans; + const vector<boost::shared_ptr<MERTPoint> >& ienv = ve.GetSortedSegs(); + env->resize(ienv.size()); + SufficientStats prev_score; // defaults to 0 + int j = 0; + for (unsigned i = 0; i < ienv.size(); ++i) { + const MERTPoint& seg = *ienv[i]; + vector<WordID> trans; +#if 0 + if (type == AER) { + vector<bool> edges(hg.edges_.size(), 
false); + seg.CollectEdgesUsed(&edges); // get the set of edges in the viterbi + // alignment + ostringstream os; + const string* psrc = ss.GetSource(); + if (psrc == NULL) { + cerr << "AER scoring in VEST requires source, but it is missing!\n"; + abort(); + } + size_t pos = psrc->rfind(" ||| "); + if (pos == string::npos) { + cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl; + abort(); + } + Lattice src; + Lattice ref; + LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src); + LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref); + AlignerTools::WriteAlignment(src, ref, hg, &os, true, 0, &edges); + string tstr = os.str(); + TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans); + } else { +#endif + seg.ConstructTranslation(&trans); + //} + //cerr << "Scoring: " << TD::GetString(trans) << endl; + if (trans == prev_trans) { + if (!minimize_segments) { + ErrorSegment& out = (*env)[j]; + out.delta.fields.clear(); + out.x = seg.x; + ++j; + } + //cerr << "Identical translation, skipping scoring\n"; + } else { + SufficientStats score; + ss.Evaluate(trans, &score); + // cerr << "score= " << score->ComputeScore() << "\n"; + //string x1; score.Encode(&x1); cerr << "STATS: " << x1 << endl; + const SufficientStats delta = score - prev_score; + //string x2; delta.Encode(&x2); cerr << "DELTA: " << x2 << endl; + //string xx; delta.Encode(&xx); cerr << xx << endl; + prev_trans.swap(trans); + prev_score = score; + if ((!minimize_segments) || (!delta.IsAdditiveIdentity())) { + ErrorSegment& out = (*env)[j]; + out.delta = delta; + out.x = seg.x; + ++j; + } + } + } + // cerr << " In segments: " << ienv.size() << endl; + // cerr << "Out segments: " << j << endl; + assert(j > 0); + env->resize(j); +} + diff --git a/training/dpmert/ces.h b/training/dpmert/ces.h new file mode 100644 index 00000000..e4fa2080 --- /dev/null +++ b/training/dpmert/ces.h @@ -0,0 +1,16 @@ +#ifndef _CES_H_ +#define _CES_H_ + +class ConvexHull; +class Hypergraph; +class SegmentEvaluator; +class ErrorSurface; +class EvaluationMetric; + +void ComputeErrorSurface(const SegmentEvaluator& ss, + const ConvexHull& convex_hull, + ErrorSurface* es, + const EvaluationMetric* metric, + const Hypergraph& hg); + +#endif diff --git a/training/dpmert/divide_refs.py b/training/dpmert/divide_refs.py new file mode 100755 index 00000000..b478f918 --- /dev/null +++ b/training/dpmert/divide_refs.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +import sys + +(numRefs, outPrefix) = sys.argv[1:] +numRefs = int(numRefs) + +outs = [open(outPrefix+str(i), "w") for i in range(numRefs)] + +i = 0 +for line in sys.stdin: + outs[i].write(line) + i = (i + 1) % numRefs + +for out in outs: + out.close() diff --git a/training/dpmert/dpmert.pl b/training/dpmert/dpmert.pl new file mode 100755 index 00000000..559420f5 --- /dev/null +++ b/training/dpmert/dpmert.pl @@ -0,0 +1,618 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use File::Basename qw(basename); +require "libcall.pl"; + +my $QSUB_CMD = qsub_args(mert_memory()); + +# Default settings +my $srcFile; # deprecated +my $refFiles; # deprecated +my $default_jobs = env_default_jobs(); +my $bin_dir = $SCRIPT_DIR; +my $util_dir = 
"$SCRIPT_DIR/../utils"; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/mr_dpmert_generate_mapper_input"; +my $MAPPER = "$bin_dir/mr_dpmert_map"; +my $REDUCER = "$bin_dir/mr_dpmert_reduce"; +my $parallelize = "$util_dir/parallelize.pl"; +my $libcall = "$util_dir/libcall.pl"; +my $sentserver = "$util_dir/sentserver"; +my $sentclient = "$util_dir/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 200; +my $rand_directions = 15; +my $iteration = 1; +my $best_weights; +my $max_iterations = 15; +my $optimization_iters = 6; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "9g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $initialWeights; +my $bleu_weight=1; +my $use_make = 1; # use make to parallelize line search +my $useqsub; +my $pass_suffix = ''; +my $devset; +# Process command-line options +if (GetOptions( + "config=s" => \$iniFile, + "weights=s" => \$initialWeights, + "devset=s" => \$devset, + "jobs=i" => \$jobs, + "pass-suffix=s" => \$pass_suffix, + "help" => \$help, + "qsub" => \$useqsub, + "iterations=i" => \$max_iterations, + "pmem=s" => \$pmem, + "random-directions=i" => \$rand_directions, + "metric=s" => \$metric, + "source-file=s" => \$srcFile, + "output-dir=s" => \$dir, +) == 0 || @ARGV!=0 || $help) { + print_help(); + exit; +} + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (defined $srcFile || defined $refFiles) { + die <<EOT; + + The options --ref-files and --source-file are no longer supported. + Please specify the input file and its reference translations with + --devset FILE + +EOT +} + +if (!defined $iniFile) { push @missing_args, "--config"; } +if (!defined $devset) { push @missing_args, "--devset"; } +if (!defined $initialWeights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . 
"\nUse --help for more information.\n" if (@missing_args); + +if ($metric =~ /^(combi|ter)$/i) { + $lines_per_mapper = 40; +} elsif ($metric =~ /^meteor$/i) { + $lines_per_mapper = 2000; # start up time is really high for METEOR +} + + +my $nodelist; +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + +# used in sorting scores +my $DIR_FLAG = '-r'; +if ($metric =~ /^ter$|^aer$/i) { + $DIR_FLAG = ''; +} + +unless ($dir){ + $dir = "dpmert"; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + +my $decoderBase = basename($decoder); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; + +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; + +sub dirsize { + opendir ISEMPTY,$_[0]; + return scalar(readdir(ISEMPTY))-1; +} +if (-e $dir) { + # allow preexisting logfile, binaries, but not dist-dpmert.pl outputs + die "ERROR: output directory $dir already exists (remove or use --output-dir dir)\n\n"; +} else { + mkdir "$dir" or die "Can't mkdir $dir: $!"; + mkdir "$dir/hgs" or die; + mkdir "$dir/scripts" or die; + print STDERR <<EOT; + DECODER: $decoder + INI FILE: $iniFile + WORKING DIR: $dir + DEVSET: $devset + EVAL METRIC: $metric + MAX ITERATIONS: $max_iterations + PARALLEL JOBS: $jobs + HEAD NODE: $host + PMEM (DECODING): $pmem + INITIAL WEIGHTS: $initialWeights +EOT +} + +# Generate initial files and values +check_call("cp $iniFile $newIniFile"); +check_call("cp $initialWeights $dir/weights.0"); +$iniFile = $newIniFile; + +split_devset($devset, "$dir/dev.input.raw", "$dir/dev.refs"); +my $refs = "-r $dir/dev.refs"; +my $newsrc = "$dir/dev.input"; +enseg("$dir/dev.input.raw", $newsrc); +$srcFile = $newsrc; +my $devSize = 0; +open F, "<$srcFile" or die "Can't read $srcFile: $!"; +while(<F>) { $devSize++; } +close F; + +unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --workdir $dir --use-fork -p $pmem -e $logdir -j 
$jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize --workdir $dir -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); + my $dec_score = check_output("cat $runFile | $SCORER $refs -m $metric"); + chomp $dec_score; + print STDERR "DECODER SCORE: $dec_score\n"; + + # save space + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); + + # run optimizer + print STDERR "RUNNING OPTIMIZER AT "; + print STDERR unchecked_output("date"); + my $mergeLog="$logdir/prune-merge.log.$iteration"; + + my $score = 0; + my $icc = 0; + my $inweights="$dir/weights.$im1"; + for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) { + print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n"; + print STDERR unchecked_output("date"); + $icc++; + $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter"; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + check_call("mkdir -p $dir/splag.$im1"); + $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput."; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; + my @shards = grep { /^mapinput\./ } readdir(DIR); + closedir DIR; + die "No shards!" 
unless scalar @shards > 0;
+  my $joblist = "";
+  my $nmappers = 0;
+  my @mapoutputs = ();
+  @cleanupcmds = ();
+  my %o2i = ();
+  my $first_shard = 1;
+  my $mkfile; # only used with makefiles
+  my $mkfilename;
+  if ($use_make) {
+    $mkfilename = "$dir/splag.$im1/domap.mk";
+    open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!";
+    print $mkfile "all: $dir/splag.$im1/map.done\n\n";
+  }
+  my @mkouts = (); # only used with makefiles
+  for my $shard (@shards) {
+    my $mapoutput = $shard;
+    my $client_name = $shard;
+    $client_name =~ s/mapinput\.//;
+    $client_name = "dpmert.$client_name";
+    $mapoutput =~ s/mapinput/mapoutput/;
+    push @mapoutputs, "$dir/splag.$im1/$mapoutput";
+    $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
+    my $script = "$MAPPER -s $srcFile -m $metric $refs < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
+    if ($use_make) {
+      my $script_file = "$dir/scripts/map.$shard";
+      open F, ">$script_file" or die "Can't write $script_file: $!";
+      print F "#!/bin/bash\n";
+      print F "$script\n";
+      close F;
+      my $output = "$dir/splag.$im1/$mapoutput";
+      push @mkouts, $output;
+      chmod(0755, $script_file) or die "Can't chmod $script_file: $!";
+      if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
+      print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n";
+    } else {
+      my $script_file = "$dir/scripts/map.$shard";
+      open F, ">$script_file" or die "Can't write $script_file: $!";
+      print F "$script\n";
+      close F;
+      if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
+
+      $nmappers++;
+      my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
+      my $jobid = check_output("$qcmd");
+      chomp $jobid;
+      $jobid =~ s/^(\d+)(.*?)$/$1/g;
+      $jobid =~ s/^Your job (\d+) .*$/$1/;
+      push(@cleanupcmds, "qdel $jobid 2> /dev/null");
+      print STDERR " $jobid";
+      if ($joblist eq "") { $joblist = $jobid; }
+      else { $joblist = $joblist . "\|" . 
$jobid; } + } + } + if ($use_make) { + print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; + close $mkfile; + my $mcmd = "make -j $jobs -f $mkfilename"; + print STDERR "\nExecuting: $mcmd\n"; + check_call($mcmd); + } else { + print STDERR "\nLaunched $nmappers mappers.\n"; + sleep 8; + print STDERR "Waiting for mappers to complete...\n"; + while ($nmappers > 0) { + sleep 5; + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); + $nmappers = scalar @livejobs; + } + print STDERR "All mappers complete.\n"; + } + my $tol = 0; + my $til = 0; + for my $mo (@mapoutputs) { + my $olines = get_lines($mo); + my $ilines = get_lines($o2i{$mo}); + $tol += $olines; + $til += $ilines; + die "$mo: output lines ($olines) doesn't match input lines ($ilines)" unless $olines==$ilines; + } + print STDERR "Results for $tol/$til lines\n"; + print STDERR "\nSORTING AND RUNNING VEST REDUCER\n"; + print STDERR unchecked_output("date"); + $cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -m $metric > $dir/redoutput.$im1"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + $cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1"; + # sort returns failure even when it doesn't fail for some reason + my $best=unchecked_output("$cmd"); chomp $best; + print STDERR "$best\n"; + my ($oa, $x, $xscore) = split /\|/, $best; + $score = $xscore; + print STDERR "PROJECTED SCORE: $score\n"; + if (abs($x) < $epsilon) { + print STDERR "\nOPTIMIZER: no score improvement: abs($x) < $epsilon\n"; + last; + } + my $psd = $score - $last_score; + $last_score = $score; + if (abs($psd) < $epsilon) { + print STDERR "\nOPTIMIZER: no score improvement: abs($psd) < $epsilon\n"; + last; + } + my ($origin, $axis) = split /\s+/, $oa; + + my %ori = convert($origin); + my %axi = convert($axis); + + my $finalFile="$dir/weights.$im1-$opt_iter"; + open W, ">$finalFile" or die "Can't write: $finalFile: $!"; + my $norm = 0; + for my $k (sort keys %ori) { + my $dd = $ori{$k} + $axi{$k} * $x; + $norm += $dd * $dd; + } + $norm = sqrt($norm); + $norm = 1; + for my $k (sort keys %ori) { + my $v = ($ori{$k} + $axi{$k} * $x) / $norm; + print W "$k $v\n"; + } + check_call("rm $dir/splag.$im1/*"); + $inweights = $finalFile; + } + $lastWeightsFile = "$dir/weights.$iteration"; + check_call("cp $inweights $lastWeightsFile"); + if ($icc < 2) { + print STDERR "\nREACHED STOPPING CRITERION: score change too little\n"; + last; + } + $lastPScore = $score; + $iteration++; + print STDERR "\n==========\n"; +} + +check_call("cp $lastWeightsFile $dir/weights.final"); +print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w <this file> with the decoder)\n\n"; +print STDOUT "$dir/weights.final\n"; +exit 0; + + +sub get_lines { + my $fn = shift @_; + open FL, "<$fn" or die "Couldn't read $fn: $!"; + my $lc = 0; + while(<FL>) { $lc++; } + return $lc; +} + +sub read_weights_file { + my ($file) = @_; + open F, "<$file" or die "Couldn't read $file: $!"; + my @r = (); + my $pm = -1; + while(<F>) { + next if /^#/; + next if /^\s*$/; + chomp; + if (/^(.+)\s+(.+)$/) { + my $m = $1; + my $w = $2; + die "Weights out of order: $m <= $pm" unless $m > $pm; + push @r, $w; + } else { + warn "Unexpected feature name in weight file: $_"; + } + } + close F; + return join ' ', @r; +} + +sub update_weights_file { + my ($neww, $rfn, $rpts) = @_; + my @feats = @$rfn; + my @pts = @$rpts; + my $num_feats = scalar @feats; + my $num_pts = scalar @pts; + die "$num_feats (num_feats) != $num_pts (num_pts)" unless 
$num_feats == $num_pts;
+  open G, ">$neww" or die;
+  for (my $i = 0; $i < $num_feats; $i++) {
+    my $f = $feats[$i];
+    my $lambda = $pts[$i];
+    print G "$f $lambda\n";
+  }
+  close G;
+}
+
+sub enseg {
+  my $src = shift;
+  my $newsrc = shift;
+  open(SRC, $src);
+  open(NEWSRC, ">$newsrc");
+  my $i=0;
+  while (my $line=<SRC>){
+    chomp $line;
+    if ($line =~ /^\s*<seg/i) {
+      if($line =~ /id="[0-9]+"/) {
+        print NEWSRC "$line\n";
+      } else {
+        die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+      }
+    } else {
+      print NEWSRC "<seg id=\"$i\">$line</seg>\n";
+    }
+    $i++;
+  }
+  close SRC;
+  close NEWSRC;
+}
+
+sub print_help {
+
+  my $executable = basename($0); chomp $executable;
+  print << "Help";
+
+Usage: $executable [options] <ini file>
+
+  $executable [options]
+    Runs a complete MERT optimization. Required options are --weights,
+    --devset, and --config.
+
+Options:
+
+  --config <file>  [-c <file>]
+    The decoder configuration file.
+
+  --devset <file>  [-d <file>]
+    The source *and* references for the development set.
+
+  --weights <file> [-w <file>]
+    A file specifying initial feature weights. The format is
+    FeatureName_1 value1
+    FeatureName_2 value2
+    **All and only the weights listed in <file> will be optimized!**
+
+  --metric <name>
+    Metric to optimize.
+    Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
+
+  --iterations <M>
+    Maximum number of iterations to run. If not specified, defaults
+    to 15.
+
+  --pass-suffix <S>
+    If the decoder is doing multi-pass decoding, the pass suffix "2",
+    "3", etc., is used to control what iteration of weights is set.
+
+  --rand-directions <num>
+    MERT will attempt to optimize along all of the principal directions;
+    set this parameter to explore additional random directions. Defaults
+    to 15.
+
+  --output-dir <dir>
+    Directory for intermediate and output files.
+
+  --help
+    Print this message and exit.
+
+Job control options:
+
+  --jobs <I>
+    Number of decoder processes to run in parallel. [default=$default_jobs]
+
+  --qsub
+    Use qsub to run jobs in parallel (qsub must be configured in
+    environment/LocalEnvironment.pm)
+
+  --pmem <N>
+    Amount of physical memory requested for parallel decoding jobs
+    (used with qsub requests only)
+
+Help
+}
+
+sub convert {
+  my ($str) = @_;
+  my @ps = split /;/, $str;
+  my %dict = ();
+  for my $p (@ps) {
+    my ($k, $v) = split /=/, $p;
+    $dict{$k} = $v;
+  }
+  return %dict;
+}
+
+
+
+sub cmdline {
+  return join ' ',($0,@ORIG_ARGV);
+}
+
+#buggy: last arg gets quoted sometimes?
+my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]};
+my $shell_escape_in_quote=qr{[\\"\$`!]};
+
+sub escape_shell {
+  my ($arg)=@_;
+  return undef unless defined $arg;
+  if ($arg =~ /$is_shell_special/) {
+    $arg =~ s/($shell_escape_in_quote)/\\$1/g;
+    return "\"$arg\"";
+  }
+  return $arg;
+}
+
+sub escaped_shell_args {
+  return map {local $_=$_;chomp;escape_shell($_)} @_;
+}
+
+sub escaped_shell_args_str {
+  return join ' ',&escaped_shell_args(@_);
+}
+
+sub escaped_cmdline {
+  return "$0 ".&escaped_shell_args_str(@ORIG_ARGV);
+}
+
+sub split_devset {
+  my ($infile, $outsrc, $outref) = @_;
+  open F, "<$infile" or die "Can't read $infile: $!";
+  open S, ">$outsrc" or die "Can't write $outsrc: $!";
+  open R, ">$outref" or die "Can't write $outref: $!";
+  while(<F>) {
+    chomp;
+    my ($src, @refs) = split /\s*\|\|\|\s*/;
+    die "Malformed devset line: $_\n" unless scalar @refs > 0;
+    print S "$src\n";
+    print R join(' ||| ', @refs) . "\n";
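+    # (editor's note) each devset line is "SOURCE ||| REF1 ||| REF2 ...";
+    # the source side goes to $outsrc and the " ||| "-joined references
+    # to $outref, one sentence per line.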
"\n"; + } + close R; + close S; + close F; +} + diff --git a/training/dpmert/error_surface.cc b/training/dpmert/error_surface.cc new file mode 100644 index 00000000..515b67f8 --- /dev/null +++ b/training/dpmert/error_surface.cc @@ -0,0 +1,42 @@ +#include "error_surface.h" + +#include <cassert> +#include <sstream> + +using namespace std; + +ErrorSurface::~ErrorSurface() {} + +void ErrorSurface::Serialize(std::string* out) const { + const int segments = this->size(); + ostringstream os(ios::binary); + os.write((const char*)&segments,sizeof(segments)); + for (int i = 0; i < segments; ++i) { + const ErrorSegment& cur = (*this)[i]; + string senc; + cur.delta.Encode(&senc); + assert(senc.size() < 1024); + unsigned char len = senc.size(); + os.write((const char*)&cur.x, sizeof(cur.x)); + os.write((const char*)&len, sizeof(len)); + os.write((const char*)&senc[0], len); + } + *out = os.str(); +} + +void ErrorSurface::Deserialize(const std::string& in) { + istringstream is(in, ios::binary); + int segments; + is.read((char*)&segments, sizeof(segments)); + this->resize(segments); + for (int i = 0; i < segments; ++i) { + ErrorSegment& cur = (*this)[i]; + unsigned char len; + is.read((char*)&cur.x, sizeof(cur.x)); + is.read((char*)&len, sizeof(len)); + string senc(len, '\0'); assert(senc.size() == len); + is.read((char*)&senc[0], len); + cur.delta = SufficientStats(senc); + } +} + diff --git a/training/dpmert/error_surface.h b/training/dpmert/error_surface.h new file mode 100644 index 00000000..bb65847b --- /dev/null +++ b/training/dpmert/error_surface.h @@ -0,0 +1,24 @@ +#ifndef _ERROR_SURFACE_H_ +#define _ERROR_SURFACE_H_ + +#include <vector> +#include <string> + +#include "ns.h" + +class Score; + +struct ErrorSegment { + double x; + SufficientStats delta; + ErrorSegment() : x(0), delta() {} +}; + +class ErrorSurface : public std::vector<ErrorSegment> { + public: + ~ErrorSurface(); + void Serialize(std::string* out) const; + void Deserialize(const std::string& in); +}; + +#endif diff --git a/training/dpmert/line_mediator.pl b/training/dpmert/line_mediator.pl new file mode 100755 index 00000000..bc2bb24c --- /dev/null +++ b/training/dpmert/line_mediator.pl @@ -0,0 +1,116 @@ +#!/usr/bin/perl -w +#hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication + +# if you don't know how to fork/exec in a C program, this could be helpful under limited cirmustances (would be ok to liaise with sentserver) + +#WARNING: because it waits for the result from command 2 after sending every line, and especially if command 1 does the same, using sentserver as command 2 won't actually buy you any real parallelism. + +use strict; +use IPC::Open2; +use POSIX qw(pipe dup2 STDIN_FILENO STDOUT_FILENO); + +my $quiet=!$ENV{DEBUG}; +$quiet=1 if $ENV{QUIET}; +sub info { + local $,=' '; + print STDERR @_ unless $quiet; +} + +my $mode='CROSS'; +my $ser='DIRECT'; +$mode='PIPE' if $ENV{PIPE}; +$mode='SNAKE' if $ENV{SNAKE}; +$mode='CROSS' if $ENV{CROSS}; +$ser='SERIAL' if $ENV{SERIAL}; +$ser='DIRECT' if $ENV{DIRECT}; +$ser='SERIAL' if $mode eq 'SNAKE'; +info("mode: $mode\n"); +info("connection: $ser\n"); + + +my @c1; +if (scalar @ARGV) { + do { + push @c1,shift + } while scalar @ARGV && $c1[$#c1] ne '--'; +} +pop @c1; +my @c2=@ARGV; +@ARGV=(); +(scalar @c1 && scalar @c2) || die qq{ +usage: $0 cmd1 args -- cmd2 args +all options are environment variables. +DEBUG=1 env var enables debugging output. 
+info("1 cmd:",@c1,"\n");
+info("2 cmd:",@c2,"\n");
+
+sub lineto {
+  select $_[0];
+  $|=1;
+  shift;
+  print @_;
+}
+
+if ($ser eq 'SERIAL') {
+  my ($R1,$W1,$R2,$W2);
+  my $c1p=open2($R1,$W1,@c1); # Open2 R W backward from Open3.
+  my $c2p=open2($R2,$W2,@c2);
+  if ($mode eq 'CROSS') {
+    while(<$R1>) {
+      info("1:",$_);
+      lineto($W2,$_);
+      last unless defined ($_=<$R2>);
+      info("1|2:",$_);
+      lineto($W1,$_);
+    }
+  } else {
+    my $snake=$mode eq 'SNAKE';
+    while(<STDIN>) {
+      info("IN:",$_);
+      lineto($W1,$_);
+      last unless defined ($_=<$R1>);
+      info("IN|1:",$_);
+      lineto($W2,$_);
+      last unless defined ($_=<$R2>);
+      info("IN|1|2:",$_);
+      if ($snake) {
+        lineto($W1,$_);
+        last unless defined ($_=<$R1>);
+        info("IN|1|2|1:",$_);
+      }
+      lineto(*STDOUT,$_);
+    }
+  }
+} else {
+  info("DIRECT mode\n");
+  my @rw1=POSIX::pipe();
+  my @rw2=POSIX::pipe();
+  my $pid=undef;
+  $SIG{CHLD} = sub { wait };
+  while (not defined ($pid=fork())) {
+    sleep 1;
+  }
+  my $pipe = $mode eq 'PIPE';
+  unless ($pipe) {
+    POSIX::close(STDOUT_FILENO);
+    POSIX::close(STDIN_FILENO);
+  }
+  if ($pid) {
+    POSIX::dup2($rw1[1],STDOUT_FILENO);
+    POSIX::dup2($rw2[0],STDIN_FILENO) unless $pipe;
+    exec @c1;
+  } else {
+    POSIX::dup2($rw2[1],STDOUT_FILENO) unless $pipe;
+    POSIX::dup2($rw1[0],STDIN_FILENO);
+    exec @c2;
+  }
+  while (wait()!=-1) {}
+}
diff --git a/training/dpmert/line_optimizer.cc b/training/dpmert/line_optimizer.cc
new file mode 100644
index 00000000..9cf33502
--- /dev/null
+++ b/training/dpmert/line_optimizer.cc
@@ -0,0 +1,114 @@
+#include "line_optimizer.h"
+
+#include <limits>
+#include <algorithm>
+
+#include "sparse_vector.h"
+#include "ns.h"
+
+using namespace std;
+
+typedef ErrorSurface::const_iterator ErrorIter;
+
+// sort by increasing x-intercepts
+struct IntervalComp {
+  bool operator() (const ErrorIter& a, const ErrorIter& b) const {
+    return a->x < b->x;
+  }
+};
+
+double LineOptimizer::LineOptimize(
+    const EvaluationMetric* metric,
+    const vector<ErrorSurface>& surfaces,
+    const LineOptimizer::ScoreType type,
+    float* best_score,
+    const double epsilon) {
+  // cerr << "MIN=" << MINIMIZE_SCORE << " MAX=" << MAXIMIZE_SCORE << " MINE=" << type << endl;
+  vector<ErrorIter> all_ints;
+  for (vector<ErrorSurface>::const_iterator i = surfaces.begin();
+       i != surfaces.end(); ++i) {
+    const ErrorSurface& surface = *i;
+    for (ErrorIter j = surface.begin(); j != surface.end(); ++j)
+      all_ints.push_back(j);
+  }
+  sort(all_ints.begin(), all_ints.end(), IntervalComp());
+  double last_boundary = all_ints.front()->x;
+  SufficientStats acc;
+  float& cur_best_score = *best_score;
+  cur_best_score = (type == MAXIMIZE_SCORE ? 
+ -numeric_limits<float>::max() : numeric_limits<float>::max()); + bool left_edge = true; + double pos = numeric_limits<double>::quiet_NaN(); + for (vector<ErrorIter>::iterator i = all_ints.begin(); + i != all_ints.end(); ++i) { + const ErrorSegment& seg = **i; + if (seg.x - last_boundary > epsilon) { + float sco = metric->ComputeScore(acc); + if ((type == MAXIMIZE_SCORE && sco > cur_best_score) || + (type == MINIMIZE_SCORE && sco < cur_best_score) ) { + cur_best_score = sco; + if (left_edge) { + pos = seg.x - 0.1; + left_edge = false; + } else { + pos = last_boundary + (seg.x - last_boundary) / 2; + } + //cerr << "NEW BEST: " << pos << " (score=" << cur_best_score << ")\n"; + } + // string xx = metric->DetailedScore(acc); cerr << "---- " << xx; +#undef SHOW_ERROR_SURFACES +#ifdef SHOW_ERROR_SURFACES + cerr << "x=" << seg.x << "\ts=" << sco << "\n"; +#endif + last_boundary = seg.x; + } + // cerr << "x-boundary=" << seg.x << "\n"; + //string x2; acc.Encode(&x2); cerr << " ACC: " << x2 << endl; + //string x1; seg.delta.Encode(&x1); cerr << " DELTA: " << x1 << endl; + acc += seg.delta; + } + float sco = metric->ComputeScore(acc); + if ((type == MAXIMIZE_SCORE && sco > cur_best_score) || + (type == MINIMIZE_SCORE && sco < cur_best_score) ) { + cur_best_score = sco; + if (left_edge) { + pos = 0; + } else { + pos = last_boundary + 1000.0; + } + } + return pos; +} + +void LineOptimizer::RandomUnitVector(const vector<int>& features_to_optimize, + SparseVector<double>* axis, + RandomNumberGenerator<boost::mt19937>* rng) { + axis->clear(); + for (int i = 0; i < features_to_optimize.size(); ++i) + axis->set_value(features_to_optimize[i], rng->NextNormal(0.0,1.0)); + (*axis) /= axis->l2norm(); +} + +void LineOptimizer::CreateOptimizationDirections( + const vector<int>& features_to_optimize, + int additional_random_directions, + RandomNumberGenerator<boost::mt19937>* rng, + vector<SparseVector<double> >* dirs + , bool include_orthogonal + ) { + dirs->clear(); + typedef SparseVector<double> Dir; + vector<Dir> &out=*dirs; + int i=0; + if (include_orthogonal) + for (;i<features_to_optimize.size();++i) { + Dir d; + d.set_value(features_to_optimize[i],1.); + out.push_back(d); + } + out.resize(i+additional_random_directions); + for (;i<out.size();++i) + RandomUnitVector(features_to_optimize, &out[i], rng); + cerr << "Generated " << out.size() << " total axes to optimize along.\n"; +} + diff --git a/training/dpmert/line_optimizer.h b/training/dpmert/line_optimizer.h new file mode 100644 index 00000000..83819f41 --- /dev/null +++ b/training/dpmert/line_optimizer.h @@ -0,0 +1,48 @@ +#ifndef LINE_OPTIMIZER_H_ +#define LINE_OPTIMIZER_H_ + +#include <vector> + +#include "sparse_vector.h" +#include "error_surface.h" +#include "sampler.h" + +class EvaluationMetric; +class Weights; + +struct LineOptimizer { + + // use MINIMIZE_SCORE for things like TER, WER + // MAXIMIZE_SCORE for things like BLEU + enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE }; + + // merge all the error surfaces together into a global + // error surface and find (the middle of) the best segment + static double LineOptimize( + const EvaluationMetric* metric, + const std::vector<ErrorSurface>& envs, + const LineOptimizer::ScoreType type, + float* best_score, + const double epsilon = 1.0/65536.0); + + // return a random vector of length 1 where all dimensions + // not listed in dimensions will be 0. 
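+  // (editor's note: the implementation in line_optimizer.cc draws each
+  // listed dimension from N(0,1) and divides by the L2 norm, so the result
+  // is distributed uniformly on the unit sphere of those dimensions.)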
+ static void RandomUnitVector(const std::vector<int>& dimensions, + SparseVector<double>* axis, + RandomNumberGenerator<boost::mt19937>* rng); + + // generate a list of directions to optimize; the list will + // contain the orthogonal vectors corresponding to the dimensions in + // primary and then additional_random_directions directions in those + // dimensions as well. All vectors will be length 1. + static void CreateOptimizationDirections( + const std::vector<int>& primary, + int additional_random_directions, + RandomNumberGenerator<boost::mt19937>* rng, + std::vector<SparseVector<double> >* dirs + , bool include_primary=true + ); + +}; + +#endif diff --git a/training/dpmert/lo_test.cc b/training/dpmert/lo_test.cc new file mode 100644 index 00000000..d89bcd99 --- /dev/null +++ b/training/dpmert/lo_test.cc @@ -0,0 +1,229 @@ +#define BOOST_TEST_MODULE LineOptimizerTest +#include <boost/test/unit_test.hpp> +#include <boost/test/floating_point_comparison.hpp> + +#include <cmath> +#include <iostream> +#include <fstream> + +#include <boost/shared_ptr.hpp> + +#include "ns.h" +#include "ns_docscorer.h" +#include "ces.h" +#include "fdict.h" +#include "hg.h" +#include "kbest.h" +#include "hg_io.h" +#include "filelib.h" +#include "inside_outside.h" +#include "viterbi.h" +#include "mert_geometry.h" +#include "line_optimizer.h" + +using namespace std; + +const char* ref11 = "australia reopens embassy in manila"; +const char* ref12 = "( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack ."; +const char* ref21 = "australia reopened manila embassy"; +const char* ref22 = "( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack ."; +const char* ref31 = "australia to reopen embassy in manila"; +const char* ref32 = "( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . 
seven weeks ago , the embassy was shut down due to so - called confirmed terrorist attack threats ."; +const char* ref41 = "australia to re - open its embassy to manila"; +const char* ref42 = "( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so - called \" clear \" threat of terrorist attack 7 weeks ago ."; + +BOOST_AUTO_TEST_CASE( TestCheckNaN) { + double x = 0; + double y = 0; + double z = x / y; + BOOST_CHECK_EQUAL(true, std::isnan(z)); +} + +BOOST_AUTO_TEST_CASE(TestConvexHull) { + boost::shared_ptr<MERTPoint> a1(new MERTPoint(-1, 0)); + boost::shared_ptr<MERTPoint> b1(new MERTPoint(1, 0)); + boost::shared_ptr<MERTPoint> a2(new MERTPoint(-1, 1)); + boost::shared_ptr<MERTPoint> b2(new MERTPoint(1, -1)); + vector<boost::shared_ptr<MERTPoint> > sa; sa.push_back(a1); sa.push_back(b1); + vector<boost::shared_ptr<MERTPoint> > sb; sb.push_back(a2); sb.push_back(b2); + ConvexHull a(sa); + cerr << a << endl; + ConvexHull b(sb); + ConvexHull c = a; + c *= b; + cerr << a << " (*) " << b << " = " << c << endl; + BOOST_CHECK_EQUAL(3, c.size()); +} + +BOOST_AUTO_TEST_CASE(TestConvexHullInside) { + const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}"; + Hypergraph hg; + istringstream instr(json); + HypergraphIO::ReadFromJSON(&instr, &hg); + SparseVector<double> wts; + wts.set_value(FD::Convert("f1"), 0.4); + wts.set_value(FD::Convert("f2"), 1.0); + hg.Reweight(wts); + vector<pair<vector<WordID>, prob_t> > list; + std::vector<SparseVector<double> > features; + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, 10); + for (int i = 0; i < 10; ++i) { + const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(hg.nodes_.size() - 1, i); + if (!d) break; + cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; + } + SparseVector<double> dir; dir.set_value(FD::Convert("f1"), 1.0); + ConvexHullWeightFunction wf(wts, dir); + ConvexHull env = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf); + cerr << env << endl; + const vector<boost::shared_ptr<MERTPoint> >& segs = env.GetSortedSegs(); + dir *= segs[1]->x; + wts += dir; + hg.Reweight(wts); + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest2(hg, 10); + for (int i = 0; i < 10; ++i) { + const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = + kbest2.LazyKthBest(hg.nodes_.size() - 1, i); + if (!d) break; + cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; + } + for (unsigned i = 0; i < 
segs.size(); ++i) { + cerr << "seg=" << i << endl; + vector<WordID> trans; + segs[i]->ConstructTranslation(&trans); + cerr << TD::GetString(trans) << endl; + } +} + +BOOST_AUTO_TEST_CASE( TestS1) { + int fPhraseModel_0 = FD::Convert("PhraseModel_0"); + int fPhraseModel_1 = FD::Convert("PhraseModel_1"); + int fPhraseModel_2 = FD::Convert("PhraseModel_2"); + int fLanguageModel = FD::Convert("LanguageModel"); + int fWordPenalty = FD::Convert("WordPenalty"); + int fPassThrough = FD::Convert("PassThrough"); + SparseVector<double> wts; + wts.set_value(fWordPenalty, 4.25); + wts.set_value(fLanguageModel, -1.1165); + wts.set_value(fPhraseModel_0, -0.96); + wts.set_value(fPhraseModel_1, -0.65); + wts.set_value(fPhraseModel_2, -0.77); + wts.set_value(fPassThrough, -10.0); + + vector<int> to_optimize; + to_optimize.push_back(fWordPenalty); + to_optimize.push_back(fLanguageModel); + to_optimize.push_back(fPhraseModel_0); + to_optimize.push_back(fPhraseModel_1); + to_optimize.push_back(fPhraseModel_2); + + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); + + Hypergraph hg; + ReadFile rf(path + "/0.json.gz"); + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(wts); + + Hypergraph hg2; + ReadFile rf2(path + "/1.json.gz"); + HypergraphIO::ReadFromJSON(rf2.stream(), &hg2); + hg2.Reweight(wts); + + vector<vector<WordID> > refs1(4); + TD::ConvertSentence(ref11, &refs1[0]); + TD::ConvertSentence(ref21, &refs1[1]); + TD::ConvertSentence(ref31, &refs1[2]); + TD::ConvertSentence(ref41, &refs1[3]); + vector<vector<WordID> > refs2(4); + TD::ConvertSentence(ref12, &refs2[0]); + TD::ConvertSentence(ref22, &refs2[1]); + TD::ConvertSentence(ref32, &refs2[2]); + TD::ConvertSentence(ref42, &refs2[3]); + vector<ConvexHull> envs(2); + + RandomNumberGenerator<boost::mt19937> rng; + + vector<SparseVector<double> > axes; // directions to search + LineOptimizer::CreateOptimizationDirections( + to_optimize, + 10, + &rng, + &axes); + assert(axes.size() == 10 + to_optimize.size()); + for (unsigned i = 0; i < axes.size(); ++i) + cerr << axes[i] << endl; + const SparseVector<double>& axis = axes[0]; + + cerr << "Computing Viterbi envelope using inside algorithm...\n"; + cerr << "axis: " << axis << endl; + clock_t t_start=clock(); + ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction + envs[0] = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf); + envs[1] = Inside<ConvexHull, ConvexHullWeightFunction>(hg2, NULL, wf); + + vector<ErrorSurface> es(2); + EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); + boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(refs1); + boost::shared_ptr<SegmentEvaluator> scorer2 = metric->CreateSegmentEvaluator(refs2); + ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg); + ComputeErrorSurface(*scorer2, envs[1], &es[1], metric, hg2); + cerr << envs[0].size() << " " << envs[1].size() << endl; + cerr << es[0].size() << " " << es[1].size() << endl; + envs.clear(); + clock_t t_env=clock(); + float score; + double m = LineOptimizer::LineOptimize(metric,es, LineOptimizer::MAXIMIZE_SCORE, &score); + clock_t t_opt=clock(); + cerr << "line optimizer returned: " << m << " (SCORE=" << score << ")\n"; + BOOST_CHECK_CLOSE(0.48719698, score, 1e-5); + SparseVector<double> res = axis; + res *= m; + res += wts; + cerr << "res: " << res << endl; + cerr << "ENVELOPE PROCESSING=" << (static_cast<double>(t_env - t_start) / 
1000.0) << endl; + cerr << " LINE OPTIMIZATION=" << (static_cast<double>(t_opt - t_env) / 1000.0) << endl; + hg.Reweight(res); + hg2.Reweight(res); + vector<WordID> t1,t2; + ViterbiESentence(hg, &t1); + ViterbiESentence(hg2, &t2); + cerr << TD::GetString(t1) << endl; + cerr << TD::GetString(t2) << endl; +} + +BOOST_AUTO_TEST_CASE(TestZeroOrigin) { + const string json = "{\"rules\":[1,\"[X7] ||| blA ||| without ||| LHSProb=3.92173 LexE2F=2.90799 LexF2E=1.85003 GenerativeProb=10.5381 RulePenalty=1 XFE=2.77259 XEF=0.441833 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=0.693147\",2,\"[X7] ||| blA ||| except ||| LHSProb=4.92173 LexE2F=3.90799 LexF2E=1.85003 GenerativeProb=11.5381 RulePenalty=1 XFE=2.77259 XEF=1.44183 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=1.69315\",3,\"[S] ||| [X7,1] ||| [1] ||| GlueTop=1\",4,\"[X28] ||| EnwAn ||| title ||| LHSProb=3.96802 LexE2F=2.22462 LexF2E=1.83258 GenerativeProb=10.0863 RulePenalty=1 XFE=0 XEF=1.20397 LabelledEF=1.20397 LabelledFE=-1.98341e-08 LogRuleCount=1.09861\",5,\"[X0] ||| EnwAn ||| funny ||| LHSProb=3.98479 LexE2F=1.79176 LexF2E=3.21888 GenerativeProb=11.1681 RulePenalty=1 XFE=0 XEF=2.30259 LabelledEF=2.30259 LabelledFE=0 LogRuleCount=0 SingletonRule=1\",6,\"[X8] ||| [X7,1] EnwAn ||| entitled [1] ||| LHSProb=3.82533 LexE2F=3.21888 LexF2E=2.52573 GenerativeProb=11.3276 RulePenalty=1 XFE=1.20397 XEF=1.20397 LabelledEF=2.30259 LabelledFE=2.30259 LogRuleCount=0 SingletonRule=1\",7,\"[S] ||| [S,1] [X28,2] ||| [1] [2] ||| Glue=1\",8,\"[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1\",9,\"[S] ||| [X8,1] ||| [1] ||| GlueTop=1\",10,\"[Goal] ||| [S,1] ||| [1]\"],\"features\":[\"PassThrough\",\"Glue\",\"GlueTop\",\"LanguageModel\",\"WordPenalty\",\"LHSProb\",\"LexE2F\",\"LexF2E\",\"GenerativeProb\",\"RulePenalty\",\"XFE\",\"XEF\",\"LabelledEF\",\"LabelledFE\",\"LogRuleCount\",\"SingletonRule\"],\"edges\":[{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,3.92173,6,2.90799,7,1.85003,8,10.5381,9,1,10,2.77259,11,0.441833,12,2.63906,13,4.96981,14,0.693147],\"rule\":1},{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,4.92173,6,3.90799,7,1.85003,8,11.5381,9,1,10,2.77259,11,1.44183,12,2.63906,13,4.96981,14,1.69315],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X7\"},\"edges\":[{\"tail\":[0],\"spans\":[0,1,-1,-1],\"feats\":[2,1],\"rule\":3}],\"node\":{\"in_edges\":[2],\"cat\":\"S\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.96802,6,2.22462,7,1.83258,8,10.0863,9,1,11,1.20397,12,1.20397,13,-1.98341e-08,14,1.09861],\"rule\":4}],\"node\":{\"in_edges\":[3],\"cat\":\"X28\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.98479,6,1.79176,7,3.21888,8,11.1681,9,1,11,2.30259,12,2.30259,15,1],\"rule\":5}],\"node\":{\"in_edges\":[4],\"cat\":\"X0\"},\"edges\":[{\"tail\":[0],\"spans\":[0,2,-1,-1],\"feats\":[5,3.82533,6,3.21888,7,2.52573,8,11.3276,9,1,10,1.20397,11,1.20397,12,2.30259,13,2.30259,15,1],\"rule\":6}],\"node\":{\"in_edges\":[5],\"cat\":\"X8\"},\"edges\":[{\"tail\":[1,2],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":7},{\"tail\":[1,3],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":8},{\"tail\":[4],\"spans\":[0,2,-1,-1],\"feats\":[2,1],\"rule\":9}],\"node\":{\"in_edges\":[6,7,8],\"cat\":\"S\"},\"edges\":[{\"tail\":[5],\"spans\":[0,2,-1,-1],\"feats\":[],\"rule\":10}],\"node\":{\"in_edges\":[9],\"cat\":\"Goal\"}}"; + Hypergraph hg; + istringstream instr(json); + HypergraphIO::ReadFromJSON(&instr, &hg); + SparseVector<double> wts; + wts.set_value(FD::Convert("PassThrough"), -0.929201533002898); + hg.Reweight(wts); + + 
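+  // (editor's note) enumerate the 10 best derivations under the current
+  // weights; LazyKthBest returns NULL when fewer than k derivations exist,
+  // which ends the loop early.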
vector<pair<vector<WordID>, prob_t> > list; + std::vector<SparseVector<double> > features; + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, 10); + for (int i = 0; i < 10; ++i) { + const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(hg.nodes_.size() - 1, i); + if (!d) break; + cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; + } + + SparseVector<double> axis; axis.set_value(FD::Convert("Glue"),1.0); + ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction + vector<ConvexHull> envs(1); + envs[0] = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf); + + vector<vector<WordID> > mr(4); + TD::ConvertSentence("untitled", &mr[0]); + TD::ConvertSentence("with no title", &mr[1]); + TD::ConvertSentence("without a title", &mr[2]); + TD::ConvertSentence("without title", &mr[3]); + EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); + boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(mr); + vector<ErrorSurface> es(1); + ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg); +} + diff --git a/training/dpmert/mert_geometry.cc b/training/dpmert/mert_geometry.cc new file mode 100644 index 00000000..d6973658 --- /dev/null +++ b/training/dpmert/mert_geometry.cc @@ -0,0 +1,185 @@ +#include "mert_geometry.h" + +#include <cassert> +#include <limits> + +using namespace std; + +ConvexHull::ConvexHull(int i) { + if (i == 0) { + // do nothing - <> + } else if (i == 1) { + points.push_back(boost::shared_ptr<MERTPoint>(new MERTPoint(0, 0, 0, boost::shared_ptr<MERTPoint>(), boost::shared_ptr<MERTPoint>()))); + assert(this->IsMultiplicativeIdentity()); + } else { + cerr << "Only can create ConvexHull semiring 0 and 1 with this constructor!\n"; + abort(); + } +} + +const ConvexHull ConvexHullWeightFunction::operator()(const Hypergraph::Edge& e) const { + const double m = direction.dot(e.feature_values_); + const double b = origin.dot(e.feature_values_); + MERTPoint* point = new MERTPoint(m, b, e); + return ConvexHull(1, point); +} + +ostream& operator<<(ostream& os, const ConvexHull& env) { + os << '<'; + const vector<boost::shared_ptr<MERTPoint> >& points = env.GetSortedSegs(); + for (int i = 0; i < points.size(); ++i) + os << (i==0 ? 
"" : "|") << "x=" << points[i]->x << ",b=" << points[i]->b << ",m=" << points[i]->m << ",p1=" << points[i]->p1 << ",p2=" << points[i]->p2; + return os << '>'; +} + +#define ORIGINAL_MERT_IMPLEMENTATION 1 +#ifdef ORIGINAL_MERT_IMPLEMENTATION + +struct SlopeCompare { + bool operator() (const boost::shared_ptr<MERTPoint>& a, const boost::shared_ptr<MERTPoint>& b) const { + return a->m < b->m; + } +}; + +const ConvexHull& ConvexHull::operator+=(const ConvexHull& other) { + if (!other.is_sorted) other.Sort(); + if (points.empty()) { + points = other.points; + return *this; + } + is_sorted = false; + int j = points.size(); + points.resize(points.size() + other.points.size()); + for (int i = 0; i < other.points.size(); ++i) + points[j++] = other.points[i]; + assert(j == points.size()); + return *this; +} + +void ConvexHull::Sort() const { + sort(points.begin(), points.end(), SlopeCompare()); + const int k = points.size(); + int j = 0; + for (int i = 0; i < k; ++i) { + MERTPoint l = *points[i]; + l.x = kMinusInfinity; + // cerr << "m=" << l.m << endl; + if (0 < j) { + if (points[j-1]->m == l.m) { // lines are parallel + if (l.b <= points[j-1]->b) continue; + --j; + } + while(0 < j) { + l.x = (l.b - points[j-1]->b) / (points[j-1]->m - l.m); + if (points[j-1]->x < l.x) break; + --j; + } + if (0 == j) l.x = kMinusInfinity; + } + *points[j++] = l; + } + points.resize(j); + is_sorted = true; +} + +const ConvexHull& ConvexHull::operator*=(const ConvexHull& other) { + if (other.IsMultiplicativeIdentity()) { return *this; } + if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; } + + if (!is_sorted) Sort(); + if (!other.is_sorted) other.Sort(); + + if (this->IsEdgeEnvelope()) { +// if (other.size() > 1) +// cerr << *this << " (TIMES) " << other << endl; + boost::shared_ptr<MERTPoint> edge_parent = points[0]; + const double& edge_b = edge_parent->b; + const double& edge_m = edge_parent->m; + points.clear(); + for (int i = 0; i < other.points.size(); ++i) { + const MERTPoint& p = *other.points[i]; + const double m = p.m + edge_m; + const double b = p.b + edge_b; + const double& x = p.x; // x's don't change with * + points.push_back(boost::shared_ptr<MERTPoint>(new MERTPoint(x, m, b, edge_parent, other.points[i]))); + assert(points.back()->p1->edge); + } +// if (other.size() > 1) +// cerr << " = " << *this << endl; + } else { + vector<boost::shared_ptr<MERTPoint> > new_points; + int this_i = 0; + int other_i = 0; + const int this_size = points.size(); + const int other_size = other.points.size(); + double cur_x = kMinusInfinity; // moves from left to right across the + // real numbers, stopping for all inter- + // sections + double this_next_val = (1 < this_size ? points[1]->x : kPlusInfinity); + double other_next_val = (1 < other_size ? other.points[1]->x : kPlusInfinity); + while (this_i < this_size && other_i < other_size) { + const MERTPoint& this_point = *points[this_i]; + const MERTPoint& other_point= *other.points[other_i]; + const double m = this_point.m + other_point.m; + const double b = this_point.b + other_point.b; + + new_points.push_back(boost::shared_ptr<MERTPoint>(new MERTPoint(cur_x, m, b, points[this_i], other.points[other_i]))); + int comp = 0; + if (this_next_val < other_next_val) comp = -1; else + if (this_next_val > other_next_val) comp = 1; + if (0 == comp) { // the next values are equal, advance both indices + ++this_i; + ++other_i; + cur_x = this_next_val; // could be other_next_val (they're equal!) + this_next_val = (this_i+1 < this_size ? 
points[this_i+1]->x : kPlusInfinity); + other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity); + } else { // advance the i with the lower x, update cur_x + if (-1 == comp) { + ++this_i; + cur_x = this_next_val; + this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity); + } else { + ++other_i; + cur_x = other_next_val; + other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity); + } + } + } + points.swap(new_points); + } + //cerr << "Multiply: result=" << (*this) << endl; + return *this; +} + +// recursively construct translation +void MERTPoint::ConstructTranslation(vector<WordID>* trans) const { + const MERTPoint* cur = this; + vector<vector<WordID> > ant_trans; + while(!cur->edge) { + ant_trans.resize(ant_trans.size() + 1); + cur->p2->ConstructTranslation(&ant_trans.back()); + cur = cur->p1.get(); + } + size_t ant_size = ant_trans.size(); + vector<const vector<WordID>*> pants(ant_size); + assert(ant_size == cur->edge->tail_nodes_.size()); + --ant_size; + for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i]; + cur->edge->rule_->ESubstitute(pants, trans); +} + +void MERTPoint::CollectEdgesUsed(std::vector<bool>* edges_used) const { + if (edge) { + assert(edge->id_ < edges_used->size()); + (*edges_used)[edge->id_] = true; + } + if (p1) p1->CollectEdgesUsed(edges_used); + if (p2) p2->CollectEdgesUsed(edges_used); +} + +#else + +// THIS IS THE NEW FASTER IMPLEMENTATION OF THE MERT SEMIRING OPERATIONS + +#endif + diff --git a/training/dpmert/mert_geometry.h b/training/dpmert/mert_geometry.h new file mode 100644 index 00000000..a8b6959e --- /dev/null +++ b/training/dpmert/mert_geometry.h @@ -0,0 +1,81 @@ +#ifndef _MERT_GEOMETRY_H_ +#define _MERT_GEOMETRY_H_ + +#include <vector> +#include <iostream> +#include <boost/shared_ptr.hpp> + +#include "hg.h" +#include "sparse_vector.h" + +static const double kMinusInfinity = -std::numeric_limits<double>::infinity(); +static const double kPlusInfinity = std::numeric_limits<double>::infinity(); + +struct MERTPoint { + MERTPoint() : x(), m(), b(), edge() {} + MERTPoint(double _m, double _b) : + x(kMinusInfinity), m(_m), b(_b), edge() {} + MERTPoint(double _x, double _m, double _b, const boost::shared_ptr<MERTPoint>& p1_, const boost::shared_ptr<MERTPoint>& p2_) : + x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {} + MERTPoint(double _m, double _b, const Hypergraph::Edge& edge) : + x(kMinusInfinity), m(_m), b(_b), edge(&edge) {} + + double x; // x intersection with previous segment in env, or -inf if none + double m; // this line's slope + double b; // intercept with y-axis + + // we keep a pointer to the "parents" of this segment so we can reconstruct + // the Viterbi translation corresponding to this segment + boost::shared_ptr<MERTPoint> p1; + boost::shared_ptr<MERTPoint> p2; + + // only MERTPoints created from an edge using the ConvexHullWeightFunction + // have rules + // TRulePtr rule; + const Hypergraph::Edge* edge; + + // recursively recover the Viterbi translation that will result from setting + // the weights to origin + axis * x, where x is any value from this->x up + // until the next largest x in the containing ConvexHull + void ConstructTranslation(std::vector<WordID>* trans) const; + void CollectEdgesUsed(std::vector<bool>* edges_used) const; +}; + +// this is the semiring value type, +// it defines constructors for 0, 1, and the operations + and * +struct ConvexHull { + // create semiring zero + ConvexHull() : is_sorted(true) {} // zero + 
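+ // Reading the semiring (a sketch, matching the constructors in
+ // mert_geometry.cc): a ConvexHull is the upper envelope of lines
+ // y = m*x + b, one line per derivation, where x is the step taken along
+ // the search direction. The additive identity is the empty envelope <>
+ // (no derivations); the multiplicative identity is the single line with
+ // m == 0, b == 0 and no parents, so composing with it adds nothing to any
+ // line's slope or intercept -- exactly what IsMultiplicativeIdentity()
+ // below checks.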
// for debugging: + ConvexHull(const std::vector<boost::shared_ptr<MERTPoint> >& s) : points(s) { Sort(); } + // create semiring 1 or 0 + explicit ConvexHull(int i); + ConvexHull(int n, MERTPoint* point) : is_sorted(true), points(n, boost::shared_ptr<MERTPoint>(point)) {} + const ConvexHull& operator+=(const ConvexHull& other); + const ConvexHull& operator*=(const ConvexHull& other); + bool IsMultiplicativeIdentity() const { + return size() == 1 && (points[0]->b == 0.0 && points[0]->m == 0.0) && (!points[0]->edge) && (!points[0]->p1) && (!points[0]->p2); } + const std::vector<boost::shared_ptr<MERTPoint> >& GetSortedSegs() const { + if (!is_sorted) Sort(); + return points; + } + size_t size() const { return points.size(); } + + private: + bool IsEdgeEnvelope() const { + return points.size() == 1 && points[0]->edge; } + void Sort() const; + mutable bool is_sorted; + mutable std::vector<boost::shared_ptr<MERTPoint> > points; +}; +std::ostream& operator<<(std::ostream& os, const ConvexHull& env); + +struct ConvexHullWeightFunction { + ConvexHullWeightFunction(const SparseVector<double>& ori, + const SparseVector<double>& dir) : origin(ori), direction(dir) {} + const ConvexHull operator()(const Hypergraph::Edge& e) const; + const SparseVector<double> origin; + const SparseVector<double> direction; +}; + +#endif diff --git a/training/dpmert/mr_dpmert_generate_mapper_input.cc b/training/dpmert/mr_dpmert_generate_mapper_input.cc new file mode 100644 index 00000000..199cd23a --- /dev/null +++ b/training/dpmert/mr_dpmert_generate_mapper_input.cc @@ -0,0 +1,81 @@ +#include <iostream> +#include <vector> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "filelib.h" +#include "weights.h" +#include "line_optimizer.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("dev_set_size,s",po::value<unsigned>(),"[REQD] Development set size (# of parallel sentences)") + ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository") + ("weights,w",po::value<string>(),"[REQD] Current feature weights file") + ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") + ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (conf->count("dev_set_size") == 0) { + cerr << "Please specify the size of the development set using -s N\n"; + flag = true; + } + if (conf->count("weights") == 0) { + cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n"; + flag = true; + } + if (conf->count("forest_repository") == 0) { + cerr << "Please specify the forest repository location using -r <DIR>\n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +int main(int argc, char** argv) { + RandomNumberGenerator<boost::mt19937> rng; + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + vector<string> features; + SparseVector<weight_t> origin; + vector<weight_t> w; + Weights::InitFromFile(conf["weights"].as<string>(), &w, &features); + 
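+ // Each line written by this tool (see the nested loop at the end of main)
+ // pairs one dev-set forest with one line search:
+ //   <forest_repository>/<sent_id>.json.gz <sent_id> <origin> <direction>
+ // where the sparse vectors are serialized as Feature=Value pairs joined
+ // by ';'. A sketch with hypothetical feature names:
+ //   forests/3.json.gz 3 Glue=-0.2;LanguageModel=1.1 Glue=0.5;PhraseModel_0=-0.3
+ // This is the work-item format that mr_dpmert_map.cc parses back in.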
Weights::InitSparseVector(w, &origin); + const string forest_repository = conf["forest_repository"].as<string>(); + if (!DirectoryExists(forest_repository)) { + cerr << "Forest repository directory " << forest_repository << " not found!\n"; + return 1; + } + if (conf.count("optimize_feature") > 0) + features=conf["optimize_feature"].as<vector<string> >(); + vector<SparseVector<weight_t> > directions; + vector<int> fids(features.size()); + for (unsigned i = 0; i < features.size(); ++i) + fids[i] = FD::Convert(features[i]); + LineOptimizer::CreateOptimizationDirections( + fids, + conf["random_directions"].as<unsigned int>(), + &rng, + &directions); + unsigned dev_set_size = conf["dev_set_size"].as<unsigned>(); + for (unsigned i = 0; i < dev_set_size; ++i) { + for (unsigned j = 0; j < directions.size(); ++j) { + cout << forest_repository << '/' << i << ".json.gz " << i << ' '; + print(cout, origin, "=", ";"); + cout << ' '; + print(cout, directions[j], "=", ";"); + cout << endl; + } + } + return 0; +} diff --git a/training/dpmert/mr_dpmert_map.cc b/training/dpmert/mr_dpmert_map.cc new file mode 100644 index 00000000..d1efcf96 --- /dev/null +++ b/training/dpmert/mr_dpmert_map.cc @@ -0,0 +1,112 @@ +#include <sstream> +#include <iostream> +#include <fstream> +#include <vector> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "ns.h" +#include "ns_docscorer.h" +#include "ces.h" +#include "filelib.h" +#include "stringlib.h" +#include "sparse_vector.h" +#include "mert_geometry.h" +#include "inside_outside.h" +#include "error_surface.h" +#include "b64tools.h" +#include "hg_io.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)") + ("source,s",po::value<string>(), "Source file (ignored, except for AER)") + ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric being optimized") + ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r <REF.TXT>\n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +bool ReadSparseVectorString(const string& s, SparseVector<double>* v) { +#if 0 + // this should work, but untested. 
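+ // Sanity check for both branches (hypothetical feature names): parsing
+ // "Glue=1.5;PhraseModel_0=-0.2" should set feature Glue to 1.5 and
+ // PhraseModel_0 to -0.2, whether via the istream branch right below or
+ // via the #else branch, which tokenizes on ';' and then on '='.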
+ std::istringstream i(s); + i>>*v; +#else + vector<string> fields; + Tokenize(s, ';', &fields); + if (fields.empty()) return false; + for (unsigned i = 0; i < fields.size(); ++i) { + vector<string> pair(2); + Tokenize(fields[i], '=', &pair); + if (pair.size() != 2) { + cerr << "Error parsing vector string: " << fields[i] << endl; + return false; + } + v->set_value(FD::Convert(pair[0]), atof(pair[1].c_str())); + } + return true; +#endif +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string evaluation_metric = conf["evaluation_metric"].as<string>(); + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as<vector<string> >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; + Hypergraph hg; + string last_file; + ReadFile in_read(conf["input"].as<string>()); + istream &in=*in_read.stream(); + while(in) { + string line; + getline(in, line); + if (line.empty()) continue; + istringstream is(line); + int sent_id; + string file, s_origin, s_direction; + // path-to-file (JSON) sent_id starting-point search-direction + is >> file >> sent_id >> s_origin >> s_direction; + SparseVector<double> origin; + ReadSparseVectorString(s_origin, &origin); + SparseVector<double> direction; + ReadSparseVectorString(s_direction, &direction); + // cerr << "File: " << file << "\nDir: " << direction << "\n X: " << origin << endl; + if (last_file != file) { + last_file = file; + ReadFile rf(file); + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + } + const ConvexHullWeightFunction wf(origin, direction); + const ConvexHull hull = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf); + + ErrorSurface es; + ComputeErrorSurface(*ds[sent_id], hull, &es, metric, hg); + //cerr << "Viterbi envelope has " << ve.size() << " segments\n"; + // cerr << "Error surface has " << es.size() << " segments\n"; + string val; + es.Serialize(&val); + cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t'; + B64::b64encode(val.c_str(), val.size(), &cout); + cout << endl << flush; + } + return 0; +} diff --git a/training/dpmert/mr_dpmert_reduce.cc b/training/dpmert/mr_dpmert_reduce.cc new file mode 100644 index 00000000..31512a03 --- /dev/null +++ b/training/dpmert/mr_dpmert_reduce.cc @@ -0,0 +1,77 @@ +#include <sstream> +#include <iostream> +#include <fstream> +#include <vector> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "sparse_vector.h" +#include "error_surface.h" +#include "line_optimizer.h" +#include "b64tools.h" +#include "stringlib.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("evaluation_metric,m",po::value<string>(), "Evaluation metric (IBM_BLEU, etc.)") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = conf->count("evaluation_metric") == 0; + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string evaluation_metric = conf["evaluation_metric"].as<string>(); + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + 
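+ // Input framing (produced by mr_dpmert_map.cc): each stdin line is
+ // "M <origin> <direction>\t<b64-encoded ErrorSurface>". The key --
+ // everything after the leading "M " up to the tab -- identifies one
+ // search line; the payload is one sentence's error surface. All surfaces
+ // sharing a key are collected in esv and passed to
+ // LineOptimizer::LineOptimize below, which returns the best step x along
+ // that line together with its corpus-level score.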
LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE; + if (metric->IsErrorMetric()) + opt_type = LineOptimizer::MINIMIZE_SCORE; + + vector<ErrorSurface> esv; + string last_key, line, key, val; + while(getline(cin, line)) { + size_t ks = line.find("\t"); + assert(string::npos != ks); + assert(ks > 2); + key = line.substr(2, ks - 2); + val = line.substr(ks + 1); + if (key != last_key) { + if (!last_key.empty()) { + float score; + double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); + cout << last_key << "|" << x << "|" << score << endl; + } + last_key.swap(key); + esv.clear(); + } + if (val.size() % 4 != 0) { + cerr << "B64 encoding error 1! Skipping.\n"; + continue; + } + string encoded(val.size() / 4 * 3, '\0'); + if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) { + cerr << "B64 encoding error 2! Skipping.\n"; + continue; + } + esv.push_back(ErrorSurface()); + esv.back().Deserialize(encoded); + } + if (!esv.empty()) { + float score; + double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); + cout << last_key << "|" << x << "|" << score << endl; + } + return 0; +} diff --git a/training/dpmert/test_aer/README b/training/dpmert/test_aer/README new file mode 100644 index 00000000..819b2e32 --- /dev/null +++ b/training/dpmert/test_aer/README @@ -0,0 +1,8 @@ +To run the test: + +../dist-vest.pl --local --metric aer cdec.ini --source-file corpus.src --ref-files=ref.0 --weights weights + +This will optimize the parameters of the tiny lexical translation model +so as to minimize the AER of the Viterbi alignment on the development +set in corpus.src according to the reference alignments in ref.0. + diff --git a/training/dpmert/test_aer/cdec.ini b/training/dpmert/test_aer/cdec.ini new file mode 100644 index 00000000..08187848 --- /dev/null +++ b/training/dpmert/test_aer/cdec.ini @@ -0,0 +1,3 @@ +formalism=lextrans +grammar=grammar +aligner=true diff --git a/training/dpmert/test_aer/corpus.src b/training/dpmert/test_aer/corpus.src new file mode 100644 index 00000000..31b23971 --- /dev/null +++ b/training/dpmert/test_aer/corpus.src @@ -0,0 +1,3 @@ +el gato negro ||| the black cat +el gato ||| the cat +el libro ||| the book diff --git a/training/dpmert/test_aer/grammar b/training/dpmert/test_aer/grammar new file mode 100644 index 00000000..9d857824 --- /dev/null +++ b/training/dpmert/test_aer/grammar @@ -0,0 +1,12 @@ +el ||| cat ||| F1=1 +el ||| the ||| F2=1 +el ||| black ||| F3=1 +el ||| book ||| F11=1 +gato ||| cat ||| F4=1 NN=1 +gato ||| black ||| F5=1 +gato ||| the ||| F6=1 +negro ||| the ||| F7=1 +negro ||| cat ||| F8=1 +negro ||| black ||| F9=1 +libro ||| the ||| F10=1 +libro ||| book ||| F12=1 NN=1 diff --git a/training/dpmert/test_aer/ref.0 b/training/dpmert/test_aer/ref.0 new file mode 100644 index 00000000..734a9c5b --- /dev/null +++ b/training/dpmert/test_aer/ref.0 @@ -0,0 +1,3 @@ +0-0 1-2 2-1 +0-0 1-1 +0-0 1-1 diff --git a/training/dpmert/test_aer/weights b/training/dpmert/test_aer/weights new file mode 100644 index 00000000..afc9282e --- /dev/null +++ b/training/dpmert/test_aer/weights @@ -0,0 +1,13 @@ +F1 0.1 +F2 -.5980815 +F3 0.24235 +F4 0.625 +F5 0.4514 +F6 0.112316 +F7 -0.123415 +F8 -0.25390285 +F9 -0.23852 +F10 0.646 +F11 0.413141 +F12 0.343216 +NN -0.1215 diff --git a/training/dpmert/test_data/0.json.gz b/training/dpmert/test_data/0.json.gz Binary files differnew file mode 100644 index 00000000..30f8dd77 --- /dev/null +++ b/training/dpmert/test_data/0.json.gz diff --git 
a/training/dpmert/test_data/1.json.gz b/training/dpmert/test_data/1.json.gz Binary files differnew file mode 100644 index 00000000..c82cc179 --- /dev/null +++ b/training/dpmert/test_data/1.json.gz diff --git a/training/dpmert/test_data/c2e.txt.0 b/training/dpmert/test_data/c2e.txt.0 new file mode 100644 index 00000000..12c4abe9 --- /dev/null +++ b/training/dpmert/test_data/c2e.txt.0 @@ -0,0 +1,2 @@ +australia reopens embassy in manila +( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack . diff --git a/training/dpmert/test_data/c2e.txt.1 b/training/dpmert/test_data/c2e.txt.1 new file mode 100644 index 00000000..4ac12df1 --- /dev/null +++ b/training/dpmert/test_data/c2e.txt.1 @@ -0,0 +1,2 @@ +australia reopened manila embassy +( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack . diff --git a/training/dpmert/test_data/c2e.txt.2 b/training/dpmert/test_data/c2e.txt.2 new file mode 100644 index 00000000..2f67b72f --- /dev/null +++ b/training/dpmert/test_data/c2e.txt.2 @@ -0,0 +1,2 @@ +australia to reopen embassy in manila +( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so-called confirmed terrorist attack threats . diff --git a/training/dpmert/test_data/c2e.txt.3 b/training/dpmert/test_data/c2e.txt.3 new file mode 100644 index 00000000..5483cef6 --- /dev/null +++ b/training/dpmert/test_data/c2e.txt.3 @@ -0,0 +1,2 @@ +australia to re - open its embassy to manila +( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so-called " clear " threat of terrorist attack 7 weeks ago . diff --git a/training/dpmert/test_data/re.txt.0 b/training/dpmert/test_data/re.txt.0 new file mode 100644 index 00000000..86eff087 --- /dev/null +++ b/training/dpmert/test_data/re.txt.0 @@ -0,0 +1,5 @@ +erdogan states turkey to reject any pressures to urge it to recognize cyprus +ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened . +erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus . +we will discuss this dossier in the course of membership negotiations . " +he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . " diff --git a/training/dpmert/test_data/re.txt.1 b/training/dpmert/test_data/re.txt.1 new file mode 100644 index 00000000..2140f198 --- /dev/null +++ b/training/dpmert/test_data/re.txt.1 @@ -0,0 +1,5 @@ +erdogan confirms turkey will resist any pressure to recognize cyprus +ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . 
this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara . +erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus . +we shall discuss this issue in the course of the membership negotiations . " +he added : " let me be clear - i cannot confine turkey . this is something we do not accept . " diff --git a/training/dpmert/test_data/re.txt.2 b/training/dpmert/test_data/re.txt.2 new file mode 100644 index 00000000..94e46286 --- /dev/null +++ b/training/dpmert/test_data/re.txt.2 @@ -0,0 +1,5 @@ +erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus +ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara . +erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus . +we shall discuss this dossier during the negotiations on joining . " +and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . " diff --git a/training/dpmert/test_data/re.txt.3 b/training/dpmert/test_data/re.txt.3 new file mode 100644 index 00000000..f87c3308 --- /dev/null +++ b/training/dpmert/test_data/re.txt.3 @@ -0,0 +1,5 @@ +erdogan stresses that turkey will reject all pressures to force it to recognize cyprus +ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not . +erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus . +we will discuss this file during the negotiations on joining . " +he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . " diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am new file mode 100644 index 00000000..844c790d --- /dev/null +++ b/training/dtrain/Makefile.am @@ -0,0 +1,7 @@ +bin_PROGRAMS = dtrain + +dtrain_SOURCES = dtrain.cc score.cc dtrain.h kbestget.h ksampler.h pairsampling.h score.h +dtrain_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval + diff --git a/training/dtrain/README.md b/training/dtrain/README.md new file mode 100644 index 00000000..2ab2f232 --- /dev/null +++ b/training/dtrain/README.md @@ -0,0 +1,30 @@ +This is a simple (and parallelizable) tuning method for cdec +which is able to train the weights of very many (sparse) features. 
+It was used here: + "Joint Feature Selection in Distributed Stochastic + Learning for Large-Scale Discriminative Training in + SMT" +(Simianer, Riezler, Dyer; ACL 2012) + + +Building +-------- +dtrain is built as part of the regular cdec build; see ../BUILDING . +To build only the parts needed for dtrain, do: +``` + autoreconf -ifv + ./configure + cd training/dtrain/; make +``` + +Running +------- +See directories under test/ . + +Legal +----- +Copyright (c) 2012-2013 by Patrick Simianer <p@simianer.de> + +See the file LICENSE.txt in the root folder for the licensing terms that this software is +released under. + diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc new file mode 100644 index 00000000..149f87d4 --- /dev/null +++ b/training/dtrain/dtrain.cc @@ -0,0 +1,553 @@ +#include "dtrain.h" + + +bool +dtrain_init(int argc, char** argv, po::variables_map* cfg) +{ + po::options_description ini("Configuration File Options"); + ini.add_options() + ("input", po::value<string>()->default_value("-"), "input file (src)") + ("refs,r", po::value<string>(), "references") + ("output", po::value<string>()->default_value("-"), "output weights file, '-' for STDOUT") + ("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)") + ("decoder_config", po::value<string>(), "configuration file for cdec") + ("print_weights", po::value<string>(), "weights to print on each iteration") + ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences") + ("keep", po::value<bool>()->zero_tokens(), "keep weights files for each iteration") + ("epochs", po::value<unsigned>()->default_value(10), "# of iterations T (per shard)") + ("k", po::value<unsigned>()->default_value(100), "how many translations to sample") + ("sample_from", po::value<string>()->default_value("kbest"), "where to sample translations from: 'kbest', 'forest'") + ("filter", po::value<string>()->default_value("uniq"), "filter kbest list: 'not', 'uniq'") + ("pair_sampling", po::value<string>()->default_value("XYX"), "how to sample pairs: 'all', 'XYX' or 'PRO'") + ("hi_lo", po::value<float>()->default_value(0.1), "hi and lo (X) for XYX (default 0.1), 0.01 <= X <= 0.5") + ("pair_threshold", po::value<score_t>()->default_value(0.), "bleu [0,1] threshold to filter pairs") + ("N", po::value<unsigned>()->default_value(4), "N for Ngrams (BLEU)") + ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring: bleu, stupid_, smooth_, approx_, lc_") + ("learning_rate", po::value<weight_t>()->default_value(1.0), "learning rate") + ("gamma", po::value<weight_t>()->default_value(0.), "gamma for SVM (0 for perceptron)") + ("select_weights", po::value<string>()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)") + ("rescale", po::value<bool>()->zero_tokens(), "rescale weight vector after each input") + ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuruoka et al.' (2009) UNTESTED") + ("l1_reg_strength", po::value<weight_t>(), "l1 regularization strength") + ("fselect", po::value<weight_t>()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO + ("approx_bleu_d", po::value<score_t>()->default_value(0.9), "discount for approx. 
BLEU") + ("scale_bleu_diff", po::value<bool>()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") + ("loss_margin", po::value<weight_t>()->default_value(0.), "update if no error in pref pair but model scores this near") + ("max_pairs", po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.") + ("noup", po::value<bool>()->zero_tokens(), "do not update weights"); + po::options_description cl("Command Line Options"); + cl.add_options() + ("config,c", po::value<string>(), "dtrain config file") + ("quiet,q", po::value<bool>()->zero_tokens(), "be quiet") + ("verbose,v", po::value<bool>()->zero_tokens(), "be verbose"); + cl.add(ini); + po::store(parse_command_line(argc, argv, cl), *cfg); + if (cfg->count("config")) { + ifstream ini_f((*cfg)["config"].as<string>().c_str()); + po::store(po::parse_config_file(ini_f, ini), *cfg); + } + po::notify(*cfg); + if (!cfg->count("decoder_config")) { + cerr << cl << endl; + return false; + } + if ((*cfg)["sample_from"].as<string>() != "kbest" + && (*cfg)["sample_from"].as<string>() != "forest") { + cerr << "Wrong 'sample_from' param: '" << (*cfg)["sample_from"].as<string>() << "', use 'kbest' or 'forest'." << endl; + return false; + } + if ((*cfg)["sample_from"].as<string>() == "kbest" && (*cfg)["filter"].as<string>() != "uniq" && + (*cfg)["filter"].as<string>() != "not") { + cerr << "Wrong 'filter' param: '" << (*cfg)["filter"].as<string>() << "', use 'uniq' or 'not'." << endl; + return false; + } + if ((*cfg)["pair_sampling"].as<string>() != "all" && (*cfg)["pair_sampling"].as<string>() != "XYX" && + (*cfg)["pair_sampling"].as<string>() != "PRO") { + cerr << "Wrong 'pair_sampling' param: '" << (*cfg)["pair_sampling"].as<string>() << "'." << endl; + return false; + } + if(cfg->count("hi_lo") && (*cfg)["pair_sampling"].as<string>() != "XYX") { + cerr << "Warning: hi_lo only works with pair_sampling XYX." << endl; + } + if((*cfg)["hi_lo"].as<float>() > 0.5 || (*cfg)["hi_lo"].as<float>() < 0.01) { + cerr << "hi_lo must lie in [0.01, 0.5]" << endl; + return false; + } + if ((*cfg)["pair_threshold"].as<score_t>() < 0) { + cerr << "The threshold must be >= 0!" << endl; + return false; + } + if ((*cfg)["select_weights"].as<string>() != "last" && (*cfg)["select_weights"].as<string>() != "best" && + (*cfg)["select_weights"].as<string>() != "avg" && (*cfg)["select_weights"].as<string>() != "VOID") { + cerr << "Wrong 'select_weights' param: '" << (*cfg)["select_weights"].as<string>() << "', use 'last' or 'best'." 
<< endl; + return false; + } + return true; +} + +int +main(int argc, char** argv) +{ + // handle most parameters + po::variables_map cfg; + if (!dtrain_init(argc, argv, &cfg)) exit(1); // something is wrong + bool quiet = false; + if (cfg.count("quiet")) quiet = true; + bool verbose = false; + if (cfg.count("verbose")) verbose = true; + bool noup = false; + if (cfg.count("noup")) noup = true; + bool rescale = false; + if (cfg.count("rescale")) rescale = true; + bool keep = false; + if (cfg.count("keep")) keep = true; + + const unsigned k = cfg["k"].as<unsigned>(); + const unsigned N = cfg["N"].as<unsigned>(); + const unsigned T = cfg["epochs"].as<unsigned>(); + const unsigned stop_after = cfg["stop_after"].as<unsigned>(); + const string filter_type = cfg["filter"].as<string>(); + const string sample_from = cfg["sample_from"].as<string>(); + const string pair_sampling = cfg["pair_sampling"].as<string>(); + const score_t pair_threshold = cfg["pair_threshold"].as<score_t>(); + const string select_weights = cfg["select_weights"].as<string>(); + const float hi_lo = cfg["hi_lo"].as<float>(); + const score_t approx_bleu_d = cfg["approx_bleu_d"].as<score_t>(); + const unsigned max_pairs = cfg["max_pairs"].as<unsigned>(); + weight_t loss_margin = cfg["loss_margin"].as<weight_t>(); + if (loss_margin > 9998.) loss_margin = std::numeric_limits<float>::max(); + bool scale_bleu_diff = false; + if (cfg.count("scale_bleu_diff")) scale_bleu_diff = true; + bool average = false; + if (select_weights == "avg") + average = true; + vector<string> print_weights; + if (cfg.count("print_weights")) + boost::split(print_weights, cfg["print_weights"].as<string>(), boost::is_any_of(" ")); + + + // setup decoder + register_feature_functions(); + SetSilent(true); + ReadFile ini_rf(cfg["decoder_config"].as<string>()); + if (!quiet) + cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl; + Decoder decoder(ini_rf.stream()); + + // scoring metric/scorer + string scorer_str = cfg["scorer"].as<string>(); + LocalScorer* scorer; + if (scorer_str == "bleu") { + scorer = dynamic_cast<BleuScorer*>(new BleuScorer); + } else if (scorer_str == "stupid_bleu") { + scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer); + } else if (scorer_str == "fixed_stupid_bleu") { + scorer = dynamic_cast<FixedStupidBleuScorer*>(new FixedStupidBleuScorer); + } else if (scorer_str == "smooth_bleu") { + scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer); + } else if (scorer_str == "sum_bleu") { + scorer = dynamic_cast<SumBleuScorer*>(new SumBleuScorer); + } else if (scorer_str == "sumexp_bleu") { + scorer = dynamic_cast<SumExpBleuScorer*>(new SumExpBleuScorer); + } else if (scorer_str == "sumwhatever_bleu") { + scorer = dynamic_cast<SumWhateverBleuScorer*>(new SumWhateverBleuScorer); + } else if (scorer_str == "approx_bleu") { + scorer = dynamic_cast<ApproxBleuScorer*>(new ApproxBleuScorer(N, approx_bleu_d)); + } else if (scorer_str == "lc_bleu") { + scorer = dynamic_cast<LinearBleuScorer*>(new LinearBleuScorer(N)); + } else { + cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." 
<< endl; + exit(1); + } + vector<score_t> bleu_weights; + scorer->Init(N, bleu_weights); + + // setup decoder observer + MT19937 rng; // random number generator, only for forest sampling + HypSampler* observer; + if (sample_from == "kbest") + observer = dynamic_cast<KBestGetter*>(new KBestGetter(k, filter_type)); + else + observer = dynamic_cast<KSampler*>(new KSampler(k, &rng)); + observer->SetScorer(scorer); + + // init weights + vector<weight_t>& dense_weights = decoder.CurrentWeightVector(); + SparseVector<weight_t> lambdas, cumulative_penalties, w_average; + if (cfg.count("input_weights")) Weights::InitFromFile(cfg["input_weights"].as<string>(), &dense_weights); + Weights::InitSparseVector(dense_weights, &lambdas); + + // meta params for perceptron, SVM + weight_t eta = cfg["learning_rate"].as<weight_t>(); + weight_t gamma = cfg["gamma"].as<weight_t>(); + + // faster perceptron: consider only misranked pairs, see + // DO NOT ENABLE WITH SVM (gamma > 0) OR loss_margin! + bool faster_perceptron = false; + if (gamma==0 && loss_margin==0) faster_perceptron = true; + + // l1 regularization + bool l1naive = false; + bool l1clip = false; + bool l1cumul = false; + weight_t l1_reg = 0; + if (cfg["l1_reg"].as<string>() != "none") { + string s = cfg["l1_reg"].as<string>(); + if (s == "naive") l1naive = true; + else if (s == "clip") l1clip = true; + else if (s == "cumul") l1cumul = true; + l1_reg = cfg["l1_reg_strength"].as<weight_t>(); + } + + // output + string output_fn = cfg["output"].as<string>(); + // input + string input_fn = cfg["input"].as<string>(); + ReadFile input(input_fn); + // buffer input for t > 0 + vector<string> src_str_buf; // source strings (decoder takes only strings) + vector<vector<WordID> > ref_ids_buf; // references as WordID vecs + string refs_fn = cfg["refs"].as<string>(); + ReadFile refs(refs_fn); + + unsigned in_sz = std::numeric_limits<unsigned>::max(); // input index, input size + vector<pair<score_t, score_t> > all_scores; + score_t max_score = 0.; + unsigned best_it = 0; + float overall_time = 0.; + + // output cfg + if (!quiet) { + cerr << _p5; + cerr << endl << "dtrain" << endl << "Parameters:" << endl; + cerr << setw(25) << "k " << k << endl; + cerr << setw(25) << "N " << N << endl; + cerr << setw(25) << "T " << T << endl; + cerr << setw(26) << "scorer '" << scorer_str << "'" << endl; + if (scorer_str == "approx_bleu") + cerr << setw(25) << "approx. 
B discount " << approx_bleu_d << endl; + cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl; + if (sample_from == "kbest") + cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; + if (!scale_bleu_diff) cerr << setw(25) << "learning rate " << eta << endl; + else cerr << setw(25) << "learning rate " << "bleu diff" << endl; + cerr << setw(25) << "gamma " << gamma << endl; + cerr << setw(25) << "loss margin " << loss_margin << endl; + cerr << setw(25) << "faster perceptron " << faster_perceptron << endl; + cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl; + if (pair_sampling == "XYX") + cerr << setw(25) << "hi lo " << hi_lo << endl; + cerr << setw(25) << "pair threshold " << pair_threshold << endl; + cerr << setw(25) << "select weights " << "'" << select_weights << "'" << endl; + if (cfg.count("l1_reg")) + cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as<string>() << "'" << endl; + if (rescale) + cerr << setw(25) << "rescale " << rescale << endl; + cerr << setw(25) << "max pairs " << max_pairs << endl; + cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl; + cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; + cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl; + cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; + if (cfg.count("input_weights")) + cerr << setw(25) << "weights in " << "'" << cfg["input_weights"].as<string>() << "'" << endl; + if (stop_after > 0) + cerr << setw(25) << "stop_after " << stop_after << endl; + if (!verbose) cerr << "(a dot represents " << DTRAIN_DOTS << " inputs)" << endl; + } + + + for (unsigned t = 0; t < T; t++) // T epochs + { + + time_t start, end; + time(&start); + score_t score_sum = 0.; + score_t model_sum(0); + unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0; + if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl; + + while(true) + { + + string in; + bool next = false, stop = false; // next iteration or premature stop + if (t == 0) { + if(!getline(*input, in)) next = true; + } else { + if (ii == in_sz) next = true; // stop if we reach the end of our input + } + // stop after X sentences (but still go on for those) + if (stop_after > 0 && stop_after == ii && !next) stop = true; + + // produce some pretty output + if (!quiet && !verbose) { + if (ii == 0) cerr << " "; + if ((ii+1) % (DTRAIN_DOTS) == 0) { + cerr << "."; + cerr.flush(); + } + if ((ii+1) % (20*DTRAIN_DOTS) == 0) { + cerr << " " << ii+1 << endl; + if (!next && !stop) cerr << " "; + } + if (stop) { + if (ii % (20*DTRAIN_DOTS) != 0) cerr << " " << ii << endl; + cerr << "Stopping after " << stop_after << " input sentences." 
<< endl; + } else { + if (next) { + if (ii % (20*DTRAIN_DOTS) != 0) cerr << " " << ii << endl; + } + } + } + + // next iteration + if (next || stop) break; + + // weights + lambdas.init_vector(&dense_weights); + + // getting input + vector<WordID> ref_ids; // reference as vector<WordID> + if (t == 0) { + string r_; + getline(*refs, r_); + vector<string> ref_tok; + boost::split(ref_tok, r_, boost::is_any_of(" ")); + register_and_convert(ref_tok, ref_ids); + ref_ids_buf.push_back(ref_ids); + src_str_buf.push_back(in); + } else { + ref_ids = ref_ids_buf[ii]; + } + observer->SetRef(ref_ids); + if (t == 0) + decoder.Decode(in, observer); + else + decoder.Decode(src_str_buf[ii], observer); + + // get (scored) samples + vector<ScoredHyp>* samples = observer->GetSamples(); + + if (verbose) { + cerr << "--- ref for " << ii << ": "; + if (t > 0) printWordIDVec(ref_ids_buf[ii]); + else printWordIDVec(ref_ids); + cerr << endl; + for (unsigned u = 0; u < samples->size(); u++) { + cerr << _p2 << _np << "[" << u << ". '"; + printWordIDVec((*samples)[u].w); + cerr << "'" << endl; + cerr << "SCORE=" << (*samples)[u].score << ",model="<< (*samples)[u].model << endl; + cerr << "F{" << (*samples)[u].f << "} ]" << endl << endl; + } + } + + score_sum += (*samples)[0].score; // stats for 1best + model_sum += (*samples)[0].model; + + f_count += observer->get_f_count(); + list_sz += observer->get_sz(); + + // weight updates + if (!noup) { + // get pairs + vector<pair<ScoredHyp,ScoredHyp> > pairs; + if (pair_sampling == "all") + all_pairs(samples, pairs, pair_threshold, max_pairs, faster_perceptron); + if (pair_sampling == "XYX") + partXYX(samples, pairs, pair_threshold, max_pairs, faster_perceptron, hi_lo); + if (pair_sampling == "PRO") + PROsampling(samples, pairs, pair_threshold, max_pairs); + npairs += pairs.size(); + + for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin(); + it != pairs.end(); it++) { + bool rank_error; + score_t margin; + if (faster_perceptron) { // we only have considering misranked pairs + rank_error = true; // pair sampling already did this for us + margin = std::numeric_limits<float>::max(); + } else { + rank_error = it->first.model <= it->second.model; + margin = fabs(fabs(it->first.model) - fabs(it->second.model)); + if (!rank_error && margin < loss_margin) margin_violations++; + } + if (rank_error) rank_errors++; + if (scale_bleu_diff) eta = it->first.score - it->second.score; + if (rank_error || margin < loss_margin) { + SparseVector<weight_t> diff_vec = it->first.f - it->second.f; + lambdas.plus_eq_v_times_s(diff_vec, eta); + if (gamma) + lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs)); + } + } + + // l1 regularization + // please note that this penalizes _all_ weights + // (contrary to only the ones changed by the last update) + // after a _sentence_ (not after each example/pair) + if (l1naive) { + FastSparseVector<weight_t>::iterator it = lambdas.begin(); + for (; it != lambdas.end(); ++it) { + it->second -= sign(it->second) * l1_reg; + } + } else if (l1clip) { + FastSparseVector<weight_t>::iterator it = lambdas.begin(); + for (; it != lambdas.end(); ++it) { + if (it->second != 0) { + weight_t v = it->second; + if (v > 0) { + it->second = max(0., v - l1_reg); + } else { + it->second = min(0., v + l1_reg); + } + } + } + } else if (l1cumul) { + weight_t acc_penalty = (ii+1) * l1_reg; // ii is the index of the current input + FastSparseVector<weight_t>::iterator it = lambdas.begin(); + for (; it != lambdas.end(); ++it) { + if (it->second != 0) { + weight_t 
v = it->second; + weight_t penalized = 0.; + if (v > 0) { + penalized = max(0., v-(acc_penalty + cumulative_penalties.get(it->first))); + } else { + penalized = min(0., v+(acc_penalty - cumulative_penalties.get(it->first))); + } + it->second = penalized; + cumulative_penalties.set_value(it->first, cumulative_penalties.get(it->first)+penalized); + } + } + } + + } + + if (rescale) lambdas /= lambdas.l2norm(); + + ++ii; + + } // input loop + + if (average) w_average += lambdas; + + if (scorer_str == "approx_bleu" || scorer_str == "lc_bleu") scorer->Reset(); + + if (t == 0) { + in_sz = ii; // remember size of input (# lines) + } + + // print some stats + score_t score_avg = score_sum/(score_t)in_sz; + score_t model_avg = model_sum/(score_t)in_sz; + score_t score_diff, model_diff; + if (t > 0) { + score_diff = score_avg - all_scores[t-1].first; + model_diff = model_avg - all_scores[t-1].second; + } else { + score_diff = score_avg; + model_diff = model_avg; + } + + unsigned nonz = 0; + if (!quiet) nonz = (unsigned)lambdas.num_nonzero(); + + if (!quiet) { + cerr << _p5 << _p << "WEIGHTS" << endl; + for (vector<string>::iterator it = print_weights.begin(); it != print_weights.end(); it++) { + cerr << setw(18) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl; + } + cerr << " ---" << endl; + cerr << _np << " 1best avg score: " << score_avg; + cerr << _p << " (" << score_diff << ")" << endl; + cerr << _np << " 1best avg model score: " << model_avg; + cerr << _p << " (" << model_diff << ")" << endl; + cerr << " avg # pairs: "; + cerr << _np << npairs/(float)in_sz; + if (faster_perceptron) cerr << " (meaningless)"; + cerr << endl; + cerr << " avg # rank err: "; + cerr << rank_errors/(float)in_sz << endl; + cerr << " avg # margin viol: "; + cerr << margin_violations/(float)in_sz << endl; + cerr << " non0 feature count: " << nonz << endl; + cerr << " avg list sz: " << list_sz/(float)in_sz << endl; + cerr << " avg f count: " << f_count/(float)list_sz << endl; + } + + pair<score_t,score_t> remember; + remember.first = score_avg; + remember.second = model_avg; + all_scores.push_back(remember); + if (score_avg > max_score) { + max_score = score_avg; + best_it = t; + } + time (&end); + float time_diff = difftime(end, start); + overall_time += time_diff; + if (!quiet) { + cerr << _p2 << _np << "(time " << time_diff/60. << " min, "; + cerr << time_diff/in_sz << " s/S)" << endl; + } + if (t+1 != T && !quiet) cerr << endl; + + if (noup) break; + + // write weights to file + if (select_weights == "best" || keep) { + lambdas.init_vector(&dense_weights); + string w_fn = "weights." + boost::lexical_cast<string>(t) + ".gz"; + Weights::WriteToFile(w_fn, dense_weights, true); + } + + } // outer loop + + if (average) w_average /= (weight_t)T; + + if (!noup) { + if (!quiet) cerr << endl << "Writing weights file to '" << output_fn << "' ..." 
<< endl; + if (select_weights == "last" || average) { // last, average + WriteFile of(output_fn); // works with '-' + ostream& o = *of.stream(); + o.precision(17); + o << _np; + if (average) { + for (SparseVector<weight_t>::iterator it = w_average.begin(); it != w_average.end(); ++it) { + if (it->second == 0) continue; + o << FD::Convert(it->first) << '\t' << it->second << endl; + } + } else { + for (SparseVector<weight_t>::iterator it = lambdas.begin(); it != lambdas.end(); ++it) { + if (it->second == 0) continue; + o << FD::Convert(it->first) << '\t' << it->second << endl; + } + } + } else if (select_weights == "VOID") { // do nothing with the weights + } else { // best + if (output_fn != "-") { + CopyFile("weights."+boost::lexical_cast<string>(best_it)+".gz", output_fn); + } else { + ReadFile bestw("weights."+boost::lexical_cast<string>(best_it)+".gz"); + string o; + cout.precision(17); + cout << _np; + while(getline(*bestw, o)) cout << o << endl; + } + if (!keep) { + for (unsigned i = 0; i < T; i++) { + string s = "weights." + boost::lexical_cast<string>(i) + ".gz"; + unlink(s.c_str()); + } + } + } + if (!quiet) cerr << "done" << endl; + } + + if (!quiet) { + cerr << _p5 << _np << endl << "---" << endl << "Best iteration: "; + cerr << best_it+1 << " [SCORE '" << scorer_str << "'=" << max_score << "]." << endl; + cerr << "This took " << overall_time/60. << " min." << endl; + } +} + diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h new file mode 100644 index 00000000..eb0b9f17 --- /dev/null +++ b/training/dtrain/dtrain.h @@ -0,0 +1,92 @@ +#ifndef _DTRAIN_H_ +#define _DTRAIN_H_ + +#define DTRAIN_DOTS 10 // after how many inputs to display a '.' +#define DTRAIN_SCALE 100000 + +#include <iomanip> +#include <climits> +#include <string.h> +#include <stdlib.h> // mkstemp +#include <unistd.h> // unlink + +#include <boost/algorithm/string.hpp> +#include <boost/program_options.hpp> + +#include "ksampler.h" +#include "pairsampling.h" + +#include "filelib.h" + + +using namespace std; +using namespace dtrain; +namespace po = boost::program_options; + +inline void register_and_convert(const vector<string>& strs, vector<WordID>& ids) +{ + vector<string>::const_iterator it; + for (it = strs.begin(); it < strs.end(); it++) + ids.push_back(TD::Convert(*it)); +} + +inline string gettmpf(const string path, const string infix) +{ + char fn[path.size() + infix.size() + 9]; // path + "/" + infix + "-XXXXXX" + '\0' + strcpy(fn, path.c_str()); + strcat(fn, "/"); + strcat(fn, infix.c_str()); + strcat(fn, "-XXXXXX"); + if (mkstemp(fn) == -1) { // mkstemp returns -1 on failure, not 0 + cerr << "Cannot make temp file in " << path << ", exiting." 
<< endl; + exit(1); + } + return string(fn); +} + +inline void split_in(string& s, vector<string>& parts) +{ + unsigned f = 0; + for(unsigned i = 0; i < 3; i++) { + unsigned e = f; + f = s.find("\t", f+1); + if (e != 0) parts.push_back(s.substr(e+1, f-e-1)); + else parts.push_back(s.substr(0, f)); + } + s.erase(0, f+1); +} + +struct HSReporter +{ + string task_id_; + + HSReporter(string task_id) : task_id_(task_id) {} + + inline void update_counter(string name, unsigned amount) { + cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl; + } + inline void update_gcounter(string name, unsigned amount) { + cerr << "reporter:counter:Global," << name << "," << amount << endl; + } +}; + +inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); } +inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } +inline ostream& _p2(ostream& out) { return out << setprecision(2); } +inline ostream& _p5(ostream& out) { return out << setprecision(5); } + +inline void printWordIDVec(vector<WordID>& v) +{ + for (unsigned i = 0; i < v.size(); i++) { + cerr << TD::Convert(v[i]); + if (i < v.size()-1) cerr << " "; + } +} + +template<typename T> +inline T sign(T z) +{ + if (z == 0) return 0; + return z < 0 ? -1 : +1; +} + +#endif + diff --git a/training/dtrain/examples/parallelized/README b/training/dtrain/examples/parallelized/README new file mode 100644 index 00000000..89715105 --- /dev/null +++ b/training/dtrain/examples/parallelized/README @@ -0,0 +1,5 @@ +run for example + ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs + +final weights will be in the file work/weights.3 + diff --git a/training/dtrain/examples/parallelized/cdec.ini b/training/dtrain/examples/parallelized/cdec.ini new file mode 100644 index 00000000..e43ba1c4 --- /dev/null +++ b/training/dtrain/examples/parallelized/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/examples/parallelized/dtrain.ini b/training/dtrain/examples/parallelized/dtrain.ini new file mode 100644 index 00000000..f19ef891 --- /dev/null +++ b/training/dtrain/examples/parallelized/dtrain.ini @@ -0,0 +1,16 @@ +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +loss_margin=1.0 +epochs=1 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +hi_lo=0.1 +select_weights=last +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +# newer version of the grammar extractor use different feature names: +#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +decoder_config=cdec.ini diff --git 
a/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz Binary files differnew file mode 100644 index 00000000..1e28a24b --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz Binary files differnew file mode 100644 index 00000000..372f5675 --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz Binary files differnew file mode 100644 index 00000000..145d0dc0 --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz Binary files differnew file mode 100644 index 00000000..105593ff --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz Binary files differnew file mode 100644 index 00000000..30781f48 --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz Binary files differnew file mode 100644 index 00000000..834ee759 --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz Binary files differnew file mode 100644 index 00000000..2e76f348 --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz Binary files differnew file mode 100644 index 00000000..3741a887 --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz Binary files differnew file mode 100644 index 00000000..ebf6bd0c --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz Binary files differnew file mode 100644 index 00000000..c1791059 --- /dev/null +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz diff --git a/training/dtrain/examples/parallelized/in b/training/dtrain/examples/parallelized/in new file mode 100644 index 00000000..51d01fe7 --- /dev/null +++ b/training/dtrain/examples/parallelized/in @@ -0,0 +1,10 @@ +<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg> +<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg> +<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler 
front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg> +<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg> +<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg> +<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg> +<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg> +<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg> +<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg> +<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg> diff --git a/training/dtrain/examples/parallelized/refs b/training/dtrain/examples/parallelized/refs new file mode 100644 index 00000000..632e27b0 --- /dev/null +++ b/training/dtrain/examples/parallelized/refs @@ -0,0 +1,10 @@ +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . +the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . 
+this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/examples/parallelized/work/out.0.0 b/training/dtrain/examples/parallelized/work/out.0.0 new file mode 100644 index 00000000..7a00ed0f --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.0.0 @@ -0,0 +1,61 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 3121929377 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.0.0.in' + refs 'work/shard.0.0.refs' + output 'work/weights.0.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = +0.2663 + WordPenalty = -0.0079042 + LanguageModel = +0.44782 + LanguageModel_OOV = -0.0401 + PhraseModel_0 = -0.193 + PhraseModel_1 = +0.71321 + PhraseModel_2 = +0.85196 + PhraseModel_3 = -0.43986 + PhraseModel_4 = -0.44803 + PhraseModel_5 = -0.0538 + PhraseModel_6 = -0.1788 + PassThrough = -0.1477 + --- + 1best avg score: 0.17521 (+0.17521) + 1best avg model score: 21.556 (+21.556) + avg # pairs: 1671.2 + avg # rank err: 1118.6 + avg # margin viol: 552.6 + non0 feature count: 12 + avg list sz: 100 + avg f count: 11.32 +(time 0.37 min, 4.4 s/S) + +Writing weights file to 'work/weights.0.0' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.17521]. +This took 0.36667 min. diff --git a/training/dtrain/examples/parallelized/work/out.0.1 b/training/dtrain/examples/parallelized/work/out.0.1 new file mode 100644 index 00000000..e2bd6649 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.0.1 @@ -0,0 +1,62 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 2767202922 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.0.0.in' + refs 'work/shard.0.0.refs' + output 'work/weights.0.1' + weights in 'work/weights.0' +(a dot represents 10 inputs) +Iteration #1 of 1. 
+ 5 +WEIGHTS + Glue = -0.2699 + WordPenalty = +0.080605 + LanguageModel = -0.026572 + LanguageModel_OOV = -0.30025 + PhraseModel_0 = -0.32076 + PhraseModel_1 = +0.67451 + PhraseModel_2 = +0.92 + PhraseModel_3 = -0.36402 + PhraseModel_4 = -0.592 + PhraseModel_5 = -0.0269 + PhraseModel_6 = -0.28755 + PassThrough = -0.33285 + --- + 1best avg score: 0.26638 (+0.26638) + 1best avg model score: 53.197 (+53.197) + avg # pairs: 2028.6 + avg # rank err: 998.2 + avg # margin viol: 918.8 + non0 feature count: 12 + avg list sz: 100 + avg f count: 10.496 +(time 0.32 min, 3.8 s/S) + +Writing weights file to 'work/weights.0.1' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.26638]. +This took 0.31667 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.0 b/training/dtrain/examples/parallelized/work/out.1.0 new file mode 100644 index 00000000..6e790e38 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.1.0 @@ -0,0 +1,61 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 1432415010 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.1.0.in' + refs 'work/shard.1.0.refs' + output 'work/weights.1.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.3815 + WordPenalty = +0.20064 + LanguageModel = +0.95304 + LanguageModel_OOV = -0.264 + PhraseModel_0 = -0.22362 + PhraseModel_1 = +0.12254 + PhraseModel_2 = +0.26328 + PhraseModel_3 = +0.38018 + PhraseModel_4 = -0.48654 + PhraseModel_5 = +0 + PhraseModel_6 = -0.3645 + PassThrough = -0.2216 + --- + 1best avg score: 0.10863 (+0.10863) + 1best avg model score: -4.9841 (-4.9841) + avg # pairs: 1345.4 + avg # rank err: 822.4 + avg # margin viol: 501 + non0 feature count: 11 + avg list sz: 100 + avg f count: 11.814 +(time 0.45 min, 5.4 s/S) + +Writing weights file to 'work/weights.1.0' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.10863]. +This took 0.45 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.1 b/training/dtrain/examples/parallelized/work/out.1.1 new file mode 100644 index 00000000..0b984761 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.1.1 @@ -0,0 +1,62 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 1771918374 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.1.0.in' + refs 'work/shard.1.0.refs' + output 'work/weights.1.1' + weights in 'work/weights.0' +(a dot represents 10 inputs) +Iteration #1 of 1. 
+ 5 +WEIGHTS + Glue = -0.3178 + WordPenalty = +0.11092 + LanguageModel = +0.17269 + LanguageModel_OOV = -0.13485 + PhraseModel_0 = -0.45371 + PhraseModel_1 = +0.38789 + PhraseModel_2 = +0.75311 + PhraseModel_3 = -0.38163 + PhraseModel_4 = -0.58817 + PhraseModel_5 = -0.0269 + PhraseModel_6 = -0.27315 + PassThrough = -0.16745 + --- + 1best avg score: 0.13169 (+0.13169) + 1best avg model score: 24.226 (+24.226) + avg # pairs: 1951.2 + avg # rank err: 985.4 + avg # margin viol: 951 + non0 feature count: 12 + avg list sz: 100 + avg f count: 11.224 +(time 0.42 min, 5 s/S) + +Writing weights file to 'work/weights.1.1' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.13169]. +This took 0.41667 min. diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.in b/training/dtrain/examples/parallelized/work/shard.0.0.in new file mode 100644 index 00000000..92f9c78e --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.0.0.in @@ -0,0 +1,5 @@ +<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg> +<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg> +<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg> +<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg> +<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg> diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.refs b/training/dtrain/examples/parallelized/work/shard.0.0.refs new file mode 100644 index 00000000..bef68fee --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.0.0.refs @@ -0,0 +1,5 @@ +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . +the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . 
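The work/shard.0.0.{in,refs} files above and the work/shard.1.0.{in,refs} files below are simply a 2-way split of the 10-sentence in/refs corpus from this example. The split is produced by make_shards in parallelize.rb, added further down in this diff; the following Ruby sketch (the function name split_in_order is ours, and the optional shuffling and leftover handling of make_shards are omitted) shows the idea:

    # In-order sharding sketch: write num_shards (input, refs) file pairs.
    # make_shards in parallelize.rb additionally shuffles line indices and
    # appends leftover lines to the last shard.
    def split_in_order(input, refs, num_shards, epoch)
      in_lines   = File.readlines input
      refs_lines = File.readlines refs
      shard_sz   = in_lines.size / num_shards   # 10 / 2 = 5 here
      0.upto(num_shards-1) { |shard|
        File.write "work/shard.#{shard}.#{epoch}.in",   in_lines[shard*shard_sz, shard_sz].join
        File.write "work/shard.#{shard}.#{epoch}.refs", refs_lines[shard*shard_sz, shard_sz].join
      }
    end

    split_in_order 'in', 'refs', 2, 0   # yields the four shard files shown here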
diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.in b/training/dtrain/examples/parallelized/work/shard.1.0.in new file mode 100644 index 00000000..b7695ce7 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.1.0.in @@ -0,0 +1,5 @@ +<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg> +<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg> +<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg> +<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg> +<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg> diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.refs b/training/dtrain/examples/parallelized/work/shard.1.0.refs new file mode 100644 index 00000000..6076f6d5 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.1.0.refs @@ -0,0 +1,5 @@ +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . +this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . 
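The work/weights.0 file below is the epoch-0 reduction of the two per-shard weight files: parallelize.rb (further down in this diff) concatenates work/weights.*.0 and pipes the result through lplp.rb, whose select_k/cut modes print the per-feature mean over shards. A quick spot check in Ruby, with the two LanguageModel values copied from work/weights.0.0 and work/weights.1.0 below:

    # Each value in work/weights.0 is the mean over the 2 shards; a feature
    # missing from a shard (e.g. PhraseModel_5 in weights.1.0) counts as 0.
    lm_shard0 = 0.44781580828279532    # LanguageModel in work/weights.0.0
    lm_shard1 = 0.9530439901597807     # LanguageModel in work/weights.1.0
    puts (lm_shard0 + lm_shard1) / 2   # ~0.70043, cf. work/weights.0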
diff --git a/training/dtrain/examples/parallelized/work/weights.0 b/training/dtrain/examples/parallelized/work/weights.0 new file mode 100644 index 00000000..ddd595a8 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0 @@ -0,0 +1,12 @@ +LanguageModel 0.7004298992212881 +PhraseModel_2 0.5576194336478857 +PhraseModel_1 0.41787318415343155 +PhraseModel_4 -0.46728502545635164 +PhraseModel_3 -0.029839521598455515 +Glue -0.05760000000000068 +PhraseModel_6 -0.2716499999999978 +PhraseModel_0 -0.20831031065605327 +LanguageModel_OOV -0.15205000000000077 +PassThrough -0.1846500000000006 +WordPenalty 0.09636994553433414 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.0.0 b/training/dtrain/examples/parallelized/work/weights.0.0 new file mode 100644 index 00000000..c9370b18 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0.0 @@ -0,0 +1,12 @@ +WordPenalty -0.0079041595706392243 +LanguageModel 0.44781580828279532 +LanguageModel_OOV -0.04010000000000042 +Glue 0.26629999999999948 +PhraseModel_0 -0.19299677809125185 +PhraseModel_1 0.71321026861732773 +PhraseModel_2 0.85195540993310537 +PhraseModel_3 -0.43986310822842656 +PhraseModel_4 -0.44802855630415955 +PhraseModel_5 -0.053800000000000514 +PhraseModel_6 -0.17879999999999835 +PassThrough -0.14770000000000036 diff --git a/training/dtrain/examples/parallelized/work/weights.0.1 b/training/dtrain/examples/parallelized/work/weights.0.1 new file mode 100644 index 00000000..8fad3de8 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0.1 @@ -0,0 +1,12 @@ +WordPenalty 0.080605055841244472 +LanguageModel -0.026571720531022844 +LanguageModel_OOV -0.30024999999999141 +Glue -0.26989999999999842 +PhraseModel_2 0.92000295209089566 +PhraseModel_1 0.67450748692470841 +PhraseModel_4 -0.5920000014976784 +PhraseModel_3 -0.36402437203127397 +PhraseModel_6 -0.28754999999999603 +PhraseModel_0 -0.32076244202907672 +PassThrough -0.33284999999999004 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1 b/training/dtrain/examples/parallelized/work/weights.1 new file mode 100644 index 00000000..03058a16 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1 @@ -0,0 +1,12 @@ +PhraseModel_2 0.8365578543552836 +PhraseModel_4 -0.5900840266009169 +PhraseModel_1 0.5312000609786991 +PhraseModel_0 -0.3872342271319619 +PhraseModel_3 -0.3728279676912084 +Glue -0.2938500000000036 +PhraseModel_6 -0.2803499999999967 +PassThrough -0.25014999999999626 +LanguageModel_OOV -0.21754999999999702 +LanguageModel 0.07306061161169894 +WordPenalty 0.09576193325966899 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1.0 b/training/dtrain/examples/parallelized/work/weights.1.0 new file mode 100644 index 00000000..6a6a65c1 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1.0 @@ -0,0 +1,11 @@ +WordPenalty 0.20064405063930751 +LanguageModel 0.9530439901597807 +LanguageModel_OOV -0.26400000000000112 +Glue -0.38150000000000084 +PhraseModel_0 -0.22362384322085468 +PhraseModel_1 0.12253609968953538 +PhraseModel_2 0.26328345736266612 +PhraseModel_3 0.38018406503151553 +PhraseModel_4 -0.48654149460854373 +PhraseModel_6 -0.36449999999999722 +PassThrough -0.22160000000000085 diff --git a/training/dtrain/examples/parallelized/work/weights.1.1 b/training/dtrain/examples/parallelized/work/weights.1.1 new file mode 100644 index 00000000..f56ea4a2 --- /dev/null +++ 
b/training/dtrain/examples/parallelized/work/weights.1.1 @@ -0,0 +1,12 @@ +WordPenalty 0.1109188106780935 +LanguageModel 0.17269294375442074 +LanguageModel_OOV -0.13485000000000266 +Glue -0.3178000000000088 +PhraseModel_2 0.75311275661967159 +PhraseModel_1 0.38789263503268989 +PhraseModel_4 -0.58816805170415531 +PhraseModel_3 -0.38163156335114284 +PhraseModel_6 -0.27314999999999739 +PhraseModel_0 -0.45370601223484697 +PassThrough -0.16745000000000249 +PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/standard/README b/training/dtrain/examples/standard/README new file mode 100644 index 00000000..ce37d31a --- /dev/null +++ b/training/dtrain/examples/standard/README @@ -0,0 +1,2 @@ +Call `dtrain` from this folder with ../../dtrain -c dtrain.ini . +
diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini new file mode 100644 index 00000000..e1edc68d --- /dev/null +++ b/training/dtrain/examples/standard/cdec.ini @@ -0,0 +1,26 @@ +formalism=scfg +add_pass_through_rules=true +scfg_max_span_limit=15 +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +grammar=nc-wmt11.grammar.gz +feature_function=WordPenalty +feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz +# all currently working feature functions for translation: +# (with those features active that were used in the ACL paper) +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +feature_function=RuleIdentityFeatures +feature_function=RuleSourceBigramFeatures +feature_function=RuleTargetBigramFeatures +feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures
diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini new file mode 100644 index 00000000..e1072d30 --- /dev/null +++ b/training/dtrain/examples/standard/dtrain.ini @@ -0,0 +1,24 @@ +input=./nc-wmt11.de.gz +refs=./nc-wmt11.en.gz +output=- # a weights file (add .gz for gzip compression) or STDOUT '-' +select_weights=VOID # output average (over epochs) weight vector +decoder_config=./cdec.ini # config for cdec +# weights for these features will be printed on each iteration +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +# newer versions of the grammar extractor use different feature names: +#print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV +stop_after=10 # stop epoch after 10 inputs + +# interesting stuff +epochs=2 # run over input 2 times +k=100 # use 100best lists +N=4 # optimize (approx) BLEU4 +scorer=stupid_bleu # use 'stupid' BLEU+1 +learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) +gamma=0 # use SVM reg +sample_from=kbest # use kbest lists (as opposed to forest) +filter=uniq # only unique entries in kbest (surface form) +pair_sampling=XYX # +hi_lo=0.1 # compare top 10% vs middle 80% and bottom 10%, and middle 80% vs bottom 10% +pair_threshold=0 # minimum distance in BLEU (here: > 0) +loss_margin=0 # update if correctly ranked, but within this margin
diff --git a/training/dtrain/examples/standard/expected-output
b/training/dtrain/examples/standard/expected-output new file mode 100644 index 00000000..7cd09dbf --- /dev/null +++ b/training/dtrain/examples/standard/expected-output @@ -0,0 +1,91 @@ + cdec cfg './cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ./nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** + Example feature: Shape_S00000_T00000 +Seeding random number sequence to 2679584485 + +dtrain +Parameters: + k 100 + N 4 + T 2 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 1 + gamma 0 + loss margin 0 + faster perceptron 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'VOID' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg './cdec.ini' + input './nc-wmt11.de.gz' + refs './nc-wmt11.en.gz' + output '-' + stop_after 10 +(a dot represents 10 inputs) +Iteration #1 of 2. + . 10 +Stopping after 10 input sentences. +WEIGHTS + Glue = -576 + WordPenalty = +417.79 + LanguageModel = +5117.5 + LanguageModel_OOV = -1307 + PhraseModel_0 = -1612 + PhraseModel_1 = -2159.6 + PhraseModel_2 = -677.36 + PhraseModel_3 = +2663.8 + PhraseModel_4 = -1025.9 + PhraseModel_5 = -8 + PhraseModel_6 = +70 + PassThrough = -1455 + --- + 1best avg score: 0.27697 (+0.27697) + 1best avg model score: -47918 (-47918) + avg # pairs: 581.9 (meaningless) + avg # rank err: 581.9 + avg # margin viol: 0 + non0 feature count: 703 + avg list sz: 90.9 + avg f count: 100.09 +(time 0.25 min, 1.5 s/S) + +Iteration #2 of 2. + . 10 +WEIGHTS + Glue = -622 + WordPenalty = +898.56 + LanguageModel = +8066.2 + LanguageModel_OOV = -2590 + PhraseModel_0 = -4335.8 + PhraseModel_1 = -5864.4 + PhraseModel_2 = -1729.8 + PhraseModel_3 = +2831.9 + PhraseModel_4 = -5384.8 + PhraseModel_5 = +1449 + PhraseModel_6 = +480 + PassThrough = -2578 + --- + 1best avg score: 0.37119 (+0.094226) + 1best avg model score: -1.3174e+05 (-83822) + avg # pairs: 584.1 (meaningless) + avg # rank err: 584.1 + avg # margin viol: 0 + non0 feature count: 1115 + avg list sz: 91.3 + avg f count: 90.755 +(time 0.3 min, 1.8 s/S) + +Writing weights file to '-' ... +done + +--- +Best iteration: 2 [SCORE 'stupid_bleu'=0.37119]. +This took 0.55 min. 
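The '1best avg score' lines in the expected output above are per-sentence 'stupid' BLEU+1 scores (scorer=stupid_bleu in dtrain.ini above), averaged over the 10 inputs. Reading StupidBleuScorer::Score in score.cc further down in this diff, the per-sentence score is, with c_n the clipped and t_n the total hypothesis n-gram counts, h the hypothesis, r the reference, and M = min(N, |r|):

    \mathrm{SB}(h,r) = \mathrm{BP}(|h|,|r|) \cdot \exp\Big(\sum_{n=1}^{M} v_n \log\frac{c_n+\delta_n}{t_n+\delta_n}\Big),
    \qquad \delta_1 = 0,\; \delta_{n>1} = 1,
    \qquad \mathrm{BP}(|h|,|r|) = \min\big(1,\, e^{\,1-|r|/|h|}\big)

with uniform weights v_n = 1/M under the defaults, and a hard 0 whenever the hypothesis has no unigram match.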
diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz Binary files differ new file mode 100644 index 00000000..0741fd92 --- /dev/null +++ b/training/dtrain/examples/standard/nc-wmt11.de.gz
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz Binary files differ new file mode 100644 index 00000000..1c0bd401 --- /dev/null +++ b/training/dtrain/examples/standard/nc-wmt11.en.gz
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz Binary files differ new file mode 100644 index 00000000..7ce81057 --- /dev/null +++ b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz
diff --git a/training/dtrain/examples/standard/nc-wmt11.grammar.gz b/training/dtrain/examples/standard/nc-wmt11.grammar.gz Binary files differ new file mode 100644 index 00000000..ce4024a1 --- /dev/null +++ b/training/dtrain/examples/standard/nc-wmt11.grammar.gz
diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini new file mode 100644 index 00000000..b14f4819 --- /dev/null +++ b/training/dtrain/examples/toy/cdec.ini @@ -0,0 +1,3 @@ +formalism=scfg +add_pass_through_rules=true +grammar=grammar.gz
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini new file mode 100644 index 00000000..cd715f26 --- /dev/null +++ b/training/dtrain/examples/toy/dtrain.ini @@ -0,0 +1,13 @@ +decoder_config=cdec.ini +input=src +refs=tgt +output=- +print_weights=logp shell_rule house_rule small_rule little_rule PassThrough +k=4 +N=4 +epochs=2 +scorer=bleu +sample_from=kbest +filter=uniq +pair_sampling=all +learning_rate=1
diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output new file mode 100644 index 00000000..1da2aadd --- /dev/null +++ b/training/dtrain/examples/toy/expected-output @@ -0,0 +1,77 @@ +Warning: hi_lo only works with pair_sampling XYX. + cdec cfg 'cdec.ini' +Seeding random number sequence to 1664825829 + +dtrain +Parameters: + k 4 + N 4 + T 2 + scorer 'bleu' + sample from 'kbest' + filter 'uniq' + learning rate 1 + gamma 0 + loss margin 0 + pairs 'all' + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'src' + refs 'tgt' + output '-' +(a dot represents 10 inputs) +Iteration #1 of 2. + 2 +WEIGHTS + logp = +0 + shell_rule = -1 + house_rule = +2 + small_rule = -2 + little_rule = +3 + PassThrough = -5 + --- + 1best avg score: 0.5 (+0.5) + 1best avg model score: 2.5 (+2.5) + avg # pairs: 4 + avg # rank err: 1.5 + avg # margin viol: 0 + non0 feature count: 6 + avg list sz: 4 + avg f count: 2.875 +(time 0 min, 0 s/S) + +Iteration #2 of 2. + 2 +WEIGHTS + logp = +0 + shell_rule = -1 + house_rule = +2 + small_rule = -2 + little_rule = +3 + PassThrough = -5 + --- + 1best avg score: 1 (+0.5) + 1best avg model score: 5 (+2.5) + avg # pairs: 5 + avg # rank err: 0 + avg # margin viol: 0 + non0 feature count: 6 + avg list sz: 4 + avg f count: 3 +(time 0 min, 0 s/S) + +Writing weights file to '-' ... +house_rule 2 +little_rule 3 +Glue -4 +PassThrough -5 +small_rule -2 +shell_rule -1 +done + +--- +Best iteration: 2 [SCORE 'bleu'=1]. +This took 0 min.
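With learning rate 1, gamma 0 and loss margin 0, the toy run above amounts to a plain pairwise perceptron over k-best pairs (pair_sampling=all, i.e. all_pairs in pairsampling.h further down in this diff). dtrain.cc itself is elsewhere in this commit, so the following Ruby sketch of the per-pair update is an illustration only (perceptron_update and the feature values are ours, not the actual implementation); feature vectors are plain Hashes:

    # Perceptron update on one (better, worse) pair: if the higher-BLEU
    # hypothesis does not beat the other by more than the margin under the
    # current model, move the weights toward its features.
    def perceptron_update(w, better_f, worse_f, eta = 1.0, margin = 0.0)
      keys = better_f.keys | worse_f.keys
      diff = keys.map { |k| [k, (better_f[k] || 0.0) - (worse_f[k] || 0.0)] }.to_h
      return w if diff.sum { |k, d| (w[k] || 0.0) * d } > margin
      diff.each { |k, d| w[k] = (w[k] || 0.0) + eta * d }
      w
    end

    w = {}
    better = { 'house_rule' => 1.0, 'logp' => -2.3 }   # made-up feature values
    worse  = { 'small_rule' => 1.0, 'logp' => -1.7 }
    perceptron_update(w, better, worse)   # w now favors 'house_rule'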
diff --git a/training/dtrain/examples/toy/grammar.gz b/training/dtrain/examples/toy/grammar.gz Binary files differ new file mode 100644 index 00000000..8eb0d29e --- /dev/null +++ b/training/dtrain/examples/toy/grammar.gz
diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src new file mode 100644 index 00000000..87e39ef2 --- /dev/null +++ b/training/dtrain/examples/toy/src @@ -0,0 +1,2 @@ +ich sah ein kleines haus +ich fand ein kleines haus
diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt new file mode 100644 index 00000000..174926b3 --- /dev/null +++ b/training/dtrain/examples/toy/tgt @@ -0,0 +1,2 @@ +i saw a little house +i found a little house
diff --git a/training/dtrain/kbestget.h b/training/dtrain/kbestget.h new file mode 100644 index 00000000..dd8882e1 --- /dev/null +++ b/training/dtrain/kbestget.h @@ -0,0 +1,152 @@ +#ifndef _DTRAIN_KBESTGET_H_ +#define _DTRAIN_KBESTGET_H_ + +#include "kbest.h" // cdec +#include "sentence_metadata.h" + +#include "verbose.h" +#include "viterbi.h" +#include "ff_register.h" +#include "decoder.h" +#include "weights.h" +#include "logval.h" + +using namespace std; + +namespace dtrain +{ + + +typedef double score_t; + +struct ScoredHyp +{ + vector<WordID> w; + SparseVector<double> f; + score_t model; + score_t score; + unsigned rank; +}; + +struct LocalScorer +{ + unsigned N_; + vector<score_t> w_; + + virtual score_t + Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned rank, const unsigned src_len)=0; + + void Reset() {} // only for approx bleu + + inline void + Init(unsigned N, vector<score_t> weights) + { + assert(N > 0); + N_ = N; + if (weights.empty()) for (unsigned i = 0; i < N_; i++) w_.push_back(1./N_); + else w_ = weights; + } + + inline score_t + brevity_penalty(const unsigned hyp_len, const unsigned ref_len) + { + if (hyp_len > ref_len) return 1; + return exp(1 - (score_t)ref_len/hyp_len); + } +}; + +struct HypSampler : public DecoderObserver +{ + LocalScorer* scorer_; + vector<WordID>* ref_; + unsigned f_count_, sz_; + virtual vector<ScoredHyp>* GetSamples()=0; + inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; } + inline void SetRef(vector<WordID>& ref) { ref_ = &ref; } + inline unsigned get_f_count() { return f_count_; } + inline unsigned get_sz() { return sz_; } +}; +//////////////////////////////////////////////////////////////////////////////// + + + + +struct KBestGetter : public HypSampler +{ + const unsigned k_; + const string filter_type_; + vector<ScoredHyp> s_; + unsigned src_len_; + + KBestGetter(const unsigned k, const string filter_type) : + k_(k), filter_type_(filter_type) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) + { + src_len_ = smeta.GetSourceLength(); + KBestScored(*hg); + } + + vector<ScoredHyp>* GetSamples() { return &s_; } + + void + KBestScored(const Hypergraph& forest) + { + if (filter_type_ == "uniq") { + KBestUnique(forest); + } else if (filter_type_ == "not") { + KBestNoFilter(forest); + } + } + + void + KBestUnique(const Hypergraph& forest) + { + s_.clear(); sz_ = f_count_ = 0; + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, + KBest::FilterUnique, prob_t, EdgeProb> kbest(forest, k_); + for (unsigned i = 0; i < k_; ++i) { + const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, KBest::FilterUnique, + prob_t, EdgeProb>::Derivation* d = + kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + ScoredHyp h; + h.w = d->yield; + h.f = d->feature_values; +
h.model = log(d->score); + h.rank = i; + h.score = scorer_->Score(h.w, *ref_, i, src_len_); + s_.push_back(h); + sz_++; + f_count_ += h.f.size(); + } + } + + void + KBestNoFilter(const Hypergraph& forest) + { + s_.clear(); sz_ = f_count_ = 0; + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, k_); + for (unsigned i = 0; i < k_; ++i) { + const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + ScoredHyp h; + h.w = d->yield; + h.f = d->feature_values; + h.model = log(d->score); + h.rank = i; + h.score = scorer_->Score(h.w, *ref_, i, src_len_); + s_.push_back(h); + sz_++; + f_count_ += h.f.size(); + } + } +}; + + +} // namespace + +#endif + diff --git a/training/dtrain/ksampler.h b/training/dtrain/ksampler.h new file mode 100644 index 00000000..bc2f56cd --- /dev/null +++ b/training/dtrain/ksampler.h @@ -0,0 +1,61 @@ +#ifndef _DTRAIN_KSAMPLER_H_ +#define _DTRAIN_KSAMPLER_H_ + +#include "hg_sampler.h" // cdec +#include "kbestget.h" +#include "score.h" + +namespace dtrain +{ + +bool +cmp_hyp_by_model_d(ScoredHyp a, ScoredHyp b) +{ + return a.model > b.model; +} + +struct KSampler : public HypSampler +{ + const unsigned k_; + vector<ScoredHyp> s_; + MT19937* prng_; + score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector<score_t>); + unsigned src_len_; + + explicit KSampler(const unsigned k, MT19937* prng) : + k_(k), prng_(prng) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) + { + src_len_ = smeta.GetSourceLength(); + ScoredSamples(*hg); + } + + vector<ScoredHyp>* GetSamples() { return &s_; } + + void ScoredSamples(const Hypergraph& forest) { + s_.clear(); sz_ = f_count_ = 0; + std::vector<HypergraphSampler::Hypothesis> samples; + HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples); + for (unsigned i = 0; i < k_; ++i) { + ScoredHyp h; + h.w = samples[i].words; + h.f = samples[i].fmap; + h.model = log(samples[i].model_score); + h.rank = i; + h.score = scorer_->Score(h.w, *ref_, i, src_len_); + s_.push_back(h); + sz_++; + f_count_ += h.f.size(); + } + sort(s_.begin(), s_.end(), cmp_hyp_by_model_d); + for (unsigned i = 0; i < s_.size(); i++) s_[i].rank = i; + } +}; + + +} // namespace + +#endif + diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb new file mode 100755 index 00000000..86e835e8 --- /dev/null +++ b/training/dtrain/lplp.rb @@ -0,0 +1,123 @@ +# lplp.rb + +# norms +def l0(feature_column, n) + if feature_column.size >= n then return 1 else return 0 end +end + +def l1(feature_column, n=-1) + return feature_column.map { |i| i.abs }.reduce { |sum,i| sum+i } +end + +def l2(feature_column, n=-1) + return Math.sqrt feature_column.map { |i| i.abs2 }.reduce { |sum,i| sum+i } +end + +def linfty(feature_column, n=-1) + return feature_column.map { |i| i.abs }.max +end + +# stats +def median(feature_column, n) + return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2] +end + +def mean(feature_column, n) + return feature_column.reduce { |sum, i| sum+i } / n +end + +# selection +def select_k(weights, norm_fun, n, k=10000) + weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p| + puts "#{p[0]}\t#{mean(p[1], n)}" + k -= 1 + if k == 0 then break end + } +end + +def cut(weights, norm_fun, n, epsilon=0.0001) + weights.each { |k,v| + if norm_fun.call(v, n).abs >= epsilon + puts "#{k}\t#{mean(v, n)}" + end + } +end + +# test 
+def _test() + puts + w = {} + w["a"] = [1, 2, 3] + w["b"] = [1, 2] + w["c"] = [66] + w["d"] = [10, 20, 30] + n = 3 + puts w.to_s + puts + puts "select_k" + puts "l0 expect ad" + select_k(w, method(:l0), n, 2) + puts "l1 expect cd" + select_k(w, method(:l1), n, 2) + puts "l2 expect c" + select_k(w, method(:l2), n, 1) + puts + puts "cut" + puts "l1 expect cd" + cut(w, method(:l1), n, 7) + puts + puts "median" + a = [1,2,3,4,5] + puts a.to_s + puts median(a, 5) + puts + puts "#{median(a, 7)} <- that's because we add missing 0s:" + puts a.concat(0.step(7-a.size-1).map{|i|0}).to_s + puts + puts "mean expect bc" + w.clear + w["a"] = [2] + w["b"] = [2.1] + w["c"] = [2.2] + cut(w, method(:mean), 1, 2.05) + exit +end +#_test() + + +def usage() + puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> <#shards> < <input>" + puts " l0...: norms for selection" + puts "select_k: only output top k (according to the norm of their column vector) features" + puts " cut: output features with weight >= threshold" + puts " n: if we do not have a shard count use this number for averaging" + exit 1 +end + +if ARGV.size < 4 then usage end +norm_fun = method(ARGV[0].to_sym) +type = ARGV[1] +x = ARGV[2].to_f +shard_count = ARGV[3].to_f + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +w = {} +while line = STDIN.gets + key, val = line.split /\s+/ + if w.has_key? key + w[key].push val.to_f + else + w[key] = [val.to_f] + end +end + +if type == 'cut' + cut(w, norm_fun, shard_count, x) +elsif type == 'select_k' + select_k(w, norm_fun, shard_count, x) +else + puts "oh oh" +end + diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h new file mode 100644 index 00000000..3f67e209 --- /dev/null +++ b/training/dtrain/pairsampling.h @@ -0,0 +1,140 @@ +#ifndef _DTRAIN_PAIRSAMPLING_H_ +#define _DTRAIN_PAIRSAMPLING_H_ + +namespace dtrain +{ + + +bool +accept_pair(score_t a, score_t b, score_t threshold) +{ + if (fabs(a - b) < threshold) return false; + return true; +} + +bool +cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b) +{ + return a.score > b.score; +} + +inline void +all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float _unused=1) +{ + sort(s->begin(), s->end(), cmp_hyp_by_score_d); + unsigned sz = s->size(); + bool b = false; + unsigned count = 0; + for (unsigned i = 0; i < sz-1; i++) { + for (unsigned j = i+1; j < sz; j++) { + if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) { + b = true; + break; + } + } + if (b) break; + } +} + +/* + * multipartite ranking + * sort (descending) by bleu + * compare top X to middle Y and low X + * cmp middle Y to low X + */ + +inline void +partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float hi_lo) +{ + unsigned sz = s->size(); + if (sz < 2) return; + sort(s->begin(), s->end(), cmp_hyp_by_score_d); + unsigned sep = round(sz*hi_lo); + unsigned sep_hi = sep; + if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi; + else sep_hi = 1; + bool b = false; + unsigned count = 0; + for (unsigned i = 0; i < sep_hi; i++) { + for (unsigned j = sep_hi; j < sz; j++) { + if 
(misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) { + b = true; + break; + } + } + if (b) break; + } + unsigned sep_lo = sz-sep; + while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo; + for (unsigned i = sep_hi; i < sz-sep_lo; i++) { + for (unsigned j = sz-sep_lo; j < sz; j++) { + if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) return; + } + } +} + +/* + * pair sampling as in + * 'Tuning as Ranking' (Hopkins & May, 2011) + * count = 5000 + * threshold = 5% BLEU (0.05 for param 3) + * cut = top 50 + */ +bool +_PRO_cmp_pair_by_diff_d(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b) +{ + return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); +} +inline void +PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0) +{ + unsigned max_count = 5000, count = 0, sz = s->size(); + bool b = false; + for (unsigned i = 0; i < sz-1; i++) { + for (unsigned j = i+1; j < sz; j++) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { + training.push_back(make_pair((*s)[i], (*s)[j])); + if (++count == max_count) { + b = true; + break; + } + } + } + if (b) break; + } + if (training.size() > 50) { + sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff_d); + training.erase(training.begin()+50, training.end()); + } + return; +} + + +} // namespace + +#endif + diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb new file mode 100755 index 00000000..e661416e --- /dev/null +++ b/training/dtrain/parallelize.rb @@ -0,0 +1,149 @@ +#!/usr/bin/env ruby + +require 'trollop' + +def usage + STDERR.write "Usage: " + STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"]\n" + exit 1 +end + +opts = Trollop::options do + opt :config, "dtrain config file", :type => :string + opt :epochs, "number of epochs", :type => :int, :default => 10 + opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000" + opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false + opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false + opt :shards, "number of shards", :type => :int + opt :processes_at_once, "have this number (max) running at the same time", :type => :int, :default => 9999 + opt :input, "input", :type => :string + opt :references, "references", :type => :string + opt :qsub, "use qsub", :type => :bool, :default => false + opt :dtrain_binary, "path to dtrain binary", :type => :string +end +usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references] + + +dtrain_dir = File.expand_path File.dirname(__FILE__) +if not opts[:dtrain_binary] + dtrain_bin = "#{dtrain_dir}/dtrain" +else + 
dtrain_bin = opts[:dtrain_binary] +end +ruby = '/usr/bin/ruby' +lplp_rb = "#{dtrain_dir}/lplp.rb" +lplp_args = opts[:lplp_args] +cat = '/bin/cat' + +ini = opts[:config] +epochs = opts[:epochs] +rand = opts[:randomize] +reshard = opts[:reshard] +predefined_shards = false +if opts[:shards] == 0 + predefined_shards = true + num_shards = 0 +else + num_shards = opts[:shards] +end +input = opts[:input] +refs = opts[:references] +use_qsub = opts[:qsub] +shards_at_once = opts[:processes_at_once] + +`mkdir work` + +def make_shards(input, refs, num_shards, epoch, rand) + lc = `wc -l #{input}`.split.first.to_i + index = (0..lc-1).to_a + index.reverse! + index.shuffle! if rand + shard_sz = lc / num_shards + leftover = lc % num_shards + in_f = File.new input, 'r' + in_lines = in_f.readlines + refs_f = File.new refs, 'r' + refs_lines = refs_f.readlines + shard_in_files = [] + shard_refs_files = [] + in_fns = [] + refs_fns = [] + 0.upto(num_shards-1) { |shard| + in_fn = "work/shard.#{shard}.#{epoch}.in" + shard_in = File.new in_fn, 'w+' + in_fns << in_fn + refs_fn = "work/shard.#{shard}.#{epoch}.refs" + shard_refs = File.new refs_fn, 'w+' + refs_fns << refs_fn + 0.upto(shard_sz-1) { |i| + j = index.pop + shard_in.write in_lines[j] + shard_refs.write refs_lines[j] + } + shard_in_files << shard_in + shard_refs_files << shard_refs + } + while leftover > 0 + j = index.pop + shard_in_files[-1].write in_lines[j] + shard_refs_files[-1].write refs_lines[j] + leftover -= 1 + end + (shard_in_files + shard_refs_files).each do |f| f.close end + in_f.close + refs_f.close + return [in_fns, refs_fns] +end + +input_files = [] +refs_files = [] +if predefined_shards + input_files = File.new(input).readlines.map {|i| i.strip } + refs_files = File.new(refs).readlines.map {|i| i.strip } + num_shards = input_files.size +else + input_files, refs_files = make_shards input, refs, num_shards, 0, rand +end + +0.upto(epochs-1) { |epoch| + puts "epoch #{epoch+1}" + pids = [] + input_weights = '' + if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end + weights_files = [] + shard = 0 + remaining_shards = num_shards + while remaining_shards > 0 + shards_at_once.times { + break if remaining_shards==0 + qsub_str_start = qsub_str_end = '' + local_end = '' + if use_qsub + qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \"" + qsub_str_end = "\"" + local_end = '' + else + local_end = "&>work/out.#{shard}.#{epoch}" + end + pids << Kernel.fork { + `#{qsub_str_start}#{dtrain_bin} -c #{ini}\ + --input #{input_files[shard]}\ + --refs #{refs_files[shard]} #{input_weights}\ + --output work/weights.#{shard}.#{epoch}#{qsub_str_end} #{local_end}` + } + weights_files << "work/weights.#{shard}.#{epoch}" + shard += 1 + remaining_shards -= 1 + } + pids.each { |pid| Process.wait(pid) } + pids.clear + end + `#{cat} work/weights.*.#{epoch} > work/weights_cat` + `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}` + if rand and reshard and epoch+1!=epochs + input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand + end +} + +`rm work/weights_cat` + diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc new file mode 100644 index 00000000..96d6e10a --- /dev/null +++ b/training/dtrain/score.cc @@ -0,0 +1,283 @@ +#include "score.h" + +namespace dtrain +{ + + +/* + * bleu + * + * as in "BLEU: a Method for Automatic Evaluation + * of Machine Translation" + * (Papineni et al. 
'02) + * + * NOTE: 0 if for one n \in {1..N} count is 0 + */ +score_t +BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len) +{ + if (hyp_len == 0 || ref_len == 0) return 0.; + unsigned M = N_; + vector<score_t> v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.; + sum += v[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]); + } + return brevity_penalty(hyp_len, ref_len) * exp(sum); +} + +score_t +BleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + return Bleu(counts, hyp_len, ref_len); +} + +/* + * 'stupid' bleu + * + * as in "ORANGE: a Method for Evaluating + * Automatic Evaluation Metrics + * for Machine Translation" + * (Lin & Och '04) + * + * NOTE: 0 iff no 1gram match ('grounded') + */ +score_t +StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + vector<score_t> v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0, add = 0; + for (unsigned i = 0; i < M; i++) { + if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; + if (i == 1) add = 1; + sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); + } + return brevity_penalty(hyp_len, ref_len) * exp(sum); +} + +/* + * fixed 'stupid' bleu + * + * as in "Optimizing for Sentence-Level BLEU+1 + * Yields Short Translations" + * (Nakov et al. '12) + */ +score_t +FixedStupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + vector<score_t> v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0, add = 0; + for (unsigned i = 0; i < M; i++) { + if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; + if (i == 1) add = 1; + sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); + } + return brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix +} + +/* + * smooth bleu + * + * as in "An End-to-End Discriminative Approach + * to Machine Translation" + * (Liang et al. 
'06) + * + * NOTE: max is 0.9375 (with N=4) + */ +score_t +SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + if (ref_len < N_) M = ref_len; + score_t sum = 0.; + vector<score_t> i_bleu; + for (unsigned i = 0; i < M; i++) i_bleu.push_back(0.); + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) { + break; + } else { + score_t i_ng = log((score_t)counts.clipped_[i]/counts.sum_[i]); + for (unsigned j = i; j < M; j++) { + i_bleu[j] += (1/((score_t)j+1)) * i_ng; + } + } + sum += exp(i_bleu[i])/pow(2.0, (double)(N_-i)); + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * 'sum' bleu + * + * sum up Ngram precisions + */ +score_t +SumBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + if (ref_len < N_) M = ref_len; + score_t sum = 0.; + unsigned j = 1; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; + sum += ((score_t)counts.clipped_[i]/counts.sum_[i])/pow(2.0, (double) (N_-j+1)); + j++; + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * 'sum' (exp) bleu + * + * sum up exp(Ngram precisions) + */ +score_t +SumExpBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + if (ref_len < N_) M = ref_len; + score_t sum = 0.; + unsigned j = 1; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; + sum += exp(((score_t)counts.clipped_[i]/counts.sum_[i]))/pow(2.0, (double) (N_-j+1)); + j++; + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * 'sum' (whatever) bleu + * + * sum up exp(weight * log(Ngram precisions)) + */ +score_t +SumWhateverBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + vector<score_t> v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0.; + unsigned j = 1; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; + sum += exp(v[i] * log(((score_t)counts.clipped_[i]/counts.sum_[i])))/pow(2.0, (double) (N_-j+1)); + j++; + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * approx. bleu + * + * as in "Online Large-Margin Training of Syntactic + * and Structural Translation Features" + * (Chiang et al. '08) + * + * NOTE: Needs some more code in dtrain.cc . + * No scaling by src len. 
+ */ +score_t +ApproxBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned rank, const unsigned src_len) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (ref_len == 0) return 0.; + score_t score = 0.; + NgramCounts counts(N_); + if (hyp_len > 0) { + counts = make_ngram_counts(hyp, ref, N_); + NgramCounts tmp = glob_onebest_counts_ + counts; + score = Bleu(tmp, hyp_len, ref_len); + } + if (rank == 0) { // 'context of 1best translations' + glob_onebest_counts_ += counts; + glob_onebest_counts_ *= discount_; + glob_hyp_len_ = discount_ * (glob_hyp_len_ + hyp_len); + glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len); + glob_src_len_ = discount_ * (glob_src_len_ + src_len); + } + return score; +} + +/* + * Linear (Corpus) Bleu + * + * as in "Lattice Minimum Bayes-Risk Decoding + * for Statistical Machine Translation" + * (Tromble et al. '08) + * + */ +score_t +LinearBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref, + const unsigned rank, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (ref_len == 0) return 0.; + unsigned M = N_; + if (ref_len < N_) M = ref_len; + NgramCounts counts(M); + if (hyp_len > 0) + counts = make_ngram_counts(hyp, ref, M); + score_t ret = 0.; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || onebest_counts_.sum_[i] == 0) break; + ret += counts.sum_[i]/onebest_counts_.sum_[i]; + } + ret = -(hyp_len/(score_t)onebest_len_) + (1./M) * ret; + if (rank == 0) { + onebest_len_ += hyp_len; + onebest_counts_ += counts; + } + return ret; +} + + +} // namespace + diff --git a/training/dtrain/score.h b/training/dtrain/score.h new file mode 100644 index 00000000..bddaa071 --- /dev/null +++ b/training/dtrain/score.h @@ -0,0 +1,217 @@ +#ifndef _DTRAIN_SCORE_H_ +#define _DTRAIN_SCORE_H_ + +#include "kbestget.h" + +using namespace std; + +namespace dtrain +{ + + +struct NgramCounts +{ + unsigned N_; + map<unsigned, score_t> clipped_; + map<unsigned, score_t> sum_; + + NgramCounts(const unsigned N) : N_(N) { Zero(); } + + inline void + operator+=(const NgramCounts& rhs) + { + if (rhs.N_ > N_) Resize(rhs.N_); + for (unsigned i = 0; i < N_; i++) { + this->clipped_[i] += rhs.clipped_.find(i)->second; + this->sum_[i] += rhs.sum_.find(i)->second; + } + } + + inline const NgramCounts + operator+(const NgramCounts &other) const + { + NgramCounts result = *this; + result += other; + return result; + } + + inline void + operator*=(const score_t rhs) + { + for (unsigned i = 0; i < N_; i++) { + this->clipped_[i] *= rhs; + this->sum_[i] *= rhs; + } + } + + inline void + Add(const unsigned count, const unsigned ref_count, const unsigned i) + { + assert(i < N_); + if (count > ref_count) { + clipped_[i] += ref_count; + } else { + clipped_[i] += count; + } + sum_[i] += count; + } + + inline void + Zero() + { + for (unsigned i = 0; i < N_; i++) { + clipped_[i] = 0.; + sum_[i] = 0.; + } + } + + inline void + One() + { + for (unsigned i = 0; i < N_; i++) { + clipped_[i] = 1.; + sum_[i] = 1.; + } + } + + inline void + Print() + { + for (unsigned i = 0; i < N_; i++) { + cout << i+1 << "grams (clipped):\t" << clipped_[i] << endl; + cout << i+1 << "grams:\t\t\t" << sum_[i] << endl; + } + } + + inline void Resize(unsigned N) + { + if (N == N_) return; + else if (N > N_) { + for (unsigned i = N_; i < N; i++) { + clipped_[i] = 0.; + sum_[i] = 0.; + } + } else { // N < N_ + for (unsigned i = N_-1; i > N-1; i--) { + clipped_.erase(i); + sum_.erase(i); + } + } + N_ = N; + } +}; + +typedef map<vector<WordID>, 
unsigned> Ngrams; + +inline Ngrams +make_ngrams(const vector<WordID>& s, const unsigned N) +{ + Ngrams ngrams; + vector<WordID> ng; + for (size_t i = 0; i < s.size(); i++) { + ng.clear(); + for (unsigned j = i; j < min(i+N, s.size()); j++) { + ng.push_back(s[j]); + ngrams[ng]++; + } + } + return ngrams; +} + +inline NgramCounts +make_ngram_counts(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned N) +{ + Ngrams hyp_ngrams = make_ngrams(hyp, N); + Ngrams ref_ngrams = make_ngrams(ref, N); + NgramCounts counts(N); + Ngrams::iterator it; + Ngrams::iterator ti; + for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) { + ti = ref_ngrams.find(it->first); + if (ti != ref_ngrams.end()) { + counts.Add(it->second, ti->second, it->first.size() - 1); + } else { + counts.Add(it->second, 0, it->first.size() - 1); + } + } + return counts; +} + +struct BleuScorer : public LocalScorer +{ + score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len); + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct StupidBleuScorer : public LocalScorer +{ + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct FixedStupidBleuScorer : public LocalScorer +{ + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SmoothBleuScorer : public LocalScorer +{ + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SumBleuScorer : public LocalScorer +{ + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SumExpBleuScorer : public LocalScorer +{ + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SumWhateverBleuScorer : public LocalScorer +{ + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct ApproxBleuScorer : public BleuScorer +{ + NgramCounts glob_onebest_counts_; + unsigned glob_hyp_len_, glob_ref_len_, glob_src_len_; + score_t discount_; + + ApproxBleuScorer(unsigned N, score_t d) : glob_onebest_counts_(NgramCounts(N)), discount_(d) + { + glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0; + } + + inline void Reset() { + glob_onebest_counts_.Zero(); + glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.; + } + + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned rank, const unsigned src_len); +}; + +struct LinearBleuScorer : public BleuScorer +{ + unsigned onebest_len_; + NgramCounts onebest_counts_; + + LinearBleuScorer(unsigned N) : onebest_len_(1), onebest_counts_(N) + { + onebest_counts_.One(); + } + + score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned rank, const unsigned /*src_len*/); + + inline void Reset() { + onebest_len_ = 1; + onebest_counts_.One(); + } +}; + + +} // namespace + +#endif + diff --git a/training/fast_align.cc b/training/fast_align.cc deleted file mode 100644 index 0d7b0202..00000000 --- a/training/fast_align.cc +++ /dev/null @@ -1,271 +0,0 @@ -#include <iostream> -#include <cmath> - -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "m.h" -#include "corpus_tools.h" -#include "stringlib.h" -#include "filelib.h" -#include "ttables.h" -#include "tdict.h" - -namespace po = 
boost::program_options; -using namespace std; - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input,i",po::value<string>(),"Parallel corpus input file") - ("reverse,r","Reverse estimation (swap source and target during training)") - ("iterations,I",po::value<unsigned>()->default_value(5),"Number of iterations of EM training") - //("bidir,b", "Run bidirectional alignment") - ("favor_diagonal,d", "Use a static alignment distribution that assigns higher probabilities to alignments near the diagonal") - ("prob_align_null", po::value<double>()->default_value(0.08), "When --favor_diagonal is set, what's the probability of a null alignment?") - ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (<1 = flat >1 = sharp)") - ("variational_bayes,v","Infer VB estimate of parameters under a symmetric Dirichlet prior") - ("alpha,a", po::value<double>()->default_value(0.01), "Hyperparameter for optional Dirichlet prior") - ("no_null_word,N","Do not generate from a null token") - ("output_parameters,p", "Write model parameters instead of alignments") - ("beam_threshold,t",po::value<double>()->default_value(-4),"When writing parameters, log_10 of beam threshold for writing parameter (-10000 to include everything, 0 max parameter only)") - ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model") - ("no_add_viterbi,V","When writing model parameters, do not add Viterbi alignment points (may generate a grammar where some training sentence pairs are unreachable)"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value<string>(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as<string>().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || conf->count("input") == 0) { - cerr << "Usage " << argv[0] << " [OPTIONS] -i corpus.fr-en\n"; - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -double PosteriorInference(const vector<WordID>& src, const vector<WordID>& trg) { - double llh = 0; - static vector<double> unnormed_a_i; - if (src.size() > unnormed_a_i.size()) - unnormed_a_i.resize(src.size()); - return llh; -} - -int main(int argc, char** argv) { - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) return 1; - const string fname = conf["input"].as<string>(); - const bool reverse = conf.count("reverse") > 0; - const int ITERATIONS = conf["iterations"].as<unsigned>(); - const double BEAM_THRESHOLD = pow(10.0, conf["beam_threshold"].as<double>()); - const bool use_null = (conf.count("no_null_word") == 0); - const WordID kNULL = TD::Convert("<eps>"); - const bool add_viterbi = (conf.count("no_add_viterbi") == 0); - const bool variational_bayes = (conf.count("variational_bayes") > 0); - const bool write_alignments = (conf.count("output_parameters") == 0); - const double diagonal_tension = conf["diagonal_tension"].as<double>(); - const double prob_align_null = conf["prob_align_null"].as<double>(); 
- string testset; - if (conf.count("testset")) testset = conf["testset"].as<string>(); - const double prob_align_not_null = 1.0 - prob_align_null; - const double alpha = conf["alpha"].as<double>(); - const bool favor_diagonal = conf.count("favor_diagonal"); - if (variational_bayes && alpha <= 0.0) { - cerr << "--alpha must be > 0\n"; - return 1; - } - - TTable s2t, t2s; - TTable::Word2Word2Double s2t_viterbi; - double tot_len_ratio = 0; - double mean_srclen_multiplier = 0; - vector<double> unnormed_a_i; - for (int iter = 0; iter < ITERATIONS; ++iter) { - const bool final_iteration = (iter == (ITERATIONS - 1)); - cerr << "ITERATION " << (iter + 1) << (final_iteration ? " (FINAL)" : "") << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - double likelihood = 0; - double denom = 0.0; - int lc = 0; - bool flag = false; - string line; - string ssrc, strg; - vector<WordID> src, trg; - while(true) { - getline(in, line); - if (!in) break; - ++lc; - if (lc % 1000 == 0) { cerr << '.'; flag = true; } - if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } - src.clear(); trg.clear(); - CorpusTools::ReadLine(line, &src, &trg); - if (reverse) swap(src, trg); - if (src.size() == 0 || trg.size() == 0) { - cerr << "Error: " << lc << "\n" << line << endl; - return 1; - } - if (src.size() > unnormed_a_i.size()) - unnormed_a_i.resize(src.size()); - if (iter == 0) - tot_len_ratio += static_cast<double>(trg.size()) / static_cast<double>(src.size()); - denom += trg.size(); - vector<double> probs(src.size() + 1); - bool first_al = true; // used for write_alignments - for (int j = 0; j < trg.size(); ++j) { - const WordID& f_j = trg[j]; - double sum = 0; - const double j_over_ts = double(j) / trg.size(); - double prob_a_i = 1.0 / (src.size() + use_null); // uniform (model 1) - if (use_null) { - if (favor_diagonal) prob_a_i = prob_align_null; - probs[0] = s2t.prob(kNULL, f_j) * prob_a_i; - sum += probs[0]; - } - double az = 0; - if (favor_diagonal) { - for (int ta = 0; ta < src.size(); ++ta) { - unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); - az += unnormed_a_i[ta]; - } - az /= prob_align_not_null; - } - for (int i = 1; i <= src.size(); ++i) { - if (favor_diagonal) - prob_a_i = unnormed_a_i[i-1] / az; - probs[i] = s2t.prob(src[i-1], f_j) * prob_a_i; - sum += probs[i]; - } - if (final_iteration) { - if (add_viterbi || write_alignments) { - WordID max_i = 0; - double max_p = -1; - int max_index = -1; - if (use_null) { - max_i = kNULL; - max_index = 0; - max_p = probs[0]; - } - for (int i = 1; i <= src.size(); ++i) { - if (probs[i] > max_p) { - max_index = i; - max_p = probs[i]; - max_i = src[i-1]; - } - } - if (write_alignments) { - if (max_index > 0) { - if (first_al) first_al = false; else cout << ' '; - if (reverse) - cout << j << '-' << (max_index - 1); - else - cout << (max_index - 1) << '-' << j; - } - } - s2t_viterbi[max_i][f_j] = 1.0; - } - } else { - if (use_null) - s2t.Increment(kNULL, f_j, probs[0] / sum); - for (int i = 1; i <= src.size(); ++i) - s2t.Increment(src[i-1], f_j, probs[i] / sum); - } - likelihood += log(sum); - } - if (write_alignments && final_iteration) cout << endl; - } - - // log(e) = 1.0 - double base2_likelihood = likelihood / log(2); - - if (flag) { cerr << endl; } - if (iter == 0) { - mean_srclen_multiplier = tot_len_ratio / lc; - cerr << "expected target length = source length * " << mean_srclen_multiplier << endl; - } - cerr << " log_e likelihood: " << likelihood << endl; - cerr << " log_2 likelihood: " << 
base2_likelihood << endl; - cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; - cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; - if (!final_iteration) { - if (variational_bayes) - s2t.NormalizeVB(alpha); - else - s2t.Normalize(); - } - } - if (testset.size()) { - ReadFile rf(testset); - istream& in = *rf.stream(); - int lc = 0; - double tlp = 0; - string ssrc, strg, line; - while (getline(in, line)) { - ++lc; - vector<WordID> src, trg; - CorpusTools::ReadLine(line, &src, &trg); - double log_prob = Md::log_poisson(trg.size(), 0.05 + src.size() * mean_srclen_multiplier); - if (src.size() > unnormed_a_i.size()) - unnormed_a_i.resize(src.size()); - - // compute likelihood - for (int j = 0; j < trg.size(); ++j) { - const WordID& f_j = trg[j]; - double sum = 0; - const double j_over_ts = double(j) / trg.size(); - double prob_a_i = 1.0 / (src.size() + use_null); // uniform (model 1) - if (use_null) { - if (favor_diagonal) prob_a_i = prob_align_null; - sum += s2t.prob(kNULL, f_j) * prob_a_i; - } - double az = 0; - if (favor_diagonal) { - for (int ta = 0; ta < src.size(); ++ta) { - unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); - az += unnormed_a_i[ta]; - } - az /= prob_align_not_null; - } - for (int i = 1; i <= src.size(); ++i) { - if (favor_diagonal) - prob_a_i = unnormed_a_i[i-1] / az; - sum += s2t.prob(src[i-1], f_j) * prob_a_i; - } - log_prob += log(sum); - } - tlp += log_prob; - cerr << ssrc << " ||| " << strg << " ||| " << log_prob << endl; - } - cerr << "TOTAL LOG PROB " << tlp << endl; - } - - if (write_alignments) return 0; - - for (TTable::Word2Word2Double::iterator ei = s2t.ttable.begin(); ei != s2t.ttable.end(); ++ei) { - const TTable::Word2Double& cpd = ei->second; - const TTable::Word2Double& vit = s2t_viterbi[ei->first]; - const string& esym = TD::Convert(ei->first); - double max_p = -1; - for (TTable::Word2Double::const_iterator fi = cpd.begin(); fi != cpd.end(); ++fi) - if (fi->second > max_p) max_p = fi->second; - const double threshold = max_p * BEAM_THRESHOLD; - for (TTable::Word2Double::const_iterator fi = cpd.begin(); fi != cpd.end(); ++fi) { - if (fi->second > threshold || (vit.find(fi->first) != vit.end())) { - cout << esym << ' ' << TD::Convert(fi->first) << ' ' << log(fi->second) << endl; - } - } - } - return 0; -} - diff --git a/training/feature_expectations.cc b/training/feature_expectations.cc deleted file mode 100644 index f1a85495..00000000 --- a/training/feature_expectations.cc +++ /dev/null @@ -1,232 +0,0 @@ -#include <sstream> -#include <iostream> -#include <fstream> -#include <vector> -#include <cassert> -#include <cmath> -#include <tr1/memory> - -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "online_optimizer.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" -#include "sampler.h" - -#ifdef HAVE_MPI -#include <boost/mpi/timer.hpp> -#include <boost/mpi.hpp> -namespace mpi = boost::mpi; -#endif - -using namespace std; -namespace po = boost::program_options; - -struct FComp { - const vector<double>& w_; - FComp(const vector<double>& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowFeatures(const vector<double>& w) { - vector<int> fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = 
i; - sort(fnums.begin(), fnums.end(), FComp(w)); - for (vector<int>::iterator i = fnums.begin(); i != fnums.end(); ++i) { - if (w[*i]) cout << FD::Convert(*i) << ' ' << w[*i] << endl; - } -} - -void ReadConfig(const string& ini, vector<string>* out) { - ReadFile rf(ini); - istream& in = *rf.stream(); - while(in) { - string line; - getline(in, line); - if (!in) continue; - out->push_back(line); - } -} - -void StoreConfig(const vector<string>& cfg, istringstream* o) { - ostringstream os; - for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; } - o->str(os.str()); -} - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input,i",po::value<string>(),"Corpus of source language sentences") - ("weights,w",po::value<string>(),"Input feature weights file") - ("decoder_config,c",po::value<string>(), "cdec.ini file"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value<string>(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as<string>().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("input") || !conf->count("decoder_config")) { - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c, vector<int>* order) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int id = 0; - while(in) { - getline(in, line); - if (!in) break; - if (id % size == rank) { - c->push_back(line); - order->push_back(id); - } - ++id; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct TrainingObserver : public DecoderObserver { - void Reset() { - acc_exp.clear(); - total_complete = 0; - } - - virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { - cur_model_exp.clear(); - state = 1; - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 1); - state = 2; - const prob_t z = InsideOutside<prob_t, - EdgeProb, - SparseVector<prob_t>, - EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp); - cur_model_exp /= z; - acc_exp += cur_model_exp; - } - - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - cerr << "IGNORING ALIGNMENT FOREST!\n"; - } - - virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { - if (state == 2) { - ++total_complete; - } - } - - void GetExpectations(SparseVector<double>* g) const { - g->clear(); - for (SparseVector<prob_t>::const_iterator it = acc_exp.begin(); it != acc_exp.end(); ++it) - g->set_value(it->first, it->second); - } - - int total_complete; - SparseVector<prob_t> cur_model_exp; - SparseVector<prob_t> acc_exp; - int state; -}; - -#ifdef HAVE_MPI -namespace boost { namespace mpi { - template<> - struct is_commutative<std::plus<SparseVector<double> >, SparseVector<double> > - : mpl::true_ { }; -} } // end namespace boost::mpi -#endif - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - mpi::environment env(argc, argv); - mpi::communicator world; - const int 
size = world.size(); - const int rank = world.rank(); -#else - const int size = 1; - const int rank = 0; -#endif - if (size > 1) SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) - return 1; - - // load initial weights - Weights weights; - if (conf.count("weights")) - weights.InitFromFile(conf["weights"].as<string>()); - - vector<string> corpus; - vector<int> ids; - ReadTrainingCorpus(conf["input"].as<string>(), rank, size, &corpus, &ids); - assert(corpus.size() > 0); - - vector<string> cdec_ini; - ReadConfig(conf["decoder_config"].as<string>(), &cdec_ini); - istringstream ini; - StoreConfig(cdec_ini, &ini); - Decoder decoder(&ini); - if (decoder.GetConf()["input"].as<string>() != "-") { - cerr << "cdec.ini must not set an input file\n"; - return 1; - } - - SparseVector<double> x; - weights.InitSparseVector(&x); - TrainingObserver observer; - - weights.InitFromVector(x); - vector<double> lambdas; - weights.InitVector(&lambdas); - decoder.SetWeights(lambdas); - observer.Reset(); - for (unsigned i = 0; i < corpus.size(); ++i) { - int id = ids[i]; - decoder.SetId(id); - decoder.Decode(corpus[i], &observer); - } - SparseVector<double> local_exps, exps; - observer.GetExpectations(&local_exps); -#ifdef HAVE_MPI - reduce(world, local_exps, exps, std::plus<SparseVector<double> >(), 0); -#else - exps.swap(local_exps); -#endif - - weights.InitFromVector(exps); - weights.InitVector(&lambdas); - ShowFeatures(lambdas); - - return 0; -} diff --git a/training/lbl_model.cc b/training/lbl_model.cc deleted file mode 100644 index a46ce33c..00000000 --- a/training/lbl_model.cc +++ /dev/null @@ -1,421 +0,0 @@ -#include <iostream> - -#include "config.h" -#ifndef HAVE_EIGEN - int main() { std::cerr << "Please rebuild with --with-eigen PATH\n"; return 1; } -#else - -#include <cstdlib> -#include <algorithm> -#include <cmath> -#include <set> -#include <cstring> // memset -#include <ctime> - -#ifdef HAVE_MPI -#include <boost/mpi/timer.hpp> -#include <boost/mpi.hpp> -#include <boost/archive/text_oarchive.hpp> -namespace mpi = boost::mpi; -#endif -#include <boost/math/special_functions/fpclassify.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> -#include <Eigen/Dense> - -#include "corpus_tools.h" -#include "optimize.h" -#include "array2d.h" -#include "m.h" -#include "lattice.h" -#include "stringlib.h" -#include "filelib.h" -#include "tdict.h" - -namespace po = boost::program_options; -using namespace std; - -#define kDIMENSIONS 10 -typedef Eigen::Matrix<double, kDIMENSIONS, 1> RVector; -typedef Eigen::Matrix<double, 1, kDIMENSIONS> RTVector; -typedef Eigen::Matrix<double, kDIMENSIONS, kDIMENSIONS> TMatrix; -vector<RVector> r_src, r_trg; - -#if HAVE_MPI -namespace boost { -namespace serialization { - -template<class Archive> -void serialize(Archive & ar, RVector & v, const unsigned int version) { - for (unsigned i = 0; i < kDIMENSIONS; ++i) - ar & v[i]; -} - -} // namespace serialization -} // namespace boost -#endif - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input,i",po::value<string>(),"Input file") - ("iterations,I",po::value<unsigned>()->default_value(1000),"Number of iterations of training") - ("regularization_strength,C",po::value<double>()->default_value(0.1),"L2 regularization strength (0 for no regularization)") - ("eta", 
po::value<double>()->default_value(0.1f), "Eta for SGD") - ("source_embeddings,f", po::value<string>(), "File containing source embeddings (if unset, random vectors will be used)") - ("target_embeddings,e", po::value<string>(), "File containing target embeddings (if unset, random vectors will be used)") - ("random_seed,s", po::value<unsigned>(), "Random seed") - ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)") - ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value<string>(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as<string>().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (argc < 2 || conf->count("help")) { - cerr << "Usage " << argv[0] << " [OPTIONS] -i corpus.fr-en\n"; - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void Normalize(RVector* v) { - double norm = v->norm(); - assert(norm > 0.0f); - *v /= norm; -} - -void Flatten(const TMatrix& m, vector<double>* v) { - unsigned c = 0; - v->resize(kDIMENSIONS * kDIMENSIONS); - for (unsigned i = 0; i < kDIMENSIONS; ++i) - for (unsigned j = 0; j < kDIMENSIONS; ++j) { - assert(boost::math::isfinite(m(i, j))); - (*v)[c++] = m(i,j); - } -} - -void Unflatten(const vector<double>& v, TMatrix* m) { - unsigned c = 0; - for (unsigned i = 0; i < kDIMENSIONS; ++i) - for (unsigned j = 0; j < kDIMENSIONS; ++j) { - assert(boost::math::isfinite(v[c])); - (*m)(i, j) = v[c++]; - } -} - -double ApplyRegularization(const double C, - const vector<double>& weights, - vector<double>* g) { - assert(weights.size() == g->size()); - double reg = 0; - for (size_t i = 0; i < weights.size(); ++i) { - const double& w_i = weights[i]; - double& g_i = (*g)[i]; - reg += C * w_i * w_i; - g_i += 2 * C * w_i; - } - return reg; -} - -void LoadEmbeddings(const string& filename, vector<RVector>* pv) { - vector<RVector>& v = *pv; - cerr << "Reading embeddings from " << filename << " ...\n"; - ReadFile rf(filename); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - size_t cur = line.find(' '); - if (cur == string::npos || cur == 0) { - cerr << "Parse error reading line " << lc << ":\n" << line << endl; - abort(); - } - WordID w = TD::Convert(line.substr(0, cur)); - if (w >= v.size()) continue; - RVector& curv = v[w]; - line[cur] = 0; - size_t start = cur + 1; - cur = start + 1; - size_t c = 0; - while(cur < line.size()) { - if (line[cur] == ' ') { - line[cur] = 0; - curv[c++] = strtod(&line[start], NULL); - start = cur + 1; - cur = start; - if (c == kDIMENSIONS) break; - } - ++cur; - } - if (c < kDIMENSIONS && cur != start) { - if (cur < line.size()) line[cur] = 0; - curv[c++] = strtod(&line[start], NULL); - } - if (c != kDIMENSIONS) { - static bool first = true; - if (first) { - cerr << " read " << c << " dimensions from embedding file, but built with " << kDIMENSIONS << " (filling in with random values)\n"; - first = false; - } - for (; c < 
kDIMENSIONS; ++c) curv[c] = rand(); - } - if (c == kDIMENSIONS && cur != line.size()) { - static bool first = true; - if (first) { - cerr << " embedding file contains more dimensions than configured with, truncating.\n"; - first = false; - } - } - } -} - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - std::cerr << "**MPI enabled.\n"; - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - std::cerr << "**MPI disabled.\n"; - const int rank = 0; - const int size = 1; -#endif - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) return 1; - const string fname = conf["input"].as<string>(); - const double reg_strength = conf["regularization_strength"].as<double>(); - const bool has_l2 = reg_strength; - assert(reg_strength >= 0.0f); - const int ITERATIONS = conf["iterations"].as<unsigned>(); - const double eta = conf["eta"].as<double>(); - const double diagonal_tension = conf["diagonal_tension"].as<double>(); - bool SGD = false; - if (diagonal_tension < 0.0) { - cerr << "Invalid value for diagonal_tension: must be >= 0\n"; - return 1; - } - string testset; - if (conf.count("testset")) testset = conf["testset"].as<string>(); - - unsigned lc = 0; - vector<double> unnormed_a_i; - bool flag = false; - vector<vector<WordID> > srcs, trgs; - vector<WordID> vocab_e; - { - set<WordID> svocab_e, svocab_f; - CorpusTools::ReadFromFile(fname, &srcs, NULL, &trgs, &svocab_e, rank, size); - copy(svocab_e.begin(), svocab_e.end(), back_inserter(vocab_e)); - } - cerr << "Number of target word types: " << vocab_e.size() << endl; - const double num_examples = lc; - - boost::shared_ptr<LBFGSOptimizer> lbfgs; - if (rank == 0) - lbfgs.reset(new LBFGSOptimizer(kDIMENSIONS * kDIMENSIONS, 100)); - r_trg.resize(TD::NumWords() + 1); - r_src.resize(TD::NumWords() + 1); - vector<set<unsigned> > trg_pos(TD::NumWords() + 1); - - if (conf.count("random_seed")) { - srand(conf["random_seed"].as<unsigned>()); - } else { - unsigned seed = time(NULL) + rank * 100; - cerr << "Random seed: " << seed << endl; - srand(seed); - } - - TMatrix t = TMatrix::Zero(); - if (rank == 0) { - t = TMatrix::Random() / 50.0; - for (unsigned i = 1; i < r_trg.size(); ++i) { - r_trg[i] = RVector::Random(); - r_src[i] = RVector::Random(); - } - if (conf.count("source_embeddings")) - LoadEmbeddings(conf["source_embeddings"].as<string>(), &r_src); - if (conf.count("target_embeddings")) - LoadEmbeddings(conf["target_embeddings"].as<string>(), &r_trg); - } - - // do optimization - TMatrix g = TMatrix::Zero(); - vector<TMatrix> exp_src; - vector<double> z_src; - vector<double> flat_g, flat_t, rcv_grad; - Flatten(t, &flat_t); - bool converged = false; -#if HAVE_MPI - mpi::broadcast(world, &flat_t[0], flat_t.size(), 0); - mpi::broadcast(world, r_trg, 0); - mpi::broadcast(world, r_src, 0); -#endif - cerr << "rank=" << rank << ": " << r_trg[0][4] << endl; - for (int iter = 0; !converged && iter < ITERATIONS; ++iter) { - if (rank == 0) cerr << "ITERATION " << (iter + 1) << endl; - Unflatten(flat_t, &t); - double likelihood = 0; - double denom = 0.0; - lc = 0; - flag = false; - g *= 0; - for (unsigned i = 0; i < srcs.size(); ++i) { - const vector<WordID>& src = srcs[i]; - const vector<WordID>& trg = trgs[i]; - ++lc; - if (rank == 0 && lc % 1000 == 0) { cerr << '.'; flag = true; } - if (rank == 0 && lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } - denom += trg.size(); - - exp_src.clear(); exp_src.resize(src.size(), TMatrix::Zero()); - 
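- // Sketch of the model trained below: each word carries an embedding
- // (r_src[s], r_trg[t]) and translation is scored bilinearly,
- // u = exp(r_s^T T r_t), so p(t | s) = u / Z(s), where Z(s) sums u over the
- // target vocabulary. The gradient of the log-likelihood w.r.t. T is the
- // difference between E[r_s r_t^T] under the references (exp_refs / z_refs)
- // and under the model (exp_src / z_src); both tables are accumulated below.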
z_src.clear(); z_src.resize(src.size(), 0.0); - Array2D<TMatrix> exp_refs(src.size(), trg.size(), TMatrix::Zero()); - Array2D<double> z_refs(src.size(), trg.size(), 0.0); - for (unsigned j = 0; j < trg.size(); ++j) - trg_pos[trg[j]].insert(j); - - for (unsigned i = 0; i < src.size(); ++i) { - const RVector& r_s = r_src[src[i]]; - const RTVector pred = r_s.transpose() * t; - TMatrix& exp_m = exp_src[i]; - double& z = z_src[i]; - for (unsigned k = 0; k < vocab_e.size(); ++k) { - const WordID v_k = vocab_e[k]; - const RVector& r_t = r_trg[v_k]; - const double dot_prod = pred * r_t; - const double u = exp(dot_prod); - z += u; - const TMatrix v = r_s * r_t.transpose() * u; - exp_m += v; - set<unsigned>& ref_locs = trg_pos[v_k]; - if (!ref_locs.empty()) { - for (set<unsigned>::iterator it = ref_locs.begin(); it != ref_locs.end(); ++it) { - TMatrix& exp_ref_ij = exp_refs(i, *it); - double& z_ref_ij = z_refs(i, *it); - z_ref_ij += u; - exp_ref_ij += v; - } - } - } - } - for (unsigned j = 0; j < trg.size(); ++j) - trg_pos[trg[j]].clear(); - - // model expectations for a single target generation with - // uniform alignment prior - // TODO: when using a non-uniform alignment, m_exp will be - // a function of j (below) - double m_z = 0; - TMatrix m_exp = TMatrix::Zero(); - for (unsigned i = 0; i < src.size(); ++i) { - m_exp += exp_src[i]; - m_z += z_src[i]; - } - m_exp /= m_z; - - Array2D<bool> al(src.size(), trg.size(), false); - for (unsigned j = 0; j < trg.size(); ++j) { - double ref_z = 0; - TMatrix ref_exp = TMatrix::Zero(); - int max_i = 0; - double max_s = -9999999; - for (unsigned i = 0; i < src.size(); ++i) { - ref_exp += exp_refs(i, j); - ref_z += z_refs(i, j); - if (log(z_refs(i, j)) > max_s) { - max_s = log(z_refs(i, j)); - max_i = i; - } - // TODO handle alignment prob - } - if (ref_z <= 0) { - cerr << "TRG=" << TD::Convert(trg[j]) << endl; - cerr << " LINE=" << lc << " (RANK=" << rank << "/" << size << ")" << endl; - cerr << " REF_EXP=\n" << ref_exp << endl; - cerr << " M_EXP=\n" << m_exp << endl; - abort(); - } - al(max_i, j) = true; - ref_exp /= ref_z; - g += m_exp - ref_exp; - likelihood += log(ref_z) - log(m_z); - if (SGD) { - t -= g * eta / num_examples; - g *= 0; - } - } - - if (rank == 0 && (iter == (ITERATIONS - 1) || lc < 12)) { cerr << al << endl; } - } - if (flag && rank == 0) { cerr << endl; } - - double obj = 0; - if (!SGD) { - Flatten(g, &flat_g); - obj = -likelihood; -#if HAVE_MPI - rcv_grad.resize(flat_g.size(), 0.0); - mpi::reduce(world, &flat_g[0], flat_g.size(), &rcv_grad[0], plus<double>(), 0); - swap(flat_g, rcv_grad); - rcv_grad.clear(); - - double to = 0; - mpi::reduce(world, obj, to, plus<double>(), 0); - obj = to; - double tlh = 0; - mpi::reduce(world, likelihood, tlh, plus<double>(), 0); - likelihood = tlh; - double td = 0; - mpi::reduce(world, denom, td, plus<double>(), 0); - denom = td; -#endif - } - - if (rank == 0) { - double gn = 0; - for (unsigned i = 0; i < flat_g.size(); ++i) - gn += flat_g[i]*flat_g[i]; - const double base2_likelihood = likelihood / log(2); - cerr << " log_e likelihood: " << likelihood << endl; - cerr << " log_2 likelihood: " << base2_likelihood << endl; - cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; - cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; - cerr << " gradient norm: " << sqrt(gn) << endl; - if (!SGD) { - if (has_l2) { - const double r = ApplyRegularization(reg_strength, - flat_t, - &flat_g); - obj += r; - cerr << " regularization: " << r << endl; - } - lbfgs->Optimize(obj, flat_g, 
&flat_t); - converged = (lbfgs->HasConverged()); - } - } -#ifdef HAVE_MPI - mpi::broadcast(world, &flat_t[0], flat_t.size(), 0); - mpi::broadcast(world, converged, 0); -#endif - } - if (rank == 0) - cerr << "TRANSLATION MATRIX:" << endl << t << endl; - return 0; -} - -#endif - diff --git a/training/liblbfgs/Jamfile b/training/liblbfgs/Jamfile deleted file mode 100644 index 49c82748..00000000 --- a/training/liblbfgs/Jamfile +++ /dev/null @@ -1,5 +0,0 @@ -import testing ; - -lib liblbfgs : lbfgs.c : <include>.. ; - -unit-test ll_test : ll_test.cc liblbfgs : <include>.. ; diff --git a/training/liblbfgs/Makefile.am b/training/liblbfgs/Makefile.am index 64a3794d..272d6f56 100644 --- a/training/liblbfgs/Makefile.am +++ b/training/liblbfgs/Makefile.am @@ -6,10 +6,17 @@ ll_test_LDADD = liblbfgs.a -lz noinst_LIBRARIES = liblbfgs.a -liblbfgs_a_SOURCES = lbfgs.c +liblbfgs_a_SOURCES = \ + lbfgs.c \ + arithmetic_ansi.h \ + arithmetic_sse_double.h \ + arithmetic_sse_float.h \ + lbfgs++.h \ + lbfgs.h ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### AM_LDFLAGS = liblbfgs.a -lz -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I.. +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I$(top_srcdir)/training -I$(top_srcdir)/training/liblbfgs ################################################################ + diff --git a/training/minrisk/Makefile.am b/training/minrisk/Makefile.am new file mode 100644 index 00000000..ebf6fa91 --- /dev/null +++ b/training/minrisk/Makefile.am @@ -0,0 +1,8 @@ +bin_PROGRAMS = minrisk_optimize + +minrisk_optimize_SOURCES = minrisk_optimize.cc +minrisk_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a ../../training/liblbfgs/liblbfgs.a + +EXTRA_DIST = minrisk.pl minrisk_generate_input.pl + +AM_CPPFLAGS = -W -Wall -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils diff --git a/training/minrisk/minrisk.pl b/training/minrisk/minrisk.pl new file mode 100755 index 00000000..0f8bacd0 --- /dev/null +++ b/training/minrisk/minrisk.pl @@ -0,0 +1,540 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use IPC::Open2; +use POSIX ":sys_wait_h"; +my $QSUB_CMD = qsub_args(mert_memory()); +my $default_jobs = env_default_jobs(); + +my $UTILS_DIR="$SCRIPT_DIR/../utils"; +require "$UTILS_DIR/libcall.pl"; + +# Default settings +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/minrisk_generate_input.pl"; +my $MAPPER = "$bin_dir/minrisk_optimize"; +my $parallelize = "$UTILS_DIR/parallelize.pl"; +my $libcall = "$UTILS_DIR/libcall.pl"; +my $sentserver = "$UTILS_DIR/sentserver"; +my $sentclient = "$UTILS_DIR/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" 
unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 30; +my $iteration = 1; +my $best_weights; +my $psi = 1; +my $default_max_iter = 30; +my $max_iterations = $default_max_iter; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "4g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $dryrun = 0; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $use_make = 1; # use make to parallelize +my $useqsub = 0; +my $initial_weights; +my $pass_suffix = ''; +my $cpbin=1; + +# regularization strength +my $tune_regularizer = 0; +my $reg = 500; +my $reg_previous = 5000; +my $dont_accum = 0; + +# Process command-line options +Getopt::Long::Configure("no_auto_abbrev"); +if (GetOptions( + "jobs=i" => \$jobs, + "dont-clean" => \$disable_clean, + "dont-accumulate" => \$dont_accum, + "pass-suffix=s" => \$pass_suffix, + "qsub" => \$useqsub, + "dry-run" => \$dryrun, + "epsilon=s" => \$epsilon, + "help" => \$help, + "weights=s" => \$initial_weights, + "reg=f" => \$reg, + "use-make=i" => \$use_make, + "max-iterations=i" => \$max_iterations, + "pmem=s" => \$pmem, + "cpbin!" => \$cpbin, + "ref-files=s" => \$refFiles, + "metric=s" => \$metric, + "source-file=s" => \$srcFile, + "workdir=s" => \$dir, +) == 0 || @ARGV!=1 || $help) { + print_help(); + exit; +} + +die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $srcFile) { push @missing_args, "--source-file"; } +if (!defined $refFiles) { push @missing_args, "--ref-files"; } +if (!defined $initial_weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . 
"\n" if (@missing_args); + +if ($metric =~ /^(combi|ter)$/i) { + $lines_per_mapper = 5; +} + +($iniFile) = @ARGV; + + +sub write_config; +sub enseg; +sub print_help; + +my $nodelist; +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + +# used in sorting scores +my $DIR_FLAG = '-r'; +if ($metric =~ /^ter$|^aer$/i) { + $DIR_FLAG = ''; +} + +my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); + +unless ($dir){ + $dir = "minrisk"; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; +open(INI, $iniFile); + +use File::Basename qw(basename); +#pass bindir, refs to vars holding bin +sub modbin { + local $_; + my $bindir=shift; + check_call("mkdir -p $bindir"); + -d $bindir || die "couldn't make bindir $bindir"; + for (@_) { + my $src=$$_; + $$_="$bindir/".basename($src); + check_call("cp -p $src $$_"); + } +} +sub dirsize { + opendir ISEMPTY,$_[0]; + return scalar(readdir(ISEMPTY))-1; +} +my @allweights; +if ($dryrun){ + write_config(*STDERR); + exit 0; +} else { + if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-pro.pl outputs + die "ERROR: working dir $dir already exists\n\n"; + } else { + -e $dir || mkdir $dir; + mkdir "$dir/hgs"; + modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; + mkdir "$dir/scripts"; + my $cmdfile="$dir/rerun-pro.sh"; + open CMD,'>',$cmdfile; + print CMD "cd ",&getcwd,"\n"; +# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted. 
+ my $cline=&cmdline."\n"; + print CMD $cline; + close CMD; + print STDERR $cline; + chmod(0755,$cmdfile); + check_call("cp $initial_weights $dir/weights.0"); + die "Can't find weights.0" unless (-e "$dir/weights.0"); + } + write_config(*STDERR); +} + + +# Generate initial files and values +check_call("cp $iniFile $newIniFile"); +$iniFile = $newIniFile; + +my $newsrc = "$dir/dev.input"; +enseg($srcFile, $newsrc); +$srcFile = $newsrc; +my $devSize = 0; +open F, "<$srcFile" or die "Can't read $srcFile: $!"; +while(<F>) { $devSize++; } +close F; + +unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; +my $kbest = "$dir/kbest"; +if ($dont_accum) { + $kbest = ''; +} else { + check_call("mkdir -p $kbest"); + $kbest = "--kbest_repository $kbest"; +} + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + push @allweights, "-w $dir/weights.$im1"; + `rm -f $dir/hgs/*.gz`; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? 
Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
+  my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
+  chomp $dec_score;
+  print STDERR "DECODER SCORE: $dec_score\n";
+
+  # save space
+  check_call("gzip -f $runFile");
+  check_call("gzip -f $decoderLog");
+
+  # run optimizer
+  print STDERR "RUNNING OPTIMIZER AT ";
+  print STDERR unchecked_output("date");
+  print STDERR " - GENERATE TRAINING EXEMPLARS\n";
+  my $mergeLog="$logdir/prune-merge.log.$iteration";
+
+  my $score = 0;
+  my $icc = 0;
+  my $inweights="$dir/weights.$im1";
+  my $outweights="$dir/weights.$iteration";
+  $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1";
+  print STDERR "COMMAND:\n$cmd\n";
+  check_call($cmd);
+  $cmd="$MAPPER $refs_comma_sep -m $metric -i $dir/agenda.$im1 $kbest -w $inweights > $outweights";
+  check_call($cmd);
+  $lastWeightsFile = $outweights;
+  $iteration++;
+  `rm -f $dir/hgs/*.gz`; # remove this pass's forests from the working dir
+  print STDERR "\n==========\n";
+}
+
+print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
+
+print STDOUT "$lastWeightsFile\n";
+
+exit 0;
+
+sub get_lines {
+  my $fn = shift @_;
+  open FL, "<$fn" or die "Couldn't read $fn: $!";
+  my $lc = 0;
+  while(<FL>) { $lc++; }
+  return $lc;
+}
+
+sub get_comma_sep_refs {
+  my ($r,$p) = @_;
+  my $o = check_output("echo $p");
+  chomp $o;
+  my @files = split /\s+/, $o;
+  return "-$r " . join(" -$r ", @files);
+}
+
+sub read_weights_file {
+  my ($file) = @_;
+  open F, "<$file" or die "Couldn't read $file: $!";
+  my @r = ();
+  my $pm = -1;
+  while(<F>) {
+    next if /^#/;
+    next if /^\s*$/;
+    chomp;
+    if (/^(.+)\s+(.+)$/) {
+      my $m = $1;
+      my $w = $2;
+      die "Weights out of order: $m <= $pm" unless $m > $pm;
+      push @r, $w;
+    } else {
+      warn "Unexpected feature name in weight file: $_";
+    }
+  }
+  close F;
+  return join ' ', @r;
+}
+
+# subs
+sub write_config {
+  my $fh = shift;
+  my $cleanup = "yes";
+  if ($disable_clean) {$cleanup = "no";}
+
+  print $fh "\n";
+  print $fh "DECODER: $decoder\n";
+  print $fh "INI FILE: $iniFile\n";
+  print $fh "WORKING DIR: $dir\n";
+  print $fh "SOURCE (DEV): $srcFile\n";
+  print $fh "REFS (DEV): $refFiles\n";
+  print $fh "EVAL METRIC: $metric\n";
+  print $fh "MAX ITERATIONS: $max_iterations\n";
+  print $fh "JOBS: $jobs\n";
+  print $fh "HEAD NODE: $host\n";
+  print $fh "PMEM (DECODING): $pmem\n";
+  print $fh "CLEANUP: $cleanup\n";
+}
+
+sub update_weights_file {
+  my ($neww, $rfn, $rpts) = @_;
+  my @feats = @$rfn;
+  my @pts = @$rpts;
+  my $num_feats = scalar @feats;
+  my $num_pts = scalar @pts;
+  die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts;
+  open G, ">$neww" or die;
+  for (my $i = 0; $i < $num_feats; $i++) {
+    my $f = $feats[$i];
+    my $lambda = $pts[$i];
+    print G "$f $lambda\n";
+  }
+  close G;
+}
+
+sub enseg {
+  my $src = shift;
+  my $newsrc = shift;
+  open(SRC, $src);
+  open(NEWSRC, ">$newsrc");
+  my $i=0;
+  while (my $line=<SRC>){
+    chomp $line;
+    if ($line =~ /^\s*<seg/i) {
+      if($line =~ /id="[0-9]+"/) {
+        print NEWSRC "$line\n";
+      } else {
+        die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+      }
+    } else {
+      print NEWSRC "<seg id=\"$i\">$line</seg>\n";
+    }
+    $i++;
+  }
+  close SRC;
+  close NEWSRC;
+  die "Empty dev set!" if ($i == 0);
+}
+
+sub print_help {
+
+  my $executable = check_output("basename $0"); chomp $executable;
+  print << "Help";
+
+Usage: $executable [options] <ini file>
+
+  $executable [options] <ini file>
+    Runs a complete minimum risk optimization using the ini file specified.
+ +Required: + + --ref-files <files> + Dev set ref files. This option takes only a single string argument. + To use multiple files (including file globbing), this argument should + be quoted. + + --source-file <file> + Dev set source file. + + --weights <file> + Initial weights file (use empty file to start from 0) + +General options: + + --help + Print this message and exit. + + --dont-accumulate + Don't accumulate k-best lists from multiple iterations. + + --max-iterations <M> + Maximum number of iterations to run. If not specified, defaults + to $default_max_iter. + + --metric <method> + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --pass-suffix <S> + If the decoder is doing multi-pass decoding, the pass suffix "2", + "3", etc., is used to control what iteration of weights is set. + + --workdir <dir> + Directory for intermediate and output files. If not specified, the + name is derived from the ini filename. Assuming that the ini + filename begins with the decoder name and ends with ini, the default + name of the working directory is inferred from the middle part of + the filename. E.g. an ini file named decoder.foo.ini would have + a default working directory name foo. + +Regularization options: + + --reg <F> + l2 regularization strength [default=500]. The greater this value, + the closer to zero the weights will be. + +Job control options: + + --jobs <I> + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem <N> + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? 
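+# Sketch of the quoting protocol implemented below: an argument containing
+# any shell metacharacter is wrapped in double quotes, with embedded
+# \ " $ ` ! backslash-escaped first, e.g.
+#   escape_shell(q{say "hi" $USER})  returns  "say \"hi\" \$USER"
+# while plain arguments are passed through unchanged.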
+my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} diff --git a/training/minrisk/minrisk_generate_input.pl b/training/minrisk/minrisk_generate_input.pl new file mode 100755 index 00000000..b30fc4fd --- /dev/null +++ b/training/minrisk/minrisk_generate_input.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl -w +use strict; + +die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; +my $d = shift @ARGV; +die "Can't find directory $d" unless -d $d; + +opendir(DIR, $d) or die "Can't read $d: $!"; +my @hgs = grep { /\.gz$/ } readdir(DIR); +closedir DIR; + +for my $hg (@hgs) { + my $file = $hg; + my $id = $hg; + $id =~ s/(\.json)?\.gz//; + print "$d/$file $id\n"; +} + diff --git a/training/minrisk/minrisk_optimize.cc b/training/minrisk/minrisk_optimize.cc new file mode 100644 index 00000000..da8b5260 --- /dev/null +++ b/training/minrisk/minrisk_optimize.cc @@ -0,0 +1,197 @@ +#include <sstream> +#include <iostream> +#include <vector> +#include <limits> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "liblbfgs/lbfgs++.h" +#include "filelib.h" +#include "stringlib.h" +#include "weights.h" +#include "hg_io.h" +#include "kbest.h" +#include "viterbi.h" +#include "ns.h" +#include "ns_docscorer.h" +#include "candidate_set.h" +#include "risk.h" +#include "entropy.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)") + ("weights,w",po::value<string>(), "[REQD] Weights files from current iterations") + ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)") + ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") + ("temperature,T",po::value<double>()->default_value(0.0), "Temperature parameter for objective (>0 increases the entropy)") + ("l1_strength,C",po::value<double>()->default_value(0.0), "L1 regularization strength") + ("memory_buffers,M",po::value<unsigned>()->default_value(20), "Memory buffers used in LBFGS") + ("kbest_repository,R",po::value<string>(), "Accumulate k-best lists from previous iterations (parameter is path to repository)") + ("kbest_size,k",po::value<unsigned>()->default_value(500u), "Top k-hypotheses to extract") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r <REF.TXT>\n"; + flag = true; + } + if (!conf->count("weights")) { + cerr << "Please specify weights using -w <WEIGHTS.TXT>\n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +EvaluationMetric* metric = NULL; + +struct RiskObjective { + explicit 
RiskObjective(const vector<training::CandidateSet>& tr, const double temp) : training(tr), T(temp) {} + double operator()(const vector<double>& x, double* g) const { + fill(g, g + x.size(), 0.0); + double obj = 0; + double h = 0; + for (unsigned i = 0; i < training.size(); ++i) { + training::CandidateSetRisk risk(training[i], *metric); + training::CandidateSetEntropy entropy(training[i]); + SparseVector<double> tg, hg; + double r = risk(x, &tg); + double hh = entropy(x, &hg); + h += hh; + obj += r; + for (SparseVector<double>::iterator it = tg.begin(); it != tg.end(); ++it) + g[it->first] += it->second; + if (T) { + for (SparseVector<double>::iterator it = hg.begin(); it != hg.end(); ++it) + g[it->first] += T * it->second; + } + } + cerr << (1-(obj / training.size())) << " H=" << h << endl; + return obj - T * h; + } + const vector<training::CandidateSet>& training; + const double T; // temperature for entropy regularization +}; + +double LearnParameters(const vector<training::CandidateSet>& training, + const double temp, // > 0 increases the entropy, < 0 decreases the entropy + const double C1, + const unsigned memory_buffers, + vector<weight_t>* px) { + RiskObjective obj(training, temp); + LBFGS<RiskObjective> lbfgs(px, obj, memory_buffers, C1); + lbfgs.MinimizeFunction(); + return 0; +} + +#if 0 +struct FooLoss { + double operator()(const vector<double>& x, double* g) const { + fill(g, g + x.size(), 0.0); + training::CandidateSet cs; + training::CandidateSetEntropy cse(cs); + cs.cs.resize(3); + cs.cs[0].fmap.set_value(FD::Convert("F1"), -1.0); + cs.cs[1].fmap.set_value(FD::Convert("F2"), 1.0); + cs.cs[2].fmap.set_value(FD::Convert("F1"), 2.0); + cs.cs[2].fmap.set_value(FD::Convert("F2"), 0.5); + SparseVector<double> xx; + double h = cse(x, &xx); + cerr << cse(x, &xx) << endl; cerr << "G: " << xx << endl; + for (SparseVector<double>::iterator i = xx.begin(); i != xx.end(); ++i) + g[i->first] += i->second; + return -h; + } +}; +#endif + +int main(int argc, char** argv) { +#if 0 + training::CandidateSet cs; + training::CandidateSetEntropy cse(cs); + cs.cs.resize(3); + cs.cs[0].fmap.set_value(FD::Convert("F1"), -1.0); + cs.cs[1].fmap.set_value(FD::Convert("F2"), 1.0); + cs.cs[2].fmap.set_value(FD::Convert("F1"), 2.0); + cs.cs[2].fmap.set_value(FD::Convert("F2"), 0.5); + FooLoss foo; + vector<double> ww(FD::NumFeats()); ww[FD::Convert("F1")] = 1.0; + LBFGS<FooLoss> lbfgs(&ww, foo, 100, 0.0); + lbfgs.MinimizeFunction(); + return 1; +#endif + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string evaluation_metric = conf["evaluation_metric"].as<string>(); + + metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as<vector<string> >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; + + Hypergraph hg; + string last_file; + ReadFile in_read(conf["input"].as<string>()); + string kbest_repo; + if (conf.count("kbest_repository")) { + kbest_repo = conf["kbest_repository"].as<string>(); + MkDirP(kbest_repo); + } + istream &in=*in_read.stream(); + const unsigned kbest_size = conf["kbest_size"].as<unsigned>(); + vector<weight_t> weights; + const string weightsf = conf["weights"].as<string>(); + Weights::InitFromFile(weightsf, &weights); + double t = 0; + for (unsigned i = 0; i < weights.size(); ++i) + t += weights[i] * weights[i]; + if (t > 0) { + for (unsigned i = 0; i < weights.size(); ++i) + weights[i] /= sqrt(t); + } + string line, file; + vector<training::CandidateSet> kis; + 
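+  // Each input line (as produced by minrisk_generate_input.pl) is expected
+  // to name a forest file and its sentence id, e.g. "hgs/0.json.gz 0". For
+  // every line the loop below reads the hypergraph, reweights it with the
+  // L2-normalized current weights, adds the top kbest_size derivations to
+  // that sentence's CandidateSet, and, when --kbest_repository is given,
+  // merges with and rewrites the k-best list saved by earlier iterations.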
cerr << "Loading hypergraphs...\n"; + while(getline(in, line)) { + istringstream is(line); + int sent_id; + kis.resize(kis.size() + 1); + training::CandidateSet& curkbest = kis.back(); + string kbest_file; + if (kbest_repo.size()) { + ostringstream os; + os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; + kbest_file = os.str(); + if (FileExists(kbest_file)) + curkbest.ReadFromFile(kbest_file); + } + is >> file >> sent_id; + ReadFile rf(file); + if (kis.size() % 5 == 0) { cerr << '.'; } + if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; } + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(weights); + curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + if (kbest_file.size()) + curkbest.WriteToFile(kbest_file); + } + cerr << "\nHypergraphs loaded.\n"; + weights.resize(FD::NumFeats()); + + double c1 = conf["l1_strength"].as<double>(); + double temp = conf["temperature"].as<double>(); + unsigned m = conf["memory_buffers"].as<unsigned>(); + LearnParameters(kis, temp, c1, m, &weights); + Weights::WriteToFile("-", weights); + return 0; +} + diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am new file mode 100644 index 00000000..fa4fb22d --- /dev/null +++ b/training/mira/Makefile.am @@ -0,0 +1,6 @@ +bin_PROGRAMS = kbest_mira + +kbest_mira_SOURCES = kbest_mira.cc +kbest_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/mira/kbest_mira.cc b/training/mira/kbest_mira.cc new file mode 100644 index 00000000..d59b4224 --- /dev/null +++ b/training/mira/kbest_mira.cc @@ -0,0 +1,322 @@ +#include <sstream> +#include <iostream> +#include <vector> +#include <cassert> +#include <cmath> +#include <tr1/memory> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "stringlib.h" +#include "hg_sampler.h" +#include "sentence_metadata.h" +#include "ns.h" +#include "ns_docscorer.h" +#include "verbose.h" +#include "viterbi.h" +#include "hg.h" +#include "prob.h" +#include "kbest.h" +#include "ff_register.h" +#include "decoder.h" +#include "filelib.h" +#include "fdict.h" +#include "weights.h" +#include "sparse_vector.h" +#include "sampler.h" + +using namespace std; +namespace po = boost::program_options; + +bool invert_score; +std::tr1::shared_ptr<MT19937> rng; + +void RandomPermutation(int len, vector<int>* p_ids) { + vector<int>& ids = *p_ids; + ids.resize(len); + for (int i = 0; i < len; ++i) ids[i] = i; + for (int i = len; i > 0; --i) { + int j = rng->next() * i; + if (j == i) i--; + swap(ids[i-1], ids[j]); + } +} + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("input_weights,w",po::value<string>(),"Input feature weights file") + ("source,i",po::value<string>(),"Source file for development set") + ("passes,p", po::value<int>()->default_value(15), "Number of passes through the training data") + ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)") + ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") + ("max_step_size,C", po::value<double>()->default_value(0.01), "regularization strength 
(C)") + ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Amount to scale MT loss function by") + ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles") + ("sample_forest,f", "Instead of a k-best list, sample k hypotheses from the decoder's forest") + ("sample_forest_unit_weight_vector,x", "Before sampling (must use -f option), rescale the weight vector used so it has unit length; this may improve the quality of the samples") + ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)") + ("decoder_config,c",po::value<string>(),"Decoder configuration file"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value<string>(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as<string>().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("input_weights") || !conf->count("source") || !conf->count("decoder_config") || !conf->count("reference")) { + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +static const double kMINUS_EPSILON = -1e-6; + +struct HypothesisInfo { + SparseVector<double> features; + double mt_metric; +}; + +struct GoodBadOracle { + std::tr1::shared_ptr<HypothesisInfo> good; + std::tr1::shared_ptr<HypothesisInfo> bad; +}; + +struct TrainingObserver : public DecoderObserver { + TrainingObserver(const int k, const DocumentScorer& d, const EvaluationMetric& m, bool sf, vector<GoodBadOracle>* o) : ds(d), metric(m), oracles(*o), kbest_size(k), sample_forest(sf) {} + const DocumentScorer& ds; + const EvaluationMetric& metric; + vector<GoodBadOracle>& oracles; + std::tr1::shared_ptr<HypothesisInfo> cur_best; + const int kbest_size; + const bool sample_forest; + + const HypothesisInfo& GetCurrentBestHypothesis() const { + return *cur_best; + } + + virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { + UpdateOracles(smeta.GetSentenceID(), *hg); + } + + std::tr1::shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double score) { + std::tr1::shared_ptr<HypothesisInfo> h(new HypothesisInfo); + h->features = feats; + h->mt_metric = score; + return h; + } + + void UpdateOracles(int sent_id, const Hypergraph& forest) { + std::tr1::shared_ptr<HypothesisInfo>& cur_good = oracles[sent_id].good; + std::tr1::shared_ptr<HypothesisInfo>& cur_bad = oracles[sent_id].bad; + cur_bad.reset(); // TODO get rid of?? 
+ + if (sample_forest) { + vector<WordID> cur_prediction; + ViterbiESentence(forest, &cur_prediction); + SufficientStats sstats; + ds[sent_id]->Evaluate(cur_prediction, &sstats); + float sentscore = metric.ComputeScore(sstats); + cur_best = MakeHypothesisInfo(ViterbiFeatures(forest), sentscore); + + vector<HypergraphSampler::Hypothesis> samples; + HypergraphSampler::sample_hypotheses(forest, kbest_size, &*rng, &samples); + for (unsigned i = 0; i < samples.size(); ++i) { + ds[sent_id]->Evaluate(samples[i].words, &sstats); + float sentscore = metric.ComputeScore(sstats); + if (invert_score) sentscore *= -1.0; + if (!cur_good || sentscore > cur_good->mt_metric) + cur_good = MakeHypothesisInfo(samples[i].fmap, sentscore); + if (!cur_bad || sentscore < cur_bad->mt_metric) + cur_bad = MakeHypothesisInfo(samples[i].fmap, sentscore); + } + } else { + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size); + SufficientStats sstats; + for (int i = 0; i < kbest_size; ++i) { + const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + ds[sent_id]->Evaluate(d->yield, &sstats); + float sentscore = metric.ComputeScore(sstats); + if (invert_score) sentscore *= -1.0; + // cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl; + if (i == 0) + cur_best = MakeHypothesisInfo(d->feature_values, sentscore); + if (!cur_good || sentscore > cur_good->mt_metric) + cur_good = MakeHypothesisInfo(d->feature_values, sentscore); + if (!cur_bad || sentscore < cur_bad->mt_metric) + cur_bad = MakeHypothesisInfo(d->feature_values, sentscore); + } + //cerr << "GOOD: " << cur_good->mt_metric << endl; + //cerr << " CUR: " << cur_best->mt_metric << endl; + //cerr << " BAD: " << cur_bad->mt_metric << endl; + } + } +}; + +void ReadTrainingCorpus(const string& fname, vector<string>* c) { + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + while(in) { + getline(in, line); + if (!in) break; + c->push_back(line); + } +} + +bool ApproxEqual(double a, double b) { + if (a == b) return true; + return (fabs(a-b)/fabs(b)) < 0.000001; +} + +int main(int argc, char** argv) { + register_feature_functions(); + SetSilent(true); // turn off verbose decoder output + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) return 1; + + if (conf.count("random_seed")) + rng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); + else + rng.reset(new MT19937); + const bool sample_forest = conf.count("sample_forest") > 0; + const bool sample_forest_unit_weight_vector = conf.count("sample_forest_unit_weight_vector") > 0; + if (sample_forest_unit_weight_vector && !sample_forest) { + cerr << "Cannot --sample_forest_unit_weight_vector without --sample_forest" << endl; + return 1; + } + vector<string> corpus; + ReadTrainingCorpus(conf["source"].as<string>(), &corpus); + + string metric_name = UppercaseString(conf["mt_metric"].as<string>()); + if (metric_name == "COMBI") { + cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n"; + metric_name = "COMB:TER=-0.5;IBM_BLEU=0.5"; + } else if (metric_name == "BLEU") { + cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n"; + metric_name = "IBM_BLEU"; + } + EvaluationMetric* metric = EvaluationMetric::Instance(metric_name); + DocumentScorer ds(metric, conf["reference"].as<vector<string> >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name 
<< endl; + invert_score = metric->IsErrorMetric(); + + if (ds.size() != corpus.size()) { + cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n"; + return 1; + } + + ReadFile ini_rf(conf["decoder_config"].as<string>()); + Decoder decoder(ini_rf.stream()); + + // load initial weights + vector<weight_t>& dense_weights = decoder.CurrentWeightVector(); + SparseVector<weight_t> lambdas; + Weights::InitFromFile(conf["input_weights"].as<string>(), &dense_weights); + Weights::InitSparseVector(dense_weights, &lambdas); + + const double max_step_size = conf["max_step_size"].as<double>(); + const double mt_metric_scale = conf["mt_metric_scale"].as<double>(); + + assert(corpus.size() > 0); + vector<GoodBadOracle> oracles(corpus.size()); + + TrainingObserver observer(conf["k_best_size"].as<int>(), ds, *metric, sample_forest, &oracles); + int cur_sent = 0; + int lcount = 0; + int normalizer = 0; + double tot_loss = 0; + int dots = 0; + int cur_pass = 0; + SparseVector<double> tot; + tot += lambdas; // initial weights + normalizer++; // count for initial weights + int max_iteration = conf["passes"].as<int>() * corpus.size(); + string msg = "# MIRA tuned weights"; + string msga = "# MIRA tuned weights AVERAGED"; + vector<int> order; + RandomPermutation(corpus.size(), &order); + while (lcount <= max_iteration) { + lambdas.init_vector(&dense_weights); + if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.'; } + if (corpus.size() == cur_sent) { + cerr << " [AVG METRIC LAST PASS=" << (tot_loss / corpus.size()) << "]\n"; + Weights::ShowLargestFeatures(dense_weights); + cur_sent = 0; + tot_loss = 0; + dots = 0; + ostringstream os; + os << "weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz"; + SparseVector<double> x = tot; + x /= normalizer; + ostringstream sa; + sa << "weights.mira-pass" << (cur_pass < 10 ? 
"0" : "") << cur_pass << "-avg.gz"; + x.init_vector(&dense_weights); + Weights::WriteToFile(os.str(), dense_weights, true, &msg); + ++cur_pass; + RandomPermutation(corpus.size(), &order); + } + if (cur_sent == 0) { + cerr << "PASS " << (lcount / corpus.size() + 1) << endl; + } + decoder.SetId(order[cur_sent]); + double sc = 1.0; + if (sample_forest_unit_weight_vector) { + sc = lambdas.l2norm(); + if (sc > 0) { + for (unsigned i = 0; i < dense_weights.size(); ++i) + dense_weights[i] /= sc; + } + } + decoder.Decode(corpus[order[cur_sent]], &observer); // update oracles + if (sc && sc != 1.0) { + for (unsigned i = 0; i < dense_weights.size(); ++i) + dense_weights[i] *= sc; + } + const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis(); + const HypothesisInfo& cur_good = *oracles[order[cur_sent]].good; + const HypothesisInfo& cur_bad = *oracles[order[cur_sent]].bad; + tot_loss += cur_hyp.mt_metric; + if (!ApproxEqual(cur_hyp.mt_metric, cur_good.mt_metric)) { + const double loss = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights) + + mt_metric_scale * (cur_good.mt_metric - cur_bad.mt_metric); + //cerr << "LOSS: " << loss << endl; + if (loss > 0.0) { + SparseVector<double> diff = cur_good.features; + diff -= cur_bad.features; + double step_size = loss / diff.l2norm_sq(); + //cerr << loss << " " << step_size << " " << diff << endl; + if (step_size > max_step_size) step_size = max_step_size; + lambdas += (cur_good.features * step_size); + lambdas -= (cur_bad.features * step_size); + //cerr << "L: " << lambdas << endl; + } + } + tot += lambdas; + ++normalizer; + ++lcount; + ++cur_sent; + } + cerr << endl; + Weights::WriteToFile("weights.mira-final.gz", dense_weights, true, &msg); + tot /= normalizer; + tot.init_vector(dense_weights); + msg = "# MIRA tuned weights (averaged vector)"; + Weights::WriteToFile("weights.mira-final-avg.gz", dense_weights, true, &msg); + cerr << "Optimization complete.\nAVERAGED WEIGHTS: weights.mira-final-avg.gz\n"; + return 0; +} + diff --git a/training/mpi_em_optimize.cc b/training/mpi_em_optimize.cc deleted file mode 100644 index 48683b15..00000000 --- a/training/mpi_em_optimize.cc +++ /dev/null @@ -1,389 +0,0 @@ -#include <sstream> -#include <iostream> -#include <vector> -#include <cassert> -#include <cmath> - -#ifdef HAVE_MPI -#include <mpi.h> -#endif - -#include <boost/shared_ptr.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "optimize.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -using boost::shared_ptr; -namespace po = boost::program_options; - -void SanityCheck(const vector<double>& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - -struct FComp { - const vector<double>& w_; - FComp(const vector<double>& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowLargestFeatures(const vector<double>& w) { - vector<int> fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector<int>::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 
10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,w",po::value<string>(),"Input feature weights file") - ("training_data,t",po::value<string>(),"Training data") - ("decoder_config,c",po::value<string>(),"Decoder configuration file") - ("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value<string>(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as<string>().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !(conf->count("training_data")) || !conf->count("decoder_config")) { - cerr << dcmdline_options << endl; -#ifdef HAVE_MPI - MPI::Finalize(); -#endif - exit(1); - } -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int lc = 0; - while(in) { - getline(in, line); - if (!in) break; - if (lc % size == rank) c->push_back(line); - ++lc; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct TrainingObserver : public DecoderObserver { - void Reset() { - total_complete = 0; - cur_obj = 0; - tot_obj = 0; - tot.clear(); - } - - void SetLocalGradientAndObjective(SparseVector<double>* g, double* o) const { - *o = tot_obj; - *g = tot; - } - - virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { - cur_obj = 0; - state = 1; - } - - void ExtractExpectedCounts(Hypergraph* hg) { - vector<prob_t> posts; - cur.clear(); - const prob_t z = hg->ComputeEdgePosteriors(1.0, &posts); - cur_obj = log(z); - for (int i = 0; i < posts.size(); ++i) { - const SparseVector<double>& efeats = hg->edges_[i].feature_values_; - const double post = static_cast<double>(posts[i] / z); - for (SparseVector<double>::const_iterator j = efeats.begin(); j != efeats.end(); ++j) - cur.add_value(j->first, post); - } - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 1); - state = 2; - ExtractExpectedCounts(hg); - } - - // replace translation forest, since we're doing EM training (we don't know which) - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 2); - state = 3; - ExtractExpectedCounts(hg); - } - - virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { - ++total_complete; - tot_obj += cur_obj; - tot += cur; - } - - int total_complete; - double cur_obj; - double tot_obj; - SparseVector<double> cur, tot; - int state; -}; - -void ReadConfig(const string& ini, vector<string>* out) { - ReadFile rf(ini); - istream& in = *rf.stream(); - while(in) { - string line; - getline(in, line); - if (!in) continue; - out->push_back(line); - } -} - -void StoreConfig(const 
vector<string>& cfg, istringstream* o) { - ostringstream os; - for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; } - o->str(os.str()); -} - -struct OptimizableMultinomialFamily { - struct CPD { - CPD() : z() {} - double z; - map<WordID, double> c2counts; - }; - map<WordID, CPD> counts; - double Value(WordID conditioning, WordID generated) const { - map<WordID, CPD>::const_iterator it = counts.find(conditioning); - assert(it != counts.end()); - map<WordID,double>::const_iterator r = it->second.c2counts.find(generated); - if (r == it->second.c2counts.end()) return 0; - return r->second; - } - void Increment(WordID conditioning, WordID generated, double count) { - CPD& cc = counts[conditioning]; - cc.z += count; - cc.c2counts[generated] += count; - } - void Optimize() { - for (map<WordID, CPD>::iterator i = counts.begin(); i != counts.end(); ++i) { - CPD& cpd = i->second; - for (map<WordID, double>::iterator j = cpd.c2counts.begin(); j != cpd.c2counts.end(); ++j) { - j->second /= cpd.z; - // cerr << "P(" << TD::Convert(j->first) << " | " << TD::Convert(i->first) << " ) = " << j->second << endl; - } - } - } - void Clear() { - counts.clear(); - } -}; - -struct CountManager { - CountManager(size_t num_types) : oms_(num_types) {} - virtual ~CountManager(); - virtual void AddCounts(const SparseVector<double>& c) = 0; - void Optimize(SparseVector<double>* weights) { - for (int i = 0; i < oms_.size(); ++i) { - oms_[i].Optimize(); - } - GetOptimalValues(weights); - for (int i = 0; i < oms_.size(); ++i) { - oms_[i].Clear(); - } - } - virtual void GetOptimalValues(SparseVector<double>* wv) const = 0; - vector<OptimizableMultinomialFamily> oms_; -}; -CountManager::~CountManager() {} - -struct TaggerCountManager : public CountManager { - // 0 = transitions, 2 = emissions - TaggerCountManager() : CountManager(2) {} - void AddCounts(const SparseVector<double>& c); - void GetOptimalValues(SparseVector<double>* wv) const { - for (set<int>::const_iterator it = fids_.begin(); it != fids_.end(); ++it) { - int ftype; - WordID cond, gen; - bool is_optimized = TaggerCountManager::GetFeature(*it, &ftype, &cond, &gen); - assert(is_optimized); - wv->set_value(*it, log(oms_[ftype].Value(cond, gen))); - } - } - // Id:0:a=1 Bi:a_b=1 Bi:b_c=1 Bi:c_d=1 Uni:a=1 Uni:b=1 Uni:c=1 Uni:d=1 Id:1:b=1 Bi:BOS_a=1 Id:2:c=1 - static bool GetFeature(const int fid, int* feature_type, WordID* cond, WordID* gen) { - const string& feat = FD::Convert(fid); - if (feat.size() > 5 && feat[0] == 'I' && feat[1] == 'd' && feat[2] == ':') { - // emission - const size_t p = feat.rfind(':'); - assert(p != string::npos); - *cond = TD::Convert(feat.substr(p+1)); - *gen = TD::Convert(feat.substr(3, p - 3)); - *feature_type = 1; - return true; - } else if (feat[0] == 'B' && feat.size() > 5 && feat[2] == ':' && feat[1] == 'i') { - // transition - const size_t p = feat.rfind('_'); - assert(p != string::npos); - *gen = TD::Convert(feat.substr(p+1)); - *cond = TD::Convert(feat.substr(3, p - 3)); - *feature_type = 0; - return true; - } else if (feat[0] == 'U' && feat.size() > 4 && feat[1] == 'n' && feat[2] == 'i' && feat[3] == ':') { - // ignore - return false; - } else { - cerr << "Don't know how to deal with feature of type: " << feat << endl; - abort(); - } - } - set<int> fids_; -}; - -void TaggerCountManager::AddCounts(const SparseVector<double>& c) { - for (SparseVector<double>::const_iterator it = c.begin(); it != c.end(); ++it) { - const double& val = it->second; - int ftype; - WordID cond, gen; - if (GetFeature(it->first, &ftype, &cond, 
&gen)) { - oms_[ftype].Increment(cond, gen, val); - fids_.insert(it->first); - } - } -} - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - MPI::Init(argc, argv); - const int size = MPI::COMM_WORLD.Get_size(); - const int rank = MPI::COMM_WORLD.Get_rank(); -#else - const int size = 1; - const int rank = 0; -#endif - SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - TaggerCountManager tcm; - - // load cdec.ini and set up decoder - vector<string> cdec_ini; - ReadConfig(conf["decoder_config"].as<string>(), &cdec_ini); - istringstream ini; - StoreConfig(cdec_ini, &ini); - if (rank == 0) cerr << "Loading grammar...\n"; - Decoder* decoder = new Decoder(&ini); - if (decoder->GetConf()["input"].as<string>() != "-") { - cerr << "cdec.ini must not set an input file\n"; -#ifdef HAVE_MPI - MPI::COMM_WORLD.Abort(1); -#endif - } - if (rank == 0) cerr << "Done loading grammar!\n"; - Weights w; - if (conf.count("input_weights")) - w.InitFromFile(conf["input_weights"].as<string>()); - - double objective = 0; - bool converged = false; - - vector<double> lambdas; - w.InitVector(&lambdas); - vector<string> corpus; - ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus); - assert(corpus.size() > 0); - - int iteration = 0; - TrainingObserver observer; - while (!converged) { - ++iteration; - observer.Reset(); - if (rank == 0) { - cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n"; - } - decoder->SetWeights(lambdas); - for (int i = 0; i < corpus.size(); ++i) - decoder->Decode(corpus[i], &observer); - - SparseVector<double> x; - observer.SetLocalGradientAndObjective(&x, &objective); - cerr << "COUNTS = " << x << endl; - cerr << " OBJ = " << objective << endl; - tcm.AddCounts(x); - -#if 0 -#ifdef HAVE_MPI - MPI::COMM_WORLD.Reduce(const_cast<double*>(&gradient.data()[0]), &rcv_grad[0], num_feats, MPI::DOUBLE, MPI::SUM, 0); - MPI::COMM_WORLD.Reduce(&objective, &to, 1, MPI::DOUBLE, MPI::SUM, 0); - swap(gradient, rcv_grad); - objective = to; -#endif -#endif - - if (rank == 0) { - SparseVector<double> wsv; - tcm.Optimize(&wsv); - - w.InitFromVector(wsv); - w.InitVector(&lambdas); - - ShowLargestFeatures(lambdas); - - converged = iteration > 100; - if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } - - string fname = "weights.cur.gz"; - if (converged) { fname = "weights.final.gz"; } - ostringstream vv; - vv << "Objective = " << objective << " (ITERATION=" << iteration << ")"; - const string svv = vv.str(); - w.WriteToFile(fname, true, &svv); - } // rank == 0 - int cint = converged; -#ifdef HAVE_MPI - MPI::COMM_WORLD.Bcast(const_cast<double*>(&lambdas.data()[0]), num_feats, MPI::DOUBLE, 0); - MPI::COMM_WORLD.Bcast(&cint, 1, MPI::INT, 0); - MPI::COMM_WORLD.Barrier(); -#endif - converged = cint; - } -#ifdef HAVE_MPI - MPI::Finalize(); -#endif - return 0; -} diff --git a/training/mr_em_adapted_reduce.cc b/training/mr_em_adapted_reduce.cc deleted file mode 100644 index f65b5440..00000000 --- a/training/mr_em_adapted_reduce.cc +++ /dev/null @@ -1,173 +0,0 @@ -#include <iostream> -#include <vector> -#include <cassert> -#include <cmath> - -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "filelib.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" -#include "m.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* 
conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("optimization_method,m", po::value<string>()->default_value("em"), "Optimization method (em, vb)") - ("input_format,f",po::value<string>()->default_value("b64"),"Encoding of the input (b64 or text)"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value<string>(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as<string>().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -double NoZero(const double& x) { - if (x) return x; - return 1e-35; -} - -void Maximize(const bool use_vb, - const double& alpha, - const int total_event_types, - SparseVector<double>* pc) { - const SparseVector<double>& counts = *pc; - - if (use_vb) - assert(total_event_types >= counts.size()); - - double tot = 0; - for (SparseVector<double>::const_iterator it = counts.begin(); - it != counts.end(); ++it) - tot += it->second; -// cerr << " = " << tot << endl; - assert(tot > 0.0); - double ltot = log(tot); - if (use_vb) - ltot = Md::digamma(tot + total_event_types * alpha); - for (SparseVector<double>::const_iterator it = counts.begin(); - it != counts.end(); ++it) { - if (use_vb) { - pc->set_value(it->first, NoZero(Md::digamma(it->second + alpha) - ltot)); - } else { - pc->set_value(it->first, NoZero(log(it->second) - ltot)); - } - } -#if 0 - if (counts.size() < 50) { - for (SparseVector<double>::const_iterator it = counts.begin(); - it != counts.end(); ++it) { - cerr << " p(" << FD::Convert(it->first) << ")=" << exp(it->second); - } - cerr << endl; - } -#endif -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - const bool use_b64 = conf["input_format"].as<string>() == "b64"; - const bool use_vb = conf["optimization_method"].as<string>() == "vb"; - const double alpha = 1e-09; - if (use_vb) - cerr << "Using variational Bayes, make sure alphas are set\n"; - - const string s_obj = "**OBJ**"; - // E-step - string cur_key = ""; - SparseVector<double> acc; - double logprob = 0; - while(cin) { - string line; - getline(cin, line); - if (line.empty()) continue; - int feat; - double val; - size_t i = line.find("\t"); - const string key = line.substr(0, i); - assert(i != string::npos); - ++i; - if (key != cur_key) { - if (cur_key.size() > 0) { - // TODO shouldn't be num_active, should be total number - // of events - Maximize(use_vb, alpha, acc.size(), &acc); - cout << cur_key << '\t'; - if (use_b64) - B64::Encode(0.0, acc, &cout); - else - cout << acc; - cout << endl; - acc.clear(); - } - cur_key = key; - } - if (use_b64) { - SparseVector<double> g; - double obj; - if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) { - cerr << "B64 decoder returned error, skipping!\n"; - continue; - } - logprob += obj; - acc += g; - } else { // text encoding - your counts will not be accurate! 
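
[Editor's note] The Maximize() routine in the file deleted above implements the M-step for both plain EM and its variational-Bayes variant: EM normalizes counts in log space (log c_k - log tot), while VB with a symmetric Dirichlet(alpha) prior uses the mean-field update E[log theta_k] = digamma(c_k + alpha) - digamma(tot + K*alpha), where K is the total number of event types. A minimal self-contained sketch of that update, assuming boost::math::digamma in place of the project's Md::digamma and a std::map in place of SparseVector<double>, and assuming strictly positive counts:

    #include <boost/math/special_functions/digamma.hpp>
    #include <cmath>
    #include <map>

    // M-step sketch: turns event counts into log-probabilities.
    // use_vb=false: maximum likelihood, log(c_k) - log(tot).
    // use_vb=true:  VB mean-field, digamma(c_k + alpha) - digamma(tot + K*alpha),
    //               where K counts *all* event types, not just those observed.
    std::map<int, double> MStep(const std::map<int, double>& counts,
                                bool use_vb, double alpha, int total_event_types) {
      double tot = 0;
      for (std::map<int, double>::const_iterator it = counts.begin(); it != counts.end(); ++it)
        tot += it->second;
      const double ltot = use_vb ? boost::math::digamma(tot + total_event_types * alpha)
                                 : std::log(tot);
      std::map<int, double> logprobs;
      for (std::map<int, double>::const_iterator it = counts.begin(); it != counts.end(); ++it)
        logprobs[it->first] = use_vb ? boost::math::digamma(it->second + alpha) - ltot
                                     : std::log(it->second) - ltot;
      return logprobs;
    }

The explicit total_event_types parameter also makes the TODO in the surrounding code concrete: the deleted caller passes acc.size() (the number of events actually observed), where the total number of event types in the model is what the VB update calls for.
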
- while (i < line.size()) { - size_t start = i; - while (line[i] != '=' && i < line.size()) ++i; - if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; } - string fname = line.substr(start, i - start); - if (fname == s_obj) { - feat = -1; - } else { - feat = FD::Convert(line.substr(start, i - start)); - } - ++i; - start = i; - while (line[i] != ';' && i < line.size()) ++i; - if (i - start == 0) continue; - val = atof(line.substr(start, i - start).c_str()); - ++i; - if (feat == -1) { - logprob += val; - } else { - acc.add_value(feat, val); - } - } - } - } - // TODO shouldn't be num_active, should be total number - // of events - Maximize(use_vb, alpha, acc.size(), &acc); - cout << cur_key << '\t'; - if (use_b64) - B64::Encode(0.0, acc, &cout); - else - cout << acc; - cout << endl << flush; - - cerr << "LOGPROB: " << logprob << endl; - - return 0; -} diff --git a/training/mr_em_map_adapter.cc b/training/mr_em_map_adapter.cc deleted file mode 100644 index ead4598d..00000000 --- a/training/mr_em_map_adapter.cc +++ /dev/null @@ -1,160 +0,0 @@ -#include <iostream> -#include <fstream> -#include <cassert> -#include <cmath> - -#include <boost/utility.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> -#include "boost/tuple/tuple.hpp" - -#include "fdict.h" -#include "sparse_vector.h" - -using namespace std; -namespace po = boost::program_options; - -// useful for EM models parameterized by a bunch of multinomials -// this converts event counts (returned from cdec as feature expectations) -// into different keys and values (which are lists of all the events, -// conditioned on the key) for summing and normalization by a reducer - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("buffer_size,b", po::value<int>()->default_value(1), "Buffer size (in # of counts) before emitting counts") - ("format,f",po::value<string>()->default_value("b64"), "Encoding of the input (b64 or text)"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value<string>(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as<string>().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct EventMapper { - int Map(int fid) { - int& cv = map_[fid]; - if (!cv) { - cv = GetConditioningVariable(fid); - } - return cv; - } - void Clear() { map_.clear(); } - protected: - virtual int GetConditioningVariable(int fid) const = 0; - private: - map<int, int> map_; -}; - -struct LexAlignEventMapper : public EventMapper { - protected: - virtual int GetConditioningVariable(int fid) const { - const string& str = FD::Convert(fid); - size_t pos = str.rfind("_"); - if (pos == string::npos || pos == 0 || pos >= str.size() - 1) { - cerr << "Bad feature for EM adapter: " << str << endl; - abort(); - } - return FD::Convert(str.substr(0, pos)); - } -}; - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - const bool use_b64 = conf["format"].as<string>() == "b64"; - const int buffer_size = 
conf["buffer_size"].as<int>(); - - const string s_obj = "**OBJ**"; - // 0<TAB>**OBJ**=12.2;Feat1=2.3;Feat2=-0.2; - // 0<TAB>**OBJ**=1.1;Feat1=1.0; - - EventMapper* event_mapper = new LexAlignEventMapper; - map<int, SparseVector<double> > counts; - size_t total = 0; - while(cin) { - string line; - getline(cin, line); - if (line.empty()) continue; - int feat; - double val; - size_t i = line.find("\t"); - assert(i != string::npos); - ++i; - SparseVector<double> g; - double obj = 0; - if (use_b64) { - if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) { - cerr << "B64 decoder returned error, skipping!\n"; - continue; - } - } else { // text encoding - your counts will not be accurate! - while (i < line.size()) { - size_t start = i; - while (line[i] != '=' && i < line.size()) ++i; - if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; } - string fname = line.substr(start, i - start); - if (fname == s_obj) { - feat = -1; - } else { - feat = FD::Convert(line.substr(start, i - start)); - } - ++i; - start = i; - while (line[i] != ';' && i < line.size()) ++i; - if (i - start == 0) continue; - val = atof(line.substr(start, i - start).c_str()); - ++i; - if (feat == -1) { - obj = val; - } else { - g.set_value(feat, val); - } - } - } - //cerr << "OBJ: " << obj << endl; - const SparseVector<double>& cg = g; - for (SparseVector<double>::const_iterator it = cg.begin(); it != cg.end(); ++it) { - const int cond_var = event_mapper->Map(it->first); - SparseVector<double>& cond_counts = counts[cond_var]; - int delta = cond_counts.size(); - cond_counts.add_value(it->first, it->second); - delta = cond_counts.size() - delta; - total += delta; - } - if (total > buffer_size) { - for (map<int, SparseVector<double> >::iterator it = counts.begin(); - it != counts.end(); ++it) { - const SparseVector<double>& cc = it->second; - cout << FD::Convert(it->first) << '\t'; - if (use_b64) { - B64::Encode(0.0, cc, &cout); - } else { - abort(); - } - cout << endl; - } - cout << flush; - total = 0; - counts.clear(); - } - } - - return 0; -} - diff --git a/training/mr_optimize_reduce.cc b/training/mr_optimize_reduce.cc deleted file mode 100644 index 461e6b5f..00000000 --- a/training/mr_optimize_reduce.cc +++ /dev/null @@ -1,231 +0,0 @@ -#include <sstream> -#include <iostream> -#include <fstream> -#include <vector> -#include <cassert> -#include <cmath> - -#include <boost/shared_ptr.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "optimize.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -namespace po = boost::program_options; - -void SanityCheck(const vector<double>& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - -struct FComp { - const vector<double>& w_; - FComp(const vector<double>& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowLargestFeatures(const vector<double>& w) { - vector<int> fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector<int>::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 
10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,i",po::value<string>(),"Input feature weights file") - ("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file") - ("optimization_method,m", po::value<string>()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)") - ("state,s",po::value<string>(),"Read (and write if output_state is not set) optimizer state from this state file. In the first iteration, the file should not exist.") - ("input_format,f",po::value<string>()->default_value("b64"),"Encoding of the input (b64 or text)") - ("output_state,S", po::value<string>(), "Output state file (optional override)") - ("correction_buffers,M", po::value<int>()->default_value(10), "Number of gradients for LBFGS to maintain in memory") - ("eta,e", po::value<double>()->default_value(0.1), "Learning rate for SGD (eta)") - ("gaussian_prior,p","Use a Gaussian prior on the weights") - ("means,u", po::value<string>(), "File containing the means for Gaussian prior") - ("sigma_squared", po::value<double>()->default_value(1.0), "Sigma squared term for spherical Gaussian prior"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value<string>(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as<string>().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("input_weights") || !conf->count("state")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - const bool use_b64 = conf["input_format"].as<string>() == "b64"; - - vector<weight_t> lambdas; - Weights::InitFromFile(conf["input_weights"].as<string>(), &lambdas); - const string s_obj = "**OBJ**"; - int num_feats = FD::NumFeats(); - cerr << "Number of features: " << num_feats << endl; - const bool gaussian_prior = conf.count("gaussian_prior"); - vector<weight_t> means(num_feats, 0); - if (conf.count("means")) { - if (!gaussian_prior) { - cerr << "Don't use --means without --gaussian_prior!\n"; - exit(1); - } - Weights::InitFromFile(conf["means"].as<string>(), &means); - } - boost::shared_ptr<BatchOptimizer> o; - const string omethod = conf["optimization_method"].as<string>(); - if (omethod == "rprop") - o.reset(new RPropOptimizer(num_feats)); // TODO add configuration - else - o.reset(new LBFGSOptimizer(num_feats, conf["correction_buffers"].as<int>())); - cerr << "Optimizer: " << o->Name() << endl; - string state_file = conf["state"].as<string>(); - { - ifstream in(state_file.c_str(), ios::binary); - if (in) - o->Load(&in); - else - cerr << "No state file found, assuming ITERATION 1\n"; - } - - double objective = 0; - vector<double> gradient(num_feats, 0); - // 0<TAB>**OBJ**=12.2;Feat1=2.3;Feat2=-0.2; - // 
0<TAB>**OBJ**=1.1;Feat1=1.0; - int total_lines = 0; // TODO - this should be a count of the - // training instances!! - while(cin) { - string line; - getline(cin, line); - if (line.empty()) continue; - ++total_lines; - int feat; - double val; - size_t i = line.find("\t"); - assert(i != string::npos); - ++i; - if (use_b64) { - SparseVector<double> g; - double obj; - if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) { - cerr << "B64 decoder returned error, skipping gradient!\n"; - cerr << " START: " << line.substr(0,line.size() > 200 ? 200 : line.size()) << endl; - if (line.size() > 200) - cerr << " END: " << line.substr(line.size() - 200, 200) << endl; - cout << "-1\tRESTART\n"; - exit(99); - } - objective += obj; - const SparseVector<double>& cg = g; - for (SparseVector<double>::const_iterator it = cg.begin(); it != cg.end(); ++it) { - if (it->first >= num_feats) { - cerr << "Unexpected feature in gradient: " << FD::Convert(it->first) << endl; - abort(); - } - gradient[it->first] -= it->second; - } - } else { // text encoding - your gradients will not be accurate! - while (i < line.size()) { - size_t start = i; - while (line[i] != '=' && i < line.size()) ++i; - if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; } - string fname = line.substr(start, i - start); - if (fname == s_obj) { - feat = -1; - } else { - feat = FD::Convert(line.substr(start, i - start)); - if (feat >= num_feats) { - cerr << "Unexpected feature in gradient: " << line.substr(start, i - start) << endl; - abort(); - } - } - ++i; - start = i; - while (line[i] != ';' && i < line.size()) ++i; - if (i - start == 0) continue; - val = atof(line.substr(start, i - start).c_str()); - ++i; - if (feat == -1) { - objective += val; - } else { - gradient[feat] -= val; - } - } - } - } - - if (gaussian_prior) { - const double sigsq = conf["sigma_squared"].as<double>(); - double norm = 0; - for (int k = 1; k < lambdas.size(); ++k) { - const double& lambda_k = lambdas[k]; - if (lambda_k) { - const double param = (lambda_k - means[k]); - norm += param * param; - gradient[k] += param / sigsq; - } - } - const double reg = norm / (2.0 * sigsq); - cerr << "REGULARIZATION TERM: " << reg << endl; - objective += reg; - } - cerr << "EVALUATION #" << o->EvaluationCount() << " OBJECTIVE: " << objective << endl; - double gnorm = 0; - for (int i = 0; i < gradient.size(); ++i) - gnorm += gradient[i] * gradient[i]; - cerr << " GNORM=" << sqrt(gnorm) << endl; - vector<double> old = lambdas; - int c = 0; - while (old == lambdas) { - ++c; - if (c > 1) { cerr << "Same lambdas, repeating optimization\n"; } - o->Optimize(objective, gradient, &lambdas); - assert(c < 5); - } - old.clear(); - SanityCheck(lambdas); - ShowLargestFeatures(lambdas); - Weights::WriteToFile(conf["output_weights"].as<string>(), lambdas, false); - - const bool conv = o->HasConverged(); - if (conv) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } - - if (conf.count("output_state")) - state_file = conf["output_state"].as<string>(); - ofstream out(state_file.c_str(), ios::binary); - cerr << "Writing state to: " << state_file << endl; - o->Save(&out); - out.close(); - - cout << o->EvaluationCount() << "\t" << conv << endl; - return 0; -} diff --git a/training/mr_reduce_to_weights.cc b/training/mr_reduce_to_weights.cc deleted file mode 100644 index 16b47720..00000000 --- a/training/mr_reduce_to_weights.cc +++ /dev/null @@ -1,109 +0,0 @@ -#include <iostream> -#include <fstream> -#include <vector> -#include <cassert> - -#include <boost/program_options.hpp> -#include 
<boost/program_options/variables_map.hpp> - -#include "filelib.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_format,f",po::value<string>()->default_value("b64"),"Encoding of the input (b64 or text)") - ("input,i",po::value<string>()->default_value("-"),"Read file from") - ("output,o",po::value<string>()->default_value("-"),"Write weights to"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value<string>(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as<string>().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void WriteWeights(const SparseVector<double>& weights, ostream* out) { - for (SparseVector<double>::const_iterator it = weights.begin(); - it != weights.end(); ++it) { - (*out) << FD::Convert(it->first) << " " << it->second << endl; - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - const bool use_b64 = conf["input_format"].as<string>() == "b64"; - - const string s_obj = "**OBJ**"; - // E-step - ReadFile rf(conf["input"].as<string>()); - istream* in = rf.stream(); - assert(*in); - WriteFile wf(conf["output"].as<string>()); - ostream* out = wf.stream(); - out->precision(17); - while(*in) { - string line; - getline(*in, line); - if (line.empty()) continue; - int feat; - double val; - size_t i = line.find("\t"); - assert(i != string::npos); - ++i; - if (use_b64) { - SparseVector<double> g; - double obj; - if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) { - cerr << "B64 decoder returned error, skipping!\n"; - continue; - } - WriteWeights(g, out); - } else { // text encoding - your counts will not be accurate! 
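
[Editor's note] Several of the deleted map/reduce tools share the same text-format fallback for lines of the form key<TAB>**OBJ**=12.2;Feat1=2.3;Feat2=-0.2;. Note that the original scanning loops test line[i] != '=' before i < line.size(), so they read one character past the end of the string when a terminator is missing; the bounds check should come first. A sketch of the same parse with the checks reordered, using std::map in place of SparseVector<double> (and storing the **OBJ** pseudo-feature like any other key rather than splitting it out):

    #include <cstdlib>
    #include <iostream>
    #include <map>
    #include <string>

    // Parses "Feat1=2.3;Feat2=-0.2;..." starting at position i (just past the tab).
    // The bounds test precedes every dereference, unlike the original loops.
    std::map<std::string, double> ParseTextCounts(const std::string& line, size_t i) {
      std::map<std::string, double> out;
      while (i < line.size()) {
        const size_t eq = line.find('=', i);
        if (eq == std::string::npos) { std::cerr << "FORMAT ERROR\n"; break; }
        size_t semi = line.find(';', eq + 1);
        if (semi == std::string::npos) semi = line.size();
        const std::string fname = line.substr(i, eq - i);
        out[fname] += std::atof(line.substr(eq + 1, semi - eq - 1).c_str());
        i = semi + 1;
      }
      return out;
    }
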
- SparseVector<double> weights; - while (i < line.size()) { - size_t start = i; - while (line[i] != '=' && i < line.size()) ++i; - if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; } - string fname = line.substr(start, i - start); - if (fname == s_obj) { - feat = -1; - } else { - feat = FD::Convert(line.substr(start, i - start)); - } - ++i; - start = i; - while (line[i] != ';' && i < line.size()) ++i; - if (i - start == 0) continue; - val = atof(line.substr(start, i - start).c_str()); - ++i; - if (feat != -1) { - weights.set_value(feat, val); - } - } - WriteWeights(weights, out); - } - } - - return 0; -} diff --git a/training/pro/Makefile.am b/training/pro/Makefile.am new file mode 100644 index 00000000..09364804 --- /dev/null +++ b/training/pro/Makefile.am @@ -0,0 +1,13 @@ +bin_PROGRAMS = \ + mr_pro_map \ + mr_pro_reduce + +mr_pro_map_SOURCES = mr_pro_map.cc +mr_pro_map_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a + +mr_pro_reduce_SOURCES = mr_pro_reduce.cc +mr_pro_reduce_LDADD = ../../training/liblbfgs/liblbfgs.a ../../utils/libutils.a + +EXTRA_DIST = mr_pro_generate_mapper_input.pl pro.pl + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils -I$(top_srcdir)/training diff --git a/training/pro/mr_pro_generate_mapper_input.pl b/training/pro/mr_pro_generate_mapper_input.pl new file mode 100755 index 00000000..b30fc4fd --- /dev/null +++ b/training/pro/mr_pro_generate_mapper_input.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl -w +use strict; + +die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; +my $d = shift @ARGV; +die "Can't find directory $d" unless -d $d; + +opendir(DIR, $d) or die "Can't read $d: $!"; +my @hgs = grep { /\.gz$/ } readdir(DIR); +closedir DIR; + +for my $hg (@hgs) { + my $file = $hg; + my $id = $hg; + $id =~ s/(\.json)?\.gz//; + print "$d/$file $id\n"; +} + diff --git a/training/pro/mr_pro_map.cc b/training/pro/mr_pro_map.cc new file mode 100644 index 00000000..eef40b8a --- /dev/null +++ b/training/pro/mr_pro_map.cc @@ -0,0 +1,201 @@ +#include <sstream> +#include <iostream> +#include <fstream> +#include <vector> +#include <tr1/unordered_map> + +#include <boost/functional/hash.hpp> +#include <boost/shared_ptr.hpp> +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "candidate_set.h" +#include "sampler.h" +#include "filelib.h" +#include "stringlib.h" +#include "weights.h" +#include "inside_outside.h" +#include "hg_io.h" +#include "ns.h" +#include "ns_docscorer.h" + +// This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011) + +using namespace std; +namespace po = boost::program_options; + +boost::shared_ptr<MT19937> rng; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)") + ("weights,w",po::value<string>(), "[REQD] Weights files from current iterations") + ("kbest_repository,K",po::value<string>()->default_value("./kbest"),"K-best list repository (directory)") + ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)") + ("source,s",po::value<string>()->default_value(""), "Source file (ignored, except for AER)") + ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, 
meteor, etc.)") + ("kbest_size,k",po::value<unsigned>()->default_value(1500u), "Top k-hypotheses to extract") + ("candidate_pairs,G", po::value<unsigned>()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)") + ("best_pairs,X", po::value<unsigned>()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)") + ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r <REF.TXT>\n"; + flag = true; + } + if (!conf->count("weights")) { + cerr << "Please specify weights using -w <WEIGHTS.TXT>\n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +struct ThresholdAlpha { + explicit ThresholdAlpha(double t = 0.05) : threshold(t) {} + double operator()(double mag) const { + if (mag < threshold) return 0.0; else return 1.0; + } + const double threshold; +}; + +struct TrainingInstance { + TrainingInstance(const SparseVector<weight_t>& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {} + SparseVector<weight_t> x; +#undef DEBUGGING_PRO +#ifdef DEBUGGING_PRO + vector<WordID> a; + vector<WordID> b; +#endif + bool y; + float gdiff; +}; +#ifdef DEBUGGING_PRO +ostream& operator<<(ostream& os, const TrainingInstance& d) { + return os << d.gdiff << " y=" << d.y << "\tA:" << TD::GetString(d.a) << "\n\tB: " << TD::GetString(d.b) << "\n\tX: " << d.x; +} +#endif + +struct DiffOrder { + bool operator()(const TrainingInstance& a, const TrainingInstance& b) const { + return a.gdiff > b.gdiff; + } +}; + +void Sample(const unsigned gamma, + const unsigned xi, + const training::CandidateSet& J_i, + const EvaluationMetric* metric, + vector<TrainingInstance>* pv) { + const bool invert_score = metric->IsErrorMetric(); + vector<TrainingInstance> v1, v2; + float avg_diff = 0; + for (unsigned i = 0; i < gamma; ++i) { + const size_t a = rng->inclusive(0, J_i.size() - 1)(); + const size_t b = rng->inclusive(0, J_i.size() - 1)(); + if (a == b) continue; + float ga = metric->ComputeScore(J_i[a].eval_feats); + float gb = metric->ComputeScore(J_i[b].eval_feats); + bool positive = gb < ga; + if (invert_score) positive = !positive; + const float gdiff = fabs(ga - gb); + if (!gdiff) continue; + avg_diff += gdiff; + SparseVector<weight_t> xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros(); + if (xdiff.empty()) { + cerr << "Empty diff:\n " << TD::GetString(J_i[a].ewords) << endl << "x=" << J_i[a].fmap << endl; + cerr << " " << TD::GetString(J_i[b].ewords) << endl << "x=" << J_i[b].fmap << endl; + continue; + } + v1.push_back(TrainingInstance(xdiff, positive, gdiff)); +#ifdef DEBUGGING_PRO + v1.back().a = J_i[a].hyp; + v1.back().b = J_i[b].hyp; + cerr << "N: " << v1.back() << endl; +#endif + } + avg_diff /= v1.size(); + + for (unsigned i = 0; i < v1.size(); ++i) { + double p = 1.0 / (1.0 + exp(-avg_diff - v1[i].gdiff)); + // cerr << "avg_diff=" << avg_diff << " gdiff=" << v1[i].gdiff << " p=" << p << endl; + if (rng->next() < p) v2.push_back(v1[i]); + } + vector<TrainingInstance>::iterator mid = v2.begin() + xi; + if (xi > v2.size()) mid = v2.end(); + partial_sort(v2.begin(), mid, v2.end(), DiffOrder()); + copy(v2.begin(), mid, back_inserter(*pv)); +#ifdef DEBUGGING_PRO + if 
(v2.size() >= 5) { + for (int i =0; i < (mid - v2.begin()); ++i) { + cerr << v2[i] << endl; + } + cerr << pv->back() << endl; + } +#endif +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + if (conf.count("random_seed")) + rng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); + else + rng.reset(new MT19937); + const string evaluation_metric = conf["evaluation_metric"].as<string>(); + + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as<vector<string> >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; + + Hypergraph hg; + string last_file; + ReadFile in_read(conf["input"].as<string>()); + istream &in=*in_read.stream(); + const unsigned kbest_size = conf["kbest_size"].as<unsigned>(); + const unsigned gamma = conf["candidate_pairs"].as<unsigned>(); + const unsigned xi = conf["best_pairs"].as<unsigned>(); + string weightsf = conf["weights"].as<string>(); + vector<weight_t> weights; + Weights::InitFromFile(weightsf, &weights); + string kbest_repo = conf["kbest_repository"].as<string>(); + MkDirP(kbest_repo); + while(in) { + vector<TrainingInstance> v; + string line; + getline(in, line); + if (line.empty()) continue; + istringstream is(line); + int sent_id; + string file; + // path-to-file (JSON) sent_id + is >> file >> sent_id; + ReadFile rf(file); + ostringstream os; + training::CandidateSet J_i; + os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; + const string kbest_file = os.str(); + if (FileExists(kbest_file)) + J_i.ReadFromFile(kbest_file); + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(weights); + J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + J_i.WriteToFile(kbest_file); + + Sample(gamma, xi, J_i, metric, &v); + for (unsigned i = 0; i < v.size(); ++i) { + const TrainingInstance& vi = v[i]; + cout << vi.y << "\t" << vi.x << endl; + cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl; + } + } + return 0; +} + diff --git a/training/pro/mr_pro_reduce.cc b/training/pro/mr_pro_reduce.cc new file mode 100644 index 00000000..5ef9b470 --- /dev/null +++ b/training/pro/mr_pro_reduce.cc @@ -0,0 +1,286 @@ +#include <cstdlib> +#include <sstream> +#include <iostream> +#include <fstream> +#include <vector> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "filelib.h" +#include "weights.h" +#include "sparse_vector.h" +#include "optimize.h" +#include "liblbfgs/lbfgs++.h" + +using namespace std; +namespace po = boost::program_options; + +// since this is a ranking model, there should be equal numbers of +// positive and negative examples, so the bias should be 0 +static const double MAX_BIAS = 1e-10; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("weights,w", po::value<string>(), "Weights from previous iteration (used as initialization and interpolation)") + ("regularization_strength,C",po::value<double>()->default_value(500.0), "l2 regularization strength") + ("l1",po::value<double>()->default_value(0.0), "l1 regularization strength") + ("regularize_to_weights,y",po::value<double>()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") + ("memory_buffers,m",po::value<unsigned>()->default_value(100), "Number of memory buffers (LBFGS)") + 
("min_reg,r",po::value<double>()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght") + ("max_reg,R",po::value<double>()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght") + ("testset,t",po::value<string>(), "Optional held-out test set") + ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength") + ("interpolate_with_weights,p",po::value<double>()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +void ParseSparseVector(string& line, size_t cur, SparseVector<weight_t>* out) { + SparseVector<weight_t>& x = *out; + size_t last_start = cur; + size_t last_comma = string::npos; + while(cur <= line.size()) { + if (line[cur] == ' ' || cur == line.size()) { + if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) { + cerr << "[ERROR] " << line << endl << " position = " << cur << endl; + exit(1); + } + const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); + if (cur < line.size()) line[cur] = 0; + const weight_t val = strtod(&line[last_comma + 1], NULL); + x.set_value(fid, val); + + last_comma = string::npos; + last_start = cur+1; + } else { + if (line[cur] == '=') + last_comma = cur; + } + ++cur; + } +} + +void ReadCorpus(istream* pin, vector<pair<bool, SparseVector<weight_t> > >* corpus) { + istream& in = *pin; + corpus->clear(); + bool flag = false; + int lc = 0; + string line; + SparseVector<weight_t> x; + while(getline(in, line)) { + ++lc; + if (lc % 1000 == 0) { cerr << '.'; flag = true; } + if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; } + if (line.empty()) continue; + const size_t ks = line.find("\t"); + assert(string::npos != ks); + assert(ks == 1); + const bool y = line[0] == '1'; + x.clear(); + ParseSparseVector(line, ks + 1, &x); + corpus->push_back(make_pair(y, x)); + } + if (flag) cerr << endl; +} + +void GradAdd(const SparseVector<weight_t>& v, const double scale, weight_t* acc) { + for (SparseVector<weight_t>::const_iterator it = v.begin(); + it != v.end(); ++it) { + acc[it->first] += it->second * scale; + } +} + +double ApplyRegularizationTerms(const double C, + const double T, + const vector<weight_t>& weights, + const vector<weight_t>& prev_weights, + weight_t* g) { + double reg = 0; + for (size_t i = 0; i < weights.size(); ++i) { + const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); + const double& w_i = weights[i]; + reg += C * w_i * w_i; + g[i] += 2 * C * w_i; + + const double diff_i = w_i - prev_w_i; + reg += T * diff_i * diff_i; + g[i] += 2 * T * diff_i; + } + return reg; +} + +double TrainingInference(const vector<weight_t>& x, + const vector<pair<bool, SparseVector<weight_t> > >& corpus, + weight_t* g = NULL) { + double cll = 0; + for (int i = 0; i < corpus.size(); ++i) { + const double dotprod = corpus[i].second.dot(x) + (x.size() ? 
x[0] : weight_t()); // x[0] is bias + double lp_false = dotprod; + double lp_true = -dotprod; + if (0 < lp_true) { + lp_true += log1p(exp(-lp_true)); + lp_false = log1p(exp(lp_false)); + } else { + lp_true = log1p(exp(lp_true)); + lp_false += log1p(exp(-lp_false)); + } + lp_true*=-1; + lp_false*=-1; + if (corpus[i].first) { // true label + cll -= lp_true; + if (g) { + // g -= corpus[i].second * exp(lp_false); + GradAdd(corpus[i].second, -exp(lp_false), g); + g[0] -= exp(lp_false); // bias + } + } else { // false label + cll -= lp_false; + if (g) { + // g += corpus[i].second * exp(lp_true); + GradAdd(corpus[i].second, exp(lp_true), g); + g[0] += exp(lp_true); // bias + } + } + } + return cll; +} + +struct ProLoss { + ProLoss(const vector<pair<bool, SparseVector<weight_t> > >& tr, + const vector<pair<bool, SparseVector<weight_t> > >& te, + const double c, + const double t, + const vector<weight_t>& px) : training(tr), testing(te), C(c), T(t), prev_x(px){} + double operator()(const vector<double>& x, double* g) const { + fill(g, g + x.size(), 0.0); + double cll = TrainingInference(x, training, g); + tppl = 0; + if (testing.size()) + tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size())); + double ppl = cll / log(2); + ppl /= training.size(); + ppl = pow(2.0, ppl); + double reg = ApplyRegularizationTerms(C, T, x, prev_x, g); + return cll + reg; + } + const vector<pair<bool, SparseVector<weight_t> > >& training, testing; + const double C, T; + const vector<double>& prev_x; + mutable double tppl; +}; + +// return held-out log likelihood +double LearnParameters(const vector<pair<bool, SparseVector<weight_t> > >& training, + const vector<pair<bool, SparseVector<weight_t> > >& testing, + const double C, + const double C1, + const double T, + const unsigned memory_buffers, + const vector<weight_t>& prev_x, + vector<weight_t>* px) { + assert(px->size() == prev_x.size()); + ProLoss loss(training, testing, C, T, prev_x); + LBFGS<ProLoss> lbfgs(px, loss, memory_buffers, C1); + lbfgs.MinimizeFunction(); + return loss.tppl; +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + string line; + vector<pair<bool, SparseVector<weight_t> > > training, testing; + const bool tune_regularizer = conf.count("tune_regularizer"); + if (tune_regularizer && !conf.count("testset")) { + cerr << "--tune_regularizer requires --testset to be set\n"; + return 1; + } + const double min_reg = conf["min_reg"].as<double>(); + const double max_reg = conf["max_reg"].as<double>(); + double C = conf["regularization_strength"].as<double>(); // will be overridden if parameter is tuned + double C1 = conf["l1"].as<double>(); // will be overridden if parameter is tuned + const double T = conf["regularize_to_weights"].as<double>(); + assert(C >= 0.0); + assert(min_reg >= 0.0); + assert(max_reg >= 0.0); + assert(max_reg > min_reg); + const double psi = conf["interpolate_with_weights"].as<double>(); + if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; } + ReadCorpus(&cin, &training); + if (conf.count("testset")) { + ReadFile rf(conf["testset"].as<string>()); + ReadCorpus(rf.stream(), &testing); + } + cerr << "Number of features: " << FD::NumFeats() << endl; + + vector<weight_t> x, prev_x; // x[0] is bias + if (conf.count("weights")) { + Weights::InitFromFile(conf["weights"].as<string>(), &x); + x.resize(FD::NumFeats()); + prev_x = x; + } else { + x.resize(FD::NumFeats()); + prev_x = x; + } + cerr << " Number of features: " 
<< x.size() << endl; + cerr << "Number of training examples: " << training.size() << endl; + cerr << "Number of testing examples: " << testing.size() << endl; + double tppl = 0.0; + vector<pair<double,double> > sp; + vector<double> smoothed; + if (tune_regularizer) { + C = min_reg; + const double steps = 18; + double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps); + cerr << "SWEEP FACTOR: " << sweep_factor << endl; + while(C < max_reg) { + cerr << "C=" << C << "\tT=" <<T << endl; + tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x); + sp.push_back(make_pair(C, tppl)); + C *= sweep_factor; + } + smoothed.resize(sp.size(), 0); + smoothed[0] = sp[0].second; + smoothed.back() = sp.back().second; + for (int i = 1; i < sp.size()-1; ++i) { + double prev = sp[i-1].second; + double next = sp[i+1].second; + double cur = sp[i].second; + smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next); + } + double best_ppl = 9999999; + unsigned best_i = 0; + for (unsigned i = 0; i < sp.size(); ++i) { + if (smoothed[i] < best_ppl) { + best_ppl = smoothed[i]; + best_i = i; + } + } + C = sp[best_i].first; + } // tune regularizer + tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x); + if (conf.count("weights")) { + for (int i = 1; i < x.size(); ++i) { + x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi); + } + } + cout.precision(15); + cout << "# C=" << C << "\theld out perplexity="; + if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; } + if (sp.size()) { + cout << "# Parameter sweep:\n"; + for (int i = 0; i < sp.size(); ++i) { + cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl; + } + } + Weights::WriteToFile("-", x); + return 0; +} diff --git a/training/pro/pro.pl b/training/pro/pro.pl new file mode 100755 index 00000000..3b30c379 --- /dev/null +++ b/training/pro/pro.pl @@ -0,0 +1,555 @@ +#!/usr/bin/env perl +use strict; +use File::Basename qw(basename); +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use IPC::Open2; +use POSIX ":sys_wait_h"; +my $QSUB_CMD = qsub_args(mert_memory()); +my $default_jobs = env_default_jobs(); + +my $UTILS_DIR="$SCRIPT_DIR/../utils"; +require "$UTILS_DIR/libcall.pl"; + +# Default settings +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/mr_pro_generate_mapper_input.pl"; +my $MAPPER = "$bin_dir/mr_pro_map"; +my $REDUCER = "$bin_dir/mr_pro_reduce"; +my $parallelize = "$UTILS_DIR/parallelize.pl"; +my $libcall = "$UTILS_DIR/libcall.pl"; +my $sentserver = "$UTILS_DIR/sentserver"; +my $sentclient = "$UTILS_DIR/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 30; +my $iteration = 1; +my 
$best_weights; +my $psi = 1; +my $default_max_iter = 30; +my $max_iterations = $default_max_iter; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "4g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $dryrun = 0; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $use_make = 1; # use make to parallelize +my $useqsub = 0; +my $initial_weights; +my $pass_suffix = ''; +my $devset; + +# regularization strength +my $reg = 500; +my $reg_previous = 5000; + +# Process command-line options +if (GetOptions( + "config=s" => \$iniFile, + "weights=s" => \$initial_weights, + "devset=s" => \$devset, + "jobs=i" => \$jobs, + "metric=s" => \$metric, + "pass-suffix=s" => \$pass_suffix, + "qsub" => \$useqsub, + "help" => \$help, + "reg=f" => \$reg, + "reg-previous=f" => \$reg_previous, + "output-dir=s" => \$dir, +) == 0 || @ARGV!=0 || $help) { + print_help(); + exit; +} + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $iniFile) { push @missing_args, "--config"; } +if (!defined $devset) { push @missing_args, "--devset"; } +if (!defined $initial_weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); + +if ($metric =~ /^(combi|ter)$/i) { + $lines_per_mapper = 5; +} + +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + +# used in sorting scores +my $DIR_FLAG = '-r'; +if ($metric =~ /^ter$|^aer$/i) { + $DIR_FLAG = ''; +} + +unless ($dir){ + $dir = 'pro'; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; + + +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; +open(INI, $iniFile); + +if (-e $dir) { + die "ERROR: working dir $dir already exists\n\n"; +} else { + mkdir "$dir" or die "Can't mkdir $dir: $!"; + mkdir "$dir/hgs" or die; + mkdir "$dir/scripts" or die; + print STDERR <<EOT; + DECODER: $decoder + INI FILE: $iniFile + WORKING DIR: $dir + DEVSET: $devset + EVAL METRIC: $metric + MAX ITERATIONS: $max_iterations + PARALLEL JOBS: $jobs + HEAD NODE: $host + PMEM (DECODING): $pmem + INITIAL WEIGHTS: $initial_weights +EOT +} + +# Generate initial files and values +check_call("cp $iniFile $newIniFile"); +check_call("cp $initial_weights $dir/weights.0"); +$iniFile = $newIniFile; + +my $refs = "$dir/dev.refs"; +split_devset($devset, "$dir/dev.input.raw", $refs); +my $newsrc = "$dir/dev.input"; +enseg("$dir/dev.input.raw", $newsrc); +$srcFile = $newsrc; +my $devSize = 0; +open F, "<$srcFile" or die "Can't read $srcFile: $!"; +while(<F>) { $devSize++; } +close F; + 
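
[Editor's note] Just above, split_devset (defined further down in this script) separates the --devset file into the raw source side and the reference file consumed by the scorer. As a rough illustration only: the standalone splitter below assumes the usual cdec convention of one "source ||| ref1 ||| ref2 ..." line per sentence; that line format is an assumption of this sketch, not something stated by pro.pl itself, and the actual behavior is whatever split_devset implements later in the script.

    #include <fstream>
    #include <iostream>
    #include <string>

    // Hypothetical devset splitter: everything before the first " ||| " is the
    // source sentence; the remainder (possibly several " ||| "-separated
    // references) is written to the refs file.
    int main(int argc, char** argv) {
      if (argc != 4) { std::cerr << "usage: split_devset IN SRC_OUT REFS_OUT\n"; return 1; }
      std::ifstream in(argv[1]);
      std::ofstream src(argv[2]), refs(argv[3]);
      std::string line;
      const std::string delim = " ||| ";
      while (std::getline(in, line)) {
        const size_t p = line.find(delim);
        if (p == std::string::npos) { std::cerr << "bad devset line: " << line << '\n'; return 1; }
        src << line.substr(0, p) << '\n';
        refs << line.substr(p + delim.size()) << '\n';
      }
      return 0;
    }
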
+unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +my @allweights; +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + push @allweights, "-w $dir/weights.$im1"; + `rm -f $dir/hgs/*.gz`; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); + my $dec_score = check_output("cat $runFile | $SCORER -r $refs -m $metric"); + chomp $dec_score; + print STDERR "DECODER SCORE: $dec_score\n"; + + # save space + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); + + # run optimizer + print STDERR "RUNNING OPTIMIZER AT "; + print STDERR unchecked_output("date"); + print STDERR " - GENERATE TRAINING EXEMPLARS\n"; + my $mergeLog="$logdir/prune-merge.log.$iteration"; + + my $score = 0; + my $icc = 0; + my $inweights="$dir/weights.$im1"; + $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + check_call("mkdir -p $dir/splag.$im1"); + $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1 $dir/splag.$im1/mapinput."; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; + my @shards = grep { /^mapinput\./ } readdir(DIR); + closedir DIR; + die "No shards!" 
unless scalar @shards > 0; + my $joblist = ""; + my $nmappers = 0; + @cleanupcmds = (); + my %o2i = (); + my $first_shard = 1; + my $mkfile; # only used with makefiles + my $mkfilename; + if ($use_make) { + $mkfilename = "$dir/splag.$im1/domap.mk"; + open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!"; + print $mkfile "all: $dir/splag.$im1/map.done\n\n"; + } + my @mkouts = (); # only used with makefiles + my @mapoutputs = (); + for my $shard (@shards) { + my $mapoutput = $shard; + my $client_name = $shard; + $client_name =~ s/mapinput.//; + $client_name = "pro.$client_name"; + $mapoutput =~ s/mapinput/mapoutput/; + push @mapoutputs, "$dir/splag.$im1/$mapoutput"; + $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; + my $script = "$MAPPER -s $srcFile -m $metric -r $refs -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput"; + if ($use_make) { + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "#!/bin/bash\n"; + print F "$script\n"; + close F; + my $output = "$dir/splag.$im1/$mapoutput"; + push @mkouts, $output; + chmod(0755, $script_file) or die "Can't chmod $script_file: $!"; + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } + print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n"; + } else { + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "$script\n"; + close F; + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } + + $nmappers++; + my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $jobid = check_output("$qcmd"); + chomp $jobid; + $jobid =~ s/^(\d+)(.*?)$/\1/g; + $jobid =~ s/^Your job (\d+) .*$/\1/; + push(@cleanupcmds, "qdel $jobid 2> /dev/null"); + print STDERR " $jobid"; + if ($joblist == "") { $joblist = $jobid; } + else {$joblist = $joblist . "\|" . 
$jobid; } + } + } + my @dev_outs = (); + my @devtest_outs = (); + @dev_outs = @mapoutputs; + if ($use_make) { + print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; + close $mkfile; + my $mcmd = "make -j $jobs -f $mkfilename"; + print STDERR "\nExecuting: $mcmd\n"; + check_call($mcmd); + } else { + print STDERR "\nLaunched $nmappers mappers.\n"; + sleep 8; + print STDERR "Waiting for mappers to complete...\n"; + while ($nmappers > 0) { + sleep 5; + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); + $nmappers = scalar @livejobs; + } + print STDERR "All mappers complete.\n"; + } + my $tol = 0; + my $til = 0; + my $dev_test_file = "$dir/splag.$im1/devtest.gz"; + print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n"; + print STDERR unchecked_output("date"); + $cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -C $reg -y $reg_previous --interpolate_with_weights $psi"; + $cmd .= " > $dir/weights.$iteration"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + $lastWeightsFile = "$dir/weights.$iteration"; + $lastPScore = $score; + $iteration++; + print STDERR "\n==========\n"; +} + + +check_call("cp $lastWeightsFile $dir/weights.final"); +print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w <this file> with the decoder)\n\n"; +print STDOUT "$dir/weights.final\n"; + +exit 0; + +sub read_weights_file { + my ($file) = @_; + open F, "<$file" or die "Couldn't read $file: $!"; + my @r = (); + my $pm = -1; + while(<F>) { + next if /^#/; + next if /^\s*$/; + chomp; + if (/^(.+)\s+(.+)$/) { + my $m = $1; + my $w = $2; + die "Weights out of order: $m <= $pm" unless $m > $pm; + push @r, $w; + } else { + warn "Unexpected feature name in weight file: $_"; + } + } + close F; + return join ' ', @r; +} + +sub enseg { + my $src = shift; + my $newsrc = shift; + open(SRC, $src); + open(NEWSRC, ">$newsrc"); + my $i=0; + while (my $line=<SRC>){ + chomp $line; + if ($line =~ /^\s*<seg/i) { + if($line =~ /id="[0-9]+"/) { + print NEWSRC "$line\n"; + } else { + die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute"; + } + } else { + print NEWSRC "<seg id=\"$i\">$line</seg>\n"; + } + $i++; + } + close SRC; + close NEWSRC; + die "Empty dev set!" if ($i == 0); +} + +sub print_help { + + my $executable = basename($0); chomp $executable; + print << "Help"; + +Usage: $executable [options] + + $executable [options] + Runs a complete PRO optimization using the ini file specified. + +Required: + + --config <cdec.ini> + Decoder configuration file. + + --devset <files> + Dev set source and reference data. + + --weights <file> + Initial weights file (use empty file to start from 0) + +General options: + + --help + Print this message and exit. + + --max-iterations <M> + Maximum number of iterations to run. If not specified, defaults + to $default_max_iter. + + --metric <method> + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --pass-suffix <S> + If the decoder is doing multi-pass decoding, the pass suffix "2", + "3", etc., is used to control what iteration of weights is set. + + --workdir <dir> + Directory for intermediate and output files. If not specified, the + name is derived from the ini filename. Assuming that the ini + filename begins with the decoder name and ends with ini, the default + name of the working directory is inferred from the middle part of + the filename. E.g. an ini file named decoder.foo.ini would have + a default working directory name foo. 
+ +Regularization options: + + --reg <F> + l2 regularization strength [default=500]. The greater this value, + the closer to zero the weights will be. + + --reg-previous <F> + l2 penalty for moving away from the weights from the previous + iteration. [default=5000]. The greater this value, the closer + to the previous iteration's weights the next iteration's weights + will be. + +Job control options: + + --jobs <I> + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem <N> + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Deprecated options: + + --interpolate-with-weights <F> + [deprecated] At each iteration the resulting weights are + interpolated with the weights from the previous iteration, with + this factor. [default=1.0, i.e., no effect] + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? +my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} + +sub split_devset { + my ($infile, $outsrc, $outref) = @_; + open F, "<$infile" or die "Can't read $infile: $!"; + open S, ">$outsrc" or die "Can't write $outsrc: $!"; + open R, ">$outref" or die "Can't write $outref: $!"; + while(<F>) { + chomp; + my ($src, @refs) = split /\s*\|\|\|\s*/; + die "Malformed devset line: $_\n" unless scalar @refs > 0; + print S "$src\n"; + print R join(' ||| ', @refs) . 
"\n"; + } + close R; + close S; + close F; +} + diff --git a/training/rampion/Makefile.am b/training/rampion/Makefile.am new file mode 100644 index 00000000..c72283cd --- /dev/null +++ b/training/rampion/Makefile.am @@ -0,0 +1,8 @@ +bin_PROGRAMS = rampion_cccp + +rampion_cccp_SOURCES = rampion_cccp.cc +rampion_cccp_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a + +EXTRA_DIST = rampion.pl rampion_generate_input.pl + +AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils diff --git a/training/rampion/rampion.pl b/training/rampion/rampion.pl new file mode 100755 index 00000000..ae084db6 --- /dev/null +++ b/training/rampion/rampion.pl @@ -0,0 +1,540 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use IPC::Open2; +use POSIX ":sys_wait_h"; +my $QSUB_CMD = qsub_args(mert_memory()); +my $default_jobs = env_default_jobs(); + +my $UTILS_DIR="$SCRIPT_DIR/../utils"; +require "$UTILS_DIR/libcall.pl"; + +# Default settings +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/rampion_generate_input.pl"; +my $MAPPER = "$bin_dir/rampion_cccp"; +my $parallelize = "$UTILS_DIR/parallelize.pl"; +my $libcall = "$UTILS_DIR/libcall.pl"; +my $sentserver = "$UTILS_DIR/sentserver"; +my $sentclient = "$UTILS_DIR/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 30; +my $iteration = 1; +my $best_weights; +my $psi = 1; +my $default_max_iter = 30; +my $max_iterations = $default_max_iter; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "4g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $dryrun = 0; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $use_make = 1; # use make to parallelize +my $useqsub = 0; +my $initial_weights; +my $pass_suffix = ''; +my $cpbin=1; + +# regularization strength +my $tune_regularizer = 0; +my $reg = 500; +my $reg_previous = 5000; +my $dont_accum = 0; + +# Process command-line options +Getopt::Long::Configure("no_auto_abbrev"); +if (GetOptions( + "jobs=i" => \$jobs, + "dont-clean" => \$disable_clean, + "dont-accumulate" => \$dont_accum, + "pass-suffix=s" => \$pass_suffix, + "qsub" => \$useqsub, + "dry-run" => \$dryrun, + "epsilon=s" => \$epsilon, + "help" => \$help, + "weights=s" => \$initial_weights, + "reg=f" => \$reg, + "use-make=i" => \$use_make, + "max-iterations=i" => \$max_iterations, + "pmem=s" => \$pmem, + "cpbin!" 
=> \$cpbin, + "ref-files=s" => \$refFiles, + "metric=s" => \$metric, + "source-file=s" => \$srcFile, + "workdir=s" => \$dir, +) == 0 || @ARGV!=1 || $help) { + print_help(); + exit; +} + +die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $srcFile) { push @missing_args, "--source-file"; } +if (!defined $refFiles) { push @missing_args, "--ref-files"; } +if (!defined $initial_weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); + +if ($metric =~ /^(combi|ter)$/i) { + $lines_per_mapper = 5; +} + +($iniFile) = @ARGV; + + +sub write_config; +sub enseg; +sub print_help; + +my $nodelist; +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + +# used in sorting scores +my $DIR_FLAG = '-r'; +if ($metric =~ /^ter$|^aer$/i) { + $DIR_FLAG = ''; +} + +my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); + +unless ($dir){ + $dir = "rampion"; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; +open(INI, $iniFile); + +use File::Basename qw(basename); +#pass bindir, refs to vars holding bin +sub modbin { + local $_; + my $bindir=shift; + check_call("mkdir -p $bindir"); + -d $bindir || die "couldn't make bindir $bindir"; + for (@_) { + my $src=$$_; + $$_="$bindir/".basename($src); + check_call("cp -p $src $$_"); + } +} +sub dirsize { + opendir ISEMPTY,$_[0]; + return scalar(readdir(ISEMPTY))-1; +} +my @allweights; +if ($dryrun){ + write_config(*STDERR); + exit 0; +} else { + if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-pro.pl outputs + die "ERROR: working dir $dir already exists\n\n"; + } else { + -e $dir || mkdir $dir; + mkdir "$dir/hgs"; + modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; + mkdir "$dir/scripts"; + my $cmdfile="$dir/rerun-pro.sh"; + open CMD,'>',$cmdfile; + print CMD "cd ",&getcwd,"\n"; +# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted. 
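+      # record the verbatim command line so the run can be reproduced by
+      # re-executing $dir/rerun-pro.sh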
+ my $cline=&cmdline."\n"; + print CMD $cline; + close CMD; + print STDERR $cline; + chmod(0755,$cmdfile); + check_call("cp $initial_weights $dir/weights.0"); + die "Can't find weights.0" unless (-e "$dir/weights.0"); + } + write_config(*STDERR); +} + + +# Generate initial files and values +check_call("cp $iniFile $newIniFile"); +$iniFile = $newIniFile; + +my $newsrc = "$dir/dev.input"; +enseg($srcFile, $newsrc); +$srcFile = $newsrc; +my $devSize = 0; +open F, "<$srcFile" or die "Can't read $srcFile: $!"; +while(<F>) { $devSize++; } +close F; + +unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; +my $kbest = "$dir/kbest"; +if ($dont_accum) { + $kbest = ''; +} else { + check_call("mkdir -p $kbest"); + $kbest = "--kbest_repository $kbest"; +} + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + push @allweights, "-w $dir/weights.$im1"; + `rm -f $dir/hgs/*.gz`; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? 
Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
+  my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
+  chomp $dec_score;
+  print STDERR "DECODER SCORE: $dec_score\n";
+
+  # save space
+  check_call("gzip -f $runFile");
+  check_call("gzip -f $decoderLog");
+
+  # run optimizer
+  print STDERR "RUNNING OPTIMIZER AT ";
+  print STDERR unchecked_output("date");
+  print STDERR " - GENERATE TRAINING EXEMPLARS\n";
+  my $mergeLog="$logdir/prune-merge.log.$iteration";
+
+  my $score = 0;
+  my $icc = 0;
+  my $inweights="$dir/weights.$im1";
+  my $outweights="$dir/weights.$iteration";
+  $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1";
+  print STDERR "COMMAND:\n$cmd\n";
+  check_call($cmd);
+  $cmd="$MAPPER $refs_comma_sep -m $metric -i $dir/agenda.$im1 $kbest -w $inweights > $outweights";
+  check_call($cmd);
+  $lastWeightsFile = $outweights;
+  $iteration++;
+  `rm -f $dir/hgs/*.gz`;
+  print STDERR "\n==========\n";
+}
+
+print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
+
+print STDOUT "$lastWeightsFile\n";
+
+exit 0;
+
+sub get_lines {
+  my $fn = shift @_;
+  open FL, "<$fn" or die "Couldn't read $fn: $!";
+  my $lc = 0;
+  while(<FL>) { $lc++; }
+  return $lc;
+}
+
+sub get_comma_sep_refs {
+  my ($r,$p) = @_;
+  my $o = check_output("echo $p");
+  chomp $o;
+  my @files = split /\s+/, $o;
+  return "-$r " . join(" -$r ", @files);
+}
+
+sub read_weights_file {
+  my ($file) = @_;
+  open F, "<$file" or die "Couldn't read $file: $!";
+  my @r = ();
+  my $pm = -1;
+  while(<F>) {
+    next if /^#/;
+    next if /^\s*$/;
+    chomp;
+    if (/^(.+)\s+(.+)$/) {
+      my $m = $1;
+      my $w = $2;
+      die "Weights out of order: $m <= $pm" unless $m > $pm;
+      push @r, $w;
+    } else {
+      warn "Unexpected feature name in weight file: $_";
+    }
+  }
+  close F;
+  return join ' ', @r;
+}
+
+# subs
+sub write_config {
+  my $fh = shift;
+  my $cleanup = "yes";
+  if ($disable_clean) {$cleanup = "no";}
+
+  print $fh "\n";
+  print $fh "DECODER: $decoder\n";
+  print $fh "INI FILE: $iniFile\n";
+  print $fh "WORKING DIR: $dir\n";
+  print $fh "SOURCE (DEV): $srcFile\n";
+  print $fh "REFS (DEV): $refFiles\n";
+  print $fh "EVAL METRIC: $metric\n";
+  print $fh "MAX ITERATIONS: $max_iterations\n";
+  print $fh "JOBS: $jobs\n";
+  print $fh "HEAD NODE: $host\n";
+  print $fh "PMEM (DECODING): $pmem\n";
+  print $fh "CLEANUP: $cleanup\n";
+}
+
+sub update_weights_file {
+  my ($neww, $rfn, $rpts) = @_;
+  my @feats = @$rfn;
+  my @pts = @$rpts;
+  my $num_feats = scalar @feats;
+  my $num_pts = scalar @pts;
+  die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts;
+  open G, ">$neww" or die;
+  for (my $i = 0; $i < $num_feats; $i++) {
+    my $f = $feats[$i];
+    my $lambda = $pts[$i];
+    print G "$f $lambda\n";
+  }
+  close G;
+}
+
+sub enseg {
+  my $src = shift;
+  my $newsrc = shift;
+  open(SRC, $src);
+  open(NEWSRC, ">$newsrc");
+  my $i=0;
+  while (my $line=<SRC>){
+    chomp $line;
+    if ($line =~ /^\s*<seg/i) {
+      if($line =~ /id="[0-9]+"/) {
+        print NEWSRC "$line\n";
+      } else {
+        die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+      }
+    } else {
+      print NEWSRC "<seg id=\"$i\">$line</seg>\n";
+    }
+    $i++;
+  }
+  close SRC;
+  close NEWSRC;
+  die "Empty dev set!" if ($i == 0);
+}
+
+sub print_help {
+
+  my $executable = check_output("basename $0"); chomp $executable;
+  print << "Help";
+
+Usage: $executable [options] <ini file>
+
+       $executable [options] <ini file>
+          Runs a complete RAMPION optimization using the ini file specified.
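+
+       Example (file names here are placeholders):
+          $executable --source-file dev.src --ref-files 'dev.refs*' \\
+              --weights weights.init --metric ibm_bleu cdec.ini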
+ +Required: + + --ref-files <files> + Dev set ref files. This option takes only a single string argument. + To use multiple files (including file globbing), this argument should + be quoted. + + --source-file <file> + Dev set source file. + + --weights <file> + Initial weights file (use empty file to start from 0) + +General options: + + --help + Print this message and exit. + + --dont-accumulate + Don't accumulate k-best lists from multiple iterations. + + --max-iterations <M> + Maximum number of iterations to run. If not specified, defaults + to $default_max_iter. + + --metric <method> + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --pass-suffix <S> + If the decoder is doing multi-pass decoding, the pass suffix "2", + "3", etc., is used to control what iteration of weights is set. + + --workdir <dir> + Directory for intermediate and output files. If not specified, the + name is derived from the ini filename. Assuming that the ini + filename begins with the decoder name and ends with ini, the default + name of the working directory is inferred from the middle part of + the filename. E.g. an ini file named decoder.foo.ini would have + a default working directory name foo. + +Regularization options: + + --reg <F> + l2 regularization strength [default=500]. The greater this value, + the closer to zero the weights will be. + +Job control options: + + --jobs <I> + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem <N> + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? 
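+# escape_shell: wrap an argument in double quotes iff it contains shell
+# metacharacters, backslash-escaping the characters that remain special
+# inside double quotes (backslash, double quote, $, backtick, !).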
+my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} diff --git a/training/rampion/rampion_cccp.cc b/training/rampion/rampion_cccp.cc new file mode 100644 index 00000000..1e36dc51 --- /dev/null +++ b/training/rampion/rampion_cccp.cc @@ -0,0 +1,168 @@ +#include <sstream> +#include <iostream> +#include <vector> +#include <limits> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "filelib.h" +#include "stringlib.h" +#include "weights.h" +#include "hg_io.h" +#include "kbest.h" +#include "viterbi.h" +#include "ns.h" +#include "ns_docscorer.h" +#include "candidate_set.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)") + ("weights,w",po::value<string>(), "[REQD] Weights files from current iterations") + ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)") + ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") + ("kbest_repository,R",po::value<string>(), "Accumulate k-best lists from previous iterations (parameter is path to repository)") + ("kbest_size,k",po::value<unsigned>()->default_value(500u), "Top k-hypotheses to extract") + ("cccp_iterations,I", po::value<unsigned>()->default_value(10u), "CCCP iterations (T')") + ("ssd_iterations,J", po::value<unsigned>()->default_value(5u), "Stochastic subgradient iterations (T'')") + ("eta", po::value<double>()->default_value(1e-4), "Step size") + ("regularization_strength,C", po::value<double>()->default_value(1.0), "L2 regularization strength") + ("alpha,a", po::value<double>()->default_value(10.0), "Cost scale (alpha); alpha * [1-metric(y,y')]") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r <REF.TXT>\n"; + flag = true; + } + if (!conf->count("weights")) { + cerr << "Please specify weights using -w <WEIGHTS.TXT>\n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +struct GainFunction { + explicit GainFunction(const EvaluationMetric* m) : metric(m) {} + float operator()(const SufficientStats& eval_feats) const { + float g = metric->ComputeScore(eval_feats); + if (!metric->IsErrorMetric()) g = 1 - g; + return g; + } + const EvaluationMetric* metric; +}; + +template <typename GainFunc> +void CostAugmentedSearch(const GainFunc& gain, + const training::CandidateSet& cs, + const SparseVector<double>& w, + double alpha, + SparseVector<double>* fmap) { + unsigned best_i = 0; + double best = -numeric_limits<double>::infinity(); + for (unsigned i = 0; i < 
cs.size(); ++i) {
+    double s = cs[i].fmap.dot(w) + alpha * gain(cs[i].eval_feats);
+    if (s > best) {
+      best = s;
+      best_i = i;
+    }
+  }
+  *fmap = cs[best_i].fmap;
+}
+
+// runs lines 4--15 of rampion algorithm
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
+
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
+  cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl;
+  // GainFunction actually returns a loss (e.g. 1-BLEU for gain metrics), so
+  // the hope search scales it by -alpha (goodsign) and the fear search by
+  // +alpha (badsign).
+  double goodsign = -1;
+  double badsign = -goodsign;
+
+  Hypergraph hg;
+  string last_file;
+  ReadFile in_read(conf["input"].as<string>());
+  string kbest_repo;
+  if (conf.count("kbest_repository")) {
+    kbest_repo = conf["kbest_repository"].as<string>();
+    MkDirP(kbest_repo);
+  }
+  istream &in=*in_read.stream();
+  const unsigned kbest_size = conf["kbest_size"].as<unsigned>();
+  const unsigned tp = conf["cccp_iterations"].as<unsigned>();
+  const unsigned tpp = conf["ssd_iterations"].as<unsigned>();
+  const double eta = conf["eta"].as<double>();
+  const double reg = conf["regularization_strength"].as<double>();
+  const double alpha = conf["alpha"].as<double>();
+  SparseVector<weight_t> weights;
+  {
+    vector<weight_t> vweights;
+    const string weightsf = conf["weights"].as<string>();
+    Weights::InitFromFile(weightsf, &vweights);
+    Weights::InitSparseVector(vweights, &weights);
+  }
+  string line, file;
+  vector<training::CandidateSet> kis;
+  cerr << "Loading hypergraphs...\n";
+  while(getline(in, line)) {
+    istringstream is(line);
+    int sent_id;
+    is >> file >> sent_id;  // read "<hg file> <sent id>" before sent_id is used below
+    kis.resize(kis.size() + 1);
+    training::CandidateSet& curkbest = kis.back();
+    string kbest_file;
+    if (kbest_repo.size()) {
+      ostringstream os;
+      os << kbest_repo << "/kbest."
<< sent_id << ".txt.gz"; + kbest_file = os.str(); + if (FileExists(kbest_file)) + curkbest.ReadFromFile(kbest_file); + } + is >> file >> sent_id; + ReadFile rf(file); + if (kis.size() % 5 == 0) { cerr << '.'; } + if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; } + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(weights); + curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + if (kbest_file.size()) + curkbest.WriteToFile(kbest_file); + } + cerr << "\nHypergraphs loaded.\n"; + + vector<SparseVector<weight_t> > goals(kis.size()); // f(x_i,y+,h+) + SparseVector<weight_t> fear; // f(x,y-,h-) + const GainFunction gain(metric); + for (unsigned iterp = 1; iterp <= tp; ++iterp) { + cerr << "CCCP Iteration " << iterp << endl; + for (unsigned i = 0; i < goals.size(); ++i) + CostAugmentedSearch(gain, kis[i], weights, goodsign * alpha, &goals[i]); + for (unsigned iterpp = 1; iterpp <= tpp; ++iterpp) { + cerr << " SSD Iteration " << iterpp << endl; + for (unsigned i = 0; i < goals.size(); ++i) { + CostAugmentedSearch(gain, kis[i], weights, badsign * alpha, &fear); + weights -= weights * (eta * reg / goals.size()); + weights += (goals[i] - fear) * eta; + } + } + } + vector<weight_t> w; + weights.init_vector(&w); + Weights::WriteToFile("-", w); + return 0; +} + diff --git a/training/rampion/rampion_generate_input.pl b/training/rampion/rampion_generate_input.pl new file mode 100755 index 00000000..b30fc4fd --- /dev/null +++ b/training/rampion/rampion_generate_input.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl -w +use strict; + +die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; +my $d = shift @ARGV; +die "Can't find directory $d" unless -d $d; + +opendir(DIR, $d) or die "Can't read $d: $!"; +my @hgs = grep { /\.gz$/ } readdir(DIR); +closedir DIR; + +for my $hg (@hgs) { + my $file = $hg; + my $id = $hg; + $id =~ s/(\.json)?\.gz//; + print "$d/$file $id\n"; +} + diff --git a/training/ttables.cc b/training/ttables.cc deleted file mode 100644 index 45bf14c5..00000000 --- a/training/ttables.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include "ttables.h" - -#include <cassert> - -#include "dict.h" - -using namespace std; -using namespace std::tr1; - -void TTable::DeserializeProbsFromText(std::istream* in) { - int c = 0; - while(*in) { - string e; - string f; - double p; - (*in) >> e >> f >> p; - if (e.empty()) break; - ++c; - ttable[TD::Convert(e)][TD::Convert(f)] = p; - } - cerr << "Loaded " << c << " translation parameters.\n"; -} - -void TTable::SerializeHelper(string* out, const Word2Word2Double& o) { - assert(!"not implemented"); -} - -void TTable::DeserializeHelper(const string& in, Word2Word2Double* o) { - assert(!"not implemented"); -} - diff --git a/training/ttables.h b/training/ttables.h deleted file mode 100644 index 9baa13ca..00000000 --- a/training/ttables.h +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef _TTABLES_H_ -#define _TTABLES_H_ - -#include <iostream> -#include <tr1/unordered_map> - -#include "sparse_vector.h" -#include "m.h" -#include "wordid.h" -#include "tdict.h" - -class TTable { - public: - TTable() {} - typedef std::tr1::unordered_map<WordID, double> Word2Double; - typedef std::tr1::unordered_map<WordID, Word2Double> Word2Word2Double; - inline double prob(const int& e, const int& f) const { - const Word2Word2Double::const_iterator cit = ttable.find(e); - if (cit != ttable.end()) { - const Word2Double& cpd = cit->second; - const Word2Double::const_iterator it = cpd.find(f); - if (it == cpd.end()) return 1e-9; - return it->second; - } else { - return 1e-9; - } - } - inline 
void Increment(const int& e, const int& f) { - counts[e][f] += 1.0; - } - inline void Increment(const int& e, const int& f, double x) { - counts[e][f] += x; - } - void NormalizeVB(const double alpha) { - ttable.swap(counts); - for (Word2Word2Double::iterator cit = ttable.begin(); - cit != ttable.end(); ++cit) { - double tot = 0; - Word2Double& cpd = cit->second; - for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - tot += it->second + alpha; - for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - it->second = exp(Md::digamma(it->second + alpha) - Md::digamma(tot)); - } - counts.clear(); - } - void Normalize() { - ttable.swap(counts); - for (Word2Word2Double::iterator cit = ttable.begin(); - cit != ttable.end(); ++cit) { - double tot = 0; - Word2Double& cpd = cit->second; - for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - tot += it->second; - for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - it->second /= tot; - } - counts.clear(); - } - // adds counts from another TTable - probabilities remain unchanged - TTable& operator+=(const TTable& rhs) { - for (Word2Word2Double::const_iterator it = rhs.counts.begin(); - it != rhs.counts.end(); ++it) { - const Word2Double& cpd = it->second; - Word2Double& tgt = counts[it->first]; - for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { - tgt[j->first] += j->second; - } - } - return *this; - } - void ShowTTable() const { - for (Word2Word2Double::const_iterator it = ttable.begin(); it != ttable.end(); ++it) { - const Word2Double& cpd = it->second; - for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { - std::cerr << "P(" << TD::Convert(j->first) << '|' << TD::Convert(it->first) << ") = " << j->second << std::endl; - } - } - } - void ShowCounts() const { - for (Word2Word2Double::const_iterator it = counts.begin(); it != counts.end(); ++it) { - const Word2Double& cpd = it->second; - for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { - std::cerr << "c(" << TD::Convert(j->first) << '|' << TD::Convert(it->first) << ") = " << j->second << std::endl; - } - } - } - void DeserializeProbsFromText(std::istream* in); - void SerializeCounts(std::string* out) const { SerializeHelper(out, counts); } - void DeserializeCounts(const std::string& in) { DeserializeHelper(in, &counts); } - void SerializeProbs(std::string* out) const { SerializeHelper(out, ttable); } - void DeserializeProbs(const std::string& in) { DeserializeHelper(in, &ttable); } - private: - static void SerializeHelper(std::string*, const Word2Word2Double& o); - static void DeserializeHelper(const std::string&, Word2Word2Double* o); - public: - Word2Word2Double ttable; - Word2Word2Double counts; -}; - -#endif diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am new file mode 100644 index 00000000..27c6e344 --- /dev/null +++ b/training/utils/Makefile.am @@ -0,0 +1,46 @@ +noinst_LIBRARIES = libtraining_utils.a + +bin_PROGRAMS = \ + sentserver \ + sentclient \ + grammar_convert + +noinst_PROGRAMS = \ + lbfgs_test \ + optimize_test + +EXTRA_DIST = decode-and-evaluate.pl libcall.pl parallelize.pl + +sentserver_SOURCES = sentserver.cc +sentserver_LDFLAGS = -pthread + +sentclient_SOURCES = sentclient.cc +sentclient_LDFLAGS = -pthread + +TESTS = lbfgs_test optimize_test + +libtraining_utils_a_SOURCES = \ + candidate_set.h \ + entropy.h \ + lbfgs.h \ + online_optimizer.h \ + optimize.h \ + risk.h \ + sentserver.h \ + candidate_set.cc \ + entropy.cc \ + optimize.cc 
\ + online_optimizer.cc \ + risk.cc + +optimize_test_SOURCES = optimize_test.cc +optimize_test_LDADD = libtraining_utils.a ../../utils/libutils.a + +grammar_convert_SOURCES = grammar_convert.cc +grammar_convert_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a + +lbfgs_test_SOURCES = lbfgs_test.cc +lbfgs_test_LDADD = ../../utils/libutils.a + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I$(top_srcdir)/klm + diff --git a/training/candidate_set.cc b/training/utils/candidate_set.cc index 087efec3..087efec3 100644 --- a/training/candidate_set.cc +++ b/training/utils/candidate_set.cc diff --git a/training/candidate_set.h b/training/utils/candidate_set.h index 9d326ed0..9d326ed0 100644 --- a/training/candidate_set.h +++ b/training/utils/candidate_set.h diff --git a/training/utils/decode-and-evaluate.pl b/training/utils/decode-and-evaluate.pl new file mode 100755 index 00000000..1a332c08 --- /dev/null +++ b/training/utils/decode-and-evaluate.pl @@ -0,0 +1,246 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use File::Basename qw(basename); +my $QSUB_CMD = qsub_args(mert_memory()); + +require "libcall.pl"; + +# Default settings +my $default_jobs = env_default_jobs(); +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $parallelize = "$bin_dir/parallelize.pl"; +my $libcall = "$bin_dir/libcall.pl"; +my $sentserver = "$bin_dir/sentserver"; +my $sentclient = "$bin_dir/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "9g"; +my $help = 0; +my $config; +my $test_set; +my $weights; +my $use_make = 1; +my $useqsub; +my $cpbin=1; +# Process command-line options +if (GetOptions( + "jobs=i" => \$jobs, + "help" => \$help, + "qsub" => \$useqsub, + "input=s" => \$test_set, + "config=s" => \$config, + "weights=s" => \$weights, +) == 0 || @ARGV!=0 || $help) { + print_help(); + exit; +} + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); + +if (!defined $test_set) { push @missing_args, "--input"; } +if (!defined $config) { push @missing_args, "--config"; } +if (!defined $weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\nUse --help for more information.\n" if (@missing_args); + +my @tf = localtime(time); +my $tname = basename($test_set); +$tname =~ s/\.(sgm|sgml|xml)$//i; +my $dir = "eval.$tname." . 
sprintf('%d%02d%02d-%02d%02d%02d', 1900+$tf[5], $tf[4], $tf[3], $tf[2], $tf[1], $tf[0]); + +my $time = unchecked_output("date"); + +check_call("mkdir -p $dir"); + +split_devset($test_set, "$dir/test.input.raw", "$dir/test.refs"); +my $refs = "-r $dir/test.refs"; +my $newsrc = "$dir/test.input"; +enseg("$dir/test.input.raw", $newsrc); +my $src_file = $newsrc; +open F, "<$src_file" or die "Can't read $src_file: $!"; close F; + +my $test_trans="$dir/test.trans"; +my $logdir="$dir/logs"; +my $decoderLog="$logdir/decoder.sentserver.log"; +check_call("mkdir -p $logdir"); + +#decode +print STDERR "RUNNING DECODER AT "; +print STDERR unchecked_output("date"); +my $decoder_cmd = "$decoder -c $config --weights $weights"; +my $pcmd; +if ($use_make) { + $pcmd = "cat $src_file | $parallelize --workdir $dir --use-fork -p $pmem -e $logdir -j $jobs --"; +} else { + $pcmd = "cat $src_file | $parallelize --workdir $dir -p $pmem -e $logdir -j $jobs --"; +} +my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $test_trans"; +check_bash_call($cmd); +print STDERR "DECODER COMPLETED AT "; +print STDERR unchecked_output("date"); +print STDERR "\nOUTPUT: $test_trans\n\n"; +my $bleu = check_output("cat $test_trans | $SCORER $refs -m ibm_bleu"); +chomp $bleu; +print STDERR "BLEU: $bleu\n"; +my $ter = check_output("cat $test_trans | $SCORER $refs -m ter"); +chomp $ter; +print STDERR " TER: $ter\n"; +open TR, ">$dir/test.scores" or die "Can't write $dir/test.scores: $!"; +print TR <<EOT; +### SCORE REPORT ############################################################# + OUTPUT=$test_trans + SCRIPT INPUT=$test_set + DECODER INPUT=$src_file + REFERENCES=$dir/test.refs +------------------------------------------------------------------------------ + BLEU=$bleu + TER=$ter +############################################################################## +EOT +close TR; +my $sr = unchecked_output("cat $dir/test.scores"); +print STDERR "\n\n$sr\n(A copy of this report can be found in $dir/test.scores)\n\n"; +exit 0; + +sub enseg { + my $src = shift; + my $newsrc = shift; + open(SRC, $src); + open(NEWSRC, ">$newsrc"); + my $i=0; + while (my $line=<SRC>){ + chomp $line; + if ($line =~ /^\s*<seg/i) { + if($line =~ /id="[0-9]+"/) { + print NEWSRC "$line\n"; + } else { + die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute"; + } + } else { + print NEWSRC "<seg id=\"$i\">$line</seg>\n"; + } + $i++; + } + close SRC; + close NEWSRC; +} + +sub print_help { + my $executable = basename($0); chomp $executable; + print << "Help"; + +Usage: $executable [options] <ini file> + + $executable --config cdec.ini --weights weights.txt [--jobs N] [--qsub] <testset.in-ref> + +Options: + + --help + Print this message and exit. + + --config <file> + A path to the cdec.ini file. + + --weights <file> + A file specifying feature weights. + + --dir <dir> + Directory for intermediate and output files. + +Job control options: + + --jobs <I> + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem <N> + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? 
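+# (These helpers are shared boilerplate with pro.pl/rampion.pl; split_devset
+# below expects one "source ||| ref1 ||| ref2 ..." line per dev sentence.)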
+my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} + +sub split_devset { + my ($infile, $outsrc, $outref) = @_; + open F, "<$infile" or die "Can't read $infile: $!"; + open S, ">$outsrc" or die "Can't write $outsrc: $!"; + open R, ">$outref" or die "Can't write $outref: $!"; + while(<F>) { + chomp; + my ($src, @refs) = split /\s*\|\|\|\s*/; + die "Malformed devset line: $_\n" unless scalar @refs > 0; + print S "$src\n"; + print R join(' ||| ', @refs) . "\n"; + } + close R; + close S; + close F; +} + diff --git a/training/entropy.cc b/training/utils/entropy.cc index 4fdbe2be..4fdbe2be 100644 --- a/training/entropy.cc +++ b/training/utils/entropy.cc diff --git a/training/entropy.h b/training/utils/entropy.h index 796589ca..796589ca 100644 --- a/training/entropy.h +++ b/training/utils/entropy.h diff --git a/training/grammar_convert.cc b/training/utils/grammar_convert.cc index 607a7cb9..607a7cb9 100644 --- a/training/grammar_convert.cc +++ b/training/utils/grammar_convert.cc diff --git a/training/lbfgs.h b/training/utils/lbfgs.h index e8baecab..e8baecab 100644 --- a/training/lbfgs.h +++ b/training/utils/lbfgs.h diff --git a/training/lbfgs_test.cc b/training/utils/lbfgs_test.cc index 9678e788..9678e788 100644 --- a/training/lbfgs_test.cc +++ b/training/utils/lbfgs_test.cc diff --git a/training/utils/libcall.pl b/training/utils/libcall.pl new file mode 100644 index 00000000..c7d0f128 --- /dev/null +++ b/training/utils/libcall.pl @@ -0,0 +1,71 @@ +use IPC::Open3; +use Symbol qw(gensym); + +$DUMMY_STDERR = gensym(); +$DUMMY_STDIN = gensym(); + +# Run the command and ignore failures +sub unchecked_call { + system("@_") +} + +# Run the command and return its output, if any ignoring failures +sub unchecked_output { + return `@_` +} + +# WARNING: Do not use this for commands that will return large amounts +# of stdout or stderr -- they might block indefinitely +sub check_output { + print STDERR "Executing and gathering output: @_\n"; + + my $pid = open3($DUMMY_STDIN, \*PH, $DUMMY_STDERR, @_); + my $proc_output = ""; + while( <PH> ) { + $proc_output .= $_; + } + waitpid($pid, 0); + # TODO: Grab signal that the process died from + my $child_exit_status = $? >> 8; + if($child_exit_status == 0) { + return $proc_output; + } else { + print STDERR "ERROR: Execution of @_ failed.\n"; + exit(1); + } +} + +# Based on Moses' safesystem sub +sub check_call { + print STDERR "Executing: @_\n"; + system(@_); + my $exitcode = $? >> 8; + if($exitcode == 0) { + return 0; + } elsif ($? == -1) { + print STDERR "ERROR: Failed to execute: @_\n $!\n"; + exit(1); + + } elsif ($? & 127) { + printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 
'with' : 'without'; + exit(1); + + } else { + print STDERR "Failed with exit code: $exitcode\n" if $exitcode; + exit($exitcode); + } +} + +sub check_bash_call { + my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_"); + check_call(@args); +} + +sub check_bash_output { + my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_"); + return check_output(@args); +} + +# perl module weirdness... +return 1; diff --git a/training/online_optimizer.cc b/training/utils/online_optimizer.cc index 3ed95452..3ed95452 100644 --- a/training/online_optimizer.cc +++ b/training/utils/online_optimizer.cc diff --git a/training/online_optimizer.h b/training/utils/online_optimizer.h index 28d89344..28d89344 100644 --- a/training/online_optimizer.h +++ b/training/utils/online_optimizer.h diff --git a/training/optimize.cc b/training/utils/optimize.cc index 41ac90d8..41ac90d8 100644 --- a/training/optimize.cc +++ b/training/utils/optimize.cc diff --git a/training/optimize.h b/training/utils/optimize.h index 07943b44..07943b44 100644 --- a/training/optimize.h +++ b/training/utils/optimize.h diff --git a/training/optimize_test.cc b/training/utils/optimize_test.cc index bff2ca03..bff2ca03 100644 --- a/training/optimize_test.cc +++ b/training/utils/optimize_test.cc diff --git a/training/utils/parallelize.pl b/training/utils/parallelize.pl new file mode 100755 index 00000000..4197e0e5 --- /dev/null +++ b/training/utils/parallelize.pl @@ -0,0 +1,423 @@ +#!/usr/bin/env perl + +# Author: Adam Lopez +# +# This script takes a command that processes input +# from stdin one-line-at-time, and parallelizes it +# on the cluster using David Chiang's sentserver/ +# sentclient architecture. +# +# Prerequisites: the command *must* read each line +# without waiting for subsequent lines of input +# (for instance, a command which must read all lines +# of input before processing will not work) and +# return it to the output *without* buffering +# multiple lines. + +#TODO: if -j 1, run immediately, not via sentserver? possible differences in environment might make debugging harder + +#ANNOYANCE: if input is shorter than -j n lines, or at the very last few lines, repeatedly sleeps. time cut down to 15s from 60s + +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; } +use LocalConfig; + +use Cwd qw/ abs_path cwd getcwd /; +use File::Temp qw/ tempfile /; +use Getopt::Long; +use IPC::Open2; +use strict; +use POSIX ":sys_wait_h"; + +use File::Basename; +my $myDir = dirname(__FILE__); +print STDERR __FILE__." -> $myDir\n"; +push(@INC, $myDir); +require "libcall.pl"; + +my $tailn=5; # +0 = concatenate all the client logs. 
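+# Runtime knobs: -j/--jobs sets the number of worker clients and --pmem the
+# per-job memory request; a free TCP port for the sentserver is probed
+# starting at --baseport plus a random offset (see the port scan below).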
5 = last 5 lines +my $recycle_clients; # spawn new clients when previous ones terminate +my $stay_alive; # dont let server die when having zero clients +my $joblist = ""; +my $errordir=""; +my $multiline; +my $workdir = '.'; +my $numnodes = 8; +my $user = $ENV{"USER"}; +my $pmem = "9g"; +my $basep=50300; +my $randp=300; +my $tryp=50; +my $no_which; +my $no_cd; + +my $DEBUG=$ENV{DEBUG}; +print STDERR "DEBUG=$DEBUG output enabled.\n" if $DEBUG; +my $verbose = 1; +sub verbose { + if ($verbose) { + print STDERR @_,"\n"; + } +} +sub debug { + if ($DEBUG) { + my ($package, $filename, $line) = caller; + print STDERR "DEBUG: $filename($line): ",join(' ',@_),"\n"; + } +} +my $is_shell_special=qr.[ \t\n\\><|&;"'`~*?{}$!()].; +my $shell_escape_in_quote=qr.[\\"\$`!].; +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + return '""' unless $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} +sub preview_files { + my ($l,$skipempty,$footer,$n)=@_; + $n=$tailn unless defined $n; + my @f=grep { ! ($skipempty && -z $_) } @$l; + my $fn=join(' ',map {escape_shell($_)} @f); + my $cmd="tail -n $n $fn"; + unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); +} +sub prefix_dirname($) { + #like `dirname but if ends in / then return the whole thing + local ($_)=@_; + if (/\/$/) { + $_; + } else { + s#/[^/]$##; + $_ ? $_ : ''; + } +} +sub ensure_final_slash($) { + local ($_)=@_; + m#/$# ? $_ : ($_."/"); +} +sub extend_path($$;$$) { + my ($base,$ext,$mkdir,$baseisdir)=@_; + if (-d $base) { + $base.="/"; + } else { + my $dir; + if ($baseisdir) { + $dir=$base; + $base.='/' unless $base =~ /\/$/; + } else { + $dir=prefix_dirname($base); + } + my @cmd=("/bin/mkdir","-p",$dir); + check_call(@cmd) if $mkdir; + } + return $base.$ext; +} + +my $abscwd=abs_path(&getcwd); +sub print_help; + +my $use_fork; +my @pids; + +# Process command-line options +unless (GetOptions( + "stay-alive" => \$stay_alive, + "recycle-clients" => \$recycle_clients, + "error-dir=s" => \$errordir, + "multi-line" => \$multiline, + "workdir=s" => \$workdir, + "use-fork" => \$use_fork, + "verbose" => \$verbose, + "jobs=i" => \$numnodes, + "pmem=s" => \$pmem, + "baseport=i" => \$basep, +# "iport=i" => \$randp, #for short name -i + "no-which!" => \$no_which, + "no-cd!" => \$no_cd, + "tailn=s" => \$tailn, +) && scalar @ARGV){ + print_help(); + die "bad options."; +} + +my $cmd = ""; +my $prog=shift; +if ($no_which) { + $cmd=$prog; +} else { + $cmd=check_output("which $prog"); + chomp $cmd; + die "$prog not found - $cmd" unless $cmd; +} +#$cmd=abs_path($cmd); +for my $arg (@ARGV) { + $cmd .= " ".escape_shell($arg); +} +die "Please specify a command to parallelize\n" if $cmd eq ''; + +my $cdcmd=$no_cd ? 
'' : ("cd ".escape_shell($abscwd)."\n"); + +my $executable = $cmd; +$executable =~ s/^\s*(\S+)($|\s.*)/$1/; +$executable=check_output("basename $executable"); +chomp $executable; + + +print STDERR "Parallelizing ($numnodes ways): $cmd\n\n"; + +# create -e dir and save .sh +use File::Temp qw/tempdir/; +unless ($errordir) { + $errordir=tempdir("$executable.XXXXXX",CLEANUP=>1); +} +if ($errordir) { + my $scriptfile=extend_path("$errordir/","$executable.sh",1,1); + -d $errordir || die "should have created -e dir $errordir"; + open SF,">",$scriptfile || die; + print SF "$cdcmd$cmd\n"; + close SF; + chmod 0755,$scriptfile; + $errordir=abs_path($errordir); + &verbose("-e dir: $errordir"); +} + +# set cleanup handler +my @cleanup_cmds; +sub cleanup; +sub cleanup_and_die; +$SIG{INT} = "cleanup_and_die"; +$SIG{TERM} = "cleanup_and_die"; +$SIG{HUP} = "cleanup_and_die"; + +# other subs: +sub numof_live_jobs; +sub launch_job_on_node; + + +# vars +my $mydir = check_output("dirname $0"); chomp $mydir; +my $sentserver = "$mydir/sentserver"; +my $sentclient = "$mydir/sentclient"; +my $host = check_output("hostname"); +chomp $host; + + +# find open port +srand; +my $port = 50300+int(rand($randp)); +my $endp=$port+$tryp; +sub listening_port_lines { + my $quiet=$verbose?'':'2>/dev/null'; + return unchecked_output("netstat -a -n $quiet | grep LISTENING | grep -i tcp"); +} +my $netstat=&listening_port_lines; + +if ($verbose){ print STDERR "Testing port $port...";} + +while ($netstat=~/$port/ || &listening_port_lines=~/$port/){ + if ($verbose){ print STDERR "port is busy\n";} + $port++; + if ($port > $endp){ + die "Unable to find open port\n"; + } + if ($verbose){ print STDERR "Testing port $port... "; } +} +if ($verbose){ + print STDERR "port $port is available\n"; +} + +my $key = int(rand()*1000000); + +my $multiflag = ""; +if ($multiline){ $multiflag = "-m"; print STDERR "expecting multiline output.\n"; } +my $stay_alive_flag = ""; +if ($stay_alive){ $stay_alive_flag = "--stay-alive"; print STDERR "staying alive while no clients are connected.\n"; } + +my $node_count = 0; +my $script = ""; +# fork == one thread runs the sentserver, while the +# other spawns the sentclient commands. +my $pid = fork; +if ($pid == 0) { # child + sleep 8; # give other thread time to start sentserver + $script = "$cdcmd$sentclient $host:$port:$key $cmd"; + + if ($verbose){ + print STDERR "Client script:\n====\n"; + print STDERR $script; + print STDERR "====\n"; + } + for (my $jobn=0; $jobn<$numnodes; $jobn++){ + launch_job(); + } + if ($recycle_clients) { + my $ret; + my $livejobs; + while (1) { + $ret = waitpid($pid, WNOHANG); + #print STDERR "waitpid $pid ret = $ret \n"; + last if ($ret != 0); + $livejobs = numof_live_jobs(); + if ($numnodes >= $livejobs ) { # a client terminated, OR # lines of input was less than -j + print STDERR "num of requested nodes = $numnodes; num of currently live jobs = $livejobs; Client terminated - launching another.\n"; + launch_job(); + } else { + sleep 15; + } + } + } + print STDERR "CHILD PROCESSES SPAWNED ... 
WAITING\n"; + for my $p (@pids) { + waitpid($p, 0); + } +} else { +# my $todo = "$sentserver -k $key $multiflag $port "; + my $todo = "$sentserver -k $key $multiflag $port $stay_alive_flag "; + if ($verbose){ print STDERR "Running: $todo\n"; } + check_call($todo); + print STDERR "Call to $sentserver returned.\n"; + cleanup(); + exit(0); +} + +sub numof_live_jobs { + if ($use_fork) { + die "not implemented"; + } else { + # We can probably continue decoding if the qstat error is only temporary + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat"))); + return ($#livejobs + 1); + } +} +my (@errors,@outs,@cmds); + +sub launch_job { + if ($use_fork) { return launch_job_fork(); } + my $errorfile = "/dev/null"; + my $outfile = "/dev/null"; + $node_count++; + my $clientname = $executable; + $clientname =~ s/^(.{4}).*$/$1/; + $clientname = "$clientname.$node_count"; + if ($errordir){ + $errorfile = "$errordir/$clientname.ER"; + $outfile = "$errordir/$clientname.OU"; + push @errors,$errorfile; + push @outs,$outfile; + } + my $todo = qsub_args($pmem) . " -N $clientname -o $outfile -e $errorfile"; + push @cmds,$todo; + + print STDERR "Running: $todo\n"; + local(*QOUT, *QIN); + open2(\*QOUT, \*QIN, $todo) or die "Failed to open2: $!"; + print QIN $script; + close QIN; + while (my $jobid=<QOUT>){ + chomp $jobid; + if ($verbose){ print STDERR "Launched client job: $jobid"; } + $jobid =~ s/^(\d+)(.*?)$/\1/g; + $jobid =~ s/^Your job (\d+) .*$/\1/; + print STDERR " short job id $jobid\n"; + if ($verbose){ + print STDERR "cd: $abscwd\n"; + print STDERR "cmd: $cmd\n"; + } + if ($joblist == "") { $joblist = $jobid; } + else {$joblist = $joblist . "\|" . $jobid; } + my $cleanfn="qdel $jobid 2> /dev/null"; + push(@cleanup_cmds, $cleanfn); + } + close QOUT; +} + +sub launch_job_fork { + my $errorfile = "/dev/null"; + my $outfile = "/dev/null"; + $node_count++; + my $clientname = $executable; + $clientname =~ s/^(.{4}).*$/$1/; + $clientname = "$clientname.$node_count"; + if ($errordir){ + $errorfile = "$errordir/$clientname.ER"; + $outfile = "$errordir/$clientname.OU"; + push @errors,$errorfile; + push @outs,$outfile; + } + my $pid = fork; + if ($pid == 0) { + my ($fh, $scr_name) = get_temp_script(); + print $fh $script; + close $fh; + my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile"; + print STDERR "EXEC: $todo\n"; + my $out = check_output("$todo"); + unlink $scr_name or warn "Failed to remove $scr_name"; + exit 0; + } else { + push @pids, $pid; + } +} + +sub get_temp_script { + my ($fh, $filename) = tempfile( "$workdir/workXXXX", SUFFIX => '.sh'); + return ($fh, $filename); +} + +sub cleanup_and_die { + cleanup(); + die "\n"; +} + +sub cleanup { + print STDERR "Cleaning up...\n"; + for $cmd (@cleanup_cmds){ + print STDERR " Cleanup command: $cmd\n"; + eval $cmd; + } + print STDERR "outputs:\n",preview_files(\@outs,1),"\n"; + print STDERR "errors:\n",preview_files(\@errors,1),"\n"; + print STDERR "cmd:\n",$cmd,"\n"; + print STDERR " cat $errordir/*.ER\nfor logs.\n"; + print STDERR "Cleanup finished.\n"; +} + +sub print_help +{ + my $name = check_output("basename $0"); chomp $name; + print << "Help"; + +usage: $name [options] + + Automatic black-box parallelization of commands. + +options: + + --use-fork + Instead of using qsub, use fork. + + -e, --error-dir <dir> + Retain output files from jobs in <dir>, rather + than silently deleting them. + + -m, --multi-line + Expect that command may produce multiple output + lines for a single input line. 
diff --git a/training/risk.cc b/training/utils/risk.cc
index d5a12cfd..d5a12cfd 100644
--- a/training/risk.cc
+++ b/training/utils/risk.cc
diff --git a/training/risk.h b/training/utils/risk.h
index 2e8db0fb..2e8db0fb 100644
--- a/training/risk.h
+++ b/training/utils/risk.h
diff --git a/training/utils/sentclient.cc b/training/utils/sentclient.cc
new file mode 100644
index 00000000..91d994ab
--- /dev/null
+++ b/training/utils/sentclient.cc
@@ -0,0 +1,76 @@
+/* Copyright (c) 2001 by David Chiang. All rights reserved. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <string.h>
+
+#include "sentserver.h"
+
+int main (int argc, char *argv[]) {
+  int sock, port;
+  char *s, *key;
+  struct hostent *hp;
+  struct sockaddr_in server;
+  int errors = 0;
+
+  if (argc < 3) {
+    fprintf(stderr, "Usage: sentclient host[:port[:key]] command [args ...]\n");
+    exit(1);
+  }
+
+  s = strchr(argv[1], ':');
+  key = NULL;
+
+  if (s == NULL) {
+    port = DEFAULT_PORT;
+  } else {
+    *s = '\0';
+    s += 1;
+    /* dumb hack */
+    key = strchr(s, ':');
+    if (key != NULL) {
+      *key = '\0';
+      key += 1;
+    }
+    port = atoi(s);
+  }
+
+  sock = socket(AF_INET, SOCK_STREAM, 0);
+
+  hp = gethostbyname(argv[1]);
+  if (hp == NULL) {
+    fprintf(stderr, "unknown host %s\n", argv[1]);
+    exit(1);
+  }
+
+  bzero((char *)&server, sizeof(server));
+  bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length);
+  server.sin_family = hp->h_addrtype;
+  server.sin_port = htons(port);
+
+  while (connect(sock, (struct sockaddr *)&server, sizeof(server)) < 0) {
+    perror("connect()");
+    sleep(1);
+    errors++;
+    if (errors > 5)
+      exit(1);
+  }
+
+  close(0);
+  close(1);
+  dup2(sock, 0);
+  dup2(sock, 1);
+
+  if (key != NULL) {
+    write(1, key, strlen(key));
+    write(1, "\n", 1);
+  }
+
+  execvp(argv[2], argv+2);
+  return 0;
+}
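
The trick at the heart of sentclient is the pair of dup2() calls: the connected socket is spliced over file descriptors 0 and 1 before execvp(), so the wrapped command reads tasks from the server and writes results back without knowing a network is involved. Assuming both binaries are built, a quick smoke test is to use cat as the worker (the port and key values below are arbitrary); the server should then reproduce its input on stdout:

    ./sentserver -k 1234 50299 < input.txt > output.txt &
    ./sentclient localhost:50299:1234 cat
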
diff --git a/training/utils/sentserver.cc b/training/utils/sentserver.cc
new file mode 100644
index 00000000..b425955f
--- /dev/null
+++ b/training/utils/sentserver.cc
@@ -0,0 +1,515 @@
+/* Copyright (c) 2001 by David Chiang. All rights reserved. */
+
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "sentserver.h"
+
+#define MAX_CLIENTS 64
+
+struct clientinfo {
+  int s;
+  struct sockaddr_in sin;
+};
+
+struct line {
+  int id;
+  char *s;
+  int status;
+  struct line *next;
+} *head, **ptail;
+
+int n_sent = 0, n_received = 0, n_flushed = 0;
+
+#define STATUS_RUNNING 0
+#define STATUS_ABORTED 1
+#define STATUS_FINISHED 2
+
+pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t clients_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t input_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int n_clients = 0;
+int s;
+int expect_multiline_output = 0;
+int log_mutex = 0;
+int stay_alive = 0; /* don't panic and die with zero clients */
+
+void queue_finish(struct line *node, char *s, int fid);
+char * read_line(int fd, int multiline);
+void done (int code);
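
The head/ptail pair above is the classic tail-pointer queue: ptail always addresses the next-field slot that a fresh node should be stored into, so appends are O(1) with no empty-list special case, and setting ptail to NULL later doubles as the end-of-input sentinel that queue_finish checks. The idiom in isolation, a sketch reusing the struct line defined above:

    struct line *head = NULL;
    struct line **ptail = &head;    /* empty queue: the tail slot is head itself */

    void append(struct line *node) {
      node->next = NULL;
      *ptail = node;                /* store into whichever next-slot is currently last */
      ptail = &node->next;          /* the new node's next-slot becomes the tail slot */
    }
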
+struct line * queue_get(int fid) {
+  struct line *cur;
+  char *s, *synch;
+
+  if (log_mutex) fprintf(stderr, "Getting data for fid %d\n", fid);
+  if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+  pthread_mutex_lock(&queue_mutex);
+
+  /* First, check for aborted sentences. */
+
+  if (log_mutex) fprintf(stderr, " Checking queue for aborted jobs (fid %d)\n", fid);
+  for (cur = head; cur != NULL; cur = cur->next) {
+    if (cur->status == STATUS_ABORTED) {
+      cur->status = STATUS_RUNNING;
+
+      if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+      pthread_mutex_unlock(&queue_mutex);
+
+      return cur;
+    }
+  }
+  if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+  pthread_mutex_unlock(&queue_mutex);
+
+  /* Otherwise, read a new one. */
+  if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid);
+  if (log_mutex) fprintf(stderr, " Reading input for new data (fid %d)\n", fid);
+  pthread_mutex_lock(&input_mutex);
+  s = read_line(0,0);
+
+  while (s) {
+    if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+    pthread_mutex_lock(&queue_mutex);
+    if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid);
+    pthread_mutex_unlock(&input_mutex);
+
+    cur = (line*)malloc(sizeof (struct line));
+    cur->id = n_sent;
+    cur->s = s;
+    cur->next = NULL;
+
+    *ptail = cur;
+    ptail = &cur->next;
+
+    n_sent++;
+
+    if (strcmp(s,"===SYNCH===\n")==0){
+      fprintf(stderr, "Received ===SYNCH=== signal (fid %d)\n", fid);
+      // Note: queue_finish calls free(cur->s).
+      // Therefore we need to create a new string here.
+      synch = (char*)malloc((strlen("===SYNCH===\n")+2) * sizeof (char));
+      synch = strcpy(synch, s);
+
+      if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+      pthread_mutex_unlock(&queue_mutex);
+      queue_finish(cur, synch, fid); /* handles its own lock */
+
+      if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid);
+      if (log_mutex) fprintf(stderr, " Reading input for new data (fid %d)\n", fid);
+      pthread_mutex_lock(&input_mutex);
+
+      s = read_line(0,0);
+    } else {
+      if (log_mutex) fprintf(stderr, " Received new data %d (fid %d)\n", cur->id, fid);
+      cur->status = STATUS_RUNNING;
+      if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+      pthread_mutex_unlock(&queue_mutex);
+      return cur;
+    }
+  }
+
+  if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid);
+  pthread_mutex_unlock(&input_mutex);
+  /* Only way to reach this point: no more input */
+
+  if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+  pthread_mutex_lock(&queue_mutex);
+  if (head == NULL) {
+    fprintf(stderr, "Reached end of file. Exiting.\n");
+    done(0);
+  } else
+    ptail = NULL; /* This serves as a signal that there is no more input */
+  if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+  pthread_mutex_unlock(&queue_mutex);
+
+  return NULL;
+}
+
+void queue_panic() {
+  struct line *next;
+  while (head && head->status != STATUS_RUNNING) {
+    /* Write out finished sentences */
+    if (head->status == STATUS_FINISHED) {
+      fputs(head->s, stdout);
+      fflush(stdout);
+    }
+    /* Write out blank line for unfinished sentences */
+    if (head->status == STATUS_ABORTED) {
+      fputs("\n", stdout);
+      fflush(stdout);
+    }
+    /* By definition, there cannot be any RUNNING sentences, since
+       this function is only called when n_clients == 0 */
+    free(head->s);
+    next = head->next;
+    free(head);
+    head = next;
+    n_flushed++;
+  }
+  fclose(stdout);
+  fprintf(stderr, "All clients died. Panicking, flushing completed sentences and exiting.\n");
+  done(1);
+}
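
As the comments in queue_get note, queue_finish frees the node's old buffer and takes ownership of the buffer passed in, which is why the ===SYNCH=== branch copies the line before handing it back. The contract in miniature, with strdup standing in for the malloc-plus-strcpy above:

    #include <string.h>

    char *copy = strdup(s);        /* queue_finish will free() this copy later */
    queue_finish(cur, copy, fid);  /* also free()s the buffer previously in cur->s */
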
+void queue_abort(struct line *node, int fid) {
+  if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+  pthread_mutex_lock(&queue_mutex);
+  node->status = STATUS_ABORTED;
+  if (n_clients == 0) {
+    if (stay_alive) {
+      fprintf(stderr, "Warning! No live clients detected! Staying alive, will retry soon.\n");
+    } else {
+      queue_panic();
+    }
+  }
+  if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+  pthread_mutex_unlock(&queue_mutex);
+}
+
+
+void queue_print() {
+  struct line *cur;
+
+  fprintf(stderr, " Queue\n");
+
+  for (cur = head; cur != NULL; cur = cur->next) {
+    switch(cur->status) {
+      case STATUS_RUNNING:
+        fprintf(stderr, " %d running ", cur->id); break;
+      case STATUS_ABORTED:
+        fprintf(stderr, " %d aborted ", cur->id); break;
+      case STATUS_FINISHED:
+        fprintf(stderr, " %d finished ", cur->id); break;
+    }
+    fprintf(stderr, "\n");
+    //fprintf(stderr, cur->s);
+  }
+}
+
+void queue_finish(struct line *node, char *s, int fid) {
+  struct line *next;
+  if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid);
+  pthread_mutex_lock(&queue_mutex);
+
+  free(node->s);
+  node->s = s;
+  node->status = STATUS_FINISHED;
+  n_received++;
+
+  /* Flush out finished nodes */
+  while (head && head->status == STATUS_FINISHED) {
+
+    if (log_mutex) fprintf(stderr, " Flushing finished node %d\n", head->id);
+
+    fputs(head->s, stdout);
+    fflush(stdout);
+    if (log_mutex) fprintf(stderr, " Flushed node %d\n", head->id);
+    free(head->s);
+
+    next = head->next;
+    free(head);
+
+    head = next;
+
+    n_flushed++;
+
+    if (head == NULL) { /* empty queue */
+      if (ptail == NULL) { /* This can only happen if set in queue_get as signal that there is no more input. */
+        fprintf(stderr, "All sentences finished. Exiting.\n");
+        done(0);
+      } else /* ptail pointed at something which was just popped off the stack -- reset to head */
+        ptail = &head;
+    }
+  }
+
+  if (log_mutex && head) fprintf(stderr, " Flushing output %d\n", head->id);
+  fflush(stdout);
+  fprintf(stderr, "%d sentences sent, %d sentences finished, %d sentences flushed\n", n_sent, n_received, n_flushed);
+
+  if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid);
+  pthread_mutex_unlock(&queue_mutex);
+}
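
read_line, below, and the key handshake in main both poll a descriptor with the same select()-plus-timeout pattern. Factored out as a hypothetical helper, not part of this commit:

    #include <sys/select.h>

    /* Return nonzero iff fd becomes readable within the given number of
       seconds. The first argument to select() must exceed the largest fd
       in the set; fd+1 is the tight bound (the code in this file passes
       FD_SETSIZE instead, which also works as long as fd < FD_SETSIZE). */
    int readable_within(int fd, int seconds) {
      fd_set set;
      struct timeval timeout;
      FD_ZERO(&set);
      FD_SET(fd, &set);
      timeout.tv_sec = seconds;
      timeout.tv_usec = 0;
      return select(fd + 1, &set, NULL, NULL, &timeout) > 0;
    }
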
+char * read_line(int fd, int multiline) {
+  int size = 80;
+  char errorbuf[100];
+  char *s = (char*)malloc(size+2);
+  int result, errors = 0;
+  int i = 0;
+
+  result = read(fd, s+i, 1);
+
+  while (1) {
+    if (result < 0) {
+      perror("read()");
+      sprintf(errorbuf, "Error code: %d\n", errno);
+      fputs(errorbuf, stderr);
+      errors++;
+      if (errors > 5) {
+        free(s);
+        return NULL;
+      } else {
+        sleep(1); /* retry after delay */
+      }
+    } else if (result == 0) {
+      break;
+    } else if (multiline==0 && s[i] == '\n') {
+      break;
+    } else {
+      if (s[i] == '\n'){
+        /* If we've reached this point, then multiline must be 1, and we're
+           going to poll the fd for an additional line of data. The basic
+           design is to run a select on the file descriptor fd. Select will
+           return under two conditions: if there is data on the fd, or if a
+           timeout is reached. If select returns because there's data ready,
+           keep going; else assume there's no more and return the data we
+           already have. */
+
+        fd_set set;
+        FD_ZERO(&set);
+        FD_SET(fd, &set);
+
+        struct timeval timeout;
+        timeout.tv_sec = 3; // number of seconds for timeout
+        timeout.tv_usec = 0;
+
+        int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout);
+        if (ready < 1){
+          break; // no more data, stop looping
+        }
+      }
+      i++;
+
+      if (i == size) {
+        size = size*2;
+        s = (char*)realloc(s, size+2);
+      }
+    }
+
+    result = read(fd, s+i, 1);
+  }
+
+  if (result == 0 && i == 0) { /* end of file */
+    free(s);
+    return NULL;
+  }
+
+  s[i] = '\n';
+  s[i+1] = '\0';
+
+  return s;
+}
+
+void * new_client(void *arg) {
+  struct clientinfo *client = (struct clientinfo *)arg;
+  struct line *cur;
+  int result;
+  char *s;
+  char errorbuf[100];
+
+  pthread_mutex_lock(&clients_mutex);
+  n_clients++;
+  pthread_mutex_unlock(&clients_mutex);
+
+  fprintf(stderr, "Client connected (%d connected)\n", n_clients);
+
+  for (;;) {
+
+    cur = queue_get(client->s);
+
+    if (cur) {
+      /* fprintf(stderr, "Sending to client: %s", cur->s); */
+      fprintf(stderr, "Sending data %d to client (fid %d)\n", cur->id, client->s);
+      result = write(client->s, cur->s, strlen(cur->s));
+      if (result < 0 || (size_t)result < strlen(cur->s)){ /* error or short write */
+        perror("write()");
+        sprintf(errorbuf, "Error code: %d\n", errno);
+        fputs(errorbuf, stderr);
+
+        pthread_mutex_lock(&clients_mutex);
+        n_clients--;
+        pthread_mutex_unlock(&clients_mutex);
+
+        fprintf(stderr, "Client died (%d connected)\n", n_clients);
+        queue_abort(cur, client->s);
+
+        close(client->s);
+        free(client);
+
+        pthread_exit(NULL);
+      }
+    } else {
+      close(client->s);
+      pthread_mutex_lock(&clients_mutex);
+      n_clients--;
+      pthread_mutex_unlock(&clients_mutex);
+      fprintf(stderr, "Client dismissed (%d connected)\n", n_clients);
+      pthread_exit(NULL);
+    }
+
+    s = read_line(client->s,expect_multiline_output);
+    if (s) {
+      /* fprintf(stderr, "Client (fid %d) returned: %s", client->s, s); */
+      fprintf(stderr, "Client (fid %d) returned data %d\n", client->s, cur->id);
+//    queue_print();
+      queue_finish(cur, s, client->s);
+    } else {
+      pthread_mutex_lock(&clients_mutex);
+      n_clients--;
+      pthread_mutex_unlock(&clients_mutex);
+
+      fprintf(stderr, "Client died (%d connected)\n", n_clients);
+      queue_abort(cur, client->s);
+
+      close(client->s);
+      free(client);
+
+      pthread_exit(NULL);
+    }
+
+  }
+  return 0;
+}
+
+void done (int code) {
+  close(s);
+  exit(code);
+}
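
new_client above treats any short write() as a dead client; on a blocking socket that is almost always right, but POSIX permits short writes and EINTR, so a more defensive variant would loop until the buffer drains. A sketch of such a helper, not part of this commit:

    #include <errno.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Write all n bytes of buf to fd, retrying short writes and EINTR.
       Returns n on success, -1 on a real error. */
    ssize_t write_all(int fd, const char *buf, size_t n) {
      size_t off = 0;
      while (off < n) {
        ssize_t k = write(fd, buf + off, n - off);
        if (k < 0) {
          if (errno == EINTR) continue;   /* interrupted: just retry */
          return -1;
        }
        off += (size_t)k;
      }
      return (ssize_t)n;
    }
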
+int main (int argc, char *argv[]) {
+  struct sockaddr_in sin, from;
+  int g;
+  socklen_t len;
+  struct clientinfo *client;
+  int port;
+  int opt;
+  int errors = 0;
+  int argi;
+  char *key = NULL, *client_key;
+  int use_key = 0;
+  /* the key stuff here doesn't provide any real measure of security,
+     it's mainly to keep jobs from bumping into each other. */
+
+  pthread_t tid;
+  port = DEFAULT_PORT;
+
+  for (argi=1; argi < argc; argi++){
+    if (strcmp(argv[argi], "-m")==0){
+      expect_multiline_output = 1;
+    } else if (strcmp(argv[argi], "-k")==0){
+      argi++;
+      if (argi == argc){
+        fprintf(stderr, "Key must be specified after -k\n");
+        exit(1);
+      }
+      key = argv[argi];
+      use_key = 1;
+    } else if (strcmp(argv[argi], "--stay-alive")==0){
+      stay_alive = 1; /* don't panic and die with zero clients */
+    } else {
+      port = atoi(argv[argi]);
+    }
+  }
+
+  /* Initialize data structures */
+  head = NULL;
+  ptail = &head;
+
+  /* Set up listener */
+  s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+  opt = 1;
+  setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+  sin.sin_family = AF_INET;
+  sin.sin_addr.s_addr = htonl(INADDR_ANY);
+  sin.sin_port = htons(port);
+  while (bind(s, (struct sockaddr *) &sin, sizeof(sin)) < 0) {
+    perror("bind()");
+    sleep(1);
+    errors++;
+    if (errors > 100)
+      exit(1);
+  }
+
+  len = sizeof(sin);
+  getsockname(s, (struct sockaddr *) &sin, &len);
+
+  fprintf(stderr, "Listening on port %hu\n", ntohs(sin.sin_port));
+
+  while (listen(s, MAX_CLIENTS) < 0) {
+    perror("listen()");
+    sleep(1);
+    errors++;
+    if (errors > 100)
+      exit(1);
+  }
+
+  for (;;) {
+    len = sizeof(from);
+    g = accept(s, (struct sockaddr *)&from, &len);
+    if (g < 0) {
+      perror("accept()");
+      sleep(1);
+      continue;
+    }
+    client = (clientinfo*)malloc(sizeof(struct clientinfo));
+    client->s = g;
+    bcopy(&from, &client->sin, len);
+
+    if (use_key){
+      fd_set set;
+      FD_ZERO(&set);
+      FD_SET(client->s, &set);
+
+      struct timeval timeout;
+      timeout.tv_sec = 3; // number of seconds for timeout
+      timeout.tv_usec = 0;
+
+      int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout);
+      if (ready < 1){
+        fprintf(stderr, "Prospective client failed to respond with correct key.\n");
+        close(client->s);
+        free(client);
+      } else {
+        client_key = read_line(client->s,0);
+        if (client_key == NULL){
+          fprintf(stderr, "Prospective client failed to respond with correct key.\n");
+          close(client->s);
+          free(client);
+          continue;
+        }
+        client_key[strlen(client_key)-1] = '\0'; /* chop trailing newline */
+        if (strcmp(key, client_key)==0){
+          pthread_create(&tid, NULL, new_client, client);
+        } else {
+          fprintf(stderr, "Prospective client failed to respond with correct key.\n");
+          close(client->s);
+          free(client);
+        }
+        free(client_key);
+      }
+    } else {
+      pthread_create(&tid, NULL, new_client, client);
+    }
+  }
+}
diff --git a/training/utils/sentserver.h b/training/utils/sentserver.h
new file mode 100644
index 00000000..cd17a546
--- /dev/null
+++ b/training/utils/sentserver.h
@@ -0,0 +1,6 @@
+#ifndef SENTSERVER_H
+#define SENTSERVER_H
+
+#define DEFAULT_PORT 50000
+
+#endif