From a95dc528678ef59d8c970d6eb16a581ec58f2539 Mon Sep 17 00:00:00 2001
From: Avneesh <avneesh@cmu.edu>
Date: Fri, 28 Sep 2012 11:09:33 -0700
Subject: adding latent SSVM code, modified Makefile.am and configure.ac files

---
 latent_svm/Makefile.am   |   6 +
 latent_svm/latent_svm.cc | 412 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 418 insertions(+)
 create mode 100644 latent_svm/Makefile.am
 create mode 100644 latent_svm/latent_svm.cc
diff --git a/latent_svm/Makefile.am b/latent_svm/Makefile.am
new file mode 100644
index 00000000..673b9159
--- /dev/null
+++ b/latent_svm/Makefile.am
@@ -0,0 +1,6 @@
+bin_PROGRAMS = latent_svm
+
+latent_svm_SOURCES = latent_svm.cc
+latent_svm_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/latent_svm/latent_svm.cc b/latent_svm/latent_svm.cc
new file mode 100644
index 00000000..ab9c1d5d
--- /dev/null
+++ b/latent_svm/latent_svm.cc
@@ -0,0 +1,412 @@
+/*
+Points to note regarding variable names:
+total_loss and prev_loss actually refer not to loss, but the metric (usually BLEU)
+*/
+#include <sstream>
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include <cmath>
+
+//boost libraries
+#include <boost/shared_ptr.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+//cdec libraries
+#include "config.h"
+#include "hg_sampler.h"
+#include "sentence_metadata.h"
+#include "scorer.h"
+#include "verbose.h"
+#include "viterbi.h"
+#include "hg.h"
+#include "prob.h"
+#include "kbest.h"
+#include "ff_register.h"
+#include "decoder.h"
+#include "filelib.h"
+#include "fdict.h"
+#include "weights.h"
+#include "sparse_vector.h"
+#include "sampler.h"
+
+using namespace std;
+using boost::shared_ptr;
+namespace po = boost::program_options;
+
+bool invert_score; 
+boost::shared_ptr<MT19937> rng; //random seed ptr
+
+// Fills *p_ids with a random permutation of 0..len-1 (Fisher-Yates shuffle driven by the global rng).
+void RandomPermutation(int len, vector<int>* p_ids) {
+  vector<int>& ids = *p_ids;
+  ids.resize(len);
+  for (int i = 0; i < len; ++i) ids[i] = i;
+  for (int i = len; i > 0; --i) {
+    const int r = rng->next() * i;  // presumably next() is uniform on [0,1], so r is 0..i-1 unless it returns exactly 1.0
+    swap(ids[i-1], ids[r < i ? r : i - 1]);  // clamp so that r == i can never index outside the not-yet-shuffled prefix
+  }
+}
+
+// Parses the command line (plus the optional --config file) into *conf; returns false after printing usage or an error.
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("weights,w",po::value<string>(),"[REQD] Input feature weights file")
+        ("input,i",po::value<string>(),"[REQD] Input source file for development set")
+        ("passes,p", po::value<int>()->default_value(15), "Number of passes through the training data")
+        ("weights_write_interval,n", po::value<int>()->default_value(1000), "Number of lines between writing out weights")
+        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
+        ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+        ("regularizer_strength,C", po::value<double>()->default_value(0.01), "regularization strength")
+        ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Cost function is -mt_metric_scale*BLEU")
+        ("costaug_log_bleu,l", "Flag converts BLEU to log space. Cost function is thus -mt_metric_scale*log(BLEU). Not on by default")
+        ("average,A", "Average the weights (this is a weighted average due to the scaling factor)")
+        ("mu,u", po::value<double>()->default_value(0.0), "weight (between 0 and 1) to scale model score by for oracle selection")
+        ("stepsize_param,a", po::value<double>()->default_value(0.01), "Stepsize parameter, during optimization")
+        ("stepsize_reduce,t", "Divide step size by sqrt(number of examples seen so far), as per Ratliff et al., 2007")
+        ("metric_threshold,T", po::value<double>()->default_value(0.0), "Threshold for diff between oracle BLEU and cost-aug BLEU for updating the weights")
+        ("check_positive,P", "Check that the loss is positive before updating")
+        ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles")
+        ("best_ever,b", "Keep track of the best hypothesis we've ever seen (metric score), and use that as the reference")
+        ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
+        ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+  po::options_description clo("Command line options");
+  clo.add_options()
+        ("config", po::value<string>(), "Configuration file")
+        ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);            // options accepted inside the configuration file
+  dcmdline_options.add(opts).add(clo);  // the command line additionally accepts --config and --help
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    if (!config) { cerr << "Cannot read configuration file " << (*conf)["config"].as<string>() << endl; return false; }
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+  if (conf->count("help") || !conf->count("weights") || !conf->count("input") || !conf->count("decoder_config") || !conf->count("reference")) {  // help requested or a required option is missing
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+double scaling_trick = 1; // see http://blog.smola.org/post/940672544/fast-quadratic-regularization-for-online-learning
+/* Cost-augmented score used to select the negative example: model score plus cost.
+   The cost is -mt_metric_scale*metric, or -mt_metric_scale*log(metric) when logbleu is set. */
+double cost_augmented_score(const LogVal<double> model_score, const double mt_metric_score, const double mt_metric_scale, const bool logbleu) {
+  // A zero metric has no finite log; such hypotheses get a fixed, very low score instead.
+  // NOTE(review): -scale*log(0) would be +inf, so this sentinel has the opposite sign - confirm it is intended.
+  if (logbleu && mt_metric_score == 0)
+    return -1000000;
+  // Metric term of the cost, taken in log space when requested.
+  const double metric_term = logbleu ? log(mt_metric_score) : mt_metric_score;
+  // NOTE: log(model_score) is just the model score, i.e. feature weights * features
+  return log(model_score) * scaling_trick + (- mt_metric_scale * metric_term);
+}
+
+/* Score used for oracle selection: interpolates model score and metric with weight mu,
+   i.e. mu * (feature_values . feature_weights) + (1 - mu) * metric term. */
+double muscore(const vector<weight_t>& feature_weights, const SparseVector<double>& feature_values, const double mt_metric_score, const double mu, const bool logbleu) {
+  // Metric term: the raw metric, or its log when logbleu is set.
+  double metric_term = mt_metric_score;
+  if (logbleu)
+    metric_term = (mt_metric_score != 0) ? log(mt_metric_score) : -1000000;  // log(0) is -inf; use a large negative stand-in
+  const double model_term = feature_values.dot(feature_weights) * mu;
+  return model_term + (1 - mu) * metric_term;
+}
+
+static const double kMINUS_EPSILON = -1e-6;  // NOTE(review): not referenced anywhere in the visible part of this file
+
+struct HypothesisInfo {  // one scored hypothesis taken from a k-best list
+  SparseVector<double> features;  // feature values of the derivation
+  double mt_metric_score;         // metric score assigned to the hypothesis (see the naming note at the top of the file)
+  // The model score changes when the feature weights change, so it is not stored here.
+  // It must be recomputed every time it is needed.
+};
+
+struct GoodOracle {  // per-sentence slot for an oracle hypothesis; main() allocates one per corpus line
+  shared_ptr<HypothesisInfo> good;  // null until an oracle has been stored for the sentence
+};
+
+// Decoder observer: scores each sentence's k-best list and records the model-best, cost-augmented and oracle hypotheses.
+struct TrainingObserver : public DecoderObserver {
+  TrainingObserver(const int k,
+                   const DocScorer& d,
+                   vector<GoodOracle>* o,
+                   const vector<weight_t>& feat_weights,
+                   const double metric_scale,
+                   const double Mu,
+                   const bool bestever,
+                   const bool LogBleu) : ds(d), feature_weights(feat_weights), oracles(*o), kbest_size(k), mt_metric_scale(metric_scale), mu(Mu), best_ever(bestever), log_bleu(LogBleu) {}
+  const DocScorer& ds;                          // per-sentence scorers, indexed by sentence id
+  const vector<weight_t>& feature_weights;      // caller-owned weights, read when mu > 0
+  vector<GoodOracle>& oracles;                  // caller-owned per-sentence oracle slots
+  shared_ptr<HypothesisInfo> cur_best;          // first entry of the k-best list (highest model score)
+  shared_ptr<HypothesisInfo> cur_costaug_best;  // entry with the highest cost-augmented score
+  shared_ptr<HypothesisInfo> cur_ref;           // oracle used as the positive example
+  const int kbest_size;
+  const double mt_metric_scale;
+  const double mu;
+  const bool best_ever;
+  const bool log_bleu;
+
+  const HypothesisInfo& GetCurrentBestHypothesis() const {
+    return *cur_best;
+  }
+
+  const HypothesisInfo& GetCurrentCostAugmentedHypothesis() const {
+    return *cur_costaug_best;
+  }
+
+  const HypothesisInfo& GetCurrentReference() const {
+    return *cur_ref;
+  }
+
+  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
+    UpdateOracles(smeta.GetSentenceID(), *hg);
+  }
+
+  shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double metric) {
+    shared_ptr<HypothesisInfo> h(new HypothesisInfo);
+    h->features = feats;
+    h->mt_metric_score = metric;
+    return h;
+  }
+
+  void UpdateOracles(int sent_id, const Hypergraph& forest) {
+    // Start from the oracle stored for this sentence when best_ever is on, otherwise from scratch.
+    cur_ref.reset();
+    if (best_ever) cur_ref = oracles[sent_id].good;
+
+    KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
+    double costaug_best_score = 0;
+
+    for (int i = 0; i < kbest_size; ++i) {
+      const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+      if (!d) break;
+      double mt_metric_score = (invert_score ? -1.0 : 1.0) * ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore(); // negated when invert_score is set (TER), so that higher is always better
+      const SparseVector<double>& feature_vals = d->feature_values;
+      double costaugmented_score = cost_augmented_score(d->score, mt_metric_score, mt_metric_scale, log_bleu); //note that d->score, i.e., model score, is passed in
+      if (i == 0) { //i.e., setting up cur_best to be model score highest, and initializing costaug_best
+        cur_best = MakeHypothesisInfo(feature_vals, mt_metric_score);
+        cur_costaug_best = cur_best;
+        costaug_best_score = costaugmented_score;
+      }
+      if (costaugmented_score > costaug_best_score) {   // kbest_mira's cur_bad, i.e., "fear" derivation
+        cur_costaug_best = MakeHypothesisInfo(feature_vals, mt_metric_score);
+        costaug_best_score = costaugmented_score;
+      }
+      double cur_muscore = mt_metric_score;
+      if (!cur_ref)   // kbest_mira's cur_good, i.e., "hope" derivation
+        cur_ref =  MakeHypothesisInfo(feature_vals, cur_muscore);
+      else {
+          double cur_ref_muscore = cur_ref->mt_metric_score;
+          if(mu > 0) { //select oracle with mixture of model score and BLEU
+              cur_ref_muscore =  muscore(feature_weights, cur_ref->features, cur_ref->mt_metric_score, mu, log_bleu);
+              cur_muscore = muscore(feature_weights, d->feature_values, mt_metric_score, mu, log_bleu);
+          }
+          if (cur_muscore > cur_ref_muscore) //replace oracle
+            cur_ref = MakeHypothesisInfo(feature_vals, mt_metric_score);
+      }
+    }
+    if (best_ever) oracles[sent_id].good = cur_ref;  // remember the best oracle seen so far for this sentence
+  }
+};
+
+// Reads the file named fname line by line (through ReadFile) and
+// appends every line to *c, keeping the order of the file.
+void ReadTrainingCorpus(const string& fname, vector<string>* c) {
+  ReadFile reader(fname);
+  istream& input = *reader.stream();
+  string buf;
+  // getline() leaves the stream in a failed state once nothing more can be read.
+  while (getline(input, buf))
+    c->push_back(buf);
+}
+
+bool ApproxEqual(double a, double b) {
+  // True on exact equality, otherwise when |a-b| relative to |b| is below 1e-6.
+  return a == b || (fabs(a - b) / fabs(b)) < 0.000001;
+}
+
+// Latent structured-SVM trainer: decodes the training corpus sentence by sentence and, after each one, takes a
+// subgradient step moving the weights towards the oracle hypothesis and away from the cost-augmented one.
+int main(int argc, char** argv) {
+  register_feature_functions();
+  SetSilent(true);  // turn off verbose decoder output
+
+  po::variables_map conf;
+  if (!InitCommandLine(argc, argv, &conf)) return 1;
+
+  if (conf.count("random_seed"))
+    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    rng.reset(new MT19937);
+
+  const bool best_ever = conf.count("best_ever") > 0;
+  vector<string> corpus;
+  ReadTrainingCorpus(conf["input"].as<string>(), &corpus);
+
+  const string metric_name = conf["mt_metric"].as<string>(); //set up scoring; this may need to be changed!!
+  ScoreType type = ScoreTypeFromString(metric_name);
+  invert_score = (type == TER);  // TER is the only metric that sets the global invert_score flag
+  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
+  if (ds.size() != corpus.size()) {
+    cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
+    return 1;
+  }
+
+  ReadFile ini_rf(conf["decoder_config"].as<string>());
+  Decoder decoder(ini_rf.stream());
+
+  // load initial weights
+  vector<weight_t>& decoder_weights = decoder.CurrentWeightVector(); //equivalent to "dense_weights" vector in kbest_mira.cc
+  SparseVector<weight_t> sparse_weights; //equivalent to kbest_mira.cc "lambdas"
+  Weights::InitFromFile(conf["weights"].as<string>(), &decoder_weights);
+  Weights::InitSparseVector(decoder_weights, &sparse_weights);
+
+  //initializing other algorithm and output parameters
+  const double c = conf["regularizer_strength"].as<double>();
+  const int weights_write_interval = conf["weights_write_interval"].as<int>();
+  const double mt_metric_scale = conf["mt_metric_scale"].as<double>();
+  const double mu = conf["mu"].as<double>();
+  const double metric_threshold = conf["metric_threshold"].as<double>();
+  const double stepsize_param = conf["stepsize_param"].as<double>(); //step size in structured SGD optimization step
+  const bool stepsize_reduce = conf.count("stepsize_reduce") > 0;
+  const bool costaug_log_bleu = conf.count("costaug_log_bleu") > 0;
+  const bool average = conf.count("average") > 0;
+  const bool checkpositive = conf.count("check_positive") > 0;
+
+  assert(corpus.size() > 0);
+  vector<GoodOracle> oracles(corpus.size());
+  TrainingObserver observer(conf["k_best_size"].as<int>(),  // kbest size
+                            ds,                             // doc scorer
+                            &oracles,
+                            decoder_weights,
+                            mt_metric_scale,
+                            mu,
+                            best_ever,
+                            costaug_log_bleu);
+  int cur_sent = 0;         // position within the current pass
+  int line_count = 0;       // sentences processed over all passes
+  int normalizer = 0;       // number of weight vectors summed into tot
+  double total_loss = 0;    // sum of the metric over the current pass (see note at top of file)
+  double prev_loss = 0;     // total_loss at the last interval write
+  int dots = 0;             // progress bar
+  int cur_pass = 0;         // number of completed passes
+  SparseVector<double> tot;
+  tot += sparse_weights; //add initial weights to total
+  normalizer++; //add 1 to normalizer
+  int max_iteration = conf["passes"].as<int>();
+  string msg = "# LatentSVM tuned weights";
+  vector<int> order;
+  int interval_counter = 0;
+  RandomPermutation(corpus.size(), &order); //shuffle corpus
+  // One training sentence per iteration; the report for a finished pass is produced at the top of the next iteration.
+  while (cur_pass < max_iteration) { //loop over all (passes * num sentences) examples
+    if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.';}
+    if (weights_write_interval > 0 && interval_counter == weights_write_interval) { //i.e., we need to write out weights (an interval of 0 would divide by zero below)
+      sparse_weights *= scaling_trick;
+      tot *= scaling_trick;
+      scaling_trick = 1;
+      cerr << " [SENTENCE NUMBER= " << cur_sent << "\n";
+      cerr << " [AVG METRIC LAST INTERVAL =" << ((total_loss - prev_loss) / weights_write_interval) << "]\n";
+      cerr << " [AVG METRIC THIS PASS THUS FAR =" << (total_loss / cur_sent) << "]\n";
+      cerr << " [TOTAL LOSS: =" << total_loss << "\n";
+      Weights::ShowLargestFeatures(decoder_weights);
+      interval_counter = 0;
+      prev_loss = total_loss;
+      if (average){
+        SparseVector<double> x = tot;
+        x /= normalizer;
+        ostringstream sa;
+        sa << "weights.latentsvm-" << line_count/weights_write_interval << "-avg.gz";
+        x.init_vector(&decoder_weights);
+        Weights::WriteToFile(sa.str(), decoder_weights, true, &msg);
+      }
+      else {
+        ostringstream os;
+        os << "weights.latentsvm-" << line_count/weights_write_interval << ".gz";
+        sparse_weights.init_vector(&decoder_weights);
+        Weights::WriteToFile(os.str(), decoder_weights, true, &msg);
+      }
+    }
+    if (corpus.size() == cur_sent) { //i.e., finished a pass
+      cerr << " [AVG METRIC LAST PASS=" << (total_loss / corpus.size()) << "]\n";
+      cerr << " TOTAL LOSS: " << total_loss << "\n";
+      Weights::ShowLargestFeatures(decoder_weights);
+      cur_sent = 0;
+      total_loss = 0;
+      dots = 0;
+      if(average) {
+        SparseVector<double> x = tot;
+        x /= normalizer;
+        ostringstream sa;
+        sa << "weights.latentsvm-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "-avg.gz";
+        x.init_vector(&decoder_weights);
+        Weights::WriteToFile(sa.str(), decoder_weights, true, &msg);
+      }
+      else {
+        ostringstream os;
+        os << "weights.latentsvm-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz";
+        sparse_weights.init_vector(&decoder_weights);  // pick up the update made after the last decode
+        Weights::WriteToFile(os.str(), decoder_weights, true, &msg);
+      }
+      cur_pass++;
+      // The last pass has been reported and written out: stop without decoding another sentence.
+      if (cur_pass >= max_iteration) break;
+      RandomPermutation(corpus.size(), &order);
+    }
+    if (cur_sent == 0) { //i.e., starting a new pass
+      cerr << "PASS " << (line_count / corpus.size() + 1) << endl;
+    }
+    sparse_weights.init_vector(&decoder_weights);   // copy sparse_weights to the decoder weights
+    decoder.SetId(order[cur_sent]); //assign current sentence
+    decoder.Decode(corpus[order[cur_sent]], &observer);  // decode/update oracles
+
+    const HypothesisInfo& cur_best = observer.GetCurrentBestHypothesis(); //model score best
+    const HypothesisInfo& cur_costaug = observer.GetCurrentCostAugmentedHypothesis(); //(model + cost) best; cost = -metric_scale*log(BLEU) or -metric_scale*BLEU
+    const HypothesisInfo& cur_ref = observer.GetCurrentReference();  //if mu > 0, this mu-mixed oracle will be picked; otherwise, only on BLEU
+    total_loss += cur_best.mt_metric_score;
+
+    double step_size = stepsize_param;
+    if (stepsize_reduce){       // w_{t+1} = w_t - stepsize_t * grad(Loss)
+        step_size  /= (sqrt(cur_sent+1.0));  // NOTE(review): cur_sent restarts every pass, while the help text says "examples seen so far" - confirm
+    }
+    //actual update step - compute gradient, and modify sparse_weights
+    if(cur_ref.mt_metric_score - cur_costaug.mt_metric_score > metric_threshold) {
+      const double loss = (cur_costaug.features.dot(decoder_weights) - cur_ref.features.dot(decoder_weights)) * scaling_trick + mt_metric_scale * (cur_ref.mt_metric_score - cur_costaug.mt_metric_score);
+      if (!checkpositive || loss > 0.0) { //can update either all the time if check positive is off, or only when loss > 0 if it's on
+        sparse_weights -= cur_costaug.features * step_size / ((1.0-2.0*step_size*c)*scaling_trick);    // cost augmented hyp orig -
+        sparse_weights += cur_ref.features * step_size / ((1.0-2.0*step_size*c)*scaling_trick);        // ref orig +
+      }
+    }
+    scaling_trick *= (1.0 - 2.0 * step_size * c);  // regularization shrinkage, applied through the scalar instead of every weight
+
+    tot += sparse_weights; //for averaging purposes
+    normalizer++; //for averaging purposes
+    line_count++;
+    interval_counter++;
+    cur_sent++;
+  }
+  cerr << endl;
+  if(average) {
+    tot /= normalizer;
+    tot.init_vector(&decoder_weights);
+    msg = "# Latent SSVM tuned weights (averaged vector)";
+    Weights::WriteToFile("weights.latentsvm-final-avg.gz", decoder_weights, true, &msg);
+    cerr << "Optimization complete.\n" << "AVERAGED WEIGHTS: weights.latentsvm-final-avg.gz\n";
+  } else {
+    // decoder_weights was last synced before the final update: refresh it, folding in the pending scaling.
+    sparse_weights *= scaling_trick;
+    sparse_weights.init_vector(&decoder_weights);
+    Weights::WriteToFile("weights.latentsvm-final.gz", decoder_weights, true, &msg);
+    cerr << "Optimization complete.\n";
+  }
+  return 0;
+}
+
-- 
cgit v1.2.3


From 2389a5a8a43dda87c355579838559515b0428421 Mon Sep 17 00:00:00 2001
From: Avneesh <avneesh@cmu.edu>
Date: Fri, 28 Sep 2012 11:22:55 -0700
Subject: latent SSVM code, new Makefile.am and configure.ac

---
 Makefile.am  | 3 ++-
 configure.ac | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index 24aafd63..b16fc90f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -11,6 +11,7 @@ SUBDIRS = \
   training \
   training/liblbfgs \
   mira \
+  latent_svm \
   dtrain \
   dpmert \
   pro-train \
@@ -18,7 +19,7 @@ SUBDIRS = \
   minrisk \
   gi/pf \
   gi/markov_al \
-  rst_parser
+  rst_parser 
 
 #gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
 
diff --git a/configure.ac b/configure.ac
index ea9e84fb..dc68ab77 100644
--- a/configure.ac
+++ b/configure.ac
@@ -124,6 +124,7 @@ AC_CONFIG_FILES([minrisk/Makefile])
 AC_CONFIG_FILES([klm/util/Makefile])
 AC_CONFIG_FILES([klm/lm/Makefile])
 AC_CONFIG_FILES([mira/Makefile])
+AC_CONFIG_FILES([latent_svm/Makefile])
 AC_CONFIG_FILES([dtrain/Makefile])
 AC_CONFIG_FILES([gi/pyp-topics/src/Makefile])
 AC_CONFIG_FILES([gi/clda/src/Makefile])
-- 
cgit v1.2.3


From f9ec8ec31e5a0b6ccc352c834733462eae2481a0 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <simianer@cl.uni-heidelberg.de>
Date: Sat, 22 Dec 2012 00:57:06 +0100
Subject: merge

---
 Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index dbf604a1..5cfa547b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -17,5 +17,5 @@ SUBDIRS = \
 
 AUTOMAKE_OPTIONS = foreign
 ACLOCAL_AMFLAGS = -I m4
-AM_CPPFLAGS =  -D_GLIBCXX_PARALLEL
+AM_CPPFLAGS = -D_GLIBCXX_PARALLEL -march=native -mtune=native -O2 -pipe -fomit-frame-pointer -Wall
 
-- 
cgit v1.2.3


From 8b399cb09513cd79ed4182be9f75882c1e1b336a Mon Sep 17 00:00:00 2001
From: Patrick Simianer <simianer@cl.uni-heidelberg.de>
Date: Fri, 18 Jan 2013 14:36:51 +0100
Subject: parallelize enhancements

---
 training/dtrain/dtrain.h                         |  2 +-
 training/dtrain/parallelize.rb                   | 99 +++++++++++++++++-------
 training/dtrain/test/parallelize/cdec.ini        |  2 +-
 training/dtrain/test/parallelize/in              | 20 ++---
 training/dtrain/test/parallelize/refs            | 20 ++---
 training/dtrain/test/parallelize/test/cdec.ini   | 22 ------
 training/dtrain/test/parallelize/test/dtrain.ini | 15 ----
 training/dtrain/test/parallelize/test/in         | 10 ---
 training/dtrain/test/parallelize/test/refs       | 10 ---
 9 files changed, 91 insertions(+), 109 deletions(-)
 delete mode 100644 training/dtrain/test/parallelize/test/cdec.ini
 delete mode 100644 training/dtrain/test/parallelize/test/dtrain.ini
 delete mode 100644 training/dtrain/test/parallelize/test/in
 delete mode 100644 training/dtrain/test/parallelize/test/refs

diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index 4b6f415c..572fd613 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -3,7 +3,7 @@
 
 #undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs
                                  // DO NOT USE WITH SVM!
-//#define DTRAIN_LOCAL
+#define DTRAIN_LOCAL
 #define DTRAIN_DOTS 10 // after how many inputs to display a '.'
 #define DTRAIN_GRAMMAR_DELIM "########EOS########"
 #define DTRAIN_SCALE 100000
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index eb4148f5..92ce1f6f 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -1,80 +1,119 @@
 #!/usr/bin/env ruby
 
 
-if ARGV.size != 5
+if ARGV.size != 7
   STDERR.write "Usage: "
-  STDERR.write "ruby parallelize.rb <#shards> <input> <refs> <epochs> <dtrain.ini>\n"
+  STDERR.write "ruby parallelize.rb <dtrain.ini> <epochs> <rand=true|false> <#shards|predef> <at once> <input> <refs>\n"
   exit
 end
 
-cdec_dir   = '/path/to/cdec_dir'
-dtrain_bin = "#{cdec_dir}/training/dtrain/dtrain_local"
+cdec_dir   = '~/mt/cdec-dtrain/'
+dtrain_bin = "~/bin/dtrain_local"
 ruby       = '/usr/bin/ruby'
 lplp_rb    = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb"
 lplp_args  = 'l2 select_k 100000'
-gzip       = '/bin/gzip'
-
-num_shards = ARGV[0].to_i
-input      = ARGV[1]
-refs       = ARGV[2]
-epochs     = ARGV[3].to_i
-ini        = ARGV[4]
 
+ini        = ARGV[0]
+epochs     = ARGV[1].to_i
+rand = false
+rand = true if ARGV[2]=='true'
+predefined_shards = false
+if ARGV[3] == 'predef'
+  predefined_shards = true
+  num_shards = -1
+else
+  num_shards = ARGV[3].to_i
+end
+shards_at_once = ARGV[4].to_i
+input = ARGV[5]
+refs  = ARGV[6]
 
 `mkdir work`
 
-def make_shards(input, refs, num_shards)
+def make_shards(input, refs, num_shards, epoch, rand)
   lc = `wc -l #{input}`.split.first.to_i
+  index = (0..lc-1).to_a
+  index.reverse!
+  index.shuffle! if rand
   shard_sz = lc / num_shards
   leftover = lc % num_shards
   in_f = File.new input, 'r'
+  in_lines = in_f.readlines
   refs_f = File.new refs, 'r'
+  refs_lines = refs_f.readlines
   shard_in_files = []
   shard_refs_files = []
+  in_fns = []
+  refs_fns = []
   0.upto(num_shards-1) { |shard|
-    shard_in = File.new "work/shard.#{shard}.in", 'w+'
-    shard_refs = File.new "work/shard.#{shard}.refs", 'w+'
+    in_fn = "work/shard.#{shard}.#{epoch}.in"
+    shard_in = File.new in_fn, 'w+'
+    in_fns << in_fn
+    refs_fn = "work/shard.#{shard}.#{epoch}.refs"
+    shard_refs = File.new refs_fn, 'w+'
+    refs_fns << refs_fn
     0.upto(shard_sz-1) { |i|
-      shard_in.write in_f.gets
-      shard_refs.write refs_f.gets
+      j = index.pop 
+      shard_in.write in_lines[j]
+      shard_refs.write refs_lines[j]
     }
     shard_in_files << shard_in
     shard_refs_files << shard_refs
   }
   while leftover > 0
-    shard_in_files[-1].write in_f.gets
-    shard_refs_files[-1].write refs_f.gets
+    j = index.pop
+    shard_in_files[-1].write in_lines[j]
+    shard_refs_files[-1].write refs_lines[j]
     leftover -= 1
   end
   (shard_in_files + shard_refs_files).each do |f| f.close end
   in_f.close
   refs_f.close
+  return [in_fns, refs_fns]
 end
 
-make_shards input, refs, num_shards
+input_files = []
+refs_files = []
+if predefined_shards
+  input_files = File.new(input).readlines.map {|i| i.strip }
+  refs_files = File.new(refs).readlines.map {|i| i.strip }
+  num_shards = input_files.size
+else
+  input_files, refs_files = make_shards input, refs, num_shards, 0, rand
+end
 
 0.upto(epochs-1) { |epoch|
+  puts "epoch #{epoch+1}"
   pids = []
   input_weights = ''
   if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end
   weights_files = []
-  0.upto(num_shards-1) { |shard|
-    pids << Kernel.fork {
-      `#{dtrain_bin} -c #{ini}\
-        --input work/shard.#{shard}.in\
-        --refs work/shard.#{shard}.refs #{input_weights}\
-        --output work/weights.#{shard}.#{epoch}\
-        &> work/out.#{shard}.#{epoch}`
+  shard = 0
+  remaining_shards = num_shards
+  while remaining_shards > 0
+    [[shards_at_once, 1].max, remaining_shards].min.times {  # at least one, never more than the shards that are left
+      pids << Kernel.fork {
+        `#{dtrain_bin} -c #{ini}\
+          --input #{input_files[shard]}\
+          --refs #{refs_files[shard]} #{input_weights}\
+          --output work/weights.#{shard}.#{epoch}\
+          &> work/out.#{shard}.#{epoch}`
+      }
+      weights_files << "work/weights.#{shard}.#{epoch}"
+      shard += 1
+      remaining_shards -= 1
     }
-    weights_files << "work/weights.#{shard}.#{epoch}"
-  }
-  pids.each { |pid| Process.wait(pid) }
+    pids.each { |pid| Process.wait(pid) }
+    pids.clear
+  end
   cat = File.new('work/weights_cat', 'w+')
   weights_files.each { |f| cat.write File.new(f, 'r').read }
   cat.close
   `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}`
+  if rand and epoch+1!=epochs
+    input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand
+  end
 }
 
 `rm work/weights_cat`
-`#{gzip} work/*`
 
diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini
index 72e99dc5..e118374b 100644
--- a/training/dtrain/test/parallelize/cdec.ini
+++ b/training/dtrain/test/parallelize/cdec.ini
@@ -4,7 +4,7 @@ intersection_strategy=cube_pruning
 cubepruning_pop_limit=200
 scfg_max_span_limit=15
 feature_function=WordPenalty
-feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5
+feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
 #feature_function=ArityPenalty
 #feature_function=CMR2008ReorderingFeatures
 #feature_function=Dwarf
diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in
index a312809f..3b7dec39 100644
--- a/training/dtrain/test/parallelize/in
+++ b/training/dtrain/test/parallelize/in
@@ -1,10 +1,10 @@
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.0.gz" id="0">barack obama erhält als vierter us @-@ präsident den frieden nobelpreis</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.1.gz" id="1">der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.2.gz" id="2">darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.3.gz" id="3">der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.4.gz" id="4">zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.5.gz" id="5">das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.6.gz" id="6">nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.7.gz" id="7">diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.8.gz" id="8">das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.9.gz" id="9">der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt .</seg>
+<seg grammar="g/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg>
+<seg grammar="g/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg>
+<seg grammar="g/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg>
+<seg grammar="g/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg>
+<seg grammar="g/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg>
+<seg grammar="g/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg>
+<seg grammar="g/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg>
+<seg grammar="g/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg>
+<seg grammar="g/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg>
+<seg grammar="g/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg>
diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs
index 4d3128cb..632e27b0 100644
--- a/training/dtrain/test/parallelize/refs
+++ b/training/dtrain/test/parallelize/refs
@@ -1,10 +1,10 @@
-barack obama becomes the fourth american president to receive the nobel peace prize
-the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so .
-he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things .
-the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule .
-first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations .
-the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway .
-then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award .
-he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office .
-the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan .
-the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries .
+europe 's divided racial house
+a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
+the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
+while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
+an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
+mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
+it will not , as america 's racial history clearly shows .
+race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
+the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
+this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/test/parallelize/test/cdec.ini b/training/dtrain/test/parallelize/test/cdec.ini
deleted file mode 100644
index 72e99dc5..00000000
--- a/training/dtrain/test/parallelize/test/cdec.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-scfg_max_span_limit=15
-feature_function=WordPenalty
-feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-#feature_function=RuleIdentityFeatures
-#feature_function=RuleNgramFeatures
-#feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/training/dtrain/test/parallelize/test/dtrain.ini b/training/dtrain/test/parallelize/test/dtrain.ini
deleted file mode 100644
index 03f9d240..00000000
--- a/training/dtrain/test/parallelize/test/dtrain.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-k=100
-N=4
-learning_rate=0.0001
-gamma=0
-loss_margin=0
-epochs=1
-scorer=stupid_bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=XYX
-hi_lo=0.1
-select_weights=last
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-decoder_config=cdec.ini
diff --git a/training/dtrain/test/parallelize/test/in b/training/dtrain/test/parallelize/test/in
deleted file mode 100644
index a312809f..00000000
--- a/training/dtrain/test/parallelize/test/in
+++ /dev/null
@@ -1,10 +0,0 @@
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.0.gz" id="0">barack obama erhält als vierter us @-@ präsident den frieden nobelpreis</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.1.gz" id="1">der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.2.gz" id="2">darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.3.gz" id="3">der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.4.gz" id="4">zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.5.gz" id="5">das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.6.gz" id="6">nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.7.gz" id="7">diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.8.gz" id="8">das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird .</seg>
-<seg grammar="/stor/dat/wmt12/dev/newstest2010/g/grammar.out.9.gz" id="9">der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt .</seg>
diff --git a/training/dtrain/test/parallelize/test/refs b/training/dtrain/test/parallelize/test/refs
deleted file mode 100644
index 4d3128cb..00000000
--- a/training/dtrain/test/parallelize/test/refs
+++ /dev/null
@@ -1,10 +0,0 @@
-barack obama becomes the fourth american president to receive the nobel peace prize
-the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so .
-he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things .
-the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule .
-first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations .
-the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway .
-then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award .
-he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office .
-the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan .
-the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries .
-- 
cgit v1.2.3


From 0c02f35192e7cec1298c94065dee4a32a6730252 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <simianer@cl.uni-heidelberg.de>
Date: Thu, 24 Jan 2013 15:28:03 +0100
Subject: enable qsub use

---
 environment/LocalConfig.pm     |  7 +++++++
 training/dtrain/parallelize.rb | 25 +++++++++++++++----------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm
index b9549c6e..627f7f8c 100644
--- a/environment/LocalConfig.pm
+++ b/environment/LocalConfig.pm
@@ -12,6 +12,7 @@ my $host = domainname;
 
 # keys are: HOST_REGEXP, MERTMem, QSubQueue, QSubMemFlag, QSubExtraFlags
 my $CCONFIG = {
+
   'StarCluster' => {
     'HOST_REGEXP' => qr/compute-\d+\.internal$/,
     'JobControl'  => 'qsub',
@@ -67,6 +68,12 @@ my $CCONFIG = {
     'JobControl'  => 'fork',
     'DefaultJobs' => 12,
   },
+  'cluster.cl.uni-heidelberg.de' => {
+    'HOST_REGEXP' => qr/node25/,
+    'JobControl'  => 'qsub',
+    'QSubMemFlag' => '-l h_vmem=',
+    'DefaultJobs' => 13,
+  },
   'LOCAL' => {  # LOCAL must be last in the list!!!
     'HOST_REGEXP' => qr//,
     'QSubMemFlag' => ' ',
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 92ce1f6f..6e30cf9d 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -3,15 +3,16 @@
 
 if ARGV.size != 7
   STDERR.write "Usage: "
-  STDERR.write "ruby parallelize.rb <dtrain.ini> <epochs> <rand=true|false> <#shards|predef> <at once> <input> <refs>\n"
+  STDERR.write "ruby parallelize.rb <dtrain.ini> <epochs> <rand=true|false> <#shards|predef> <at once> <input> <refs> <qsub>\n"
   exit
 end
 
-cdec_dir   = '~/mt/cdec-dtrain/'
-dtrain_bin = "~/bin/dtrain_local"
+cdec_dir   = '~/MAREC/cdec-dtrain/'
+dtrain_bin = "~/MAREC/cdec-dtrain/training/dtrain/dtrain"
 ruby       = '/usr/bin/ruby'
 lplp_rb    = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb"
 lplp_args  = 'l2 select_k 100000'
+cat        = '/bin/cat'
 
 ini        = ARGV[0]
 epochs     = ARGV[1].to_i
@@ -27,6 +28,8 @@ end
 shards_at_once = ARGV[4].to_i
 input = ARGV[5]
 refs  = ARGV[6]
+use_qsub   = false
+use_qsub = true if ARGV[7]
 
 `mkdir work`
 
@@ -92,12 +95,16 @@ end
   remaining_shards = num_shards
   while remaining_shards > 0
     shards_at_once.times {
+      qsub_str_start = qsub_str_end = ''
+      if use_qsub
+        qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \""
+        qsub_str_end = "\""
+      end
       pids << Kernel.fork {
-        `#{dtrain_bin} -c #{ini}\
+        `#{qsub_str_start}#{dtrain_bin} -c #{ini}\
           --input #{input_files[shard]}\
           --refs #{refs_files[shard]} #{input_weights}\
-          --output work/weights.#{shard}.#{epoch}\
-          &> work/out.#{shard}.#{epoch}`
+          --output work/weights.#{shard}.#{epoch}#{qsub_str_end}`
       }
       weights_files << "work/weights.#{shard}.#{epoch}"
       shard += 1
@@ -106,10 +113,8 @@ end
     pids.each { |pid| Process.wait(pid) }
     pids.clear
   end
-  cat = File.new('work/weights_cat', 'w+')
-  weights_files.each { |f| cat.write File.new(f, 'r').read }
-  cat.close
-  `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}`
+  `#{cat} work/weights.*.#{epoch} > work/weights_cat`
+  `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}`
   if rand and epoch+1!=epochs
     input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand
   end
-- 
cgit v1.2.3


From 7c4a9e0825b15ce6c08c45c7654c614d542cf93a Mon Sep 17 00:00:00 2001
From: Patrick Simianer <simianer@cl.uni-heidelberg.de>
Date: Thu, 24 Jan 2013 16:28:23 +0100
Subject: made examples work again

---
 training/dtrain/parallelize.rb                      |  12 ++++++++----
 training/dtrain/test/example/README                 |   4 ++--
 training/dtrain/test/example/cdec.ini               |   2 +-
 training/dtrain/test/example/dtrain.ini             |   4 ++--
 training/dtrain/test/parallelize/README             |   5 +++++
 training/dtrain/test/parallelize/cdec.ini           |   2 +-
 training/dtrain/test/parallelize/g/grammar.out.0.gz | Bin 0 -> 8318 bytes
 training/dtrain/test/parallelize/g/grammar.out.1.gz | Bin 0 -> 358560 bytes
 training/dtrain/test/parallelize/g/grammar.out.2.gz | Bin 0 -> 1014466 bytes
 training/dtrain/test/parallelize/g/grammar.out.3.gz | Bin 0 -> 391811 bytes
 training/dtrain/test/parallelize/g/grammar.out.4.gz | Bin 0 -> 149590 bytes
 training/dtrain/test/parallelize/g/grammar.out.5.gz | Bin 0 -> 537024 bytes
 training/dtrain/test/parallelize/g/grammar.out.6.gz | Bin 0 -> 291286 bytes
 training/dtrain/test/parallelize/g/grammar.out.7.gz | Bin 0 -> 1038140 bytes
 training/dtrain/test/parallelize/g/grammar.out.8.gz | Bin 0 -> 419889 bytes
 training/dtrain/test/parallelize/g/grammar.out.9.gz | Bin 0 -> 409140 bytes
 16 files changed, 19 insertions(+), 10 deletions(-)
 create mode 100644 training/dtrain/test/parallelize/README
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.0.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.1.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.2.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.3.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.4.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.5.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.6.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.7.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.8.gz
 create mode 100644 training/dtrain/test/parallelize/g/grammar.out.9.gz

diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 6e30cf9d..9b0923f6 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -7,10 +7,10 @@ if ARGV.size != 7
   exit
 end
 
-cdec_dir   = '~/MAREC/cdec-dtrain/'
-dtrain_bin = "~/MAREC/cdec-dtrain/training/dtrain/dtrain"
+dtrain_dir = File.expand_path File.dirname(__FILE__)
+dtrain_bin = "#{dtrain_dir}/dtrain"
 ruby       = '/usr/bin/ruby'
-lplp_rb    = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb"
+lplp_rb    = "#{dtrain_dir}/hstreaming/lplp.rb"
 lplp_args  = 'l2 select_k 100000'
 cat        = '/bin/cat'
 
@@ -96,15 +96,19 @@ end
   while remaining_shards > 0
     shards_at_once.times {
       qsub_str_start = qsub_str_end = ''
+      local_end = ''
       if use_qsub
         qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \""
         qsub_str_end = "\""
+        local_end = '' 
+      else
+        local_end = "&>work/out.#{shard}.#{epoch}"
       end
       pids << Kernel.fork {
         `#{qsub_str_start}#{dtrain_bin} -c #{ini}\
           --input #{input_files[shard]}\
           --refs #{refs_files[shard]} #{input_weights}\
-          --output work/weights.#{shard}.#{epoch}#{qsub_str_end}`
+          --output work/weights.#{shard}.#{epoch}#{qsub_str_end} #{local_end}`
       }
       weights_files << "work/weights.#{shard}.#{epoch}"
       shard += 1
diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README
index 6937b11b..2df77086 100644
--- a/training/dtrain/test/example/README
+++ b/training/dtrain/test/example/README
@@ -1,8 +1,8 @@
 Small example of input format for distributed training.
-Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini .
+Call dtrain from this folder with ../../dtrain -c ./dtrain.ini .
 
 For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
 and recompile.
 
-Data is here: http://simianer.de/#dtrain
+Data can be found here: http://simianer.de/#dtrain
 
diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini
index d5955f0e..068ebd4d 100644
--- a/training/dtrain/test/example/cdec.ini
+++ b/training/dtrain/test/example/cdec.ini
@@ -4,7 +4,7 @@ scfg_max_span_limit=15
 intersection_strategy=cube_pruning
 cubepruning_pop_limit=30
 feature_function=WordPenalty
-feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz
+feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
 # all currently working feature functions for translation:
 # (with those features active that were used in the ACL paper)
 #feature_function=ArityPenalty
diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini
index 72d50ca1..97fce7f0 100644
--- a/training/dtrain/test/example/dtrain.ini
+++ b/training/dtrain/test/example/dtrain.ini
@@ -1,7 +1,7 @@
-input=test/example/nc-wmt11.1k.gz    # use '-' for STDIN
+input=./nc-wmt11.1k.gz    # use '-' for STDIN
 output=-                             # a weights file (add .gz for gzip compression) or STDOUT '-'
 select_weights=VOID                  # don't output weights
-decoder_config=test/example/cdec.ini # config for cdec
+decoder_config=./cdec.ini # config for cdec
 # weights for these features will be printed on each iteration
 print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
 tmp=/tmp
diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README
new file mode 100644
index 00000000..89715105
--- /dev/null
+++ b/training/dtrain/test/parallelize/README
@@ -0,0 +1,5 @@
+Run, for example:
+  ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs
+
+The final weights will be in the file work/weights.3
+
diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini
index e118374b..e43ba1c4 100644
--- a/training/dtrain/test/parallelize/cdec.ini
+++ b/training/dtrain/test/parallelize/cdec.ini
@@ -4,7 +4,7 @@ intersection_strategy=cube_pruning
 cubepruning_pop_limit=200
 scfg_max_span_limit=15
 feature_function=WordPenalty
-feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
+feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz
 #feature_function=ArityPenalty
 #feature_function=CMR2008ReorderingFeatures
 #feature_function=Dwarf
diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz
new file mode 100644
index 00000000..1e28a24b
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.0.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz
new file mode 100644
index 00000000..372f5675
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.1.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz
new file mode 100644
index 00000000..145d0dc0
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.2.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz
new file mode 100644
index 00000000..105593ff
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.3.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/test/parallelize/g/grammar.out.4.gz
new file mode 100644
index 00000000..30781f48
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.4.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz
new file mode 100644
index 00000000..834ee759
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.5.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/test/parallelize/g/grammar.out.6.gz
new file mode 100644
index 00000000..2e76f348
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.6.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz
new file mode 100644
index 00000000..3741a887
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.7.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz
new file mode 100644
index 00000000..ebf6bd0c
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.8.gz differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz
new file mode 100644
index 00000000..c1791059
Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.9.gz differ
-- 
cgit v1.2.3


From b89fd90083b22e6d4ab469af001a1f15fbcd7da9 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <simianer@cl.uni-heidelberg.de>
Date: Mon, 11 Feb 2013 17:10:29 +0100
Subject: fixed l1 regularization iteration silliness

---
 training/dtrain/dtrain.cc | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 18286668..b317c365 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -246,7 +246,7 @@ main(int argc, char** argv)
     cerr << setw(25) << "k " << k << endl;
     cerr << setw(25) << "N " << N << endl;
     cerr << setw(25) << "T " << T << endl;
-    cerr << setw(25) << "scorer '" << scorer_str << "'" << endl;
+    cerr << setw(26) << "scorer '" << scorer_str << "'" << endl;
     if (scorer_str == "approx_bleu")
       cerr << setw(25) << "approx. B discount " << approx_bleu_d << endl;
     cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl;
@@ -459,35 +459,40 @@ main(int argc, char** argv)
       }
 
       // l1 regularization
+      // please note that this penalizes _all_ weights
+      // (contrary to only the ones changed by the last update)
+      // after a _sentence_ (not after each example/pair)
       if (l1naive) {
-        for (unsigned d = 0; d < lambdas.size(); d++) {
-          weight_t v = lambdas.get(d);
-          lambdas.set_value(d, v - sign(v) * l1_reg);
+        FastSparseVector<weight_t>::iterator it = lambdas.begin();
+        for (; it != lambdas.end(); ++it) {
+          it->second -= sign(it->second) * l1_reg;
         }
       } else if (l1clip) {
-        for (unsigned d = 0; d < lambdas.size(); d++) {
-          if (lambdas.nonzero(d)) {
-            weight_t v = lambdas.get(d);
+        FastSparseVector<weight_t>::iterator it = lambdas.begin();
+        for (; it != lambdas.end(); ++it) {
+          if (it->second != 0) {
+            weight_t v = it->second;
             if (v > 0) {
-              lambdas.set_value(d, max(0., v - l1_reg));
+              it->second = max(0., v - l1_reg);
             } else {
-              lambdas.set_value(d, min(0., v + l1_reg));
+              it->second = min(0., v + l1_reg);
             }
           }
         }
       } else if (l1cumul) {
         weight_t acc_penalty = (ii+1) * l1_reg; // ii is the index of the current input
-        for (unsigned d = 0; d < lambdas.size(); d++) {
-          if (lambdas.nonzero(d)) {
-            weight_t v = lambdas.get(d);
-            weight_t penalty = 0;
+        FastSparseVector<weight_t>::iterator it = lambdas.begin();
+        for (; it != lambdas.end(); ++it) {
+          if (it->second != 0) {
+            weight_t v = it->second;
+            weight_t penalized = 0.;
             if (v > 0) {
-              penalty = max(0., v-(acc_penalty + cumulative_penalties.get(d)));
+              penalized = max(0., v-(acc_penalty + cumulative_penalties.get(it->first)));
             } else {
-              penalty = min(0., v+(acc_penalty - cumulative_penalties.get(d)));
+              penalized = min(0., v+(acc_penalty - cumulative_penalties.get(it->first)));
             }
-            lambdas.set_value(d, penalty);
-            cumulative_penalties.set_value(d, cumulative_penalties.get(d)+penalty);
+            it->second = penalized;
+            cumulative_penalties.set_value(it->first, cumulative_penalties.get(it->first)+penalized);
           }
         }
       }
-- 
cgit v1.2.3


From 81e8a7b851e064df1de6330a52966d8aeb13be25 Mon Sep 17 00:00:00 2001
From: Jeff Flanigan <jeffflanigan@gmail.com>
Date: Thu, 21 Feb 2013 19:51:06 -0500
Subject: Add QCRI_BLEU

---
 mteval/ns.cc             | 25 +++++++++++++++++++------
 python/pkg/cdec/score.py |  2 +-
 python/src/mteval.pxi    |  2 ++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/mteval/ns.cc b/mteval/ns.cc
index d8214558..b64d4798 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -61,7 +61,7 @@ string EvaluationMetric::DetailedScore(const SufficientStats& stats) const {
   return os.str();
 }
 
-enum BleuType { IBM, Koehn, NIST };
+enum BleuType { IBM, Koehn, NIST, QCRI };
 template <unsigned int N = 4u, BleuType BrevityType = IBM>
 struct BleuSegmentEvaluator : public SegmentEvaluator {
   BleuSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : evaluation_metric(em) {
@@ -91,7 +91,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
     float& ref_len = out->fields[2*N + 1];
     hyp_len = hyp.size();
     ref_len = lengths_[0];
-    if (lengths_.size() > 1 && BrevityType == IBM) {
+    if (lengths_.size() > 1 && (BrevityType == IBM || BrevityType == QCRI)) {
       float bestd = 2000000;
       float hl = hyp.size();
       float bl = -1;
@@ -186,7 +186,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
 
 template <unsigned int N = 4u, BleuType BrevityType = IBM>
 struct BleuMetric : public EvaluationMetric {
-  BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {}
+  BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : (BrevityType == NIST ? "NIST_BLEU" : "QCRI_BLEU"))) {}
   unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; }
   boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
     return boost::shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
@@ -194,26 +194,37 @@ struct BleuMetric : public EvaluationMetric {
   float ComputeBreakdown(const SufficientStats& stats, float* bp, vector<float>* out) const {
     if (out) { out->clear(); }
     float log_bleu = 0;
+    float log_bleu_adj = 0;  // for QCRI
     int count = 0;
+    float alpha = BrevityType == QCRI ? 1 : 0.01;
     for (int i = 0; i < N; ++i) {
       if (stats.fields[i+N] > 0) {
         float cor_count = stats.fields[i];  // correct_ngram_hit_counts[i];
         // smooth bleu
-        if (!cor_count) { cor_count = 0.01; }
+        if (!cor_count) { cor_count = alpha; }
         float lprec = log(cor_count) - log(stats.fields[i+N]); // log(hyp_ngram_counts[i]);
         if (out) out->push_back(exp(lprec));
         log_bleu += lprec;
+        if (BrevityType == QCRI)
+          log_bleu_adj += log(alpha) - log(stats.fields[i+N] + alpha);
         ++count;
       }
     }
     log_bleu /= count;
+    log_bleu_adj /= count;
     float lbp = 0.0;
     const float& hyp_len = stats.fields[2*N];
     const float& ref_len = stats.fields[2*N + 1];
-    if (hyp_len < ref_len)
-      lbp = (hyp_len - ref_len) / hyp_len;
+    if (hyp_len < ref_len) {
+      if (BrevityType == QCRI)
+        lbp = (hyp_len - ref_len - alpha) / hyp_len;
+      else
+        lbp = (hyp_len - ref_len) / hyp_len;
+    }
     log_bleu += lbp;
     if (bp) *bp = exp(lbp);
+    if (BrevityType == QCRI)
+      return exp(log_bleu) - exp(lbp + log_bleu_adj);
     return exp(log_bleu);
   }
   string DetailedScore(const SufficientStats& stats) const {
@@ -253,6 +264,8 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) {
       m = new BleuMetric<4, NIST>;
     } else if (metric_id == "KOEHN_BLEU") {
       m = new BleuMetric<4, Koehn>;
+    } else if (metric_id == "QCRI_BLEU") {
+      m = new BleuMetric<4, QCRI>;
     } else if (metric_id == "SSK") {
       m = new SSKMetric;
     } else if (metric_id == "TER") {
diff --git a/python/pkg/cdec/score.py b/python/pkg/cdec/score.py
index 22257774..829dfdfd 100644
--- a/python/pkg/cdec/score.py
+++ b/python/pkg/cdec/score.py
@@ -1 +1 @@
-from _cdec import BLEU, TER, CER, Metric
+from _cdec import BLEU, TER, CER, SSK, QCRI, Metric
diff --git a/python/src/mteval.pxi b/python/src/mteval.pxi
index f3bec393..436a1e01 100644
--- a/python/src/mteval.pxi
+++ b/python/src/mteval.pxi
@@ -192,5 +192,7 @@ cdef class Metric:
         return []
 
 BLEU = Scorer('IBM_BLEU')
+QCRI = Scorer('QCRI_BLEU')
 TER = Scorer('TER')
 CER = Scorer('CER')
+SSK = Scorer('SSK')
-- 
cgit v1.2.3


From 462c37d573512d8b5adf6ccfd7361a495fe0d94e Mon Sep 17 00:00:00 2001
From: Jeff Flanigan <jeffflanigan@gmail.com>
Date: Fri, 22 Feb 2013 13:31:19 -0500
Subject: Add QCRI_BLEU

---
 python/src/_cdec.cpp | 3017 ++++++++++++++++++++++++++------------------------
 1 file changed, 1546 insertions(+), 1471 deletions(-)

diff --git a/python/src/_cdec.cpp b/python/src/_cdec.cpp
index 00cb0641..770b422c 100644
--- a/python/src/_cdec.cpp
+++ b/python/src/_cdec.cpp
@@ -1,4 +1,4 @@
-/* Generated by Cython 0.17.1 on Sun Nov 18 15:08:18 2012 */
+/* Generated by Cython 0.18 on Thu Feb 21 19:39:13 2013 */
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
@@ -53,12 +53,15 @@
                                 (PyErr_Format(PyExc_TypeError, \
                                               "expected index value, got %.200s", Py_TYPE(o)->tp_name), \
                                  (PyObject*)0))
-  #define PyIndex_Check(o)     (PyNumber_Check(o) && !PyFloat_Check(o) && !PyComplex_Check(o))
+  #define __Pyx_PyIndex_Check(o) (PyNumber_Check(o) && !PyFloat_Check(o) && \
+                                  !PyComplex_Check(o))
+  #define PyIndex_Check __Pyx_PyIndex_Check
   #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)
   #define __PYX_BUILD_PY_SSIZE_T "i"
 #else
   #define __PYX_BUILD_PY_SSIZE_T "n"
   #define CYTHON_FORMAT_SSIZE_T "z"
+  #define __Pyx_PyIndex_Check PyIndex_Check
 #endif
 #if PY_VERSION_HEX < 0x02060000
   #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
@@ -295,8 +298,6 @@
 #define CYTHON_WITHOUT_ASSERTIONS
 #endif
 
-
-/* inline attribute */
 #ifndef CYTHON_INLINE
   #if defined(__GNUC__)
     #define CYTHON_INLINE __inline__
@@ -308,8 +309,6 @@
     #define CYTHON_INLINE
   #endif
 #endif
-
-/* unused attribute */
 #ifndef CYTHON_UNUSED
 # if defined(__GNUC__)
 #   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
@@ -323,24 +322,17 @@
 #   define CYTHON_UNUSED
 # endif
 #endif
-
 typedef struct {PyObject **p; char *s; const long n; const char* encoding; const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
 
-
-/* Type Conversion Predeclarations */
-
 #define __Pyx_PyBytes_FromUString(s) PyBytes_FromString((char*)s)
 #define __Pyx_PyBytes_AsUString(s)   ((unsigned char*) PyBytes_AsString(s))
-
 #define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
 #define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
 static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
 static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
-
 static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
 static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
 static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
-
 #if CYTHON_COMPILING_IN_CPYTHON
 #define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
 #else
@@ -348,6 +340,7 @@ static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
 #endif
 #define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
 
+
 #ifdef __GNUC__
   /* Test for GCC > 2.95 */
   #if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
@@ -448,7 +441,7 @@ struct __pyx_opt_args_5_cdec_as_str {
   char *error_msg;
 };
 
-/* "/home/vchahune/tools/cdec/python/src/cdec.sa._sa.pxd":25
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/cdec.sa._sa.pxd":25
  *     cdef void read_handle(self, FILE* f)
  * 
  * cdef class FeatureVector:             # <<<<<<<<<<<<<<
@@ -475,7 +468,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_24___init__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":21
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":21
  *         return '[%s]' % self.cat
  * 
  * cdef class NTRef:             # <<<<<<<<<<<<<<
@@ -488,7 +481,7 @@ struct __pyx_obj_5_cdec_NTRef {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":121
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":121
  *         return CandidateSet(self)
  * 
  * cdef class Scorer:             # <<<<<<<<<<<<<<
@@ -502,7 +495,7 @@ struct __pyx_obj_5_cdec_Scorer {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/cdec.sa._sa.pxd":12
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/cdec.sa._sa.pxd":12
  *     cdef void read_handle(self, FILE* f)
  * 
  * cdef class IntList:             # <<<<<<<<<<<<<<
@@ -519,7 +512,7 @@ struct __pyx_obj_4cdec_2sa_3_sa_IntList {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/cdec.sa._sa.pxd":29
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/cdec.sa._sa.pxd":29
  *     cdef FloatList values
  * 
  * cdef class Phrase:             # <<<<<<<<<<<<<<
@@ -536,7 +529,7 @@ struct __pyx_obj_4cdec_2sa_3_sa_Phrase {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":90
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":90
  *         return candidate
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -552,7 +545,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_22___iter__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":193
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":193
  *         super(MRule, self).__init__(lhs, rhs, e, scores, None)
  * 
  * cdef class Grammar:             # <<<<<<<<<<<<<<
@@ -565,7 +558,7 @@ struct __pyx_obj_5_cdec_Grammar {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":63
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":63
  *     def todot(self):
  *         """lattice.todot() -> Representation of the lattice in GraphViz dot format."""
  *         def lines():             # <<<<<<<<<<<<<<
@@ -588,7 +581,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_20_lines {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":108
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":108
  *             del hypos
  * 
  *     def sample_trees(self, unsigned n):             # <<<<<<<<<<<<<<
@@ -606,7 +599,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_12_sample_trees {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":161
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":161
  * 
  *     property edges:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -622,7 +615,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_13___get__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":5
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":5
  * import cdec.sa._sa as _sa
  * 
  * def _phrase(phrase):             # <<<<<<<<<<<<<<
@@ -635,7 +628,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_2__phrase {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":65
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":65
  *         return result
  * 
  * cdef class CandidateSet:             # <<<<<<<<<<<<<<
@@ -650,7 +643,7 @@ struct __pyx_obj_5_cdec_CandidateSet {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":167
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":167
  * 
  *     property nodes:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -666,7 +659,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_14___get__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":49
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":49
  *     return TRule(lhs, f, e, scores, a)
  * 
  * cdef class TRule:             # <<<<<<<<<<<<<<
@@ -679,7 +672,7 @@ struct __pyx_obj_5_cdec_TRule {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/cdec.sa._sa.pxd":35
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/cdec.sa._sa.pxd":35
  *     cdef public int chunklen(self, int k)
  * 
  * cdef class Rule:             # <<<<<<<<<<<<<<
@@ -697,7 +690,7 @@ struct __pyx_obj_4cdec_2sa_3_sa_Rule {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":177
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":177
  *                 _phrase(self.f), _phrase(self.e), scores)
  * 
  * cdef class MRule(TRule):             # <<<<<<<<<<<<<<
@@ -709,7 +702,7 @@ struct __pyx_obj_5_cdec_MRule {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":100
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":100
  *         self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get())
  * 
  * cdef class SegmentEvaluator:             # <<<<<<<<<<<<<<
@@ -740,7 +733,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_25_genexpr {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":61
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":61
  *             yield self[i]
  * 
  *     def todot(self):             # <<<<<<<<<<<<<<
@@ -753,7 +746,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_19_todot {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":12
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":12
  *         return stats
  * 
  * cdef class Candidate:             # <<<<<<<<<<<<<<
@@ -767,7 +760,7 @@ struct __pyx_obj_5_cdec_Candidate {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":173
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":173
  * 
  *     def __str__(self):
  *         scores = ' '.join('%s=%s' % feat for feat in self.scores)             # <<<<<<<<<<<<<<
@@ -784,7 +777,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_6_genexpr {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":8
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":8
  *     return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase)
  * 
  * cdef class NT:             # <<<<<<<<<<<<<<
@@ -798,7 +791,7 @@ struct __pyx_obj_5_cdec_NT {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/cdec.sa._sa.pxd":3
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/cdec.sa._sa.pxd":3
  * from libc.stdio cimport FILE
  * 
  * cdef class FloatList:             # <<<<<<<<<<<<<<
@@ -815,7 +808,7 @@ struct __pyx_obj_4cdec_2sa_3_sa_FloatList {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":196
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":196
  *         return vector
  * 
  * cdef class HypergraphEdge:             # <<<<<<<<<<<<<<
@@ -831,7 +824,7 @@ struct __pyx_obj_5_cdec_HypergraphEdge {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":72
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":72
  *         self.vector.set_value(fid, value)
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -848,7 +841,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_1___iter__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":256
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":256
  * 
  *     property in_edges:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -864,7 +857,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_16___get__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":131
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":131
  * 
  *     property a:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -881,7 +874,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_4___get__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":56
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":56
  *         return unicode(str(self), 'utf8')
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -911,7 +904,7 @@ struct __pyx_obj_5_cdec_Decoder {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":246
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":246
  *         raise NotImplemented('comparison not implemented for HypergraphEdge')
  * 
  * cdef class HypergraphNode:             # <<<<<<<<<<<<<<
@@ -926,7 +919,7 @@ struct __pyx_obj_5_cdec_HypergraphNode {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":48
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":48
  *         return sparse
  * 
  * cdef class SparseVector:             # <<<<<<<<<<<<<<
@@ -939,7 +932,7 @@ struct __pyx_obj_5_cdec_SparseVector {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":262
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":262
  * 
  *     property out_edges:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -955,7 +948,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_17___get__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":32
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":32
  *         self.vector[0][fid] = value
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -971,7 +964,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct____iter__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":44
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":44
  *         return self.stats.size()
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -988,7 +981,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_21___iter__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":3
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":3
  * from cython.operator cimport preincrement as pinc
  * 
  * cdef class DenseVector:             # <<<<<<<<<<<<<<
@@ -1002,7 +995,7 @@ struct __pyx_obj_5_cdec_DenseVector {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":199
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":199
  *         del self.grammar
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -1021,7 +1014,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_7___iter__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":176
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":176
  *         out.fields[i] = ss[i]
  * 
  * cdef class Metric:             # <<<<<<<<<<<<<<
@@ -1034,7 +1027,7 @@ struct __pyx_obj_5_cdec_Metric {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":26
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":26
  *             return fmap
  * 
  * cdef class SufficientStats:             # <<<<<<<<<<<<<<
@@ -1048,7 +1041,7 @@ struct __pyx_obj_5_cdec_SufficientStats {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":49
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":49
  *         return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8')
  * 
  *     def kbest(self, size):             # <<<<<<<<<<<<<<
@@ -1067,7 +1060,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_8_kbest {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":81
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":81
  *             del e_derivations
  * 
  *     def kbest_features(self, size):             # <<<<<<<<<<<<<<
@@ -1087,7 +1080,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_10_kbest_features {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":216
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":216
  * 
  *     property tail_nodes:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -1103,7 +1096,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_15___get__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":172
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":172
  *             self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat)
  * 
  *     def __str__(self):             # <<<<<<<<<<<<<<
@@ -1116,7 +1109,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_5___str__ {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":6
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":6
  * 
  * def _phrase(phrase):
  *     return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase)             # <<<<<<<<<<<<<<
@@ -1133,7 +1126,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_3_genexpr {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":62
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":62
  *             del derivations
  * 
  *     def kbest_trees(self, size):             # <<<<<<<<<<<<<<
@@ -1156,7 +1149,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_9_kbest_trees {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":4
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":4
  * cimport kbest
  * 
  * cdef class Hypergraph:             # <<<<<<<<<<<<<<
@@ -1171,7 +1164,7 @@ struct __pyx_obj_5_cdec_Hypergraph {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":3
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":3
  * cimport lattice
  * 
  * cdef class Lattice:             # <<<<<<<<<<<<<<
@@ -1184,7 +1177,7 @@ struct __pyx_obj_5_cdec_Lattice {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":97
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":97
  *             del derivations
  * 
  *     def sample(self, unsigned n):             # <<<<<<<<<<<<<<
@@ -1225,7 +1218,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_23__make_config {
 };
 
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":217
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":217
  *             self.grammar.get().SetGrammarName(name)
  * 
  * cdef class TextGrammar(Grammar):             # <<<<<<<<<<<<<<
@@ -1238,7 +1231,7 @@ struct __pyx_obj_5_cdec_TextGrammar {
 
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":4
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":4
  * cimport kbest
  * 
  * cdef class Hypergraph:             # <<<<<<<<<<<<<<
@@ -1252,7 +1245,7 @@ struct __pyx_vtabstruct_5_cdec_Hypergraph {
 static struct __pyx_vtabstruct_5_cdec_Hypergraph *__pyx_vtabptr_5_cdec_Hypergraph;
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":196
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":196
  *         return vector
  * 
  * cdef class HypergraphEdge:             # <<<<<<<<<<<<<<
@@ -1266,7 +1259,7 @@ struct __pyx_vtabstruct_5_cdec_HypergraphEdge {
 static struct __pyx_vtabstruct_5_cdec_HypergraphEdge *__pyx_vtabptr_5_cdec_HypergraphEdge;
 
 
-/* "/home/vchahune/tools/cdec/python/src/cdec.sa._sa.pxd":12
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/cdec.sa._sa.pxd":12
  *     cdef void read_handle(self, FILE* f)
  * 
  * cdef class IntList:             # <<<<<<<<<<<<<<
@@ -1286,7 +1279,7 @@ struct __pyx_vtabstruct_4cdec_2sa_3_sa_IntList {
 static struct __pyx_vtabstruct_4cdec_2sa_3_sa_IntList *__pyx_vtabptr_4cdec_2sa_3_sa_IntList;
 
 
-/* "/home/vchahune/tools/cdec/python/src/cdec.sa._sa.pxd":3
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/cdec.sa._sa.pxd":3
  * from libc.stdio cimport FILE
  * 
  * cdef class FloatList:             # <<<<<<<<<<<<<<
@@ -1302,7 +1295,7 @@ struct __pyx_vtabstruct_4cdec_2sa_3_sa_FloatList {
 static struct __pyx_vtabstruct_4cdec_2sa_3_sa_FloatList *__pyx_vtabptr_4cdec_2sa_3_sa_FloatList;
 
 
-/* "/home/vchahune/tools/cdec/python/src/cdec.sa._sa.pxd":29
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/cdec.sa._sa.pxd":29
  *     cdef FloatList values
  * 
  * cdef class Phrase:             # <<<<<<<<<<<<<<
@@ -1317,7 +1310,7 @@ struct __pyx_vtabstruct_4cdec_2sa_3_sa_Phrase {
 static struct __pyx_vtabstruct_4cdec_2sa_3_sa_Phrase *__pyx_vtabptr_4cdec_2sa_3_sa_Phrase;
 
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":246
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":246
  *         raise NotImplemented('comparison not implemented for HypergraphEdge')
  * 
  * cdef class HypergraphNode:             # <<<<<<<<<<<<<<
@@ -1530,6 +1523,35 @@ static CYTHON_INLINE int __Pyx_PyDict_Contains(PyObject* item, PyObject* dict, i
 #define __Pyx_PyIter_Next(obj) __Pyx_PyIter_Next2(obj, NULL)
 static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject *, PyObject *); /*proto*/
 
+static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value) {
+    PyObject* value;
+#if PY_MAJOR_VERSION >= 3
+    value = PyDict_GetItemWithError(d, key);
+    if (unlikely(!value)) {
+        if (unlikely(PyErr_Occurred()))
+            return NULL;
+        value = default_value;
+    }
+    Py_INCREF(value);
+#else
+    if (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key)) {
+        value = PyDict_GetItem(d, key);
+        if (unlikely(!value)) {
+            value = default_value;
+        }
+        Py_INCREF(value);
+    } else {
+        PyObject *m;
+        m = __Pyx_GetAttrString(d, "get");
+        if (!m) return NULL;
+        value = PyObject_CallFunctionObjArgs(m, key,
+                                             (default_value == Py_None) ? NULL : default_value, NULL);
+        Py_DECREF(m);
+    }
+#endif
+    return value;
+}
+
 static double __Pyx__PyObject_AsDouble(PyObject* obj); /* proto */
 #if CYTHON_COMPILING_IN_PYPY
 #define __Pyx_PyObject_AsDouble(obj) \
@@ -1547,12 +1569,12 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb);
 static CYTHON_INLINE void __Pyx_ExceptionSave(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
 static void __Pyx_ExceptionReset(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
 
-static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level); /*proto*/
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /*proto*/
 
 static PyObject *__Pyx_FindPy2Metaclass(PyObject *bases); /*proto*/
 
 static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name,
-                                   PyObject *modname); /*proto*/
+                                   PyObject *qualname, PyObject *modname); /*proto*/
 
 #ifndef __Pyx_CppExn2PyErr
 #include <new>
@@ -1616,6 +1638,7 @@ typedef struct {
     PyObject *func_dict;
     PyObject *func_weakreflist;
     PyObject *func_name;
+    PyObject *func_qualname;
     PyObject *func_doc;
     PyObject *func_code;
     PyObject *func_closure;
@@ -1626,10 +1649,10 @@ typedef struct {
     PyObject *(*defaults_getter)(PyObject *);
 } __pyx_CyFunctionObject;
 static PyTypeObject *__pyx_CyFunctionType = 0;
-#define __Pyx_CyFunction_NewEx(ml, flags, self, module, code) \
-    __Pyx_CyFunction_New(__pyx_CyFunctionType, ml, flags, self, module, code)
-static PyObject *__Pyx_CyFunction_New(PyTypeObject *,
-                                      PyMethodDef *ml, int flags,
+#define __Pyx_CyFunction_NewEx(ml, flags, qualname, self, module, code) \
+    __Pyx_CyFunction_New(__pyx_CyFunctionType, ml, flags, qualname, self, module, code)
+static PyObject *__Pyx_CyFunction_New(PyTypeObject *, PyMethodDef *ml,
+                                      int flags, PyObject* qualname,
                                       PyObject *self, PyObject *module,
                                       PyObject* code);
 static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m,
@@ -2026,20 +2049,21 @@ static char __pyx_k_26[] = "digraph lattice {";
   static char __pyx_k_31[] = "\\\"";
   static char __pyx_k_33[] = "%d [shape=doublecircle]";
 static char __pyx_k_34[] = "}";
-static char __pyx_k_37[] = "/home/vchahune/tools/cdec/python/src/lattice.pxi";
-static char __pyx_k_38[] = "\n";
-static char __pyx_k_40[] = "sufficient stats vector index out of range";
-static char __pyx_k_42[] = "candidate set index out of range";
-static char __pyx_k_44[] = "%s %s";
-static char __pyx_k_45[] = "%s = %s";
+static char __pyx_k_37[] = "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi";
+static char __pyx_k_38[] = "Lattice.todot.<locals>.lines";
+static char __pyx_k_39[] = "\n";
+static char __pyx_k_41[] = "sufficient stats vector index out of range";
+static char __pyx_k_43[] = "candidate set index out of range";
+static char __pyx_k_45[] = "%s %s";
+static char __pyx_k_46[] = "%s = %s";
 static char __pyx_k_47[] = "formalism \"%s\" unknown";
 static char __pyx_k_48[] = "cannot initialize weights with %s";
 static char __pyx_k_49[] = "#";
 static char __pyx_k_52[] = "Cannot translate input type %s";
 static char __pyx_k_53[] = "cdec.sa._sa";
 static char __pyx_k_54[] = "*";
-static char __pyx_k_57[] = "/home/vchahune/tools/cdec/python/src/grammar.pxi";
-static char __pyx_k_63[] = "/home/vchahune/tools/cdec/python/src/_cdec.pyx";
+static char __pyx_k_57[] = "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi";
+static char __pyx_k_65[] = "/home/jmflanig/tools/cdec-jflanig/python/src/_cdec.pyx";
 static char __pyx_k__a[] = "a";
 static char __pyx_k__e[] = "e";
 static char __pyx_k__f[] = "f";
@@ -2048,12 +2072,12 @@ static char __pyx_k__k[] = "k";
 static char __pyx_k__pb[] = "pb";
 static char __pyx_k__yn[] = "yn";
 static char __pyx_k__CER[] = "CER";
+static char __pyx_k__SSK[] = "SSK";
 static char __pyx_k__TER[] = "TER";
 static char __pyx_k___sa[] = "_sa";
 static char __pyx_k__cat[] = "cat";
 static char __pyx_k__dot[] = "dot";
 static char __pyx_k__fst[] = "fst";
-static char __pyx_k__get[] = "get";
 static char __pyx_k__hyp[] = "hyp";
 static char __pyx_k__inp[] = "inp";
 static char __pyx_k__key[] = "key";
@@ -2062,6 +2086,7 @@ static char __pyx_k__plf[] = "plf";
 static char __pyx_k__ref[] = "ref";
 static char __pyx_k__rhs[] = "rhs";
 static char __pyx_k__BLEU[] = "BLEU";
+static char __pyx_k__QCRI[] = "QCRI";
 static char __pyx_k__eval[] = "eval";
 static char __pyx_k__info[] = "info";
 static char __pyx_k__join[] = "join";
@@ -2111,6 +2136,7 @@ static char __pyx_k__lexalign[] = "lexalign";
 static char __pyx_k__lextrans[] = "lextrans";
 static char __pyx_k__sentence[] = "sentence";
 static char __pyx_k__Exception[] = "Exception";
+static char __pyx_k__QCRI_BLEU[] = "QCRI_BLEU";
 static char __pyx_k__TypeError[] = "TypeError";
 static char __pyx_k____class__[] = "__class__";
 static char __pyx_k____enter__[] = "__enter__";
@@ -2151,12 +2177,13 @@ static PyObject *__pyx_kp_s_31;
 static PyObject *__pyx_kp_s_33;
 static PyObject *__pyx_kp_s_34;
 static PyObject *__pyx_kp_s_37;
-static PyObject *__pyx_kp_s_38;
+static PyObject *__pyx_n_s_38;
+static PyObject *__pyx_kp_s_39;
 static PyObject *__pyx_kp_s_4;
-static PyObject *__pyx_kp_s_40;
-static PyObject *__pyx_kp_s_42;
-static PyObject *__pyx_kp_s_44;
+static PyObject *__pyx_kp_s_41;
+static PyObject *__pyx_kp_s_43;
 static PyObject *__pyx_kp_s_45;
+static PyObject *__pyx_kp_s_46;
 static PyObject *__pyx_kp_s_47;
 static PyObject *__pyx_kp_s_48;
 static PyObject *__pyx_kp_s_49;
@@ -2164,7 +2191,7 @@ static PyObject *__pyx_kp_s_52;
 static PyObject *__pyx_n_s_53;
 static PyObject *__pyx_n_s_54;
 static PyObject *__pyx_kp_s_57;
-static PyObject *__pyx_kp_s_63;
+static PyObject *__pyx_kp_s_65;
 static PyObject *__pyx_kp_s_7;
 static PyObject *__pyx_kp_s_8;
 static PyObject *__pyx_kp_s_9;
@@ -2177,6 +2204,9 @@ static PyObject *__pyx_n_s__InvalidConfig;
 static PyObject *__pyx_n_s__KeyError;
 static PyObject *__pyx_n_s__NotImplemented;
 static PyObject *__pyx_n_s__ParseFailed;
+static PyObject *__pyx_n_s__QCRI;
+static PyObject *__pyx_n_s__QCRI_BLEU;
+static PyObject *__pyx_n_s__SSK;
 static PyObject *__pyx_n_s__TER;
 static PyObject *__pyx_n_s__TypeError;
 static PyObject *__pyx_n_s__ValueError;
@@ -2214,7 +2244,6 @@ static PyObject *__pyx_n_s__formalism;
 static PyObject *__pyx_n_s__format;
 static PyObject *__pyx_n_s__fst;
 static PyObject *__pyx_n_s__genexpr;
-static PyObject *__pyx_n_s__get;
 static PyObject *__pyx_n_s__grammar;
 static PyObject *__pyx_n_s__hyp;
 static PyObject *__pyx_n_s__hypergraph;
@@ -2270,10 +2299,9 @@ static PyObject *__pyx_k_tuple_24;
 static PyObject *__pyx_k_tuple_25;
 static PyObject *__pyx_k_tuple_32;
 static PyObject *__pyx_k_tuple_35;
-static PyObject *__pyx_k_tuple_39;
-static PyObject *__pyx_k_tuple_41;
-static PyObject *__pyx_k_tuple_43;
-static PyObject *__pyx_k_tuple_46;
+static PyObject *__pyx_k_tuple_40;
+static PyObject *__pyx_k_tuple_42;
+static PyObject *__pyx_k_tuple_44;
 static PyObject *__pyx_k_tuple_50;
 static PyObject *__pyx_k_tuple_51;
 static PyObject *__pyx_k_tuple_55;
@@ -2281,11 +2309,13 @@ static PyObject *__pyx_k_tuple_58;
 static PyObject *__pyx_k_tuple_59;
 static PyObject *__pyx_k_tuple_60;
 static PyObject *__pyx_k_tuple_61;
-static PyObject *__pyx_k_tuple_64;
+static PyObject *__pyx_k_tuple_62;
+static PyObject *__pyx_k_tuple_63;
+static PyObject *__pyx_k_tuple_66;
 static PyObject *__pyx_k_codeobj_36;
 static PyObject *__pyx_k_codeobj_56;
-static PyObject *__pyx_k_codeobj_62;
-static PyObject *__pyx_k_codeobj_65;
+static PyObject *__pyx_k_codeobj_64;
+static PyObject *__pyx_k_codeobj_67;
 
 /* "_cdec.pyx":6
  * cimport decoder
@@ -2300,8 +2330,8 @@ static PyObject *__pyx_f_5_cdec_as_str(PyObject *__pyx_v_data, struct __pyx_opt_
   PyObject *__pyx_v_ret = 0;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   PyObject *__pyx_t_3 = NULL;
   PyObject *__pyx_t_4 = NULL;
   int __pyx_lineno = 0;
@@ -2321,11 +2351,8 @@ static PyObject *__pyx_f_5_cdec_as_str(PyObject *__pyx_v_data, struct __pyx_opt_
  *         ret = data.encode('utf8')
  *     elif isinstance(data, str):
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)(&PyUnicode_Type)));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_data, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = PyUnicode_Check(__pyx_v_data); 
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":9
  *     cdef bytes ret
@@ -2334,11 +2361,11 @@ static PyObject *__pyx_f_5_cdec_as_str(PyObject *__pyx_v_data, struct __pyx_opt_
  *     elif isinstance(data, str):
  *         ret = data
  */
-    __pyx_t_1 = PyObject_GetAttr(__pyx_v_data, __pyx_n_s__encode); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyObject_GetAttr(__pyx_v_data, __pyx_n_s__encode); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     if (!(likely(PyBytes_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected bytes, got %.200s", Py_TYPE(__pyx_t_3)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __pyx_v_ret = ((PyObject*)__pyx_t_3);
     __pyx_t_3 = 0;
@@ -2352,11 +2379,8 @@ static PyObject *__pyx_f_5_cdec_as_str(PyObject *__pyx_v_data, struct __pyx_opt_
  *         ret = data
  *     else:
  */
-  __pyx_t_3 = ((PyObject *)((PyObject*)(&PyString_Type)));
-  __Pyx_INCREF(__pyx_t_3);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_data, __pyx_t_3); 
-  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = PyString_Check(__pyx_v_data); 
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":11
  *         ret = data.encode('utf8')
@@ -2381,17 +2405,17 @@ static PyObject *__pyx_f_5_cdec_as_str(PyObject *__pyx_v_data, struct __pyx_opt_
  */
     __pyx_t_3 = PyBytes_FromString(__pyx_v_error_msg); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_3));
-    __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_t_3), __pyx_n_s__format); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_t_3), __pyx_n_s__format); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
     __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
     __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
     __Pyx_INCREF(((PyObject *)Py_TYPE(__pyx_v_data)));
     PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)Py_TYPE(__pyx_v_data)));
     __Pyx_GIVEREF(((PyObject *)Py_TYPE(__pyx_v_data)));
-    __pyx_t_4 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
     __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
@@ -2422,7 +2446,7 @@ static PyObject *__pyx_f_5_cdec_as_str(PyObject *__pyx_v_data, struct __pyx_opt_
   __pyx_r = ((PyObject*)Py_None); __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_XDECREF(__pyx_t_3);
   __Pyx_XDECREF(__pyx_t_4);
   __Pyx_AddTraceback("_cdec.as_str", __pyx_clineno, __pyx_lineno, __pyx_filename);
@@ -2452,7 +2476,7 @@ static int __pyx_pw_5_cdec_11DenseVector_1__init__(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":7
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":7
  *     cdef bint owned # if True, do not manage memory
  * 
  *     def __init__(self):             # <<<<<<<<<<<<<<
@@ -2469,17 +2493,22 @@ static int __pyx_pf_5_cdec_11DenseVector___init__(struct __pyx_obj_5_cdec_DenseV
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__init__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":9
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":9
  *     def __init__(self):
  *         """DenseVector() -> Dense weight/feature vector."""
  *         self.vector = new vector[weight_t]()             # <<<<<<<<<<<<<<
  *         self.owned = False
  * 
  */
-  try {__pyx_t_1 = new std::vector<weight_t>();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[1]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+  try {
+    __pyx_t_1 = new std::vector<weight_t>();
+  } catch(...) {
+    __Pyx_CppExn2PyErr();
+    {__pyx_filename = __pyx_f[1]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
   __pyx_v_self->vector = __pyx_t_1;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":10
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":10
  *         """DenseVector() -> Dense weight/feature vector."""
  *         self.vector = new vector[weight_t]()
  *         self.owned = False             # <<<<<<<<<<<<<<
@@ -2507,7 +2536,7 @@ static void __pyx_pw_5_cdec_11DenseVector_3__dealloc__(PyObject *__pyx_v_self) {
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":12
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":12
  *         self.owned = False
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -2520,7 +2549,7 @@ static void __pyx_pf_5_cdec_11DenseVector_2__dealloc__(struct __pyx_obj_5_cdec_D
   int __pyx_t_1;
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":13
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":13
  * 
  *     def __dealloc__(self):
  *         if not self.owned:             # <<<<<<<<<<<<<<
@@ -2530,7 +2559,7 @@ static void __pyx_pf_5_cdec_11DenseVector_2__dealloc__(struct __pyx_obj_5_cdec_D
   __pyx_t_1 = (!__pyx_v_self->owned);
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":14
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":14
  *     def __dealloc__(self):
  *         if not self.owned:
  *             del self.vector             # <<<<<<<<<<<<<<
@@ -2556,7 +2585,7 @@ static Py_ssize_t __pyx_pw_5_cdec_11DenseVector_5__len__(PyObject *__pyx_v_self)
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":16
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":16
  *             del self.vector
  * 
  *     def __len__(self):             # <<<<<<<<<<<<<<
@@ -2569,7 +2598,7 @@ static Py_ssize_t __pyx_pf_5_cdec_11DenseVector_4__len__(struct __pyx_obj_5_cdec
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__len__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":17
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":17
  * 
  *     def __len__(self):
  *         return self.vector.size()             # <<<<<<<<<<<<<<
@@ -2606,7 +2635,7 @@ static PyObject *__pyx_pw_5_cdec_11DenseVector_7__getitem__(PyObject *__pyx_v_se
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":19
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":19
  *         return self.vector.size()
  * 
  *     def __getitem__(self, char* fname):             # <<<<<<<<<<<<<<
@@ -2626,7 +2655,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_6__getitem__(struct __pyx_obj_5_c
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__getitem__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":20
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":20
  * 
  *     def __getitem__(self, char* fname):
  *         cdef int fid = FDConvert(fname)             # <<<<<<<<<<<<<<
@@ -2635,7 +2664,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_6__getitem__(struct __pyx_obj_5_c
  */
   __pyx_v_fid = FD::Convert(__pyx_v_fname);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":21
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":21
  *     def __getitem__(self, char* fname):
  *         cdef int fid = FDConvert(fname)
  *         if 0 <= fid < self.vector.size():             # <<<<<<<<<<<<<<
@@ -2648,7 +2677,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_6__getitem__(struct __pyx_obj_5_c
   }
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":22
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":22
  *         cdef int fid = FDConvert(fname)
  *         if 0 <= fid < self.vector.size():
  *             return self.vector[0][fid]             # <<<<<<<<<<<<<<
@@ -2665,7 +2694,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_6__getitem__(struct __pyx_obj_5_c
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":23
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":23
  *         if 0 <= fid < self.vector.size():
  *             return self.vector[0][fid]
  *         raise KeyError(fname)             # <<<<<<<<<<<<<<
@@ -2724,7 +2753,7 @@ static int __pyx_pw_5_cdec_11DenseVector_9__setitem__(PyObject *__pyx_v_self, Py
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":25
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":25
  *         raise KeyError(fname)
  * 
  *     def __setitem__(self, char* fname, float value):             # <<<<<<<<<<<<<<
@@ -2744,7 +2773,7 @@ static int __pyx_pf_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_5_cdec_De
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__setitem__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":26
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":26
  * 
  *     def __setitem__(self, char* fname, float value):
  *         cdef int fid = FDConvert(fname)             # <<<<<<<<<<<<<<
@@ -2753,7 +2782,7 @@ static int __pyx_pf_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_5_cdec_De
  */
   __pyx_v_fid = FD::Convert(__pyx_v_fname);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":27
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":27
  *     def __setitem__(self, char* fname, float value):
  *         cdef int fid = FDConvert(fname)
  *         if fid < 0: raise KeyError(fname)             # <<<<<<<<<<<<<<
@@ -2779,7 +2808,7 @@ static int __pyx_pf_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_5_cdec_De
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":28
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":28
  *         cdef int fid = FDConvert(fname)
  *         if fid < 0: raise KeyError(fname)
  *         if self.vector.size() <= fid:             # <<<<<<<<<<<<<<
@@ -2789,7 +2818,7 @@ static int __pyx_pf_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_5_cdec_De
   __pyx_t_1 = (__pyx_v_self->vector->size() <= __pyx_v_fid);
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":29
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":29
  *         if fid < 0: raise KeyError(fname)
  *         if self.vector.size() <= fid:
  *             self.vector.resize(fid + 1)             # <<<<<<<<<<<<<<
@@ -2801,7 +2830,7 @@ static int __pyx_pf_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_5_cdec_De
   }
   __pyx_L4:;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":30
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":30
  *         if self.vector.size() <= fid:
  *             self.vector.resize(fid + 1)
  *         self.vector[0][fid] = value             # <<<<<<<<<<<<<<
@@ -2834,7 +2863,7 @@ static PyObject *__pyx_pw_5_cdec_11DenseVector_11__iter__(PyObject *__pyx_v_self
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":32
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":32
  *         self.vector[0][fid] = value
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -2899,7 +2928,7 @@ static PyObject *__pyx_gb_5_cdec_11DenseVector_12generator(__pyx_GeneratorObject
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":34
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":34
  *     def __iter__(self):
  *         cdef unsigned fid
  *         for fid in range(1, self.vector.size()):             # <<<<<<<<<<<<<<
@@ -2910,7 +2939,7 @@ static PyObject *__pyx_gb_5_cdec_11DenseVector_12generator(__pyx_GeneratorObject
   for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_fid = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":35
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":35
  *         cdef unsigned fid
  *         for fid in range(1, self.vector.size()):
  *             yield str(FDConvert(fid).c_str()), self.vector[0][fid]             # <<<<<<<<<<<<<<
@@ -2983,7 +3012,7 @@ static PyObject *__pyx_pw_5_cdec_11DenseVector_14dot(PyObject *__pyx_v_self, PyO
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":37
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":37
  *             yield str(FDConvert(fid).c_str()), self.vector[0][fid]
  * 
  *     def dot(self, SparseVector other):             # <<<<<<<<<<<<<<
@@ -3002,7 +3031,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_13dot(struct __pyx_obj_5_cdec_Den
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("dot", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":39
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":39
  *     def dot(self, SparseVector other):
  *         """vector.dot(SparseVector other) -> Dot product of the two vectors."""
  *         return other.dot(self)             # <<<<<<<<<<<<<<
@@ -3051,7 +3080,7 @@ static PyObject *__pyx_pw_5_cdec_11DenseVector_16tosparse(PyObject *__pyx_v_self
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":41
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":41
  *         return other.dot(self)
  * 
  *     def tosparse(self):             # <<<<<<<<<<<<<<
@@ -3069,7 +3098,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_15tosparse(struct __pyx_obj_5_cde
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("tosparse", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":43
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":43
  *     def tosparse(self):
  *         """vector.tosparse() -> Equivalent SparseVector."""
  *         cdef SparseVector sparse = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -3082,7 +3111,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_15tosparse(struct __pyx_obj_5_cde
   __pyx_v_sparse = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":44
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":44
  *         """vector.tosparse() -> Equivalent SparseVector."""
  *         cdef SparseVector sparse = SparseVector.__new__(SparseVector)
  *         sparse.vector = new FastSparseVector[weight_t]()             # <<<<<<<<<<<<<<
@@ -3091,7 +3120,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_15tosparse(struct __pyx_obj_5_cde
  */
   __pyx_v_sparse->vector = new FastSparseVector<weight_t>();
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":45
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":45
  *         cdef SparseVector sparse = SparseVector.__new__(SparseVector)
  *         sparse.vector = new FastSparseVector[weight_t]()
  *         InitSparseVector(self.vector[0], sparse.vector)             # <<<<<<<<<<<<<<
@@ -3100,7 +3129,7 @@ static PyObject *__pyx_pf_5_cdec_11DenseVector_15tosparse(struct __pyx_obj_5_cde
  */
   Weights::InitSparseVector((__pyx_v_self->vector[0]), __pyx_v_sparse->vector);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":46
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":46
  *         sparse.vector = new FastSparseVector[weight_t]()
  *         InitSparseVector(self.vector[0], sparse.vector)
  *         return sparse             # <<<<<<<<<<<<<<
@@ -3143,7 +3172,7 @@ static int __pyx_pw_5_cdec_12SparseVector_1__init__(PyObject *__pyx_v_self, PyOb
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":51
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":51
  *     cdef FastSparseVector[weight_t]* vector
  * 
  *     def __init__(self):             # <<<<<<<<<<<<<<
@@ -3156,7 +3185,7 @@ static int __pyx_pf_5_cdec_12SparseVector___init__(struct __pyx_obj_5_cdec_Spars
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__init__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":53
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":53
  *     def __init__(self):
  *         """SparseVector() -> Sparse feature/weight vector."""
  *         self.vector = new FastSparseVector[weight_t]()             # <<<<<<<<<<<<<<
@@ -3179,7 +3208,7 @@ static void __pyx_pw_5_cdec_12SparseVector_3__dealloc__(PyObject *__pyx_v_self)
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":55
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":55
  *         self.vector = new FastSparseVector[weight_t]()
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -3191,7 +3220,7 @@ static void __pyx_pf_5_cdec_12SparseVector_2__dealloc__(CYTHON_UNUSED struct __p
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":56
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":56
  * 
  *     def __dealloc__(self):
  *         del self.vector             # <<<<<<<<<<<<<<
@@ -3215,7 +3244,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_5copy(PyObject *__pyx_v_self, CY
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":58
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":58
  *         del self.vector
  * 
  *     def copy(self):             # <<<<<<<<<<<<<<
@@ -3232,7 +3261,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_4copy(struct __pyx_obj_5_cdec_Sp
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("copy", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":60
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":60
  *     def copy(self):
  *         """vector.copy() -> SparseVector copy."""
  *         return self * 1             # <<<<<<<<<<<<<<
@@ -3279,7 +3308,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_7__getitem__(PyObject *__pyx_v_s
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":62
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":62
  *         return self * 1
  * 
  *     def __getitem__(self, char* fname):             # <<<<<<<<<<<<<<
@@ -3299,7 +3328,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_6__getitem__(struct __pyx_obj_5_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__getitem__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":63
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":63
  * 
  *     def __getitem__(self, char* fname):
  *         cdef int fid = FDConvert(fname)             # <<<<<<<<<<<<<<
@@ -3308,7 +3337,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_6__getitem__(struct __pyx_obj_5_
  */
   __pyx_v_fid = FD::Convert(__pyx_v_fname);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":64
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":64
  *     def __getitem__(self, char* fname):
  *         cdef int fid = FDConvert(fname)
  *         if fid < 0: raise KeyError(fname)             # <<<<<<<<<<<<<<
@@ -3334,7 +3363,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_6__getitem__(struct __pyx_obj_5_
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":65
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":65
  *         cdef int fid = FDConvert(fname)
  *         if fid < 0: raise KeyError(fname)
  *         return self.vector.value(fid)             # <<<<<<<<<<<<<<
@@ -3386,7 +3415,7 @@ static int __pyx_pw_5_cdec_12SparseVector_9__setitem__(PyObject *__pyx_v_self, P
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":67
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":67
  *         return self.vector.value(fid)
  * 
  *     def __setitem__(self, char* fname, float value):             # <<<<<<<<<<<<<<
@@ -3406,7 +3435,7 @@ static int __pyx_pf_5_cdec_12SparseVector_8__setitem__(struct __pyx_obj_5_cdec_S
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__setitem__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":68
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":68
  * 
  *     def __setitem__(self, char* fname, float value):
  *         cdef int fid = FDConvert(fname)             # <<<<<<<<<<<<<<
@@ -3415,7 +3444,7 @@ static int __pyx_pf_5_cdec_12SparseVector_8__setitem__(struct __pyx_obj_5_cdec_S
  */
   __pyx_v_fid = FD::Convert(__pyx_v_fname);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":69
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":69
  *     def __setitem__(self, char* fname, float value):
  *         cdef int fid = FDConvert(fname)
  *         if fid < 0: raise KeyError(fname)             # <<<<<<<<<<<<<<
@@ -3441,7 +3470,7 @@ static int __pyx_pf_5_cdec_12SparseVector_8__setitem__(struct __pyx_obj_5_cdec_S
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":70
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":70
  *         cdef int fid = FDConvert(fname)
  *         if fid < 0: raise KeyError(fname)
  *         self.vector.set_value(fid, value)             # <<<<<<<<<<<<<<
@@ -3474,7 +3503,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_11__iter__(PyObject *__pyx_v_sel
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":72
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":72
  *         self.vector.set_value(fid, value)
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -3539,7 +3568,7 @@ static PyObject *__pyx_gb_5_cdec_12SparseVector_12generator1(__pyx_GeneratorObje
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":73
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":73
  * 
  *     def __iter__(self):
  *         cdef FastSparseVector[weight_t].const_iterator* it = new FastSparseVector[weight_t].const_iterator(self.vector[0], False)             # <<<<<<<<<<<<<<
@@ -3548,7 +3577,7 @@ static PyObject *__pyx_gb_5_cdec_12SparseVector_12generator1(__pyx_GeneratorObje
  */
   __pyx_cur_scope->__pyx_v_it = new FastSparseVector<weight_t>::const_iterator((__pyx_cur_scope->__pyx_v_self->vector[0]), 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":75
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":75
  *         cdef FastSparseVector[weight_t].const_iterator* it = new FastSparseVector[weight_t].const_iterator(self.vector[0], False)
  *         cdef unsigned i
  *         try:             # <<<<<<<<<<<<<<
@@ -3557,7 +3586,7 @@ static PyObject *__pyx_gb_5_cdec_12SparseVector_12generator1(__pyx_GeneratorObje
  */
   /*try:*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":76
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":76
  *         cdef unsigned i
  *         try:
  *             for i in range(self.vector.size()):             # <<<<<<<<<<<<<<
@@ -3568,7 +3597,7 @@ static PyObject *__pyx_gb_5_cdec_12SparseVector_12generator1(__pyx_GeneratorObje
     for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
       __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-      /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":77
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":77
  *         try:
  *             for i in range(self.vector.size()):
  *                 yield (str(FDConvert(it[0].ptr().first).c_str()), it[0].ptr().second)             # <<<<<<<<<<<<<<
@@ -3609,7 +3638,7 @@ static PyObject *__pyx_gb_5_cdec_12SparseVector_12generator1(__pyx_GeneratorObje
       __pyx_t_2 = __pyx_cur_scope->__pyx_t_1;
       if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L5;}
 
-      /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":78
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":78
  *             for i in range(self.vector.size()):
  *                 yield (str(FDConvert(it[0].ptr().first).c_str()), it[0].ptr().second)
  *                 pinc(it[0]) # ++it             # <<<<<<<<<<<<<<
@@ -3620,7 +3649,7 @@ static PyObject *__pyx_gb_5_cdec_12SparseVector_12generator1(__pyx_GeneratorObje
     }
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":80
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":80
  *                 pinc(it[0]) # ++it
  *         finally:
  *             del it             # <<<<<<<<<<<<<<
@@ -3682,7 +3711,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_14dot(PyObject *__pyx_v_self, Py
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":82
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":82
  *             del it
  * 
  *     def dot(self, other):             # <<<<<<<<<<<<<<
@@ -3693,28 +3722,25 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_14dot(PyObject *__pyx_v_self, Py
 static PyObject *__pyx_pf_5_cdec_12SparseVector_13dot(struct __pyx_obj_5_cdec_SparseVector *__pyx_v_self, PyObject *__pyx_v_other) {
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   PyObject *__pyx_t_3 = NULL;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("dot", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":84
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":84
  *     def dot(self, other):
  *         """vector.dot(SparseVector/DenseVector other) -> Dot product of the two vectors."""
  *         if isinstance(other, DenseVector):             # <<<<<<<<<<<<<<
  *             return self.vector.dot((<DenseVector> other).vector[0])
  *         elif isinstance(other, SparseVector):
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_DenseVector));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_other, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_other, ((PyObject*)__pyx_ptype_5_cdec_DenseVector)); 
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":85
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":85
  *         """vector.dot(SparseVector/DenseVector other) -> Dot product of the two vectors."""
  *         if isinstance(other, DenseVector):
  *             return self.vector.dot((<DenseVector> other).vector[0])             # <<<<<<<<<<<<<<
@@ -3722,28 +3748,25 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_13dot(struct __pyx_obj_5_cdec_Sp
  *             return self.vector.dot((<SparseVector> other).vector[0])
  */
     __Pyx_XDECREF(__pyx_r);
-    __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->vector->dot((((struct __pyx_obj_5_cdec_DenseVector *)__pyx_v_other)->vector[0]))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 85; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_r = __pyx_t_1;
-    __pyx_t_1 = 0;
+    __pyx_t_2 = PyFloat_FromDouble(__pyx_v_self->vector->dot((((struct __pyx_obj_5_cdec_DenseVector *)__pyx_v_other)->vector[0]))); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 85; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_r = __pyx_t_2;
+    __pyx_t_2 = 0;
     goto __pyx_L0;
     goto __pyx_L3;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":86
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":86
  *         if isinstance(other, DenseVector):
  *             return self.vector.dot((<DenseVector> other).vector[0])
  *         elif isinstance(other, SparseVector):             # <<<<<<<<<<<<<<
  *             return self.vector.dot((<SparseVector> other).vector[0])
  *         raise TypeError('cannot take the dot product of %s and SparseVector' % type(other))
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SparseVector));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_other, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_other, ((PyObject*)__pyx_ptype_5_cdec_SparseVector)); 
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":87
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":87
  *             return self.vector.dot((<DenseVector> other).vector[0])
  *         elif isinstance(other, SparseVector):
  *             return self.vector.dot((<SparseVector> other).vector[0])             # <<<<<<<<<<<<<<
@@ -3751,40 +3774,40 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_13dot(struct __pyx_obj_5_cdec_Sp
  * 
  */
     __Pyx_XDECREF(__pyx_r);
-    __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->vector->dot((((struct __pyx_obj_5_cdec_SparseVector *)__pyx_v_other)->vector[0]))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_r = __pyx_t_1;
-    __pyx_t_1 = 0;
+    __pyx_t_2 = PyFloat_FromDouble(__pyx_v_self->vector->dot((((struct __pyx_obj_5_cdec_SparseVector *)__pyx_v_other)->vector[0]))); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_r = __pyx_t_2;
+    __pyx_t_2 = 0;
     goto __pyx_L0;
     goto __pyx_L3;
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":88
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":88
  *         elif isinstance(other, SparseVector):
  *             return self.vector.dot((<SparseVector> other).vector[0])
  *         raise TypeError('cannot take the dot product of %s and SparseVector' % type(other))             # <<<<<<<<<<<<<<
  * 
  *     def __richcmp__(SparseVector x, SparseVector y, int op):
  */
-  __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_3), ((PyObject *)Py_TYPE(__pyx_v_other))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+  __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_3), ((PyObject *)Py_TYPE(__pyx_v_other))); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_2));
   __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
-  PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_1));
-  __Pyx_GIVEREF(((PyObject *)__pyx_t_1));
-  __pyx_t_1 = 0;
-  __pyx_t_1 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2));
+  __Pyx_GIVEREF(((PyObject *)__pyx_t_2));
+  __pyx_t_2 = 0;
+  __pyx_t_2 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
-  __Pyx_Raise(__pyx_t_1, 0, 0, 0);
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __Pyx_Raise(__pyx_t_2, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
   {__pyx_filename = __pyx_f[1]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
   __pyx_r = Py_None; __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_XDECREF(__pyx_t_3);
   __Pyx_AddTraceback("_cdec.SparseVector.dot", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = NULL;
@@ -3811,7 +3834,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_16__richcmp__(PyObject *__pyx_v_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":90
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":90
  *         raise TypeError('cannot take the dot product of %s and SparseVector' % type(other))
  * 
  *     def __richcmp__(SparseVector x, SparseVector y, int op):             # <<<<<<<<<<<<<<
@@ -3829,7 +3852,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_15__richcmp__(struct __pyx_obj_5
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__richcmp__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":93
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":93
  *         if op == 2: # ==
  *             return x.vector[0] == y.vector[0]
  *         elif op == 3: # !=             # <<<<<<<<<<<<<<
@@ -3838,7 +3861,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_15__richcmp__(struct __pyx_obj_5
  */
   switch (__pyx_v_op) {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":91
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":91
  * 
  *     def __richcmp__(SparseVector x, SparseVector y, int op):
  *         if op == 2: # ==             # <<<<<<<<<<<<<<
@@ -3847,7 +3870,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_15__richcmp__(struct __pyx_obj_5
  */
     case 2:
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":92
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":92
  *     def __richcmp__(SparseVector x, SparseVector y, int op):
  *         if op == 2: # ==
  *             return x.vector[0] == y.vector[0]             # <<<<<<<<<<<<<<
@@ -3862,7 +3885,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_15__richcmp__(struct __pyx_obj_5
     goto __pyx_L0;
     break;
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":93
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":93
  *         if op == 2: # ==
  *             return x.vector[0] == y.vector[0]
  *         elif op == 3: # !=             # <<<<<<<<<<<<<<
@@ -3871,7 +3894,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_15__richcmp__(struct __pyx_obj_5
  */
     case 3:
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":94
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":94
  *             return x.vector[0] == y.vector[0]
  *         elif op == 3: # !=
  *             return not (x == y)             # <<<<<<<<<<<<<<
@@ -3890,7 +3913,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_15__richcmp__(struct __pyx_obj_5
     break;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":95
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":95
  *         elif op == 3: # !=
  *             return not (x == y)
  *         raise NotImplemented('comparison not implemented for SparseVector')             # <<<<<<<<<<<<<<
@@ -3926,7 +3949,7 @@ static Py_ssize_t __pyx_pw_5_cdec_12SparseVector_18__len__(PyObject *__pyx_v_sel
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":97
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":97
  *         raise NotImplemented('comparison not implemented for SparseVector')
  * 
  *     def __len__(self):             # <<<<<<<<<<<<<<
@@ -3939,7 +3962,7 @@ static Py_ssize_t __pyx_pf_5_cdec_12SparseVector_17__len__(struct __pyx_obj_5_cd
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__len__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":98
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":98
  * 
  *     def __len__(self):
  *         return self.vector.size()             # <<<<<<<<<<<<<<
@@ -3976,7 +3999,7 @@ static int __pyx_pw_5_cdec_12SparseVector_20__contains__(PyObject *__pyx_v_self,
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":100
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":100
  *         return self.vector.size()
  * 
  *     def __contains__(self, char* fname):             # <<<<<<<<<<<<<<
@@ -3989,7 +4012,7 @@ static int __pyx_pf_5_cdec_12SparseVector_19__contains__(struct __pyx_obj_5_cdec
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__contains__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":101
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":101
  * 
  *     def __contains__(self, char* fname):
  *         return self.vector.nonzero(FDConvert(fname))             # <<<<<<<<<<<<<<
@@ -4016,7 +4039,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_22__neg__(PyObject *__pyx_v_self
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":103
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":103
  *         return self.vector.nonzero(FDConvert(fname))
  * 
  *     def __neg__(self):             # <<<<<<<<<<<<<<
@@ -4034,7 +4057,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_21__neg__(struct __pyx_obj_5_cde
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__neg__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":104
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":104
  * 
  *     def __neg__(self):
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -4047,7 +4070,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_21__neg__(struct __pyx_obj_5_cde
   __pyx_v_result = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":105
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":105
  *     def __neg__(self):
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](self.vector[0])             # <<<<<<<<<<<<<<
@@ -4056,7 +4079,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_21__neg__(struct __pyx_obj_5_cde
  */
   __pyx_v_result->vector = new FastSparseVector<weight_t>((__pyx_v_self->vector[0]));
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":106
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":106
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](self.vector[0])
  *         result.vector[0] *= -1.0             # <<<<<<<<<<<<<<
@@ -4065,7 +4088,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_21__neg__(struct __pyx_obj_5_cde
  */
   (__pyx_v_result->vector[0]) *= -1.0;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":107
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":107
  *         result.vector = new FastSparseVector[weight_t](self.vector[0])
  *         result.vector[0] *= -1.0
  *         return result             # <<<<<<<<<<<<<<
@@ -4106,7 +4129,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_24__iadd__(PyObject *__pyx_v_sel
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":109
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":109
  *         return result
  * 
  *     def __iadd__(SparseVector self, SparseVector other):             # <<<<<<<<<<<<<<
@@ -4119,7 +4142,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_23__iadd__(struct __pyx_obj_5_cd
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__iadd__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":110
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":110
  * 
  *     def __iadd__(SparseVector self, SparseVector other):
  *         self.vector[0] += other.vector[0]             # <<<<<<<<<<<<<<
@@ -4128,7 +4151,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_23__iadd__(struct __pyx_obj_5_cd
  */
   (__pyx_v_self->vector[0]) += (__pyx_v_other->vector[0]);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":111
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":111
  *     def __iadd__(SparseVector self, SparseVector other):
  *         self.vector[0] += other.vector[0]
  *         return self             # <<<<<<<<<<<<<<
@@ -4163,7 +4186,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_26__isub__(PyObject *__pyx_v_sel
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":113
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":113
  *         return self
  * 
  *     def __isub__(SparseVector self, SparseVector other):             # <<<<<<<<<<<<<<
@@ -4176,7 +4199,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_25__isub__(struct __pyx_obj_5_cd
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__isub__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":114
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":114
  * 
  *     def __isub__(SparseVector self, SparseVector other):
  *         self.vector[0] -= other.vector[0]             # <<<<<<<<<<<<<<
@@ -4185,7 +4208,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_25__isub__(struct __pyx_obj_5_cd
  */
   (__pyx_v_self->vector[0]) -= (__pyx_v_other->vector[0]);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":115
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":115
  *     def __isub__(SparseVector self, SparseVector other):
  *         self.vector[0] -= other.vector[0]
  *         return self             # <<<<<<<<<<<<<<
@@ -4225,7 +4248,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_28__imul__(PyObject *__pyx_v_sel
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":117
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":117
  *         return self
  * 
  *     def __imul__(SparseVector self, float scalar):             # <<<<<<<<<<<<<<
@@ -4238,7 +4261,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_27__imul__(struct __pyx_obj_5_cd
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__imul__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":118
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":118
  * 
  *     def __imul__(SparseVector self, float scalar):
  *         self.vector[0] *= scalar             # <<<<<<<<<<<<<<
@@ -4247,7 +4270,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_27__imul__(struct __pyx_obj_5_cd
  */
   (__pyx_v_self->vector[0]) *= __pyx_v_scalar;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":119
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":119
  *     def __imul__(SparseVector self, float scalar):
  *         self.vector[0] *= scalar
  *         return self             # <<<<<<<<<<<<<<
@@ -4289,7 +4312,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_30__idiv__(PyObject *__pyx_v_sel
 }
 #endif /*!(#if PY_MAJOR_VERSION < 3)*/
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":121
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":121
  *         return self
  * 
  *     def __idiv__(SparseVector self, float scalar):             # <<<<<<<<<<<<<<
@@ -4303,7 +4326,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_29__idiv__(struct __pyx_obj_5_cd
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__idiv__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":122
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":122
  * 
  *     def __idiv__(SparseVector self, float scalar):
  *         self.vector[0] /= scalar             # <<<<<<<<<<<<<<
@@ -4312,7 +4335,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_29__idiv__(struct __pyx_obj_5_cd
  */
   (__pyx_v_self->vector[0]) /= __pyx_v_scalar;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":123
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":123
  *     def __idiv__(SparseVector self, float scalar):
  *         self.vector[0] /= scalar
  *         return self             # <<<<<<<<<<<<<<
@@ -4349,7 +4372,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_32__add__(PyObject *__pyx_v_x, P
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":125
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":125
  *         return self
  * 
  *     def __add__(SparseVector x, SparseVector y):             # <<<<<<<<<<<<<<
@@ -4367,7 +4390,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_31__add__(struct __pyx_obj_5_cde
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__add__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":126
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":126
  * 
  *     def __add__(SparseVector x, SparseVector y):
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -4380,7 +4403,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_31__add__(struct __pyx_obj_5_cde
   __pyx_v_result = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":127
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":127
  *     def __add__(SparseVector x, SparseVector y):
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](x.vector[0] + y.vector[0])             # <<<<<<<<<<<<<<
@@ -4389,7 +4412,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_31__add__(struct __pyx_obj_5_cde
  */
   __pyx_v_result->vector = new FastSparseVector<weight_t>(((__pyx_v_x->vector[0]) + (__pyx_v_y->vector[0])));
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":128
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":128
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](x.vector[0] + y.vector[0])
  *         return result             # <<<<<<<<<<<<<<
@@ -4431,7 +4454,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_34__sub__(PyObject *__pyx_v_x, P
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":130
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":130
  *         return result
  * 
  *     def __sub__(SparseVector x, SparseVector y):             # <<<<<<<<<<<<<<
@@ -4449,7 +4472,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_33__sub__(struct __pyx_obj_5_cde
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__sub__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":131
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":131
  * 
  *     def __sub__(SparseVector x, SparseVector y):
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -4462,7 +4485,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_33__sub__(struct __pyx_obj_5_cde
   __pyx_v_result = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":132
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":132
  *     def __sub__(SparseVector x, SparseVector y):
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](x.vector[0] - y.vector[0])             # <<<<<<<<<<<<<<
@@ -4471,7 +4494,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_33__sub__(struct __pyx_obj_5_cde
  */
   __pyx_v_result->vector = new FastSparseVector<weight_t>(((__pyx_v_x->vector[0]) - (__pyx_v_y->vector[0])));
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":133
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":133
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](x.vector[0] - y.vector[0])
  *         return result             # <<<<<<<<<<<<<<
@@ -4507,7 +4530,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_36__mul__(PyObject *__pyx_v_x, P
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":135
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":135
  *         return result
  * 
  *     def __mul__(x, y):             # <<<<<<<<<<<<<<
@@ -4521,38 +4544,35 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_v_x, P
   struct __pyx_obj_5_cdec_SparseVector *__pyx_v_result = 0;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   float __pyx_t_3;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__mul__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":138
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":138
  *         cdef SparseVector vector
  *         cdef float scalar
  *         if isinstance(x, SparseVector): vector, scalar = x, y             # <<<<<<<<<<<<<<
  *         else: vector, scalar = y, x
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SparseVector));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_x, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_x, ((PyObject*)__pyx_ptype_5_cdec_SparseVector)); 
+  if (__pyx_t_1) {
     if (!(likely(((__pyx_v_x) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_x, __pyx_ptype_5_cdec_SparseVector))))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_t_1 = __pyx_v_x;
-    __Pyx_INCREF(__pyx_t_1);
+    __pyx_t_2 = __pyx_v_x;
+    __Pyx_INCREF(__pyx_t_2);
     __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_y); if (unlikely((__pyx_t_3 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
-    __pyx_t_1 = 0;
+    __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_2);
+    __pyx_t_2 = 0;
     __pyx_v_scalar = __pyx_t_3;
     goto __pyx_L3;
   }
   /*else*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":139
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":139
  *         cdef float scalar
  *         if isinstance(x, SparseVector): vector, scalar = x, y
  *         else: vector, scalar = y, x             # <<<<<<<<<<<<<<
@@ -4560,29 +4580,29 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_v_x, P
  *         result.vector = new FastSparseVector[weight_t](vector.vector[0] * scalar)
  */
     if (!(likely(((__pyx_v_y) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_y, __pyx_ptype_5_cdec_SparseVector))))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_t_1 = __pyx_v_y;
-    __Pyx_INCREF(__pyx_t_1);
+    __pyx_t_2 = __pyx_v_y;
+    __Pyx_INCREF(__pyx_t_2);
     __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_x); if (unlikely((__pyx_t_3 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
-    __pyx_t_1 = 0;
+    __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_2);
+    __pyx_t_2 = 0;
     __pyx_v_scalar = __pyx_t_3;
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":140
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":140
  *         if isinstance(x, SparseVector): vector, scalar = x, y
  *         else: vector, scalar = y, x
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
  *         result.vector = new FastSparseVector[weight_t](vector.vector[0] * scalar)
  *         return result
  */
-  __pyx_t_1 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_t_1);
-  if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_v_result = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
-  __pyx_t_1 = 0;
+  __pyx_t_2 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_2);
+  if (!(likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_v_result = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_2);
+  __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":141
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":141
  *         else: vector, scalar = y, x
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](vector.vector[0] * scalar)             # <<<<<<<<<<<<<<
@@ -4591,7 +4611,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_v_x, P
  */
   __pyx_v_result->vector = new FastSparseVector<weight_t>(((__pyx_v_vector->vector[0]) * __pyx_v_scalar));
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":142
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":142
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](vector.vector[0] * scalar)
  *         return result             # <<<<<<<<<<<<<<
@@ -4606,7 +4626,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_v_x, P
   __pyx_r = Py_None; __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_AddTraceback("_cdec.SparseVector.__mul__", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = NULL;
   __pyx_L0:;
@@ -4630,7 +4650,7 @@ static PyObject *__pyx_pw_5_cdec_12SparseVector_38__div__(PyObject *__pyx_v_x, P
 }
 #endif /*!(#if PY_MAJOR_VERSION < 3)*/
 
-/* "/home/vchahune/tools/cdec/python/src/vectors.pxi":144
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":144
  *         return result
  * 
  *     def __div__(x, y):             # <<<<<<<<<<<<<<
@@ -4645,38 +4665,35 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_37__div__(PyObject *__pyx_v_x, P
   struct __pyx_obj_5_cdec_SparseVector *__pyx_v_result = 0;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   float __pyx_t_3;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__div__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":147
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":147
  *         cdef SparseVector vector
  *         cdef float scalar
  *         if isinstance(x, SparseVector): vector, scalar = x, y             # <<<<<<<<<<<<<<
  *         else: vector, scalar = y, x
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SparseVector));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_x, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_x, ((PyObject*)__pyx_ptype_5_cdec_SparseVector)); 
+  if (__pyx_t_1) {
     if (!(likely(((__pyx_v_x) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_x, __pyx_ptype_5_cdec_SparseVector))))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_t_1 = __pyx_v_x;
-    __Pyx_INCREF(__pyx_t_1);
+    __pyx_t_2 = __pyx_v_x;
+    __Pyx_INCREF(__pyx_t_2);
     __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_y); if (unlikely((__pyx_t_3 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
-    __pyx_t_1 = 0;
+    __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_2);
+    __pyx_t_2 = 0;
     __pyx_v_scalar = __pyx_t_3;
     goto __pyx_L3;
   }
   /*else*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":148
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":148
  *         cdef float scalar
  *         if isinstance(x, SparseVector): vector, scalar = x, y
  *         else: vector, scalar = y, x             # <<<<<<<<<<<<<<
@@ -4684,29 +4701,29 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_37__div__(PyObject *__pyx_v_x, P
  *         result.vector = new FastSparseVector[weight_t](vector.vector[0] / scalar)
  */
     if (!(likely(((__pyx_v_y) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_y, __pyx_ptype_5_cdec_SparseVector))))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_t_1 = __pyx_v_y;
-    __Pyx_INCREF(__pyx_t_1);
+    __pyx_t_2 = __pyx_v_y;
+    __Pyx_INCREF(__pyx_t_2);
     __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_x); if (unlikely((__pyx_t_3 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
-    __pyx_t_1 = 0;
+    __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_2);
+    __pyx_t_2 = 0;
     __pyx_v_scalar = __pyx_t_3;
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":149
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":149
  *         if isinstance(x, SparseVector): vector, scalar = x, y
  *         else: vector, scalar = y, x
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
  *         result.vector = new FastSparseVector[weight_t](vector.vector[0] / scalar)
  *         return result
  */
-  __pyx_t_1 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_t_1);
-  if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_v_result = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
-  __pyx_t_1 = 0;
+  __pyx_t_2 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_2);
+  if (!(likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_v_result = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_2);
+  __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":150
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":150
  *         else: vector, scalar = y, x
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](vector.vector[0] / scalar)             # <<<<<<<<<<<<<<
@@ -4714,7 +4731,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_37__div__(PyObject *__pyx_v_x, P
  */
   __pyx_v_result->vector = new FastSparseVector<weight_t>(((__pyx_v_vector->vector[0]) / __pyx_v_scalar));
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":151
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":151
  *         cdef SparseVector result = SparseVector.__new__(SparseVector)
  *         result.vector = new FastSparseVector[weight_t](vector.vector[0] / scalar)
  *         return result             # <<<<<<<<<<<<<<
@@ -4727,7 +4744,7 @@ static PyObject *__pyx_pf_5_cdec_12SparseVector_37__div__(PyObject *__pyx_v_x, P
   __pyx_r = Py_None; __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_AddTraceback("_cdec.SparseVector.__div__", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = NULL;
   __pyx_L0:;
@@ -4752,7 +4769,7 @@ static PyObject *__pyx_pw_5_cdec_1_phrase(PyObject *__pyx_self, PyObject *__pyx_
 }
 static PyObject *__pyx_gb_5_cdec_7_phrase_2generator18(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":6
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":6
  * 
  * def _phrase(phrase):
  *     return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase)             # <<<<<<<<<<<<<<
@@ -4804,8 +4821,8 @@ static PyObject *__pyx_gb_5_cdec_7_phrase_2generator18(__pyx_GeneratorObject *__
   Py_ssize_t __pyx_t_2;
   PyObject *(*__pyx_t_3)(PyObject *);
   PyObject *__pyx_t_4 = NULL;
-  PyObject *__pyx_t_5 = NULL;
-  int __pyx_t_6;
+  int __pyx_t_5;
+  PyObject *__pyx_t_6 = NULL;
   PyObject *__pyx_t_7 = NULL;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("None", 0);
@@ -4858,16 +4875,13 @@ static PyObject *__pyx_gb_5_cdec_7_phrase_2generator18(__pyx_GeneratorObject *__
     __Pyx_GIVEREF(__pyx_t_4);
     __pyx_cur_scope->__pyx_v_w = __pyx_t_4;
     __pyx_t_4 = 0;
-    __pyx_t_5 = ((PyObject *)((PyObject*)(&PyUnicode_Type)));
-    __Pyx_INCREF(__pyx_t_5);
-    __pyx_t_6 = __Pyx_TypeCheck(__pyx_cur_scope->__pyx_v_w, __pyx_t_5); 
-    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
-    if (__pyx_t_6) {
-      __pyx_t_5 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_w, __pyx_n_s__encode); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_5);
-      __pyx_t_7 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_k_tuple_6), NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_5 = PyUnicode_Check(__pyx_cur_scope->__pyx_v_w); 
+    if (__pyx_t_5) {
+      __pyx_t_6 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_w, __pyx_n_s__encode); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_6);
+      __pyx_t_7 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_k_tuple_6), NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_7);
-      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
       __pyx_t_4 = __pyx_t_7;
       __pyx_t_7 = 0;
     } else {
@@ -4876,11 +4890,11 @@ static PyObject *__pyx_gb_5_cdec_7_phrase_2generator18(__pyx_GeneratorObject *__
       __Pyx_INCREF(__pyx_cur_scope->__pyx_v_w);
       PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_cur_scope->__pyx_v_w);
       __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_w);
-      __pyx_t_5 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_6 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_6);
       __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
-      __pyx_t_4 = __pyx_t_5;
-      __pyx_t_5 = 0;
+      __pyx_t_4 = __pyx_t_6;
+      __pyx_t_6 = 0;
     }
     __pyx_r = __pyx_t_4;
     __pyx_t_4 = 0;
@@ -4907,7 +4921,7 @@ static PyObject *__pyx_gb_5_cdec_7_phrase_2generator18(__pyx_GeneratorObject *__
   __pyx_L1_error:;
   __Pyx_XDECREF(__pyx_t_1);
   __Pyx_XDECREF(__pyx_t_4);
-  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
   __Pyx_XDECREF(__pyx_t_7);
   __Pyx_AddTraceback("genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_L0:;
@@ -4918,7 +4932,7 @@ static PyObject *__pyx_gb_5_cdec_7_phrase_2generator18(__pyx_GeneratorObject *__
   return NULL;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":5
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":5
  * import cdec.sa._sa as _sa
  * 
  * def _phrase(phrase):             # <<<<<<<<<<<<<<
@@ -4947,7 +4961,7 @@ static PyObject *__pyx_pf_5_cdec__phrase(CYTHON_UNUSED PyObject *__pyx_self, PyO
   __Pyx_INCREF(__pyx_cur_scope->__pyx_v_phrase);
   __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_phrase);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":6
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":6
  * 
  * def _phrase(phrase):
  *     return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase)             # <<<<<<<<<<<<<<
@@ -5058,7 +5072,7 @@ static int __pyx_pw_5_cdec_2NT_1__init__(PyObject *__pyx_v_self, PyObject *__pyx
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":11
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":11
  *     cdef public bytes cat
  *     cdef public unsigned ref
  *     def __init__(self, bytes cat, unsigned ref=0):             # <<<<<<<<<<<<<<
@@ -5071,7 +5085,7 @@ static int __pyx_pf_5_cdec_2NT___init__(struct __pyx_obj_5_cdec_NT *__pyx_v_self
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__init__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":13
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":13
  *     def __init__(self, bytes cat, unsigned ref=0):
  *         """NT(bytes cat, int ref=0) -> Non-terminal from category `cat`."""
  *         self.cat = cat             # <<<<<<<<<<<<<<
@@ -5084,7 +5098,7 @@ static int __pyx_pf_5_cdec_2NT___init__(struct __pyx_obj_5_cdec_NT *__pyx_v_self
   __Pyx_DECREF(((PyObject *)__pyx_v_self->cat));
   __pyx_v_self->cat = __pyx_v_cat;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":14
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":14
  *         """NT(bytes cat, int ref=0) -> Non-terminal from category `cat`."""
  *         self.cat = cat
  *         self.ref = ref             # <<<<<<<<<<<<<<
@@ -5109,7 +5123,7 @@ static PyObject *__pyx_pw_5_cdec_2NT_3__str__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":16
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":16
  *         self.ref = ref
  * 
  *     def __str__(self):             # <<<<<<<<<<<<<<
@@ -5128,7 +5142,7 @@ static PyObject *__pyx_pf_5_cdec_2NT_2__str__(struct __pyx_obj_5_cdec_NT *__pyx_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__str__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":17
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":17
  * 
  *     def __str__(self):
  *         if self.ref > 0:             # <<<<<<<<<<<<<<
@@ -5138,7 +5152,7 @@ static PyObject *__pyx_pf_5_cdec_2NT_2__str__(struct __pyx_obj_5_cdec_NT *__pyx_
   __pyx_t_1 = (__pyx_v_self->ref > 0);
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":18
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":18
  *     def __str__(self):
  *         if self.ref > 0:
  *             return '[%s,%d]' % (self.cat, self.ref)             # <<<<<<<<<<<<<<
@@ -5166,7 +5180,7 @@ static PyObject *__pyx_pf_5_cdec_2NT_2__str__(struct __pyx_obj_5_cdec_NT *__pyx_
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":19
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":19
  *         if self.ref > 0:
  *             return '[%s,%d]' % (self.cat, self.ref)
  *         return '[%s]' % self.cat             # <<<<<<<<<<<<<<
@@ -5204,7 +5218,7 @@ static PyObject *__pyx_pw_5_cdec_2NT_3cat_1__get__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":9
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":9
  * 
  * cdef class NT:
  *     cdef public bytes cat             # <<<<<<<<<<<<<<
@@ -5300,7 +5314,7 @@ static PyObject *__pyx_pw_5_cdec_2NT_3ref_1__get__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":10
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":10
  * cdef class NT:
  *     cdef public bytes cat
  *     cdef public unsigned ref             # <<<<<<<<<<<<<<
@@ -5418,7 +5432,7 @@ static int __pyx_pw_5_cdec_5NTRef_1__init__(PyObject *__pyx_v_self, PyObject *__
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":23
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":23
  * cdef class NTRef:
  *     cdef public unsigned ref
  *     def __init__(self, unsigned ref):             # <<<<<<<<<<<<<<
@@ -5431,7 +5445,7 @@ static int __pyx_pf_5_cdec_5NTRef___init__(struct __pyx_obj_5_cdec_NTRef *__pyx_
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__init__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":25
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":25
  *     def __init__(self, unsigned ref):
  *         """NTRef(int ref) -> Non-terminal reference."""
  *         self.ref = ref             # <<<<<<<<<<<<<<
@@ -5456,7 +5470,7 @@ static PyObject *__pyx_pw_5_cdec_5NTRef_3__str__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":27
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":27
  *         self.ref = ref
  * 
  *     def __str__(self):             # <<<<<<<<<<<<<<
@@ -5474,7 +5488,7 @@ static PyObject *__pyx_pf_5_cdec_5NTRef_2__str__(struct __pyx_obj_5_cdec_NTRef *
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__str__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":28
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":28
  * 
  *     def __str__(self):
  *         return '[%d]' % self.ref             # <<<<<<<<<<<<<<
@@ -5515,7 +5529,7 @@ static PyObject *__pyx_pw_5_cdec_5NTRef_3ref_1__get__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":22
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":22
  * 
  * cdef class NTRef:
  *     cdef public unsigned ref             # <<<<<<<<<<<<<<
@@ -5582,7 +5596,7 @@ static int __pyx_pf_5_cdec_5NTRef_3ref_2__set__(struct __pyx_obj_5_cdec_NTRef *_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":30
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":30
  *         return '[%d]' % self.ref
  * 
  * cdef TRule convert_rule(_sa.Rule rule):             # <<<<<<<<<<<<<<
@@ -5603,16 +5617,17 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
   __Pyx_RefNannyDeclarations
   PyObject *__pyx_t_1 = NULL;
   PyObject *__pyx_t_2 = NULL;
-  int __pyx_t_3;
+  int *__pyx_t_3;
   int __pyx_t_4;
   int __pyx_t_5;
   int __pyx_t_6;
+  int __pyx_t_7;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("convert_rule", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":31
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":31
  * 
  * cdef TRule convert_rule(_sa.Rule rule):
  *     lhs = _sa.sym_tocat(rule.lhs)             # <<<<<<<<<<<<<<
@@ -5621,7 +5636,7 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
  */
   __pyx_v_lhs = __pyx_f_4cdec_2sa_3_sa_sym_tocat(__pyx_v_rule->lhs);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":32
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":32
  * cdef TRule convert_rule(_sa.Rule rule):
  *     lhs = _sa.sym_tocat(rule.lhs)
  *     scores = dict(rule.scores)             # <<<<<<<<<<<<<<
@@ -5639,7 +5654,7 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
   __pyx_v_scores = ((PyObject*)__pyx_t_2);
   __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":33
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":33
  *     lhs = _sa.sym_tocat(rule.lhs)
  *     scores = dict(rule.scores)
  *     f, e = [], []             # <<<<<<<<<<<<<<
@@ -5650,42 +5665,43 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
   __Pyx_GOTREF(__pyx_t_2);
   __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_v_f = __pyx_t_2;
+  __pyx_v_f = ((PyObject*)__pyx_t_2);
   __pyx_t_2 = 0;
-  __pyx_v_e = __pyx_t_1;
+  __pyx_v_e = ((PyObject*)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":34
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":34
  *     scores = dict(rule.scores)
  *     f, e = [], []
  *     cdef int* fsyms = rule.f.syms             # <<<<<<<<<<<<<<
  *     for i in range(rule.f.n):
  *         if _sa.sym_isvar(fsyms[i]):
  */
-  __pyx_v_fsyms = __pyx_v_rule->f->syms;
+  __pyx_t_3 = __pyx_v_rule->f->syms;
+  __pyx_v_fsyms = __pyx_t_3;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":35
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":35
  *     f, e = [], []
  *     cdef int* fsyms = rule.f.syms
  *     for i in range(rule.f.n):             # <<<<<<<<<<<<<<
  *         if _sa.sym_isvar(fsyms[i]):
  *             f.append(NT(_sa.sym_tocat(fsyms[i])))
  */
-  __pyx_t_3 = __pyx_v_rule->f->n;
-  for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) {
-    __pyx_v_i = __pyx_t_4;
+  __pyx_t_4 = __pyx_v_rule->f->n;
+  for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) {
+    __pyx_v_i = __pyx_t_5;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":36
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":36
  *     cdef int* fsyms = rule.f.syms
  *     for i in range(rule.f.n):
  *         if _sa.sym_isvar(fsyms[i]):             # <<<<<<<<<<<<<<
  *             f.append(NT(_sa.sym_tocat(fsyms[i])))
  *         else:
  */
-    __pyx_t_5 = __pyx_f_4cdec_2sa_3_sa_sym_isvar((__pyx_v_fsyms[__pyx_v_i]));
-    if (__pyx_t_5) {
+    __pyx_t_6 = __pyx_f_4cdec_2sa_3_sa_sym_isvar((__pyx_v_fsyms[__pyx_v_i]));
+    if (__pyx_t_6) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":37
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":37
  *     for i in range(rule.f.n):
  *         if _sa.sym_isvar(fsyms[i]):
  *             f.append(NT(_sa.sym_tocat(fsyms[i])))             # <<<<<<<<<<<<<<
@@ -5702,13 +5718,13 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
       __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NT)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_1);
       __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
-      __pyx_t_6 = PyList_Append(__pyx_v_f, __pyx_t_1); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_7 = PyList_Append(__pyx_v_f, __pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
       goto __pyx_L5;
     }
     /*else*/ {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":39
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":39
  *             f.append(NT(_sa.sym_tocat(fsyms[i])))
  *         else:
  *             f.append(_sa.sym_tostring(fsyms[i]))             # <<<<<<<<<<<<<<
@@ -5717,43 +5733,44 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
  */
       __pyx_t_1 = PyBytes_FromString(__pyx_f_4cdec_2sa_3_sa_sym_tostring((__pyx_v_fsyms[__pyx_v_i]))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_1));
-      __pyx_t_6 = PyList_Append(__pyx_v_f, ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_7 = PyList_Append(__pyx_v_f, ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
     }
     __pyx_L5:;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":40
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":40
  *         else:
  *             f.append(_sa.sym_tostring(fsyms[i]))
  *     cdef int* esyms = rule.e.syms             # <<<<<<<<<<<<<<
  *     for i in range(rule.e.n):
  *         if _sa.sym_isvar(esyms[i]):
  */
-  __pyx_v_esyms = __pyx_v_rule->e->syms;
+  __pyx_t_3 = __pyx_v_rule->e->syms;
+  __pyx_v_esyms = __pyx_t_3;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":41
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":41
  *             f.append(_sa.sym_tostring(fsyms[i]))
  *     cdef int* esyms = rule.e.syms
  *     for i in range(rule.e.n):             # <<<<<<<<<<<<<<
  *         if _sa.sym_isvar(esyms[i]):
  *             e.append(NTRef(_sa.sym_getindex(esyms[i])))
  */
-  __pyx_t_3 = __pyx_v_rule->e->n;
-  for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) {
-    __pyx_v_i = __pyx_t_4;
+  __pyx_t_4 = __pyx_v_rule->e->n;
+  for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) {
+    __pyx_v_i = __pyx_t_5;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":42
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":42
  *     cdef int* esyms = rule.e.syms
  *     for i in range(rule.e.n):
  *         if _sa.sym_isvar(esyms[i]):             # <<<<<<<<<<<<<<
  *             e.append(NTRef(_sa.sym_getindex(esyms[i])))
  *         else:
  */
-    __pyx_t_5 = __pyx_f_4cdec_2sa_3_sa_sym_isvar((__pyx_v_esyms[__pyx_v_i]));
-    if (__pyx_t_5) {
+    __pyx_t_6 = __pyx_f_4cdec_2sa_3_sa_sym_isvar((__pyx_v_esyms[__pyx_v_i]));
+    if (__pyx_t_6) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":43
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":43
  *     for i in range(rule.e.n):
  *         if _sa.sym_isvar(esyms[i]):
  *             e.append(NTRef(_sa.sym_getindex(esyms[i])))             # <<<<<<<<<<<<<<
@@ -5770,13 +5787,13 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
       __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NTRef)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_1);
       __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
-      __pyx_t_6 = PyList_Append(__pyx_v_e, __pyx_t_1); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_7 = PyList_Append(__pyx_v_e, __pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
       goto __pyx_L8;
     }
     /*else*/ {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":45
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":45
  *             e.append(NTRef(_sa.sym_getindex(esyms[i])))
  *         else:
  *             e.append(_sa.sym_tostring(esyms[i]))             # <<<<<<<<<<<<<<
@@ -5785,13 +5802,13 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
  */
       __pyx_t_1 = PyBytes_FromString(__pyx_f_4cdec_2sa_3_sa_sym_tostring((__pyx_v_esyms[__pyx_v_i]))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_1));
-      __pyx_t_6 = PyList_Append(__pyx_v_e, ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_7 = PyList_Append(__pyx_v_e, ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
     }
     __pyx_L8:;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":46
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":46
  *         else:
  *             e.append(_sa.sym_tostring(esyms[i]))
  *     a = list(rule.alignments())             # <<<<<<<<<<<<<<
@@ -5814,7 +5831,7 @@ static struct __pyx_obj_5_cdec_TRule *__pyx_f_5_cdec_convert_rule(struct __pyx_o
   __pyx_v_a = ((PyObject*)__pyx_t_2);
   __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":47
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":47
  *             e.append(_sa.sym_tostring(esyms[i]))
  *     a = list(rule.alignments())
  *     return TRule(lhs, f, e, scores, a)             # <<<<<<<<<<<<<<
@@ -5884,7 +5901,7 @@ static int __pyx_pw_5_cdec_5TRule_1__init__(PyObject *__pyx_v_self, PyObject *__
     static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__lhs,&__pyx_n_s__f,&__pyx_n_s__e,&__pyx_n_s__scores,&__pyx_n_s__a,0};
     PyObject* values[5] = {0,0,0,0,0};
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":52
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":52
  *     cdef shared_ptr[grammar.TRule]* rule
  * 
  *     def __init__(self, lhs, f, e, scores, a=None):             # <<<<<<<<<<<<<<
@@ -5973,17 +5990,22 @@ static int __pyx_pf_5_cdec_5TRule___init__(struct __pyx_obj_5_cdec_TRule *__pyx_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__init__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":59
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":59
  *         scores: dictionary of feature scores
  *         a: optional list of alignment points"""
  *         self.rule = new shared_ptr[grammar.TRule](new grammar.TRule())             # <<<<<<<<<<<<<<
  *         self.lhs = lhs
  *         self.e = e
  */
-  try {__pyx_t_1 = new TRule();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[2]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+  try {
+    __pyx_t_1 = new TRule();
+  } catch(...) {
+    __Pyx_CppExn2PyErr();
+    {__pyx_filename = __pyx_f[2]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
   __pyx_v_self->rule = new boost::shared_ptr<TRule>(__pyx_t_1);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":60
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":60
  *         a: optional list of alignment points"""
  *         self.rule = new shared_ptr[grammar.TRule](new grammar.TRule())
  *         self.lhs = lhs             # <<<<<<<<<<<<<<
@@ -5992,7 +6014,7 @@ static int __pyx_pf_5_cdec_5TRule___init__(struct __pyx_obj_5_cdec_TRule *__pyx_
  */
   if (PyObject_SetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__lhs, __pyx_v_lhs) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":61
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":61
  *         self.rule = new shared_ptr[grammar.TRule](new grammar.TRule())
  *         self.lhs = lhs
  *         self.e = e             # <<<<<<<<<<<<<<
@@ -6001,7 +6023,7 @@ static int __pyx_pf_5_cdec_5TRule___init__(struct __pyx_obj_5_cdec_TRule *__pyx_
  */
   if (PyObject_SetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__e, __pyx_v_e) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":62
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":62
  *         self.lhs = lhs
  *         self.e = e
  *         self.f = f             # <<<<<<<<<<<<<<
@@ -6010,7 +6032,7 @@ static int __pyx_pf_5_cdec_5TRule___init__(struct __pyx_obj_5_cdec_TRule *__pyx_
  */
   if (PyObject_SetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__f, __pyx_v_f) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":63
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":63
  *         self.e = e
  *         self.f = f
  *         self.scores = scores             # <<<<<<<<<<<<<<
@@ -6019,7 +6041,7 @@ static int __pyx_pf_5_cdec_5TRule___init__(struct __pyx_obj_5_cdec_TRule *__pyx_
  */
   if (PyObject_SetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__scores, __pyx_v_scores) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":64
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":64
  *         self.f = f
  *         self.scores = scores
  *         if a:             # <<<<<<<<<<<<<<
@@ -6029,7 +6051,7 @@ static int __pyx_pf_5_cdec_5TRule___init__(struct __pyx_obj_5_cdec_TRule *__pyx_
   __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_a); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   if (__pyx_t_2) {
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":65
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":65
  *         self.scores = scores
  *         if a:
  *             self.a = a             # <<<<<<<<<<<<<<
@@ -6041,7 +6063,7 @@ static int __pyx_pf_5_cdec_5TRule___init__(struct __pyx_obj_5_cdec_TRule *__pyx_
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":66
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":66
  *         if a:
  *             self.a = a
  *         self.rule.get().ComputeArity()             # <<<<<<<<<<<<<<
@@ -6069,7 +6091,7 @@ static void __pyx_pw_5_cdec_5TRule_3__dealloc__(PyObject *__pyx_v_self) {
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":68
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":68
  *         self.rule.get().ComputeArity()
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -6081,7 +6103,7 @@ static void __pyx_pf_5_cdec_5TRule_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":69
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":69
  * 
  *     def __dealloc__(self):
  *         del self.rule             # <<<<<<<<<<<<<<
@@ -6104,7 +6126,7 @@ static PyObject *__pyx_pw_5_cdec_5TRule_5arity_1__get__(PyObject *__pyx_v_self)
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":72
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":72
  * 
  *     property arity:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -6121,7 +6143,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_5arity___get__(struct __pyx_obj_5_cdec_T
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":73
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":73
  *     property arity:
  *         def __get__(self):
  *             return self.rule.get().arity_             # <<<<<<<<<<<<<<
@@ -6158,7 +6180,7 @@ static PyObject *__pyx_pw_5_cdec_5TRule_1f_1__get__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":76
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":76
  * 
  *     property f:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -6185,7 +6207,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":77
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":77
  *     property f:
  *         def __get__(self):
  *             cdef vector[WordID]* f_ = &self.rule.get().f_             # <<<<<<<<<<<<<<
@@ -6194,7 +6216,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
  */
   __pyx_v_f_ = (&__pyx_v_self->rule->get()->f_);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":79
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":79
  *             cdef vector[WordID]* f_ = &self.rule.get().f_
  *             cdef WordID w
  *             cdef f = []             # <<<<<<<<<<<<<<
@@ -6206,7 +6228,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
   __pyx_v_f = ((PyObject *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":81
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":81
  *             cdef f = []
  *             cdef unsigned i
  *             cdef int idx = 0             # <<<<<<<<<<<<<<
@@ -6215,7 +6237,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
  */
   __pyx_v_idx = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":82
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":82
  *             cdef unsigned i
  *             cdef int idx = 0
  *             for i in range(f_.size()):             # <<<<<<<<<<<<<<
@@ -6226,7 +6248,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
   for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
     __pyx_v_i = __pyx_t_3;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":83
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":83
  *             cdef int idx = 0
  *             for i in range(f_.size()):
  *                 w = f_[0][i]             # <<<<<<<<<<<<<<
@@ -6235,7 +6257,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
  */
     __pyx_v_w = ((__pyx_v_f_[0])[__pyx_v_i]);
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":84
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":84
  *             for i in range(f_.size()):
  *                 w = f_[0][i]
  *                 if w < 0:             # <<<<<<<<<<<<<<
@@ -6245,7 +6267,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
     __pyx_t_4 = (__pyx_v_w < 0);
     if (__pyx_t_4) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":85
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":85
  *                 w = f_[0][i]
  *                 if w < 0:
  *                     idx += 1             # <<<<<<<<<<<<<<
@@ -6254,7 +6276,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
  */
       __pyx_v_idx = (__pyx_v_idx + 1);
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":86
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":86
  *                 if w < 0:
  *                     idx += 1
  *                     f.append(NT(TDConvert(-w).c_str(), idx))             # <<<<<<<<<<<<<<
@@ -6284,7 +6306,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
     }
     /*else*/ {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":88
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":88
  *                     f.append(NT(TDConvert(-w).c_str(), idx))
  *                 else:
  *                     f.append(unicode(TDConvert(w).c_str(), encoding='utf8'))             # <<<<<<<<<<<<<<
@@ -6313,7 +6335,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1f___get__(struct __pyx_obj_5_cdec_TRule
     __pyx_L5:;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":89
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":89
  *                 else:
  *                     f.append(unicode(TDConvert(w).c_str(), encoding='utf8'))
  *             return f             # <<<<<<<<<<<<<<
@@ -6351,7 +6373,7 @@ static int __pyx_pw_5_cdec_5TRule_1f_3__set__(PyObject *__pyx_v_self, PyObject *
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":91
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":91
  *             return f
  * 
  *         def __set__(self, f):             # <<<<<<<<<<<<<<
@@ -6369,15 +6391,15 @@ static int __pyx_pf_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   Py_ssize_t __pyx_t_1;
   unsigned int __pyx_t_2;
   PyObject *__pyx_t_3 = NULL;
-  PyObject *__pyx_t_4 = NULL;
-  int __pyx_t_5;
-  char *__pyx_t_6;
+  int __pyx_t_4;
+  char *__pyx_t_5;
+  PyObject *__pyx_t_6 = NULL;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__set__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":92
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":92
  * 
  *         def __set__(self, f):
  *             cdef vector[WordID]* f_ = &self.rule.get().f_             # <<<<<<<<<<<<<<
@@ -6386,7 +6408,7 @@ static int __pyx_pf_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_5_cdec_TRule *__p
  */
   __pyx_v_f_ = (&__pyx_v_self->rule->get()->f_);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":93
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":93
  *         def __set__(self, f):
  *             cdef vector[WordID]* f_ = &self.rule.get().f_
  *             f_.resize(len(f))             # <<<<<<<<<<<<<<
@@ -6396,7 +6418,7 @@ static int __pyx_pf_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   __pyx_t_1 = PyObject_Length(__pyx_v_f); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_v_f_->resize(__pyx_t_1);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":95
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":95
  *             f_.resize(len(f))
  *             cdef unsigned i
  *             cdef int idx = 0             # <<<<<<<<<<<<<<
@@ -6405,7 +6427,7 @@ static int __pyx_pf_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_5_cdec_TRule *__p
  */
   __pyx_v_idx = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":96
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":96
  *             cdef unsigned i
  *             cdef int idx = 0
  *             for i in range(len(f)):             # <<<<<<<<<<<<<<
@@ -6416,7 +6438,7 @@ static int __pyx_pf_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":97
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":97
  *             cdef int idx = 0
  *             for i in range(len(f)):
  *                 if isinstance(f[i], NT):             # <<<<<<<<<<<<<<
@@ -6425,54 +6447,51 @@ static int __pyx_pf_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_5_cdec_TRule *__p
  */
     __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_f, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __pyx_t_4 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NT));
-    __Pyx_INCREF(__pyx_t_4);
-    __pyx_t_5 = __Pyx_TypeCheck(__pyx_t_3, __pyx_t_4); 
+    __pyx_t_4 = __Pyx_TypeCheck(__pyx_t_3, ((PyObject*)__pyx_ptype_5_cdec_NT)); 
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
-    if (__pyx_t_5) {
+    if (__pyx_t_4) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":98
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":98
  *             for i in range(len(f)):
  *                 if isinstance(f[i], NT):
  *                     f_[0][i] = -TDConvert((<NT> f[i]).cat)             # <<<<<<<<<<<<<<
  *                 else:
  *                     fi = as_str(f[i])
  */
-      __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_f, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 98; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_4);
-      __pyx_t_6 = PyBytes_AsString(((PyObject *)((struct __pyx_obj_5_cdec_NT *)__pyx_t_4)->cat)); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 98; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
-      ((__pyx_v_f_[0])[__pyx_v_i]) = (-TD::Convert(__pyx_t_6));
+      __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_f, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 98; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyBytes_AsString(((PyObject *)((struct __pyx_obj_5_cdec_NT *)__pyx_t_3)->cat)); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 98; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      ((__pyx_v_f_[0])[__pyx_v_i]) = (-TD::Convert(__pyx_t_5));
       goto __pyx_L5;
     }
     /*else*/ {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":100
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":100
  *                     f_[0][i] = -TDConvert((<NT> f[i]).cat)
  *                 else:
  *                     fi = as_str(f[i])             # <<<<<<<<<<<<<<
  *                     f_[0][i] = TDConvert(fi)
  * 
  */
-      __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_f, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_4);
-      __pyx_t_3 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_t_4, NULL)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_f, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_3);
-      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+      __pyx_t_6 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_t_3, NULL)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       __Pyx_XDECREF(((PyObject *)__pyx_v_fi));
-      __pyx_v_fi = ((PyObject*)__pyx_t_3);
-      __pyx_t_3 = 0;
+      __pyx_v_fi = ((PyObject*)__pyx_t_6);
+      __pyx_t_6 = 0;
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":101
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":101
  *                 else:
  *                     fi = as_str(f[i])
  *                     f_[0][i] = TDConvert(fi)             # <<<<<<<<<<<<<<
  * 
  *     property e:
  */
-      __pyx_t_6 = PyBytes_AsString(((PyObject *)__pyx_v_fi)); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      ((__pyx_v_f_[0])[__pyx_v_i]) = TD::Convert(__pyx_t_6);
+      __pyx_t_5 = PyBytes_AsString(((PyObject *)__pyx_v_fi)); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      ((__pyx_v_f_[0])[__pyx_v_i]) = TD::Convert(__pyx_t_5);
     }
     __pyx_L5:;
   }
@@ -6481,7 +6500,7 @@ static int __pyx_pf_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   goto __pyx_L0;
   __pyx_L1_error:;
   __Pyx_XDECREF(__pyx_t_3);
-  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_6);
   __Pyx_AddTraceback("_cdec.TRule.f.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = -1;
   __pyx_L0:;
@@ -6501,7 +6520,7 @@ static PyObject *__pyx_pw_5_cdec_5TRule_1e_1__get__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":104
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":104
  * 
  *     property e:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -6528,7 +6547,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":105
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":105
  *     property e:
  *         def __get__(self):
  *             cdef vector[WordID]* e_ = &self.rule.get().e_             # <<<<<<<<<<<<<<
@@ -6537,7 +6556,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
  */
   __pyx_v_e_ = (&__pyx_v_self->rule->get()->e_);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":107
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":107
  *             cdef vector[WordID]* e_ = &self.rule.get().e_
  *             cdef WordID w
  *             cdef e = []             # <<<<<<<<<<<<<<
@@ -6549,7 +6568,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
   __pyx_v_e = ((PyObject *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":109
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":109
  *             cdef e = []
  *             cdef unsigned i
  *             cdef int idx = 0             # <<<<<<<<<<<<<<
@@ -6558,7 +6577,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
  */
   __pyx_v_idx = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":110
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":110
  *             cdef unsigned i
  *             cdef int idx = 0
  *             for i in range(e_.size()):             # <<<<<<<<<<<<<<
@@ -6569,7 +6588,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
   for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
     __pyx_v_i = __pyx_t_3;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":111
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":111
  *             cdef int idx = 0
  *             for i in range(e_.size()):
  *                 w = e_[0][i]             # <<<<<<<<<<<<<<
@@ -6578,7 +6597,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
  */
     __pyx_v_w = ((__pyx_v_e_[0])[__pyx_v_i]);
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":112
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":112
  *             for i in range(e_.size()):
  *                 w = e_[0][i]
  *                 if w < 1:             # <<<<<<<<<<<<<<
@@ -6588,7 +6607,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
     __pyx_t_4 = (__pyx_v_w < 1);
     if (__pyx_t_4) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":113
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":113
  *                 w = e_[0][i]
  *                 if w < 1:
  *                     idx += 1             # <<<<<<<<<<<<<<
@@ -6597,7 +6616,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
  */
       __pyx_v_idx = (__pyx_v_idx + 1);
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":114
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":114
  *                 if w < 1:
  *                     idx += 1
  *                     e.append(NTRef(1-w))             # <<<<<<<<<<<<<<
@@ -6622,7 +6641,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
     }
     /*else*/ {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":116
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":116
  *                     e.append(NTRef(1-w))
  *                 else:
  *                     e.append(unicode(TDConvert(w).c_str(), encoding='utf8'))             # <<<<<<<<<<<<<<
@@ -6651,7 +6670,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_1e___get__(struct __pyx_obj_5_cdec_TRule
     __pyx_L5:;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":117
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":117
  *                 else:
  *                     e.append(unicode(TDConvert(w).c_str(), encoding='utf8'))
  *             return e             # <<<<<<<<<<<<<<
@@ -6689,7 +6708,7 @@ static int __pyx_pw_5_cdec_5TRule_1e_3__set__(PyObject *__pyx_v_self, PyObject *
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":119
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":119
  *             return e
  * 
  *         def __set__(self, e):             # <<<<<<<<<<<<<<
@@ -6706,8 +6725,8 @@ static int __pyx_pf_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   Py_ssize_t __pyx_t_1;
   unsigned int __pyx_t_2;
   PyObject *__pyx_t_3 = NULL;
-  PyObject *__pyx_t_4 = NULL;
-  int __pyx_t_5;
+  int __pyx_t_4;
+  PyObject *__pyx_t_5 = NULL;
   WordID __pyx_t_6;
   char *__pyx_t_7;
   int __pyx_lineno = 0;
@@ -6715,7 +6734,7 @@ static int __pyx_pf_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__set__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":120
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":120
  * 
  *         def __set__(self, e):
  *             cdef vector[WordID]* e_ = &self.rule.get().e_             # <<<<<<<<<<<<<<
@@ -6724,7 +6743,7 @@ static int __pyx_pf_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_5_cdec_TRule *__p
  */
   __pyx_v_e_ = (&__pyx_v_self->rule->get()->e_);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":121
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":121
  *         def __set__(self, e):
  *             cdef vector[WordID]* e_ = &self.rule.get().e_
  *             e_.resize(len(e))             # <<<<<<<<<<<<<<
@@ -6734,7 +6753,7 @@ static int __pyx_pf_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   __pyx_t_1 = PyObject_Length(__pyx_v_e); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_v_e_->resize(__pyx_t_1);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":123
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":123
  *             e_.resize(len(e))
  *             cdef unsigned i
  *             for i in range(len(e)):             # <<<<<<<<<<<<<<
@@ -6745,7 +6764,7 @@ static int __pyx_pf_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":124
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":124
  *             cdef unsigned i
  *             for i in range(len(e)):
  *                 if isinstance(e[i], NTRef):             # <<<<<<<<<<<<<<
@@ -6754,52 +6773,49 @@ static int __pyx_pf_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_5_cdec_TRule *__p
  */
     __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_e, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __pyx_t_4 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NTRef));
-    __Pyx_INCREF(__pyx_t_4);
-    __pyx_t_5 = __Pyx_TypeCheck(__pyx_t_3, __pyx_t_4); 
+    __pyx_t_4 = __Pyx_TypeCheck(__pyx_t_3, ((PyObject*)__pyx_ptype_5_cdec_NTRef)); 
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
-    if (__pyx_t_5) {
+    if (__pyx_t_4) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":125
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":125
  *             for i in range(len(e)):
  *                 if isinstance(e[i], NTRef):
  *                     e_[0][i] = 1-e[i].ref             # <<<<<<<<<<<<<<
  *                 else:
  *                     ei = as_str(e[i])
  */
-      __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_e, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_4);
-      __pyx_t_3 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__ref); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_e, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_3);
-      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
-      __pyx_t_4 = PyNumber_Subtract(__pyx_int_1, __pyx_t_3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_4);
+      __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__ref); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_3 = PyNumber_Subtract(__pyx_int_1, __pyx_t_5); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyInt_from_py_WordID(__pyx_t_3); if (unlikely((__pyx_t_6 == (WordID)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-      __pyx_t_6 = __Pyx_PyInt_from_py_WordID(__pyx_t_4); if (unlikely((__pyx_t_6 == (WordID)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
       ((__pyx_v_e_[0])[__pyx_v_i]) = __pyx_t_6;
       goto __pyx_L5;
     }
     /*else*/ {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":127
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":127
  *                     e_[0][i] = 1-e[i].ref
  *                 else:
  *                     ei = as_str(e[i])             # <<<<<<<<<<<<<<
  *                     e_[0][i] = TDConvert(ei)
  * 
  */
-      __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_e, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_4);
-      __pyx_t_3 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_t_4, NULL)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_e, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_3);
-      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+      __pyx_t_5 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_t_3, NULL)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       __Pyx_XDECREF(((PyObject *)__pyx_v_ei));
-      __pyx_v_ei = ((PyObject*)__pyx_t_3);
-      __pyx_t_3 = 0;
+      __pyx_v_ei = ((PyObject*)__pyx_t_5);
+      __pyx_t_5 = 0;
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":128
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":128
  *                 else:
  *                     ei = as_str(e[i])
  *                     e_[0][i] = TDConvert(ei)             # <<<<<<<<<<<<<<
@@ -6816,7 +6832,7 @@ static int __pyx_pf_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_5_cdec_TRule *__p
   goto __pyx_L0;
   __pyx_L1_error:;
   __Pyx_XDECREF(__pyx_t_3);
-  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
   __Pyx_AddTraceback("_cdec.TRule.e.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = -1;
   __pyx_L0:;
@@ -6837,7 +6853,7 @@ static PyObject *__pyx_pw_5_cdec_5TRule_1a_1__get__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":131
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":131
  * 
  *     property a:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -6902,7 +6918,7 @@ static PyObject *__pyx_gb_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObject *__
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":133
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":133
  *         def __get__(self):
  *             cdef unsigned i
  *             cdef vector[grammar.AlignmentPoint]* a = &self.rule.get().a_             # <<<<<<<<<<<<<<
@@ -6911,7 +6927,7 @@ static PyObject *__pyx_gb_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObject *__
  */
   __pyx_cur_scope->__pyx_v_a = (&__pyx_cur_scope->__pyx_v_self->rule->get()->a_);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":134
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":134
  *             cdef unsigned i
  *             cdef vector[grammar.AlignmentPoint]* a = &self.rule.get().a_
  *             for i in range(a.size()):             # <<<<<<<<<<<<<<
@@ -6922,7 +6938,7 @@ static PyObject *__pyx_gb_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObject *__
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":135
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":135
  *             cdef vector[grammar.AlignmentPoint]* a = &self.rule.get().a_
  *             for i in range(a.size()):
  *                 yield (a[0][i].s_, a[0][i].t_)             # <<<<<<<<<<<<<<
@@ -6981,7 +6997,7 @@ static int __pyx_pw_5_cdec_5TRule_1a_4__set__(PyObject *__pyx_v_self, PyObject *
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":137
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":137
  *                 yield (a[0][i].s_, a[0][i].t_)
  * 
  *         def __set__(self, a):             # <<<<<<<<<<<<<<
@@ -7010,7 +7026,7 @@ static int __pyx_pf_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_5_cdec_TRule *__p
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__set__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":138
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":138
  * 
  *         def __set__(self, a):
  *             cdef vector[grammar.AlignmentPoint]* a_ = &self.rule.get().a_             # <<<<<<<<<<<<<<
@@ -7019,7 +7035,7 @@ static int __pyx_pf_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_5_cdec_TRule *__p
  */
   __pyx_v_a_ = (&__pyx_v_self->rule->get()->a_);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":139
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":139
  *         def __set__(self, a):
  *             cdef vector[grammar.AlignmentPoint]* a_ = &self.rule.get().a_
  *             a_.resize(len(a))             # <<<<<<<<<<<<<<
@@ -7029,7 +7045,7 @@ static int __pyx_pf_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_5_cdec_TRule *__p
   __pyx_t_1 = PyObject_Length(__pyx_v_a); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_v_a_->resize(__pyx_t_1);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":142
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":142
  *             cdef unsigned i
  *             cdef int s, t
  *             for i in range(len(a)):             # <<<<<<<<<<<<<<
@@ -7040,7 +7056,7 @@ static int __pyx_pf_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_5_cdec_TRule *__p
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":143
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":143
  *             cdef int s, t
  *             for i in range(len(a)):
  *                 s, t = a[i]             # <<<<<<<<<<<<<<
@@ -7073,7 +7089,9 @@ static int __pyx_pf_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_5_cdec_TRule *__p
       __Pyx_INCREF(__pyx_t_5);
       #else
       __pyx_t_4 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_4);
       __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
       #endif
       __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     } else
@@ -7105,7 +7123,7 @@ static int __pyx_pf_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_5_cdec_TRule *__p
     __pyx_v_s = __pyx_t_8;
     __pyx_v_t = __pyx_t_9;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":144
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":144
  *             for i in range(len(a)):
  *                 s, t = a[i]
  *                 a_[0][i] = grammar.AlignmentPoint(s, t)             # <<<<<<<<<<<<<<
@@ -7140,7 +7158,7 @@ static PyObject *__pyx_pw_5_cdec_5TRule_6scores_1__get__(PyObject *__pyx_v_self)
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":147
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":147
  * 
  *     property scores:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -7158,7 +7176,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_6scores___get__(struct __pyx_obj_5_cdec_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":148
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":148
  *     property scores:
  *         def __get__(self):
  *             cdef SparseVector scores = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -7171,7 +7189,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_6scores___get__(struct __pyx_obj_5_cdec_
   __pyx_v_scores = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":149
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":149
  *         def __get__(self):
  *             cdef SparseVector scores = SparseVector.__new__(SparseVector)
  *             scores.vector = new FastSparseVector[double](self.rule.get().scores_)             # <<<<<<<<<<<<<<
@@ -7180,7 +7198,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_6scores___get__(struct __pyx_obj_5_cdec_
  */
   __pyx_v_scores->vector = new FastSparseVector<double>(__pyx_v_self->rule->get()->scores_);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":150
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":150
  *             cdef SparseVector scores = SparseVector.__new__(SparseVector)
  *             scores.vector = new FastSparseVector[double](self.rule.get().scores_)
  *             return scores             # <<<<<<<<<<<<<<
@@ -7216,7 +7234,7 @@ static int __pyx_pw_5_cdec_5TRule_6scores_3__set__(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":152
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":152
  *             return scores
  * 
  *         def __set__(self, scores):             # <<<<<<<<<<<<<<
@@ -7248,7 +7266,7 @@ static int __pyx_pf_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_5_cdec_TRule
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__set__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":153
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":153
  * 
  *         def __set__(self, scores):
  *             cdef FastSparseVector[double]* scores_ = &self.rule.get().scores_             # <<<<<<<<<<<<<<
@@ -7257,7 +7275,7 @@ static int __pyx_pf_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_5_cdec_TRule
  */
   __pyx_v_scores_ = (&__pyx_v_self->rule->get()->scores_);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":154
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":154
  *         def __set__(self, scores):
  *             cdef FastSparseVector[double]* scores_ = &self.rule.get().scores_
  *             scores_.clear()             # <<<<<<<<<<<<<<
@@ -7266,7 +7284,7 @@ static int __pyx_pf_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_5_cdec_TRule
  */
   __pyx_v_scores_->clear();
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":157
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":157
  *             cdef int fid
  *             cdef float fval
  *             for fname, fval in scores.items():             # <<<<<<<<<<<<<<
@@ -7337,7 +7355,9 @@ static int __pyx_pf_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_5_cdec_TRule
       __Pyx_INCREF(__pyx_t_6);
       #else
       __pyx_t_5 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
       __pyx_t_6 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_6);
       #endif
       __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     } else
@@ -7369,7 +7389,7 @@ static int __pyx_pf_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_5_cdec_TRule
     __pyx_t_5 = 0;
     __pyx_v_fval = __pyx_t_9;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":158
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":158
  *             cdef float fval
  *             for fname, fval in scores.items():
  *                 fn = as_str(fname)             # <<<<<<<<<<<<<<
@@ -7382,7 +7402,7 @@ static int __pyx_pf_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_5_cdec_TRule
     __pyx_v_fn = ((PyObject*)__pyx_t_2);
     __pyx_t_2 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":159
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":159
  *             for fname, fval in scores.items():
  *                 fn = as_str(fname)
  *                 fid = FDConvert(fn)             # <<<<<<<<<<<<<<
@@ -7392,7 +7412,7 @@ static int __pyx_pf_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_5_cdec_TRule
     __pyx_t_10 = PyBytes_AsString(((PyObject *)__pyx_v_fn)); if (unlikely((!__pyx_t_10) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __pyx_v_fid = FD::Convert(__pyx_t_10);
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":160
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":160
  *                 fn = as_str(fname)
  *                 fid = FDConvert(fn)
  *                 if fid < 0: raise KeyError(fname)             # <<<<<<<<<<<<<<
@@ -7416,7 +7436,7 @@ static int __pyx_pf_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_5_cdec_TRule
     }
     __pyx_L7:;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":161
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":161
  *                 fid = FDConvert(fn)
  *                 if fid < 0: raise KeyError(fname)
  *                 scores_.set_value(fid, fval)             # <<<<<<<<<<<<<<
@@ -7455,7 +7475,7 @@ static PyObject *__pyx_pw_5_cdec_5TRule_3lhs_1__get__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":164
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":164
  * 
  *     property lhs:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -7473,7 +7493,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_3lhs___get__(struct __pyx_obj_5_cdec_TRu
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":165
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":165
  *     property lhs:
  *         def __get__(self):
  *             return NT(TDConvert(-self.rule.get().lhs_).c_str())             # <<<<<<<<<<<<<<
@@ -7519,7 +7539,7 @@ static int __pyx_pw_5_cdec_5TRule_3lhs_3__set__(PyObject *__pyx_v_self, PyObject
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":167
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":167
  *             return NT(TDConvert(-self.rule.get().lhs_).c_str())
  * 
  *         def __set__(self, lhs):             # <<<<<<<<<<<<<<
@@ -7530,9 +7550,9 @@ static int __pyx_pw_5_cdec_5TRule_3lhs_3__set__(PyObject *__pyx_v_self, PyObject
 static int __pyx_pf_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_5_cdec_TRule *__pyx_v_self, PyObject *__pyx_v_lhs) {
   int __pyx_r;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
+  int __pyx_t_1;
   int __pyx_t_2;
-  int __pyx_t_3;
+  PyObject *__pyx_t_3 = NULL;
   PyObject *__pyx_t_4 = NULL;
   char *__pyx_t_5;
   int __pyx_lineno = 0;
@@ -7541,35 +7561,32 @@ static int __pyx_pf_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_5_cdec_TRule *_
   __Pyx_RefNannySetupContext("__set__", 0);
   __Pyx_INCREF(__pyx_v_lhs);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":168
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":168
  * 
  *         def __set__(self, lhs):
  *             if not isinstance(lhs, NT):             # <<<<<<<<<<<<<<
  *                 lhs = NT(lhs)
  *             self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat)
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NT));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_lhs, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  __pyx_t_3 = (!__pyx_t_2);
-  if (__pyx_t_3) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_lhs, ((PyObject*)__pyx_ptype_5_cdec_NT)); 
+  __pyx_t_2 = (!__pyx_t_1);
+  if (__pyx_t_2) {
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":169
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":169
  *         def __set__(self, lhs):
  *             if not isinstance(lhs, NT):
  *                 lhs = NT(lhs)             # <<<<<<<<<<<<<<
  *             self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat)
  * 
  */
-    __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
     __Pyx_INCREF(__pyx_v_lhs);
-    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_lhs);
+    PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_lhs);
     __Pyx_GIVEREF(__pyx_v_lhs);
-    __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NT)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NT)), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
-    __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+    __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
     __Pyx_DECREF(__pyx_v_lhs);
     __pyx_v_lhs = __pyx_t_4;
     __pyx_t_4 = 0;
@@ -7577,7 +7594,7 @@ static int __pyx_pf_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_5_cdec_TRule *_
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":170
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":170
  *             if not isinstance(lhs, NT):
  *                 lhs = NT(lhs)
  *             self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat)             # <<<<<<<<<<<<<<
@@ -7590,7 +7607,7 @@ static int __pyx_pf_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_5_cdec_TRule *_
   __pyx_r = 0;
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_3);
   __Pyx_XDECREF(__pyx_t_4);
   __Pyx_AddTraceback("_cdec.TRule.lhs.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = -1;
@@ -7612,7 +7629,7 @@ static PyObject *__pyx_pw_5_cdec_5TRule_5__str__(PyObject *__pyx_v_self) {
 }
 static PyObject *__pyx_gb_5_cdec_5TRule_7__str___2generator19(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":173
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":173
  * 
  *     def __str__(self):
  *         scores = ' '.join('%s=%s' % feat for feat in self.scores)             # <<<<<<<<<<<<<<
@@ -7754,7 +7771,7 @@ static PyObject *__pyx_gb_5_cdec_5TRule_7__str___2generator19(__pyx_GeneratorObj
   return NULL;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":172
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":172
  *             self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat)
  * 
  *     def __str__(self):             # <<<<<<<<<<<<<<
@@ -7786,7 +7803,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_4__str__(struct __pyx_obj_5_cdec_TRule *
   __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self);
   __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":173
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":173
  * 
  *     def __str__(self):
  *         scores = ' '.join('%s=%s' % feat for feat in self.scores)             # <<<<<<<<<<<<<<
@@ -7809,7 +7826,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_4__str__(struct __pyx_obj_5_cdec_TRule *
   __pyx_v_scores = __pyx_t_2;
   __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":174
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":174
  *     def __str__(self):
  *         scores = ' '.join('%s=%s' % feat for feat in self.scores)
  *         return '%s ||| %s ||| %s ||| %s' % (self.lhs,             # <<<<<<<<<<<<<<
@@ -7820,7 +7837,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_4__str__(struct __pyx_obj_5_cdec_TRule *
   __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s__lhs); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":175
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":175
  *         scores = ' '.join('%s=%s' % feat for feat in self.scores)
  *         return '%s ||| %s ||| %s ||| %s' % (self.lhs,
  *                 _phrase(self.f), _phrase(self.e), scores)             # <<<<<<<<<<<<<<
@@ -7961,7 +7978,7 @@ static int __pyx_pw_5_cdec_5MRule_1__init__(PyObject *__pyx_v_self, PyObject *__
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":178
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":178
  * 
  * cdef class MRule(TRule):
  *     def __init__(self, lhs, rhs, scores):             # <<<<<<<<<<<<<<
@@ -7987,7 +8004,7 @@ static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__init__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":183
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":183
  *         rhs: right hand side phrase (list of words/NT)
  *         scores: dictionary of feature scores"""
  *         cdef unsigned i = 1             # <<<<<<<<<<<<<<
@@ -7996,7 +8013,7 @@ static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_
  */
   __pyx_v_i = 1;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":184
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":184
  *         scores: dictionary of feature scores"""
  *         cdef unsigned i = 1
  *         e = []             # <<<<<<<<<<<<<<
@@ -8005,10 +8022,10 @@ static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_
  */
   __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_v_e = __pyx_t_1;
+  __pyx_v_e = ((PyObject*)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":185
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":185
  *         cdef unsigned i = 1
  *         e = []
  *         for s in rhs:             # <<<<<<<<<<<<<<
@@ -8053,20 +8070,17 @@ static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_
     __pyx_v_s = __pyx_t_4;
     __pyx_t_4 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":186
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":186
  *         e = []
  *         for s in rhs:
  *             if isinstance(s, NT):             # <<<<<<<<<<<<<<
  *                 e.append(NTRef(i))
  *                 i += 1
  */
-    __pyx_t_4 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NT));
-    __Pyx_INCREF(__pyx_t_4);
-    __pyx_t_5 = __Pyx_TypeCheck(__pyx_v_s, __pyx_t_4); 
-    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    __pyx_t_5 = __Pyx_TypeCheck(__pyx_v_s, ((PyObject*)__pyx_ptype_5_cdec_NT)); 
     if (__pyx_t_5) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":187
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":187
  *         for s in rhs:
  *             if isinstance(s, NT):
  *                 e.append(NTRef(i))             # <<<<<<<<<<<<<<
@@ -8086,7 +8100,7 @@ static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_
       __pyx_t_7 = PyList_Append(__pyx_v_e, __pyx_t_4); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":188
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":188
  *             if isinstance(s, NT):
  *                 e.append(NTRef(i))
  *                 i += 1             # <<<<<<<<<<<<<<
@@ -8098,7 +8112,7 @@ static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_
     }
     /*else*/ {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":190
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":190
  *                 i += 1
  *             else:
  *                 e.append(s)             # <<<<<<<<<<<<<<
@@ -8111,7 +8125,7 @@ static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_
   }
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":191
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":191
  *             else:
  *                 e.append(s)
  *         super(MRule, self).__init__(lhs, rhs, e, scores, None)             # <<<<<<<<<<<<<<
@@ -8179,7 +8193,7 @@ static void __pyx_pw_5_cdec_7Grammar_1__dealloc__(PyObject *__pyx_v_self) {
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":196
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":196
  *     cdef shared_ptr[grammar.Grammar]* grammar
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -8191,7 +8205,7 @@ static void __pyx_pf_5_cdec_7Grammar___dealloc__(CYTHON_UNUSED struct __pyx_obj_
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":197
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":197
  * 
  *     def __dealloc__(self):
  *         del self.grammar             # <<<<<<<<<<<<<<
@@ -8215,7 +8229,7 @@ static PyObject *__pyx_pw_5_cdec_7Grammar_3__iter__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":199
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":199
  *         del self.grammar
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -8278,7 +8292,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":200
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":200
  * 
  *     def __iter__(self):
  *         cdef grammar.const_GrammarIter* root = self.grammar.get().GetRoot()             # <<<<<<<<<<<<<<
@@ -8287,7 +8301,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p
  */
   __pyx_cur_scope->__pyx_v_root = __pyx_cur_scope->__pyx_v_self->grammar->get()->GetRoot();
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":201
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":201
  *     def __iter__(self):
  *         cdef grammar.const_GrammarIter* root = self.grammar.get().GetRoot()
  *         cdef grammar.const_RuleBin* rbin = root.GetRules()             # <<<<<<<<<<<<<<
@@ -8296,7 +8310,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p
  */
   __pyx_cur_scope->__pyx_v_rbin = __pyx_cur_scope->__pyx_v_root->GetRules();
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":204
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":204
  *         cdef TRule trule
  *         cdef unsigned i
  *         for i in range(rbin.GetNumRules()):             # <<<<<<<<<<<<<<
@@ -8307,7 +8321,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":205
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":205
  *         cdef unsigned i
  *         for i in range(rbin.GetNumRules()):
  *             trule = TRule.__new__(TRule)             # <<<<<<<<<<<<<<
@@ -8323,7 +8337,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p
     __pyx_cur_scope->__pyx_v_trule = ((struct __pyx_obj_5_cdec_TRule *)__pyx_t_3);
     __pyx_t_3 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":206
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":206
  *         for i in range(rbin.GetNumRules()):
  *             trule = TRule.__new__(TRule)
  *             trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i))             # <<<<<<<<<<<<<<
@@ -8332,7 +8346,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p
  */
     __pyx_cur_scope->__pyx_v_trule->rule = new boost::shared_ptr<TRule>(__pyx_cur_scope->__pyx_v_rbin->GetIthRule(__pyx_cur_scope->__pyx_v_i));
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":207
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":207
  *             trule = TRule.__new__(TRule)
  *             trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i))
  *             yield trule             # <<<<<<<<<<<<<<
@@ -8377,7 +8391,7 @@ static PyObject *__pyx_pw_5_cdec_7Grammar_4name_1__get__(PyObject *__pyx_v_self)
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":210
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":210
  * 
  *     property name:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -8395,7 +8409,7 @@ static PyObject *__pyx_pf_5_cdec_7Grammar_4name___get__(struct __pyx_obj_5_cdec_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":211
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":211
  *     property name:
  *         def __get__(self):
  *             str(self.grammar.get().GetGrammarName().c_str())             # <<<<<<<<<<<<<<
@@ -8438,7 +8452,7 @@ static int __pyx_pw_5_cdec_7Grammar_4name_3__set__(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":213
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":213
  *             str(self.grammar.get().GetGrammarName().c_str())
  * 
  *         def __set__(self, name):             # <<<<<<<<<<<<<<
@@ -8457,7 +8471,7 @@ static int __pyx_pf_5_cdec_7Grammar_4name_2__set__(struct __pyx_obj_5_cdec_Gramm
   __Pyx_RefNannySetupContext("__set__", 0);
   __Pyx_INCREF(__pyx_v_name);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":214
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":214
  * 
  *         def __set__(self, name):
  *             name = as_str(name)             # <<<<<<<<<<<<<<
@@ -8470,7 +8484,7 @@ static int __pyx_pf_5_cdec_7Grammar_4name_2__set__(struct __pyx_obj_5_cdec_Gramm
   __pyx_v_name = __pyx_t_1;
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":215
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":215
  *         def __set__(self, name):
  *             name = as_str(name)
  *             self.grammar.get().SetGrammarName(name)             # <<<<<<<<<<<<<<
@@ -8543,7 +8557,7 @@ static int __pyx_pw_5_cdec_11TextGrammar_1__init__(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/grammar.pxi":218
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":218
  * 
  * cdef class TextGrammar(Grammar):
  *     def __init__(self, rules):             # <<<<<<<<<<<<<<
@@ -8561,14 +8575,13 @@ static int __pyx_pf_5_cdec_11TextGrammar___init__(struct __pyx_obj_5_cdec_TextGr
   PyObject *(*__pyx_t_3)(PyObject *);
   PyObject *__pyx_t_4 = NULL;
   int __pyx_t_5;
-  PyObject *__pyx_t_6 = NULL;
-  int __pyx_t_7;
+  int __pyx_t_6;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__init__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":220
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":220
  *     def __init__(self, rules):
  *         """TextGrammar(rules) -> SCFG Grammar containing the rules."""
  *         self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar())             # <<<<<<<<<<<<<<
@@ -8577,7 +8590,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___init__(struct __pyx_obj_5_cdec_TextGr
  */
   __pyx_v_self->__pyx_base.grammar = new boost::shared_ptr<Grammar>(new TextGrammar());
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":221
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":221
  *         """TextGrammar(rules) -> SCFG Grammar containing the rules."""
  *         self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar())
  *         cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get()             # <<<<<<<<<<<<<<
@@ -8586,7 +8599,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___init__(struct __pyx_obj_5_cdec_TextGr
  */
   __pyx_v__g = ((TextGrammar *)__pyx_v_self->__pyx_base.grammar->get());
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":222
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":222
  *         self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar())
  *         cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get()
  *         for trule in rules:             # <<<<<<<<<<<<<<
@@ -8631,20 +8644,17 @@ static int __pyx_pf_5_cdec_11TextGrammar___init__(struct __pyx_obj_5_cdec_TextGr
     __pyx_v_trule = __pyx_t_4;
     __pyx_t_4 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":223
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":223
  *         cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get()
  *         for trule in rules:
  *             if isinstance(trule, _sa.Rule):             # <<<<<<<<<<<<<<
  *                 trule = convert_rule(trule)
  *             elif not isinstance(trule, TRule):
  */
-    __pyx_t_4 = ((PyObject *)((PyObject*)__pyx_ptype_4cdec_2sa_3_sa_Rule));
-    __Pyx_INCREF(__pyx_t_4);
-    __pyx_t_5 = __Pyx_TypeCheck(__pyx_v_trule, __pyx_t_4); 
-    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    __pyx_t_5 = __Pyx_TypeCheck(__pyx_v_trule, ((PyObject*)__pyx_ptype_4cdec_2sa_3_sa_Rule)); 
     if (__pyx_t_5) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":224
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":224
  *         for trule in rules:
  *             if isinstance(trule, _sa.Rule):
  *                 trule = convert_rule(trule)             # <<<<<<<<<<<<<<
@@ -8652,47 +8662,41 @@ static int __pyx_pf_5_cdec_11TextGrammar___init__(struct __pyx_obj_5_cdec_TextGr
  *                 raise ValueError('the grammar should contain TRule objects')
  */
       if (!(likely(((__pyx_v_trule) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_trule, __pyx_ptype_4cdec_2sa_3_sa_Rule))))) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __pyx_t_4 = __pyx_v_trule;
-      __Pyx_INCREF(__pyx_t_4);
-      __pyx_t_6 = ((PyObject *)__pyx_f_5_cdec_convert_rule(((struct __pyx_obj_4cdec_2sa_3_sa_Rule *)__pyx_t_4))); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_6);
-      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+      __pyx_t_4 = ((PyObject *)__pyx_f_5_cdec_convert_rule(((struct __pyx_obj_4cdec_2sa_3_sa_Rule *)__pyx_v_trule))); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_4);
       __Pyx_DECREF(__pyx_v_trule);
-      __pyx_v_trule = __pyx_t_6;
-      __pyx_t_6 = 0;
+      __pyx_v_trule = __pyx_t_4;
+      __pyx_t_4 = 0;
       goto __pyx_L5;
     }
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":225
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":225
  *             if isinstance(trule, _sa.Rule):
  *                 trule = convert_rule(trule)
  *             elif not isinstance(trule, TRule):             # <<<<<<<<<<<<<<
  *                 raise ValueError('the grammar should contain TRule objects')
  *             _g.AddRule((<TRule> trule).rule[0])
  */
-    __pyx_t_6 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_TRule));
-    __Pyx_INCREF(__pyx_t_6);
-    __pyx_t_5 = __Pyx_TypeCheck(__pyx_v_trule, __pyx_t_6); 
-    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
-    __pyx_t_7 = (!__pyx_t_5);
-    if (__pyx_t_7) {
+    __pyx_t_5 = __Pyx_TypeCheck(__pyx_v_trule, ((PyObject*)__pyx_ptype_5_cdec_TRule)); 
+    __pyx_t_6 = (!__pyx_t_5);
+    if (__pyx_t_6) {
 
-      /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":226
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":226
  *                 trule = convert_rule(trule)
  *             elif not isinstance(trule, TRule):
  *                 raise ValueError('the grammar should contain TRule objects')             # <<<<<<<<<<<<<<
  *             _g.AddRule((<TRule> trule).rule[0])
  */
-      __pyx_t_6 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_14), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_6);
-      __Pyx_Raise(__pyx_t_6, 0, 0, 0);
-      __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+      __pyx_t_4 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_14), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_4);
+      __Pyx_Raise(__pyx_t_4, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
       {__pyx_filename = __pyx_f[2]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       goto __pyx_L5;
     }
     __pyx_L5:;
 
-    /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":227
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":227
  *             elif not isinstance(trule, TRule):
  *                 raise ValueError('the grammar should contain TRule objects')
  *             _g.AddRule((<TRule> trule).rule[0])             # <<<<<<<<<<<<<<
@@ -8706,7 +8710,6 @@ static int __pyx_pf_5_cdec_11TextGrammar___init__(struct __pyx_obj_5_cdec_TextGr
   __pyx_L1_error:;
   __Pyx_XDECREF(__pyx_t_1);
   __Pyx_XDECREF(__pyx_t_4);
-  __Pyx_XDECREF(__pyx_t_6);
   __Pyx_AddTraceback("_cdec.TextGrammar.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = -1;
   __pyx_L0:;
@@ -8724,7 +8727,7 @@ static void __pyx_pw_5_cdec_10Hypergraph_1__dealloc__(PyObject *__pyx_v_self) {
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":8
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":8
  *     cdef MT19937* rng
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -8737,7 +8740,7 @@ static void __pyx_pf_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_5_cdec_Hyp
   int __pyx_t_1;
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":9
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":9
  * 
  *     def __dealloc__(self):
  *         del self.hg             # <<<<<<<<<<<<<<
@@ -8746,7 +8749,7 @@ static void __pyx_pf_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_5_cdec_Hyp
  */
   delete __pyx_v_self->hg;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":10
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":10
  *     def __dealloc__(self):
  *         del self.hg
  *         if self.rng != NULL:             # <<<<<<<<<<<<<<
@@ -8756,7 +8759,7 @@ static void __pyx_pf_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_5_cdec_Hyp
   __pyx_t_1 = (__pyx_v_self->rng != NULL);
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":11
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":11
  *         del self.hg
  *         if self.rng != NULL:
  *             del self.rng             # <<<<<<<<<<<<<<
@@ -8771,7 +8774,7 @@ static void __pyx_pf_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_5_cdec_Hyp
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":13
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":13
  *             del self.rng
  * 
  *     cdef MT19937* _rng(self):             # <<<<<<<<<<<<<<
@@ -8789,7 +8792,7 @@ static MT19937 *__pyx_f_5_cdec_10Hypergraph__rng(struct __pyx_obj_5_cdec_Hypergr
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("_rng", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":14
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":14
  * 
  *     cdef MT19937* _rng(self):
  *         if self.rng == NULL:             # <<<<<<<<<<<<<<
@@ -8799,20 +8802,25 @@ static MT19937 *__pyx_f_5_cdec_10Hypergraph__rng(struct __pyx_obj_5_cdec_Hypergr
   __pyx_t_1 = (__pyx_v_self->rng == NULL);
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":15
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":15
  *     cdef MT19937* _rng(self):
  *         if self.rng == NULL:
  *             self.rng = new MT19937()             # <<<<<<<<<<<<<<
  *         return self.rng
  * 
  */
-    try {__pyx_t_2 = new MT19937();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+    try {
+      __pyx_t_2 = new MT19937();
+    } catch(...) {
+      __Pyx_CppExn2PyErr();
+      {__pyx_filename = __pyx_f[3]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
     __pyx_v_self->rng = __pyx_t_2;
     goto __pyx_L3;
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":16
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":16
  *         if self.rng == NULL:
  *             self.rng = new MT19937()
  *         return self.rng             # <<<<<<<<<<<<<<
@@ -8844,7 +8852,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_3viterbi(PyObject *__pyx_v_self, C
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":18
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":18
  *         return self.rng
  * 
  *     def viterbi(self):             # <<<<<<<<<<<<<<
@@ -8863,7 +8871,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_2viterbi(struct __pyx_obj_5_cdec_H
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("viterbi", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":21
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":21
  *         """hg.viterbi() -> String for the best hypothesis in the hypergraph."""
  *         cdef vector[WordID] trans
  *         hypergraph.ViterbiESentence(self.hg[0], &trans)             # <<<<<<<<<<<<<<
@@ -8872,7 +8880,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_2viterbi(struct __pyx_obj_5_cdec_H
  */
   ViterbiESentence((__pyx_v_self->hg[0]), (&__pyx_v_trans));
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":22
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":22
  *         cdef vector[WordID] trans
  *         hypergraph.ViterbiESentence(self.hg[0], &trans)
  *         return unicode(GetString(trans).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -8922,7 +8930,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_5viterbi_trees(PyObject *__pyx_v_s
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":24
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":24
  *         return unicode(GetString(trans).c_str(), 'utf8')
  * 
  *     def viterbi_trees(self):             # <<<<<<<<<<<<<<
@@ -8942,7 +8950,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_obj_5_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("viterbi_trees", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":29
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":29
  *         e_tree: Target tree for the best hypothesis in the hypergraph.
  *         """
  *         f_tree = unicode(hypergraph.ViterbiFTree(self.hg[0]).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -8965,7 +8973,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_obj_5_
   __pyx_v_f_tree = ((PyObject*)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":30
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":30
  *         """
  *         f_tree = unicode(hypergraph.ViterbiFTree(self.hg[0]).c_str(), 'utf8')
  *         e_tree = unicode(hypergraph.ViterbiETree(self.hg[0]).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -8988,7 +8996,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_obj_5_
   __pyx_v_e_tree = ((PyObject*)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":31
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":31
  *         f_tree = unicode(hypergraph.ViterbiFTree(self.hg[0]).c_str(), 'utf8')
  *         e_tree = unicode(hypergraph.ViterbiETree(self.hg[0]).c_str(), 'utf8')
  *         return (f_tree, e_tree)             # <<<<<<<<<<<<<<
@@ -9035,7 +9043,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_7viterbi_features(PyObject *__pyx_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":33
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":33
  *         return (f_tree, e_tree)
  * 
  *     def viterbi_features(self):             # <<<<<<<<<<<<<<
@@ -9053,7 +9061,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6viterbi_features(struct __pyx_obj
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("viterbi_features", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":36
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":36
  *         """hg.viterbi_features() -> SparseVector with the features corresponding
  *         to the best derivation in the hypergraph."""
  *         cdef SparseVector fmap = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -9066,7 +9074,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6viterbi_features(struct __pyx_obj
   __pyx_v_fmap = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":37
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":37
  *         to the best derivation in the hypergraph."""
  *         cdef SparseVector fmap = SparseVector.__new__(SparseVector)
  *         fmap.vector = new FastSparseVector[weight_t](hypergraph.ViterbiFeatures(self.hg[0]))             # <<<<<<<<<<<<<<
@@ -9075,7 +9083,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6viterbi_features(struct __pyx_obj
  */
   __pyx_v_fmap->vector = new FastSparseVector<weight_t>(ViterbiFeatures((__pyx_v_self->hg[0])));
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":38
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":38
  *         cdef SparseVector fmap = SparseVector.__new__(SparseVector)
  *         fmap.vector = new FastSparseVector[weight_t](hypergraph.ViterbiFeatures(self.hg[0]))
  *         return fmap             # <<<<<<<<<<<<<<
@@ -9111,7 +9119,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_9viterbi_forest(PyObject *__pyx_v_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":40
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":40
  *         return fmap
  * 
  *     def viterbi_forest(self):             # <<<<<<<<<<<<<<
@@ -9129,7 +9137,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx_obj_5
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("viterbi_forest", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":41
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":41
  * 
  *     def viterbi_forest(self):
  *         cdef Hypergraph hg = Hypergraph()             # <<<<<<<<<<<<<<
@@ -9141,7 +9149,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx_obj_5
   __pyx_v_hg = ((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":42
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":42
  *     def viterbi_forest(self):
  *         cdef Hypergraph hg = Hypergraph()
  *         hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0])             # <<<<<<<<<<<<<<
@@ -9150,7 +9158,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx_obj_5
  */
   __pyx_v_hg->hg = new Hypergraph(((__pyx_v_self->hg[0]).CreateViterbiHypergraph(NULL).get()[0]));
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":43
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":43
  *         cdef Hypergraph hg = Hypergraph()
  *         hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0])
  *         return hg             # <<<<<<<<<<<<<<
@@ -9187,7 +9195,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_11viterbi_joshua(PyObject *__pyx_v
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":45
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":45
  *         return hg
  * 
  *     def viterbi_joshua(self):             # <<<<<<<<<<<<<<
@@ -9205,7 +9213,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_10viterbi_joshua(struct __pyx_obj_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("viterbi_joshua", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":47
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":47
  *     def viterbi_joshua(self):
  *         """hg.viterbi_joshua() -> Joshua representation of the best derivation."""
  *         return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -9256,7 +9264,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_13kbest(PyObject *__pyx_v_self, Py
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":49
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":49
  *         return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8')
  * 
  *     def kbest(self, size):             # <<<<<<<<<<<<<<
@@ -9324,7 +9332,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":51
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":51
  *     def kbest(self, size):
  *         """hg.kbest(size) -> List of k-best hypotheses in the hypergraph."""
  *         cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal]* derivations = new kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal](self.hg[0], size)             # <<<<<<<<<<<<<<
@@ -9334,7 +9342,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject
   __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_cur_scope->__pyx_v_derivations = new KBest::KBestDerivations<std::vector<WordID>,ESentenceTraversal>((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":54
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":54
  *         cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal].Derivation* derivation
  *         cdef unsigned k
  *         try:             # <<<<<<<<<<<<<<
@@ -9343,7 +9351,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject
  */
   /*try:*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":55
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":55
  *         cdef unsigned k
  *         try:
  *             for k in range(size):             # <<<<<<<<<<<<<<
@@ -9354,7 +9362,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject
     for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) {
       __pyx_cur_scope->__pyx_v_k = __pyx_t_1;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":56
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":56
  *         try:
  *             for k in range(size):
  *                 derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)             # <<<<<<<<<<<<<<
@@ -9363,7 +9371,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject
  */
       __pyx_cur_scope->__pyx_v_derivation = __pyx_cur_scope->__pyx_v_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k);
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":57
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":57
  *             for k in range(size):
  *                 derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
  *                 if not derivation: break             # <<<<<<<<<<<<<<
@@ -9377,7 +9385,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject
       }
       __pyx_L9:;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":58
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":58
  *                 derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
  *                 if not derivation: break
  *                 yield unicode(GetString(derivation._yield).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -9414,7 +9422,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject
     __pyx_L8_break:;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":60
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":60
  *                 yield unicode(GetString(derivation._yield).c_str(), 'utf8')
  *         finally:
  *             del derivations             # <<<<<<<<<<<<<<
@@ -9475,7 +9483,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_16kbest_trees(PyObject *__pyx_v_se
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":62
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":62
  *             del derivations
  * 
  *     def kbest_trees(self, size):             # <<<<<<<<<<<<<<
@@ -9545,7 +9553,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":64
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":64
  *     def kbest_trees(self, size):
  *         """hg.kbest_trees(size) -> List of k-best trees in the hypergraph."""
  *         cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal](self.hg[0], size)             # <<<<<<<<<<<<<<
@@ -9555,7 +9563,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
   __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_cur_scope->__pyx_v_f_derivations = new KBest::KBestDerivations<std::vector<WordID>,FTreeTraversal>((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":66
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":66
  *         cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal](self.hg[0], size)
  *         cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal].Derivation* f_derivation
  *         cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal]* e_derivations = new kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal](self.hg[0], size)             # <<<<<<<<<<<<<<
@@ -9565,7 +9573,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
   __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_cur_scope->__pyx_v_e_derivations = new KBest::KBestDerivations<std::vector<WordID>,ETreeTraversal>((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":69
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":69
  *         cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal].Derivation* e_derivation
  *         cdef unsigned k
  *         try:             # <<<<<<<<<<<<<<
@@ -9574,7 +9582,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
  */
   /*try:*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":70
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":70
  *         cdef unsigned k
  *         try:
  *             for k in range(size):             # <<<<<<<<<<<<<<
@@ -9585,7 +9593,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
     for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) {
       __pyx_cur_scope->__pyx_v_k = __pyx_t_1;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":71
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":71
  *         try:
  *             for k in range(size):
  *                 f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)             # <<<<<<<<<<<<<<
@@ -9594,7 +9602,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
  */
       __pyx_cur_scope->__pyx_v_f_derivation = __pyx_cur_scope->__pyx_v_f_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k);
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":72
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":72
  *             for k in range(size):
  *                 f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
  *                 e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)             # <<<<<<<<<<<<<<
@@ -9603,7 +9611,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
  */
       __pyx_cur_scope->__pyx_v_e_derivation = __pyx_cur_scope->__pyx_v_e_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k);
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":73
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":73
  *                 f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
  *                 e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
  *                 if not f_derivation or not e_derivation: break             # <<<<<<<<<<<<<<
@@ -9623,7 +9631,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
       }
       __pyx_L9:;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":74
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":74
  *                 e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
  *                 if not f_derivation or not e_derivation: break
  *                 f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -9649,7 +9657,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
       __pyx_cur_scope->__pyx_v_f_tree = ((PyObject*)__pyx_t_6);
       __pyx_t_6 = 0;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":75
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":75
  *                 if not f_derivation or not e_derivation: break
  *                 f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8')
  *                 e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -9675,7 +9683,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
       __pyx_cur_scope->__pyx_v_e_tree = ((PyObject*)__pyx_t_6);
       __pyx_t_6 = 0;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":76
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":76
  *                 f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8')
  *                 e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8')
  *                 yield (f_tree, e_tree)             # <<<<<<<<<<<<<<
@@ -9707,7 +9715,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
     __pyx_L8_break:;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":78
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":78
  *                 yield (f_tree, e_tree)
  *         finally:
  *             del f_derivations             # <<<<<<<<<<<<<<
@@ -9731,7 +9739,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject
     __pyx_L6:;
     delete __pyx_cur_scope->__pyx_v_f_derivations;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":79
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":79
  *         finally:
  *             del f_derivations
  *             del e_derivations             # <<<<<<<<<<<<<<
@@ -9777,7 +9785,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_19kbest_features(PyObject *__pyx_v
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":81
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":81
  *             del e_derivations
  * 
  *     def kbest_features(self, size):             # <<<<<<<<<<<<<<
@@ -9844,7 +9852,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":83
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":83
  *     def kbest_features(self, size):
  *         """hg.kbest_trees(size) -> List of k-best feature vectors in the hypergraph."""
  *         cdef kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal]* derivations = new kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal](self.hg[0], size)             # <<<<<<<<<<<<<<
@@ -9854,7 +9862,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
   __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_cur_scope->__pyx_v_derivations = new KBest::KBestDerivations<FastSparseVector<weight_t>,FeatureVectorTraversal>((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":87
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":87
  *         cdef SparseVector fmap
  *         cdef unsigned k
  *         try:             # <<<<<<<<<<<<<<
@@ -9863,7 +9871,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
  */
   /*try:*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":88
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":88
  *         cdef unsigned k
  *         try:
  *             for k in range(size):             # <<<<<<<<<<<<<<
@@ -9874,7 +9882,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
     for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) {
       __pyx_cur_scope->__pyx_v_k = __pyx_t_1;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":89
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":89
  *         try:
  *             for k in range(size):
  *                 derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)             # <<<<<<<<<<<<<<
@@ -9883,7 +9891,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
  */
       __pyx_cur_scope->__pyx_v_derivation = __pyx_cur_scope->__pyx_v_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k);
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":90
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":90
  *             for k in range(size):
  *                 derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
  *                 if not derivation: break             # <<<<<<<<<<<<<<
@@ -9897,7 +9905,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
       }
       __pyx_L9:;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":91
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":91
  *                 derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
  *                 if not derivation: break
  *                 fmap = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -9913,7 +9921,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
       __pyx_cur_scope->__pyx_v_fmap = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_4);
       __pyx_t_4 = 0;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":92
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":92
  *                 if not derivation: break
  *                 fmap = SparseVector.__new__(SparseVector)
  *                 fmap.vector = new FastSparseVector[weight_t](derivation._yield)             # <<<<<<<<<<<<<<
@@ -9922,7 +9930,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
  */
       __pyx_cur_scope->__pyx_v_fmap->vector = new FastSparseVector<weight_t>(__pyx_cur_scope->__pyx_v_derivation->yield);
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":93
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":93
  *                 fmap = SparseVector.__new__(SparseVector)
  *                 fmap.vector = new FastSparseVector[weight_t](derivation._yield)
  *                 yield fmap             # <<<<<<<<<<<<<<
@@ -9946,7 +9954,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject
     __pyx_L8_break:;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":95
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":95
  *                 yield fmap
  *         finally:
  *             del derivations             # <<<<<<<<<<<<<<
@@ -10015,7 +10023,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_22sample(PyObject *__pyx_v_self, P
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":97
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":97
  *             del derivations
  * 
  *     def sample(self, unsigned n):             # <<<<<<<<<<<<<<
@@ -10081,17 +10089,22 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_23generator7(__pyx_GeneratorObject
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":99
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":99
  *     def sample(self, unsigned n):
  *         """hg.sample(n) -> Sample of n hypotheses from the hypergraph."""
  *         cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]()             # <<<<<<<<<<<<<<
  *         hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos)
  *         cdef unsigned k
  */
-  try {__pyx_t_1 = new std::vector<HypergraphSampler::Hypothesis>();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+  try {
+    __pyx_t_1 = new std::vector<HypergraphSampler::Hypothesis>();
+  } catch(...) {
+    __Pyx_CppExn2PyErr();
+    {__pyx_filename = __pyx_f[3]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
   __pyx_cur_scope->__pyx_v_hypos = __pyx_t_1;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":100
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":100
  *         """hg.sample(n) -> Sample of n hypotheses from the hypergraph."""
  *         cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]()
  *         hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos)             # <<<<<<<<<<<<<<
@@ -10100,7 +10113,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_23generator7(__pyx_GeneratorObject
  */
   HypergraphSampler::sample_hypotheses((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_cur_scope->__pyx_v_n, ((struct __pyx_vtabstruct_5_cdec_Hypergraph *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->_rng(__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_hypos);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":102
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":102
  *         hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos)
  *         cdef unsigned k
  *         try:             # <<<<<<<<<<<<<<
@@ -10109,7 +10122,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_23generator7(__pyx_GeneratorObject
  */
   /*try:*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":103
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":103
  *         cdef unsigned k
  *         try:
  *             for k in range(hypos.size()):             # <<<<<<<<<<<<<<
@@ -10120,7 +10133,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_23generator7(__pyx_GeneratorObject
     for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
       __pyx_cur_scope->__pyx_v_k = __pyx_t_3;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":104
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":104
  *         try:
  *             for k in range(hypos.size()):
  *                 yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -10156,7 +10169,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_23generator7(__pyx_GeneratorObject
     }
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":106
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":106
  *                 yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8')
  *         finally:
  *             del hypos             # <<<<<<<<<<<<<<
@@ -10227,7 +10240,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_25sample_trees(PyObject *__pyx_v_s
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":108
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":108
  *             del hypos
  * 
  *     def sample_trees(self, unsigned n):             # <<<<<<<<<<<<<<
@@ -10293,17 +10306,22 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_26generator8(__pyx_GeneratorObject
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":110
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":110
  *     def sample_trees(self, unsigned n):
  *        """hg.sample_trees(n) -> Sample of n trees from the hypergraph."""
  *        cdef vector[string]* trees = new vector[string]()             # <<<<<<<<<<<<<<
  *        hypergraph.sample_trees(self.hg[0], n, self._rng(), trees)
  *        cdef unsigned k
  */
-  try {__pyx_t_1 = new std::vector<std::string>();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+  try {
+    __pyx_t_1 = new std::vector<std::string>();
+  } catch(...) {
+    __Pyx_CppExn2PyErr();
+    {__pyx_filename = __pyx_f[3]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
   __pyx_cur_scope->__pyx_v_trees = __pyx_t_1;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":111
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":111
  *        """hg.sample_trees(n) -> Sample of n trees from the hypergraph."""
  *        cdef vector[string]* trees = new vector[string]()
  *        hypergraph.sample_trees(self.hg[0], n, self._rng(), trees)             # <<<<<<<<<<<<<<
@@ -10312,7 +10330,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_26generator8(__pyx_GeneratorObject
  */
   HypergraphSampler::sample_trees((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_cur_scope->__pyx_v_n, ((struct __pyx_vtabstruct_5_cdec_Hypergraph *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->_rng(__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_trees);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":113
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":113
  *        hypergraph.sample_trees(self.hg[0], n, self._rng(), trees)
  *        cdef unsigned k
  *        try:             # <<<<<<<<<<<<<<
@@ -10321,7 +10339,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_26generator8(__pyx_GeneratorObject
  */
   /*try:*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":114
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":114
  *        cdef unsigned k
  *        try:
  *            for k in range(trees.size()):             # <<<<<<<<<<<<<<
@@ -10332,7 +10350,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_26generator8(__pyx_GeneratorObject
     for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
       __pyx_cur_scope->__pyx_v_k = __pyx_t_3;
 
-      /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":115
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":115
  *        try:
  *            for k in range(trees.size()):
  *                yield unicode(trees[0][k].c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -10368,7 +10386,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_26generator8(__pyx_GeneratorObject
     }
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":117
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":117
  *                yield unicode(trees[0][k].c_str(), 'utf8')
  *        finally:
  *            del trees             # <<<<<<<<<<<<<<
@@ -10428,7 +10446,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_28intersect(PyObject *__pyx_v_self
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":119
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":119
  *            del trees
  * 
  *     def intersect(self, inp):             # <<<<<<<<<<<<<<
@@ -10440,28 +10458,25 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27intersect(struct __pyx_obj_5_cde
   struct __pyx_obj_5_cdec_Lattice *__pyx_v_lat = 0;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   PyObject *__pyx_t_3 = NULL;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("intersect", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":122
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":122
  *         """hg.intersect(Lattice/string): Intersect the hypergraph with the provided reference."""
  *         cdef Lattice lat
  *         if isinstance(inp, Lattice):             # <<<<<<<<<<<<<<
  *             lat = <Lattice> inp
  *         elif isinstance(inp, basestring):
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Lattice));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_inp, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_inp, ((PyObject*)__pyx_ptype_5_cdec_Lattice)); 
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":123
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":123
  *         cdef Lattice lat
  *         if isinstance(inp, Lattice):
  *             lat = <Lattice> inp             # <<<<<<<<<<<<<<
@@ -10473,41 +10488,38 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27intersect(struct __pyx_obj_5_cde
     goto __pyx_L3;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":124
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":124
  *         if isinstance(inp, Lattice):
  *             lat = <Lattice> inp
  *         elif isinstance(inp, basestring):             # <<<<<<<<<<<<<<
  *             lat = Lattice(inp)
  *         else:
  */
-  __pyx_t_1 = __pyx_builtin_basestring;
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = PyObject_IsInstance(__pyx_v_inp, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = PyObject_IsInstance(__pyx_v_inp, __pyx_builtin_basestring); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":125
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":125
  *             lat = <Lattice> inp
  *         elif isinstance(inp, basestring):
  *             lat = Lattice(inp)             # <<<<<<<<<<<<<<
  *         else:
  *             raise TypeError('cannot intersect hypergraph with %s' % type(inp))
  */
-    __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
     __Pyx_INCREF(__pyx_v_inp);
-    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_inp);
+    PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_inp);
     __Pyx_GIVEREF(__pyx_v_inp);
-    __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Lattice)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Lattice)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+    __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
     __pyx_v_lat = ((struct __pyx_obj_5_cdec_Lattice *)__pyx_t_3);
     __pyx_t_3 = 0;
     goto __pyx_L3;
   }
   /*else*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":127
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":127
  *             lat = Lattice(inp)
  *         else:
  *             raise TypeError('cannot intersect hypergraph with %s' % type(inp))             # <<<<<<<<<<<<<<
@@ -10516,21 +10528,21 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27intersect(struct __pyx_obj_5_cde
  */
     __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_15), ((PyObject *)Py_TYPE(__pyx_v_inp))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_3));
-    __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_3));
+    __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_3));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_3));
     __pyx_t_3 = 0;
-    __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+    __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
     __Pyx_Raise(__pyx_t_3, 0, 0, 0);
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":128
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":128
  *         else:
  *             raise TypeError('cannot intersect hypergraph with %s' % type(inp))
  *         return hypergraph.Intersect(lat.lattice[0], self.hg)             # <<<<<<<<<<<<<<
@@ -10547,7 +10559,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27intersect(struct __pyx_obj_5_cde
   __pyx_r = Py_None; __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_XDECREF(__pyx_t_3);
   __Pyx_AddTraceback("_cdec.Hypergraph.intersect", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = NULL;
@@ -10626,7 +10638,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_30prune(PyObject *__pyx_v_self, Py
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":130
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":130
  *         return hypergraph.Intersect(lat.lattice[0], self.hg)
  * 
  *     def prune(self, beam_alpha=0, density=0, **kwargs):             # <<<<<<<<<<<<<<
@@ -10646,7 +10658,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hy
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("prune", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":134
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":134
  *         beam_alpha: use beam pruning
  *         density: use density pruning"""
  *         cdef hypergraph.EdgeMask* preserve_mask = NULL             # <<<<<<<<<<<<<<
@@ -10655,7 +10667,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hy
  */
   __pyx_v_preserve_mask = NULL;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":135
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":135
  *         density: use density pruning"""
  *         cdef hypergraph.EdgeMask* preserve_mask = NULL
  *         if 'csplit_preserve_full_word' in kwargs:             # <<<<<<<<<<<<<<
@@ -10665,7 +10677,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hy
   __pyx_t_1 = (__Pyx_PyDict_Contains(((PyObject *)__pyx_n_s_16), ((PyObject *)__pyx_v_kwargs), Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":136
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":136
  *         cdef hypergraph.EdgeMask* preserve_mask = NULL
  *         if 'csplit_preserve_full_word' in kwargs:
  *              preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size())             # <<<<<<<<<<<<<<
@@ -10674,7 +10686,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hy
  */
     __pyx_v_preserve_mask = new std::vector<bool>(__pyx_v_self->hg->edges_.size());
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":137
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":137
  *         if 'csplit_preserve_full_word' in kwargs:
  *              preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size())
  *              preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True             # <<<<<<<<<<<<<<
@@ -10686,7 +10698,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hy
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":138
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":138
  *              preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size())
  *              preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True
  *         self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False)             # <<<<<<<<<<<<<<
@@ -10697,7 +10709,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hy
   __pyx_t_3 = __pyx_PyFloat_AsDouble(__pyx_v_density); if (unlikely((__pyx_t_3 == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_v_self->hg->PruneInsideOutside(__pyx_t_2, __pyx_t_3, __pyx_v_preserve_mask, 0, 1.0, 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":139
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":139
  *              preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True
  *         self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False)
  *         if preserve_mask:             # <<<<<<<<<<<<<<
@@ -10707,7 +10719,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hy
   __pyx_t_1 = (__pyx_v_preserve_mask != 0);
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":140
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":140
  *         self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False)
  *         if preserve_mask:
  *             del preserve_mask             # <<<<<<<<<<<<<<
@@ -10742,7 +10754,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_32lattice(PyObject *__pyx_v_self,
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":142
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":142
  *             del preserve_mask
  * 
  *     def lattice(self): # TODO direct hg -> lattice conversion in cdec             # <<<<<<<<<<<<<<
@@ -10762,7 +10774,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_31lattice(struct __pyx_obj_5_cdec_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("lattice", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":144
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":144
  *     def lattice(self): # TODO direct hg -> lattice conversion in cdec
  *         """hg.lattice() -> Lattice corresponding to the hypergraph."""
  *         cdef bytes plf = hypergraph.AsPLF(self.hg[0], True).c_str()             # <<<<<<<<<<<<<<
@@ -10771,10 +10783,10 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_31lattice(struct __pyx_obj_5_cdec_
  */
   __pyx_t_1 = PyBytes_FromString(HypergraphIO::AsPLF((__pyx_v_self->hg[0]), 1).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(((PyObject *)__pyx_t_1));
-  __pyx_v_plf = __pyx_t_1;
+  __pyx_v_plf = ((PyObject*)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":145
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":145
  *         """hg.lattice() -> Lattice corresponding to the hypergraph."""
  *         cdef bytes plf = hypergraph.AsPLF(self.hg[0], True).c_str()
  *         return Lattice(eval(plf))             # <<<<<<<<<<<<<<
@@ -10845,7 +10857,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_34plf(PyObject *__pyx_v_self, CYTH
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":147
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":147
  *         return Lattice(eval(plf))
  * 
  *     def plf(self):             # <<<<<<<<<<<<<<
@@ -10863,7 +10875,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_33plf(struct __pyx_obj_5_cdec_Hype
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("plf", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":149
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":149
  *     def plf(self):
  *         """hg.plf() -> Lattice PLF representation corresponding to the hypergraph."""
  *         return bytes(hypergraph.AsPLF(self.hg[0], True).c_str())             # <<<<<<<<<<<<<<
@@ -10910,7 +10922,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_36reweight(PyObject *__pyx_v_self,
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":151
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":151
  *         return bytes(hypergraph.AsPLF(self.hg[0], True).c_str())
  * 
  *     def reweight(self, weights):             # <<<<<<<<<<<<<<
@@ -10921,28 +10933,25 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_36reweight(PyObject *__pyx_v_self,
 static PyObject *__pyx_pf_5_cdec_10Hypergraph_35reweight(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_weights) {
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   PyObject *__pyx_t_3 = NULL;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("reweight", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":153
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":153
  *     def reweight(self, weights):
  *         """hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector."""
  *         if isinstance(weights, SparseVector):             # <<<<<<<<<<<<<<
  *             self.hg.Reweight((<SparseVector> weights).vector[0])
  *         elif isinstance(weights, DenseVector):
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SparseVector));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_weights, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_weights, ((PyObject*)__pyx_ptype_5_cdec_SparseVector)); 
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":154
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":154
  *         """hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector."""
  *         if isinstance(weights, SparseVector):
  *             self.hg.Reweight((<SparseVector> weights).vector[0])             # <<<<<<<<<<<<<<
@@ -10953,20 +10962,17 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35reweight(struct __pyx_obj_5_cdec
     goto __pyx_L3;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":155
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":155
  *         if isinstance(weights, SparseVector):
  *             self.hg.Reweight((<SparseVector> weights).vector[0])
  *         elif isinstance(weights, DenseVector):             # <<<<<<<<<<<<<<
  *             self.hg.Reweight((<DenseVector> weights).vector[0])
  *         else:
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_DenseVector));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_weights, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_weights, ((PyObject*)__pyx_ptype_5_cdec_DenseVector)); 
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":156
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":156
  *             self.hg.Reweight((<SparseVector> weights).vector[0])
  *         elif isinstance(weights, DenseVector):
  *             self.hg.Reweight((<DenseVector> weights).vector[0])             # <<<<<<<<<<<<<<
@@ -10978,25 +10984,25 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35reweight(struct __pyx_obj_5_cdec
   }
   /*else*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":158
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":158
  *             self.hg.Reweight((<DenseVector> weights).vector[0])
  *         else:
  *             raise TypeError('cannot reweight hypergraph with %s' % type(weights))             # <<<<<<<<<<<<<<
  * 
  *     property edges:
  */
-    __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_17), ((PyObject *)Py_TYPE(__pyx_v_weights))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+    __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_17), ((PyObject *)Py_TYPE(__pyx_v_weights))); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(((PyObject *)__pyx_t_2));
     __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_1));
-    __Pyx_GIVEREF(((PyObject *)__pyx_t_1));
-    __pyx_t_1 = 0;
-    __pyx_t_1 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2));
+    __Pyx_GIVEREF(((PyObject *)__pyx_t_2));
+    __pyx_t_2 = 0;
+    __pyx_t_2 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
     __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
-    __Pyx_Raise(__pyx_t_1, 0, 0, 0);
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_Raise(__pyx_t_2, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   }
   __pyx_L3:;
@@ -11004,7 +11010,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35reweight(struct __pyx_obj_5_cdec
   __pyx_r = Py_None; __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_XDECREF(__pyx_t_3);
   __Pyx_AddTraceback("_cdec.Hypergraph.reweight", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = NULL;
@@ -11026,7 +11032,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_5edges_1__get__(PyObject *__pyx_v_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":161
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":161
  * 
  *     property edges:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -11090,7 +11096,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5edges_2generator9(__pyx_Generator
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":163
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":163
  *         def __get__(self):
  *             cdef unsigned i
  *             for i in range(self.hg.edges_.size()):             # <<<<<<<<<<<<<<
@@ -11101,7 +11107,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5edges_2generator9(__pyx_Generator
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":164
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":164
  *             cdef unsigned i
  *             for i in range(self.hg.edges_.size()):
  *                 yield HypergraphEdge().init(self.hg, i)             # <<<<<<<<<<<<<<
@@ -11153,7 +11159,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_5nodes_1__get__(PyObject *__pyx_v_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":167
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":167
  * 
  *     property nodes:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -11217,7 +11223,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5nodes_2generator10(__pyx_Generato
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":169
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":169
  *         def __get__(self):
  *             cdef unsigned i
  *             for i in range(self.hg.nodes_.size()):             # <<<<<<<<<<<<<<
@@ -11228,7 +11234,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5nodes_2generator10(__pyx_Generato
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":170
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":170
  *             cdef unsigned i
  *             for i in range(self.hg.nodes_.size()):
  *                 yield HypergraphNode().init(self.hg, i)             # <<<<<<<<<<<<<<
@@ -11279,7 +11285,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_4goal_1__get__(PyObject *__pyx_v_s
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":173
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":173
  * 
  *     property goal:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -11297,7 +11303,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_4goal___get__(struct __pyx_obj_5_c
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":174
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":174
  *     property goal:
  *         def __get__(self):
  *             return HypergraphNode().init(self.hg, self.hg.GoalNode())             # <<<<<<<<<<<<<<
@@ -11338,7 +11344,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_6npaths_1__get__(PyObject *__pyx_v
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":177
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":177
  * 
  *     property npaths:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -11355,7 +11361,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6npaths___get__(struct __pyx_obj_5
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":178
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":178
  *     property npaths:
  *         def __get__(self):
  *             return self.hg.NumberOfPaths()             # <<<<<<<<<<<<<<
@@ -11393,7 +11399,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_38inside_outside(PyObject *__pyx_v
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":180
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":180
  *             return self.hg.NumberOfPaths()
  * 
  *     def inside_outside(self):             # <<<<<<<<<<<<<<
@@ -11417,7 +11423,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("inside_outside", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":182
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":182
  *     def inside_outside(self):
  *         """hg.inside_outside() -> SparseVector with inside-outside scores for each feature."""
  *         cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]()             # <<<<<<<<<<<<<<
@@ -11426,7 +11432,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
  */
   __pyx_v_result = new FastSparseVector<prob_t>();
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":183
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":183
  *         """hg.inside_outside() -> SparseVector with inside-outside scores for each feature."""
  *         cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]()
  *         cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result)             # <<<<<<<<<<<<<<
@@ -11435,7 +11441,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
  */
   __pyx_v_z = InsideOutside<prob_t, EdgeProb, SparseVector<prob_t>, EdgeFeaturesAndProbWeightFunction>((__pyx_v_self->hg[0]), __pyx_v_result);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":184
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":184
  *         cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]()
  *         cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result)
  *         result[0] /= z             # <<<<<<<<<<<<<<
@@ -11444,7 +11450,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
  */
   (__pyx_v_result[0]) /= __pyx_v_z;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":185
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":185
  *         cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result)
  *         result[0] /= z
  *         cdef SparseVector vector = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -11457,7 +11463,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
   __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":186
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":186
  *         result[0] /= z
  *         cdef SparseVector vector = SparseVector.__new__(SparseVector)
  *         vector.vector = new FastSparseVector[double]()             # <<<<<<<<<<<<<<
@@ -11466,7 +11472,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
  */
   __pyx_v_vector->vector = new FastSparseVector<double>();
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":187
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":187
  *         cdef SparseVector vector = SparseVector.__new__(SparseVector)
  *         vector.vector = new FastSparseVector[double]()
  *         cdef FastSparseVector[prob_t].const_iterator* it = new FastSparseVector[prob_t].const_iterator(result[0], False)             # <<<<<<<<<<<<<<
@@ -11475,7 +11481,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
  */
   __pyx_v_it = new FastSparseVector<prob_t>::const_iterator((__pyx_v_result[0]), 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":189
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":189
  *         cdef FastSparseVector[prob_t].const_iterator* it = new FastSparseVector[prob_t].const_iterator(result[0], False)
  *         cdef unsigned i
  *         for i in range(result.size()):             # <<<<<<<<<<<<<<
@@ -11486,7 +11492,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
   for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
     __pyx_v_i = __pyx_t_3;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":190
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":190
  *         cdef unsigned i
  *         for i in range(result.size()):
  *             vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second))             # <<<<<<<<<<<<<<
@@ -11495,7 +11501,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
  */
     __pyx_v_vector->vector->set_value((__pyx_v_it[0]).operator->()->first, log((__pyx_v_it[0]).operator->()->second));
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":191
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":191
  *         for i in range(result.size()):
  *             vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second))
  *             pinc(it[0]) # ++it             # <<<<<<<<<<<<<<
@@ -11505,7 +11511,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
     (++(__pyx_v_it[0]));
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":192
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":192
  *             vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second))
  *             pinc(it[0]) # ++it
  *         del it             # <<<<<<<<<<<<<<
@@ -11514,7 +11520,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
  */
   delete __pyx_v_it;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":193
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":193
  *             pinc(it[0]) # ++it
  *         del it
  *         del result             # <<<<<<<<<<<<<<
@@ -11523,7 +11529,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
  */
   delete __pyx_v_result;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":194
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":194
  *         del it
  *         del result
  *         return vector             # <<<<<<<<<<<<<<
@@ -11548,7 +11554,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":201
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":201
  *     cdef public TRule trule
  * 
  *     cdef init(self, hypergraph.Hypergraph* hg, unsigned i):             # <<<<<<<<<<<<<<
@@ -11565,7 +11571,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("init", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":202
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":202
  * 
  *     cdef init(self, hypergraph.Hypergraph* hg, unsigned i):
  *         self.hg = hg             # <<<<<<<<<<<<<<
@@ -11574,7 +11580,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy
  */
   __pyx_v_self->hg = __pyx_v_hg;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":203
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":203
  *     cdef init(self, hypergraph.Hypergraph* hg, unsigned i):
  *         self.hg = hg
  *         self.edge = &hg.edges_[i]             # <<<<<<<<<<<<<<
@@ -11583,7 +11589,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy
  */
   __pyx_v_self->edge = (&(__pyx_v_hg->edges_[__pyx_v_i]));
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":204
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":204
  *         self.hg = hg
  *         self.edge = &hg.edges_[i]
  *         self.trule = TRule.__new__(TRule)             # <<<<<<<<<<<<<<
@@ -11599,7 +11605,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy
   __pyx_v_self->trule = ((struct __pyx_obj_5_cdec_TRule *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":205
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":205
  *         self.edge = &hg.edges_[i]
  *         self.trule = TRule.__new__(TRule)
  *         self.trule.rule = new shared_ptr[grammar.TRule](self.edge.rule_)             # <<<<<<<<<<<<<<
@@ -11608,7 +11614,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy
  */
   __pyx_v_self->trule->rule = new boost::shared_ptr<TRule>(__pyx_v_self->edge->rule_);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":206
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":206
  *         self.trule = TRule.__new__(TRule)
  *         self.trule.rule = new shared_ptr[grammar.TRule](self.edge.rule_)
  *         return self             # <<<<<<<<<<<<<<
@@ -11643,7 +11649,7 @@ static Py_ssize_t __pyx_pw_5_cdec_14HypergraphEdge_1__len__(PyObject *__pyx_v_se
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":208
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":208
  *         return self
  * 
  *     def __len__(self):             # <<<<<<<<<<<<<<
@@ -11656,7 +11662,7 @@ static Py_ssize_t __pyx_pf_5_cdec_14HypergraphEdge___len__(struct __pyx_obj_5_cd
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__len__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":209
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":209
  * 
  *     def __len__(self):
  *         return self.edge.tail_nodes_.size()             # <<<<<<<<<<<<<<
@@ -11683,7 +11689,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_9head_node_1__get__(PyObject *
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":212
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":212
  * 
  *     property head_node:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -11701,7 +11707,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_9head_node___get__(struct __py
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":213
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":213
  *     property head_node:
  *         def __get__(self):
  *             return HypergraphNode().init(self.hg, self.edge.head_node_)             # <<<<<<<<<<<<<<
@@ -11743,7 +11749,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_10tail_nodes_1__get__(PyObject
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":216
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":216
  * 
  *     property tail_nodes:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -11807,7 +11813,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphEdge_10tail_nodes_2generator11(__py
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":218
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":218
  *         def __get__(self):
  *             cdef unsigned i
  *             for i in range(self.edge.tail_nodes_.size()):             # <<<<<<<<<<<<<<
@@ -11818,7 +11824,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphEdge_10tail_nodes_2generator11(__py
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":219
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":219
  *             cdef unsigned i
  *             for i in range(self.edge.tail_nodes_.size()):
  *                 yield HypergraphNode().init(self.hg, self.edge.tail_nodes_[i])             # <<<<<<<<<<<<<<
@@ -11869,7 +11875,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_4span_1__get__(PyObject *__pyx
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":222
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":222
  * 
  *     property span:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -11888,7 +11894,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_4span___get__(struct __pyx_obj
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":223
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":223
  *     property span:
  *         def __get__(self):
  *             return (self.edge.i_, self.edge.j_)             # <<<<<<<<<<<<<<
@@ -11937,7 +11943,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_8src_span_1__get__(PyObject *_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":226
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":226
  * 
  *     property src_span:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -11956,7 +11962,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_8src_span___get__(struct __pyx
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":227
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":227
  *     property src_span:
  *         def __get__(self):
  *             return (self.edge.prev_i_, self.edge.prev_j_)             # <<<<<<<<<<<<<<
@@ -12005,7 +12011,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_14feature_values_1__get__(PyOb
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":230
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":230
  * 
  *     property feature_values:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -12023,7 +12029,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_14feature_values___get__(struc
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":231
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":231
  *     property feature_values:
  *         def __get__(self):
  *             cdef SparseVector vector = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -12036,7 +12042,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_14feature_values___get__(struc
   __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":232
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":232
  *         def __get__(self):
  *             cdef SparseVector vector = SparseVector.__new__(SparseVector)
  *             vector.vector = new FastSparseVector[double](self.edge.feature_values_)             # <<<<<<<<<<<<<<
@@ -12045,7 +12051,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_14feature_values___get__(struc
  */
   __pyx_v_vector->vector = new FastSparseVector<double>(__pyx_v_self->edge->feature_values_);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":233
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":233
  *             cdef SparseVector vector = SparseVector.__new__(SparseVector)
  *             vector.vector = new FastSparseVector[double](self.edge.feature_values_)
  *             return vector             # <<<<<<<<<<<<<<
@@ -12081,7 +12087,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_4prob_1__get__(PyObject *__pyx
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":236
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":236
  * 
  *     property prob:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -12098,7 +12104,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_4prob___get__(struct __pyx_obj
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":237
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":237
  *     property prob:
  *         def __get__(self):
  *             return self.edge.edge_prob_.as_float()             # <<<<<<<<<<<<<<
@@ -12141,7 +12147,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_3__richcmp__(PyObject *__pyx_v
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":239
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":239
  *             return self.edge.edge_prob_.as_float()
  * 
  *     def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op):             # <<<<<<<<<<<<<<
@@ -12159,7 +12165,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__richcmp__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":242
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":242
  *         if op == 2: # ==
  *             return x.edge == y.edge
  *         elif op == 3: # !=             # <<<<<<<<<<<<<<
@@ -12168,7 +12174,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_
  */
   switch (__pyx_v_op) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":240
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":240
  * 
  *     def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op):
  *         if op == 2: # ==             # <<<<<<<<<<<<<<
@@ -12177,7 +12183,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_
  */
     case 2:
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":241
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":241
  *     def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op):
  *         if op == 2: # ==
  *             return x.edge == y.edge             # <<<<<<<<<<<<<<
@@ -12192,7 +12198,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_
     goto __pyx_L0;
     break;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":242
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":242
  *         if op == 2: # ==
  *             return x.edge == y.edge
  *         elif op == 3: # !=             # <<<<<<<<<<<<<<
@@ -12201,7 +12207,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_
  */
     case 3:
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":243
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":243
  *             return x.edge == y.edge
  *         elif op == 3: # !=
  *             return not (x == y)             # <<<<<<<<<<<<<<
@@ -12220,7 +12226,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_
     break;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":244
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":244
  *         elif op == 3: # !=
  *             return not (x == y)
  *         raise NotImplemented('comparison not implemented for HypergraphEdge')             # <<<<<<<<<<<<<<
@@ -12256,7 +12262,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_5trule_1__get__(PyObject *__py
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":199
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":199
  *     cdef hypergraph.Hypergraph* hg
  *     cdef hypergraph.HypergraphEdge* edge
  *     cdef public TRule trule             # <<<<<<<<<<<<<<
@@ -12341,7 +12347,7 @@ static int __pyx_pf_5_cdec_14HypergraphEdge_5trule_4__del__(struct __pyx_obj_5_c
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":250
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":250
  *     cdef hypergraph.HypergraphNode* node
  * 
  *     cdef init(self, hypergraph.Hypergraph* hg, unsigned i):             # <<<<<<<<<<<<<<
@@ -12354,7 +12360,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphNode_init(struct __pyx_obj_5_cdec_Hy
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("init", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":251
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":251
  * 
  *     cdef init(self, hypergraph.Hypergraph* hg, unsigned i):
  *         self.hg = hg             # <<<<<<<<<<<<<<
@@ -12363,7 +12369,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphNode_init(struct __pyx_obj_5_cdec_Hy
  */
   __pyx_v_self->hg = __pyx_v_hg;
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":252
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":252
  *     cdef init(self, hypergraph.Hypergraph* hg, unsigned i):
  *         self.hg = hg
  *         self.node = &hg.nodes_[i]             # <<<<<<<<<<<<<<
@@ -12372,7 +12378,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphNode_init(struct __pyx_obj_5_cdec_Hy
  */
   __pyx_v_self->node = (&(__pyx_v_hg->nodes_[__pyx_v_i]));
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":253
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":253
  *         self.hg = hg
  *         self.node = &hg.nodes_[i]
  *         return self             # <<<<<<<<<<<<<<
@@ -12403,7 +12409,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_8in_edges_1__get__(PyObject *_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":256
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":256
  * 
  *     property in_edges:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -12467,7 +12473,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_8in_edges_2generator12(__pyx_G
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":258
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":258
  *         def __get__(self):
  *             cdef unsigned i
  *             for i in range(self.node.in_edges_.size()):             # <<<<<<<<<<<<<<
@@ -12478,7 +12484,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_8in_edges_2generator12(__pyx_G
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":259
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":259
  *             cdef unsigned i
  *             for i in range(self.node.in_edges_.size()):
  *                 yield HypergraphEdge().init(self.hg, self.node.in_edges_[i])             # <<<<<<<<<<<<<<
@@ -12530,7 +12536,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_9out_edges_1__get__(PyObject *
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":262
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":262
  * 
  *     property out_edges:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -12594,7 +12600,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_9out_edges_2generator13(__pyx_
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":264
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":264
  *         def __get__(self):
  *             cdef unsigned i
  *             for i in range(self.node.out_edges_.size()):             # <<<<<<<<<<<<<<
@@ -12605,7 +12611,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_9out_edges_2generator13(__pyx_
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":265
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":265
  *             cdef unsigned i
  *             for i in range(self.node.out_edges_.size()):
  *                 yield HypergraphEdge().init(self.hg, self.node.out_edges_[i])             # <<<<<<<<<<<<<<
@@ -12656,7 +12662,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_4span_1__get__(PyObject *__pyx
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":268
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":268
  * 
  *     property span:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -12674,7 +12680,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_4span___get__(struct __pyx_obj
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":269
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":269
  *     property span:
  *         def __get__(self):
  *             return next(self.in_edges).span             # <<<<<<<<<<<<<<
@@ -12718,7 +12724,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_3cat_1__get__(PyObject *__pyx_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":272
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":272
  * 
  *     property cat:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -12736,7 +12742,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_3cat___get__(struct __pyx_obj_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":273
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":273
  *     property cat:
  *         def __get__(self):
  *             if self.node.cat_:             # <<<<<<<<<<<<<<
@@ -12745,7 +12751,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_3cat___get__(struct __pyx_obj_
  */
   if (__pyx_v_self->node->cat_) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":274
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":274
  *         def __get__(self):
  *             if self.node.cat_:
  *                 return str(TDConvert(-self.node.cat_).c_str())             # <<<<<<<<<<<<<<
@@ -12800,7 +12806,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_1__richcmp__(PyObject *__pyx_v
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":276
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":276
  *                 return str(TDConvert(-self.node.cat_).c_str())
  * 
  *     def __richcmp__(HypergraphNode x, HypergraphNode y, int op):             # <<<<<<<<<<<<<<
@@ -12818,7 +12824,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__richcmp__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":279
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":279
  *         if op == 2: # ==
  *             return x.node == y.node
  *         elif op == 3: # !=             # <<<<<<<<<<<<<<
@@ -12827,7 +12833,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5
  */
   switch (__pyx_v_op) {
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":277
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":277
  * 
  *     def __richcmp__(HypergraphNode x, HypergraphNode y, int op):
  *         if op == 2: # ==             # <<<<<<<<<<<<<<
@@ -12836,7 +12842,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5
  */
     case 2:
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":278
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":278
  *     def __richcmp__(HypergraphNode x, HypergraphNode y, int op):
  *         if op == 2: # ==
  *             return x.node == y.node             # <<<<<<<<<<<<<<
@@ -12851,7 +12857,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5
     goto __pyx_L0;
     break;
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":279
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":279
  *         if op == 2: # ==
  *             return x.node == y.node
  *         elif op == 3: # !=             # <<<<<<<<<<<<<<
@@ -12860,7 +12866,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5
  */
     case 3:
 
-    /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":280
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":280
  *             return x.node == y.node
  *         elif op == 3: # !=
  *             return not (x == y)             # <<<<<<<<<<<<<<
@@ -12878,7 +12884,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5
     break;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":281
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":281
  *         elif op == 3: # !=
  *             return not (x == y)
  *         raise NotImplemented('comparison not implemented for HypergraphNode')             # <<<<<<<<<<<<<<
@@ -12915,7 +12921,7 @@ static int __pyx_pw_5_cdec_7Lattice_1__cinit__(PyObject *__pyx_v_self, PyObject
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":6
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":6
  *     cdef lattice.Lattice* lattice
  * 
  *     def __cinit__(self):             # <<<<<<<<<<<<<<
@@ -12928,7 +12934,7 @@ static int __pyx_pf_5_cdec_7Lattice___cinit__(struct __pyx_obj_5_cdec_Lattice *_
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__cinit__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":7
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":7
  * 
  *     def __cinit__(self):
  *         self.lattice = new lattice.Lattice()             # <<<<<<<<<<<<<<
@@ -12993,7 +12999,7 @@ static int __pyx_pw_5_cdec_7Lattice_3__init__(PyObject *__pyx_v_self, PyObject *
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":9
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":9
  *         self.lattice = new lattice.Lattice()
  * 
  *     def __init__(self, inp):             # <<<<<<<<<<<<<<
@@ -13006,9 +13012,9 @@ static int __pyx_pf_5_cdec_7Lattice_2__init__(struct __pyx_obj_5_cdec_Lattice *_
   PyObject *__pyx_v_arcs = NULL;
   int __pyx_r;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
-  Py_ssize_t __pyx_t_3;
+  int __pyx_t_1;
+  Py_ssize_t __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
   PyObject *__pyx_t_4 = NULL;
   PyObject *(*__pyx_t_5)(PyObject *);
   PyObject *__pyx_t_6 = NULL;
@@ -13018,30 +13024,27 @@ static int __pyx_pf_5_cdec_7Lattice_2__init__(struct __pyx_obj_5_cdec_Lattice *_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__init__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":12
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":12
  *         """Lattice(tuple) -> Lattice from node list.
  *         Lattice(string) -> Lattice from PLF representation."""
  *         if isinstance(inp, tuple):             # <<<<<<<<<<<<<<
  *             self.lattice.resize(len(inp))
  *             for i, arcs in enumerate(inp):
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)(&PyTuple_Type)));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_inp, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = PyTuple_Check(__pyx_v_inp); 
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":13
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":13
  *         Lattice(string) -> Lattice from PLF representation."""
  *         if isinstance(inp, tuple):
  *             self.lattice.resize(len(inp))             # <<<<<<<<<<<<<<
  *             for i, arcs in enumerate(inp):
  *                 self[i] = arcs
  */
-    __pyx_t_3 = PyObject_Length(__pyx_v_inp); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_v_self->lattice->resize(__pyx_t_3);
+    __pyx_t_2 = PyObject_Length(__pyx_v_inp); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_v_self->lattice->resize(__pyx_t_2);
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":14
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":14
  *         if isinstance(inp, tuple):
  *             self.lattice.resize(len(inp))
  *             for i, arcs in enumerate(inp):             # <<<<<<<<<<<<<<
@@ -13049,29 +13052,29 @@ static int __pyx_pf_5_cdec_7Lattice_2__init__(struct __pyx_obj_5_cdec_Lattice *_
  *         elif isinstance(inp, basestring):
  */
     __Pyx_INCREF(__pyx_int_0);
-    __pyx_t_1 = __pyx_int_0;
+    __pyx_t_3 = __pyx_int_0;
     if (PyList_CheckExact(__pyx_v_inp) || PyTuple_CheckExact(__pyx_v_inp)) {
-      __pyx_t_4 = __pyx_v_inp; __Pyx_INCREF(__pyx_t_4); __pyx_t_3 = 0;
+      __pyx_t_4 = __pyx_v_inp; __Pyx_INCREF(__pyx_t_4); __pyx_t_2 = 0;
       __pyx_t_5 = NULL;
     } else {
-      __pyx_t_3 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_inp); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_inp); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_4);
       __pyx_t_5 = Py_TYPE(__pyx_t_4)->tp_iternext;
     }
     for (;;) {
       if (!__pyx_t_5 && PyList_CheckExact(__pyx_t_4)) {
-        if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_4)) break;
+        if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_4)) break;
         #if CYTHON_COMPILING_IN_CPYTHON
-        __pyx_t_6 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_3); __Pyx_INCREF(__pyx_t_6); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_6 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_6); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         #else
-        __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         #endif
       } else if (!__pyx_t_5 && PyTuple_CheckExact(__pyx_t_4)) {
-        if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_4)) break;
+        if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_4)) break;
         #if CYTHON_COMPILING_IN_CPYTHON
-        __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_3); __Pyx_INCREF(__pyx_t_6); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_6); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         #else
-        __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         #endif
       } else {
         __pyx_t_6 = __pyx_t_5(__pyx_t_4);
@@ -13087,16 +13090,16 @@ static int __pyx_pf_5_cdec_7Lattice_2__init__(struct __pyx_obj_5_cdec_Lattice *_
       __Pyx_XDECREF(__pyx_v_arcs);
       __pyx_v_arcs = __pyx_t_6;
       __pyx_t_6 = 0;
-      __Pyx_INCREF(__pyx_t_1);
+      __Pyx_INCREF(__pyx_t_3);
       __Pyx_XDECREF(__pyx_v_i);
-      __pyx_v_i = __pyx_t_1;
-      __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_int_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_v_i = __pyx_t_3;
+      __pyx_t_6 = PyNumber_Add(__pyx_t_3, __pyx_int_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_6);
-      __Pyx_DECREF(__pyx_t_1);
-      __pyx_t_1 = __pyx_t_6;
+      __Pyx_DECREF(__pyx_t_3);
+      __pyx_t_3 = __pyx_t_6;
       __pyx_t_6 = 0;
 
-      /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":15
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":15
  *             self.lattice.resize(len(inp))
  *             for i, arcs in enumerate(inp):
  *                 self[i] = arcs             # <<<<<<<<<<<<<<
@@ -13106,58 +13109,55 @@ static int __pyx_pf_5_cdec_7Lattice_2__init__(struct __pyx_obj_5_cdec_Lattice *_
       if (PyObject_SetItem(((PyObject *)__pyx_v_self), __pyx_v_i, __pyx_v_arcs) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     }
     __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     goto __pyx_L3;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":16
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":16
  *             for i, arcs in enumerate(inp):
  *                 self[i] = arcs
  *         elif isinstance(inp, basestring):             # <<<<<<<<<<<<<<
  *             lattice.ConvertTextOrPLF(as_str(inp), self.lattice)
  *         else:
  */
-  __pyx_t_1 = __pyx_builtin_basestring;
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = PyObject_IsInstance(__pyx_v_inp, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = PyObject_IsInstance(__pyx_v_inp, __pyx_builtin_basestring); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":17
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":17
  *                 self[i] = arcs
  *         elif isinstance(inp, basestring):
  *             lattice.ConvertTextOrPLF(as_str(inp), self.lattice)             # <<<<<<<<<<<<<<
  *         else:
  *             raise TypeError('cannot create lattice from %s' % type(inp))
  */
-    __pyx_t_1 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_v_inp, NULL)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_7 = __pyx_convert_string_from_py_(__pyx_t_1); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __pyx_t_3 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_v_inp, NULL)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_7 = __pyx_convert_string_from_py_(__pyx_t_3); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     LatticeTools::ConvertTextOrPLF(__pyx_t_7, __pyx_v_self->lattice);
     goto __pyx_L3;
   }
   /*else*/ {
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":19
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":19
  *             lattice.ConvertTextOrPLF(as_str(inp), self.lattice)
  *         else:
  *             raise TypeError('cannot create lattice from %s' % type(inp))             # <<<<<<<<<<<<<<
  * 
  *     def __dealloc__(self):
  */
-    __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_22), ((PyObject *)Py_TYPE(__pyx_v_inp))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+    __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_22), ((PyObject *)Py_TYPE(__pyx_v_inp))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(((PyObject *)__pyx_t_3));
     __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
-    PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_t_1));
-    __Pyx_GIVEREF(((PyObject *)__pyx_t_1));
-    __pyx_t_1 = 0;
-    __pyx_t_1 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_t_3));
+    __Pyx_GIVEREF(((PyObject *)__pyx_t_3));
+    __pyx_t_3 = 0;
+    __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
     __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
-    __Pyx_Raise(__pyx_t_1, 0, 0, 0);
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     {__pyx_filename = __pyx_f[4]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   }
   __pyx_L3:;
@@ -13165,7 +13165,7 @@ static int __pyx_pf_5_cdec_7Lattice_2__init__(struct __pyx_obj_5_cdec_Lattice *_
   __pyx_r = 0;
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_3);
   __Pyx_XDECREF(__pyx_t_4);
   __Pyx_XDECREF(__pyx_t_6);
   __Pyx_AddTraceback("_cdec.Lattice.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
@@ -13186,7 +13186,7 @@ static void __pyx_pw_5_cdec_7Lattice_5__dealloc__(PyObject *__pyx_v_self) {
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":21
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":21
  *             raise TypeError('cannot create lattice from %s' % type(inp))
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -13198,7 +13198,7 @@ static void __pyx_pf_5_cdec_7Lattice_4__dealloc__(CYTHON_UNUSED struct __pyx_obj
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":22
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":22
  * 
  *     def __dealloc__(self):
  *         del self.lattice             # <<<<<<<<<<<<<<
@@ -13231,7 +13231,7 @@ static PyObject *__pyx_pw_5_cdec_7Lattice_7__getitem__(PyObject *__pyx_v_self, P
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":24
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":24
  *         del self.lattice
  * 
  *     def __getitem__(self, int index):             # <<<<<<<<<<<<<<
@@ -13261,7 +13261,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__getitem__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":25
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":25
  * 
  *     def __getitem__(self, int index):
  *         if not 0 <= index < len(self):             # <<<<<<<<<<<<<<
@@ -13276,7 +13276,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
   __pyx_t_3 = (!__pyx_t_1);
   if (__pyx_t_3) {
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":26
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":26
  *     def __getitem__(self, int index):
  *         if not 0 <= index < len(self):
  *             raise IndexError('lattice index out of range')             # <<<<<<<<<<<<<<
@@ -13292,7 +13292,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":27
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":27
  *         if not 0 <= index < len(self):
  *             raise IndexError('lattice index out of range')
  *         arcs = []             # <<<<<<<<<<<<<<
@@ -13301,10 +13301,10 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
  */
   __pyx_t_4 = PyList_New(0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_4);
-  __pyx_v_arcs = __pyx_t_4;
+  __pyx_v_arcs = ((PyObject*)__pyx_t_4);
   __pyx_t_4 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":28
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":28
  *             raise IndexError('lattice index out of range')
  *         arcs = []
  *         cdef vector[lattice.LatticeArc] arc_vector = self.lattice[0][index]             # <<<<<<<<<<<<<<
@@ -13313,7 +13313,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
  */
   __pyx_v_arc_vector = ((__pyx_v_self->lattice[0])[__pyx_v_index]);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":31
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":31
  *         cdef lattice.LatticeArc* arc
  *         cdef unsigned i
  *         for i in range(arc_vector.size()):             # <<<<<<<<<<<<<<
@@ -13324,7 +13324,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
   for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) {
     __pyx_v_i = __pyx_t_6;
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":32
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":32
  *         cdef unsigned i
  *         for i in range(arc_vector.size()):
  *             arc = &arc_vector[i]             # <<<<<<<<<<<<<<
@@ -13333,7 +13333,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
  */
     __pyx_v_arc = (&(__pyx_v_arc_vector[__pyx_v_i]));
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":33
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":33
  *         for i in range(arc_vector.size()):
  *             arc = &arc_vector[i]
  *             label = unicode(TDConvert(arc.label).c_str(), 'utf8')             # <<<<<<<<<<<<<<
@@ -13357,7 +13357,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
     __pyx_v_label = ((PyObject*)__pyx_t_4);
     __pyx_t_4 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":34
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":34
  *             arc = &arc_vector[i]
  *             label = unicode(TDConvert(arc.label).c_str(), 'utf8')
  *             arcs.append((label, arc.cost, arc.dist2next))             # <<<<<<<<<<<<<<
@@ -13383,7 +13383,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_5_cdec_L
     __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":35
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":35
  *             label = unicode(TDConvert(arc.label).c_str(), 'utf8')
  *             arcs.append((label, arc.cost, arc.dist2next))
  *         return tuple(arcs)             # <<<<<<<<<<<<<<
@@ -13439,7 +13439,7 @@ static int __pyx_pw_5_cdec_7Lattice_9__setitem__(PyObject *__pyx_v_self, PyObjec
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":37
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":37
  *         return tuple(arcs)
  * 
  *     def __setitem__(self, int index, tuple arcs):             # <<<<<<<<<<<<<<
@@ -13473,7 +13473,7 @@ static int __pyx_pf_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_5_cdec_Lattice
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__setitem__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":38
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":38
  * 
  *     def __setitem__(self, int index, tuple arcs):
  *         if not 0 <= index < len(self):             # <<<<<<<<<<<<<<
@@ -13488,7 +13488,7 @@ static int __pyx_pf_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_5_cdec_Lattice
   __pyx_t_3 = (!__pyx_t_1);
   if (__pyx_t_3) {
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":39
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":39
  *     def __setitem__(self, int index, tuple arcs):
  *         if not 0 <= index < len(self):
  *             raise IndexError('lattice index out of range')             # <<<<<<<<<<<<<<
@@ -13504,7 +13504,7 @@ static int __pyx_pf_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_5_cdec_Lattice
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":41
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":41
  *             raise IndexError('lattice index out of range')
  *         cdef lattice.LatticeArc* arc
  *         for (label, cost, dist2next) in arcs:             # <<<<<<<<<<<<<<
@@ -13550,8 +13550,11 @@ static int __pyx_pf_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_5_cdec_Lattice
       __Pyx_INCREF(__pyx_t_8);
       #else
       __pyx_t_6 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_6);
       __pyx_t_7 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_7);
       __pyx_t_8 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_8);
       #endif
       __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
     } else
@@ -13588,7 +13591,7 @@ static int __pyx_pf_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_5_cdec_Lattice
     __pyx_v_dist2next = __pyx_t_8;
     __pyx_t_8 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":42
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":42
  *         cdef lattice.LatticeArc* arc
  *         for (label, cost, dist2next) in arcs:
  *             label_str = as_str(label)             # <<<<<<<<<<<<<<
@@ -13601,7 +13604,7 @@ static int __pyx_pf_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_5_cdec_Lattice
     __pyx_v_label_str = ((PyObject*)__pyx_t_5);
     __pyx_t_5 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":43
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":43
  *         for (label, cost, dist2next) in arcs:
  *             label_str = as_str(label)
  *             arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next)             # <<<<<<<<<<<<<<
@@ -13613,7 +13616,7 @@ static int __pyx_pf_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_5_cdec_Lattice
     __pyx_t_13 = __Pyx_PyInt_AsInt(__pyx_v_dist2next); if (unlikely((__pyx_t_13 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __pyx_v_arc = new LatticeArc(TD::Convert(__pyx_t_11), __pyx_t_12, __pyx_t_13);
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":44
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":44
  *             label_str = as_str(label)
  *             arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next)
  *             self.lattice[0][index].push_back(arc[0])             # <<<<<<<<<<<<<<
@@ -13622,7 +13625,7 @@ static int __pyx_pf_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_5_cdec_Lattice
  */
     ((__pyx_v_self->lattice[0])[__pyx_v_index]).push_back((__pyx_v_arc[0]));
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":45
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":45
  *             arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next)
  *             self.lattice[0][index].push_back(arc[0])
  *             del arc             # <<<<<<<<<<<<<<
@@ -13664,7 +13667,7 @@ static Py_ssize_t __pyx_pw_5_cdec_7Lattice_11__len__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":47
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":47
  *             del arc
  * 
  *     def __len__(self):             # <<<<<<<<<<<<<<
@@ -13677,7 +13680,7 @@ static Py_ssize_t __pyx_pf_5_cdec_7Lattice_10__len__(struct __pyx_obj_5_cdec_Lat
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__len__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":48
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":48
  * 
  *     def __len__(self):
  *         return self.lattice.size()             # <<<<<<<<<<<<<<
@@ -13704,7 +13707,7 @@ static PyObject *__pyx_pw_5_cdec_7Lattice_13__str__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":50
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":50
  *         return self.lattice.size()
  * 
  *     def __str__(self):             # <<<<<<<<<<<<<<
@@ -13722,7 +13725,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_12__str__(struct __pyx_obj_5_cdec_Latt
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__str__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":51
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":51
  * 
  *     def __str__(self):
  *         return str(hypergraph.AsPLF(self.lattice[0], True).c_str())             # <<<<<<<<<<<<<<
@@ -13768,7 +13771,7 @@ static PyObject *__pyx_pw_5_cdec_7Lattice_15__unicode__(PyObject *__pyx_v_self,
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":53
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":53
  *         return str(hypergraph.AsPLF(self.lattice[0], True).c_str())
  * 
  *     def __unicode__(self):             # <<<<<<<<<<<<<<
@@ -13786,7 +13789,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_14__unicode__(struct __pyx_obj_5_cdec_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__unicode__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":54
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":54
  * 
  *     def __unicode__(self):
  *         return unicode(str(self), 'utf8')             # <<<<<<<<<<<<<<
@@ -13842,7 +13845,7 @@ static PyObject *__pyx_pw_5_cdec_7Lattice_17__iter__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":56
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":56
  *         return unicode(str(self), 'utf8')
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -13905,7 +13908,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_18generator14(__pyx_GeneratorObject *_
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":58
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":58
  *     def __iter__(self):
  *         cdef unsigned i
  *         for i in range(len(self)):             # <<<<<<<<<<<<<<
@@ -13916,7 +13919,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_18generator14(__pyx_GeneratorObject *_
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":59
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":59
  *         cdef unsigned i
  *         for i in range(len(self)):
  *             yield self[i]             # <<<<<<<<<<<<<<
@@ -13977,7 +13980,7 @@ static PyObject *__pyx_pw_5_cdec_7Lattice_5todot_1lines(PyObject *__pyx_self, CY
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":63
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":63
  *     def todot(self):
  *         """lattice.todot() -> Representation of the lattice in GraphViz dot format."""
  *         def lines():             # <<<<<<<<<<<<<<
@@ -14054,7 +14057,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":64
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":64
  *         """lattice.todot() -> Representation of the lattice in GraphViz dot format."""
  *         def lines():
  *             yield 'digraph lattice {'             # <<<<<<<<<<<<<<
@@ -14071,7 +14074,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
   __pyx_L4_resume_from_yield:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":65
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":65
  *         def lines():
  *             yield 'digraph lattice {'
  *             yield 'rankdir = LR;'             # <<<<<<<<<<<<<<
@@ -14088,7 +14091,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
   __pyx_L5_resume_from_yield:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":66
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":66
  *             yield 'digraph lattice {'
  *             yield 'rankdir = LR;'
  *             yield 'node [shape=circle];'             # <<<<<<<<<<<<<<
@@ -14105,7 +14108,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
   __pyx_L6_resume_from_yield:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":67
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":67
  *             yield 'rankdir = LR;'
  *             yield 'node [shape=circle];'
  *             for i in range(len(self)):             # <<<<<<<<<<<<<<
@@ -14168,7 +14171,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
     __pyx_cur_scope->__pyx_v_i = __pyx_t_1;
     __pyx_t_1 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":68
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":68
  *             yield 'node [shape=circle];'
  *             for i in range(len(self)):
  *                 for label, weight, delta in self[i]:             # <<<<<<<<<<<<<<
@@ -14239,8 +14242,11 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
         __Pyx_INCREF(__pyx_t_10);
         #else
         __pyx_t_8 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_8);
         __pyx_t_9 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_9);
         __pyx_t_10 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_10);
         #endif
         __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
       } else
@@ -14283,7 +14289,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
       __pyx_cur_scope->__pyx_v_delta = __pyx_t_10;
       __pyx_t_10 = 0;
 
-      /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":69
+      /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":69
  *             for i in range(len(self)):
  *                 for label, weight, delta in self[i]:
  *                     yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"'))             # <<<<<<<<<<<<<<
@@ -14343,7 +14349,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
   }
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":70
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":70
  *                 for label, weight, delta in self[i]:
  *                     yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"'))
  *             yield '%d [shape=doublecircle]' % len(self)             # <<<<<<<<<<<<<<
@@ -14369,7 +14375,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
   __pyx_L14_resume_from_yield:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":71
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":71
  *                     yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"'))
  *             yield '%d [shape=doublecircle]' % len(self)
  *             yield '}'             # <<<<<<<<<<<<<<
@@ -14404,7 +14410,7 @@ static PyObject *__pyx_gb_5_cdec_7Lattice_5todot_2generator20(__pyx_GeneratorObj
   return NULL;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":61
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":61
  *             yield self[i]
  * 
  *     def todot(self):             # <<<<<<<<<<<<<<
@@ -14434,19 +14440,19 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_19todot(struct __pyx_obj_5_cdec_Lattic
   __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self);
   __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":63
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":63
  *     def todot(self):
  *         """lattice.todot() -> Representation of the lattice in GraphViz dot format."""
  *         def lines():             # <<<<<<<<<<<<<<
  *             yield 'digraph lattice {'
  *             yield 'rankdir = LR;'
  */
-  __pyx_t_1 = __Pyx_CyFunction_NewEx(&__pyx_mdef_5_cdec_7Lattice_5todot_1lines, 0, ((PyObject*)__pyx_cur_scope), __pyx_n_s___cdec, ((PyObject *)__pyx_k_codeobj_36)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = __Pyx_CyFunction_NewEx(&__pyx_mdef_5_cdec_7Lattice_5todot_1lines, 0, __pyx_n_s_38, ((PyObject*)__pyx_cur_scope), __pyx_n_s___cdec, ((PyObject *)__pyx_k_codeobj_36)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __pyx_v_lines = __pyx_t_1;
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":72
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":72
  *             yield '%d [shape=doublecircle]' % len(self)
  *             yield '}'
  *         return '\n'.join(lines()).encode('utf8')             # <<<<<<<<<<<<<<
@@ -14454,9 +14460,9 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_19todot(struct __pyx_obj_5_cdec_Lattic
  *     def as_hypergraph(self):
  */
   __Pyx_XDECREF(__pyx_r);
-  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_38), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_39), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_t_2 = PyObject_Call(__pyx_v_lines, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = __pyx_pf_5_cdec_7Lattice_5todot_lines(__pyx_v_lines); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
@@ -14470,7 +14476,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_19todot(struct __pyx_obj_5_cdec_Lattic
   __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__encode); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
-  __pyx_t_2 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_39), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_40), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
   __pyx_r = __pyx_t_2;
@@ -14505,7 +14511,7 @@ static PyObject *__pyx_pw_5_cdec_7Lattice_22as_hypergraph(PyObject *__pyx_v_self
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/lattice.pxi":74
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":74
  *         return '\n'.join(lines()).encode('utf8')
  * 
  *     def as_hypergraph(self):             # <<<<<<<<<<<<<<
@@ -14526,7 +14532,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj_5_cde
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("as_hypergraph", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":76
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":76
  *     def as_hypergraph(self):
  *         """lattice.as_hypergraph() -> Hypergraph representation of the lattice."""
  *         cdef Hypergraph result = Hypergraph.__new__(Hypergraph)             # <<<<<<<<<<<<<<
@@ -14539,7 +14545,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj_5_cde
   __pyx_v_result = ((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":77
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":77
  *         """lattice.as_hypergraph() -> Hypergraph representation of the lattice."""
  *         cdef Hypergraph result = Hypergraph.__new__(Hypergraph)
  *         result.hg = new hypergraph.Hypergraph()             # <<<<<<<<<<<<<<
@@ -14548,7 +14554,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj_5_cde
  */
   __pyx_v_result->hg = new Hypergraph();
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":78
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":78
  *         cdef Hypergraph result = Hypergraph.__new__(Hypergraph)
  *         result.hg = new hypergraph.Hypergraph()
  *         cdef bytes plf = str(self)             # <<<<<<<<<<<<<<
@@ -14567,7 +14573,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj_5_cde
   __pyx_v_plf = ((PyObject*)__pyx_t_2);
   __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":79
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":79
  *         result.hg = new hypergraph.Hypergraph()
  *         cdef bytes plf = str(self)
  *         hypergraph.ReadFromPLF(plf, result.hg)             # <<<<<<<<<<<<<<
@@ -14576,7 +14582,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj_5_cde
   __pyx_t_3 = __pyx_convert_string_from_py_(((PyObject *)__pyx_v_plf)); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   HypergraphIO::ReadFromPLF(__pyx_t_3, __pyx_v_result->hg);
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":80
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":80
  *         cdef bytes plf = str(self)
  *         hypergraph.ReadFromPLF(plf, result.hg)
  *         return result             # <<<<<<<<<<<<<<
@@ -14601,7 +14607,7 @@ static PyObject *__pyx_pf_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj_5_cde
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":3
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":3
  * cimport mteval
  * 
  * cdef SufficientStats as_stats(x, y):             # <<<<<<<<<<<<<<
@@ -14613,29 +14619,27 @@ static struct __pyx_obj_5_cdec_SufficientStats *__pyx_f_5_cdec_as_stats(PyObject
   struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_stats = NULL;
   struct __pyx_obj_5_cdec_SufficientStats *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   int __pyx_t_3;
   int __pyx_t_4;
+  EvaluationMetric *__pyx_t_5;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("as_stats", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":4
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":4
  * 
  * cdef SufficientStats as_stats(x, y):
  *     if isinstance(x, SufficientStats):             # <<<<<<<<<<<<<<
  *         return x
  *     elif x == 0 and isinstance(y, SufficientStats):
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SufficientStats));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_x, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_x, ((PyObject*)__pyx_ptype_5_cdec_SufficientStats)); 
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":5
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":5
  * cdef SufficientStats as_stats(x, y):
  *     if isinstance(x, SufficientStats):
  *         return x             # <<<<<<<<<<<<<<
@@ -14650,40 +14654,37 @@ static struct __pyx_obj_5_cdec_SufficientStats *__pyx_f_5_cdec_as_stats(PyObject
     goto __pyx_L3;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":6
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":6
  *     if isinstance(x, SufficientStats):
  *         return x
  *     elif x == 0 and isinstance(y, SufficientStats):             # <<<<<<<<<<<<<<
  *         stats = SufficientStats()
  *         stats.stats = new mteval.SufficientStats()
  */
-  __pyx_t_1 = PyObject_RichCompare(__pyx_v_x, __pyx_int_0, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
-    __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SufficientStats));
-    __Pyx_INCREF(__pyx_t_1);
-    __pyx_t_3 = __Pyx_TypeCheck(__pyx_v_y, __pyx_t_1); 
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_2 = PyObject_RichCompare(__pyx_v_x, __pyx_int_0, Py_EQ); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  if (__pyx_t_1) {
+    __pyx_t_3 = __Pyx_TypeCheck(__pyx_v_y, ((PyObject*)__pyx_ptype_5_cdec_SufficientStats)); 
     __pyx_t_4 = __pyx_t_3;
   } else {
-    __pyx_t_4 = __pyx_t_2;
+    __pyx_t_4 = __pyx_t_1;
   }
   if (__pyx_t_4) {
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":7
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":7
  *         return x
  *     elif x == 0 and isinstance(y, SufficientStats):
  *         stats = SufficientStats()             # <<<<<<<<<<<<<<
  *         stats.stats = new mteval.SufficientStats()
  *         stats.metric = (<SufficientStats> y).metric
  */
-    __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SufficientStats)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_v_stats = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1);
-    __pyx_t_1 = 0;
+    __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SufficientStats)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_v_stats = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_2);
+    __pyx_t_2 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":8
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":8
  *     elif x == 0 and isinstance(y, SufficientStats):
  *         stats = SufficientStats()
  *         stats.stats = new mteval.SufficientStats()             # <<<<<<<<<<<<<<
@@ -14692,16 +14693,17 @@ static struct __pyx_obj_5_cdec_SufficientStats *__pyx_f_5_cdec_as_stats(PyObject
  */
     __pyx_v_stats->stats = new SufficientStats();
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":9
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":9
  *         stats = SufficientStats()
  *         stats.stats = new mteval.SufficientStats()
  *         stats.metric = (<SufficientStats> y).metric             # <<<<<<<<<<<<<<
  *         return stats
  * 
  */
-    __pyx_v_stats->metric = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_v_y)->metric;
+    __pyx_t_5 = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_v_y)->metric;
+    __pyx_v_stats->metric = __pyx_t_5;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":10
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":10
  *         stats.stats = new mteval.SufficientStats()
  *         stats.metric = (<SufficientStats> y).metric
  *         return stats             # <<<<<<<<<<<<<<
@@ -14719,7 +14721,7 @@ static struct __pyx_obj_5_cdec_SufficientStats *__pyx_f_5_cdec_as_stats(PyObject
   __pyx_r = ((struct __pyx_obj_5_cdec_SufficientStats *)Py_None); __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_AddTraceback("_cdec.as_stats", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = 0;
   __pyx_L0:;
@@ -14740,7 +14742,7 @@ static PyObject *__pyx_pw_5_cdec_9Candidate_5words_1__get__(PyObject *__pyx_v_se
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":17
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":17
  * 
  *     property words:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -14759,7 +14761,7 @@ static PyObject *__pyx_pf_5_cdec_9Candidate_5words___get__(struct __pyx_obj_5_cd
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":18
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":18
  *     property words:
  *         def __get__(self):
  *             return unicode(GetString(self.candidate.ewords).c_str(), encoding='utf8')             # <<<<<<<<<<<<<<
@@ -14810,7 +14812,7 @@ static PyObject *__pyx_pw_5_cdec_9Candidate_4fmap_1__get__(PyObject *__pyx_v_sel
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":21
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":21
  * 
  *     property fmap:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -14828,7 +14830,7 @@ static PyObject *__pyx_pf_5_cdec_9Candidate_4fmap___get__(struct __pyx_obj_5_cde
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":22
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":22
  *     property fmap:
  *         def __get__(self):
  *             cdef SparseVector fmap = SparseVector.__new__(SparseVector)             # <<<<<<<<<<<<<<
@@ -14841,7 +14843,7 @@ static PyObject *__pyx_pf_5_cdec_9Candidate_4fmap___get__(struct __pyx_obj_5_cde
   __pyx_v_fmap = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":23
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":23
  *         def __get__(self):
  *             cdef SparseVector fmap = SparseVector.__new__(SparseVector)
  *             fmap.vector = new FastSparseVector[weight_t](self.candidate.fmap)             # <<<<<<<<<<<<<<
@@ -14850,7 +14852,7 @@ static PyObject *__pyx_pf_5_cdec_9Candidate_4fmap___get__(struct __pyx_obj_5_cde
  */
   __pyx_v_fmap->vector = new FastSparseVector<weight_t>(__pyx_v_self->candidate->fmap);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":24
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":24
  *             cdef SparseVector fmap = SparseVector.__new__(SparseVector)
  *             fmap.vector = new FastSparseVector[weight_t](self.candidate.fmap)
  *             return fmap             # <<<<<<<<<<<<<<
@@ -14886,7 +14888,7 @@ static PyObject *__pyx_pw_5_cdec_9Candidate_5score_1__get__(PyObject *__pyx_v_se
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":14
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":14
  * cdef class Candidate:
  *     cdef mteval.const_Candidate* candidate
  *     cdef public float score             # <<<<<<<<<<<<<<
@@ -14962,7 +14964,7 @@ static void __pyx_pw_5_cdec_15SufficientStats_1__dealloc__(PyObject *__pyx_v_sel
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":30
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":30
  *     cdef mteval.EvaluationMetric* metric
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -14974,7 +14976,7 @@ static void __pyx_pf_5_cdec_15SufficientStats___dealloc__(CYTHON_UNUSED struct _
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":31
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":31
  * 
  *     def __dealloc__(self):
  *         del self.stats             # <<<<<<<<<<<<<<
@@ -14997,7 +14999,7 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_5score_1__get__(PyObject *__p
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":34
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":34
  * 
  *     property score:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -15014,7 +15016,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_5score___get__(struct __pyx_o
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":35
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":35
  *     property score:
  *         def __get__(self):
  *             return self.metric.ComputeScore(self.stats[0])             # <<<<<<<<<<<<<<
@@ -15051,7 +15053,7 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_6detail_1__get__(PyObject *__
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":38
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":38
  * 
  *     property detail:
  *         def __get__(self):             # <<<<<<<<<<<<<<
@@ -15069,7 +15071,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_6detail___get__(struct __pyx_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__get__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":39
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":39
  *     property detail:
  *         def __get__(self):
  *             return str(self.metric.DetailedScore(self.stats[0]).c_str())             # <<<<<<<<<<<<<<
@@ -15115,7 +15117,7 @@ static Py_ssize_t __pyx_pw_5_cdec_15SufficientStats_3__len__(PyObject *__pyx_v_s
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":41
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":41
  *             return str(self.metric.DetailedScore(self.stats[0]).c_str())
  * 
  *     def __len__(self):             # <<<<<<<<<<<<<<
@@ -15128,7 +15130,7 @@ static Py_ssize_t __pyx_pf_5_cdec_15SufficientStats_2__len__(struct __pyx_obj_5_
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__len__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":42
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":42
  * 
  *     def __len__(self):
  *         return self.stats.size()             # <<<<<<<<<<<<<<
@@ -15156,7 +15158,7 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_5__iter__(PyObject *__pyx_v_s
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":44
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":44
  *         return self.stats.size()
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -15220,7 +15222,7 @@ static PyObject *__pyx_gb_5_cdec_15SufficientStats_6generator15(__pyx_GeneratorO
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":45
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":45
  * 
  *     def __iter__(self):
  *         for i in range(len(self)):             # <<<<<<<<<<<<<<
@@ -15279,7 +15281,7 @@ static PyObject *__pyx_gb_5_cdec_15SufficientStats_6generator15(__pyx_GeneratorO
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
     __pyx_t_2 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":46
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":46
  *     def __iter__(self):
  *         for i in range(len(self)):
  *             yield self[i]             # <<<<<<<<<<<<<<
@@ -15343,7 +15345,7 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_8__getitem__(PyObject *__pyx_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":48
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":48
  *             yield self[i]
  * 
  *     def __getitem__(self, int index):             # <<<<<<<<<<<<<<
@@ -15363,7 +15365,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__getitem__(struct __pyx_obj
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__getitem__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":49
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":49
  * 
  *     def __getitem__(self, int index):
  *         if not 0 <= index < len(self):             # <<<<<<<<<<<<<<
@@ -15378,14 +15380,14 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__getitem__(struct __pyx_obj
   __pyx_t_3 = (!__pyx_t_1);
   if (__pyx_t_3) {
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":50
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":50
  *     def __getitem__(self, int index):
  *         if not 0 <= index < len(self):
  *             raise IndexError('sufficient stats vector index out of range')             # <<<<<<<<<<<<<<
  *         return self.stats[0][index]
  * 
  */
-    __pyx_t_4 = PyObject_Call(__pyx_builtin_IndexError, ((PyObject *)__pyx_k_tuple_41), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyObject_Call(__pyx_builtin_IndexError, ((PyObject *)__pyx_k_tuple_42), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
     __Pyx_Raise(__pyx_t_4, 0, 0, 0);
     __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
@@ -15394,7 +15396,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__getitem__(struct __pyx_obj
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":51
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":51
  *         if not 0 <= index < len(self):
  *             raise IndexError('sufficient stats vector index out of range')
  *         return self.stats[0][index]             # <<<<<<<<<<<<<<
@@ -15436,7 +15438,7 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_10__iadd__(PyObject *__pyx_v_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":53
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":53
  *         return self.stats[0][index]
  * 
  *     def __iadd__(SufficientStats self, SufficientStats other):             # <<<<<<<<<<<<<<
@@ -15449,7 +15451,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__iadd__(struct __pyx_obj_5_
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__iadd__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":54
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":54
  * 
  *     def __iadd__(SufficientStats self, SufficientStats other):
  *         self.stats[0] += other.stats[0]             # <<<<<<<<<<<<<<
@@ -15458,7 +15460,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__iadd__(struct __pyx_obj_5_
  */
   (__pyx_v_self->stats[0]) += (__pyx_v_other->stats[0]);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":55
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":55
  *     def __iadd__(SufficientStats self, SufficientStats other):
  *         self.stats[0] += other.stats[0]
  *         return self             # <<<<<<<<<<<<<<
@@ -15488,7 +15490,7 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_12__add__(PyObject *__pyx_v_x
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":57
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":57
  *         return self
  * 
  *     def __add__(x, y):             # <<<<<<<<<<<<<<
@@ -15503,12 +15505,13 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_11__add__(PyObject *__pyx_v_x
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
   PyObject *__pyx_t_1 = NULL;
+  EvaluationMetric *__pyx_t_2;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__add__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":58
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":58
  * 
  *     def __add__(x, y):
  *         cdef SufficientStats sx = as_stats(x, y)             # <<<<<<<<<<<<<<
@@ -15520,7 +15523,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_11__add__(PyObject *__pyx_v_x
   __pyx_v_sx = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":59
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":59
  *     def __add__(x, y):
  *         cdef SufficientStats sx = as_stats(x, y)
  *         cdef SufficientStats sy = as_stats(y, x)             # <<<<<<<<<<<<<<
@@ -15532,7 +15535,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_11__add__(PyObject *__pyx_v_x
   __pyx_v_sy = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":60
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":60
  *         cdef SufficientStats sx = as_stats(x, y)
  *         cdef SufficientStats sy = as_stats(y, x)
  *         cdef SufficientStats result = SufficientStats()             # <<<<<<<<<<<<<<
@@ -15544,7 +15547,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_11__add__(PyObject *__pyx_v_x
   __pyx_v_result = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":61
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":61
  *         cdef SufficientStats sy = as_stats(y, x)
  *         cdef SufficientStats result = SufficientStats()
  *         result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0]))             # <<<<<<<<<<<<<<
@@ -15553,16 +15556,17 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_11__add__(PyObject *__pyx_v_x
  */
   __pyx_v_result->stats = new SufficientStats(operator+((__pyx_v_sx->stats[0]), (__pyx_v_sy->stats[0])));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":62
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":62
  *         cdef SufficientStats result = SufficientStats()
  *         result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0]))
  *         result.metric = sx.metric             # <<<<<<<<<<<<<<
  *         return result
  * 
  */
-  __pyx_v_result->metric = __pyx_v_sx->metric;
+  __pyx_t_2 = __pyx_v_sx->metric;
+  __pyx_v_result->metric = __pyx_t_2;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":63
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":63
  *         result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0]))
  *         result.metric = sx.metric
  *         return result             # <<<<<<<<<<<<<<
@@ -15641,7 +15645,7 @@ static int __pyx_pw_5_cdec_12CandidateSet_1__cinit__(PyObject *__pyx_v_self, PyO
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":70
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":70
  *     cdef mteval.CandidateSet* cs
  * 
  *     def __cinit__(self, SegmentEvaluator evaluator):             # <<<<<<<<<<<<<<
@@ -15652,9 +15656,10 @@ static int __pyx_pw_5_cdec_12CandidateSet_1__cinit__(PyObject *__pyx_v_self, PyO
 static int __pyx_pf_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_5_cdec_CandidateSet *__pyx_v_self, struct __pyx_obj_5_cdec_SegmentEvaluator *__pyx_v_evaluator) {
   int __pyx_r;
   __Pyx_RefNannyDeclarations
+  EvaluationMetric *__pyx_t_1;
   __Pyx_RefNannySetupContext("__cinit__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":71
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":71
  * 
  *     def __cinit__(self, SegmentEvaluator evaluator):
  *         self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0])             # <<<<<<<<<<<<<<
@@ -15663,16 +15668,17 @@ static int __pyx_pf_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_5_cdec_Cand
  */
   __pyx_v_self->scorer = new boost::shared_ptr<SegmentEvaluator>((__pyx_v_evaluator->scorer[0]));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":72
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":72
  *     def __cinit__(self, SegmentEvaluator evaluator):
  *         self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0])
  *         self.metric = evaluator.metric             # <<<<<<<<<<<<<<
  *         self.cs = new mteval.CandidateSet()
  * 
  */
-  __pyx_v_self->metric = __pyx_v_evaluator->metric;
+  __pyx_t_1 = __pyx_v_evaluator->metric;
+  __pyx_v_self->metric = __pyx_t_1;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":73
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":73
  *         self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0])
  *         self.metric = evaluator.metric
  *         self.cs = new mteval.CandidateSet()             # <<<<<<<<<<<<<<
@@ -15695,7 +15701,7 @@ static void __pyx_pw_5_cdec_12CandidateSet_3__dealloc__(PyObject *__pyx_v_self)
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":75
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":75
  *         self.cs = new mteval.CandidateSet()
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -15707,7 +15713,7 @@ static void __pyx_pf_5_cdec_12CandidateSet_2__dealloc__(CYTHON_UNUSED struct __p
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":76
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":76
  * 
  *     def __dealloc__(self):
  *         del self.scorer             # <<<<<<<<<<<<<<
@@ -15716,7 +15722,7 @@ static void __pyx_pf_5_cdec_12CandidateSet_2__dealloc__(CYTHON_UNUSED struct __p
  */
   delete __pyx_v_self->scorer;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":77
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":77
  *     def __dealloc__(self):
  *         del self.scorer
  *         del self.cs             # <<<<<<<<<<<<<<
@@ -15739,7 +15745,7 @@ static Py_ssize_t __pyx_pw_5_cdec_12CandidateSet_5__len__(PyObject *__pyx_v_self
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":79
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":79
  *         del self.cs
  * 
  *     def __len__(self):             # <<<<<<<<<<<<<<
@@ -15752,7 +15758,7 @@ static Py_ssize_t __pyx_pf_5_cdec_12CandidateSet_4__len__(struct __pyx_obj_5_cde
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__len__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":80
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":80
  * 
  *     def __len__(self):
  *         return self.cs.size()             # <<<<<<<<<<<<<<
@@ -15789,7 +15795,7 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_7__getitem__(PyObject *__pyx_v_s
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":82
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":82
  *         return self.cs.size()
  * 
  *     def __getitem__(self,int k):             # <<<<<<<<<<<<<<
@@ -15809,7 +15815,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__getitem__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":83
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":83
  * 
  *     def __getitem__(self,int k):
  *         if not 0 <= k < self.cs.size():             # <<<<<<<<<<<<<<
@@ -15823,14 +15829,14 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_
   __pyx_t_2 = (!__pyx_t_1);
   if (__pyx_t_2) {
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":84
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":84
  *     def __getitem__(self,int k):
  *         if not 0 <= k < self.cs.size():
  *             raise IndexError('candidate set index out of range')             # <<<<<<<<<<<<<<
  *         cdef Candidate candidate = Candidate()
  *         candidate.candidate = &self.cs[0][k]
  */
-    __pyx_t_3 = PyObject_Call(__pyx_builtin_IndexError, ((PyObject *)__pyx_k_tuple_43), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyObject_Call(__pyx_builtin_IndexError, ((PyObject *)__pyx_k_tuple_44), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
     __Pyx_Raise(__pyx_t_3, 0, 0, 0);
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
@@ -15839,7 +15845,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":85
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":85
  *         if not 0 <= k < self.cs.size():
  *             raise IndexError('candidate set index out of range')
  *         cdef Candidate candidate = Candidate()             # <<<<<<<<<<<<<<
@@ -15851,7 +15857,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_
   __pyx_v_candidate = ((struct __pyx_obj_5_cdec_Candidate *)__pyx_t_3);
   __pyx_t_3 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":86
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":86
  *             raise IndexError('candidate set index out of range')
  *         cdef Candidate candidate = Candidate()
  *         candidate.candidate = &self.cs[0][k]             # <<<<<<<<<<<<<<
@@ -15860,7 +15866,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_
  */
   __pyx_v_candidate->candidate = (&((__pyx_v_self->cs[0])[__pyx_v_k]));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":87
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":87
  *         cdef Candidate candidate = Candidate()
  *         candidate.candidate = &self.cs[0][k]
  *         candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats)             # <<<<<<<<<<<<<<
@@ -15869,7 +15875,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_
  */
   __pyx_v_candidate->score = __pyx_v_self->metric->ComputeScore(((__pyx_v_self->cs[0])[__pyx_v_k]).eval_feats);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":88
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":88
  *         candidate.candidate = &self.cs[0][k]
  *         candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats)
  *         return candidate             # <<<<<<<<<<<<<<
@@ -15906,7 +15912,7 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_9__iter__(PyObject *__pyx_v_self
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":90
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":90
  *         return candidate
  * 
  *     def __iter__(self):             # <<<<<<<<<<<<<<
@@ -15969,7 +15975,7 @@ static PyObject *__pyx_gb_5_cdec_12CandidateSet_10generator16(__pyx_GeneratorObj
   __pyx_L3_first_run:;
   if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":92
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":92
  *     def __iter__(self):
  *         cdef unsigned i
  *         for i in range(len(self)):             # <<<<<<<<<<<<<<
@@ -15980,7 +15986,7 @@ static PyObject *__pyx_gb_5_cdec_12CandidateSet_10generator16(__pyx_GeneratorObj
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_cur_scope->__pyx_v_i = __pyx_t_2;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":93
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":93
  *         cdef unsigned i
  *         for i in range(len(self)):
  *             yield self[i]             # <<<<<<<<<<<<<<
@@ -16078,7 +16084,7 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_12add_kbest(PyObject *__pyx_v_se
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":95
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":95
  *             yield self[i]
  * 
  *     def add_kbest(self, Hypergraph hypergraph, unsigned k):             # <<<<<<<<<<<<<<
@@ -16091,7 +16097,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_11add_kbest(struct __pyx_obj_5_c
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("add_kbest", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":98
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":98
  *         """cs.add_kbest(Hypergraph hypergraph, int k) -> Extract K-best hypotheses
  *         from the hypergraph and add them to the candidate set."""
  *         self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get())             # <<<<<<<<<<<<<<
@@ -16115,7 +16121,7 @@ static void __pyx_pw_5_cdec_16SegmentEvaluator_1__dealloc__(PyObject *__pyx_v_se
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":104
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":104
  *     cdef mteval.EvaluationMetric* metric
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -16127,7 +16133,7 @@ static void __pyx_pf_5_cdec_16SegmentEvaluator___dealloc__(CYTHON_UNUSED struct
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":105
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":105
  * 
  *     def __dealloc__(self):
  *         del self.scorer             # <<<<<<<<<<<<<<
@@ -16151,7 +16157,7 @@ static PyObject *__pyx_pw_5_cdec_16SegmentEvaluator_3evaluate(PyObject *__pyx_v_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":107
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":107
  *         del self.scorer
  * 
  *     def evaluate(self, sentence):             # <<<<<<<<<<<<<<
@@ -16165,14 +16171,15 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
   PyObject *__pyx_t_1 = NULL;
-  PyObject *__pyx_t_2 = NULL;
-  std::string __pyx_t_3;
+  EvaluationMetric *__pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  std::string __pyx_t_4;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("evaluate", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":110
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":110
  *         """se.evaluate(sentence) -> SufficientStats for the given hypothesis."""
  *         cdef vector[WordID] hyp
  *         cdef SufficientStats sf = SufficientStats()             # <<<<<<<<<<<<<<
@@ -16184,16 +16191,17 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5
   __pyx_v_sf = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":111
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":111
  *         cdef vector[WordID] hyp
  *         cdef SufficientStats sf = SufficientStats()
  *         sf.metric = self.metric             # <<<<<<<<<<<<<<
  *         sf.stats = new mteval.SufficientStats()
  *         ConvertSentence(as_str(sentence.strip()), &hyp)
  */
-  __pyx_v_sf->metric = __pyx_v_self->metric;
+  __pyx_t_2 = __pyx_v_self->metric;
+  __pyx_v_sf->metric = __pyx_t_2;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":112
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":112
  *         cdef SufficientStats sf = SufficientStats()
  *         sf.metric = self.metric
  *         sf.stats = new mteval.SufficientStats()             # <<<<<<<<<<<<<<
@@ -16202,7 +16210,7 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5
  */
   __pyx_v_sf->stats = new SufficientStats();
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":113
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":113
  *         sf.metric = self.metric
  *         sf.stats = new mteval.SufficientStats()
  *         ConvertSentence(as_str(sentence.strip()), &hyp)             # <<<<<<<<<<<<<<
@@ -16211,17 +16219,17 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5
  */
   __pyx_t_1 = PyObject_GetAttr(__pyx_v_sentence, __pyx_n_s__strip); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_3);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  __pyx_t_1 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_t_2, NULL)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_t_3, NULL)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
-  __pyx_t_3 = __pyx_convert_string_from_py_(__pyx_t_1); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_t_4 = __pyx_convert_string_from_py_(__pyx_t_1); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  TD::ConvertSentence(__pyx_t_3, (&__pyx_v_hyp));
+  TD::ConvertSentence(__pyx_t_4, (&__pyx_v_hyp));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":114
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":114
  *         sf.stats = new mteval.SufficientStats()
  *         ConvertSentence(as_str(sentence.strip()), &hyp)
  *         self.scorer.get().Evaluate(hyp, sf.stats)             # <<<<<<<<<<<<<<
@@ -16230,7 +16238,7 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5
  */
   __pyx_v_self->scorer->get()->Evaluate(__pyx_v_hyp, __pyx_v_sf->stats);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":115
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":115
  *         ConvertSentence(as_str(sentence.strip()), &hyp)
  *         self.scorer.get().Evaluate(hyp, sf.stats)
  *         return sf             # <<<<<<<<<<<<<<
@@ -16246,7 +16254,7 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5
   goto __pyx_L0;
   __pyx_L1_error:;
   __Pyx_XDECREF(__pyx_t_1);
-  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
   __Pyx_AddTraceback("_cdec.SegmentEvaluator.evaluate", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = NULL;
   __pyx_L0:;
@@ -16268,7 +16276,7 @@ static PyObject *__pyx_pw_5_cdec_16SegmentEvaluator_5candidate_set(PyObject *__p
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":117
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":117
  *         return sf
  * 
  *     def candidate_set(self):             # <<<<<<<<<<<<<<
@@ -16286,7 +16294,7 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_4candidate_set(struct __pyx_
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("candidate_set", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":119
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":119
  *     def candidate_set(self):
  *         """se.candidate_set() -> Candidate set using this segment evaluator for scoring."""
  *         return CandidateSet(self)             # <<<<<<<<<<<<<<
@@ -16330,7 +16338,7 @@ static int __pyx_pw_5_cdec_6Scorer_1__cinit__(PyObject *__pyx_v_self, PyObject *
     static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__name,0};
     PyObject* values[1] = {0};
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":125
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":125
  *     cdef mteval.EvaluationMetric* metric
  * 
  *     def __cinit__(self, bytes name=None):             # <<<<<<<<<<<<<<
@@ -16395,7 +16403,7 @@ static int __pyx_pf_5_cdec_6Scorer___cinit__(struct __pyx_obj_5_cdec_Scorer *__p
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__cinit__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":126
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":126
  * 
  *     def __cinit__(self, bytes name=None):
  *         if name:             # <<<<<<<<<<<<<<
@@ -16405,7 +16413,7 @@ static int __pyx_pf_5_cdec_6Scorer___cinit__(struct __pyx_obj_5_cdec_Scorer *__p
   __pyx_t_1 = (((PyObject *)__pyx_v_name) != Py_None) && (PyBytes_GET_SIZE(((PyObject *)__pyx_v_name)) != 0);
   if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":127
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":127
  *     def __cinit__(self, bytes name=None):
  *         if name:
  *             self.name = new string(name)             # <<<<<<<<<<<<<<
@@ -16413,10 +16421,15 @@ static int __pyx_pf_5_cdec_6Scorer___cinit__(struct __pyx_obj_5_cdec_Scorer *__p
  * 
  */
     __pyx_t_2 = PyBytes_AsString(((PyObject *)__pyx_v_name)); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    try {__pyx_t_3 = new std::string(__pyx_t_2);} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+    try {
+      __pyx_t_3 = new std::string(__pyx_t_2);
+    } catch(...) {
+      __Pyx_CppExn2PyErr();
+      {__pyx_filename = __pyx_f[5]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
     __pyx_v_self->name = __pyx_t_3;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":128
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":128
  *         if name:
  *             self.name = new string(name)
  *             self.metric = mteval.MetricInstance(self.name[0])             # <<<<<<<<<<<<<<
@@ -16447,7 +16460,7 @@ static void __pyx_pw_5_cdec_6Scorer_3__dealloc__(PyObject *__pyx_v_self) {
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":130
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":130
  *             self.metric = mteval.MetricInstance(self.name[0])
  * 
  *     def __dealloc__(self):             # <<<<<<<<<<<<<<
@@ -16459,7 +16472,7 @@ static void __pyx_pf_5_cdec_6Scorer_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__dealloc__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":131
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":131
  * 
  *     def __dealloc__(self):
  *         del self.name             # <<<<<<<<<<<<<<
@@ -16518,7 +16531,7 @@ static PyObject *__pyx_pw_5_cdec_6Scorer_5__call__(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":133
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":133
  *         del self.name
  * 
  *     def __call__(self, refs):             # <<<<<<<<<<<<<<
@@ -16533,8 +16546,8 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
   struct __pyx_obj_5_cdec_SegmentEvaluator *__pyx_v_evaluator = 0;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   std::vector<std::vector<WordID> > *__pyx_t_3;
   Py_ssize_t __pyx_t_4;
   PyObject *(*__pyx_t_5)(PyObject *);
@@ -16542,55 +16555,58 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
   std::vector<WordID> *__pyx_t_7;
   PyObject *__pyx_t_8 = NULL;
   std::string __pyx_t_9;
+  EvaluationMetric *__pyx_t_10;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__call__", 0);
   __Pyx_INCREF(__pyx_v_refs);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":134
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":134
  * 
  *     def __call__(self, refs):
  *         if isinstance(refs, basestring):             # <<<<<<<<<<<<<<
  *             refs = [refs]
  *         cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]()
  */
-  __pyx_t_1 = __pyx_builtin_basestring;
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = PyObject_IsInstance(__pyx_v_refs, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = PyObject_IsInstance(__pyx_v_refs, __pyx_builtin_basestring); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (__pyx_t_1) {
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":135
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":135
  *     def __call__(self, refs):
  *         if isinstance(refs, basestring):
  *             refs = [refs]             # <<<<<<<<<<<<<<
  *         cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]()
  *         cdef vector[WordID]* refv
  */
-    __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
     __Pyx_INCREF(__pyx_v_refs);
-    PyList_SET_ITEM(__pyx_t_1, 0, __pyx_v_refs);
+    PyList_SET_ITEM(__pyx_t_2, 0, __pyx_v_refs);
     __Pyx_GIVEREF(__pyx_v_refs);
     __Pyx_DECREF(__pyx_v_refs);
-    __pyx_v_refs = ((PyObject *)__pyx_t_1);
-    __pyx_t_1 = 0;
+    __pyx_v_refs = ((PyObject *)__pyx_t_2);
+    __pyx_t_2 = 0;
     goto __pyx_L3;
   }
   __pyx_L3:;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":136
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":136
  *         if isinstance(refs, basestring):
  *             refs = [refs]
  *         cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]()             # <<<<<<<<<<<<<<
  *         cdef vector[WordID]* refv
  *         for ref in refs:
  */
-  try {__pyx_t_3 = new std::vector<std::vector<WordID> >();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+  try {
+    __pyx_t_3 = new std::vector<std::vector<WordID> >();
+  } catch(...) {
+    __Pyx_CppExn2PyErr();
+    {__pyx_filename = __pyx_f[5]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
   __pyx_v_refsv = __pyx_t_3;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":138
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":138
  *         cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]()
  *         cdef vector[WordID]* refv
  *         for ref in refs:             # <<<<<<<<<<<<<<
@@ -16598,30 +16614,30 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
  *             ConvertSentence(as_str(ref.strip()), refv)
  */
   if (PyList_CheckExact(__pyx_v_refs) || PyTuple_CheckExact(__pyx_v_refs)) {
-    __pyx_t_1 = __pyx_v_refs; __Pyx_INCREF(__pyx_t_1); __pyx_t_4 = 0;
+    __pyx_t_2 = __pyx_v_refs; __Pyx_INCREF(__pyx_t_2); __pyx_t_4 = 0;
     __pyx_t_5 = NULL;
   } else {
-    __pyx_t_4 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_refs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_5 = Py_TYPE(__pyx_t_1)->tp_iternext;
+    __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_refs); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_5 = Py_TYPE(__pyx_t_2)->tp_iternext;
   }
   for (;;) {
-    if (!__pyx_t_5 && PyList_CheckExact(__pyx_t_1)) {
-      if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_1)) break;
+    if (!__pyx_t_5 && PyList_CheckExact(__pyx_t_2)) {
+      if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_2)) break;
       #if CYTHON_COMPILING_IN_CPYTHON
-      __pyx_t_6 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_4); __Pyx_INCREF(__pyx_t_6); __pyx_t_4++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_6 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_6); __pyx_t_4++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       #else
-      __pyx_t_6 = PySequence_ITEM(__pyx_t_1, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_6 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       #endif
-    } else if (!__pyx_t_5 && PyTuple_CheckExact(__pyx_t_1)) {
-      if (__pyx_t_4 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
+    } else if (!__pyx_t_5 && PyTuple_CheckExact(__pyx_t_2)) {
+      if (__pyx_t_4 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
       #if CYTHON_COMPILING_IN_CPYTHON
-      __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_4); __Pyx_INCREF(__pyx_t_6); __pyx_t_4++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_6); __pyx_t_4++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       #else
-      __pyx_t_6 = PySequence_ITEM(__pyx_t_1, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_6 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       #endif
     } else {
-      __pyx_t_6 = __pyx_t_5(__pyx_t_1);
+      __pyx_t_6 = __pyx_t_5(__pyx_t_2);
       if (unlikely(!__pyx_t_6)) {
         if (PyErr_Occurred()) {
           if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
@@ -16635,17 +16651,22 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
     __pyx_v_ref = __pyx_t_6;
     __pyx_t_6 = 0;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":139
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":139
  *         cdef vector[WordID]* refv
  *         for ref in refs:
  *             refv = new vector[WordID]()             # <<<<<<<<<<<<<<
  *             ConvertSentence(as_str(ref.strip()), refv)
  *             refsv.push_back(refv[0])
  */
-    try {__pyx_t_7 = new std::vector<WordID>();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+    try {
+      __pyx_t_7 = new std::vector<WordID>();
+    } catch(...) {
+      __Pyx_CppExn2PyErr();
+      {__pyx_filename = __pyx_f[5]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
     __pyx_v_refv = __pyx_t_7;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":140
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":140
  *         for ref in refs:
  *             refv = new vector[WordID]()
  *             ConvertSentence(as_str(ref.strip()), refv)             # <<<<<<<<<<<<<<
@@ -16664,7 +16685,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
     __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
     TD::ConvertSentence(__pyx_t_9, __pyx_v_refv);
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":141
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":141
  *             refv = new vector[WordID]()
  *             ConvertSentence(as_str(ref.strip()), refv)
  *             refsv.push_back(refv[0])             # <<<<<<<<<<<<<<
@@ -16673,7 +16694,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
  */
     __pyx_v_refsv->push_back((__pyx_v_refv[0]));
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":142
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":142
  *             ConvertSentence(as_str(ref.strip()), refv)
  *             refsv.push_back(refv[0])
  *             del refv             # <<<<<<<<<<<<<<
@@ -16682,30 +16703,31 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
  */
     delete __pyx_v_refv;
   }
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":144
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":144
  *             del refv
  *         cdef unsigned i
  *         cdef SegmentEvaluator evaluator = SegmentEvaluator()             # <<<<<<<<<<<<<<
  *         evaluator.metric = self.metric
  *         evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](
- */
-  __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SegmentEvaluator)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_t_1);
-  __pyx_v_evaluator = ((struct __pyx_obj_5_cdec_SegmentEvaluator *)__pyx_t_1);
-  __pyx_t_1 = 0;
+ */
+  __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SegmentEvaluator)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_v_evaluator = ((struct __pyx_obj_5_cdec_SegmentEvaluator *)__pyx_t_2);
+  __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":145
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":145
  *         cdef unsigned i
  *         cdef SegmentEvaluator evaluator = SegmentEvaluator()
  *         evaluator.metric = self.metric             # <<<<<<<<<<<<<<
  *         evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](
  *                 self.metric.CreateSegmentEvaluator(refsv[0]))
  */
-  __pyx_v_evaluator->metric = __pyx_v_self->metric;
+  __pyx_t_10 = __pyx_v_self->metric;
+  __pyx_v_evaluator->metric = __pyx_t_10;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":146
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":146
  *         cdef SegmentEvaluator evaluator = SegmentEvaluator()
  *         evaluator.metric = self.metric
  *         evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](             # <<<<<<<<<<<<<<
@@ -16714,7 +16736,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
  */
   __pyx_v_evaluator->scorer = new boost::shared_ptr<SegmentEvaluator>(__pyx_v_self->metric->CreateSegmentEvaluator((__pyx_v_refsv[0])));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":148
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":148
  *         evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](
  *                 self.metric.CreateSegmentEvaluator(refsv[0]))
  *         del refsv # in theory should not delete but store in SegmentEvaluator             # <<<<<<<<<<<<<<
@@ -16723,7 +16745,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
  */
   delete __pyx_v_refsv;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":149
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":149
  *                 self.metric.CreateSegmentEvaluator(refsv[0]))
  *         del refsv # in theory should not delete but store in SegmentEvaluator
  *         return evaluator             # <<<<<<<<<<<<<<
@@ -16738,7 +16760,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score
   __pyx_r = Py_None; __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_XDECREF(__pyx_t_6);
   __Pyx_XDECREF(__pyx_t_8);
   __Pyx_AddTraceback("_cdec.Scorer.__call__", __pyx_clineno, __pyx_lineno, __pyx_filename);
@@ -16763,7 +16785,7 @@ static PyObject *__pyx_pw_5_cdec_6Scorer_7__str__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":151
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":151
  *         return evaluator
  * 
  *     def __str__(self):             # <<<<<<<<<<<<<<
@@ -16781,7 +16803,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_6__str__(struct __pyx_obj_5_cdec_Scorer
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__str__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":152
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":152
  * 
  *     def __str__(self):
  *         return str(self.name.c_str())             # <<<<<<<<<<<<<<
@@ -16816,7 +16838,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_6__str__(struct __pyx_obj_5_cdec_Scorer
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":154
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":154
  *         return str(self.name.c_str())
  * 
  * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats):             # <<<<<<<<<<<<<<
@@ -16842,7 +16864,7 @@ static float __pyx_f_5_cdec__compute_score(void *__pyx_v_metric_, SufficientStat
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("_compute_score", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":155
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":155
  * 
  * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats):
  *     cdef Metric metric = <Metric> metric_             # <<<<<<<<<<<<<<
@@ -16852,7 +16874,7 @@ static float __pyx_f_5_cdec__compute_score(void *__pyx_v_metric_, SufficientStat
   __Pyx_INCREF(((PyObject *)((struct __pyx_obj_5_cdec_Metric *)__pyx_v_metric_)));
   __pyx_v_metric = ((struct __pyx_obj_5_cdec_Metric *)__pyx_v_metric_);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":156
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":156
  * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats):
  *     cdef Metric metric = <Metric> metric_
  *     cdef list ss = []             # <<<<<<<<<<<<<<
@@ -16861,10 +16883,10 @@ static float __pyx_f_5_cdec__compute_score(void *__pyx_v_metric_, SufficientStat
  */
   __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_v_ss = __pyx_t_1;
+  __pyx_v_ss = ((PyObject*)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":158
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":158
  *     cdef list ss = []
  *     cdef unsigned i
  *     for i in range(stats.size()):             # <<<<<<<<<<<<<<
@@ -16875,7 +16897,7 @@ static float __pyx_f_5_cdec__compute_score(void *__pyx_v_metric_, SufficientStat
   for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
     __pyx_v_i = __pyx_t_3;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":159
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":159
  *     cdef unsigned i
  *     for i in range(stats.size()):
  *         ss.append(stats[0][i])             # <<<<<<<<<<<<<<
@@ -16888,7 +16910,7 @@ static float __pyx_f_5_cdec__compute_score(void *__pyx_v_metric_, SufficientStat
     __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":160
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":160
  *     for i in range(stats.size()):
  *         ss.append(stats[0][i])
  *     return metric.score(ss)             # <<<<<<<<<<<<<<
@@ -16926,7 +16948,7 @@ static float __pyx_f_5_cdec__compute_score(void *__pyx_v_metric_, SufficientStat
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":162
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":162
  *     return metric.score(ss)
  * 
  * cdef void _compute_sufficient_stats(void* metric_,             # <<<<<<<<<<<<<<
@@ -16953,7 +16975,7 @@ static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std:
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("_compute_sufficient_stats", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":166
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":166
  *         vector[string]* refs,
  *         mteval.SufficientStats* out):
  *     cdef Metric metric = <Metric> metric_             # <<<<<<<<<<<<<<
@@ -16963,7 +16985,7 @@ static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std:
   __Pyx_INCREF(((PyObject *)((struct __pyx_obj_5_cdec_Metric *)__pyx_v_metric_)));
   __pyx_v_metric = ((struct __pyx_obj_5_cdec_Metric *)__pyx_v_metric_);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":167
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":167
  *         mteval.SufficientStats* out):
  *     cdef Metric metric = <Metric> metric_
  *     cdef list refs_ = []             # <<<<<<<<<<<<<<
@@ -16972,10 +16994,10 @@ static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std:
  */
   __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_v_refs_ = __pyx_t_1;
+  __pyx_v_refs_ = ((PyObject*)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":169
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":169
  *     cdef list refs_ = []
  *     cdef unsigned i
  *     for i in range(refs.size()):             # <<<<<<<<<<<<<<
@@ -16986,7 +17008,7 @@ static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std:
   for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
     __pyx_v_i = __pyx_t_3;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":170
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":170
  *     cdef unsigned i
  *     for i in range(refs.size()):
  *         refs_.append(str(refs[0][i].c_str()))             # <<<<<<<<<<<<<<
@@ -17007,7 +17029,7 @@ static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std:
     __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   }
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":171
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":171
  *     for i in range(refs.size()):
  *         refs_.append(str(refs[0][i].c_str()))
  *     cdef list ss = metric.evaluate(str(hyp.c_str()), refs_)             # <<<<<<<<<<<<<<
@@ -17042,7 +17064,7 @@ static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std:
   __pyx_v_ss = ((PyObject*)__pyx_t_4);
   __pyx_t_4 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":172
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":172
  *         refs_.append(str(refs[0][i].c_str()))
  *     cdef list ss = metric.evaluate(str(hyp.c_str()), refs_)
  *     out.fields.resize(len(ss))             # <<<<<<<<<<<<<<
@@ -17056,7 +17078,7 @@ static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std:
   __pyx_t_7 = PyList_GET_SIZE(((PyObject *)__pyx_v_ss)); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 172; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_v_out->fields.resize(__pyx_t_7);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":173
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":173
  *     cdef list ss = metric.evaluate(str(hyp.c_str()), refs_)
  *     out.fields.resize(len(ss))
  *     for i in range(len(ss)):             # <<<<<<<<<<<<<<
@@ -17071,7 +17093,7 @@ static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std:
   for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_7; __pyx_t_3+=1) {
     __pyx_v_i = __pyx_t_3;
 
-    /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":174
+    /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":174
  *     out.fields.resize(len(ss))
  *     for i in range(len(ss)):
  *         out.fields[i] = ss[i]             # <<<<<<<<<<<<<<
@@ -17116,7 +17138,7 @@ static int __pyx_pw_5_cdec_6Metric_1__cinit__(PyObject *__pyx_v_self, PyObject *
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":178
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":178
  * cdef class Metric:
  *     cdef Scorer scorer
  *     def __cinit__(self):             # <<<<<<<<<<<<<<
@@ -17137,7 +17159,7 @@ static int __pyx_pf_5_cdec_6Metric___cinit__(struct __pyx_obj_5_cdec_Metric *__p
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__cinit__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":179
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":179
  *     cdef Scorer scorer
  *     def __cinit__(self):
  *         self.scorer = Scorer()             # <<<<<<<<<<<<<<
@@ -17152,7 +17174,7 @@ static int __pyx_pf_5_cdec_6Metric___cinit__(struct __pyx_obj_5_cdec_Metric *__p
   __pyx_v_self->scorer = ((struct __pyx_obj_5_cdec_Scorer *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":180
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":180
  *     def __cinit__(self):
  *         self.scorer = Scorer()
  *         cdef bytes class_name = self.__class__.__name__             # <<<<<<<<<<<<<<
@@ -17168,7 +17190,7 @@ static int __pyx_pf_5_cdec_6Metric___cinit__(struct __pyx_obj_5_cdec_Metric *__p
   __pyx_v_class_name = ((PyObject*)__pyx_t_2);
   __pyx_t_2 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":181
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":181
  *         self.scorer = Scorer()
  *         cdef bytes class_name = self.__class__.__name__
  *         self.scorer.name = new string(class_name)             # <<<<<<<<<<<<<<
@@ -17176,10 +17198,15 @@ static int __pyx_pf_5_cdec_6Metric___cinit__(struct __pyx_obj_5_cdec_Metric *__p
  *                 <void*> self, _compute_sufficient_stats, _compute_score)
  */
   __pyx_t_3 = PyBytes_AsString(((PyObject *)__pyx_v_class_name)); if (unlikely((!__pyx_t_3) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  try {__pyx_t_4 = new std::string(__pyx_t_3);} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+  try {
+    __pyx_t_4 = new std::string(__pyx_t_3);
+  } catch(...) {
+    __Pyx_CppExn2PyErr();
+    {__pyx_filename = __pyx_f[5]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
   __pyx_v_self->scorer->name = __pyx_t_4;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":182
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":182
  *         cdef bytes class_name = self.__class__.__name__
  *         self.scorer.name = new string(class_name)
  *         self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0],             # <<<<<<<<<<<<<<
@@ -17248,7 +17275,7 @@ static PyObject *__pyx_pw_5_cdec_6Metric_3__call__(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":185
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":185
  *                 <void*> self, _compute_sufficient_stats, _compute_score)
  * 
  *     def __call__(self, refs):             # <<<<<<<<<<<<<<
@@ -17266,7 +17293,7 @@ static PyObject *__pyx_pf_5_cdec_6Metric_2__call__(struct __pyx_obj_5_cdec_Metri
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__call__", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":186
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":186
  * 
  *     def __call__(self, refs):
  *         return self.scorer(refs)             # <<<<<<<<<<<<<<
@@ -17310,7 +17337,7 @@ static PyObject *__pyx_pw_5_cdec_6Metric_5score(PyObject *__pyx_v_stats, CYTHON_
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":188
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":188
  *         return self.scorer(refs)
  * 
  *     def score(SufficientStats stats):             # <<<<<<<<<<<<<<
@@ -17323,7 +17350,7 @@ static PyObject *__pyx_pf_5_cdec_6Metric_4score(CYTHON_UNUSED struct __pyx_obj_5
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("score", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":189
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":189
  * 
  *     def score(SufficientStats stats):
  *         return 0             # <<<<<<<<<<<<<<
@@ -17398,7 +17425,7 @@ static PyObject *__pyx_pw_5_cdec_6Metric_7evaluate(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/home/vchahune/tools/cdec/python/src/mteval.pxi":191
+/* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":191
  *         return 0
  * 
  *     def evaluate(self, hyp, refs):             # <<<<<<<<<<<<<<
@@ -17415,7 +17442,7 @@ static PyObject *__pyx_pf_5_cdec_6Metric_6evaluate(CYTHON_UNUSED struct __pyx_ob
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("evaluate", 0);
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":192
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":192
  * 
  *     def evaluate(self, hyp, refs):
  *         return []             # <<<<<<<<<<<<<<
@@ -17650,7 +17677,9 @@ static PyObject *__pyx_gb_5_cdec_6generator17(__pyx_GeneratorObject *__pyx_gener
       __Pyx_INCREF(__pyx_t_6);
       #else
       __pyx_t_5 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
       __pyx_t_6 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_6);
       #endif
       __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     } else
@@ -17693,10 +17722,7 @@ static PyObject *__pyx_gb_5_cdec_6generator17(__pyx_GeneratorObject *__pyx_gener
  *             for name, info in value.items():
  *                 yield key, '%s %s' % (name, info)
  */
-    __pyx_t_2 = ((PyObject *)((PyObject*)(&PyDict_Type)));
-    __Pyx_INCREF(__pyx_t_2);
-    __pyx_t_9 = __Pyx_TypeCheck(__pyx_cur_scope->__pyx_v_value, __pyx_t_2); 
-    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __pyx_t_9 = PyDict_Check(__pyx_cur_scope->__pyx_v_value); 
     if (__pyx_t_9) {
 
       /* "_cdec.pyx":35
@@ -17770,7 +17796,9 @@ static PyObject *__pyx_gb_5_cdec_6generator17(__pyx_GeneratorObject *__pyx_gener
           __Pyx_INCREF(__pyx_t_7);
           #else
           __pyx_t_5 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          __Pyx_GOTREF(__pyx_t_5);
           __pyx_t_7 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          __Pyx_GOTREF(__pyx_t_7);
           #endif
           __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
         } else
@@ -17821,7 +17849,7 @@ static PyObject *__pyx_gb_5_cdec_6generator17(__pyx_GeneratorObject *__pyx_gener
         __Pyx_INCREF(__pyx_cur_scope->__pyx_v_info);
         PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_cur_scope->__pyx_v_info);
         __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_info);
-        __pyx_t_7 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_44), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_7 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_45), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(((PyObject *)__pyx_t_7));
         __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
         __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -17871,10 +17899,7 @@ static PyObject *__pyx_gb_5_cdec_6generator17(__pyx_GeneratorObject *__pyx_gener
  *             for name in value:
  *                 yield key, name
  */
-    __pyx_t_2 = ((PyObject *)((PyObject*)(&PyList_Type)));
-    __Pyx_INCREF(__pyx_t_2);
-    __pyx_t_9 = __Pyx_TypeCheck(__pyx_cur_scope->__pyx_v_value, __pyx_t_2); 
-    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __pyx_t_9 = PyList_Check(__pyx_cur_scope->__pyx_v_value); 
     if (__pyx_t_9) {
 
       /* "_cdec.pyx":38
@@ -18174,9 +18199,9 @@ static PyObject *__pyx_gb_5_cdec_7Decoder_8__init___2generator21(__pyx_Generator
   if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_config)) { __Pyx_RaiseClosureNameError("config"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;} }
   __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
-  __Pyx_INCREF(__pyx_cur_scope->__pyx_outer_scope->__pyx_v_config);
-  PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_cur_scope->__pyx_outer_scope->__pyx_v_config);
-  __Pyx_GIVEREF(__pyx_cur_scope->__pyx_outer_scope->__pyx_v_config);
+  __Pyx_INCREF(((PyObject *)__pyx_cur_scope->__pyx_outer_scope->__pyx_v_config));
+  PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_cur_scope->__pyx_outer_scope->__pyx_v_config));
+  __Pyx_GIVEREF(((PyObject *)__pyx_cur_scope->__pyx_outer_scope->__pyx_v_config));
   __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -18221,7 +18246,7 @@ static PyObject *__pyx_gb_5_cdec_7Decoder_8__init___2generator21(__pyx_Generator
     __Pyx_GIVEREF(__pyx_t_3);
     __pyx_cur_scope->__pyx_v_kv = __pyx_t_3;
     __pyx_t_3 = 0;
-    __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_45), __pyx_cur_scope->__pyx_v_kv); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_46), __pyx_cur_scope->__pyx_v_kv); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_3));
     __pyx_r = ((PyObject *)__pyx_t_3);
     __pyx_t_3 = 0;
@@ -18311,13 +18336,10 @@ static int __pyx_pf_5_cdec_7Decoder___init__(struct __pyx_obj_5_cdec_Decoder *__
  *             if formalism not in ('scfg', 'fst', 'lextrans', 'pb',
  *                     'csplit', 'tagger', 'lexalign'):
  */
-    __pyx_t_2 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_config, __pyx_n_s__get); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = __Pyx_PyDict_GetItemDefault(((PyObject *)__pyx_cur_scope->__pyx_v_config), ((PyObject *)__pyx_n_s__formalism), Py_None); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
-    __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_46), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
-    __pyx_v_formalism = __pyx_t_3;
-    __pyx_t_3 = 0;
+    __pyx_v_formalism = __pyx_t_2;
+    __pyx_t_2 = 0;
 
     /* "_cdec.pyx":53
  *         if config_str is None:
@@ -18327,59 +18349,59 @@ static int __pyx_pf_5_cdec_7Decoder___init__(struct __pyx_obj_5_cdec_Decoder *__
  *                 raise InvalidConfig('formalism "%s" unknown' % formalism)
  */
     __Pyx_INCREF(__pyx_v_formalism);
-    __pyx_t_3 = __pyx_v_formalism;
-    __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__scfg), Py_NE); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __pyx_t_2 = __pyx_v_formalism;
+    __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__scfg), Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     if (((int)__pyx_t_1)) {
-      __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__fst), Py_NE); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+      __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__fst), Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       __pyx_t_5 = ((int)__pyx_t_4);
     } else {
       __pyx_t_5 = ((int)__pyx_t_1);
     }
     if (__pyx_t_5) {
-      __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__lextrans), Py_NE); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+      __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__lextrans), Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       __pyx_t_4 = ((int)__pyx_t_1);
     } else {
       __pyx_t_4 = __pyx_t_5;
     }
     if (__pyx_t_4) {
-      __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__pb), Py_NE); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+      __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__pb), Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       __pyx_t_1 = ((int)__pyx_t_5);
     } else {
       __pyx_t_1 = __pyx_t_4;
     }
     if (__pyx_t_1) {
-      __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__csplit), Py_NE); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+      __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__csplit), Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       __pyx_t_5 = ((int)__pyx_t_4);
     } else {
       __pyx_t_5 = __pyx_t_1;
     }
     if (__pyx_t_5) {
-      __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__tagger), Py_NE); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+      __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__tagger), Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       __pyx_t_4 = ((int)__pyx_t_1);
     } else {
       __pyx_t_4 = __pyx_t_5;
     }
     if (__pyx_t_4) {
-      __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__lexalign), Py_NE); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+      __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__lexalign), Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       __pyx_t_1 = ((int)__pyx_t_5);
     } else {
       __pyx_t_1 = __pyx_t_4;
     }
-    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     __pyx_t_4 = __pyx_t_1;
     if (__pyx_t_4) {
 
@@ -18390,21 +18412,21 @@ static int __pyx_pf_5_cdec_7Decoder___init__(struct __pyx_obj_5_cdec_Decoder *__
  *             config_str = '\n'.join('%s = %s' % kv for kv in _make_config(config))
  *         cdef istringstream* config_stream = new istringstream(config_str)
  */
-      __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__InvalidConfig); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_3);
-      __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_47), __pyx_v_formalism); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(((PyObject *)__pyx_t_2));
+      __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__InvalidConfig); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_2);
+      __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_47), __pyx_v_formalism); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(((PyObject *)__pyx_t_3));
       __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_6);
-      PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_2));
-      __Pyx_GIVEREF(((PyObject *)__pyx_t_2));
-      __pyx_t_2 = 0;
-      __pyx_t_2 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_2);
-      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-      __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
-      __Pyx_Raise(__pyx_t_2, 0, 0, 0);
+      PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_3));
+      __Pyx_GIVEREF(((PyObject *)__pyx_t_3));
+      __pyx_t_3 = 0;
+      __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
       __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+      __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+      __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       goto __pyx_L4;
     }
@@ -18417,19 +18439,19 @@ static int __pyx_pf_5_cdec_7Decoder___init__(struct __pyx_obj_5_cdec_Decoder *__
  *         cdef istringstream* config_stream = new istringstream(config_str)
  *         self.dec = new decoder.Decoder(config_stream)
  */
-    __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_38), __pyx_n_s__join); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_39), __pyx_n_s__join); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
     __pyx_t_6 = __pyx_pf_5_cdec_7Decoder_8__init___genexpr(((PyObject*)__pyx_cur_scope)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_6);
-    __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_3);
-    PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_6);
+    __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_6);
     __Pyx_GIVEREF(__pyx_t_6);
     __pyx_t_6 = 0;
-    __pyx_t_6 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_6 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_6);
-    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
-    __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
     __Pyx_DECREF(__pyx_v_config_str);
     __pyx_v_config_str = __pyx_t_6;
     __pyx_t_6 = 0;
@@ -18615,8 +18637,8 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
   PyObject *__pyx_v_fval = NULL;
   int __pyx_r;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   PyObject *__pyx_t_3 = NULL;
   Py_ssize_t __pyx_t_4;
   PyObject *(*__pyx_t_5)(PyObject *);
@@ -18636,11 +18658,8 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
  *                 self.weights.vector[0] = (<DenseVector> weights).vector[0]
  *             elif isinstance(weights, SparseVector):
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_DenseVector));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_weights, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_weights, ((PyObject*)__pyx_ptype_5_cdec_DenseVector)); 
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":73
  *         def __set__(self, weights):
@@ -18660,11 +18679,8 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
  *                 self.weights.vector.clear()
  *                 ((<SparseVector> weights).vector[0]).init_vector(self.weights.vector)
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SparseVector));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_weights, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_weights, ((PyObject*)__pyx_ptype_5_cdec_SparseVector)); 
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":75
  *                 self.weights.vector[0] = (<DenseVector> weights).vector[0]
@@ -18693,11 +18709,8 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
  *                 self.weights.vector.clear()
  *                 for fname, fval in weights.items():
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)(&PyDict_Type)));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_weights, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = PyDict_Check(__pyx_v_weights); 
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":78
  *                 ((<SparseVector> weights).vector[0]).init_vector(self.weights.vector)
@@ -18715,37 +18728,37 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
  *                     self.weights[fname] = fval
  *             else:
  */
-    __pyx_t_1 = PyObject_GetAttr(__pyx_v_weights, __pyx_n_s__items); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyObject_GetAttr(__pyx_v_weights, __pyx_n_s__items); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     if (PyList_CheckExact(__pyx_t_3) || PyTuple_CheckExact(__pyx_t_3)) {
-      __pyx_t_1 = __pyx_t_3; __Pyx_INCREF(__pyx_t_1); __pyx_t_4 = 0;
+      __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); __pyx_t_4 = 0;
       __pyx_t_5 = NULL;
     } else {
-      __pyx_t_4 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_1);
-      __pyx_t_5 = Py_TYPE(__pyx_t_1)->tp_iternext;
+      __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_2);
+      __pyx_t_5 = Py_TYPE(__pyx_t_2)->tp_iternext;
     }
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     for (;;) {
-      if (!__pyx_t_5 && PyList_CheckExact(__pyx_t_1)) {
-        if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_1)) break;
+      if (!__pyx_t_5 && PyList_CheckExact(__pyx_t_2)) {
+        if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_2)) break;
         #if CYTHON_COMPILING_IN_CPYTHON
-        __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         #else
-        __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_3 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         #endif
-      } else if (!__pyx_t_5 && PyTuple_CheckExact(__pyx_t_1)) {
-        if (__pyx_t_4 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
+      } else if (!__pyx_t_5 && PyTuple_CheckExact(__pyx_t_2)) {
+        if (__pyx_t_4 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
         #if CYTHON_COMPILING_IN_CPYTHON
-        __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         #else
-        __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_3 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         #endif
       } else {
-        __pyx_t_3 = __pyx_t_5(__pyx_t_1);
+        __pyx_t_3 = __pyx_t_5(__pyx_t_2);
         if (unlikely(!__pyx_t_3)) {
           if (PyErr_Occurred()) {
             if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
@@ -18779,7 +18792,9 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
         __Pyx_INCREF(__pyx_t_7);
         #else
         __pyx_t_6 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_6);
         __pyx_t_7 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_7);
         #endif
         __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
       } else
@@ -18820,7 +18835,7 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
  */
       if (PyObject_SetItem(((PyObject *)__pyx_v_self->weights), __pyx_v_fname, __pyx_v_fval) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     }
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     goto __pyx_L3;
   }
   /*else*/ {
@@ -18832,18 +18847,18 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
  * 
  *     property formalism:
  */
-    __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_48), ((PyObject *)Py_TYPE(__pyx_v_weights))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+    __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_48), ((PyObject *)Py_TYPE(__pyx_v_weights))); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(((PyObject *)__pyx_t_2));
     __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_1));
-    __Pyx_GIVEREF(((PyObject *)__pyx_t_1));
-    __pyx_t_1 = 0;
-    __pyx_t_1 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2));
+    __Pyx_GIVEREF(((PyObject *)__pyx_t_2));
+    __pyx_t_2 = 0;
+    __pyx_t_2 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
     __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
-    __Pyx_Raise(__pyx_t_1, 0, 0, 0);
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_Raise(__pyx_t_2, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   }
   __pyx_L3:;
@@ -18851,7 +18866,7 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De
   __pyx_r = 0;
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_XDECREF(__pyx_t_3);
   __Pyx_XDECREF(__pyx_t_6);
   __Pyx_XDECREF(__pyx_t_7);
@@ -19129,7 +19144,9 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_4read_weights(struct __pyx_obj_5_cdec_
               __Pyx_INCREF(__pyx_t_11);
               #else
               __pyx_t_1 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __Pyx_GOTREF(__pyx_t_1);
               __pyx_t_11 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __Pyx_GOTREF(__pyx_t_11);
               #endif
               __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
             } else
@@ -19368,8 +19385,8 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
   struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_hg = 0;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
-  PyObject *__pyx_t_1 = NULL;
-  int __pyx_t_2;
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
   PyObject *__pyx_t_3 = NULL;
   std::string __pyx_t_4;
   int __pyx_lineno = 0;
@@ -19384,11 +19401,8 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  *             input_str = as_str(sentence.strip())
  *         elif isinstance(sentence, Lattice):
  */
-  __pyx_t_1 = __pyx_builtin_basestring;
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = PyObject_IsInstance(__pyx_v_sentence, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = PyObject_IsInstance(__pyx_v_sentence, __pyx_builtin_basestring); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":102
  *         cdef bytes input_str
@@ -19397,16 +19411,16 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  *         elif isinstance(sentence, Lattice):
  *             input_str = str(sentence) # PLF format
  */
-    __pyx_t_1 = PyObject_GetAttr(__pyx_v_sentence, __pyx_n_s__strip); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyObject_GetAttr(__pyx_v_sentence, __pyx_n_s__strip); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-    __pyx_t_1 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_t_3, NULL)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __pyx_t_2 = ((PyObject *)__pyx_f_5_cdec_as_str(__pyx_t_3, NULL)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-    __pyx_v_input_str = ((PyObject*)__pyx_t_1);
-    __pyx_t_1 = 0;
+    __pyx_v_input_str = ((PyObject*)__pyx_t_2);
+    __pyx_t_2 = 0;
     goto __pyx_L3;
   }
 
@@ -19417,11 +19431,8 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  *             input_str = str(sentence) # PLF format
  *         else:
  */
-  __pyx_t_1 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Lattice));
-  __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_sentence, __pyx_t_1); 
-  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_sentence, ((PyObject*)__pyx_ptype_5_cdec_Lattice)); 
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":104
  *             input_str = as_str(sentence.strip())
@@ -19430,14 +19441,14 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  *         else:
  *             raise TypeError('Cannot translate input type %s' % type(sentence))
  */
-    __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
     __Pyx_INCREF(__pyx_v_sentence);
-    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_sentence);
+    PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_sentence);
     __Pyx_GIVEREF(__pyx_v_sentence);
-    __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+    __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
     if (!(likely(PyBytes_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected bytes, got %.200s", Py_TYPE(__pyx_t_3)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __pyx_v_input_str = ((PyObject*)__pyx_t_3);
     __pyx_t_3 = 0;
@@ -19454,14 +19465,14 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  */
     __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_52), ((PyObject *)Py_TYPE(__pyx_v_sentence))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_3));
-    __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_3));
+    __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_3));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_3));
     __pyx_t_3 = 0;
-    __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+    __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
     __Pyx_Raise(__pyx_t_3, 0, 0, 0);
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -19475,8 +19486,8 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  *             if isinstance(grammar, basestring):
  *                 self.dec.AddSupplementalGrammarFromString(as_str(grammar))
  */
-  __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_grammar); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  if (__pyx_t_2) {
+  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_grammar); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":108
  *             raise TypeError('Cannot translate input type %s' % type(sentence))
@@ -19485,11 +19496,8 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  *                 self.dec.AddSupplementalGrammarFromString(as_str(grammar))
  *             else:
  */
-    __pyx_t_3 = __pyx_builtin_basestring;
-    __Pyx_INCREF(__pyx_t_3);
-    __pyx_t_2 = PyObject_IsInstance(__pyx_v_grammar, __pyx_t_3); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-    if (__pyx_t_2) {
+    __pyx_t_1 = PyObject_IsInstance(__pyx_v_grammar, __pyx_builtin_basestring); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    if (__pyx_t_1) {
 
       /* "_cdec.pyx":109
  *         if grammar:
@@ -19519,11 +19527,11 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
       __Pyx_INCREF(__pyx_v_grammar);
       PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_grammar);
       __Pyx_GIVEREF(__pyx_v_grammar);
-      __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_TextGrammar)), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-      __Pyx_GOTREF(__pyx_t_1);
+      __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_TextGrammar)), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_2);
       __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
-      __pyx_v_self->dec->AddSupplementalGrammar((((struct __pyx_obj_5_cdec_TextGrammar *)__pyx_t_1)->__pyx_base.grammar[0]));
-      __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+      __pyx_v_self->dec->AddSupplementalGrammar((((struct __pyx_obj_5_cdec_TextGrammar *)__pyx_t_2)->__pyx_base.grammar[0]));
+      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     }
     __pyx_L5:;
     goto __pyx_L4;
@@ -19556,8 +19564,8 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  *             raise ParseFailed()
  *         cdef Hypergraph hg = Hypergraph()
  */
-  __pyx_t_2 = (__pyx_v_observer.hypergraph == NULL);
-  if (__pyx_t_2) {
+  __pyx_t_1 = (__pyx_v_observer.hypergraph == NULL);
+  if (__pyx_t_1) {
 
     /* "_cdec.pyx":115
  *         self.dec.Decode(input_str, &observer)
@@ -19566,11 +19574,11 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
  *         cdef Hypergraph hg = Hypergraph()
  *         hg.hg = new hypergraph.Hypergraph(observer.hypergraph[0])
  */
-    __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__ParseFailed); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__ParseFailed); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     __Pyx_Raise(__pyx_t_3, 0, 0, 0);
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -19611,7 +19619,7 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
   __pyx_r = Py_None; __Pyx_INCREF(Py_None);
   goto __pyx_L0;
   __pyx_L1_error:;
-  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
   __Pyx_XDECREF(__pyx_t_3);
   __Pyx_AddTraceback("_cdec.Decoder.translate", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = NULL;
@@ -19623,7 +19631,7 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec
   return __pyx_r;
 }
 
-/* "string.from_py":11
+/* "string.from_py":12
  * 
  * @cname("__pyx_convert_string_from_py_")
  * cdef string __pyx_convert_string_from_py_(object o) except *:             # <<<<<<<<<<<<<<
@@ -19641,15 +19649,15 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) {
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__pyx_convert_string_from_py_", 0);
 
-  /* "string.from_py":12
+  /* "string.from_py":13
  * @cname("__pyx_convert_string_from_py_")
  * cdef string __pyx_convert_string_from_py_(object o) except *:
  *     return string(<char*>o, len(o))             # <<<<<<<<<<<<<<
  * 
  * 
  */
-  __pyx_t_1 = PyBytes_AsString(__pyx_v_o); if (unlikely((!__pyx_t_1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_t_2 = PyObject_Length(__pyx_v_o); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyBytes_AsString(__pyx_v_o); if (unlikely((!__pyx_t_1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyObject_Length(__pyx_v_o); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_r = std::string(((char *)__pyx_t_1), __pyx_t_2);
   goto __pyx_L0;
 
@@ -20066,7 +20074,9 @@ static PyObject *__pyx_tp_new_5_cdec_NT(PyTypeObject *t, CYTHON_UNUSED PyObject
 
 static void __pyx_tp_dealloc_5_cdec_NT(PyObject *o) {
   struct __pyx_obj_5_cdec_NT *p = (struct __pyx_obj_5_cdec_NT *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->cat);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -21493,7 +21503,9 @@ static PyObject *__pyx_tp_new_5_cdec_HypergraphEdge(PyTypeObject *t, CYTHON_UNUS
 
 static void __pyx_tp_dealloc_5_cdec_HypergraphEdge(PyObject *o) {
   struct __pyx_obj_5_cdec_HypergraphEdge *p = (struct __pyx_obj_5_cdec_HypergraphEdge *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->trule);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -23074,7 +23086,9 @@ static PyObject *__pyx_tp_new_5_cdec_Metric(PyTypeObject *t, CYTHON_UNUSED PyObj
 
 static void __pyx_tp_dealloc_5_cdec_Metric(PyObject *o) {
   struct __pyx_obj_5_cdec_Metric *p = (struct __pyx_obj_5_cdec_Metric *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->scorer);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -23267,6 +23281,7 @@ static PyObject *__pyx_tp_new_5_cdec_Decoder(PyTypeObject *t, CYTHON_UNUSED PyOb
 
 static void __pyx_tp_dealloc_5_cdec_Decoder(PyObject *o) {
   struct __pyx_obj_5_cdec_Decoder *p = (struct __pyx_obj_5_cdec_Decoder *)o;
+  PyObject_GC_UnTrack(o);
   {
     PyObject *etype, *eval, *etb;
     PyErr_Fetch(&etype, &eval, &etb);
@@ -23277,6 +23292,7 @@ static void __pyx_tp_dealloc_5_cdec_Decoder(PyObject *o) {
     PyErr_Restore(etype, eval, etb);
   }
   Py_CLEAR(p->weights);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -23493,7 +23509,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct____iter__(PyTypeObject *
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct____iter__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct____iter__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct____iter__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -23684,7 +23702,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_1___iter__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_1___iter__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_1___iter__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -23875,7 +23895,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_2__phrase(PyTypeObject *
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_2__phrase(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_2__phrase *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_2__phrase *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_phrase);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -24068,9 +24090,11 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_3_genexpr(PyTypeObject *
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_3_genexpr(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_3_genexpr *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_3_genexpr *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_outer_scope);
   Py_CLEAR(p->__pyx_v_w);
   Py_CLEAR(p->__pyx_t_0);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -24273,7 +24297,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_4___get__(PyTypeObject *
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_4___get__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_4___get__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_4___get__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -24464,7 +24490,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_5___str__(PyTypeObject *
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_5___str__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_5___str__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_5___str__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -24657,9 +24685,11 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_6_genexpr(PyTypeObject *
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_6_genexpr(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_6_genexpr *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_6_genexpr *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_outer_scope);
   Py_CLEAR(p->__pyx_v_feat);
   Py_CLEAR(p->__pyx_t_0);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -24863,8 +24893,10 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_7___iter__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_7___iter__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_7___iter__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_7___iter__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
   Py_CLEAR(p->__pyx_v_trule);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -25062,8 +25094,10 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_8_kbest(PyTypeObject *t,
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_8_kbest(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_8_kbest *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_8_kbest *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
   Py_CLEAR(p->__pyx_v_size);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -25263,10 +25297,12 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_9_kbest_trees(PyTypeObje
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_9_kbest_trees(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_9_kbest_trees *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_9_kbest_trees *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_e_tree);
   Py_CLEAR(p->__pyx_v_f_tree);
   Py_CLEAR(p->__pyx_v_self);
   Py_CLEAR(p->__pyx_v_size);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -25477,9 +25513,11 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_10_kbest_features(PyType
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_10_kbest_features(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_10_kbest_features *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_10_kbest_features *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_fmap);
   Py_CLEAR(p->__pyx_v_self);
   Py_CLEAR(p->__pyx_v_size);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -25682,7 +25720,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_11_sample(PyTypeObject *
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_11_sample(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_11_sample *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_11_sample *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -25873,7 +25913,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_12_sample_trees(PyTypeOb
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_12_sample_trees(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_12_sample_trees *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_12_sample_trees *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -26064,7 +26106,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_13___get__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_13___get__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_13___get__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_13___get__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -26255,7 +26299,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_14___get__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_14___get__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_14___get__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_14___get__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -26446,7 +26492,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_15___get__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_15___get__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_15___get__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_15___get__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -26637,7 +26685,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_16___get__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_16___get__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_16___get__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_16___get__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -26828,7 +26878,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_17___get__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_17___get__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_17___get__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_17___get__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -27019,7 +27071,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_18___iter__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_18___iter__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_18___iter__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_18___iter__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -27210,7 +27264,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_19_todot(PyTypeObject *t
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_19_todot(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_19_todot *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_19_todot *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -27407,6 +27463,7 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_20_lines(PyTypeObject *t
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_20_lines(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_20_lines *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_20_lines *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_outer_scope);
   Py_CLEAR(p->__pyx_v_delta);
   Py_CLEAR(p->__pyx_v_i);
@@ -27414,6 +27471,7 @@ static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_20_lines(PyObject *o) {
   Py_CLEAR(p->__pyx_v_weight);
   Py_CLEAR(p->__pyx_t_1);
   Py_CLEAR(p->__pyx_t_3);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -27642,9 +27700,11 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_21___iter__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_21___iter__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_21___iter__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_21___iter__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_i);
   Py_CLEAR(p->__pyx_v_self);
   Py_CLEAR(p->__pyx_t_1);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -27847,7 +27907,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_22___iter__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_22___iter__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_22___iter__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_22___iter__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_self);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -28044,6 +28106,7 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_23__make_config(PyTypeOb
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_23__make_config(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_23__make_config *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_23__make_config *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_config);
   Py_CLEAR(p->__pyx_v_info);
   Py_CLEAR(p->__pyx_v_key);
@@ -28051,6 +28114,7 @@ static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_23__make_config(PyObject
   Py_CLEAR(p->__pyx_v_value);
   Py_CLEAR(p->__pyx_t_0);
   Py_CLEAR(p->__pyx_t_1);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -28277,7 +28341,9 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_24___init__(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_24___init__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_24___init__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_24___init__ *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_v_config);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -28294,7 +28360,7 @@ static int __pyx_tp_clear_5_cdec___pyx_scope_struct_24___init__(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_24___init__ *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_24___init__ *)o;
   PyObject* tmp;
   tmp = ((PyObject*)p->__pyx_v_config);
-  p->__pyx_v_config = Py_None; Py_INCREF(Py_None);
+  p->__pyx_v_config = ((PyObject*)Py_None); Py_INCREF(Py_None);
   Py_XDECREF(tmp);
   return 0;
 }
@@ -28470,9 +28536,11 @@ static PyObject *__pyx_tp_new_5_cdec___pyx_scope_struct_25_genexpr(PyTypeObject
 
 static void __pyx_tp_dealloc_5_cdec___pyx_scope_struct_25_genexpr(PyObject *o) {
   struct __pyx_obj_5_cdec___pyx_scope_struct_25_genexpr *p = (struct __pyx_obj_5_cdec___pyx_scope_struct_25_genexpr *)o;
+  PyObject_GC_UnTrack(o);
   Py_CLEAR(p->__pyx_outer_scope);
   Py_CLEAR(p->__pyx_v_kv);
   Py_CLEAR(p->__pyx_t_0);
+  PyObject_GC_Track(o);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
@@ -28670,7 +28738,11 @@ static PyMethodDef __pyx_methods[] = {
 
 #if PY_MAJOR_VERSION >= 3
 static struct PyModuleDef __pyx_moduledef = {
+  #if PY_VERSION_HEX < 0x03020000
+    { PyObject_HEAD_INIT(NULL) NULL, 0, NULL },
+  #else
     PyModuleDef_HEAD_INIT,
+  #endif
     __Pyx_NAMESTR("_cdec"),
     0, /* m_doc */
     -1, /* m_size */
@@ -28704,12 +28776,13 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_kp_s_33, __pyx_k_33, sizeof(__pyx_k_33), 0, 0, 1, 0},
   {&__pyx_kp_s_34, __pyx_k_34, sizeof(__pyx_k_34), 0, 0, 1, 0},
   {&__pyx_kp_s_37, __pyx_k_37, sizeof(__pyx_k_37), 0, 0, 1, 0},
-  {&__pyx_kp_s_38, __pyx_k_38, sizeof(__pyx_k_38), 0, 0, 1, 0},
+  {&__pyx_n_s_38, __pyx_k_38, sizeof(__pyx_k_38), 0, 0, 1, 1},
+  {&__pyx_kp_s_39, __pyx_k_39, sizeof(__pyx_k_39), 0, 0, 1, 0},
   {&__pyx_kp_s_4, __pyx_k_4, sizeof(__pyx_k_4), 0, 0, 1, 0},
-  {&__pyx_kp_s_40, __pyx_k_40, sizeof(__pyx_k_40), 0, 0, 1, 0},
-  {&__pyx_kp_s_42, __pyx_k_42, sizeof(__pyx_k_42), 0, 0, 1, 0},
-  {&__pyx_kp_s_44, __pyx_k_44, sizeof(__pyx_k_44), 0, 0, 1, 0},
+  {&__pyx_kp_s_41, __pyx_k_41, sizeof(__pyx_k_41), 0, 0, 1, 0},
+  {&__pyx_kp_s_43, __pyx_k_43, sizeof(__pyx_k_43), 0, 0, 1, 0},
   {&__pyx_kp_s_45, __pyx_k_45, sizeof(__pyx_k_45), 0, 0, 1, 0},
+  {&__pyx_kp_s_46, __pyx_k_46, sizeof(__pyx_k_46), 0, 0, 1, 0},
   {&__pyx_kp_s_47, __pyx_k_47, sizeof(__pyx_k_47), 0, 0, 1, 0},
   {&__pyx_kp_s_48, __pyx_k_48, sizeof(__pyx_k_48), 0, 0, 1, 0},
   {&__pyx_kp_s_49, __pyx_k_49, sizeof(__pyx_k_49), 0, 0, 1, 0},
@@ -28717,7 +28790,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_n_s_53, __pyx_k_53, sizeof(__pyx_k_53), 0, 0, 1, 1},
   {&__pyx_n_s_54, __pyx_k_54, sizeof(__pyx_k_54), 0, 0, 1, 1},
   {&__pyx_kp_s_57, __pyx_k_57, sizeof(__pyx_k_57), 0, 0, 1, 0},
-  {&__pyx_kp_s_63, __pyx_k_63, sizeof(__pyx_k_63), 0, 0, 1, 0},
+  {&__pyx_kp_s_65, __pyx_k_65, sizeof(__pyx_k_65), 0, 0, 1, 0},
   {&__pyx_kp_s_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 0, 1, 0},
   {&__pyx_kp_s_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 0, 1, 0},
   {&__pyx_kp_s_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 0, 1, 0},
@@ -28730,6 +28803,9 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_n_s__KeyError, __pyx_k__KeyError, sizeof(__pyx_k__KeyError), 0, 0, 1, 1},
   {&__pyx_n_s__NotImplemented, __pyx_k__NotImplemented, sizeof(__pyx_k__NotImplemented), 0, 0, 1, 1},
   {&__pyx_n_s__ParseFailed, __pyx_k__ParseFailed, sizeof(__pyx_k__ParseFailed), 0, 0, 1, 1},
+  {&__pyx_n_s__QCRI, __pyx_k__QCRI, sizeof(__pyx_k__QCRI), 0, 0, 1, 1},
+  {&__pyx_n_s__QCRI_BLEU, __pyx_k__QCRI_BLEU, sizeof(__pyx_k__QCRI_BLEU), 0, 0, 1, 1},
+  {&__pyx_n_s__SSK, __pyx_k__SSK, sizeof(__pyx_k__SSK), 0, 0, 1, 1},
   {&__pyx_n_s__TER, __pyx_k__TER, sizeof(__pyx_k__TER), 0, 0, 1, 1},
   {&__pyx_n_s__TypeError, __pyx_k__TypeError, sizeof(__pyx_k__TypeError), 0, 0, 1, 1},
   {&__pyx_n_s__ValueError, __pyx_k__ValueError, sizeof(__pyx_k__ValueError), 0, 0, 1, 1},
@@ -28767,7 +28843,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_n_s__format, __pyx_k__format, sizeof(__pyx_k__format), 0, 0, 1, 1},
   {&__pyx_n_s__fst, __pyx_k__fst, sizeof(__pyx_k__fst), 0, 0, 1, 1},
   {&__pyx_n_s__genexpr, __pyx_k__genexpr, sizeof(__pyx_k__genexpr), 0, 0, 1, 1},
-  {&__pyx_n_s__get, __pyx_k__get, sizeof(__pyx_k__get), 0, 0, 1, 1},
   {&__pyx_n_s__grammar, __pyx_k__grammar, sizeof(__pyx_k__grammar), 0, 0, 1, 1},
   {&__pyx_n_s__hyp, __pyx_k__hyp, sizeof(__pyx_k__hyp), 0, 0, 1, 1},
   {&__pyx_n_s__hypergraph, __pyx_k__hypergraph, sizeof(__pyx_k__hypergraph), 0, 0, 1, 1},
@@ -28842,207 +28917,139 @@ static int __Pyx_InitCachedConstants(void) {
  *     elif isinstance(data, str):
  *         ret = data
  */
-  __pyx_k_tuple_2 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_2 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__utf8)); if (unlikely(!__pyx_k_tuple_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_2);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__utf8));
-  PyTuple_SET_ITEM(__pyx_k_tuple_2, 0, ((PyObject *)__pyx_n_s__utf8));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_2));
 
-  /* "/home/vchahune/tools/cdec/python/src/vectors.pxi":95
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/vectors.pxi":95
  *         elif op == 3: # !=
  *             return not (x == y)
  *         raise NotImplemented('comparison not implemented for SparseVector')             # <<<<<<<<<<<<<<
  * 
  *     def __len__(self):
  */
-  __pyx_k_tuple_5 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_5 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_4)); if (unlikely(!__pyx_k_tuple_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_5);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_4));
-  PyTuple_SET_ITEM(__pyx_k_tuple_5, 0, ((PyObject *)__pyx_kp_s_4));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_4));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_5));
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":6
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":6
  * 
  * def _phrase(phrase):
  *     return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase)             # <<<<<<<<<<<<<<
  * 
  * cdef class NT:
  */
-  __pyx_k_tuple_6 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_6 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__utf8)); if (unlikely(!__pyx_k_tuple_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_6);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__utf8));
-  PyTuple_SET_ITEM(__pyx_k_tuple_6, 0, ((PyObject *)__pyx_n_s__utf8));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_6));
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":226
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":226
  *                 trule = convert_rule(trule)
  *             elif not isinstance(trule, TRule):
  *                 raise ValueError('the grammar should contain TRule objects')             # <<<<<<<<<<<<<<
  *             _g.AddRule((<TRule> trule).rule[0])
  */
-  __pyx_k_tuple_14 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_14)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_14 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_13)); if (unlikely(!__pyx_k_tuple_14)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_14);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_13));
-  PyTuple_SET_ITEM(__pyx_k_tuple_14, 0, ((PyObject *)__pyx_kp_s_13));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_13));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_14));
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":244
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":244
  *         elif op == 3: # !=
  *             return not (x == y)
  *         raise NotImplemented('comparison not implemented for HypergraphEdge')             # <<<<<<<<<<<<<<
  * 
  * cdef class HypergraphNode:
  */
-  __pyx_k_tuple_19 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_19 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_18)); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_19);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_18));
-  PyTuple_SET_ITEM(__pyx_k_tuple_19, 0, ((PyObject *)__pyx_kp_s_18));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_18));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_19));
 
-  /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":281
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/hypergraph.pxi":281
  *         elif op == 3: # !=
  *             return not (x == y)
  *         raise NotImplemented('comparison not implemented for HypergraphNode')             # <<<<<<<<<<<<<<
  */
-  __pyx_k_tuple_21 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_21)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_21 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_20)); if (unlikely(!__pyx_k_tuple_21)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_21);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_20));
-  PyTuple_SET_ITEM(__pyx_k_tuple_21, 0, ((PyObject *)__pyx_kp_s_20));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_20));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_21));
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":26
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":26
  *     def __getitem__(self, int index):
  *         if not 0 <= index < len(self):
  *             raise IndexError('lattice index out of range')             # <<<<<<<<<<<<<<
  *         arcs = []
  *         cdef vector[lattice.LatticeArc] arc_vector = self.lattice[0][index]
  */
-  __pyx_k_tuple_24 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_24)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_24 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_23)); if (unlikely(!__pyx_k_tuple_24)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_24);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_23));
-  PyTuple_SET_ITEM(__pyx_k_tuple_24, 0, ((PyObject *)__pyx_kp_s_23));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_23));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_24));
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":39
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":39
  *     def __setitem__(self, int index, tuple arcs):
  *         if not 0 <= index < len(self):
  *             raise IndexError('lattice index out of range')             # <<<<<<<<<<<<<<
  *         cdef lattice.LatticeArc* arc
  *         for (label, cost, dist2next) in arcs:
  */
-  __pyx_k_tuple_25 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_25)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_25 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_23)); if (unlikely(!__pyx_k_tuple_25)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_25);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_23));
-  PyTuple_SET_ITEM(__pyx_k_tuple_25, 0, ((PyObject *)__pyx_kp_s_23));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_23));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_25));
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":69
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":69
  *             for i in range(len(self)):
  *                 for label, weight, delta in self[i]:
  *                     yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"'))             # <<<<<<<<<<<<<<
  *             yield '%d [shape=doublecircle]' % len(self)
  *             yield '}'
  */
-  __pyx_k_tuple_32 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_32)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_32 = PyTuple_Pack(2, ((PyObject *)__pyx_kp_s_30), ((PyObject *)__pyx_kp_s_31)); if (unlikely(!__pyx_k_tuple_32)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_32);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_30));
-  PyTuple_SET_ITEM(__pyx_k_tuple_32, 0, ((PyObject *)__pyx_kp_s_30));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_30));
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_31));
-  PyTuple_SET_ITEM(__pyx_k_tuple_32, 1, ((PyObject *)__pyx_kp_s_31));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_31));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_32));
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":63
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":63
  *     def todot(self):
  *         """lattice.todot() -> Representation of the lattice in GraphViz dot format."""
  *         def lines():             # <<<<<<<<<<<<<<
  *             yield 'digraph lattice {'
  *             yield 'rankdir = LR;'
  */
-  __pyx_k_tuple_35 = PyTuple_New(4); if (unlikely(!__pyx_k_tuple_35)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_35 = PyTuple_Pack(4, ((PyObject *)__pyx_n_s__i), ((PyObject *)__pyx_n_s__label), ((PyObject *)__pyx_n_s__weight), ((PyObject *)__pyx_n_s__delta)); if (unlikely(!__pyx_k_tuple_35)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_35);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__i));
-  PyTuple_SET_ITEM(__pyx_k_tuple_35, 0, ((PyObject *)__pyx_n_s__i));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__i));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__label));
-  PyTuple_SET_ITEM(__pyx_k_tuple_35, 1, ((PyObject *)__pyx_n_s__label));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__label));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__weight));
-  PyTuple_SET_ITEM(__pyx_k_tuple_35, 2, ((PyObject *)__pyx_n_s__weight));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__weight));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__delta));
-  PyTuple_SET_ITEM(__pyx_k_tuple_35, 3, ((PyObject *)__pyx_n_s__delta));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__delta));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_35));
   __pyx_k_codeobj_36 = (PyObject*)__Pyx_PyCode_New(0, 0, 4, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_35, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_37, __pyx_n_s__lines, 63, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_36)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/lattice.pxi":72
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/lattice.pxi":72
  *             yield '%d [shape=doublecircle]' % len(self)
  *             yield '}'
  *         return '\n'.join(lines()).encode('utf8')             # <<<<<<<<<<<<<<
  * 
  *     def as_hypergraph(self):
  */
-  __pyx_k_tuple_39 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_39)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_39);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__utf8));
-  PyTuple_SET_ITEM(__pyx_k_tuple_39, 0, ((PyObject *)__pyx_n_s__utf8));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_39));
+  __pyx_k_tuple_40 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__utf8)); if (unlikely(!__pyx_k_tuple_40)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_40);
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_40));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":50
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":50
  *     def __getitem__(self, int index):
  *         if not 0 <= index < len(self):
  *             raise IndexError('sufficient stats vector index out of range')             # <<<<<<<<<<<<<<
  *         return self.stats[0][index]
  * 
  */
-  __pyx_k_tuple_41 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_41)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_41);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_40));
-  PyTuple_SET_ITEM(__pyx_k_tuple_41, 0, ((PyObject *)__pyx_kp_s_40));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_40));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_41));
+  __pyx_k_tuple_42 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_41)); if (unlikely(!__pyx_k_tuple_42)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_42);
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_42));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":84
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":84
  *     def __getitem__(self,int k):
  *         if not 0 <= k < self.cs.size():
  *             raise IndexError('candidate set index out of range')             # <<<<<<<<<<<<<<
  *         cdef Candidate candidate = Candidate()
  *         candidate.candidate = &self.cs[0][k]
  */
-  __pyx_k_tuple_43 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_43)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_43);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_42));
-  PyTuple_SET_ITEM(__pyx_k_tuple_43, 0, ((PyObject *)__pyx_kp_s_42));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_42));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_43));
-
-  /* "_cdec.pyx":52
- *         Create a decoder using a given configuration. Formalism is required."""
- *         if config_str is None:
- *             formalism = config.get('formalism', None)             # <<<<<<<<<<<<<<
- *             if formalism not in ('scfg', 'fst', 'lextrans', 'pb',
- *                     'csplit', 'tagger', 'lexalign'):
- */
-  __pyx_k_tuple_46 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_46)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_46);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__formalism));
-  PyTuple_SET_ITEM(__pyx_k_tuple_46, 0, ((PyObject *)__pyx_n_s__formalism));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__formalism));
-  __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_46, 1, Py_None);
-  __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_46));
+  __pyx_k_tuple_44 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_43)); if (unlikely(!__pyx_k_tuple_44)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_44);
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_44));
 
   /* "_cdec.pyx":93
  *         with open(weights) as fp:
@@ -29051,11 +29058,8 @@ static int __Pyx_InitCachedConstants(void) {
  *                 fname, value = line.split()
  *                 self.weights[fname.strip()] = float(value)
  */
-  __pyx_k_tuple_50 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_50)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_50 = PyTuple_Pack(1, ((PyObject *)__pyx_kp_s_49)); if (unlikely(!__pyx_k_tuple_50)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_50);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_49));
-  PyTuple_SET_ITEM(__pyx_k_tuple_50, 0, ((PyObject *)__pyx_kp_s_49));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_49));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_50));
 
   /* "_cdec.pyx":91
@@ -29065,79 +29069,74 @@ static int __Pyx_InitCachedConstants(void) {
  *             for line in fp:
  *                 if line.strip().startswith('#'): continue
  */
-  __pyx_k_tuple_51 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_51)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_51 = PyTuple_Pack(3, Py_None, Py_None, Py_None); if (unlikely(!__pyx_k_tuple_51)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_51);
-  __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_51, 0, Py_None);
-  __Pyx_GIVEREF(Py_None);
-  __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_51, 1, Py_None);
-  __Pyx_GIVEREF(Py_None);
-  __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_51, 2, Py_None);
-  __Pyx_GIVEREF(Py_None);
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_51));
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":5
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":5
  * import cdec.sa._sa as _sa
  * 
  * def _phrase(phrase):             # <<<<<<<<<<<<<<
  *     return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase)
  * 
  */
-  __pyx_k_tuple_55 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_55)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_55 = PyTuple_Pack(3, ((PyObject *)__pyx_n_s__phrase), ((PyObject *)__pyx_n_s__genexpr), ((PyObject *)__pyx_n_s__genexpr)); if (unlikely(!__pyx_k_tuple_55)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_55);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__phrase));
-  PyTuple_SET_ITEM(__pyx_k_tuple_55, 0, ((PyObject *)__pyx_n_s__phrase));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__phrase));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__genexpr));
-  PyTuple_SET_ITEM(__pyx_k_tuple_55, 1, ((PyObject *)__pyx_n_s__genexpr));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__genexpr));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__genexpr));
-  PyTuple_SET_ITEM(__pyx_k_tuple_55, 2, ((PyObject *)__pyx_n_s__genexpr));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__genexpr));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_55));
   __pyx_k_codeobj_56 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_55, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_57, __pyx_n_s___phrase, 5, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_56)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":194
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":194
  *         return []
  * 
  * BLEU = Scorer('IBM_BLEU')             # <<<<<<<<<<<<<<
+ * QCRI = Scorer('QCRI_BLEU')
  * TER = Scorer('TER')
- * CER = Scorer('CER')
  */
-  __pyx_k_tuple_58 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_58)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_58 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__IBM_BLEU)); if (unlikely(!__pyx_k_tuple_58)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_58);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__IBM_BLEU));
-  PyTuple_SET_ITEM(__pyx_k_tuple_58, 0, ((PyObject *)__pyx_n_s__IBM_BLEU));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__IBM_BLEU));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_58));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":195
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":195
  * 
  * BLEU = Scorer('IBM_BLEU')
- * TER = Scorer('TER')             # <<<<<<<<<<<<<<
+ * QCRI = Scorer('QCRI_BLEU')             # <<<<<<<<<<<<<<
+ * TER = Scorer('TER')
  * CER = Scorer('CER')
  */
-  __pyx_k_tuple_59 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_59)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_59 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__QCRI_BLEU)); if (unlikely(!__pyx_k_tuple_59)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_59);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__TER));
-  PyTuple_SET_ITEM(__pyx_k_tuple_59, 0, ((PyObject *)__pyx_n_s__TER));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__TER));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_59));
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":196
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":196
  * BLEU = Scorer('IBM_BLEU')
- * TER = Scorer('TER')
- * CER = Scorer('CER')             # <<<<<<<<<<<<<<
+ * QCRI = Scorer('QCRI_BLEU')
+ * TER = Scorer('TER')             # <<<<<<<<<<<<<<
+ * CER = Scorer('CER')
+ * SSK = Scorer('SSK')
  */
-  __pyx_k_tuple_60 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_60)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_60 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__TER)); if (unlikely(!__pyx_k_tuple_60)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_60);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__CER));
-  PyTuple_SET_ITEM(__pyx_k_tuple_60, 0, ((PyObject *)__pyx_n_s__CER));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__CER));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_60));
 
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":197
+ * QCRI = Scorer('QCRI_BLEU')
+ * TER = Scorer('TER')
+ * CER = Scorer('CER')             # <<<<<<<<<<<<<<
+ * SSK = Scorer('SSK')
+ */
+  __pyx_k_tuple_61 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__CER)); if (unlikely(!__pyx_k_tuple_61)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 197; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_61);
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_61));
+
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":198
+ * TER = Scorer('TER')
+ * CER = Scorer('CER')
+ * SSK = Scorer('SSK')             # <<<<<<<<<<<<<<
+ */
+  __pyx_k_tuple_62 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__SSK)); if (unlikely(!__pyx_k_tuple_62)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_62);
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_62));
+
   /* "_cdec.pyx":28
  * class ParseFailed(Exception): pass
  * 
@@ -29145,13 +29144,10 @@ static int __Pyx_InitCachedConstants(void) {
  *     """set_silent(bool): Configure the verbosity of cdec."""
  *     SetSilent(yn)
  */
-  __pyx_k_tuple_61 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_61)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_61);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__yn));
-  PyTuple_SET_ITEM(__pyx_k_tuple_61, 0, ((PyObject *)__pyx_n_s__yn));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__yn));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_61));
-  __pyx_k_codeobj_62 = (PyObject*)__Pyx_PyCode_New(1, 0, 1, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_61, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_63, __pyx_n_s__set_silent, 28, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_62)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_63 = PyTuple_Pack(1, ((PyObject *)__pyx_n_s__yn)); if (unlikely(!__pyx_k_tuple_63)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_63);
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_63));
+  __pyx_k_codeobj_64 = (PyObject*)__Pyx_PyCode_New(1, 0, 1, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_63, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_65, __pyx_n_s__set_silent, 28, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_64)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
   /* "_cdec.pyx":32
  *     SetSilent(yn)
@@ -29160,25 +29156,10 @@ static int __Pyx_InitCachedConstants(void) {
  *     for key, value in config.items():
  *         if isinstance(value, dict):
  */
-  __pyx_k_tuple_64 = PyTuple_New(5); if (unlikely(!__pyx_k_tuple_64)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_64);
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__config));
-  PyTuple_SET_ITEM(__pyx_k_tuple_64, 0, ((PyObject *)__pyx_n_s__config));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__config));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__key));
-  PyTuple_SET_ITEM(__pyx_k_tuple_64, 1, ((PyObject *)__pyx_n_s__key));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__key));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__value));
-  PyTuple_SET_ITEM(__pyx_k_tuple_64, 2, ((PyObject *)__pyx_n_s__value));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__name));
-  PyTuple_SET_ITEM(__pyx_k_tuple_64, 3, ((PyObject *)__pyx_n_s__name));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__name));
-  __Pyx_INCREF(((PyObject *)__pyx_n_s__info));
-  PyTuple_SET_ITEM(__pyx_k_tuple_64, 4, ((PyObject *)__pyx_n_s__info));
-  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__info));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_64));
-  __pyx_k_codeobj_65 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_64, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_63, __pyx_n_s___make_config, 32, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_65)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_66 = PyTuple_Pack(5, ((PyObject *)__pyx_n_s__config), ((PyObject *)__pyx_n_s__key), ((PyObject *)__pyx_n_s__value), ((PyObject *)__pyx_n_s__name), ((PyObject *)__pyx_n_s__info)); if (unlikely(!__pyx_k_tuple_66)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_66);
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_66));
+  __pyx_k_codeobj_67 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_66, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_65, __pyx_n_s___make_config, 32, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_67)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_RefNannyFinishContext();
   return 0;
   __pyx_L1_error:;
@@ -29244,6 +29225,14 @@ PyMODINIT_FUNC PyInit__cdec(void)
   __pyx_m = PyModule_Create(&__pyx_moduledef);
   #endif
   if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  #if PY_MAJOR_VERSION >= 3
+  {
+    PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    if (!PyDict_GetItemString(modules, "_cdec")) {
+      if (unlikely(PyDict_SetItemString(modules, "_cdec", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
+  }
+  #endif
   __pyx_b = PyImport_AddModule(__Pyx_NAMESTR(__Pyx_BUILTIN_MODULE_NAME)); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   #if CYTHON_COMPILING_IN_PYPY
   Py_INCREF(__pyx_b);
@@ -29491,7 +29480,7 @@ PyMODINIT_FUNC PyInit__cdec(void)
   Py_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   /*--- Execution code ---*/
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":3
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":3
  * cimport grammar
  * cimport cdec.sa._sa as _sa
  * import cdec.sa._sa as _sa             # <<<<<<<<<<<<<<
@@ -29509,7 +29498,7 @@ PyMODINIT_FUNC PyInit__cdec(void)
   if (PyObject_SetAttr(__pyx_m, __pyx_n_s___sa, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/grammar.pxi":5
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/grammar.pxi":5
  * import cdec.sa._sa as _sa
  * 
  * def _phrase(phrase):             # <<<<<<<<<<<<<<
@@ -29521,37 +29510,61 @@ PyMODINIT_FUNC PyInit__cdec(void)
   if (PyObject_SetAttr(__pyx_m, __pyx_n_s___phrase, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":194
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":194
  *         return []
  * 
  * BLEU = Scorer('IBM_BLEU')             # <<<<<<<<<<<<<<
+ * QCRI = Scorer('QCRI_BLEU')
  * TER = Scorer('TER')
- * CER = Scorer('CER')
  */
   __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_58), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   if (PyObject_SetAttr(__pyx_m, __pyx_n_s__BLEU, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":195
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":195
  * 
  * BLEU = Scorer('IBM_BLEU')
- * TER = Scorer('TER')             # <<<<<<<<<<<<<<
+ * QCRI = Scorer('QCRI_BLEU')             # <<<<<<<<<<<<<<
+ * TER = Scorer('TER')
  * CER = Scorer('CER')
  */
   __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_59), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
-  if (PyObject_SetAttr(__pyx_m, __pyx_n_s__TER, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s__QCRI, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-  /* "/home/vchahune/tools/cdec/python/src/mteval.pxi":196
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":196
  * BLEU = Scorer('IBM_BLEU')
+ * QCRI = Scorer('QCRI_BLEU')
+ * TER = Scorer('TER')             # <<<<<<<<<<<<<<
+ * CER = Scorer('CER')
+ * SSK = Scorer('SSK')
+ */
+  __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_60), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_3);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s__TER, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":197
+ * QCRI = Scorer('QCRI_BLEU')
  * TER = Scorer('TER')
  * CER = Scorer('CER')             # <<<<<<<<<<<<<<
+ * SSK = Scorer('SSK')
  */
-  __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_60), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_61), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 197; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
-  if (PyObject_SetAttr(__pyx_m, __pyx_n_s__CER, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s__CER, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 197; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+  /* "/home/jmflanig/tools/cdec-jflanig/python/src/mteval.pxi":198
+ * TER = Scorer('TER')
+ * CER = Scorer('CER')
+ * SSK = Scorer('SSK')             # <<<<<<<<<<<<<<
+ */
+  __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_62), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_3);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s__SSK, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
 
   /* "_cdec.pyx":22
@@ -29586,7 +29599,7 @@ PyMODINIT_FUNC PyInit__cdec(void)
   __Pyx_INCREF(__pyx_builtin_Exception);
   PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_builtin_Exception);
   __Pyx_GIVEREF(__pyx_builtin_Exception);
-  __pyx_t_4 = __Pyx_CreateClass(((PyObject *)__pyx_t_2), ((PyObject *)__pyx_t_3), __pyx_n_s__InvalidConfig, __pyx_n_s___cdec); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_4 = __Pyx_CreateClass(((PyObject *)__pyx_t_2), ((PyObject *)__pyx_t_3), __pyx_n_s__InvalidConfig, __pyx_n_s__InvalidConfig, __pyx_n_s___cdec); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_4);
   __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
   if (PyObject_SetAttr(__pyx_m, __pyx_n_s__InvalidConfig, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -29607,7 +29620,7 @@ PyMODINIT_FUNC PyInit__cdec(void)
   __Pyx_INCREF(__pyx_builtin_Exception);
   PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_builtin_Exception);
   __Pyx_GIVEREF(__pyx_builtin_Exception);
-  __pyx_t_2 = __Pyx_CreateClass(((PyObject *)__pyx_t_4), ((PyObject *)__pyx_t_3), __pyx_n_s__ParseFailed, __pyx_n_s___cdec); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = __Pyx_CreateClass(((PyObject *)__pyx_t_4), ((PyObject *)__pyx_t_3), __pyx_n_s__ParseFailed, __pyx_n_s__ParseFailed, __pyx_n_s___cdec); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
   if (PyObject_SetAttr(__pyx_m, __pyx_n_s__ParseFailed, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -29648,7 +29661,7 @@ PyMODINIT_FUNC PyInit__cdec(void)
   if (PyObject_SetAttr(__pyx_m, __pyx_n_s____test__, ((PyObject *)__pyx_t_3)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
 
-  /* "string.from_py":11
+  /* "string.from_py":12
  * 
  * @cname("__pyx_convert_string_from_py_")
  * cdef string __pyx_convert_string_from_py_(object o) except *:             # <<<<<<<<<<<<<<
@@ -30315,16 +30328,18 @@ static void __Pyx_ExceptionReset(PyObject *type, PyObject *value, PyObject *tb)
 #endif
 }
 
-static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level) {
-    PyObject *py_import = 0;
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
     PyObject *empty_list = 0;
     PyObject *module = 0;
     PyObject *global_dict = 0;
     PyObject *empty_dict = 0;
     PyObject *list;
+    #if PY_VERSION_HEX < 0x03030000
+    PyObject *py_import = 0;
     py_import = __Pyx_GetAttrString(__pyx_b, "__import__");
     if (!py_import)
         goto bad;
+    #endif
     if (from_list)
         list = from_list;
     else {
@@ -30344,13 +30359,17 @@ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level) {
         #if PY_MAJOR_VERSION >= 3
         if (level == -1) {
             if (strchr(__Pyx_MODULE_NAME, '.')) {
-                /* try package relative import first */
+                #if PY_VERSION_HEX < 0x03030000
                 PyObject *py_level = PyInt_FromLong(1);
                 if (!py_level)
                     goto bad;
                 module = PyObject_CallFunctionObjArgs(py_import,
                     name, global_dict, empty_dict, list, py_level, NULL);
                 Py_DECREF(py_level);
+                #else
+                module = PyImport_ImportModuleLevelObject(
+                    name, global_dict, empty_dict, list, 1);
+                #endif
                 if (!module) {
                     if (!PyErr_ExceptionMatches(PyExc_ImportError))
                         goto bad;
@@ -30361,12 +30380,17 @@ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level) {
         }
         #endif
         if (!module) {
+            #if PY_VERSION_HEX < 0x03030000
             PyObject *py_level = PyInt_FromLong(level);
             if (!py_level)
                 goto bad;
             module = PyObject_CallFunctionObjArgs(py_import,
                 name, global_dict, empty_dict, list, py_level, NULL);
             Py_DECREF(py_level);
+            #else
+            module = PyImport_ImportModuleLevelObject(
+                name, global_dict, empty_dict, list, level);
+            #endif
         }
     }
     #else
@@ -30378,8 +30402,10 @@ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level) {
         name, global_dict, empty_dict, list, NULL);
     #endif
 bad:
-    Py_XDECREF(empty_list);
+    #if PY_VERSION_HEX < 0x03030000
     Py_XDECREF(py_import);
+    #endif
+    Py_XDECREF(empty_list);
     Py_XDECREF(empty_dict);
     return module;
 }
@@ -30410,11 +30436,13 @@ static PyObject *__Pyx_FindPy2Metaclass(PyObject *bases) {
 }
 
 static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name,
-                                   PyObject *modname) {
+                                   PyObject *qualname, PyObject *modname) {
     PyObject *result;
     PyObject *metaclass;
     if (PyDict_SetItemString(dict, "__module__", modname) < 0)
         return NULL;
+    if (PyDict_SetItemString(dict, "__qualname__", qualname) < 0)
+        return NULL;
     metaclass = PyDict_GetItemString(dict, "__metaclass__");
     if (metaclass) {
         Py_INCREF(metaclass);
@@ -30455,6 +30483,10 @@ static CYTHON_INLINE WordID __Pyx_PyInt_from_py_WordID(PyObject* x) {
         else
             return (WordID)__Pyx_PyInt_AsSignedLongLong(x);
     }  else {
+        #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
+        PyErr_SetString(PyExc_RuntimeError,
+                        "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
+        #else
         WordID val;
         PyObject *v = __Pyx_PyNumber_Int(x);
         #if PY_VERSION_HEX < 0x03000000
@@ -30474,6 +30506,7 @@ static CYTHON_INLINE WordID __Pyx_PyInt_from_py_WordID(PyObject* x) {
             if (likely(!ret))
                 return val;
         }
+        #endif
         return (WordID)-1;
     }
 }
@@ -30529,16 +30562,19 @@ bad:
 static PyObject *
 __Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *closure)
 {
-    if (op->func_doc == NULL && op->func.m_ml->ml_doc) {
+    if (unlikely(op->func_doc == NULL)) {
+        if (op->func.m_ml->ml_doc) {
 #if PY_MAJOR_VERSION >= 3
-        op->func_doc = PyUnicode_FromString(op->func.m_ml->ml_doc);
+            op->func_doc = PyUnicode_FromString(op->func.m_ml->ml_doc);
 #else
-        op->func_doc = PyString_FromString(op->func.m_ml->ml_doc);
+            op->func_doc = PyString_FromString(op->func.m_ml->ml_doc);
 #endif
-    }
-    if (op->func_doc == 0) {
-        Py_INCREF(Py_None);
-        return Py_None;
+            if (unlikely(op->func_doc == NULL))
+                return NULL;
+        } else {
+            Py_INCREF(Py_None);
+            return Py_None;
+        }
     }
     Py_INCREF(op->func_doc);
     return op->func_doc;
@@ -30548,22 +30584,23 @@ __Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value)
 {
     PyObject *tmp = op->func_doc;
     if (value == NULL)
-        op->func_doc = Py_None; /* Mark as deleted */
-    else
-        op->func_doc = value;
-    Py_INCREF(op->func_doc);
+        value = Py_None; /* Mark as deleted */
+    Py_INCREF(value);
+    op->func_doc = value;
     Py_XDECREF(tmp);
     return 0;
 }
 static PyObject *
 __Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op)
 {
-    if (op->func_name == NULL) {
+    if (unlikely(op->func_name == NULL)) {
 #if PY_MAJOR_VERSION >= 3
         op->func_name = PyUnicode_InternFromString(op->func.m_ml->ml_name);
 #else
         op->func_name = PyString_InternFromString(op->func.m_ml->ml_name);
 #endif
+        if (unlikely(op->func_name == NULL))
+            return NULL;
     }
     Py_INCREF(op->func_name);
     return op->func_name;
@@ -30573,9 +30610,9 @@ __Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value)
 {
     PyObject *tmp;
 #if PY_MAJOR_VERSION >= 3
-    if (value == NULL || !PyUnicode_Check(value)) {
+    if (unlikely(value == NULL || !PyUnicode_Check(value))) {
 #else
-    if (value == NULL || !PyString_Check(value)) {
+    if (unlikely(value == NULL || !PyString_Check(value))) {
 #endif
         PyErr_SetString(PyExc_TypeError,
                         "__name__ must be set to a string object");
@@ -30588,6 +30625,31 @@ __Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value)
     return 0;
 }
 static PyObject *
+__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op)
+{
+    Py_INCREF(op->func_qualname);
+    return op->func_qualname;
+}
+static int
+__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value)
+{
+    PyObject *tmp;
+#if PY_MAJOR_VERSION >= 3
+    if (unlikely(value == NULL || !PyUnicode_Check(value))) {
+#else
+    if (unlikely(value == NULL || !PyString_Check(value))) {
+#endif
+        PyErr_SetString(PyExc_TypeError,
+                        "__qualname__ must be set to a string object");
+        return -1;
+    }
+    tmp = op->func_qualname;
+    Py_INCREF(value);
+    op->func_qualname = value;
+    Py_XDECREF(tmp);
+    return 0;
+}
+static PyObject *
 __Pyx_CyFunction_get_self(__pyx_CyFunctionObject *m, CYTHON_UNUSED void *closure)
 {
     PyObject *self;
@@ -30600,9 +30662,9 @@ __Pyx_CyFunction_get_self(__pyx_CyFunctionObject *m, CYTHON_UNUSED void *closure
 static PyObject *
 __Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op)
 {
-    if (op->func_dict == NULL) {
+    if (unlikely(op->func_dict == NULL)) {
         op->func_dict = PyDict_New();
-        if (op->func_dict == NULL)
+        if (unlikely(op->func_dict == NULL))
             return NULL;
     }
     Py_INCREF(op->func_dict);
@@ -30612,12 +30674,12 @@ static int
 __Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value)
 {
     PyObject *tmp;
-    if (value == NULL) {
+    if (unlikely(value == NULL)) {
         PyErr_SetString(PyExc_TypeError,
                "function's dictionary may not be deleted");
         return -1;
     }
-    if (!PyDict_Check(value)) {
+    if (unlikely(!PyDict_Check(value))) {
         PyErr_SetString(PyExc_TypeError,
                "setting function's dictionary to a non-dict");
         return -1;
@@ -30657,7 +30719,7 @@ __Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op)
     }
     if (op->defaults_getter) {
         PyObject *res = op->defaults_getter((PyObject *) op);
-        if (res) {
+        if (likely(res)) {
             Py_INCREF(res);
             op->defaults_tuple = res;
         }
@@ -30671,6 +30733,7 @@ static PyGetSetDef __pyx_CyFunction_getsets[] = {
     {(char *) "__doc__",  (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
     {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
     {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
+    {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0},
     {(char *) "__self__", (getter)__Pyx_CyFunction_get_self, 0, 0, 0},
     {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
     {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
@@ -30704,7 +30767,7 @@ static PyMethodDef __pyx_CyFunction_methods[] = {
     {__Pyx_NAMESTR("__reduce__"), (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0},
     {0, 0, 0, 0}
 };
-static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int flags,
+static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int flags, PyObject* qualname,
                                       PyObject *closure, PyObject *module, PyObject* code) {
     __pyx_CyFunctionObject *op = PyObject_GC_New(__pyx_CyFunctionObject, type);
     if (op == NULL)
@@ -30719,6 +30782,8 @@ static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int f
     op->func.m_module = module;
     op->func_dict = NULL;
     op->func_name = NULL;
+    Py_INCREF(qualname);
+    op->func_qualname = qualname;
     op->func_doc = NULL;
     op->func_classobj = NULL;
     Py_XINCREF(code);
@@ -30737,6 +30802,7 @@ __Pyx_CyFunction_clear(__pyx_CyFunctionObject *m)
     Py_CLEAR(m->func.m_module);
     Py_CLEAR(m->func_dict);
     Py_CLEAR(m->func_name);
+    Py_CLEAR(m->func_qualname);
     Py_CLEAR(m->func_doc);
     Py_CLEAR(m->func_code);
     Py_CLEAR(m->func_classobj);
@@ -30765,6 +30831,7 @@ static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit,
     Py_VISIT(m->func.m_module);
     Py_VISIT(m->func_dict);
     Py_VISIT(m->func_name);
+    Py_VISIT(m->func_qualname);
     Py_VISIT(m->func_doc);
     Py_VISIT(m->func_code);
     Py_VISIT(m->func_classobj);
@@ -30797,13 +30864,12 @@ static PyObject *__Pyx_CyFunction_descr_get(PyObject *func, PyObject *obj, PyObj
 static PyObject*
 __Pyx_CyFunction_repr(__pyx_CyFunctionObject *op)
 {
-    PyObject *func_name = __Pyx_CyFunction_get_name(op);
 #if PY_MAJOR_VERSION >= 3
     return PyUnicode_FromFormat("<cyfunction %U at %p>",
-                                func_name, (void *)op);
+                                op->func_qualname, (void *)op);
 #else
     return PyString_FromFormat("<cyfunction %s at %p>",
-                               PyString_AsString(func_name), (void *)op);
+                               PyString_AsString(op->func_qualname), (void *)op);
 #endif
 }
 #if CYTHON_COMPILING_IN_PYPY
@@ -31936,6 +32002,10 @@ static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class
     PyObject *result = 0;
     PyObject *py_name = 0;
     char warning[200];
+    Py_ssize_t basicsize;
+#ifdef Py_LIMITED_API
+    PyObject *py_basicsize;
+#endif
     py_module = __Pyx_ImportModule(module_name);
     if (!py_module)
         goto bad;
@@ -31955,7 +32025,19 @@ static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class
             module_name, class_name);
         goto bad;
     }
-    if (!strict && (size_t)((PyTypeObject *)result)->tp_basicsize > size) {
+#ifndef Py_LIMITED_API
+    basicsize = ((PyTypeObject *)result)->tp_basicsize;
+#else
+    py_basicsize = PyObject_GetAttrString(result, "__basicsize__");
+    if (!py_basicsize)
+        goto bad;
+    basicsize = PyLong_AsSsize_t(py_basicsize);
+    Py_DECREF(py_basicsize);
+    py_basicsize = 0;
+    if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred())
+        goto bad;
+#endif
+    if (!strict && (size_t)basicsize > size) {
         PyOS_snprintf(warning, sizeof(warning),
             "%s.%s size changed, may indicate binary incompatibility",
             module_name, class_name);
@@ -31965,7 +32047,7 @@ static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class
         if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad;
         #endif
     }
-    else if ((size_t)((PyTypeObject *)result)->tp_basicsize != size) {
+    else if ((size_t)basicsize != size) {
         PyErr_Format(PyExc_ValueError,
             "%s.%s has the wrong size, try recompiling",
             module_name, class_name);
@@ -32243,15 +32325,11 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
     return 0;
 }
 
-
-/* Type Conversion Functions */
-
 static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
    int is_true = x == Py_True;
    if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
    else return PyObject_IsTrue(x);
 }
-
 static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
   PyNumberMethods *m;
   const char *name = NULL;
@@ -32297,7 +32375,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
   }
   return res;
 }
-
 static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
   Py_ssize_t ival;
   PyObject* x = PyNumber_Index(b);
@@ -32306,7 +32383,6 @@ static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
   Py_DECREF(x);
   return ival;
 }
-
 static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
 #if PY_VERSION_HEX < 0x02050000
    if (ival <= LONG_MAX)
@@ -32320,7 +32396,6 @@ static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
    return PyInt_FromSize_t(ival);
 #endif
 }
-
 static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {
    unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);
    if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {
-- 
cgit v1.2.3


From 754e9fd68b617b9dfee89461d51a5dcb98b18fbe Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Sat, 23 Feb 2013 04:23:48 -0500
Subject: one missing quote type

---
 corpus/support/quote-norm.pl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl
index e4e5055e..d2980092 100755
--- a/corpus/support/quote-norm.pl
+++ b/corpus/support/quote-norm.pl
@@ -20,6 +20,7 @@ while(<STDIN>) {
   s/&\s*#45\s*;/--/g;
   s/ï¿½c/--/g;
   s/ ,,/ "/g;
+  s/„/"/g;
   s/``/"/g;
   s/''/"/g;
   s/[「」]/"/g;
-- 
cgit v1.2.3


From 4c788f659e7131b3bcdf2c2372ab546342f3a1b1 Mon Sep 17 00:00:00 2001
From: Victor Chahuneau <vchahune@cs.cmu.edu>
Date: Sat, 23 Feb 2013 16:29:40 -0500
Subject: Add compression option to grammar extractor

---
 python/pkg/cdec/sa/extract.py | 44 +++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index 2e596bd3..782bed8b 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -1,22 +1,25 @@
 #!/usr/bin/env python
 import sys
 import os
+import re
+import gzip
 import argparse
 import logging
-import re
-import multiprocessing as mp
 import signal
+import multiprocessing as mp
 import cdec.sa
 
 extractor, prefix = None, None
-online = False
+online, compress = False, False
 
-def make_extractor(config, grammars, features):
-    global extractor, prefix, online
+def make_extractor(args):
+    global extractor, prefix, online, compress
     signal.signal(signal.SIGINT, signal.SIG_IGN) # Let parent process catch Ctrl+C
-    load_features(features)
-    extractor = cdec.sa.GrammarExtractor(config, online)
-    prefix = grammars
+    load_features(args.features)
+    extractor = cdec.sa.GrammarExtractor(args.config, online)
+    prefix = args.grammars
+    online = args.online
+    compress = args.compress
 
 def load_features(features):
     for featdef in features:
@@ -27,7 +30,7 @@ def load_features(features):
         sys.path.remove(prefix)
 
 def extract(inp):
-    global extractor, prefix, online
+    global extractor, prefix, online, compress
     i, sentence = inp
     sentence = sentence[:-1]
     fields = re.split('\s*\|\|\|\s*', sentence)
@@ -36,7 +39,7 @@ def extract(inp):
     if online:
         if len(fields) < 3:
             sys.stderr.write('Error: online mode requires references and alignments.'
-                    '  Not adding sentence to training data: {0}\n'.format(sentence))
+                    '  Not adding sentence to training data: {}\n'.format(sentence))
             sentence = fields[0]
         else:
             sentence, reference, alignment = fields[0:3]
@@ -46,18 +49,19 @@ def extract(inp):
         if len(fields) > 1:
             sentence = fields[0]
             suffix = ' ||| ' + ' ||| '.join(fields[1:])
-    grammar_file = os.path.join(prefix, 'grammar.{0}'.format(i))
-    with open(grammar_file, 'w') as output:
+
+    grammar_file = os.path.join(prefix, 'grammar.'+str(i))
+    if compress: grammar_file += '.gz'
+    with (gzip.open if compress else open)(grammar_file, 'w') as output:
         for rule in extractor.grammar(sentence):
             output.write(str(rule)+'\n')
     # Add training instance _after_ extracting grammars
     if online:
         extractor.add_instance(sentence, reference, alignment)
     grammar_file = os.path.abspath(grammar_file)
-    return '<seg grammar="{0}" id="{1}"> {2} </seg>{3}'.format(grammar_file, i, sentence, suffix)
+    return '<seg grammar="{}" id="{}">{}</seg>{}'.format(grammar_file, i, sentence, suffix)
 
 def main():
-    global online
     logging.basicConfig(level=logging.INFO)
     parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.')
     parser.add_argument('-c', '--config', required=True,
@@ -70,30 +74,30 @@ def main():
                         help='number of sentences / chunk')
     parser.add_argument('-f', '--features', nargs='*', default=[],
                         help='additional feature definitions')
-    parser.add_argument('-o', '--online', action='store_true', default=False,
+    parser.add_argument('-o', '--online', action='store_true',
                         help='online grammar extraction')
+    parser.add_argument('-z', '--compress', action='store_true',
+                        help='compress grammars with gzip')
     args = parser.parse_args()
 
     if not os.path.exists(args.grammars):
         os.mkdir(args.grammars)
     for featdef in args.features:
         if not featdef.endswith('.py'):
-            sys.stderr.write('Error: feature definition file <{0}>'
+            sys.stderr.write('Error: feature definition file <{}>'
                     ' should be a python module\n'.format(featdef))
             sys.exit(1)
     
-    online = args.online
-    
     if args.jobs > 1:
         logging.info('Starting %d workers; chunk size: %d', args.jobs, args.chunksize)
-        pool = mp.Pool(args.jobs, make_extractor, (args.config, args.grammars, args.features))
+        pool = mp.Pool(args.jobs, make_extractor, (args,))
         try:
             for output in pool.imap(extract, enumerate(sys.stdin), args.chunksize):
                 print(output)
         except KeyboardInterrupt:
             pool.terminate()
     else:
-        make_extractor(args.config, args.grammars, args.features)
+        make_extractor(args)
         for output in map(extract, enumerate(sys.stdin)):
             print(output)
 
-- 
cgit v1.2.3


From 7973046992b710a170a84888820ce76a9e4f2346 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Wed, 27 Feb 2013 20:14:18 -0500
Subject: quick fix

---
 corpus/tokenize-anything.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/corpus/tokenize-anything.sh b/corpus/tokenize-anything.sh
index 1a24193d..028992cf 100755
--- a/corpus/tokenize-anything.sh
+++ b/corpus/tokenize-anything.sh
@@ -9,5 +9,5 @@ $SUPPORT/utf8-normalize.sh |
   sed -e 's/ al - / al-/g' |
   $SUPPORT/fix-contract.pl |
   sed -e 's/^ //' | sed -e 's/ $//' |
-  perl -e 'while(<>){s/(\d+)(\.+)$/$1 ./;print;}'
+  perl -e 'while(<>){s/(\d+)(\.+)$/$1 ./; s/(\d+)(\.+) \|\|\|/$1 . |||/;  print;}'
 
-- 
cgit v1.2.3


From 349ee7d5599bb973506c8bbb56926cf9d366b564 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <simianer@cl.uni-heidelberg.de>
Date: Sun, 3 Mar 2013 12:06:25 +0100
Subject: dtrain parallelize.rb fixes

---
 training/dtrain/parallelize.rb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 9b0923f6..23f2a7ed 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 
 
-if ARGV.size != 7
+if ARGV.size != 8
   STDERR.write "Usage: "
   STDERR.write "ruby parallelize.rb <dtrain.ini> <epochs> <rand=true|false> <#shards|predef> <at once> <input> <refs> <qsub>\n"
   exit
@@ -95,6 +95,7 @@ end
   remaining_shards = num_shards
   while remaining_shards > 0
     shards_at_once.times {
+      break if remaining_shards==0
       qsub_str_start = qsub_str_end = ''
       local_end = ''
       if use_qsub
-- 
cgit v1.2.3


From e419e52189ce8472bc5f5517796d00f0887affa7 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Fri, 8 Mar 2013 22:44:49 -0500
Subject: few preproc fixes

---
 corpus/paste-files.pl        | 1 +
 corpus/support/quote-norm.pl | 2 ++
 corpus/support/token_list    | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/corpus/paste-files.pl b/corpus/paste-files.pl
index 4cb424ad..ef2cd937 100755
--- a/corpus/paste-files.pl
+++ b/corpus/paste-files.pl
@@ -34,6 +34,7 @@ while(1) {
       $done = 1;
       last;
     }
+    $r =~ s/\r//g;
     chomp $r;
     if ($r =~ /\|\|\|/) {
       $r = '';
diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl
index d2980092..b104e73c 100755
--- a/corpus/support/quote-norm.pl
+++ b/corpus/support/quote-norm.pl
@@ -11,6 +11,8 @@ while(<STDIN>) {
   s/&\s*squot\s*;/'/gi;
   s/&\s*quot\s*;/"/gi;
   s/&\s*amp\s*;/&/gi;
+  s/&\s*nbsp\s*;/&/gi;
+  s/&\s*#\s*160\s*;/ /gi;
   s/ (\d\d): (\d\d)/ $1:$2/g;
   s/[\x{20a0}]\x{20ac}]/ EUR /g;
   s/[\x{00A3}]/ GBP /g;
diff --git a/corpus/support/token_list b/corpus/support/token_list
index d470cb22..366cd7ff 100644
--- a/corpus/support/token_list
+++ b/corpus/support/token_list
@@ -37,6 +37,8 @@ tel.
 10.
 
 ##################### abbreviation: words that contain period.
+EE.UU.
+ee.uu.
 U.A.E
 Ala.
 Ph.D.
-- 
cgit v1.2.3


From bd65d6a4492e172a7840c010c5414ceb6f6acd56 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Sat, 9 Mar 2013 00:05:40 -0500
Subject: bump release

---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 402ddd0a..98deac86 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cdec],[2013-01-20])
+AC_INIT([cdec],[2013-03-08])
 AC_CONFIG_SRCDIR([decoder/cdec.cc])
 AM_INIT_AUTOMAKE
 AC_CONFIG_HEADERS(config.h)
-- 
cgit v1.2.3


From 46da9f37aa407dd483d953c7a10cce16c83cf001 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Sat, 9 Mar 2013 00:32:41 -0500
Subject: fix readme

---
 README.md | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index d89e9558..c282d468 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,30 @@
 `cdec` is a research platform for machine translation and similar structured prediction problems.
 
-## Installation
+## Building from a downloaded archive
 
-Build `cdec`:
-
-	autoreconf -ifv
 	./configure
 	make
 	./tests/run-system-tests.pl
 
-You will need the following libraries / tools:
+You will need the following software:
 
-- [Autoconf / Automake / Libtool](http://www.gnu.org/software/autoconf/)
-    - Older versions of GNU autotools may not work properly.
 - [Boost C++ libraries (version 1.44 or later)](http://www.boost.org/)
     - If you build your own boost, you _must install it_ using `bjam install`.
     - Older versions of Boost _may_ work, but problems have been reported with command line option parsing on some platforms with older versions.
 - [GNU Flex](http://flex.sourceforge.net/)
 
+## Building from a git clone
+
+In addition to the standard `cdec` third party requirements, you will additionally need the following software:
+
+- [Autoconf / Automake / Libtool](http://www.gnu.org/software/autoconf/)
+    - Older versions of GNU autotools may not work properly.
+	autoreconf -ifv
+	./configure
+	make
+	./tests/run-system-tests.pl
+
 ## Further information
 
-[For more information, refer to the cdec documentation](http://www.cdec-decoder.org)
+[For more information, refer to the `cdec` documentation](http://www.cdec-decoder.org)
 
-- 
cgit v1.2.3


From 25d441846880948f7809b2d0e1acd96fc947905d Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Sat, 9 Mar 2013 00:33:27 -0500
Subject: fix readme

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index c282d468..b5349836 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@ In addition to the standard `cdec` third party requirements, you will additional
 
 - [Autoconf / Automake / Libtool](http://www.gnu.org/software/autoconf/)
     - Older versions of GNU autotools may not work properly.
+
 	autoreconf -ifv
 	./configure
 	make
-- 
cgit v1.2.3


From d216d5ad8f1a31eb5eedbdf963de4dea91272526 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Sat, 9 Mar 2013 00:34:21 -0500
Subject: fix readme

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index b5349836..42d9953c 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 
 ## Building from a downloaded archive
 
+Instructions:
+
 	./configure
 	make
 	./tests/run-system-tests.pl
@@ -20,6 +22,8 @@ In addition to the standard `cdec` third party requirements, you will additional
 - [Autoconf / Automake / Libtool](http://www.gnu.org/software/autoconf/)
     - Older versions of GNU autotools may not work properly.
 
+Instructions:
+
 	autoreconf -ifv
 	./configure
 	make
-- 
cgit v1.2.3


From 2a13c36833c44106c9cf220443b263ea39d69436 Mon Sep 17 00:00:00 2001
From: Michael Denkowski <michael.j.denkowski@gmail.com>
Date: Mon, 11 Mar 2013 09:56:20 -0400
Subject: Moved to rulefactory

---
 python/pkg/cdec/sa/online_extractor.py | 337 ---------------------------------
 1 file changed, 337 deletions(-)
 delete mode 100755 python/pkg/cdec/sa/online_extractor.py

diff --git a/python/pkg/cdec/sa/online_extractor.py b/python/pkg/cdec/sa/online_extractor.py
deleted file mode 100755
index 03a46b3b..00000000
--- a/python/pkg/cdec/sa/online_extractor.py
+++ /dev/null
@@ -1,337 +0,0 @@
-#!/usr/bin/env python
-
-import collections, sys
-
-import cdec.configobj
-
-CAT = '[X]'  # Default non-terminal
-MAX_SIZE = 15  # Max span of a grammar rule (source)
-MAX_LEN = 5  # Max number of terminals and non-terminals in a rule (source)
-MAX_NT = 2  # Max number of non-terminals in a rule
-MIN_GAP = 1  # Min number of terminals between non-terminals (source)
-
-# Spans are _inclusive_ on both ends [i, j]
-# TODO: Replace all of this with bit vectors?
-def span_check(vec, i, j):
-    k = i
-    while k <= j:
-        if vec[k]:
-            return False
-        k += 1
-    return True
-
-def span_flip(vec, i, j):
-    k = i
-    while k <= j:
-        vec[k] = ~vec[k]
-        k += 1
-
-# Next non-terminal
-def next_nt(nt):
-     if not nt:
-         return 1
-     return nt[-1][0] + 1
-
-class NonTerminal:
-    def __init__(self, index):
-        self.index = index
-    def __str__(self):
-        return '[X,{0}]'.format(self.index)
-
-def fmt_rule(f_sym, e_sym, links):
-    a_str = ' '.join('{0}-{1}'.format(i, j) for (i, j) in links)
-    return '[X] ||| {0} ||| {1} ||| {2}'.format(' '.join(str(sym) for sym in f_sym),
-                                                ' '.join(str(sym) for sym in e_sym),
-                                                a_str)
-
-class OnlineGrammarExtractor:
-
-    def __init__(self, config=None):
-        if isinstance(config, str) or isinstance(config, unicode):
-            if not os.path.exists(config):
-                raise IOError('cannot read configuration from {0}'.format(config))
-            config = cdec.configobj.ConfigObj(config, unrepr=True)
-        elif not config:
-            config = collections.defaultdict(lambda: None)
-        self.category = CAT
-        self.max_size = MAX_SIZE
-        self.max_length = config['max_len'] or MAX_LEN
-        self.max_nonterminals = config['max_nt'] or MAX_NT
-        self.min_gap_size = MIN_GAP
-        # Hard coded: require at least one aligned word
-        # Hard coded: require tight phrases
-
-        # Phrase counts
-        self.phrases_f = collections.defaultdict(lambda: 0)
-        self.phrases_e = collections.defaultdict(lambda: 0)
-        self.phrases_fe = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
-
-        # Bilexical counts
-        self.bilex_f = collections.defaultdict(lambda: 0)
-        self.bilex_e = collections.defaultdict(lambda: 0)
-        self.bilex_fe = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
-
-    # Aggregate bilexical counts
-    def aggr_bilex(self, f_words, e_words):
-
-        for e_w in e_words:
-            self.bilex_e[e_w] += 1
-
-        for f_w in f_words:
-            self.bilex_f[f_w] += 1
-            for e_w in e_words:
-                self.bilex_fe[f_w][e_w] += 1
-
-    # Aggregate stats from a training instance:
-    # Extract hierarchical phrase pairs
-    # Update bilexical counts
-    def add_instance(self, f_words, e_words, alignment):
-
-        # Bilexical counts
-        self.aggr_bilex(f_words, e_words)
-
-        # Phrase pairs extracted from this instance
-        phrases = set()
-
-        f_len = len(f_words)
-        e_len = len(e_words)
-
-        # Pre-compute alignment info
-        al = [[] for i in range(f_len)]
-        al_span = [[f_len + 1, -1] for i in range(f_len)]
-        for (f, e) in alignment:
-            al[f].append(e)
-            al_span[f][0] = min(al_span[f][0], e)
-            al_span[f][1] = max(al_span[f][1], e)
-
-        # Target side word coverage
-        # TODO: Does Cython do bit vectors?
-        cover = [0] * e_len
-
-        # Extract all possible hierarchical phrases starting at a source index
-        # f_ i and j are current, e_ i and j are previous
-        def extract(f_i, f_j, e_i, e_j, wc, links, nt, nt_open):
-            # Phrase extraction limits
-            if wc + len(nt) > self.max_length or (f_j + 1) > f_len or \
-                    (f_j - f_i) + 1 > self.max_size:
-                return
-            # Unaligned word
-            if not al[f_j]:
-                # Open non-terminal: extend
-                if nt_open:
-                    nt[-1][2] += 1
-                    extract(f_i, f_j + 1, e_i, e_j, wc, links, nt, True)
-                    nt[-1][2] -= 1
-                # No open non-terminal: extend with word
-                else:
-                    extract(f_i, f_j + 1, e_i, e_j, wc + 1, links, nt, False)
-                return
-            # Aligned word
-            link_i = al_span[f_j][0]
-            link_j = al_span[f_j][1]
-            new_e_i = min(link_i, e_i)
-            new_e_j = max(link_j, e_j)
-            # Open non-terminal: close, extract, extend
-            if nt_open:
-                # Close non-terminal, checking for collisions
-                old_last_nt = nt[-1][:]
-                nt[-1][2] = f_j
-                if link_i < nt[-1][3]:
-                    if not span_check(cover, link_i, nt[-1][3] - 1):
-                        nt[-1] = old_last_nt
-                        return
-                    span_flip(cover, link_i, nt[-1][3] - 1)
-                    nt[-1][3] = link_i
-                if link_j > nt[-1][4]:
-                    if not span_check(cover, nt[-1][4] + 1, link_j):
-                        nt[-1] = old_last_nt
-                        return
-                    span_flip(cover, nt[-1][4] + 1, link_j)
-                    nt[-1][4] = link_j
-                for rule in self.form_rules(f_i, new_e_i, f_words[f_i:f_j + 1], e_words[new_e_i:new_e_j + 1], nt, links):
-                    phrases.add(rule)
-                extract(f_i, f_j + 1, new_e_i, new_e_j, wc, links, nt, False)
-                nt[-1] = old_last_nt
-                if link_i < nt[-1][3]:
-                    span_flip(cover, link_i, nt[-1][3] - 1)
-                if link_j > nt[-1][4]:
-                    span_flip(cover, nt[-1][4] + 1, link_j)
-                return
-            # No open non-terminal
-            # Extract, extend with word
-            collision = False
-            for link in al[f_j]:
-                if cover[link]:
-                    collision = True
-            # Collisions block extraction and extension, but may be okay for
-            # continuing non-terminals
-            if not collision:
-                plus_links = []
-                for link in al[f_j]:
-                    plus_links.append((f_j, link))
-                    cover[link] = ~cover[link]
-                links.append(plus_links)
-                for rule in self.form_rules(f_i, new_e_i, f_words[f_i:f_j + 1], e_words[new_e_i:new_e_j + 1], nt, links):
-                    phrases.add(rule)
-                extract(f_i, f_j + 1, new_e_i, new_e_j, wc + 1, links, nt, False)
-                links.pop()
-                for link in al[f_j]:
-                    cover[link] = ~cover[link]
-            # Try to add a word to a (closed) non-terminal, extract, extend
-            if nt and nt[-1][2] == f_j - 1:
-                # Add to non-terminal, checking for collisions
-                old_last_nt = nt[-1][:]
-                nt[-1][2] = f_j
-                if link_i < nt[-1][3]:
-                    if not span_check(cover, link_i, nt[-1][3] - 1):
-                        nt[-1] = old_last_nt
-                        return
-                    span_flip(cover, link_i, nt[-1][3] - 1)
-                    nt[-1][3] = link_i
-                if link_j > nt[-1][4]:
-                    if not span_check(cover, nt[-1][4] + 1, link_j):
-                        nt[-1] = old_last_nt
-                        return
-                    span_flip(cover, nt[-1][4] + 1, link_j)
-                    nt[-1][4] = link_j
-                # Require at least one word in phrase
-                if links:
-                    for rule in self.form_rules(f_i, new_e_i, f_words[f_i:f_j + 1], e_words[new_e_i:new_e_j + 1], nt, links):
-                        phrases.add(rule)
-                extract(f_i, f_j + 1, new_e_i, new_e_j, wc, links, nt, False)
-                nt[-1] = old_last_nt
-                if new_e_i < nt[-1][3]:
-                    span_flip(cover, link_i, nt[-1][3] - 1)
-                if link_j > nt[-1][4]:
-                    span_flip(cover, nt[-1][4] + 1, link_j)
-            # Try to start a new non-terminal, extract, extend
-            if (not nt or f_j - nt[-1][2] > 1) and len(nt) < self.max_nonterminals:
-                # Check for collisions
-                if not span_check(cover, link_i, link_j):
-                    return
-                span_flip(cover, link_i, link_j)
-                nt.append([next_nt(nt), f_j, f_j, link_i, link_j])
-                # Require at least one word in phrase
-                if links:
-                    for rule in self.form_rules(f_i, new_e_i, f_words[f_i:f_j + 1], e_words[new_e_i:new_e_j + 1], nt, links):
-                        phrases.add(rule)
-                extract(f_i, f_j + 1, new_e_i, new_e_j, wc, links, nt, False)
-                nt.pop()
-                span_flip(cover, link_i, link_j)
-            # TODO: try adding NT to start, end, both
-            # check: one aligned word on boundary that is not part of a NT
-
-        # Try to extract phrases from every f index
-        f_i = 0
-        while f_i < f_len:
-            # Skip if phrases won't be tight on left side
-            if not al[f_i]:
-                f_i += 1
-                continue
-            extract(f_i, f_i, f_len + 1, -1, 1, [], [], False)
-            f_i += 1
-
-        for rule in sorted(phrases):
-            print rule
-
-    # Create a rule from source, target, non-terminals, and alignments
-    def form_rules(self, f_i, e_i, f_span, e_span, nt, al):
-    
-        # This could be more efficient but is unlikely to be the bottleneck
-    
-        rules = []
-    
-        nt_inv = sorted(nt, cmp=lambda x, y: cmp(x[3], y[3]))
-    
-        f_sym = f_span[:]
-        off = f_i
-        for next_nt in nt:
-            nt_len = (next_nt[2] - next_nt[1]) + 1
-            i = 0
-            while i < nt_len:
-                f_sym.pop(next_nt[1] - off)
-                i += 1
-            f_sym.insert(next_nt[1] - off, NonTerminal(next_nt[0]))
-            off += (nt_len - 1)
-    
-        e_sym = e_span[:]
-        off = e_i
-        for next_nt in nt_inv:
-            nt_len = (next_nt[4] - next_nt[3]) + 1
-            i = 0
-            while i < nt_len:
-                e_sym.pop(next_nt[3] - off)
-                i += 1
-            e_sym.insert(next_nt[3] - off, NonTerminal(next_nt[0]))
-            off += (nt_len - 1)
-    
-        # Adjusting alignment links takes some doing
-        links = [list(link) for sub in al for link in sub]
-        links_len = len(links)
-        nt_len = len(nt)
-        nt_i = 0
-        off = f_i
-        i = 0
-        while i < links_len:
-            while nt_i < nt_len and links[i][0] > nt[nt_i][1]:
-                off += (nt[nt_i][2] - nt[nt_i][1])
-                nt_i += 1
-            links[i][0] -= off
-            i += 1
-        nt_i = 0
-        off = e_i
-        i = 0
-        while i < links_len:
-            while nt_i < nt_len and links[i][1] > nt_inv[nt_i][3]:
-                off += (nt_inv[nt_i][4] - nt_inv[nt_i][3])
-                nt_i += 1
-            links[i][1] -= off
-            i += 1
-    
-        # Rule
-        rules.append(fmt_rule(f_sym, e_sym, links))
-        if len(f_sym) >= self.max_length or len(nt) >= self.max_nonterminals:
-            return rules
-        last_index = nt[-1][0] if nt else 0
-        # Rule [X]
-        if not nt or not isinstance(f_sym[-1], NonTerminal):
-            f_sym.append(NonTerminal(last_index + 1))
-            e_sym.append(NonTerminal(last_index + 1))
-            rules.append(fmt_rule(f_sym, e_sym, links))
-            f_sym.pop()
-            e_sym.pop()
-        # [X] Rule
-        if not nt or not isinstance(f_sym[0], NonTerminal):
-            for sym in f_sym:
-                if isinstance(sym, NonTerminal):
-                    sym.index += 1
-            for sym in e_sym:
-                if isinstance(sym, NonTerminal):
-                    sym.index += 1
-            for link in links:
-                link[0] += 1
-                link[1] += 1
-            f_sym.insert(0, NonTerminal(1))
-            e_sym.insert(0, NonTerminal(1))
-            rules.append(fmt_rule(f_sym, e_sym, links))
-        if len(f_sym) >= self.max_length or len(nt) + 1 >= self.max_nonterminals:
-            return rules
-        # [X] Rule [X]
-        if not nt or not isinstance(f_sym[-1], NonTerminal):
-            f_sym.append(NonTerminal(last_index + 2))
-            e_sym.append(NonTerminal(last_index + 2))
-            rules.append(fmt_rule(f_sym, e_sym, links))
-        return rules
-
-def main(argv):
-
-    extractor = OnlineGrammarExtractor()
-
-    for line in sys.stdin:
-        print >> sys.stderr, line.strip()
-        f_words, e_words, a_str = (x.split() for x in line.split('|||'))
-        alignment = sorted(tuple(int(y) for y in x.split('-')) for x in a_str)
-        extractor.add_instance(f_words, e_words, alignment)
-
-if __name__ == '__main__':
-    main(sys.argv)
-- 
cgit v1.2.3


From bf9ee90b00ebc1fc4f3ce16cb33bdbd1032675c9 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Mon, 11 Mar 2013 15:30:09 +0100
Subject: parallelize.rb: proper command line arguments

---
 training/dtrain/parallelize.rb | 46 +++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 23f2a7ed..50c966d7 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -1,12 +1,30 @@
 #!/usr/bin/env ruby
 
+require 'trollop'
 
-if ARGV.size != 8
-  STDERR.write "Usage: "
-  STDERR.write "ruby parallelize.rb <dtrain.ini> <epochs> <rand=true|false> <#shards|predef> <at once> <input> <refs> <qsub>\n"
-  exit
+def usage
+  if ARGV.size != 8
+    STDERR.write "Usage: "
+    STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q]\n"
+    exit 1
+  end
+end
+usage if not [12, 13, 14].include? ARGV.size
+
+opts = Trollop::options do
+  opt :config, "dtrain config file", :type => :string
+  opt :epochs, "number of epochs", :type => :int
+  opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false
+  opt :shards, "number of shards", :type => :int
+  opt :processes_at_once, "have this number (max) running at the same time", :type => :int, :default => 9999
+  opt :input, "input", :type => :string
+  opt :references, "references", :type => :string
+  opt :qsub, "use qsub", :type => :bool, :default => false
 end
 
+puts opts.to_s
+
+
 dtrain_dir = File.expand_path File.dirname(__FILE__)
 dtrain_bin = "#{dtrain_dir}/dtrain"
 ruby       = '/usr/bin/ruby'
@@ -14,22 +32,22 @@ lplp_rb    = "#{dtrain_dir}/hstreaming/lplp.rb"
 lplp_args  = 'l2 select_k 100000'
 cat        = '/bin/cat'
 
-ini        = ARGV[0]
-epochs     = ARGV[1].to_i
+ini        = opts[:config]
+epochs     = opts[:epochs]
 rand = false
-rand = true if ARGV[2]=='true'
+rand = true if opts[:randomize]
 predefined_shards = false
-if ARGV[3] == 'predef'
+if opts[:shards] == 0
   predefined_shards = true
-  num_shards = -1
+  num_shards = 0
 else
-  num_shards = ARGV[3].to_i
+  num_shards = opts[:shards]
 end
-shards_at_once = ARGV[4].to_i
-input = ARGV[5]
-refs  = ARGV[6]
+shards_at_once = opts[:processes_at_once]
+input = opts[:input]
+refs  = opts[:references]
 use_qsub   = false
-use_qsub = true if ARGV[7]
+use_qsub = true if opts[:qsub]
 
 `mkdir work`
 
-- 
cgit v1.2.3


From 5125f56e6f0f5ee5427f2687eb5f962589ae4c5e Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Mon, 11 Mar 2013 15:35:41 +0100
Subject: parallelize.rb: proper command line arguments (add --dtrain_binary option)

---
 training/dtrain/parallelize.rb | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 50c966d7..acfd7290 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -5,7 +5,7 @@ require 'trollop'
 def usage
   if ARGV.size != 8
     STDERR.write "Usage: "
-    STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q]\n"
+    STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary>\n"
     exit 1
   end
 end
@@ -20,13 +20,18 @@ opts = Trollop::options do
   opt :input, "input", :type => :string
   opt :references, "references", :type => :string
   opt :qsub, "use qsub", :type => :bool, :default => false
+  opt :dtrain_binary, "path to dtrain binary", :type => :string
 end
 
 puts opts.to_s
 
 
 dtrain_dir = File.expand_path File.dirname(__FILE__)
-dtrain_bin = "#{dtrain_dir}/dtrain"
+if not opts[:dtrain_binary]
+  dtrain_bin = "#{dtrain_dir}/dtrain"
+else
+  dtrain_bin = opts[:dtrain_binary]
+end
 ruby       = '/usr/bin/ruby'
 lplp_rb    = "#{dtrain_dir}/hstreaming/lplp.rb"
 lplp_args  = 'l2 select_k 100000'
-- 
cgit v1.2.3


From 2482bc590bc38b0256322c52e135672a222e84d0 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Mon, 11 Mar 2013 15:57:34 +0100
Subject: parallelize.rb: proper command line arguments (add --lplp_args option)

---
 training/dtrain/parallelize.rb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index acfd7290..30fb0008 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -21,6 +21,7 @@ opts = Trollop::options do
   opt :references, "references", :type => :string
   opt :qsub, "use qsub", :type => :bool, :default => false
   opt :dtrain_binary, "path to dtrain binary", :type => :string
+  opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000"
 end
 
 puts opts.to_s
@@ -34,7 +35,7 @@ else
 end
 ruby       = '/usr/bin/ruby'
 lplp_rb    = "#{dtrain_dir}/hstreaming/lplp.rb"
-lplp_args  = 'l2 select_k 100000'
+lplp_args  = opts[:lplp_args]
 cat        = '/bin/cat'
 
 ini        = opts[:config]
-- 
cgit v1.2.3


From a72761780f54734ba20800fd5f099032fa1cd947 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Mon, 11 Mar 2013 16:00:42 +0100
Subject: parallelize.rb: also accept 11 arguments; stop printing parsed options

---
 training/dtrain/parallelize.rb | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 30fb0008..a1826e98 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -9,7 +9,7 @@ def usage
     exit 1
   end
 end
-usage if not [12, 13, 14].include? ARGV.size
+usage if not [11, 12, 13, 14].include? ARGV.size
 
 opts = Trollop::options do
   opt :config, "dtrain config file", :type => :string
@@ -24,8 +24,6 @@ opts = Trollop::options do
   opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000"
 end
 
-puts opts.to_s
-
 
 dtrain_dir = File.expand_path File.dirname(__FILE__)
 if not opts[:dtrain_binary]
-- 
cgit v1.2.3


From 33e25664131ddd9507b1c9eef2fb238e1e8840a1 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Mon, 11 Mar 2013 17:06:38 -0400
Subject: russian ortho norm

---
 word-aligner/ortho-norm/ru.pl | 44 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100755 word-aligner/ortho-norm/ru.pl

diff --git a/word-aligner/ortho-norm/ru.pl b/word-aligner/ortho-norm/ru.pl
new file mode 100755
index 00000000..34452d06
--- /dev/null
+++ b/word-aligner/ortho-norm/ru.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+use strict;
+use utf8;
+binmode(STDIN,":utf8");
+binmode(STDOUT,":utf8");
+while(<STDIN>) {
+  $_ = uc $_;
+  s/А/a/g;
+  s/І/i/g;
+  s/Б/b/g;
+  s/В/v/g;
+  s/Г/g/g;
+  s/Д/d/g;
+  s/Е/e/g;
+  s/Ж/zh/g;
+  s/З/z/g;
+  s/И/i/g;
+  s/Й/i/g;
+  s/К/k/g;
+  s/Л/l/g;
+  s/М/m/g;
+  s/Н/n/g;
+  s/О/o/g;
+  s/П/p/g;
+  s/Р/r/g;
+  s/С/s/g;
+  s/Т/t/g;
+  s/У/u/g;
+  s/Ф/f/g;
+  s/Х/kh/g;
+  s/Ц/c/g;
+  s/Ч/ch/g;
+  s/Ш/sh/g;
+  s/Щ/shch/g;
+  s/Ъ//g;
+  s/Ы//g;
+  s/Ь//g;
+  s/Э/e/g;
+  s/Ю/yo/g;
+  s/Я/ya/g;
+  $_ = lc $_;
+  print;
+}
+
-- 
cgit v1.2.3


From 68fbe21c181d6804ebce52058ebccd1a0d77444c Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Thu, 14 Mar 2013 23:38:41 -0400
Subject: source path features

---
 decoder/Makefile.am       |  2 ++
 decoder/cdec_ff.cc        |  2 ++
 decoder/ff_source_path.cc | 40 ++++++++++++++++++++++++++++++++++++++++
 decoder/ff_source_path.h  | 26 ++++++++++++++++++++++++++
 4 files changed, 70 insertions(+)
 create mode 100644 decoder/ff_source_path.cc
 create mode 100644 decoder/ff_source_path.h

diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 6499b38b..82b50f19 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -60,6 +60,7 @@ libcdec_a_SOURCES = \
   ff_rules.h \
   ff_ruleshape.h \
   ff_sample_fsa.h \
+  ff_source_path.h \
   ff_source_syntax.h \
   ff_spans.h \
   ff_tagger.h \
@@ -140,6 +141,7 @@ libcdec_a_SOURCES = \
   ff_wordalign.cc \
   ff_csplit.cc \
   ff_tagger.cc \
+  ff_source_path.cc \
   ff_source_syntax.cc \
   ff_bleu.cc \
   ff_factory.cc \
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 3ab0f9f6..a60f2c33 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -14,6 +14,7 @@
 #include "ff_rules.h"
 #include "ff_ruleshape.h"
 #include "ff_bleu.h"
+#include "ff_source_path.h"
 #include "ff_source_syntax.h"
 #include "ff_register.h"
 #include "ff_charset.h"
@@ -70,6 +71,7 @@ void register_feature_functions() {
   ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>);
   ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>);
   ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
+  ff_registry.Register("SourthPathFeatures", new FFFactory<SourcePathFeatures>);
   ff_registry.Register("WordSet", new FFFactory<WordSet>);
   ff_registry.Register("Dwarf", new FFFactory<Dwarf>);
   ff_registry.Register("External", new FFFactory<ExternalFeature>);
diff --git a/decoder/ff_source_path.cc b/decoder/ff_source_path.cc
new file mode 100644
index 00000000..d5fa6bb3
--- /dev/null
+++ b/decoder/ff_source_path.cc
@@ -0,0 +1,40 @@
+#include "ff_source_path.h"
+
+#include "hg.h"
+
+using namespace std;
+
+SourcePathFeatures::SourcePathFeatures(const string& param) : FeatureFunction(4) {}
+
+void SourcePathFeatures::FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const {
+  int& fid = bigram_fids[prev][cur];
+  if (!fid) fid = FD::Convert("SB:"+TD::Convert(prev) + "_" + TD::Convert(cur));
+  if (fid) features->add_value(fid, 1.0);
+}
+
+void SourcePathFeatures::FireUnigramFeature(WordID cur, SparseVector<double>* features) const {
+  int& fid = unigram_fids[cur];
+  if (!fid) fid = FD::Convert("SU:" + TD::Convert(cur));
+  if (fid) features->add_value(fid, 1.0);
+}
+
+void SourcePathFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                               const HG::Edge& edge,
+                                               const vector<const void*>& ant_contexts,
+                                               SparseVector<double>* features,
+                                               SparseVector<double>* estimated_features,
+                                               void* context) const {
+  WordID* res = reinterpret_cast<WordID*>(context);
+  const vector<int>& f = edge.rule_->f();
+  int prev = 0;
+  for (unsigned i = 0; i < f.size(); ++i) {
+    int cur = f[i];
+    if (cur <= 0)
+      cur = *reinterpret_cast<const WordID*>(ant_contexts[cur]);
+    else
+      FireUnigramFeature(cur, features);
+    if (prev) FireBigramFeature(prev, cur, features);
+    prev = cur;
+  }
+  *res = prev;
+}
diff --git a/decoder/ff_source_path.h b/decoder/ff_source_path.h
new file mode 100644
index 00000000..03126412
--- /dev/null
+++ b/decoder/ff_source_path.h
@@ -0,0 +1,26 @@
+#ifndef _FF_SOURCE_PATH_H_
+#define _FF_SOURCE_PATH_H_
+
+#include <vector>
+#include <map>
+#include "ff.h"
+
+class SourcePathFeatures : public FeatureFunction {
+ public:
+  SourcePathFeatures(const std::string& param);
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const HG::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+
+ private:
+  void FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const;
+  void FireUnigramFeature(WordID cur, SparseVector<double>* features) const;
+  mutable std::map<WordID, std::map<WordID, int> > bigram_fids;
+  mutable std::map<WordID, int> unigram_fids;
+};
+
+#endif
-- 
cgit v1.2.3


From 37162522a07551b261c445a92245445c5458f759 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Thu, 14 Mar 2013 23:46:46 -0400
Subject: fix source path: correct registered feature name, negate antecedent index

---
 decoder/cdec_ff.cc        | 2 +-
 decoder/ff_source_path.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index a60f2c33..0bf441d4 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -71,7 +71,7 @@ void register_feature_functions() {
   ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>);
   ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>);
   ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
-  ff_registry.Register("SourthPathFeatures", new FFFactory<SourcePathFeatures>);
+  ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>);
   ff_registry.Register("WordSet", new FFFactory<WordSet>);
   ff_registry.Register("Dwarf", new FFFactory<Dwarf>);
   ff_registry.Register("External", new FFFactory<ExternalFeature>);
diff --git a/decoder/ff_source_path.cc b/decoder/ff_source_path.cc
index d5fa6bb3..56cbfc48 100644
--- a/decoder/ff_source_path.cc
+++ b/decoder/ff_source_path.cc
@@ -30,7 +30,7 @@ void SourcePathFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   for (unsigned i = 0; i < f.size(); ++i) {
     int cur = f[i];
     if (cur <= 0)
-      cur = *reinterpret_cast<const WordID*>(ant_contexts[cur]);
+      cur = *reinterpret_cast<const WordID*>(ant_contexts[-cur]);
     else
       FireUnigramFeature(cur, features);
     if (prev) FireBigramFeature(prev, cur, features);
-- 
cgit v1.2.3


From cba324bee086f4dd0fc9df204d33ff7eb9b6c323 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Fri, 15 Mar 2013 00:17:55 -0400
Subject: fix bug: SourcePathFeatures state size is sizeof(int); index antecedent contexts with a running counter

---
 decoder/ff_source_path.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/decoder/ff_source_path.cc b/decoder/ff_source_path.cc
index 56cbfc48..2a3bee2e 100644
--- a/decoder/ff_source_path.cc
+++ b/decoder/ff_source_path.cc
@@ -4,7 +4,7 @@
 
 using namespace std;
 
-SourcePathFeatures::SourcePathFeatures(const string& param) : FeatureFunction(4) {}
+SourcePathFeatures::SourcePathFeatures(const string& param) : FeatureFunction(sizeof(int)) {}
 
 void SourcePathFeatures::FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const {
   int& fid = bigram_fids[prev][cur];
@@ -27,10 +27,11 @@ void SourcePathFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   WordID* res = reinterpret_cast<WordID*>(context);
   const vector<int>& f = edge.rule_->f();
   int prev = 0;
+  unsigned ntc = 0;
   for (unsigned i = 0; i < f.size(); ++i) {
     int cur = f[i];
-    if (cur <= 0)
-      cur = *reinterpret_cast<const WordID*>(ant_contexts[-cur]);
+    if (cur < 0)
+      cur = *reinterpret_cast<const WordID*>(ant_contexts[ntc++]);
     else
       FireUnigramFeature(cur, features);
     if (prev) FireBigramFeature(prev, cur, features);
@@ -38,3 +39,4 @@ void SourcePathFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   }
   *res = prev;
 }
+
-- 
cgit v1.2.3


From fe8ad704d0b5ecf06c798d75d54789e6532fd3c1 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <simianer@cl.uni-heidelberg.de>
Date: Fri, 15 Mar 2013 09:28:04 +0100
Subject: resharding

---
 training/dtrain/parallelize.rb | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index a1826e98..fca9b10d 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -5,16 +5,16 @@ require 'trollop'
 def usage
   if ARGV.size != 8
     STDERR.write "Usage: "
-    STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary>\n"
+    STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] [--reshard/-y] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary> -l \"l2 select_k 100000\"\n"
     exit 1
   end
 end
-usage if not [11, 12, 13, 14].include? ARGV.size
 
 opts = Trollop::options do
   opt :config, "dtrain config file", :type => :string
   opt :epochs, "number of epochs", :type => :int
   opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false
+  opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false
   opt :shards, "number of shards", :type => :int
   opt :processes_at_once, "have this number (max) running at the same time", :type => :int, :default => 9999
   opt :input, "input", :type => :string
@@ -40,6 +40,8 @@ ini        = opts[:config]
 epochs     = opts[:epochs]
 rand = false
 rand = true if opts[:randomize]
+reshard = false
+reshard = true if opts[:reshard]
 predefined_shards = false
 if opts[:shards] == 0
   predefined_shards = true
@@ -142,7 +144,7 @@ end
   end
   `#{cat} work/weights.*.#{epoch} > work/weights_cat`
   `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}`
-  if rand and epoch+1!=epochs
+  if rand and reshard and epoch+1!=epochs
     input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand
   end
 }
-- 
cgit v1.2.3


From 72b07dfc1534862aea06c102b4382513183ce253 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Fri, 15 Mar 2013 09:56:26 +0100
Subject: added fixed BLEU+1

---
 training/dtrain/dtrain.cc |  2 ++
 training/dtrain/score.cc  | 31 ++++++++++++++++++++++++++++++-
 training/dtrain/score.h   |  5 +++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index b317c365..53487d34 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -163,6 +163,8 @@ main(int argc, char** argv)
     scorer = dynamic_cast<BleuScorer*>(new BleuScorer);
   } else if (scorer_str == "stupid_bleu") {
     scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer);
+  } else if (scorer_str == "fixed_stupid_bleu") {
+    scorer = dynamic_cast<FixedStupidBleuScorer*>(new FixedStupidBleuScorer);
   } else if (scorer_str == "smooth_bleu") {
     scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer);
   } else if (scorer_str == "sum_bleu") {
diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc
index 34fc86a9..96d6e10a 100644
--- a/training/dtrain/score.cc
+++ b/training/dtrain/score.cc
@@ -49,7 +49,7 @@ BleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
  *        for Machine Translation"
  * (Lin & Och '04)
  *
- * NOTE: 0 iff no 1gram match
+ * NOTE: 0 iff no 1gram match ('grounded')
  */
 score_t
 StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
@@ -73,6 +73,35 @@ StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
   return  brevity_penalty(hyp_len, ref_len) * exp(sum);
 }
 
+/*
+ * fixed 'stupid' bleu
+ *
+ * as in "Optimizing for Sentence-Level BLEU+1
+ *        Yields Short Translations"
+ * (Nakov et al. '12)
+ */
+score_t // returns 0 for an empty hyp/ref or when the counts at index 0 are zero
+FixedStupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+                        const unsigned /*rank*/, const unsigned /*src_len*/)
+{
+  unsigned hyp_len = hyp.size(), ref_len = ref.size();
+  if (hyp_len == 0 || ref_len == 0) return 0.;
+  NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+  unsigned M = N_; // loop bound: how many count/weight indices are summed below
+  vector<score_t> v = w_; // local copy of w_ so the weights can be overwritten below
+  if (ref_len < N_) { // reference has fewer tokens than N_: only sum over ref_len indices
+    M = ref_len;
+    for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); // uniform weight 1/M for each used index
+  }
+  score_t sum = 0, add = 0; // add is the term added to both numerator and denominator
+  for (unsigned i = 0; i < M; i++) {
+    if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; // no smoothing at index 0: a zero count gives score 0
+    if (i == 1) add = 1; // from index 1 onwards both counts get +1
+    sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add)));
+  }
+  return  brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix: brevity penalty is taken against ref_len+1 instead of ref_len
+}
+
 /*
  * smooth bleu
  *
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index f317c903..bddaa071 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -148,6 +148,11 @@ struct StupidBleuScorer : public LocalScorer
   score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
 };
 
+struct FixedStupidBleuScorer : public LocalScorer // LocalScorer subclass that only declares Score(); the body is not in this header
+{
+  score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); // last two parameters are left unnamed
+};
+
 struct SmoothBleuScorer : public LocalScorer
 {
   score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
-- 
cgit v1.2.3


From 529c8f0671ce0b09c2a797278a8f84242c86465d Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Fri, 15 Mar 2013 10:29:13 +0100
Subject: removed hadoop/hstreaming mode

---
 training/dtrain/README.md                          |  28 +----
 training/dtrain/dtrain.cc                          | 121 +------------------
 training/dtrain/dtrain.h                           |   8 +-
 training/dtrain/hstreaming/avg.rb                  |  32 -----
 training/dtrain/hstreaming/cdec.ini                |  22 ----
 training/dtrain/hstreaming/dtrain.ini              |  15 ---
 training/dtrain/hstreaming/dtrain.sh               |   9 --
 training/dtrain/hstreaming/hadoop-streaming-job.sh |  30 -----
 training/dtrain/hstreaming/lplp.rb                 | 131 ---------------------
 training/dtrain/hstreaming/red-test                |   9 --
 training/dtrain/lplp.rb                            | 131 +++++++++++++++++++++
 training/dtrain/parallelize.rb                     |   4 +-
 training/dtrain/test/example/cdec.ini              |   2 +-
 13 files changed, 144 insertions(+), 398 deletions(-)
 delete mode 100755 training/dtrain/hstreaming/avg.rb
 delete mode 100644 training/dtrain/hstreaming/cdec.ini
 delete mode 100644 training/dtrain/hstreaming/dtrain.ini
 delete mode 100755 training/dtrain/hstreaming/dtrain.sh
 delete mode 100755 training/dtrain/hstreaming/hadoop-streaming-job.sh
 delete mode 100755 training/dtrain/hstreaming/lplp.rb
 delete mode 100644 training/dtrain/hstreaming/red-test
 create mode 100755 training/dtrain/lplp.rb

diff --git a/training/dtrain/README.md b/training/dtrain/README.md
index 7edabbf1..2ab2f232 100644
--- a/training/dtrain/README.md
+++ b/training/dtrain/README.md
@@ -13,36 +13,18 @@ Builds when building cdec, see ../BUILDING .
 To build only parts needed for dtrain do
 ```
   autoreconf -ifv
-  ./configure [--disable-gtest]
-  cd dtrain/; make
+  ./configure
+  cd training/dtrain/; make
 ```
 
 Running
 -------
-To run this on a dev set locally:
-```
-    #define DTRAIN_LOCAL
-```
-otherwise remove that line or undef, then recompile. You need a single
-grammar file or input annotated with per-sentence grammars (psg) as you
-would use with cdec. Additionally you need to give dtrain a file with
-references (--refs) when running locally.
-
-The input for use with hadoop streaming looks like this:
-```
-    <sid>\t<source>\t<ref>\t<grammar rules separated by \t>
-```
-To convert a psg to this format you need to replace all "\n"
-by "\t". Make sure there are no tabs in your data.
-
-For an example of local usage (with the 'distributed' format)
-the see test/example/ . This expects dtrain to be built without
-DTRAIN_LOCAL.
+See directories under test/ .
 
 Legal
 -----
-Copyright (c) 2012 by Patrick Simianer <p@simianer.de>
+Copyright (c) 2012-2013 by Patrick Simianer <p@simianer.de>
 
-See the file ../LICENSE.txt for the licensing terms that this software is
+See the file LICENSE.txt in the root folder for the licensing terms that this software is
 released under.
 
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 53487d34..dfb5b351 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -12,9 +12,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     ("decoder_config",    po::value<string>(),                                                      "configuration file for cdec")
     ("print_weights",     po::value<string>(),                                               "weights to print on each iteration")
     ("stop_after",        po::value<unsigned>()->default_value(0),                                 "stop after X input sentences")
-    ("tmp",               po::value<string>()->default_value("/tmp"),                                           "temp dir to use")
     ("keep",              po::value<bool>()->zero_tokens(),                               "keep weights files for each iteration")
-    ("hstreaming",        po::value<string>(),                                   "run in hadoop streaming mode, arg is a task id")
     ("epochs",            po::value<unsigned>()->default_value(10),                               "# of iterations T (per shard)")
     ("k",                 po::value<unsigned>()->default_value(100),                            "how many translations to sample")
     ("sample_from",       po::value<string>()->default_value("kbest"),     "where to sample translations from: 'kbest', 'forest'")
@@ -28,16 +26,14 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     ("gamma",             po::value<weight_t>()->default_value(0.),                            "gamma for SVM (0 for perceptron)")
     ("select_weights",    po::value<string>()->default_value("last"),     "output best, last, avg weights ('VOID' to throw away)")
     ("rescale",           po::value<bool>()->zero_tokens(),                              "rescale weight vector after each input")
-    ("l1_reg",            po::value<string>()->default_value("none"),      "apply l1 regularization as in 'Tsuroka et al' (2010)")
+    ("l1_reg",            po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010) UNTESTED")
     ("l1_reg_strength",   po::value<weight_t>(),                                                     "l1 regularization strength")
     ("fselect",           po::value<weight_t>()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO
     ("approx_bleu_d",     po::value<score_t>()->default_value(0.9),                                   "discount for approx. BLEU")
     ("scale_bleu_diff",   po::value<bool>()->zero_tokens(),                      "learning rate <- bleu diff of a misranked pair")
     ("loss_margin",       po::value<weight_t>()->default_value(0.),  "update if no error in pref pair but model scores this near")
     ("max_pairs",         po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.")
-#ifdef DTRAIN_LOCAL
     ("refs,r",            po::value<string>(),                                                         "references in local mode")
-#endif
     ("noup",              po::value<bool>()->zero_tokens(),                                               "do not update weights");
   po::options_description cl("Command Line Options");
   cl.add_options()
@@ -55,16 +51,6 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     cerr << cl << endl;
     return false;
   }
-  if (cfg->count("hstreaming") && (*cfg)["output"].as<string>() != "-") {
-    cerr << "When using 'hstreaming' the 'output' param should be '-'." << endl;
-    return false;
-  }
-#ifdef DTRAIN_LOCAL
-  if ((*cfg)["input"].as<string>() == "-") {
-    cerr << "Can't use stdin as input with this binary. Recompile without DTRAIN_LOCAL" << endl;
-    return false;
-  }
-#endif
   if ((*cfg)["sample_from"].as<string>() != "kbest"
        && (*cfg)["sample_from"].as<string>() != "forest") {
     cerr << "Wrong 'sample_from' param: '" << (*cfg)["sample_from"].as<string>() << "', use 'kbest' or 'forest'." << endl;
@@ -111,17 +97,8 @@ main(int argc, char** argv)
   if (cfg.count("verbose")) verbose = true;
   bool noup = false;
   if (cfg.count("noup")) noup = true;
-  bool hstreaming = false;
-  string task_id;
-  if (cfg.count("hstreaming")) {
-    hstreaming = true;
-    quiet = true;
-    task_id = cfg["hstreaming"].as<string>();
-    cerr.precision(17);
-  }
   bool rescale = false;
   if (cfg.count("rescale")) rescale = true;
-  HSReporter rep(task_id);
   bool keep = false;
   if (cfg.count("keep")) keep = true;
 
@@ -224,16 +201,8 @@ main(int argc, char** argv)
   // buffer input for t > 0
   vector<string> src_str_buf;          // source strings (decoder takes only strings)
   vector<vector<WordID> > ref_ids_buf; // references as WordID vecs
-  // where temp files go
-  string tmp_path = cfg["tmp"].as<string>();
-#ifdef DTRAIN_LOCAL
   string refs_fn = cfg["refs"].as<string>();
   ReadFile refs(refs_fn);
-#else
-  string grammar_buf_fn = gettmpf(tmp_path, "dtrain-grammars");
-  ogzstream grammar_buf_out;
-  grammar_buf_out.open(grammar_buf_fn.c_str());
-#endif
 
   unsigned in_sz = std::numeric_limits<unsigned>::max(); // input index, input size
   vector<pair<score_t, score_t> > all_scores;
@@ -270,9 +239,7 @@ main(int argc, char** argv)
     cerr << setw(25) << "max pairs " << max_pairs << endl;
     cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl;
     cerr << setw(25) << "input " << "'" << input_fn << "'" << endl;
-#ifdef DTRAIN_LOCAL
     cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl;
-#endif
     cerr << setw(25) << "output " << "'" << output_fn << "'" << endl;
     if (cfg.count("input_weights"))
       cerr << setw(25) << "weights in " << "'" << cfg["input_weights"].as<string>() << "'" << endl;
@@ -285,14 +252,10 @@ main(int argc, char** argv)
   for (unsigned t = 0; t < T; t++) // T epochs
   {
 
-  if (hstreaming) cerr << "reporter:status:Iteration #" << t+1 << " of " << T << endl;
-
   time_t start, end;
   time(&start);
-#ifndef DTRAIN_LOCAL
   igzstream grammar_buf_in;
   if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str());
-#endif
   score_t score_sum = 0.;
   score_t model_sum(0);
   unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0;
@@ -340,52 +303,6 @@ main(int argc, char** argv)
 
     // getting input
     vector<WordID> ref_ids; // reference as vector<WordID>
-#ifndef DTRAIN_LOCAL
-    vector<string> in_split; // input: sid\tsrc\tref\tpsg
-    if (t == 0) {
-      // handling input
-      split_in(in, in_split);
-      if (hstreaming && ii == 0) cerr << "reporter:counter:" << task_id << ",First ID," << in_split[0] << endl;
-      // getting reference
-      vector<string> ref_tok;
-      boost::split(ref_tok, in_split[2], boost::is_any_of(" "));
-      register_and_convert(ref_tok, ref_ids);
-      ref_ids_buf.push_back(ref_ids);
-      // process and set grammar
-      bool broken_grammar = true; // ignore broken grammars
-      for (string::iterator it = in.begin(); it != in.end(); it++) {
-        if (!isspace(*it)) {
-          broken_grammar = false;
-          break;
-        }
-      }
-      if (broken_grammar) {
-        cerr << "Broken grammar for " << ii+1 << "! Ignoring this input." << endl;
-        continue;
-      }
-      boost::replace_all(in, "\t", "\n");
-      in += "\n";
-      grammar_buf_out << in << DTRAIN_GRAMMAR_DELIM << " " << in_split[0] << endl;
-      decoder.AddSupplementalGrammarFromString(in);
-      src_str_buf.push_back(in_split[1]);
-      // decode
-      observer->SetRef(ref_ids);
-      decoder.Decode(in_split[1], observer);
-    } else {
-      // get buffered grammar
-      string grammar_str;
-      while (true) {
-        string rule;
-        getline(grammar_buf_in, rule);
-        if (boost::starts_with(rule, DTRAIN_GRAMMAR_DELIM)) break;
-        grammar_str += rule + "\n";
-      }
-      decoder.AddSupplementalGrammarFromString(grammar_str);
-      // decode
-      observer->SetRef(ref_ids_buf[ii]);
-      decoder.Decode(src_str_buf[ii], observer);
-    }
-#else
     if (t == 0) {
       string r_;
       getline(*refs, r_);
@@ -402,7 +319,6 @@ main(int argc, char** argv)
       decoder.Decode(in, observer);
     else
       decoder.Decode(src_str_buf[ii], observer);
-#endif
 
     // get (scored) samples
     vector<ScoredHyp>* samples = observer->GetSamples();
@@ -505,11 +421,6 @@ main(int argc, char** argv)
 
     ++ii;
 
-    if (hstreaming) {
-      rep.update_counter("Seen #"+boost::lexical_cast<string>(t+1), 1u);
-      rep.update_counter("Seen", 1u);
-    }
-
   } // input loop
 
   if (average) w_average += lambdas;
@@ -518,21 +429,8 @@ main(int argc, char** argv)
 
   if (t == 0) {
     in_sz = ii; // remember size of input (# lines)
-    if (hstreaming) {
-      rep.update_counter("|Input|", ii);
-      rep.update_gcounter("|Input|", ii);
-      rep.update_gcounter("Shards", 1u);
-    }
   }
 
-#ifndef DTRAIN_LOCAL
-  if (t == 0) {
-    grammar_buf_out.close();
-  } else {
-    grammar_buf_in.close();
-  }
-#endif
-
   // print some stats
   score_t score_avg = score_sum/(score_t)in_sz;
   score_t model_avg = model_sum/(score_t)in_sz;
@@ -546,7 +444,7 @@ main(int argc, char** argv)
   }
 
   unsigned nonz = 0;
-  if (!quiet || hstreaming) nonz = (unsigned)lambdas.num_nonzero();
+  if (!quiet) nonz = (unsigned)lambdas.num_nonzero();
 
   if (!quiet) {
     cerr << _p5 << _p << "WEIGHTS" << endl;
@@ -571,16 +469,6 @@ main(int argc, char** argv)
     cerr << "           avg f count: " << f_count/(float)list_sz << endl;
   }
 
-  if (hstreaming) {
-    rep.update_counter("Score 1best avg #"+boost::lexical_cast<string>(t+1), (unsigned)(score_avg*DTRAIN_SCALE));
-    rep.update_counter("Model 1best avg #"+boost::lexical_cast<string>(t+1), (unsigned)(model_avg*DTRAIN_SCALE));
-    rep.update_counter("Pairs avg #"+boost::lexical_cast<string>(t+1), (unsigned)((npairs/(weight_t)in_sz)*DTRAIN_SCALE));
-    rep.update_counter("Rank errors avg #"+boost::lexical_cast<string>(t+1), (unsigned)((rank_errors/(weight_t)in_sz)*DTRAIN_SCALE));
-    rep.update_counter("Margin violations avg #"+boost::lexical_cast<string>(t+1), (unsigned)((margin_violations/(weight_t)in_sz)*DTRAIN_SCALE));
-    rep.update_counter("Non zero feature count #"+boost::lexical_cast<string>(t+1), nonz);
-    rep.update_gcounter("Non zero feature count #"+boost::lexical_cast<string>(t+1), nonz);
-  }
-
   pair<score_t,score_t> remember;
   remember.first = score_avg;
   remember.second = model_avg;
@@ -611,10 +499,6 @@ main(int argc, char** argv)
 
   if (average) w_average /= (weight_t)T;
 
-#ifndef DTRAIN_LOCAL
-  unlink(grammar_buf_fn.c_str());
-#endif
-
   if (!noup) {
     if (!quiet) cerr << endl << "Writing weights file to '" << output_fn << "' ..." << endl;
     if (select_weights == "last" || average) { // last, average
@@ -651,7 +535,6 @@ main(int argc, char** argv)
         }
       }
     }
-    if (output_fn == "-" && hstreaming) cout << "__SHARD_COUNT__\t1" << endl;
     if (!quiet) cerr << "done" << endl;
   }
 
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index 572fd613..f368d810 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -1,14 +1,12 @@
 #ifndef _DTRAIN_H_
 #define _DTRAIN_H_
 
-#undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs
-                                 // DO NOT USE WITH SVM!
-#define DTRAIN_LOCAL
+#undef DTRAIN_FASTER_PERCEPTRON // only consider actually misranked pairs
+                                // DO NOT ENABLE  WITH SVM (gamma > 0) OR loss_margin!
+
 #define DTRAIN_DOTS 10 // after how many inputs to display a '.'
-#define DTRAIN_GRAMMAR_DELIM "########EOS########"
 #define DTRAIN_SCALE 100000
 
-
 #include <iomanip>
 #include <climits>
 #include <string.h>
diff --git a/training/dtrain/hstreaming/avg.rb b/training/dtrain/hstreaming/avg.rb
deleted file mode 100755
index 2599c732..00000000
--- a/training/dtrain/hstreaming/avg.rb
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env ruby
-# first arg may be an int of custom shard count
-
-shard_count_key = "__SHARD_COUNT__"
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-w = {}
-c = {}
-w.default = 0
-c.default = 0
-while line = STDIN.gets
-  key, val = line.split /\s/
-  w[key] += val.to_f
-  c[key] += 1
-end
-
-if ARGV.size == 0
-  shard_count = w["__SHARD_COUNT__"]
-else
-  shard_count = ARGV[0].to_f
-end
-w.each_key { |k|
-  if k == shard_count_key
-    next
-  else
-    puts "#{k}\t#{w[k]/shard_count}"
-    #puts "# #{c[k]}"
-  end
-}
-
diff --git a/training/dtrain/hstreaming/cdec.ini b/training/dtrain/hstreaming/cdec.ini
deleted file mode 100644
index d4f5cecd..00000000
--- a/training/dtrain/hstreaming/cdec.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-scfg_max_span_limit=15
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=30
-feature_function=WordPenalty
-feature_function=KLanguageModel nc-wmt11.en.srilm.gz
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-#feature_function=RuleIdentityFeatures
-#feature_function=RuleNgramFeatures
-#feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/training/dtrain/hstreaming/dtrain.ini b/training/dtrain/hstreaming/dtrain.ini
deleted file mode 100644
index a2c219a1..00000000
--- a/training/dtrain/hstreaming/dtrain.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-input=-
-output=-
-decoder_config=cdec.ini
-tmp=/var/hadoop/mapred/local/
-epochs=1
-k=100
-N=4
-learning_rate=0.0001
-gamma=0
-scorer=stupid_bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=XYX
-pair_threshold=0
-select_weights=last
diff --git a/training/dtrain/hstreaming/dtrain.sh b/training/dtrain/hstreaming/dtrain.sh
deleted file mode 100755
index 877ff94c..00000000
--- a/training/dtrain/hstreaming/dtrain.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# script to run dtrain with a task id
-
-pushd . &>/dev/null
-cd ..
-ID=$(basename $(pwd)) # attempt_...
-popd &>/dev/null
-./dtrain -c dtrain.ini --hstreaming $ID
-
diff --git a/training/dtrain/hstreaming/hadoop-streaming-job.sh b/training/dtrain/hstreaming/hadoop-streaming-job.sh
deleted file mode 100755
index 92419956..00000000
--- a/training/dtrain/hstreaming/hadoop-streaming-job.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
-
-EXP=a_simple_test
-
-# change these vars to fit your hadoop installation
-HADOOP_HOME=/usr/lib/hadoop-0.20
-JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
-HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
-
- IN=input_on_hdfs
-OUT=output_weights_on_hdfs
-
-# you can -reducer to NONE if you want to
-# do feature selection/averaging locally (e.g. to
-# keep weights of all epochs)
-$HSTREAMING \
-    -mapper "dtrain.sh" \
-    -reducer "ruby lplp.rb l2 select_k 100000" \
-    -input $IN \
-    -output $OUT \
-    -file dtrain.sh \
-    -file lplp.rb \
-    -file ../dtrain \
-    -file dtrain.ini \
-    -file cdec.ini \
-    -file ../test/example/nc-wmt11.en.srilm.gz \
-    -jobconf mapred.reduce.tasks=30 \
-    -jobconf mapred.max.map.failures.percent=0 \
-    -jobconf mapred.job.name="dtrain $EXP"
-
diff --git a/training/dtrain/hstreaming/lplp.rb b/training/dtrain/hstreaming/lplp.rb
deleted file mode 100755
index f0cd58c5..00000000
--- a/training/dtrain/hstreaming/lplp.rb
+++ /dev/null
@@ -1,131 +0,0 @@
-# lplp.rb
-
-# norms
-def l0(feature_column, n)
-  if feature_column.size >= n then return 1 else return 0 end
-end
-
-def l1(feature_column, n=-1)
-  return feature_column.map { |i| i.abs }.reduce { |sum,i| sum+i }
-end
-
-def l2(feature_column, n=-1)
-  return Math.sqrt feature_column.map { |i| i.abs2 }.reduce { |sum,i| sum+i }
-end
-
-def linfty(feature_column, n=-1)
-  return feature_column.map { |i| i.abs }.max
-end
-
-# stats
-def median(feature_column, n)
-  return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2]
-end
-
-def mean(feature_column, n)
-  return feature_column.reduce { |sum, i| sum+i } / n
-end
-
-# selection
-def select_k(weights, norm_fun, n, k=10000)
-  weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p|
-    puts "#{p[0]}\t#{mean(p[1], n)}"
-    k -= 1
-    if k == 0 then break end
-  }
-end
-
-def cut(weights, norm_fun, n, epsilon=0.0001)
-  weights.each { |k,v|
-    if norm_fun.call(v, n).abs >= epsilon
-      puts "#{k}\t#{mean(v, n)}"
-    end
-  }
-end
-
-# test
-def _test()
-  puts
-  w = {}
-  w["a"] = [1, 2, 3]
-  w["b"] = [1, 2]
-  w["c"] = [66]
-  w["d"] = [10, 20, 30]
-  n = 3
-  puts w.to_s
-  puts
-  puts "select_k"
-  puts "l0 expect ad"
-  select_k(w, method(:l0), n, 2)
-  puts "l1 expect cd"
-  select_k(w, method(:l1), n, 2)
-  puts "l2 expect c"
-  select_k(w, method(:l2), n, 1)
-  puts
-  puts "cut"
-  puts "l1 expect cd"
-  cut(w, method(:l1), n, 7)
-  puts
-  puts "median"
-  a = [1,2,3,4,5]
-  puts a.to_s
-  puts median(a, 5)
-  puts
-  puts "#{median(a, 7)} <- that's because we add missing 0s:"
-  puts a.concat(0.step(7-a.size-1).map{|i|0}).to_s
-  puts
-  puts "mean expect bc"
-  w.clear
-  w["a"] = [2]
-  w["b"] = [2.1]
-  w["c"] = [2.2]
-  cut(w, method(:mean), 1, 2.05)
- exit
-end
-#_test()
-
-# actually do something
-def usage()
-  puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> [n] < <input>"
-  puts "   l0...: norms for selection"
-  puts "select_k: only output top k (according to the norm of their column vector) features"
-  puts "     cut: output features with weight >= threshold"
-  puts "       n: if we do not have a shard count use this number for averaging"
-  exit
-end
-
-if ARGV.size < 3 then usage end
-norm_fun = method(ARGV[0].to_sym)
-type = ARGV[1]
-x = ARGV[2].to_f
-
-shard_count_key = "__SHARD_COUNT__"
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-w = {}
-shard_count = 0
-while line = STDIN.gets
-  key, val = line.split /\s+/
-  if key == shard_count_key
-    shard_count += 1
-    next
-  end
-  if w.has_key? key
-    w[key].push val.to_f
-  else
-    w[key] = [val.to_f]
-  end
-end
-
-if ARGV.size == 4 then shard_count = ARGV[3].to_f end
-
-if type == 'cut'
-  cut(w, norm_fun, shard_count, x)
-elsif type == 'select_k'
-  select_k(w, norm_fun, shard_count, x)
-else
-  puts "oh oh"
-end
-
diff --git a/training/dtrain/hstreaming/red-test b/training/dtrain/hstreaming/red-test
deleted file mode 100644
index 2623d697..00000000
--- a/training/dtrain/hstreaming/red-test
+++ /dev/null
@@ -1,9 +0,0 @@
-a	1
-b	2
-c	3.5
-a	1
-b	2
-c	3.5
-d	1
-e	2
-__SHARD_COUNT__	2
diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb
new file mode 100755
index 00000000..f0cd58c5
--- /dev/null
+++ b/training/dtrain/lplp.rb
@@ -0,0 +1,131 @@
+# lplp.rb
+
+# norms
+def l0(feature_column, n) # 1 if the column holds at least n values, otherwise 0
+  if feature_column.size >= n then return 1 else return 0 end
+end
+
+def l1(feature_column, n=-1) # sum of the absolute values; n is accepted but not used
+  return feature_column.map { |i| i.abs }.reduce { |sum,i| sum+i } # reduce without a seed yields nil for an empty column
+end
+
+def l2(feature_column, n=-1) # square root of the sum of squared magnitudes; n is accepted but not used
+  return Math.sqrt feature_column.map { |i| i.abs2 }.reduce { |sum,i| sum+i }
+end
+
+def linfty(feature_column, n=-1) # largest absolute value in the column; n is accepted but not used
+  return feature_column.map { |i| i.abs }.max
+end
+
+# stats
+def median(feature_column, n) # picks the element at index feature_column.size/2 of the sorted, zero-padded column
+  return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2] # Array#concat appends the zeros to the caller's array in place
+end
+
+def mean(feature_column, n) # sum of the column divided by n, not by the column's own length
+  return feature_column.reduce { |sum, i| sum+i } / n
+end
+
+# selection
+def select_k(weights, norm_fun, n, k=10000) # print the first k entries when ordered by descending norm_fun value
+  weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p| # b before a in the comparison gives descending order
+    puts "#{p[0]}\t#{mean(p[1], n)}" # one line per entry: key, tab, mean over n
+    k -= 1
+    if k == 0 then break end # stops only when k hits exactly 0
+  }
+end
+
+def cut(weights, norm_fun, n, epsilon=0.0001) # print every entry whose absolute norm_fun value is at least epsilon
+  weights.each { |k,v|
+    if norm_fun.call(v, n).abs >= epsilon
+      puts "#{k}\t#{mean(v, n)}" # key, tab, mean over n
+    end
+  }
+end
+
+# test
+def _test() # manual check: prints results next to "expect" labels for eyeballing, then exits
+  puts
+  w = {}
+  w["a"] = [1, 2, 3]
+  w["b"] = [1, 2]
+  w["c"] = [66]
+  w["d"] = [10, 20, 30]
+  n = 3 # n passed to the select_k calls and the first cut call
+  puts w.to_s
+  puts
+  puts "select_k"
+  puts "l0 expect ad"
+  select_k(w, method(:l0), n, 2)
+  puts "l1 expect cd"
+  select_k(w, method(:l1), n, 2)
+  puts "l2 expect c"
+  select_k(w, method(:l2), n, 1)
+  puts
+  puts "cut"
+  puts "l1 expect cd"
+  cut(w, method(:l1), n, 7)
+  puts
+  puts "median"
+  a = [1,2,3,4,5]
+  puts a.to_s
+  puts median(a, 5)
+  puts
+  puts "#{median(a, 7)} <- that's because we add missing 0s:"
+  puts a.concat(0.step(7-a.size-1).map{|i|0}).to_s
+  puts
+  puts "mean expect bc"
+  w.clear
+  w["a"] = [2]
+  w["b"] = [2.1]
+  w["c"] = [2.2]
+  cut(w, method(:mean), 1, 2.05)
+ exit # terminate so the command-line part further down does not run
+end
+#_test()
+
+# actually do something
+def usage() # print the command-line synopsis and terminate the script
+  puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> [n] < <input>"
+  puts "   l0...: norms for selection"
+  puts "select_k: only output top k (according to the norm of their column vector) features"
+  puts "     cut: output features with weight >= threshold"
+  puts "       n: if we do not have a shard count use this number for averaging"
+  exit
+end
+
+if ARGV.size < 3 then usage end # need at least: function name, mode, k/threshold
+norm_fun = method(ARGV[0].to_sym) # look up the norm/stat function defined above by its name
+type = ARGV[1] # mode: 'cut' or 'select_k'
+x = ARGV[2].to_f # k or threshold, parsed as a float
+
+shard_count_key = "__SHARD_COUNT__" # input lines with this key are counted instead of stored
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+w = {} # key => list of all values read for that key
+shard_count = 0
+while line = STDIN.gets
+  key, val = line.split /\s+/ # first two whitespace-separated fields of the line
+  if key == shard_count_key
+    shard_count += 1
+    next
+  end
+  if w.has_key? key
+    w[key].push val.to_f
+  else
+    w[key] = [val.to_f]
+  end
+end
+
+if ARGV.size == 4 then shard_count = ARGV[3].to_f end # optional 4th argument replaces the counted shard number
+
+if type == 'cut'
+  cut(w, norm_fun, shard_count, x)
+elsif type == 'select_k'
+  select_k(w, norm_fun, shard_count, x)
+else
+  puts "oh oh" # any other mode is not handled
+end
+
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index fca9b10d..24e7f49e 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -80,7 +80,7 @@ def make_shards(input, refs, num_shards, epoch, rand)
     shard_refs = File.new refs_fn, 'w+'
     refs_fns << refs_fn
     0.upto(shard_sz-1) { |i|
-      j = index.pop 
+      j = index.pop
       shard_in.write in_lines[j]
       shard_refs.write refs_lines[j]
     }
@@ -125,7 +125,7 @@ end
       if use_qsub
         qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \""
         qsub_str_end = "\""
-        local_end = '' 
+        local_end = ''
       else
         local_end = "&>work/out.#{shard}.#{epoch}"
       end
diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini
index 068ebd4d..0215416d 100644
--- a/training/dtrain/test/example/cdec.ini
+++ b/training/dtrain/test/example/cdec.ini
@@ -2,7 +2,7 @@ formalism=scfg
 add_pass_through_rules=true
 scfg_max_span_limit=15
 intersection_strategy=cube_pruning
-cubepruning_pop_limit=30
+cubepruning_pop_limit=200
 feature_function=WordPenalty
 feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
 # all currently working feature functions for translation:
-- 
cgit v1.2.3


From 2a48d73eb794fdd736d1df035c8a31af887cde0a Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Fri, 15 Mar 2013 11:31:18 +0100
Subject: overhauled ruby scripts and examples

---
 training/dtrain/dtrain.cc                          |    2 -
 training/dtrain/examples/parallelized/README       |    5 +
 training/dtrain/examples/parallelized/cdec.ini     |   22 +
 training/dtrain/examples/parallelized/dtrain.ini   |   16 +
 .../examples/parallelized/grammar/grammar.out.0.gz |  Bin 0 -> 8318 bytes
 .../examples/parallelized/grammar/grammar.out.1.gz |  Bin 0 -> 358560 bytes
 .../examples/parallelized/grammar/grammar.out.2.gz |  Bin 0 -> 1014466 bytes
 .../examples/parallelized/grammar/grammar.out.3.gz |  Bin 0 -> 391811 bytes
 .../examples/parallelized/grammar/grammar.out.4.gz |  Bin 0 -> 149590 bytes
 .../examples/parallelized/grammar/grammar.out.5.gz |  Bin 0 -> 537024 bytes
 .../examples/parallelized/grammar/grammar.out.6.gz |  Bin 0 -> 291286 bytes
 .../examples/parallelized/grammar/grammar.out.7.gz |  Bin 0 -> 1038140 bytes
 .../examples/parallelized/grammar/grammar.out.8.gz |  Bin 0 -> 419889 bytes
 .../examples/parallelized/grammar/grammar.out.9.gz |  Bin 0 -> 409140 bytes
 training/dtrain/examples/parallelized/in           |   10 +
 training/dtrain/examples/parallelized/refs         |   10 +
 training/dtrain/examples/parallelized/work/out.0.0 |   61 +
 training/dtrain/examples/parallelized/work/out.0.1 |   62 +
 training/dtrain/examples/parallelized/work/out.1.0 |   61 +
 training/dtrain/examples/parallelized/work/out.1.1 |   62 +
 .../dtrain/examples/parallelized/work/shard.0.0.in |    5 +
 .../examples/parallelized/work/shard.0.0.refs      |    5 +
 .../dtrain/examples/parallelized/work/shard.1.0.in |    5 +
 .../examples/parallelized/work/shard.1.0.refs      |    5 +
 .../dtrain/examples/parallelized/work/weights.0    |   12 +
 .../dtrain/examples/parallelized/work/weights.0.0  |   12 +
 .../dtrain/examples/parallelized/work/weights.0.1  |   12 +
 .../dtrain/examples/parallelized/work/weights.1    |   12 +
 .../dtrain/examples/parallelized/work/weights.1.0  |   11 +
 .../dtrain/examples/parallelized/work/weights.1.1  |   12 +
 training/dtrain/examples/standard/README           |    2 +
 training/dtrain/examples/standard/cdec.ini         |   26 +
 training/dtrain/examples/standard/dtrain.ini       |   24 +
 training/dtrain/examples/standard/expected-output  | 1206 ++++++++++++++++++++
 training/dtrain/examples/standard/nc-wmt11.de.gz   |  Bin 0 -> 58324 bytes
 training/dtrain/examples/standard/nc-wmt11.en.gz   |  Bin 0 -> 49600 bytes
 .../dtrain/examples/standard/nc-wmt11.en.srilm.gz  |  Bin 0 -> 16017291 bytes
 .../dtrain/examples/standard/nc-wmt11.grammar.gz   |  Bin 0 -> 1399924 bytes
 training/dtrain/examples/toy/cdec.ini              |    3 +
 training/dtrain/examples/toy/dtrain.ini            |   13 +
 training/dtrain/examples/toy/expected-output       |   77 ++
 training/dtrain/examples/toy/grammar.gz            |  Bin 0 -> 219 bytes
 training/dtrain/examples/toy/src                   |    2 +
 training/dtrain/examples/toy/tgt                   |    2 +
 training/dtrain/lplp.rb                            |   18 +-
 training/dtrain/parallelize.rb                     |   26 +-
 training/dtrain/test/example/README                |    8 -
 training/dtrain/test/example/cdec.ini              |   25 -
 training/dtrain/test/example/dtrain.ini            |   22 -
 training/dtrain/test/example/expected-output       |   89 --
 training/dtrain/test/parallelize/README            |    5 -
 training/dtrain/test/parallelize/cdec.ini          |   22 -
 training/dtrain/test/parallelize/dtrain.ini        |   15 -
 .../dtrain/test/parallelize/g/grammar.out.0.gz     |  Bin 8318 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.1.gz     |  Bin 358560 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.2.gz     |  Bin 1014466 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.3.gz     |  Bin 391811 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.4.gz     |  Bin 149590 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.5.gz     |  Bin 537024 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.6.gz     |  Bin 291286 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.7.gz     |  Bin 1038140 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.8.gz     |  Bin 419889 -> 0 bytes
 .../dtrain/test/parallelize/g/grammar.out.9.gz     |  Bin 409140 -> 0 bytes
 training/dtrain/test/parallelize/in                |   10 -
 training/dtrain/test/parallelize/refs              |   10 -
 training/dtrain/test/toy/cdec.ini                  |    2 -
 training/dtrain/test/toy/dtrain.ini                |   12 -
 training/dtrain/test/toy/input                     |    2 -
 68 files changed, 1771 insertions(+), 252 deletions(-)
 create mode 100644 training/dtrain/examples/parallelized/README
 create mode 100644 training/dtrain/examples/parallelized/cdec.ini
 create mode 100644 training/dtrain/examples/parallelized/dtrain.ini
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.0.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.1.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.2.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.3.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.4.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.5.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.6.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.7.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.8.gz
 create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.9.gz
 create mode 100644 training/dtrain/examples/parallelized/in
 create mode 100644 training/dtrain/examples/parallelized/refs
 create mode 100644 training/dtrain/examples/parallelized/work/out.0.0
 create mode 100644 training/dtrain/examples/parallelized/work/out.0.1
 create mode 100644 training/dtrain/examples/parallelized/work/out.1.0
 create mode 100644 training/dtrain/examples/parallelized/work/out.1.1
 create mode 100644 training/dtrain/examples/parallelized/work/shard.0.0.in
 create mode 100644 training/dtrain/examples/parallelized/work/shard.0.0.refs
 create mode 100644 training/dtrain/examples/parallelized/work/shard.1.0.in
 create mode 100644 training/dtrain/examples/parallelized/work/shard.1.0.refs
 create mode 100644 training/dtrain/examples/parallelized/work/weights.0
 create mode 100644 training/dtrain/examples/parallelized/work/weights.0.0
 create mode 100644 training/dtrain/examples/parallelized/work/weights.0.1
 create mode 100644 training/dtrain/examples/parallelized/work/weights.1
 create mode 100644 training/dtrain/examples/parallelized/work/weights.1.0
 create mode 100644 training/dtrain/examples/parallelized/work/weights.1.1
 create mode 100644 training/dtrain/examples/standard/README
 create mode 100644 training/dtrain/examples/standard/cdec.ini
 create mode 100644 training/dtrain/examples/standard/dtrain.ini
 create mode 100644 training/dtrain/examples/standard/expected-output
 create mode 100644 training/dtrain/examples/standard/nc-wmt11.de.gz
 create mode 100644 training/dtrain/examples/standard/nc-wmt11.en.gz
 create mode 100644 training/dtrain/examples/standard/nc-wmt11.en.srilm.gz
 create mode 100644 training/dtrain/examples/standard/nc-wmt11.grammar.gz
 create mode 100644 training/dtrain/examples/toy/cdec.ini
 create mode 100644 training/dtrain/examples/toy/dtrain.ini
 create mode 100644 training/dtrain/examples/toy/expected-output
 create mode 100644 training/dtrain/examples/toy/grammar.gz
 create mode 100644 training/dtrain/examples/toy/src
 create mode 100644 training/dtrain/examples/toy/tgt
 delete mode 100644 training/dtrain/test/example/README
 delete mode 100644 training/dtrain/test/example/cdec.ini
 delete mode 100644 training/dtrain/test/example/dtrain.ini
 delete mode 100644 training/dtrain/test/example/expected-output
 delete mode 100644 training/dtrain/test/parallelize/README
 delete mode 100644 training/dtrain/test/parallelize/cdec.ini
 delete mode 100644 training/dtrain/test/parallelize/dtrain.ini
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.0.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.1.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.2.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.3.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.4.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.5.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.6.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.7.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.8.gz
 delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.9.gz
 delete mode 100644 training/dtrain/test/parallelize/in
 delete mode 100644 training/dtrain/test/parallelize/refs
 delete mode 100644 training/dtrain/test/toy/cdec.ini
 delete mode 100644 training/dtrain/test/toy/dtrain.ini
 delete mode 100644 training/dtrain/test/toy/input

diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index dfb5b351..fcb46db2 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -254,8 +254,6 @@ main(int argc, char** argv)
 
   time_t start, end;
   time(&start);
-  igzstream grammar_buf_in;
-  if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str());
   score_t score_sum = 0.;
   score_t model_sum(0);
   unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0;
diff --git a/training/dtrain/examples/parallelized/README b/training/dtrain/examples/parallelized/README
new file mode 100644
index 00000000..89715105
--- /dev/null
+++ b/training/dtrain/examples/parallelized/README
@@ -0,0 +1,5 @@
+run for example
+  ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs
+
+final weights will be in the file work/weights.3
+
diff --git a/training/dtrain/examples/parallelized/cdec.ini b/training/dtrain/examples/parallelized/cdec.ini
new file mode 100644
index 00000000..e43ba1c4
--- /dev/null
+++ b/training/dtrain/examples/parallelized/cdec.ini
@@ -0,0 +1,22 @@
+formalism=scfg
+add_pass_through_rules=true
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=200
+scfg_max_span_limit=15
+feature_function=WordPenalty
+feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz
+#feature_function=ArityPenalty
+#feature_function=CMR2008ReorderingFeatures
+#feature_function=Dwarf
+#feature_function=InputIndicator
+#feature_function=LexNullJump
+#feature_function=NewJump
+#feature_function=NgramFeatures
+#feature_function=NonLatinCount
+#feature_function=OutputIndicator
+#feature_function=RuleIdentityFeatures
+#feature_function=RuleNgramFeatures
+#feature_function=RuleShape
+#feature_function=SourceSpanSizeFeatures
+#feature_function=SourceWordPenalty
+#feature_function=SpanFeatures
diff --git a/training/dtrain/examples/parallelized/dtrain.ini b/training/dtrain/examples/parallelized/dtrain.ini
new file mode 100644
index 00000000..f19ef891
--- /dev/null
+++ b/training/dtrain/examples/parallelized/dtrain.ini
@@ -0,0 +1,16 @@
+k=100
+N=4
+learning_rate=0.0001
+gamma=0
+loss_margin=1.0
+epochs=1
+scorer=stupid_bleu
+sample_from=kbest
+filter=uniq
+pair_sampling=XYX
+hi_lo=0.1
+select_weights=last
+print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+# newer versions of the grammar extractor use different feature names:
+#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+decoder_config=cdec.ini
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz
new file mode 100644
index 00000000..1e28a24b
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz
new file mode 100644
index 00000000..372f5675
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz
new file mode 100644
index 00000000..145d0dc0
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz
new file mode 100644
index 00000000..105593ff
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz
new file mode 100644
index 00000000..30781f48
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz
new file mode 100644
index 00000000..834ee759
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz
new file mode 100644
index 00000000..2e76f348
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz
new file mode 100644
index 00000000..3741a887
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz
new file mode 100644
index 00000000..ebf6bd0c
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz
new file mode 100644
index 00000000..c1791059
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz differ
diff --git a/training/dtrain/examples/parallelized/in b/training/dtrain/examples/parallelized/in
new file mode 100644
index 00000000..51d01fe7
--- /dev/null
+++ b/training/dtrain/examples/parallelized/in
@@ -0,0 +1,10 @@
+<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg>
+<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg>
+<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg>
+<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg>
+<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg>
+<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg>
+<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg>
+<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg>
+<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg>
+<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg>
diff --git a/training/dtrain/examples/parallelized/refs b/training/dtrain/examples/parallelized/refs
new file mode 100644
index 00000000..632e27b0
--- /dev/null
+++ b/training/dtrain/examples/parallelized/refs
@@ -0,0 +1,10 @@
+europe 's divided racial house
+a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
+the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
+while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
+an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
+mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
+it will not , as america 's racial history clearly shows .
+race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
+the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
+this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/examples/parallelized/work/out.0.0 b/training/dtrain/examples/parallelized/work/out.0.0
new file mode 100644
index 00000000..7a00ed0f
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.0.0
@@ -0,0 +1,61 @@
+                cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 3121929377
+
+dtrain
+Parameters:
+                       k 100
+                       N 4
+                       T 1
+                  scorer 'stupid_bleu'
+             sample from 'kbest'
+                  filter 'uniq'
+           learning rate 0.0001
+                   gamma 0
+             loss margin 1
+                   pairs 'XYX'
+                   hi lo 0.1
+          pair threshold 0
+          select weights 'last'
+                  l1 reg 0 'none'
+               max pairs 4294967295
+                cdec cfg 'cdec.ini'
+                   input 'work/shard.0.0.in'
+                    refs 'work/shard.0.0.refs'
+                  output 'work/weights.0.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+  5
+WEIGHTS
+              Glue = +0.2663
+       WordPenalty = -0.0079042
+     LanguageModel = +0.44782
+ LanguageModel_OOV = -0.0401
+     PhraseModel_0 = -0.193
+     PhraseModel_1 = +0.71321
+     PhraseModel_2 = +0.85196
+     PhraseModel_3 = -0.43986
+     PhraseModel_4 = -0.44803
+     PhraseModel_5 = -0.0538
+     PhraseModel_6 = -0.1788
+       PassThrough = -0.1477
+        ---
+       1best avg score: 0.17521 (+0.17521)
+ 1best avg model score: 21.556 (+21.556)
+           avg # pairs: 1671.2
+        avg # rank err: 1118.6
+     avg # margin viol: 552.6
+    non0 feature count: 12
+           avg list sz: 100
+           avg f count: 11.32
+(time 0.37 min, 4.4 s/S)
+
+Writing weights file to 'work/weights.0.0' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.17521].
+This took 0.36667 min.
diff --git a/training/dtrain/examples/parallelized/work/out.0.1 b/training/dtrain/examples/parallelized/work/out.0.1
new file mode 100644
index 00000000..e2bd6649
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.0.1
@@ -0,0 +1,62 @@
+                cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 2767202922
+
+dtrain
+Parameters:
+                       k 100
+                       N 4
+                       T 1
+                  scorer 'stupid_bleu'
+             sample from 'kbest'
+                  filter 'uniq'
+           learning rate 0.0001
+                   gamma 0
+             loss margin 1
+                   pairs 'XYX'
+                   hi lo 0.1
+          pair threshold 0
+          select weights 'last'
+                  l1 reg 0 'none'
+               max pairs 4294967295
+                cdec cfg 'cdec.ini'
+                   input 'work/shard.0.0.in'
+                    refs 'work/shard.0.0.refs'
+                  output 'work/weights.0.1'
+              weights in 'work/weights.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+  5
+WEIGHTS
+              Glue = -0.2699
+       WordPenalty = +0.080605
+     LanguageModel = -0.026572
+ LanguageModel_OOV = -0.30025
+     PhraseModel_0 = -0.32076
+     PhraseModel_1 = +0.67451
+     PhraseModel_2 = +0.92
+     PhraseModel_3 = -0.36402
+     PhraseModel_4 = -0.592
+     PhraseModel_5 = -0.0269
+     PhraseModel_6 = -0.28755
+       PassThrough = -0.33285
+        ---
+       1best avg score: 0.26638 (+0.26638)
+ 1best avg model score: 53.197 (+53.197)
+           avg # pairs: 2028.6
+        avg # rank err: 998.2
+     avg # margin viol: 918.8
+    non0 feature count: 12
+           avg list sz: 100
+           avg f count: 10.496
+(time 0.32 min, 3.8 s/S)
+
+Writing weights file to 'work/weights.0.1' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.26638].
+This took 0.31667 min.
diff --git a/training/dtrain/examples/parallelized/work/out.1.0 b/training/dtrain/examples/parallelized/work/out.1.0
new file mode 100644
index 00000000..6e790e38
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.1.0
@@ -0,0 +1,61 @@
+                cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 1432415010
+
+dtrain
+Parameters:
+                       k 100
+                       N 4
+                       T 1
+                  scorer 'stupid_bleu'
+             sample from 'kbest'
+                  filter 'uniq'
+           learning rate 0.0001
+                   gamma 0
+             loss margin 1
+                   pairs 'XYX'
+                   hi lo 0.1
+          pair threshold 0
+          select weights 'last'
+                  l1 reg 0 'none'
+               max pairs 4294967295
+                cdec cfg 'cdec.ini'
+                   input 'work/shard.1.0.in'
+                    refs 'work/shard.1.0.refs'
+                  output 'work/weights.1.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+  5
+WEIGHTS
+              Glue = -0.3815
+       WordPenalty = +0.20064
+     LanguageModel = +0.95304
+ LanguageModel_OOV = -0.264
+     PhraseModel_0 = -0.22362
+     PhraseModel_1 = +0.12254
+     PhraseModel_2 = +0.26328
+     PhraseModel_3 = +0.38018
+     PhraseModel_4 = -0.48654
+     PhraseModel_5 = +0
+     PhraseModel_6 = -0.3645
+       PassThrough = -0.2216
+        ---
+       1best avg score: 0.10863 (+0.10863)
+ 1best avg model score: -4.9841 (-4.9841)
+           avg # pairs: 1345.4
+        avg # rank err: 822.4
+     avg # margin viol: 501
+    non0 feature count: 11
+           avg list sz: 100
+           avg f count: 11.814
+(time 0.45 min, 5.4 s/S)
+
+Writing weights file to 'work/weights.1.0' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.10863].
+This took 0.45 min.
diff --git a/training/dtrain/examples/parallelized/work/out.1.1 b/training/dtrain/examples/parallelized/work/out.1.1
new file mode 100644
index 00000000..0b984761
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.1.1
@@ -0,0 +1,62 @@
+                cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 1771918374
+
+dtrain
+Parameters:
+                       k 100
+                       N 4
+                       T 1
+                  scorer 'stupid_bleu'
+             sample from 'kbest'
+                  filter 'uniq'
+           learning rate 0.0001
+                   gamma 0
+             loss margin 1
+                   pairs 'XYX'
+                   hi lo 0.1
+          pair threshold 0
+          select weights 'last'
+                  l1 reg 0 'none'
+               max pairs 4294967295
+                cdec cfg 'cdec.ini'
+                   input 'work/shard.1.0.in'
+                    refs 'work/shard.1.0.refs'
+                  output 'work/weights.1.1'
+              weights in 'work/weights.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+  5
+WEIGHTS
+              Glue = -0.3178
+       WordPenalty = +0.11092
+     LanguageModel = +0.17269
+ LanguageModel_OOV = -0.13485
+     PhraseModel_0 = -0.45371
+     PhraseModel_1 = +0.38789
+     PhraseModel_2 = +0.75311
+     PhraseModel_3 = -0.38163
+     PhraseModel_4 = -0.58817
+     PhraseModel_5 = -0.0269
+     PhraseModel_6 = -0.27315
+       PassThrough = -0.16745
+        ---
+       1best avg score: 0.13169 (+0.13169)
+ 1best avg model score: 24.226 (+24.226)
+           avg # pairs: 1951.2
+        avg # rank err: 985.4
+     avg # margin viol: 951
+    non0 feature count: 12
+           avg list sz: 100
+           avg f count: 11.224
+(time 0.42 min, 5 s/S)
+
+Writing weights file to 'work/weights.1.1' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.13169].
+This took 0.41667 min.
diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.in b/training/dtrain/examples/parallelized/work/shard.0.0.in
new file mode 100644
index 00000000..92f9c78e
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.0.0.in
@@ -0,0 +1,5 @@
+<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg>
+<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg>
+<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg>
+<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg>
+<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg>
diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.refs b/training/dtrain/examples/parallelized/work/shard.0.0.refs
new file mode 100644
index 00000000..bef68fee
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.0.0.refs
@@ -0,0 +1,5 @@
+europe 's divided racial house
+a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
+the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
+while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
+an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.in b/training/dtrain/examples/parallelized/work/shard.1.0.in
new file mode 100644
index 00000000..b7695ce7
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.1.0.in
@@ -0,0 +1,5 @@
+<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg>
+<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg>
+<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg>
+<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg>
+<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg>
diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.refs b/training/dtrain/examples/parallelized/work/shard.1.0.refs
new file mode 100644
index 00000000..6076f6d5
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.1.0.refs
@@ -0,0 +1,5 @@
+mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
+it will not , as america 's racial history clearly shows .
+race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
+the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
+this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/examples/parallelized/work/weights.0 b/training/dtrain/examples/parallelized/work/weights.0
new file mode 100644
index 00000000..ddd595a8
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0
@@ -0,0 +1,12 @@
+LanguageModel	0.7004298992212881
+PhraseModel_2	0.5576194336478857
+PhraseModel_1	0.41787318415343155
+PhraseModel_4	-0.46728502545635164
+PhraseModel_3	-0.029839521598455515
+Glue	-0.05760000000000068
+PhraseModel_6	-0.2716499999999978
+PhraseModel_0	-0.20831031065605327
+LanguageModel_OOV	-0.15205000000000077
+PassThrough	-0.1846500000000006
+WordPenalty	0.09636994553433414
+PhraseModel_5	-0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.0.0 b/training/dtrain/examples/parallelized/work/weights.0.0
new file mode 100644
index 00000000..c9370b18
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0.0
@@ -0,0 +1,12 @@
+WordPenalty	-0.0079041595706392243
+LanguageModel	0.44781580828279532
+LanguageModel_OOV	-0.04010000000000042
+Glue	0.26629999999999948
+PhraseModel_0	-0.19299677809125185
+PhraseModel_1	0.71321026861732773
+PhraseModel_2	0.85195540993310537
+PhraseModel_3	-0.43986310822842656
+PhraseModel_4	-0.44802855630415955
+PhraseModel_5	-0.053800000000000514
+PhraseModel_6	-0.17879999999999835
+PassThrough	-0.14770000000000036
diff --git a/training/dtrain/examples/parallelized/work/weights.0.1 b/training/dtrain/examples/parallelized/work/weights.0.1
new file mode 100644
index 00000000..8fad3de8
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0.1
@@ -0,0 +1,12 @@
+WordPenalty	0.080605055841244472
+LanguageModel	-0.026571720531022844
+LanguageModel_OOV	-0.30024999999999141
+Glue	-0.26989999999999842
+PhraseModel_2	0.92000295209089566
+PhraseModel_1	0.67450748692470841
+PhraseModel_4	-0.5920000014976784
+PhraseModel_3	-0.36402437203127397
+PhraseModel_6	-0.28754999999999603
+PhraseModel_0	-0.32076244202907672
+PassThrough	-0.33284999999999004
+PhraseModel_5	-0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.1 b/training/dtrain/examples/parallelized/work/weights.1
new file mode 100644
index 00000000..03058a16
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1
@@ -0,0 +1,12 @@
+PhraseModel_2	0.8365578543552836
+PhraseModel_4	-0.5900840266009169
+PhraseModel_1	0.5312000609786991
+PhraseModel_0	-0.3872342271319619
+PhraseModel_3	-0.3728279676912084
+Glue	-0.2938500000000036
+PhraseModel_6	-0.2803499999999967
+PassThrough	-0.25014999999999626
+LanguageModel_OOV	-0.21754999999999702
+LanguageModel	0.07306061161169894
+WordPenalty	0.09576193325966899
+PhraseModel_5	-0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.1.0 b/training/dtrain/examples/parallelized/work/weights.1.0
new file mode 100644
index 00000000..6a6a65c1
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1.0
@@ -0,0 +1,11 @@
+WordPenalty	0.20064405063930751
+LanguageModel	0.9530439901597807
+LanguageModel_OOV	-0.26400000000000112
+Glue	-0.38150000000000084
+PhraseModel_0	-0.22362384322085468
+PhraseModel_1	0.12253609968953538
+PhraseModel_2	0.26328345736266612
+PhraseModel_3	0.38018406503151553
+PhraseModel_4	-0.48654149460854373
+PhraseModel_6	-0.36449999999999722
+PassThrough	-0.22160000000000085
diff --git a/training/dtrain/examples/parallelized/work/weights.1.1 b/training/dtrain/examples/parallelized/work/weights.1.1
new file mode 100644
index 00000000..f56ea4a2
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1.1
@@ -0,0 +1,12 @@
+WordPenalty	0.1109188106780935
+LanguageModel	0.17269294375442074
+LanguageModel_OOV	-0.13485000000000266
+Glue	-0.3178000000000088
+PhraseModel_2	0.75311275661967159
+PhraseModel_1	0.38789263503268989
+PhraseModel_4	-0.58816805170415531
+PhraseModel_3	-0.38163156335114284
+PhraseModel_6	-0.27314999999999739
+PhraseModel_0	-0.45370601223484697
+PassThrough	-0.16745000000000249
+PhraseModel_5	-0.026900000000000257
diff --git a/training/dtrain/examples/standard/README b/training/dtrain/examples/standard/README
new file mode 100644
index 00000000..ce37d31a
--- /dev/null
+++ b/training/dtrain/examples/standard/README
@@ -0,0 +1,2 @@
+Run `dtrain` from this folder with: ../../dtrain -c dtrain.ini
+
diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini
new file mode 100644
index 00000000..e1edc68d
--- /dev/null
+++ b/training/dtrain/examples/standard/cdec.ini
@@ -0,0 +1,26 @@
+formalism=scfg
+add_pass_through_rules=true
+scfg_max_span_limit=15
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=200
+grammar=nc-wmt11.grammar.gz
+feature_function=WordPenalty
+feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
+# all currently working feature functions for translation:
+# (with those features active that were used in the ACL paper)
+#feature_function=ArityPenalty
+#feature_function=CMR2008ReorderingFeatures
+#feature_function=Dwarf
+#feature_function=InputIndicator
+#feature_function=LexNullJump
+#feature_function=NewJump
+#feature_function=NgramFeatures
+#feature_function=NonLatinCount
+#feature_function=OutputIndicator
+feature_function=RuleIdentityFeatures
+feature_function=RuleSourceBigramFeatures
+feature_function=RuleTargetBigramFeatures
+feature_function=RuleShape
+#feature_function=SourceSpanSizeFeatures
+#feature_function=SourceWordPenalty
+#feature_function=SpanFeatures
diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini
new file mode 100644
index 00000000..a05e9c29
--- /dev/null
+++ b/training/dtrain/examples/standard/dtrain.ini
@@ -0,0 +1,24 @@
+input=./nc-wmt11.de.gz
+refs=./nc-wmt11.en.gz
+output=-                  # a weights file (add .gz for gzip compression) or STDOUT '-'
+select_weights=avg        # output average (over epochs) weight vector
+decoder_config=./cdec.ini # config for cdec
+# weights for these features will be printed on each iteration
+print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV
+# newer versions of the grammar extractor use different feature names:
+#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+stop_after=10 # stop epoch after 10 inputs
+
+# interesting stuff
+epochs=2                # run over input 2 times
+k=100                   # use 100best lists
+N=4                     # optimize (approx) BLEU4
+scorer=stupid_bleu      # use 'stupid' BLEU+1
+learning_rate=1.0       # learning rate, don't care if gamma=0 (perceptron)
+gamma=0                 # SVM regularization strength (0 = no regularization, i.e. perceptron)
+sample_from=kbest       # use kbest lists (as opposed to forest)
+filter=uniq             # only unique entries in kbest (surface form)
+pair_sampling=XYX       #
+hi_lo=0.1               # 10 vs 80 vs 10 and 80 vs 10 here
+pair_threshold=0        # minimum distance in BLEU (here: > 0)
+loss_margin=0
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output
new file mode 100644
index 00000000..8d72f4c3
--- /dev/null
+++ b/training/dtrain/examples/standard/expected-output
@@ -0,0 +1,1206 @@
+                cdec cfg './cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ./nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+  Example feature: Shape_S00000_T00000
+Seeding random number sequence to 1511823303
+
+dtrain
+Parameters:
+                       k 100
+                       N 4
+                       T 2
+                  scorer 'stupid_bleu'
+             sample from 'kbest'
+                  filter 'uniq'
+           learning rate 1
+                   gamma 0
+             loss margin 0
+                   pairs 'XYX'
+                   hi lo 0.1
+          pair threshold 0
+          select weights 'avg'
+                  l1 reg 0 'none'
+               max pairs 4294967295
+                cdec cfg './cdec.ini'
+                   input './nc-wmt11.de.gz'
+                    refs './nc-wmt11.en.gz'
+                  output '-'
+              stop_after 10
+(a dot represents 10 inputs)
+Iteration #1 of 2.
+ . 10
+Stopping after 10 input sentences.
+WEIGHTS
+   EgivenFCoherent = +0
+      SampleCountF = +0
+           CountEF = +0
+     MaxLexFgivenE = +0
+     MaxLexEgivenF = +0
+      IsSingletonF = +0
+     IsSingletonFE = +0
+              Glue = -576
+       WordPenalty = +417.79
+       PassThrough = -1455
+     LanguageModel = +5117.5
+ LanguageModel_OOV = -1307
+        ---
+       1best avg score: 0.27697 (+0.27697)
+ 1best avg model score: -47918 (-47918)
+           avg # pairs: 1129.8
+        avg # rank err: 581.9
+     avg # margin viol: 0
+    non0 feature count: 703
+           avg list sz: 90.9
+           avg f count: 100.09
+(time 0.33 min, 2 s/S)
+
+Iteration #2 of 2.
+ . 10
+WEIGHTS
+   EgivenFCoherent = +0
+      SampleCountF = +0
+           CountEF = +0
+     MaxLexFgivenE = +0
+     MaxLexEgivenF = +0
+      IsSingletonF = +0
+     IsSingletonFE = +0
+              Glue = -622
+       WordPenalty = +898.56
+       PassThrough = -2578
+     LanguageModel = +8066.2
+ LanguageModel_OOV = -2590
+        ---
+       1best avg score: 0.37119 (+0.094226)
+ 1best avg model score: -1.3174e+05 (-83822)
+           avg # pairs: 1214.9
+        avg # rank err: 584.1
+     avg # margin viol: 0
+    non0 feature count: 1115
+           avg list sz: 91.3
+           avg f count: 90.755
+(time 0.27 min, 1.6 s/S)
+
+Writing weights file to '-' ...
+R:X:NX_sein:N1_its	61.5
+WordPenalty	658.17328732437022
+LanguageModel	6591.8747593425214
+LanguageModel_OOV	-1948.5
+R:X:das_NX:this_N1	12
+R:X:NX_sein_NX:N1_from_ever_being_able_to_N2	30
+R:X:NX_bemühen:N1_effort	2.5
+RBS:X_bemühen	2.5
+R:X:sich:sich	-17.5
+RBT:<r>_sich	-17.5
+RBT:sich_</r>	-17.5
+RBS:sich_X	17.5
+RBS:<r>_als	147
+RBS:als_</r>	-59
+Shape_S10000_T10000	-1711.5
+RBT:<r>_when	84
+R:X:zum_NX:as_N1	-134
+RBS:<r>_zum	-30
+R:X:als_NX:as_N1	63
+R:X:zum_NX:'s_N1	33
+R:X:zum_NX:the_N1	24
+RBS:X_sich	-12
+R:X:zum_NX:to_N1	-36
+R:X:zum_NX:with_the_N1	83
+R:X:NX_zum:N1_the	-66
+R:X:NX_zum:N1_to	66
+R:X:als_NX:when_N1	84
+RBS:als_das	59
+RBS:X_das	-104
+R:X:NX_das:N1_a	28.5
+R:X:er_sich_NX:he_N1	86.5
+RBS:er_sich	29.5
+R:X:NX_das:N1_it	-6
+R:X:er_sich_NX:him_N1	-57
+RBT:<r>_declared	-488
+R:X:NX_das:N1_that	-5
+RBT:declared_</r>	-8
+R:X:NX_das:N1_the	-57
+R:X:NX_das:N1_this	-17
+R:X:NX_.:N1_.	-323
+RBS:X_.	134
+R:X:NX_.:N1_debate_.	6.5
+R:X:NX_.:N1_disruptions_.	-14.5
+R:X:NX_.:N1_established_.	7.5
+R:X:NX_.:N1_heading_.	17
+R:X:NX_.:N1_on_.	94
+R:X:NX_.:N1_pace_.	51.5
+R:X:NX_das_NX:N1_a_growing_N2	-45
+R:X:general:general	-23.5
+R:X:NX_.:N1_politics_.	84
+R:X:NX_das_NX:N1_a_N2	-0.5
+R:X:NX_.:N1_power_.	-99.5
+RBS:general_</r>	-23.5
+R:X:NX_.:N1_-_range_missiles_.	-28.5
+Shape_S11000_T11000	40
+RBT:general_</r>	-23.5
+RBT:<r>_.	-645
+R:X:betrat:entered	-91
+R:X:NX_.:N1_war_.	68.5
+RBS:<r>_betrat	23.5
+Shape_S11000_T01100	475.5
+RBT:<r>_entered	-91
+RBT:entered_</r>	-91
+R:X:NX_das_NX:N1_the_N2	-2
+R:X:betrat:betrat	114.5
+RBT:<r>_betrat	114.5
+RBT:betrat_</r>	114.5
+R:X:12:12	79
+R:X:maßnahmen:action	24
+R:X:.:.	-566
+RBS:12_</r>	79
+RBS:<r>_maßnahmen	-44.5
+RBS:<r>_.	-645
+RBT:._</r>	-566
+RBT:<r>_action	24
+RBT:12_</r>	79
+RBT:action_</r>	24
+R:X:maßnahmen:actions	-13
+RBT:<r>_actions	-13
+RBT:actions_</r>	-13
+R:X:12_NX:12_N1	-79
+RBT:declared_a	-428
+RBS:12_X	-79
+RBT:a_state	-428
+RBT:state_of	-428
+R:X:maßnahmen:maßnahmen	-55.5
+R:X:internationale_NX:global_N1	-270
+RBS:X_am	316.5
+RBT:<r>_maßnahmen	-55.5
+RBS:am_</r>	267.5
+RBT:maßnahmen_</r>	-55.5
+RBS:<r>_den	883
+R:X:internationale_NX:international_N1	270
+RBS:den_X	-286.5
+R:X:NX_am:N1_of	267.5
+R:X:NX_als:N1_a	-273.5
+RBS:am_X	-281
+R:X:den_NX:'s_N1	-31
+R:X:NX_am_NX:N1_of_N2	-30
+R:X:NX_am_NX:N1_on_N2	79
+R:X:NX_als:N1_'s	273.5
+R:X:NX_betrat:N1_entered	-23.5
+R:X:ins_NX:into_the_N1	-32.5
+RBS:X_betrat	-23.5
+RBT:into_the	-55
+R:X:ins_NX:into_N1	32.5
+RBT:<r>_their	303
+R:X:general_NX:general_N1	23.5
+RBS:general_X	23.5
+RBS:<r>_am	-316.5
+R:X:den_NX:the_N1	89
+R:X:den_NX_.:the_N1_.	86.5
+R:X:NX_und:and_N1	-216
+RBS:X_und	-203.5
+RBS:und_</r>	522.5
+RBT:<r>_and	438.5
+R:X:am_NX:at_N1	23
+R:X:NX_als_das:N1_than_the	59
+R:X:NX_und:N1_-	-114
+R:X:NX_und:N1_,	114
+R:X:am_NX:of_N1	-4
+R:X:am_NX:on_N1	-158.5
+R:X:am_NX:the_N1	-190
+RBS:<r>_seine	-16.5
+RBS:seine_</r>	39
+R:X:oktober:october	-79.5
+R:X:seine:his	-5.5
+RBS:<r>_oktober	-79.5
+R:X:seine:its	50
+RBT:<r>_october	-79.5
+RBT:october_</r>	-79.5
+R:X:seine_NX:a_N1	7.5
+RBS:seine_X	-39
+R:X:NX_und_NX:and_N1_N2	-22
+RBS:und_X	160.5
+R:X:seine_NX:his_N1	-97
+R:X:seine_NX:its_N1	102.5
+R:X:NX_und_NX:N1_,_and_N2	-4
+R:X:NX_maßnahmen:N1_actions	44.5
+RBS:X_maßnahmen	44.5
+R:X:seine_NX_als:his_N1_than	5.5
+R:X:seine_NX_als:its_N1_as	-64.5
+R:X:NX_und_NX:N1_,_N2	-7
+Shape_S01100_T11000	-312.5
+RBS:und_den	-822.5
+Shape_S01100_T01100	-537.5
+Shape_S01100_T11100	15
+R:X:NX_seine:'s_N1	-5.5
+RBS:X_seine	16.5
+RBS:X_den	-38
+R:X:amerika_NX_sich_NX:america_N1_N2	-12
+R:X:NX_seine_NX:'s_N1_N2	22
+R:X:auf_NX_den_NX:to_N1_the_N2	-23
+R:X:auf_NX_den_NX:to_N1_N2	-23
+RBS:<r>_unterstützen	-716
+RBS:unterstützen_</r>	-1
+Shape_S11100_T11000	783.5
+Shape_S11100_T01100	-716
+Shape_S11100_T11100	488
+R:X:unterstützen:unterstützen	-1
+RBT:<r>_unterstützen	-1
+RBT:unterstützen_</r>	-1
+R:X:unterstützen_NX:support_N1	-715
+RBS:unterstützen_X	-715
+RBT:<r>_will	-6
+RBS:X_unterstützen	716
+RBT:<r>_if	35
+R:X:NX_den_NX_.:N1_N2_.	41
+R:X:verfassung:constitution	15
+RBS:<r>_verfassung	-43
+RBT:<r>_constitution	15
+RBT:constitution_</r>	15
+R:X:verfassung:constitutional	9.5
+RBT:<r>_constitutional	9.5
+RBS:unterstützen_.	716
+RBT:constitutional_</r>	9.5
+R:X:NX_unterstützen_.:N1_.	716
+R:X:verfassung:verfassung	-67.5
+R:X:eine_NX:an_N1	162
+RBT:<r>_verfassung	-67.5
+RBT:verfassung_</r>	-67.5
+R:X:und:,	-21.5
+R:X:,_NX_zu_NX:to_N2_N1	-153
+RBS:<r>_und	-389.5
+R:X:und:and	-35
+RBS:angeführten_</r>	-716
+RBT:and_</r>	-35
+RBT:<r>_as	63
+RBS:versucht_</r>	68
+R:X:und:with	-3
+R:X:eine_NX:is_N1	-162
+RBS:angeführten_X	716
+R:X:und:und	91
+RBT:<r>_und	91
+RBT:und_</r>	91
+R:X:versucht:tried	68
+RBT:tried_</r>	68
+RBS:versucht_X	-68
+R:X:versucht_NX:tried_N1	-68
+R:X:und_NX:and_N1	250
+R:X:und_NX:with_N1	-18
+R:X:und_NX:,_N1	-7
+R:X:und_NX:N1_and	-12
+R:X:und_den_NX:and_N1	-716
+R:X:er:he	17
+R:X:NX_eine:N1_is	-7
+RBS:<r>_er	-47.5
+RBS:er_</r>	54
+RBT:<r>_he	485.5
+RBT:he_</r>	17
+RBT:<r>_him	-1
+R:X:und_NX_.:,_N1_.	-3
+R:X:er:his	91
+R:X:und_den_NX_.:and_the_N1_.	88
+R:X:NX_eine:N1_will	7
+R:X:er:it	3
+R:X:und_den_NX_.:and_N1_.	-216.5
+R:X:er:er	-196
+RBT:<r>_er	-196
+RBT:er_</r>	-196
+RBS:er_X	8
+R:X:er_NX:he_N1	399
+R:X:er_NX:it_N1	-379
+Shape_S01010_T01010	-599
+RBS:pakistanischen_</r>	43
+R:X:NX_versucht:N1_tried	196
+RBT:<r>_pakistan	-43
+RBT:<r>_pakistani	2
+R:X:er_NX_,_NX:he_N1_N2	-12
+R:X:NX_hat_er:N1_,_he_has	196
+RBS:hat_er	196
+R:X:NX_er:he_N1	-17
+RBS:X_er	-148.5
+RBS:pakistanischen_X	-43
+R:X:NX_er:it_N1	-7
+RBS:X_verfassung	43
+R:X:NX_verfassung:N1_'s_constitution	43
+R:X:NX_hat_NX_versucht:N1_N2_has_tried	-190
+R:X:NX_hat_NX_versucht:N1_,_N2_has_tried	-6
+RBS:der_pakistanischen	43
+RBS:X_pakistanischen	-43
+RBS:<r>_aber	46
+RBS:,_als	-147
+RBT:<r>_but	-321
+R:X:aber_NX:but_N1	46
+R:X:von_NX_angeführten:N1_-_led	-716
+R:X:von_NX_angeführten_NX:N1_-_led_N2	716
+RBS:,_aber	-114
+RBS:X_aber	68
+R:X:,_als_NX:,_as_N1	-40
+R:X:NX_aber_NX_,:N1_N2_to	68
+R:X:NX_pakistanischen_NX_.:pakistan_N1_N2_.	-43
+R:X:NX_,_aber_NX:N1_,_N2	-114
+RBS:<r>_rahmen	43
+RBS:rahmen_</r>	43
+R:X:rahmen:within	20
+R:X:rahmen:rahmen	23
+RBT:<r>_rahmen	23
+RBT:rahmen_</r>	23
+Shape_S01110_T11010	35.5
+R:X:NX_der_pakistanischen:N1_pakistan	43
+Shape_S01110_T01110	-1195
+Shape_S01110_T11110	-6.5
+R:X:NX_,_NX_er:N1_N2_he	-33
+RBS:geben_X	-577.5
+RBS:<r>_gestalten	196
+Shape_S01110_T01011	278
+RBS:gestalten_</r>	196
+RBS:geben_und	577.5
+R:X:gestalten:more	221
+Shape_S01110_T01111	-181.5
+RBT:<r>_more	221
+RBT:more_</r>	221
+R:X:gestalten:gestalten	-25
+RBT:<r>_gestalten	-25
+RBT:gestalten_</r>	-25
+R:X:effektiver:effectively	-151
+RBS:<r>_effektiver	54
+RBS:effektiver_</r>	-221
+RBT:<r>_effectively	-151
+RBT:effectively_</r>	-151
+R:X:effektiver:effektiver	-99
+RBT:<r>_effektiver	-99
+RBT:effektiver_</r>	-99
+Shape_S11110_T11010	-1130
+RBS:zu_geben	-107.5
+R:X:effektiver_zu_NX:N1_effectively	304
+RBS:effektiver_zu	221
+RBS:X_geben	107.5
+Shape_S11110_T01110	621
+Shape_S11110_T11110	-75
+RBS:X_gestalten	-196
+R:X:NX_gestalten_.:N1_.	-196
+RBS:gestalten_.	-196
+R:X:terror:terror	672
+RBS:<r>_terror	-16
+RBS:terror_</r>	640
+R:X:den:-	-4
+RBT:<r>_terror	136
+RBT:terror_</r>	646
+RBS:den_</r>	42.5
+R:X:den:for	-11.5
+R:X:terror:terrorism	-54
+RBT:<r>_terrorism	-54
+Shape_S11110_T11011	-4.5
+RBT:terrorism_</r>	-54
+R:X:terror_NX:terror_N1	-634
+R:X:den:of	-17
+RBS:terror_X	-640
+R:X:den:'s	32.5
+Shape_S11110_T01111	-1.5
+R:X:NX_effektiver:N1_more_effectively	29
+RBS:X_effektiver	-54
+R:X:den:the	68
+R:X:NX_geben_und:N1_and	107.5
+R:X:NX_effektiver_zu_NX:N1_N2_effectively	-83
+R:X:den:to	-33
+RBS:1999_</r>	-302.5
+R:X:,_NX_zu_geben_NX:to_N1_N2	-577.5
+R:X:den:with	-10
+RBS:X_terror	-4.5
+R:X:,_NX_zu_geben_und:to_N1_and	470
+R:X:NX_1999:N1_1999	-302.5
+R:X:NX_1999_NX:N2_N1_1999	302.5
+RBS:1999_X	302.5
+R:X:den_NX_zu:to_N1	783.5
+R:X:NX_rahmen_der:N1_the	-43
+RBS:X_rahmen	-43
+RBS:rahmen_der	-43
+RBS:gegen_</r>	22.5
+R:X:gegen:against	-2
+RBT:<r>_against	-2
+RBT:against_</r>	-2
+R:X:._NX:._N1	-79
+RBS:._X	-79.5
+RBS:gegen_den	-22.5
+R:X:NX_._oktober:october_N1	79.5
+RBS:._oktober	79.5
+R:X:am_NX_._NX:the_N2_N1	-0.5
+R:X:gegen_den_NX:on_N1	2
+RBS:den_terror	20.5
+RBT:on_terror	-26
+R:X:NX_den_terror:the_N1_terror	29
+R:X:den_NX_den_NX:the_N1_N2	-110.5
+R:X:den_NX_den_NX:N2_the_N1	-95
+RBT:<unk>_the	-1.5
+R:X:krieg:war	-4.5
+RBS:<r>_krieg	-22
+R:X:musharraf:musharraf	43
+RBS:krieg_</r>	-4.5
+RBT:<r>_war	-22
+RBS:<r>_musharraf	66.5
+RBS:musharraf_</r>	-23.5
+RBT:war_</r>	-4.5
+R:X:musharraf_NX:musharraf_imposed_N1	23.5
+RBS:musharraf_X	23.5
+RBT:musharraf_imposed	23.5
+RBS:krieg_gegen	4.5
+R:X:musharraf_NX:musharraf_N1	107
+R:X:krieg_gegen:war_on	24.5
+RBT:war_on	-17.5
+RBS:X_gegen	-4.5
+R:X:musharraf_NX_,_als_NX:musharraf_N1_as_N2	-20
+R:X:musharraf_NX_,_als_NX:musharraf_N1_N2	-87
+R:X:krieg_gegen_den_NX:war_on_N1	-16
+R:X:krieg_gegen_den_terror:war_on_terror	-26
+R:X:pervez:pervez	22
+RBS:<r>_pervez	22
+RBS:pervez_</r>	57.5
+RBS:X_krieg	22
+RBT:<r>_pervez	22
+RBT:pervez_</r>	22
+RBS:pervez_musharraf	-57.5
+RBS:X_musharraf	-9
+R:X:NX_musharraf:N1_musharraf	-9
+R:X:den_NX_gegen_den:the_N1_on	-4.5
+R:X:den_NX_den_terror:the_N1_terror	-3
+R:X:NX_krieg_gegen_den_terror:N1_war_on_terror	22
+R:X:den_NX_den_terror_NX:N2_the_N1_terror	-1.5
+RBT:<r>_project	91
+RBS:hat_</r>	2
+RBS:X_-	14
+R:X:NX_-:,_N1	48.5
+R:X:NX_-:N1_months_of	32
+R:X:NX_-:N1_relief_and	64
+R:X:NX_-:N1_'s	-144.5
+RBS:hat_X	-198
+R:X:und_NX_terror_NX:and_N2_N1_terror	-4.5
+RBT:and_<unk>	-4.5
+R:X:sorgen:bring	-19
+RBS:X_pervez	-22
+RBT:<r>_bring	-19
+RBT:bring_</r>	-19
+R:X:sorgen:ensure	19
+RBT:<r>_ensure	19
+RBT:ensure_</r>	19
+R:X:NX_-_NX:N1_N2_security	-4
+R:X:NX_projekt_NX:N2_N1_project	-156
+R:X:NX_-_NX_.:N1_N2_.	18
+R:X:NX_projekt_NX_.:N2_N1_project_.	156
+RBS:<r>_-	-14
+RBT:to_ensure	0.5
+R:X:NX_hat:has_N1	-5
+R:X:NX_hat:N1_,	3
+R:X:NX_hat:,_N1	21.5
+R:X:NX_hat:N1_has	-17
+R:X:NX_hat:N1_is	-0.5
+R:X:-_NX:of_N1	-26
+R:X:-_NX:'s_N1	-58
+R:X:NX_hat_NX:N1_,_N2	-73
+R:X:NX_hat_NX:N1_N2_has	28
+R:X:-_NX:-_N1	122
+R:X:NX_hat_NX:N1_,_N2_has	21
+R:X:-_NX:--_N1	-21
+R:X:-_NX:,_N1	-31
+R:X:stabilität:stability	-118
+RBS:<r>_stabilität	-129
+RBT:<r>_stability	-118
+RBT:stability_</r>	-118
+R:X:stabilität:stabilität	-11
+RBT:<r>_stabilität	-11
+RBT:stabilität_</r>	-11
+RBT:<r>_country	253
+RBS:<r>_für	101
+RBS:für_</r>	129
+RBS:X_ihres	-16
+R:X:NX_ihres_NX:N1_of_their_N2	-16
+R:X:für:that	129
+RBT:<r>_political	-16
+RBS:für_X	-129
+R:X:,_NX_und_NX:,_N1_N2	-2
+R:X:für_NX:to_N1	-28
+R:X:NX_stabilität:N1_stability	129
+RBS:X_stabilität	129
+RBS:X_für	22
+RBT:<unk>_with	-109
+RBS:,_für	-123
+R:X:,_für_NX:,_N1	15.5
+R:X:,_NX_den_NX_zu:to_N2_N1	69
+R:X:NX_für_NX_.:N1_N2_.	22
+RBS:<r>_ihres	16
+R:X:ihres_NX:its_N1	-50
+R:X:ihres_NX:their_N1	66
+R:X:NX_zu_verkaufen_NX:sell_N1_N2	140.5
+RBS:verkaufen_X	140.5
+RBS:<r>_würde	-204
+RBS:würde_</r>	-117
+R:X:würde:would	-204
+RBS:würde_X	126
+R:X:in_NX_hat_NX:in_N1_N2	22
+R:X:NX_dem_NX_pervez:N1_N2_pervez	35.5
+RBS:<r>_halten	284
+RBS:halten_</r>	204
+R:X:NX_dem_NX_pervez_musharraf:N1_N2_pervez_musharraf	-57.5
+Shape_S01111_T01011	560.5
+Shape_S01111_T11011	-20.5
+Shape_S01111_T01111	-5
+RBT:<r>_maintain	30
+R:X:halten:halten	284
+RBT:<r>_halten	284
+RBT:halten_</r>	284
+RBS:halten_X	-204
+R:X:NX_würde:if_N1	35
+RBS:X_würde	204
+R:X:NX_würde:will_N1	-6
+Shape_S11111_T11010	69
+R:X:NX_würde:would_face_a_N1	-9.5
+RBT:would_face	-18.5
+RBT:face_a	-18.5
+Shape_S11111_T11110	-57
+R:X:NX_würde:would_N1	78
+R:X:NX_würde:N1_will	-10.5
+R:X:NX_würde_NX:would_N1_N2	126
+R:X:NX_würde_.:would_face_a_N1_.	-9
+RBS:würde_.	-9
+PhraseModel_0	-2973.8953021225416
+R:X:vielleicht:may	-177
+PhraseModel_1	-4012.0052074229625
+PhraseModel_2	-1203.5725821427027
+RBS:vielleicht_</r>	-284
+PhraseModel_3	2747.8420998127522
+PhraseModel_4	-3205.3163436680484
+PhraseModel_5	720.5
+PhraseModel_6	275
+R:X:vielleicht:vielleicht	-107
+RBT:<r>_vielleicht	-107
+RBT:vielleicht_</r>	-107
+R:X:vielleicht_NX:perhaps_N1	284
+RBS:vielleicht_X	284
+R:X:NX_halten:maintain_the_N1	-29
+RBS:X_halten	-284
+RBT:maintain_the	-174
+R:X:NX_halten:N1_hold	-51
+R:X:NX_halten_NX:N2_maintain_the_N1	-204
+RBT:<unk>_maintain	-204
+RBS:<r>_versprechen	30
+RBS:versprechen_</r>	-75
+RBT:<r>_commitment	107
+R:X:versprechen_NX:commitment_N1	30
+RBS:versprechen_X	75
+R:X:NX_versprechen:N1_commitment	-75
+RBS:X_versprechen	-30
+R:X:NX_,_für_NX:N1_,_N2	-138.5
+R:X:NX_versprechen_NX:N1_commitment_N2	45
+RBS:<r>_dass	-451
+RBS:dass_</r>	-91.5
+R:X:dass_NX:that_N1	-451
+RBS:dass_X	91.5
+R:X:NX_er_sein:N1_to_make_up_for_his	-91.5
+RBS:er_sein	-91.5
+R:X:seine_NX_und:a_N1_,	-15
+R:X:NX_,_NX_und:N1_N2_,	129
+RBS:,_dass	851.5
+R:X:NX_,_dass:N1_keep	-27
+R:X:NX_,_dass:N1_said_that	-0.5
+R:X:NX_,_dass:N1_to_let	-9.5
+R:X:NX_dass:that_N1	-8.5
+RBS:X_dass	-400.5
+R:X:NX_dass:N1_let	-51.5
+R:X:NX_dass:N1_see	-243.5
+R:X:NX_dass:N1_thought	-97
+R:X:NX_,_dass_NX:N1_that_N2	134
+Glue	-599
+PassThrough	-2016.5
+R:X:musharrafs:his	2
+RBS:musharrafs_</r>	-29
+R:X:NX_und_den:N1_and_the	22
+RBT:<r>_his	250.5
+RBT:his_</r>	160.5
+R:X:musharrafs:musharraf	-1.5
+RBT:<r>_musharraf	135.5
+RBT:musharraf_</r>	41.5
+R:X:NX_,_dass_NX_.:N1_N2_.	91.5
+R:X:musharrafs:musharrafs	-29.5
+RBT:<r>_musharrafs	-29.5
+RBT:musharrafs_</r>	-29.5
+RBS:sie_X	346
+RBS:<r>_X	-1369.5
+R:X:dies:so	-74.5
+RBS:X_</r>	-1743
+RBS:dies_</r>	-348
+R:X:dies:so_,_this	47
+RBT:so_,	47
+R:X:sie_NX:it_N1	22
+RBT:,_this	47
+R:X:dies:that	-256.5
+R:X:NX_?:N1_?	-134.5
+R:X:dies:these	-5.5
+RBS:X_?	-235
+RBT:<r>_these	-5.5
+RBT:these_</r>	-5.5
+R:X:NX_?:N1_consulting_?	-100.5
+R:X:dies:this	-58.5
+R:X:letzter_NX:last_N1	-14
+RBS:<r>_letzter	-20
+RBS:letzter_X	19.5
+RBT:<r>_last	-2
+R:X:letzter:last	7
+RBS:letzter_</r>	-19.5
+R:X:sein:be	1.5
+RBT:last_</r>	7
+R:X:letzter:late	11.5
+RBT:<r>_they	-6
+RBS:sein_</r>	68
+RBT:<r>_late	11.5
+R:X:ist_NX:be_N1	464.5
+RBT:<r>_be	-10.5
+RBT:late_</r>	11.5
+R:X:sie_NX:they_N1	-22
+RBS:<r>_ist	415.5
+RBT:be_</r>	120
+R:X:letzter:letzter	-24.5
+RBS:ist_X	8
+R:X:sein:being	-16
+RBT:<r>_letzter	-24.5
+R:X:ist_NX:has_N1	16
+RBT:<r>_being	-79
+RBT:letzter_</r>	-24.5
+R:X:ist_NX:is_at_N1	6
+RBT:being_</r>	-16
+R:X:musharrafs_NX:his_N1	-25
+R:X:sein:his	73
+RBS:musharrafs_X	29
+R:X:ist_NX:is_well_N1	6
+R:X:sein:its	-15.5
+R:X:musharrafs_NX:musharraf_'s_N1	77.5
+R:X:sein:sein	55
+RBT:musharraf_'s	55.5
+R:X:ist_NX:is_N1	23
+RBT:<r>_sein	55
+R:X:musharrafs_NX:musharraf_N1	-23.5
+R:X:ist_NX:more_N1	-130.5
+RBT:sein_</r>	55
+R:X:NX_letzter:N1_late	-26.5
+R:X:ist_NX:N1_be	176
+R:X:ziel:aim	-32.5
+RBS:X_letzter	20
+R:X:ist_NX:N1_has	-67
+RBS:<r>_ziel	-143
+R:X:NX_letzter:N1_'s_last	13
+R:X:ist_NX:N1_is	-19
+RBS:ziel_</r>	-219
+R:S:NS_NX:N1_N2	-599
+R:X:ist_NX:N1_,_is	18
+RBT:<r>_aim	-32.5
+RBS:<r>_S	-599
+R:X:ist_NX:N1_it_is	49
+RBT:aim_</r>	-32.5
+RBS:S_X	-599
+R:X:ist:are	-65.5
+R:X:ziel:goal	45
+R:X:NX_letzter_NX:N1_'s_last_N2	33.5
+RBS:ist_</r>	-8
+RBT:<r>_goal	45
+R:X:?:?	235
+RBT:goal_</r>	45
+RBS:<r>_?	235
+R:X:ziel:target	-22.5
+RBT:<r>_?	235
+RBS:X__	-347
+RBT:<r>_target	-22.5
+RBT:?_</r>	235
+RBT:target_</r>	-22.5
+R:X:ist:'s	-61
+R:X:ziel:targets	-18
+RBS:in_</r>	-22
+RBT:<r>_targets	-18
+RBT:targets_</r>	-18
+RBT:<r>_,	24.5
+R:X:ziel:ziel	-125
+RBT:,_</r>	-38
+R:X:NX___NX:N1___N2	-347
+R:X:dies_NX:so_N1	200
+RBT:<r>_ziel	-125
+RBS:dies_X	256
+RBT:ziel_</r>	-125
+RBT:<r>_at	23
+R:X:dies_NX:this_to_N1	156.5
+R:X:ziel_NX:goal_N1	49
+RBT:this_to	156.5
+RBS:ziel_X	219
+R:X:dies_NX:this_N1	-100.5
+R:X:ziel_NX:targets_N1	-19
+R:X:dies_ist:could_be	118.5
+R:X:ziel_NX:target_N1	-20
+RBS:dies_ist	92
+R:X:sein_NX:being_able_to_N1	-71.5
+RBT:in_</r>	-65.5
+R:X:in:for	31
+RBT:<r>_could	118.5
+RBS:sein_X	-68
+RBT:could_be	118.5
+RBT:being_able	-63
+RBT:<r>_for	14.5
+RBT:able_to	-63
+RBT:for_</r>	14.5
+R:X:sein_NX:be_N1	-10
+R:X:sein_NX:his_N1	184.5
+RBS:X_ist	-507.5
+R:X:sein_NX:its_N1	-26.5
+R:X:in:in	-53
+R:X:sein_NX:N1_be	-174.5
+R:X:NX_ziel:N1_aim	-32.5
+RBT:<r>_in	-75.5
+RBS:X_ziel	143
+R:X:NX_ziel:N1_goal	20
+R:X:NX_ziel:N1_target	-26.5
+R:X:NX_ziel:N1_targets	-27
+RBT:<r>_into	-270
+R:X:NX_ziel_NX:N1_goal_N2	60
+R:X:NX_ziel_NX:N1_targets_N2	-6
+R:X:NX_sie_NX_,_dass:N1_N2_that	346
+R:X:NX_ziel_NX:N1_target_N2	-6
+R:X:dies_ist_NX:this_is_N1	-26.5
+R:X:NX_ziel_NX:N2_N1_goal	161
+RBT:<r>_of	-38
+RBT:of_</r>	-17
+R:X:NX_ist_NX:is_N1_N2	-129
+RBS:<r>_die	428.5
+R:X:NX_ist_NX:is_N1_,_N2	16.5
+RBS:die_</r>	-116
+RBT:<r>_on	-653.5
+RBT:on_</r>	84.5
+R:X:NX_ist_NX:'s_N1_N2	-41.5
+R:X:die:,	-9
+RBT:<r>_over	45
+R:X:die:a	-5
+R:X:NX_ist_NX:N1_has_N2	-104.5
+R:X:blieben_NX:remained_N1	135
+R:X:die:an	-123
+R:X:NX_ist_NX:N1_is_at_N2	-5.5
+RBS:<r>_blieben	187.5
+R:X:NX_ist_NX:N1_is_well_N2	-5
+RBS:blieben_X	-13
+RBT:<r>_are	-65.5
+RBT:<r>_'s	16
+R:X:NX_ist_NX:N1_is_N2	-31
+RBT:are_</r>	-65.5
+RBT:'s_</r>	-28.5
+R:X:blieben_NX:N1_remained	81.5
+R:X:NX_ist_NX:N1_,_is_N2	59.5
+R:X:die:by	-10
+R:X:die:its	302.5
+RBS:<r>_pakistanis	57
+RBS:pakistanis_</r>	116.5
+RBT:<r>_to	93.5
+RBT:<r>_pakistanis	161
+R:X:NX_ist_NX:N1_N2_has	-75
+R:X:die:the	-28
+RBT:to_</r>	18
+R:X:NX_ist_NX:N1_N2_is	-97.5
+R:X:pakistanis_NX:pakistanis_N1	57
+R:X:NX_ist_NX:N1_,_N2_is	-1
+RBT:<r>_those	-6
+RBT:<r>_within	20
+RBT:within_</r>	20
+RBS:pakistanis_X	-116.5
+R:X:NX_blieben_NX:N1_,_N2_remained	-229.5
+R:X:NX_ist_NX:N2_is_N1	-47
+RBS:X_blieben	-187.5
+RBT:<unk>_is	-21
+R:X:NX_pakistanis:pakistanis_,_N1	235.5
+RBS:X_pakistanis	-57
+RBT:pakistanis_,	104
+R:X:NX_pakistanis:N1_pakistanis	-119
+R:X:NX_ist_NX:N2_N1_is	-46.5
+RBS:blieben_</r>	13
+RBT:<r>_is	-251
+R:X:blieben:blieben	-29
+RBT:<r>_blieben	-29
+RBT:blieben_</r>	-29
+R:X:NX_pakistanis_NX:pakistanis_,_N1_,_N2	-23
+RBS:<r>_zu	-560
+R:X:NX_pakistanis_NX:N1_pakistanis_N2	-150.5
+RBS:zu_X	-717.5
+R:X:NX_blieben:N1_,_remained	42
+RBS:<r>__	347
+RBS:<r>_ein	37.5
+RBS:ein_</r>	-9.5
+RBS:der_</r>	-88.5
+R:X:zu_NX:for_N1	43
+R:X:__NX:__N1	-97
+RBT:<r>_-	113
+RBT:-_</r>	-4
+R:X:__NX:,_N1	444
+R:X:zu_NX:in_N1	37.5
+RBT:<r>_a	-27.5
+RBT:a_</r>	-5
+RBS:sie_</r>	-346
+RBT:the_</r>	40
+R:X:zu_NX:to_N1	-716
+R:X:zu_NX:with_N1	40.5
+R:X:zu_NX:N1_on	30
+RBT:<r>_the	324.5
+R:X:NX_sie:but_N1	-346
+RBS:X_ein	-37.5
+RBT:be_transformed	-12
+R:X:medien:media	299.5
+RBS:<r>_medien	-71.5
+RBT:<r>_with	54.5
+RBS:medien_</r>	-156
+RBT:with_</r>	-19
+RBT:<r>_media	299.5
+R:X:NX_ein:N1_has_an	-3.5
+RBT:media_</r>	299.5
+R:X:NX_ein:N1_put_forward_a	-6
+R:X:medien:medien	-371
+RBT:<r>_medien	-371
+RBT:medien_</r>	-371
+RBS:der_X	45
+RBS:medien_X	156
+R:X:NX_zu_NX:in_N2_N1	-9.5
+RBS:X_zu	339
+RBT:in_<unk>	-2.5
+R:X:NX_zu_NX:of_N2_N1	-52.5
+RBT:to_<unk>	-102.5
+RBT:<unk>_to	30
+R:X:,_dass_NX:that_N1	317
+R:X:NX_zu_NX:to_N2_N1	19
+R:X:NX_zu_NX:N1_in_N2	-2
+R:X:NX_zu_NX:N1_is_N2	-2
+RBS:X_macht	-0.5
+R:X:NX_zu_NX:N1_to_N2	48
+R:X:NX_macht_NX:N1_N2_does	-0.5
+R:X:NX_zu_NX:N2_N1_to	-28
+R:X:NX_zu_NX_.:to_N2_N1_.	22.5
+RBS:an_</r>	28
+R:X:NX_zu_NX_.:N1_is_N2_.	-3.5
+R:X:NX_zu_NX_.:N1_to_N2_.	7.5
+R:X:NX_zu_NX_.:N1_with_N2_.	-3
+R:X:NX_zu_NX_.:N1_N2_.	-221.5
+R:X:NX_zu_NX_.:N2_N1_.	4.5
+R:X:freien:free	-83.5
+RBS:<r>_freien	-118
+RBS:freien_</r>	-201.5
+RBT:<r>_free	210
+RBT:free_</r>	-83.5
+R:X:freien:freien	-276
+RBT:<r>_freien	-276
+RBT:freien_</r>	-276
+RBT:<r>_an	31.5
+R:X:freien_NX:free_N1	248
+RBT:an_</r>	-123
+RBS:freien_X	201.5
+R:X:NX_medien:N1_media	-90
+RBS:X_medien	71.5
+R:X:amerika:america	193
+RBS:<r>_amerika	-36
+R:X:NX_medien_NX:N2_N1_media	5
+R:X:an_NX:in_N1	210
+R:X:freien_NX_.:free_N1_.	-6.5
+RBS:amerika_</r>	-131
+R:X:NX_medien_NX_.:N2_N1_media_.	151
+RBT:<r>_america	283.5
+RBT:america_</r>	193
+R:X:die_NX:an_N1	-7.5
+R:X:amerika:american	-3
+RBS:die_X	-45.5
+RBT:<r>_american	-3
+RBT:american_</r>	-3
+R:X:amerika:amerika	-321
+RBS:<r>_jener	62.5
+R:X:die_NX:a_N1	19
+RBT:<r>_amerika	-321
+RBS:jener_X	62.5
+RBT:amerika_</r>	-321
+R:X:jener_NX:the_N1	62.5
+R:X:an_NX:to_N1	-210
+RBS:X_jener	-62.5
+RBS:amerika_X	131
+R:X:amerika_NX:america_N1	107
+R:X:die_NX:is_N1	-2.5
+RBS:an_der	-28
+R:X:auf:,	-5
+R:X:die_NX:its_N1	-14
+RBS:auf_</r>	46.5
+R:X:die_NX:'s_N1	46.5
+RBS:X_der	71
+R:X:NX_der:N1_for	-74
+R:X:NX_der:N1_in	-43
+R:X:auf:in	-5.5
+RBT:<r>_choice	-103
+R:X:die_NX:the_N1	-86.5
+RBT:<r>_decision	103
+R:X:auf:on	60
+R:X:die_NX:those_N1	-6
+R:X:NX_der:N1_to	72
+R:X:entscheidung_NX:choice_is_N1	-103
+R:X:die_NX:with_N1	73.5
+R:X:auf:auf	-3
+RBT:choice_is	-103
+RBT:<r>_auf	-3
+R:X:entscheidung_NX:decision_N1	103
+R:X:die_NX:,_N1	57
+R:X:die_NX:N1_is	-0.5
+RBT:auf_</r>	-3
+R:X:die_NX:N1_'s	-1
+RBS:auf_X	-46.5
+R:X:die_NX:N1_the	-1
+R:X:NX_freien:N1_free	158
+RBT:of_<unk>	-13
+RBS:X_freien	118
+R:X:NX_der_NX:over_N2_N1	45
+R:X:NX_freien_NX:N1_free_N2	-34
+R:X:NX_freien_NX:N1_free_,_N2	-6
+RBT:over_<unk>	45
+R:X:die_NX_medien:the_N1_media	5.5
+R:X:auf_NX:in_N1	-46.5
+RBT:the_<unk>	-0.5
+R:X:auf_NX:on_N1	66
+R:X:auf_NX:to_N1	-2
+R:X:auf_NX:,_N1	-18
+RBS:X_amerika	36
+RBT:<r>_may	-177
+RBS:und_die	139.5
+RBT:may_</r>	-177
+RBT:<r>_<unk>	585.5
+RBT:<r>_would	-18.5
+RBS:X_die	-568
+RBT:would_</r>	-204
+R:X:NX_die:the_N1	34.5
+R:X:NX_amerika_NX:N2_N1_america	36
+R:X:terroranschläge:terrorist	-22
+R:X:NX_die:,_N1	-42
+R:X:NX_die:N1_,	-173
+RBS:<r>_terroranschläge	-161.5
+RBS:der_macht	0.5
+R:X:NX_die:-_N1	-5
+RBS:terroranschläge_</r>	-46
+R:X:NX_die:N1_a	-1
+RBT:<r>_terrorist	-119.5
+R:X:NX_der_macht_NX:N1_hold_N2_power	28
+RBT:terrorist_</r>	-22
+R:X:,:,	-2.5
+RBT:terrorist_attacks	77.5
+RBS:<r>_,	-182
+RBT:attacks_</r>	28
+RBS:,_</r>	-160.5
+R:X:terroranschläge:terroranschläge	-52
+RBT:<r>_terroranschläge	-52
+RBT:<r>__	-139
+RBT:terroranschläge_</r>	-52
+R:X:NX_die:N1_its	-128.5
+RBS:terroranschläge_X	46
+RBT:<r>_--	-64
+R:X:terroranschläge_NX:terrorist_attacks_N1	-87.5
+RBT:<r>_by	-10
+RBT:by_</r>	-10
+R:X:,:out	-3.5
+RBT:<r>_out	-3.5
+R:X:und_die_NX:and_N1	218
+RBT:out_</r>	-3.5
+RBT:<r>_that	-261.5
+R:X:NX_die_NX:the_N1_N2	-1
+RBT:that_</r>	-127.5
+R:X:NX_die_NX:the_N2_N1	-4
+RBS:,_X	-335
+RBT:,_as	-40
+R:X:,_NX:in_N1	-239
+R:X:,_NX:of_N1	-4
+R:X:,_NX:on_N1	-166
+R:X:,_NX:to_N1	649
+R:X:NX_die_NX:N1_the_N2	-4
+R:X:,_NX:,_N1	-399
+R:X:,_NX:__N1	-42
+R:X:,_NX:--_N1	-102
+R:X:,_an:to	28
+RBS:,_an	28
+R:X:NX_die_NX:N1_,_N2	-5
+R:X:NX_die_NX:N1_N2_the	-4
+RBS:X_an	-28
+RBS:die_terroranschläge	161.5
+R:X:die_terroranschläge:,_terrorist_attacks	28
+RBT:,_terrorist	175
+R:X:die_terroranschläge_NX:,_terrorist_attacks_N1	147
+R:X:NX_so:N1_as	-1.5
+R:X:justiz:judiciary	-90
+RBS:<r>_justiz	-1
+RBS:justiz_</r>	-220.5
+R:X:NX_so:N1_that	-14
+RBT:<r>_judiciary	215
+R:X:NX_so:N1_the	15.5
+RBT:judiciary_</r>	-90
+R:X:justiz:justiz	-216
+RBT:<r>_justiz	-216
+RBT:justiz_</r>	-216
+R:X:justiz_NX:judiciary_N1	305
+RBS:justiz_X	205
+RBS:<r>_brachten	-28
+RBS:justiz_und	15.5
+RBS:brachten_</r>	-175
+R:X:NX_und_die:'s_N1_and	-5
+R:X:brachten:brachten	-175
+RBT:<r>_brachten	-175
+RBT:brachten_</r>	-175
+R:X:NX_an_der:N1_the	-0.5
+R:X:brachten_NX:N1_brought	147
+RBS:brachten_X	175
+R:X:NX_die_terroranschläge_NX:,_terrorist_attacks_N2_N1	-13.5
+R:X:NX_und_die:N1_'s	-12
+R:X:NX_und_die_NX:'s_N2_N1	-16
+RBS:<r>_2001	-14.5
+RBS:2001_</r>	28
+RBT:<r>_2001	37.5
+R:X:NX_und_die_NX:N1_and_N2	-159
+RBT:2001_</r>	28
+R:X:2001_NX:2001_N1	147
+RBS:2001_X	-28
+R:X:NX_brachten_NX:N1_N2_brought	28
+RBS:X_brachten	28
+RBT:,_<unk>	-109.5
+R:X:2001_NX_die_NX:2001_,_N2_N1	-161.5
+R:X:unabhängige:independent	38
+RBT:2001_,	-109.5
+RBS:<r>_unabhängige	127
+RBS:unabhängige_</r>	-197
+RBT:<r>_independent	343
+RBT:independent_</r>	38
+RBT:<r>_september	-13.5
+R:X:unabhängige:unabhängige	-198
+RBT:<r>_unabhängige	-198
+RBS:ein_X	9.5
+RBT:unabhängige_</r>	-198
+RBS:september_X	-14.5
+R:X:unabhängige_NX:independent_N1	287
+R:X:ein_NX:an_N1	132
+R:X:ein_NX:any_N1	25
+RBS:unabhängige_X	197
+R:X:NX_justiz:N1_judiciary	85.5
+R:X:NX_an_der_macht_NX:N1_of_power_N2	-27.5
+RBS:X_justiz	1
+R:X:NX_justiz_NX:N1_judiciary_N2	-43
+R:X:NX_justiz_und:N1_judiciary_and	15.5
+RBS:<r>_11	-13.5
+R:X:NX_unabhängige:N1_independent	-37
+R:X:ein_NX:a_N1	-93
+RBS:X_unabhängige	-127
+R:X:ein_NX:one_N1	-15
+R:X:NX_unabhängige_NX:N1_independent_N2	-90
+R:X:ein_NX:-_N1	-11.5
+R:X:NX_ein_NX:an_N1_N2	-6
+R:X:NX_ein_NX:be_transformed_N1_N2	-22
+RBS:X_,	-3.5
+RBS:september_2001	14.5
+RBT:,_2001	14.5
+R:X:NX_,:to_N1	68
+R:X:NX_,:N1__	1
+R:X:NX_,:N1_--	-172.5
+R:X:11_._september_2001_NX:september_11_,_2001_N1	-13.5
+R:X:die_NX_und_NX:the_N1_N2	-10
+R:X:NX_,:N1_for	-127.5
+R:X:NX_,:N1_in	-13.5
+R:X:NX_,:N1_of	-55
+R:X:NX_,:N1_on	257.5
+R:X:NX_,:N1_out	-58
+RBS:am_11	13.5
+R:X:die_NX_justiz_NX_die:the_N1_judiciary_N2	-57
+R:X:NX_,:N1_refuses_to	-232.5
+R:X:die_NX_und_die:the_N1_and	148
+R:X:die_NX_und_die:the_N1_and_the	-2.5
+RBT:the_september	13.5
+R:X:die_NX_die_NX:the_N1_N2	-3
+R:X:am_11_._september_NX:the_september_11_,_N1	-14.5
+R:X:die_NX_und_die_NX:the_N1_and_N2	-32
+RBS:zu_</r>	672
+R:X:NX_,_NX:N1_,_N2	-78
+R:X:NX_,_NX:N1_N2_,	80
+R:X:am_11_._september_2001:the_september_11_,_2001	28
+R:X:zu:for	-5
+R:X:zu:in	-7
+R:X:zu:to	23
+R:X:taliban:taliban	-251.5
+RBS:<r>_taliban	-223.5
+RBS:taliban_</r>	-157.5
+R:X:zu:with	-6
+R:X:verzweifelten:desperate	28.5
+RBT:<r>_taliban	-205.5
+RBT:<r>_desperate	28.5
+RBT:taliban_</r>	-107
+RBT:desperate_</r>	28.5
+R:X:taliban_NX:taliban_N1	28
+R:X:verzweifelten:verzweifelten	-28.5
+RBS:taliban_X	157.5
+R:X:NX_zu:to_N1	-229
+RBT:<r>_verzweifelten	-28.5
+R:X:den_taliban:the_taliban	144.5
+RBT:verzweifelten_</r>	-28.5
+RBS:den_taliban	223.5
+RBT:the_taliban	144.5
+R:X:NX_zu:N1_for	-152
+R:X:NX_zu:N1_in	-6
+R:X:NX_zu:N1_is	251
+R:X:NX_zu:N1_of	-49.5
+RBS:<r>_dem	22
+RBT:<r>_its	458
+RBT:its_</r>	337
+R:X:NX_den_taliban:N1_taliban	-50.5
+R:X:NX_den_taliban_NX:N1_taliban_N2	-2.5
+R:X:NX_den_taliban_NX:N2_N1_taliban	132
+R:X:erklärte:declared	-8
+RBS:<r>_erklärte	-185.5
+RBS:erklärte_</r>	-124.5
+RBT:<r>_declaring	-9
+R:X:erklärte:erklärte	-116.5
+RBT:<r>_erklärte	-116.5
+RBT:erklärte_</r>	-116.5
+R:X:erklärte_NX:declared_N1	-52
+RBS:erklärte_X	-61
+RBS:jener_</r>	-62.5
+R:X:erklärte_NX:declaring_N1	-9
+RBS:erklärte_,	185.5
+R:X:NX_jener:N1_of	-62.5
+R:X:dem_NX:the_N1	22
+R:X:verkaufen:sell	-153
+RBS:<r>_verkaufen	-153
+RBS:verkaufen_</r>	-140.5
+RBT:sell_</r>	-153
+RBS:bereit_</r>	86
+RBS:zu_verkaufen	153
+RBS:<r>_bemühen	-2.5
+R:X:bereit:bereit	86
+RBT:<r>_bereit	86
+RBT:bereit_</r>	86
+R:X:bereit_NX:ready_N1	-31
+RBS:bereit_X	-86
+R:X:bereit_NX:N1_ready	-55
+RBS:X_zum	30
+R:X:bemühen:bemühen	-2.5
+R:X:NX_erklärte_,:N1_,	110
+RBT:<r>_bemühen	-2.5
+RBS:X_erklärte	185.5
+RBT:bemühen_</r>	-2.5
+R:X:NX_erklärte_,_NX:N1_,_N2	75.5
+RBS:in_X	22
+RBS:<r>_sich	-17.5
+R:X:NX_zu_verkaufen:sell_N1	12.5
+RBS:sich_</r>	-17.5
+R:X:NX_zum_NX:N2_to_further_N1	30
+RBS:<r>_das	45
+RBS:das_</r>	2.5
+RBT:to_further	30
+RBT:<r>_it	-381
+RBT:it_</r>	3
+RBT:<r>_so	172.5
+RBT:so_</r>	-74.5
+RBT:<r>_this	9.5
+RBT:this_</r>	-11.5
+RBS:X_dem	-22
+R:X:das_NX:a_growing_N1	77
+RBS:das_X	-2.5
+RBT:a_growing	-41
+R:X:das_NX:be_N1	169
+R:X:das_NX:its_N1	-95
+R:X:das_NX:so_N1	-38
+RBS:X_sein	91.5
+R:X:das_NX:the_N1	-80
+done
+
+---
+Best iteration: 2 [SCORE 'stupid_bleu'=0.37119].
+This took 0.6 min.
diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz
new file mode 100644
index 00000000..0741fd92
Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.de.gz differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz
new file mode 100644
index 00000000..1c0bd401
Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.en.gz differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz
new file mode 100644
index 00000000..7ce81057
Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.grammar.gz b/training/dtrain/examples/standard/nc-wmt11.grammar.gz
new file mode 100644
index 00000000..ce4024a1
Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.grammar.gz differ
diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini
new file mode 100644
index 00000000..b14f4819
--- /dev/null
+++ b/training/dtrain/examples/toy/cdec.ini
@@ -0,0 +1,3 @@
+formalism=scfg
+add_pass_through_rules=true
+grammar=grammar.gz
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
new file mode 100644
index 00000000..cd715f26
--- /dev/null
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -0,0 +1,13 @@
+decoder_config=cdec.ini
+input=src
+refs=tgt
+output=-
+print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
+k=4
+N=4
+epochs=2
+scorer=bleu
+sample_from=kbest
+filter=uniq
+pair_sampling=all
+learning_rate=1
diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output
new file mode 100644
index 00000000..1da2aadd
--- /dev/null
+++ b/training/dtrain/examples/toy/expected-output
@@ -0,0 +1,77 @@
+Warning: hi_lo only works with pair_sampling XYX.
+                cdec cfg 'cdec.ini'
+Seeding random number sequence to 1664825829
+
+dtrain
+Parameters:
+                       k 4
+                       N 4
+                       T 2
+                  scorer 'bleu'
+             sample from 'kbest'
+                  filter 'uniq'
+           learning rate 1
+                   gamma 0
+             loss margin 0
+                   pairs 'all'
+          pair threshold 0
+          select weights 'last'
+                  l1 reg 0 'none'
+               max pairs 4294967295
+                cdec cfg 'cdec.ini'
+                   input 'src'
+                    refs 'tgt'
+                  output '-'
+(a dot represents 10 inputs)
+Iteration #1 of 2.
+  2
+WEIGHTS
+              logp = +0
+        shell_rule = -1
+        house_rule = +2
+        small_rule = -2
+       little_rule = +3
+       PassThrough = -5
+        ---
+       1best avg score: 0.5 (+0.5)
+ 1best avg model score: 2.5 (+2.5)
+           avg # pairs: 4
+        avg # rank err: 1.5
+     avg # margin viol: 0
+    non0 feature count: 6
+           avg list sz: 4
+           avg f count: 2.875
+(time 0 min, 0 s/S)
+
+Iteration #2 of 2.
+  2
+WEIGHTS
+              logp = +0
+        shell_rule = -1
+        house_rule = +2
+        small_rule = -2
+       little_rule = +3
+       PassThrough = -5
+        ---
+       1best avg score: 1 (+0.5)
+ 1best avg model score: 5 (+2.5)
+           avg # pairs: 5
+        avg # rank err: 0
+     avg # margin viol: 0
+    non0 feature count: 6
+           avg list sz: 4
+           avg f count: 3
+(time 0 min, 0 s/S)
+
+Writing weights file to '-' ...
+house_rule	2
+little_rule	3
+Glue	-4
+PassThrough	-5
+small_rule	-2
+shell_rule	-1
+done
+
+---
+Best iteration: 2 [SCORE 'bleu'=1].
+This took 0 min.
diff --git a/training/dtrain/examples/toy/grammar.gz b/training/dtrain/examples/toy/grammar.gz
new file mode 100644
index 00000000..8eb0d29e
Binary files /dev/null and b/training/dtrain/examples/toy/grammar.gz differ
diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src
new file mode 100644
index 00000000..87e39ef2
--- /dev/null
+++ b/training/dtrain/examples/toy/src
@@ -0,0 +1,2 @@
+ich sah ein kleines haus
+ich fand ein kleines haus
diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt
new file mode 100644
index 00000000..174926b3
--- /dev/null
+++ b/training/dtrain/examples/toy/tgt
@@ -0,0 +1,2 @@
+i saw a little house
+i found a little house
diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb
index f0cd58c5..86e835e8 100755
--- a/training/dtrain/lplp.rb
+++ b/training/dtrain/lplp.rb
@@ -84,34 +84,28 @@ def _test()
 end
 #_test()
 
-# actually do something
+
 def usage()
-  puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> [n] < <input>"
+  puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> <#shards> < <input>"
   puts "   l0...: norms for selection"
   puts "select_k: only output top k (according to the norm of their column vector) features"
   puts "     cut: output features with weight >= threshold"
   puts "       n: if we do not have a shard count use this number for averaging"
-  exit
+  exit 1
 end
 
-if ARGV.size < 3 then usage end
+if ARGV.size < 4 then usage end
 norm_fun = method(ARGV[0].to_sym)
 type = ARGV[1]
 x = ARGV[2].to_f
-
-shard_count_key = "__SHARD_COUNT__"
+shard_count = ARGV[3].to_f
 
 STDIN.set_encoding 'utf-8'
 STDOUT.set_encoding 'utf-8'
 
 w = {}
-shard_count = 0
 while line = STDIN.gets
   key, val = line.split /\s+/
-  if key == shard_count_key
-    shard_count += 1
-    next
-  end
   if w.has_key? key
     w[key].push val.to_f
   else
@@ -119,8 +113,6 @@ while line = STDIN.gets
   end
 end
 
-if ARGV.size == 4 then shard_count = ARGV[3].to_f end
-
 if type == 'cut'
   cut(w, norm_fun, shard_count, x)
 elsif type == 'select_k'
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 24e7f49e..e661416e 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -3,16 +3,15 @@
 require 'trollop'
 
 def usage
-  if ARGV.size != 8
-    STDERR.write "Usage: "
-    STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] [--reshard/-y] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary> -l \"l2 select_k 100000\"\n"
-    exit 1
-  end
+  STDERR.write "Usage: "
+  STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"]\n"
+  exit 1
 end
 
 opts = Trollop::options do
   opt :config, "dtrain config file", :type => :string
-  opt :epochs, "number of epochs", :type => :int
+  opt :epochs, "number of epochs", :type => :int, :default => 10
+  opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000"
   opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false
   opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false
   opt :shards, "number of shards", :type => :int
@@ -21,8 +20,8 @@ opts = Trollop::options do
   opt :references, "references", :type => :string
   opt :qsub, "use qsub", :type => :bool, :default => false
   opt :dtrain_binary, "path to dtrain binary", :type => :string
-  opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000"
 end
+usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references]
 
 
 dtrain_dir = File.expand_path File.dirname(__FILE__)
@@ -32,16 +31,14 @@ else
   dtrain_bin = opts[:dtrain_binary]
 end
 ruby       = '/usr/bin/ruby'
-lplp_rb    = "#{dtrain_dir}/hstreaming/lplp.rb"
+lplp_rb    = "#{dtrain_dir}/lplp.rb"
 lplp_args  = opts[:lplp_args]
 cat        = '/bin/cat'
 
 ini        = opts[:config]
 epochs     = opts[:epochs]
-rand = false
-rand = true if opts[:randomize]
-reshard = false
-reshard = true if opts[:reshard]
+rand       = opts[:randomize]
+reshard    = opts[:reshard]
 predefined_shards = false
 if opts[:shards] == 0
   predefined_shards = true
@@ -49,11 +46,10 @@ if opts[:shards] == 0
 else
   num_shards = opts[:shards]
 end
-shards_at_once = opts[:processes_at_once]
 input = opts[:input]
 refs  = opts[:references]
-use_qsub   = false
-use_qsub = true if opts[:qsub]
+use_qsub       = opts[:qsub]
+shards_at_once = opts[:processes_at_once]
 
 `mkdir work`
 
diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README
deleted file mode 100644
index 2df77086..00000000
--- a/training/dtrain/test/example/README
+++ /dev/null
@@ -1,8 +0,0 @@
-Small example of input format for distributed training.
-Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini .
-
-For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
-and recompile.
-
-data can be found here: http://simianer.de/#dtrain
-
diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini
deleted file mode 100644
index 0215416d..00000000
--- a/training/dtrain/test/example/cdec.ini
+++ /dev/null
@@ -1,25 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-scfg_max_span_limit=15
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-feature_function=WordPenalty
-feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
-# all currently working feature functions for translation:
-# (with those features active that were used in the ACL paper)
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-feature_function=RuleIdentityFeatures
-feature_function=RuleSourceBigramFeatures
-feature_function=RuleTargetBigramFeatures
-feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini
deleted file mode 100644
index 97fce7f0..00000000
--- a/training/dtrain/test/example/dtrain.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-input=./nc-wmt11.1k.gz    # use '-' for STDIN
-output=-                             # a weights file (add .gz for gzip compression) or STDOUT '-'
-select_weights=VOID                  # don't output weights
-decoder_config=./cdec.ini # config for cdec
-# weights for these features will be printed on each iteration
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-stop_after=10 # stop epoch after 10 inputs
-
-# interesting stuff
-epochs=2                # run over input 2 times
-k=100                   # use 100best lists
-N=4                     # optimize (approx) BLEU4
-scorer=stupid_bleu      # use 'stupid' BLEU+1
-learning_rate=1.0       # learning rate, don't care if gamma=0 (perceptron)
-gamma=0                 # use SVM reg
-sample_from=kbest       # use kbest lists (as opposed to forest)
-filter=uniq             # only unique entries in kbest (surface form)
-pair_sampling=XYX
-hi_lo=0.1               # 10 vs 80 vs 10 and 80 vs 10 here
-pair_threshold=0        # minimum distance in BLEU (this will still only use pairs with diff > 0)
-loss_margin=0
diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output
deleted file mode 100644
index 05326763..00000000
--- a/training/dtrain/test/example/expected-output
+++ /dev/null
@@ -1,89 +0,0 @@
-                cdec cfg 'test/example/cdec.ini'
-Loading the LM will be faster if you build a binary file.
-Reading test/example/nc-wmt11.en.srilm.gz
-----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
-****************************************************************************************************
-  Example feature: Shape_S00000_T00000
-Seeding random number sequence to 2912000813
-
-dtrain
-Parameters:
-                       k 100
-                       N 4
-                       T 2
-                 scorer 'stupid_bleu'
-             sample from 'kbest'
-                  filter 'uniq'
-           learning rate 1
-                   gamma 0
-             loss margin 0
-                   pairs 'XYX'
-                   hi lo 0.1
-          pair threshold 0
-          select weights 'VOID'
-                  l1 reg 0 'none'
-               max pairs 4294967295
-                cdec cfg 'test/example/cdec.ini'
-                   input 'test/example/nc-wmt11.1k.gz'
-                  output '-'
-              stop_after 10
-(a dot represents 10 inputs)
-Iteration #1 of 2.
- . 10
-Stopping after 10 input sentences.
-WEIGHTS
-              Glue = -637
-       WordPenalty = +1064
-     LanguageModel = +1175.3
- LanguageModel_OOV = -1437
-     PhraseModel_0 = +1935.6
-     PhraseModel_1 = +2499.3
-     PhraseModel_2 = +964.96
-     PhraseModel_3 = +1410.8
-     PhraseModel_4 = -5977.9
-     PhraseModel_5 = +522
-     PhraseModel_6 = +1089
-       PassThrough = -1308
-        ---
-       1best avg score: 0.16963 (+0.16963)
- 1best avg model score: 64485 (+64485)
-           avg # pairs: 1494.4
-        avg # rank err: 702.6
-     avg # margin viol: 0
-    non0 feature count: 528
-           avg list sz: 85.7
-           avg f count: 102.75
-(time 0.083 min, 0.5 s/S)
-
-Iteration #2 of 2.
- . 10
-WEIGHTS
-              Glue = -1196
-       WordPenalty = +809.52
-     LanguageModel = +3112.1
- LanguageModel_OOV = -1464
-     PhraseModel_0 = +3895.5
-     PhraseModel_1 = +4683.4
-     PhraseModel_2 = +1092.8
-     PhraseModel_3 = +1079.6
-     PhraseModel_4 = -6827.7
-     PhraseModel_5 = -888
-     PhraseModel_6 = +142
-       PassThrough = -1335
-        ---
-       1best avg score: 0.277 (+0.10736)
- 1best avg model score: -3110.5 (-67595)
-           avg # pairs: 1144.2
-        avg # rank err: 529.1
-     avg # margin viol: 0
-    non0 feature count: 859
-           avg list sz: 74.9
-           avg f count: 112.84
-(time 0.067 min, 0.4 s/S)
-
-Writing weights file to '-' ...
-done
-
----
-Best iteration: 2 [SCORE 'stupid_bleu'=0.277].
-This took 0.15 min.
diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README
deleted file mode 100644
index 89715105..00000000
--- a/training/dtrain/test/parallelize/README
+++ /dev/null
@@ -1,5 +0,0 @@
-run for example
-  ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs
-
-final weights will be in the file work/weights.3
-
diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini
deleted file mode 100644
index e43ba1c4..00000000
--- a/training/dtrain/test/parallelize/cdec.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-scfg_max_span_limit=15
-feature_function=WordPenalty
-feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-#feature_function=RuleIdentityFeatures
-#feature_function=RuleNgramFeatures
-#feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/training/dtrain/test/parallelize/dtrain.ini b/training/dtrain/test/parallelize/dtrain.ini
deleted file mode 100644
index 03f9d240..00000000
--- a/training/dtrain/test/parallelize/dtrain.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-k=100
-N=4
-learning_rate=0.0001
-gamma=0
-loss_margin=0
-epochs=1
-scorer=stupid_bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=XYX
-hi_lo=0.1
-select_weights=last
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-decoder_config=cdec.ini
diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz
deleted file mode 100644
index 1e28a24b..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.0.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz
deleted file mode 100644
index 372f5675..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.1.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz
deleted file mode 100644
index 145d0dc0..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.2.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz
deleted file mode 100644
index 105593ff..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.3.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/test/parallelize/g/grammar.out.4.gz
deleted file mode 100644
index 30781f48..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.4.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz
deleted file mode 100644
index 834ee759..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.5.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/test/parallelize/g/grammar.out.6.gz
deleted file mode 100644
index 2e76f348..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.6.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz
deleted file mode 100644
index 3741a887..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.7.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz
deleted file mode 100644
index ebf6bd0c..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.8.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz
deleted file mode 100644
index c1791059..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.9.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in
deleted file mode 100644
index 3b7dec39..00000000
--- a/training/dtrain/test/parallelize/in
+++ /dev/null
@@ -1,10 +0,0 @@
-<seg grammar="g/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg>
-<seg grammar="g/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg>
-<seg grammar="g/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg>
-<seg grammar="g/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg>
-<seg grammar="g/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg>
-<seg grammar="g/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg>
-<seg grammar="g/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg>
-<seg grammar="g/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg>
-<seg grammar="g/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg>
-<seg grammar="g/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg>
diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs
deleted file mode 100644
index 632e27b0..00000000
--- a/training/dtrain/test/parallelize/refs
+++ /dev/null
@@ -1,10 +0,0 @@
-europe 's divided racial house
-a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
-the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
-while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
-an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
-mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
-it will not , as america 's racial history clearly shows .
-race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
-the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
-this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/test/toy/cdec.ini b/training/dtrain/test/toy/cdec.ini
deleted file mode 100644
index 98b02d44..00000000
--- a/training/dtrain/test/toy/cdec.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
diff --git a/training/dtrain/test/toy/dtrain.ini b/training/dtrain/test/toy/dtrain.ini
deleted file mode 100644
index a091732f..00000000
--- a/training/dtrain/test/toy/dtrain.ini
+++ /dev/null
@@ -1,12 +0,0 @@
-decoder_config=test/toy/cdec.ini
-input=test/toy/input
-output=-
-print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
-k=4
-N=4
-epochs=2
-scorer=bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=all
-learning_rate=1
diff --git a/training/dtrain/test/toy/input b/training/dtrain/test/toy/input
deleted file mode 100644
index 4d10a9ea..00000000
--- a/training/dtrain/test/toy/input
+++ /dev/null
@@ -1,2 +0,0 @@
-0	ich sah ein kleines haus	i saw a little house	[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0	[NP] ||| ich ||| i ||| logp=0	[NP] ||| ein [NN,1] ||| a [1] ||| logp=0	[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1	[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1	[JJ] ||| kleines ||| small ||| logp=0 small_rule=1	[JJ] ||| kleines ||| little ||| logp=0 little_rule=1	[JJ] ||| grosses ||| big ||| logp=0	[JJ] ||| grosses ||| large ||| logp=0	[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0	[V] ||| sah ||| saw ||| logp=0	[V] ||| fand ||| found ||| logp=0
-1	ich fand ein kleines haus	i found a little house	[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0	[NP] ||| ich ||| i ||| logp=0	[NP] ||| ein [NN,1] ||| a [1] ||| logp=0	[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1	[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1	[JJ] ||| kleines ||| small ||| logp=0 small_rule=1	[JJ] ||| kleines ||| little ||| logp=0 little_rule=1	[JJ] ||| grosses ||| big ||| logp=0	[JJ] ||| grosses ||| large ||| logp=0	[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0	[V] ||| sah ||| saw ||| logp=0	[V] ||| fand ||| found ||| logp=0
-- 
cgit v1.2.3


From ce2f5608e15a3d3e080ab4b26b5f263fead215e2 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Fri, 15 Mar 2013 12:46:03 +0100
Subject: make perceptron automatically faster

---
 training/dtrain/dtrain.cc      | 36 ++++++++++++++++++++----------------
 training/dtrain/pairsampling.h | 21 ++++++---------------
 2 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index fcb46db2..2bb4ec98 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -6,7 +6,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
 {
   po::options_description ini("Configuration File Options");
   ini.add_options()
-    ("input",             po::value<string>()->default_value("-"),                                                   "input file")
+    ("input",             po::value<string>()->default_value("-"),                                             "input file (src)")
+    ("refs,r",            po::value<string>(),                                                                       "references")
     ("output",            po::value<string>()->default_value("-"),                          "output weights file, '-' for STDOUT")
     ("input_weights",     po::value<string>(),                                "input weights file (e.g. from previous iteration)")
     ("decoder_config",    po::value<string>(),                                                      "configuration file for cdec")
@@ -33,8 +34,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     ("scale_bleu_diff",   po::value<bool>()->zero_tokens(),                      "learning rate <- bleu diff of a misranked pair")
     ("loss_margin",       po::value<weight_t>()->default_value(0.),  "update if no error in pref pair but model scores this near")
     ("max_pairs",         po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.")
-    ("refs,r",            po::value<string>(),                                                         "references in local mode")
-    ("noup",              po::value<bool>()->zero_tokens(),                                               "do not update weights");
+    ("noup",              po::value<bool>()->zero_tokens(),                                               "do not update weights")
+    ("pair_stats",        po::value<bool>()->zero_tokens(), "stats about correctly ranked/misranked pairs even if loss_margin=0 and gamma=0");
   po::options_description cl("Command Line Options");
   cl.add_options()
     ("config,c",         po::value<string>(),              "dtrain config file")
@@ -124,6 +125,10 @@ main(int argc, char** argv)
   vector<string> print_weights;
   if (cfg.count("print_weights"))
     boost::split(print_weights, cfg["print_weights"].as<string>(), boost::is_any_of(" "));
+  bool pair_stats = false;
+  if (cfg.count("pair_stats")) pair_stats = true;
+  bool faster_perceptron = false;
+  if (gamma==0 && loss_margin==0 && !pair_stats) faster_perceptron = true;
 
   // setup decoder
   register_feature_functions();
@@ -346,25 +351,26 @@ main(int argc, char** argv)
       // get pairs
       vector<pair<ScoredHyp,ScoredHyp> > pairs;
       if (pair_sampling == "all")
-        all_pairs(samples, pairs, pair_threshold, max_pairs);
+        all_pairs(samples, pairs, pair_threshold, max_pairs, faster_perceptron);
       if (pair_sampling == "XYX")
-        partXYX(samples, pairs, pair_threshold, max_pairs, hi_lo);
+        partXYX(samples, pairs, pair_threshold, max_pairs, faster_perceptron, hi_lo);
       if (pair_sampling == "PRO")
         PROsampling(samples, pairs, pair_threshold, max_pairs);
       npairs += pairs.size();
 
       for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
            it != pairs.end(); it++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
-        bool rank_error = true; // pair sampling already did this for us
-        rank_errors++;
-        score_t margin = std::numeric_limits<float>::max();
-#else
-        bool rank_error = it->first.model <= it->second.model;
+        bool rank_error;
+        score_t margin;
+        if (faster_perceptron) { // we only have considering misranked pairs
+          rank_error = true; // pair sampling already did this for us
+          margin = std::numeric_limits<float>::max();
+        } else {
+          rank_error = it->first.model <= it->second.model;
+          margin = fabs(fabs(it->first.model) - fabs(it->second.model));
+          if (!rank_error && margin < loss_margin) margin_violations++;
+        }
         if (rank_error) rank_errors++;
-        score_t margin = fabs(fabs(it->first.model) - fabs(it->second.model));
-        if (!rank_error && margin < loss_margin) margin_violations++;
-#endif
         if (scale_bleu_diff) eta = it->first.score - it->second.score;
         if (rank_error || margin < loss_margin) {
           SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
@@ -458,10 +464,8 @@ main(int argc, char** argv)
     cerr << _np << npairs/(float)in_sz << endl;
     cerr << "        avg # rank err: ";
     cerr << rank_errors/(float)in_sz << endl;
-#ifndef DTRAIN_FASTER_PERCEPTRON
     cerr << "     avg # margin viol: ";
     cerr << margin_violations/(float)in_sz << endl;
-#endif
     cerr << "    non0 feature count: " <<  nonz << endl;
     cerr << "           avg list sz: " << list_sz/(float)in_sz << endl;
     cerr << "           avg f count: " << f_count/(float)list_sz << endl;
diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h
index 84be1efb..3f67e209 100644
--- a/training/dtrain/pairsampling.h
+++ b/training/dtrain/pairsampling.h
@@ -19,7 +19,7 @@ cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b)
 }
 
 inline void
-all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1)
+all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float _unused=1)
 {
   sort(s->begin(), s->end(), cmp_hyp_by_score_d);
   unsigned sz = s->size();
@@ -27,6 +27,7 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
   unsigned count = 0;
   for (unsigned i = 0; i < sz-1; i++) {
     for (unsigned j = i+1; j < sz; j++) {
+      if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
       if (threshold > 0) {
         if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
           training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -51,7 +52,7 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc
  */
 
 inline void
-partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float hi_lo)
+partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float hi_lo)
 {
   unsigned sz = s->size();
   if (sz < 2) return;
@@ -64,9 +65,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
   unsigned count = 0;
   for (unsigned i = 0; i < sep_hi; i++) {
     for (unsigned j = sep_hi; j < sz; j++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
-      if ((*s)[i].model <= (*s)[j].model) {
-#endif
+      if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
       if (threshold > 0) {
         if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
           training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -78,9 +77,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
         b = true;
         break;
       }
-#ifdef DTRAIN_FASTER_PERCEPTRON
-      }
-#endif
     }
     if (b) break;
   }
@@ -88,9 +84,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
   while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo;
   for (unsigned i = sep_hi; i < sz-sep_lo; i++) {
     for (unsigned j = sz-sep_lo; j < sz; j++) {
-#ifdef DTRAIN_FASTER_PERCEPTRON
-      if ((*s)[i].model <= (*s)[j].model) {
-#endif
+      if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue;
       if (threshold > 0) {
         if (accept_pair((*s)[i].score, (*s)[j].score, threshold))
           training.push_back(make_pair((*s)[i], (*s)[j]));
@@ -99,9 +93,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor
           training.push_back(make_pair((*s)[i], (*s)[j]));
       }
       if (++count == max) return;
-#ifdef DTRAIN_FASTER_PERCEPTRON
-      }
-#endif
     }
   }
 }
@@ -119,7 +110,7 @@ _PRO_cmp_pair_by_diff_d(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b
   return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score));
 }
 inline void
-PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1)
+PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0)
 {
   unsigned max_count = 5000, count = 0, sz = s->size();
   bool b = false;
-- 
cgit v1.2.3


From d2b1c3d182863b7d39d22b589661d71608bebac8 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Fri, 15 Mar 2013 16:06:05 +0100
Subject: fix

---
 training/dtrain/dtrain.cc                         |   18 +-
 training/dtrain/dtrain.h                          |    3 -
 training/dtrain/examples/standard/dtrain.ini      |    8 +-
 training/dtrain/examples/standard/expected-output | 1163 +--------------------
 4 files changed, 39 insertions(+), 1153 deletions(-)

diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 2bb4ec98..149f87d4 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -34,8 +34,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     ("scale_bleu_diff",   po::value<bool>()->zero_tokens(),                      "learning rate <- bleu diff of a misranked pair")
     ("loss_margin",       po::value<weight_t>()->default_value(0.),  "update if no error in pref pair but model scores this near")
     ("max_pairs",         po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.")
-    ("noup",              po::value<bool>()->zero_tokens(),                                               "do not update weights")
-    ("pair_stats",        po::value<bool>()->zero_tokens(), "stats about correctly ranked/misranked pairs even if loss_margin=0 and gamma=0");
+    ("noup",              po::value<bool>()->zero_tokens(),                                               "do not update weights");
   po::options_description cl("Command Line Options");
   cl.add_options()
     ("config,c",         po::value<string>(),              "dtrain config file")
@@ -125,10 +124,7 @@ main(int argc, char** argv)
   vector<string> print_weights;
   if (cfg.count("print_weights"))
     boost::split(print_weights, cfg["print_weights"].as<string>(), boost::is_any_of(" "));
-  bool pair_stats = false;
-  if (cfg.count("pair_stats")) pair_stats = true;
-  bool faster_perceptron = false;
-  if (gamma==0 && loss_margin==0 && !pair_stats) faster_perceptron = true;
+
 
   // setup decoder
   register_feature_functions();
@@ -185,6 +181,11 @@ main(int argc, char** argv)
   weight_t eta = cfg["learning_rate"].as<weight_t>();
   weight_t gamma = cfg["gamma"].as<weight_t>();
 
+  // faster perceptron: consider only misranked pairs, see
+  // DO NOT ENABLE  WITH SVM (gamma > 0) OR loss_margin!
+  bool faster_perceptron = false;
+  if (gamma==0 && loss_margin==0) faster_perceptron = true;
+
   // l1 regularization
   bool l1naive = false;
   bool l1clip = false;
@@ -232,6 +233,7 @@ main(int argc, char** argv)
     else cerr << setw(25) << "learning rate " << "bleu diff" << endl;
     cerr << setw(25) << "gamma " << gamma << endl;
     cerr << setw(25) << "loss margin " << loss_margin << endl;
+    cerr << setw(25) << "faster perceptron " << faster_perceptron << endl;
     cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl;
     if (pair_sampling == "XYX")
       cerr << setw(25) << "hi lo " << hi_lo << endl;
@@ -461,7 +463,9 @@ main(int argc, char** argv)
     cerr << _np << " 1best avg model score: " << model_avg;
     cerr << _p << " (" << model_diff << ")" << endl;
     cerr << "           avg # pairs: ";
-    cerr << _np << npairs/(float)in_sz << endl;
+    cerr << _np << npairs/(float)in_sz;
+    if (faster_perceptron) cerr << " (meaningless)";
+    cerr << endl;
     cerr << "        avg # rank err: ";
     cerr << rank_errors/(float)in_sz << endl;
     cerr << "     avg # margin viol: ";
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index f368d810..eb0b9f17 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -1,9 +1,6 @@
 #ifndef _DTRAIN_H_
 #define _DTRAIN_H_
 
-#undef DTRAIN_FASTER_PERCEPTRON // only consider actually misranked pairs
-                                // DO NOT ENABLE  WITH SVM (gamma > 0) OR loss_margin!
-
 #define DTRAIN_DOTS 10 // after how many inputs to display a '.'
 #define DTRAIN_SCALE 100000
 
diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini
index a05e9c29..e1072d30 100644
--- a/training/dtrain/examples/standard/dtrain.ini
+++ b/training/dtrain/examples/standard/dtrain.ini
@@ -1,12 +1,12 @@
 input=./nc-wmt11.de.gz
 refs=./nc-wmt11.en.gz
 output=-                  # a weights file (add .gz for gzip compression) or STDOUT '-'
-select_weights=avg        # output average (over epochs) weight vector
+select_weights=VOID       # output average (over epochs) weight vector
 decoder_config=./cdec.ini # config for cdec
 # weights for these features will be printed on each iteration
-print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV
+print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
 # newer version of the grammar extractor use different feature names: 
-#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+#print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV
 stop_after=10 # stop epoch after 10 inputs
 
 # interesting stuff
@@ -21,4 +21,4 @@ filter=uniq             # only unique entries in kbest (surface form)
 pair_sampling=XYX       #
 hi_lo=0.1               # 10 vs 80 vs 10 and 80 vs 10 here
 pair_threshold=0        # minimum distance in BLEU (here: > 0)
-loss_margin=0
+loss_margin=0           # update if correctly ranked, but within this margin
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output
index 8d72f4c3..7cd09dbf 100644
--- a/training/dtrain/examples/standard/expected-output
+++ b/training/dtrain/examples/standard/expected-output
@@ -4,7 +4,7 @@ Reading ./nc-wmt11.en.srilm.gz
 ----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
 ****************************************************************************************************
   Example feature: Shape_S00000_T00000
-Seeding random number sequence to 1511823303
+Seeding random number sequence to 2679584485
 
 dtrain
 Parameters:
@@ -17,10 +17,11 @@ Parameters:
            learning rate 1
                    gamma 0
              loss margin 0
+       faster perceptron 1
                    pairs 'XYX'
                    hi lo 0.1
           pair threshold 0
-          select weights 'avg'
+          select weights 'VOID'
                   l1 reg 0 'none'
                max pairs 4294967295
                 cdec cfg './cdec.ini'
@@ -33,1174 +34,58 @@ Iteration #1 of 2.
  . 10
 Stopping after 10 input sentences.
 WEIGHTS
-   EgivenFCoherent = +0
-      SampleCountF = +0
-           CountEF = +0
-     MaxLexFgivenE = +0
-     MaxLexEgivenF = +0
-      IsSingletonF = +0
-     IsSingletonFE = +0
               Glue = -576
        WordPenalty = +417.79
-       PassThrough = -1455
      LanguageModel = +5117.5
  LanguageModel_OOV = -1307
+     PhraseModel_0 = -1612
+     PhraseModel_1 = -2159.6
+     PhraseModel_2 = -677.36
+     PhraseModel_3 = +2663.8
+     PhraseModel_4 = -1025.9
+     PhraseModel_5 = -8
+     PhraseModel_6 = +70
+       PassThrough = -1455
         ---
        1best avg score: 0.27697 (+0.27697)
  1best avg model score: -47918 (-47918)
-           avg # pairs: 1129.8
+           avg # pairs: 581.9 (meaningless)
         avg # rank err: 581.9
      avg # margin viol: 0
     non0 feature count: 703
            avg list sz: 90.9
            avg f count: 100.09
-(time 0.33 min, 2 s/S)
+(time 0.25 min, 1.5 s/S)
 
 Iteration #2 of 2.
  . 10
 WEIGHTS
-   EgivenFCoherent = +0
-      SampleCountF = +0
-           CountEF = +0
-     MaxLexFgivenE = +0
-     MaxLexEgivenF = +0
-      IsSingletonF = +0
-     IsSingletonFE = +0
               Glue = -622
        WordPenalty = +898.56
-       PassThrough = -2578
      LanguageModel = +8066.2
  LanguageModel_OOV = -2590
+     PhraseModel_0 = -4335.8
+     PhraseModel_1 = -5864.4
+     PhraseModel_2 = -1729.8
+     PhraseModel_3 = +2831.9
+     PhraseModel_4 = -5384.8
+     PhraseModel_5 = +1449
+     PhraseModel_6 = +480
+       PassThrough = -2578
         ---
        1best avg score: 0.37119 (+0.094226)
  1best avg model score: -1.3174e+05 (-83822)
-           avg # pairs: 1214.9
+           avg # pairs: 584.1 (meaningless)
         avg # rank err: 584.1
      avg # margin viol: 0
     non0 feature count: 1115
            avg list sz: 91.3
            avg f count: 90.755
-(time 0.27 min, 1.6 s/S)
+(time 0.3 min, 1.8 s/S)
 
 Writing weights file to '-' ...
-R:X:NX_sein:N1_its	61.5
-WordPenalty	658.17328732437022
-LanguageModel	6591.8747593425214
-LanguageModel_OOV	-1948.5
-R:X:das_NX:this_N1	12
-R:X:NX_sein_NX:N1_from_ever_being_able_to_N2	30
-R:X:NX_bemühen:N1_effort	2.5
-RBS:X_bemühen	2.5
-R:X:sich:sich	-17.5
-RBT:<r>_sich	-17.5
-RBT:sich_</r>	-17.5
-RBS:sich_X	17.5
-RBS:<r>_als	147
-RBS:als_</r>	-59
-Shape_S10000_T10000	-1711.5
-RBT:<r>_when	84
-R:X:zum_NX:as_N1	-134
-RBS:<r>_zum	-30
-R:X:als_NX:as_N1	63
-R:X:zum_NX:'s_N1	33
-R:X:zum_NX:the_N1	24
-RBS:X_sich	-12
-R:X:zum_NX:to_N1	-36
-R:X:zum_NX:with_the_N1	83
-R:X:NX_zum:N1_the	-66
-R:X:NX_zum:N1_to	66
-R:X:als_NX:when_N1	84
-RBS:als_das	59
-RBS:X_das	-104
-R:X:NX_das:N1_a	28.5
-R:X:er_sich_NX:he_N1	86.5
-RBS:er_sich	29.5
-R:X:NX_das:N1_it	-6
-R:X:er_sich_NX:him_N1	-57
-RBT:<r>_declared	-488
-R:X:NX_das:N1_that	-5
-RBT:declared_</r>	-8
-R:X:NX_das:N1_the	-57
-R:X:NX_das:N1_this	-17
-R:X:NX_.:N1_.	-323
-RBS:X_.	134
-R:X:NX_.:N1_debate_.	6.5
-R:X:NX_.:N1_disruptions_.	-14.5
-R:X:NX_.:N1_established_.	7.5
-R:X:NX_.:N1_heading_.	17
-R:X:NX_.:N1_on_.	94
-R:X:NX_.:N1_pace_.	51.5
-R:X:NX_das_NX:N1_a_growing_N2	-45
-R:X:general:general	-23.5
-R:X:NX_.:N1_politics_.	84
-R:X:NX_das_NX:N1_a_N2	-0.5
-R:X:NX_.:N1_power_.	-99.5
-RBS:general_</r>	-23.5
-R:X:NX_.:N1_-_range_missiles_.	-28.5
-Shape_S11000_T11000	40
-RBT:general_</r>	-23.5
-RBT:<r>_.	-645
-R:X:betrat:entered	-91
-R:X:NX_.:N1_war_.	68.5
-RBS:<r>_betrat	23.5
-Shape_S11000_T01100	475.5
-RBT:<r>_entered	-91
-RBT:entered_</r>	-91
-R:X:NX_das_NX:N1_the_N2	-2
-R:X:betrat:betrat	114.5
-RBT:<r>_betrat	114.5
-RBT:betrat_</r>	114.5
-R:X:12:12	79
-R:X:maßnahmen:action	24
-R:X:.:.	-566
-RBS:12_</r>	79
-RBS:<r>_maßnahmen	-44.5
-RBS:<r>_.	-645
-RBT:._</r>	-566
-RBT:<r>_action	24
-RBT:12_</r>	79
-RBT:action_</r>	24
-R:X:maßnahmen:actions	-13
-RBT:<r>_actions	-13
-RBT:actions_</r>	-13
-R:X:12_NX:12_N1	-79
-RBT:declared_a	-428
-RBS:12_X	-79
-RBT:a_state	-428
-RBT:state_of	-428
-R:X:maßnahmen:maßnahmen	-55.5
-R:X:internationale_NX:global_N1	-270
-RBS:X_am	316.5
-RBT:<r>_maßnahmen	-55.5
-RBS:am_</r>	267.5
-RBT:maßnahmen_</r>	-55.5
-RBS:<r>_den	883
-R:X:internationale_NX:international_N1	270
-RBS:den_X	-286.5
-R:X:NX_am:N1_of	267.5
-R:X:NX_als:N1_a	-273.5
-RBS:am_X	-281
-R:X:den_NX:'s_N1	-31
-R:X:NX_am_NX:N1_of_N2	-30
-R:X:NX_am_NX:N1_on_N2	79
-R:X:NX_als:N1_'s	273.5
-R:X:NX_betrat:N1_entered	-23.5
-R:X:ins_NX:into_the_N1	-32.5
-RBS:X_betrat	-23.5
-RBT:into_the	-55
-R:X:ins_NX:into_N1	32.5
-RBT:<r>_their	303
-R:X:general_NX:general_N1	23.5
-RBS:general_X	23.5
-RBS:<r>_am	-316.5
-R:X:den_NX:the_N1	89
-R:X:den_NX_.:the_N1_.	86.5
-R:X:NX_und:and_N1	-216
-RBS:X_und	-203.5
-RBS:und_</r>	522.5
-RBT:<r>_and	438.5
-R:X:am_NX:at_N1	23
-R:X:NX_als_das:N1_than_the	59
-R:X:NX_und:N1_-	-114
-R:X:NX_und:N1_,	114
-R:X:am_NX:of_N1	-4
-R:X:am_NX:on_N1	-158.5
-R:X:am_NX:the_N1	-190
-RBS:<r>_seine	-16.5
-RBS:seine_</r>	39
-R:X:oktober:october	-79.5
-R:X:seine:his	-5.5
-RBS:<r>_oktober	-79.5
-R:X:seine:its	50
-RBT:<r>_october	-79.5
-RBT:october_</r>	-79.5
-R:X:seine_NX:a_N1	7.5
-RBS:seine_X	-39
-R:X:NX_und_NX:and_N1_N2	-22
-RBS:und_X	160.5
-R:X:seine_NX:his_N1	-97
-R:X:seine_NX:its_N1	102.5
-R:X:NX_und_NX:N1_,_and_N2	-4
-R:X:NX_maßnahmen:N1_actions	44.5
-RBS:X_maßnahmen	44.5
-R:X:seine_NX_als:his_N1_than	5.5
-R:X:seine_NX_als:its_N1_as	-64.5
-R:X:NX_und_NX:N1_,_N2	-7
-Shape_S01100_T11000	-312.5
-RBS:und_den	-822.5
-Shape_S01100_T01100	-537.5
-Shape_S01100_T11100	15
-R:X:NX_seine:'s_N1	-5.5
-RBS:X_seine	16.5
-RBS:X_den	-38
-R:X:amerika_NX_sich_NX:america_N1_N2	-12
-R:X:NX_seine_NX:'s_N1_N2	22
-R:X:auf_NX_den_NX:to_N1_the_N2	-23
-R:X:auf_NX_den_NX:to_N1_N2	-23
-RBS:<r>_unterstützen	-716
-RBS:unterstützen_</r>	-1
-Shape_S11100_T11000	783.5
-Shape_S11100_T01100	-716
-Shape_S11100_T11100	488
-R:X:unterstützen:unterstützen	-1
-RBT:<r>_unterstützen	-1
-RBT:unterstützen_</r>	-1
-R:X:unterstützen_NX:support_N1	-715
-RBS:unterstützen_X	-715
-RBT:<r>_will	-6
-RBS:X_unterstützen	716
-RBT:<r>_if	35
-R:X:NX_den_NX_.:N1_N2_.	41
-R:X:verfassung:constitution	15
-RBS:<r>_verfassung	-43
-RBT:<r>_constitution	15
-RBT:constitution_</r>	15
-R:X:verfassung:constitutional	9.5
-RBT:<r>_constitutional	9.5
-RBS:unterstützen_.	716
-RBT:constitutional_</r>	9.5
-R:X:NX_unterstützen_.:N1_.	716
-R:X:verfassung:verfassung	-67.5
-R:X:eine_NX:an_N1	162
-RBT:<r>_verfassung	-67.5
-RBT:verfassung_</r>	-67.5
-R:X:und:,	-21.5
-R:X:,_NX_zu_NX:to_N2_N1	-153
-RBS:<r>_und	-389.5
-R:X:und:and	-35
-RBS:angeführten_</r>	-716
-RBT:and_</r>	-35
-RBT:<r>_as	63
-RBS:versucht_</r>	68
-R:X:und:with	-3
-R:X:eine_NX:is_N1	-162
-RBS:angeführten_X	716
-R:X:und:und	91
-RBT:<r>_und	91
-RBT:und_</r>	91
-R:X:versucht:tried	68
-RBT:tried_</r>	68
-RBS:versucht_X	-68
-R:X:versucht_NX:tried_N1	-68
-R:X:und_NX:and_N1	250
-R:X:und_NX:with_N1	-18
-R:X:und_NX:,_N1	-7
-R:X:und_NX:N1_and	-12
-R:X:und_den_NX:and_N1	-716
-R:X:er:he	17
-R:X:NX_eine:N1_is	-7
-RBS:<r>_er	-47.5
-RBS:er_</r>	54
-RBT:<r>_he	485.5
-RBT:he_</r>	17
-RBT:<r>_him	-1
-R:X:und_NX_.:,_N1_.	-3
-R:X:er:his	91
-R:X:und_den_NX_.:and_the_N1_.	88
-R:X:NX_eine:N1_will	7
-R:X:er:it	3
-R:X:und_den_NX_.:and_N1_.	-216.5
-R:X:er:er	-196
-RBT:<r>_er	-196
-RBT:er_</r>	-196
-RBS:er_X	8
-R:X:er_NX:he_N1	399
-R:X:er_NX:it_N1	-379
-Shape_S01010_T01010	-599
-RBS:pakistanischen_</r>	43
-R:X:NX_versucht:N1_tried	196
-RBT:<r>_pakistan	-43
-RBT:<r>_pakistani	2
-R:X:er_NX_,_NX:he_N1_N2	-12
-R:X:NX_hat_er:N1_,_he_has	196
-RBS:hat_er	196
-R:X:NX_er:he_N1	-17
-RBS:X_er	-148.5
-RBS:pakistanischen_X	-43
-R:X:NX_er:it_N1	-7
-RBS:X_verfassung	43
-R:X:NX_verfassung:N1_'s_constitution	43
-R:X:NX_hat_NX_versucht:N1_N2_has_tried	-190
-R:X:NX_hat_NX_versucht:N1_,_N2_has_tried	-6
-RBS:der_pakistanischen	43
-RBS:X_pakistanischen	-43
-RBS:<r>_aber	46
-RBS:,_als	-147
-RBT:<r>_but	-321
-R:X:aber_NX:but_N1	46
-R:X:von_NX_angeführten:N1_-_led	-716
-R:X:von_NX_angeführten_NX:N1_-_led_N2	716
-RBS:,_aber	-114
-RBS:X_aber	68
-R:X:,_als_NX:,_as_N1	-40
-R:X:NX_aber_NX_,:N1_N2_to	68
-R:X:NX_pakistanischen_NX_.:pakistan_N1_N2_.	-43
-R:X:NX_,_aber_NX:N1_,_N2	-114
-RBS:<r>_rahmen	43
-RBS:rahmen_</r>	43
-R:X:rahmen:within	20
-R:X:rahmen:rahmen	23
-RBT:<r>_rahmen	23
-RBT:rahmen_</r>	23
-Shape_S01110_T11010	35.5
-R:X:NX_der_pakistanischen:N1_pakistan	43
-Shape_S01110_T01110	-1195
-Shape_S01110_T11110	-6.5
-R:X:NX_,_NX_er:N1_N2_he	-33
-RBS:geben_X	-577.5
-RBS:<r>_gestalten	196
-Shape_S01110_T01011	278
-RBS:gestalten_</r>	196
-RBS:geben_und	577.5
-R:X:gestalten:more	221
-Shape_S01110_T01111	-181.5
-RBT:<r>_more	221
-RBT:more_</r>	221
-R:X:gestalten:gestalten	-25
-RBT:<r>_gestalten	-25
-RBT:gestalten_</r>	-25
-R:X:effektiver:effectively	-151
-RBS:<r>_effektiver	54
-RBS:effektiver_</r>	-221
-RBT:<r>_effectively	-151
-RBT:effectively_</r>	-151
-R:X:effektiver:effektiver	-99
-RBT:<r>_effektiver	-99
-RBT:effektiver_</r>	-99
-Shape_S11110_T11010	-1130
-RBS:zu_geben	-107.5
-R:X:effektiver_zu_NX:N1_effectively	304
-RBS:effektiver_zu	221
-RBS:X_geben	107.5
-Shape_S11110_T01110	621
-Shape_S11110_T11110	-75
-RBS:X_gestalten	-196
-R:X:NX_gestalten_.:N1_.	-196
-RBS:gestalten_.	-196
-R:X:terror:terror	672
-RBS:<r>_terror	-16
-RBS:terror_</r>	640
-R:X:den:-	-4
-RBT:<r>_terror	136
-RBT:terror_</r>	646
-RBS:den_</r>	42.5
-R:X:den:for	-11.5
-R:X:terror:terrorism	-54
-RBT:<r>_terrorism	-54
-Shape_S11110_T11011	-4.5
-RBT:terrorism_</r>	-54
-R:X:terror_NX:terror_N1	-634
-R:X:den:of	-17
-RBS:terror_X	-640
-R:X:den:'s	32.5
-Shape_S11110_T01111	-1.5
-R:X:NX_effektiver:N1_more_effectively	29
-RBS:X_effektiver	-54
-R:X:den:the	68
-R:X:NX_geben_und:N1_and	107.5
-R:X:NX_effektiver_zu_NX:N1_N2_effectively	-83
-R:X:den:to	-33
-RBS:1999_</r>	-302.5
-R:X:,_NX_zu_geben_NX:to_N1_N2	-577.5
-R:X:den:with	-10
-RBS:X_terror	-4.5
-R:X:,_NX_zu_geben_und:to_N1_and	470
-R:X:NX_1999:N1_1999	-302.5
-R:X:NX_1999_NX:N2_N1_1999	302.5
-RBS:1999_X	302.5
-R:X:den_NX_zu:to_N1	783.5
-R:X:NX_rahmen_der:N1_the	-43
-RBS:X_rahmen	-43
-RBS:rahmen_der	-43
-RBS:gegen_</r>	22.5
-R:X:gegen:against	-2
-RBT:<r>_against	-2
-RBT:against_</r>	-2
-R:X:._NX:._N1	-79
-RBS:._X	-79.5
-RBS:gegen_den	-22.5
-R:X:NX_._oktober:october_N1	79.5
-RBS:._oktober	79.5
-R:X:am_NX_._NX:the_N2_N1	-0.5
-R:X:gegen_den_NX:on_N1	2
-RBS:den_terror	20.5
-RBT:on_terror	-26
-R:X:NX_den_terror:the_N1_terror	29
-R:X:den_NX_den_NX:the_N1_N2	-110.5
-R:X:den_NX_den_NX:N2_the_N1	-95
-RBT:<unk>_the	-1.5
-R:X:krieg:war	-4.5
-RBS:<r>_krieg	-22
-R:X:musharraf:musharraf	43
-RBS:krieg_</r>	-4.5
-RBT:<r>_war	-22
-RBS:<r>_musharraf	66.5
-RBS:musharraf_</r>	-23.5
-RBT:war_</r>	-4.5
-R:X:musharraf_NX:musharraf_imposed_N1	23.5
-RBS:musharraf_X	23.5
-RBT:musharraf_imposed	23.5
-RBS:krieg_gegen	4.5
-R:X:musharraf_NX:musharraf_N1	107
-R:X:krieg_gegen:war_on	24.5
-RBT:war_on	-17.5
-RBS:X_gegen	-4.5
-R:X:musharraf_NX_,_als_NX:musharraf_N1_as_N2	-20
-R:X:musharraf_NX_,_als_NX:musharraf_N1_N2	-87
-R:X:krieg_gegen_den_NX:war_on_N1	-16
-R:X:krieg_gegen_den_terror:war_on_terror	-26
-R:X:pervez:pervez	22
-RBS:<r>_pervez	22
-RBS:pervez_</r>	57.5
-RBS:X_krieg	22
-RBT:<r>_pervez	22
-RBT:pervez_</r>	22
-RBS:pervez_musharraf	-57.5
-RBS:X_musharraf	-9
-R:X:NX_musharraf:N1_musharraf	-9
-R:X:den_NX_gegen_den:the_N1_on	-4.5
-R:X:den_NX_den_terror:the_N1_terror	-3
-R:X:NX_krieg_gegen_den_terror:N1_war_on_terror	22
-R:X:den_NX_den_terror_NX:N2_the_N1_terror	-1.5
-RBT:<r>_project	91
-RBS:hat_</r>	2
-RBS:X_-	14
-R:X:NX_-:,_N1	48.5
-R:X:NX_-:N1_months_of	32
-R:X:NX_-:N1_relief_and	64
-R:X:NX_-:N1_'s	-144.5
-RBS:hat_X	-198
-R:X:und_NX_terror_NX:and_N2_N1_terror	-4.5
-RBT:and_<unk>	-4.5
-R:X:sorgen:bring	-19
-RBS:X_pervez	-22
-RBT:<r>_bring	-19
-RBT:bring_</r>	-19
-R:X:sorgen:ensure	19
-RBT:<r>_ensure	19
-RBT:ensure_</r>	19
-R:X:NX_-_NX:N1_N2_security	-4
-R:X:NX_projekt_NX:N2_N1_project	-156
-R:X:NX_-_NX_.:N1_N2_.	18
-R:X:NX_projekt_NX_.:N2_N1_project_.	156
-RBS:<r>_-	-14
-RBT:to_ensure	0.5
-R:X:NX_hat:has_N1	-5
-R:X:NX_hat:N1_,	3
-R:X:NX_hat:,_N1	21.5
-R:X:NX_hat:N1_has	-17
-R:X:NX_hat:N1_is	-0.5
-R:X:-_NX:of_N1	-26
-R:X:-_NX:'s_N1	-58
-R:X:NX_hat_NX:N1_,_N2	-73
-R:X:NX_hat_NX:N1_N2_has	28
-R:X:-_NX:-_N1	122
-R:X:NX_hat_NX:N1_,_N2_has	21
-R:X:-_NX:--_N1	-21
-R:X:-_NX:,_N1	-31
-R:X:stabilität:stability	-118
-RBS:<r>_stabilität	-129
-RBT:<r>_stability	-118
-RBT:stability_</r>	-118
-R:X:stabilität:stabilität	-11
-RBT:<r>_stabilität	-11
-RBT:stabilität_</r>	-11
-RBT:<r>_country	253
-RBS:<r>_für	101
-RBS:für_</r>	129
-RBS:X_ihres	-16
-R:X:NX_ihres_NX:N1_of_their_N2	-16
-R:X:für:that	129
-RBT:<r>_political	-16
-RBS:für_X	-129
-R:X:,_NX_und_NX:,_N1_N2	-2
-R:X:für_NX:to_N1	-28
-R:X:NX_stabilität:N1_stability	129
-RBS:X_stabilität	129
-RBS:X_für	22
-RBT:<unk>_with	-109
-RBS:,_für	-123
-R:X:,_für_NX:,_N1	15.5
-R:X:,_NX_den_NX_zu:to_N2_N1	69
-R:X:NX_für_NX_.:N1_N2_.	22
-RBS:<r>_ihres	16
-R:X:ihres_NX:its_N1	-50
-R:X:ihres_NX:their_N1	66
-R:X:NX_zu_verkaufen_NX:sell_N1_N2	140.5
-RBS:verkaufen_X	140.5
-RBS:<r>_würde	-204
-RBS:würde_</r>	-117
-R:X:würde:would	-204
-RBS:würde_X	126
-R:X:in_NX_hat_NX:in_N1_N2	22
-R:X:NX_dem_NX_pervez:N1_N2_pervez	35.5
-RBS:<r>_halten	284
-RBS:halten_</r>	204
-R:X:NX_dem_NX_pervez_musharraf:N1_N2_pervez_musharraf	-57.5
-Shape_S01111_T01011	560.5
-Shape_S01111_T11011	-20.5
-Shape_S01111_T01111	-5
-RBT:<r>_maintain	30
-R:X:halten:halten	284
-RBT:<r>_halten	284
-RBT:halten_</r>	284
-RBS:halten_X	-204
-R:X:NX_würde:if_N1	35
-RBS:X_würde	204
-R:X:NX_würde:will_N1	-6
-Shape_S11111_T11010	69
-R:X:NX_würde:would_face_a_N1	-9.5
-RBT:would_face	-18.5
-RBT:face_a	-18.5
-Shape_S11111_T11110	-57
-R:X:NX_würde:would_N1	78
-R:X:NX_würde:N1_will	-10.5
-R:X:NX_würde_NX:would_N1_N2	126
-R:X:NX_würde_.:would_face_a_N1_.	-9
-RBS:würde_.	-9
-PhraseModel_0	-2973.8953021225416
-R:X:vielleicht:may	-177
-PhraseModel_1	-4012.0052074229625
-PhraseModel_2	-1203.5725821427027
-RBS:vielleicht_</r>	-284
-PhraseModel_3	2747.8420998127522
-PhraseModel_4	-3205.3163436680484
-PhraseModel_5	720.5
-PhraseModel_6	275
-R:X:vielleicht:vielleicht	-107
-RBT:<r>_vielleicht	-107
-RBT:vielleicht_</r>	-107
-R:X:vielleicht_NX:perhaps_N1	284
-RBS:vielleicht_X	284
-R:X:NX_halten:maintain_the_N1	-29
-RBS:X_halten	-284
-RBT:maintain_the	-174
-R:X:NX_halten:N1_hold	-51
-R:X:NX_halten_NX:N2_maintain_the_N1	-204
-RBT:<unk>_maintain	-204
-RBS:<r>_versprechen	30
-RBS:versprechen_</r>	-75
-RBT:<r>_commitment	107
-R:X:versprechen_NX:commitment_N1	30
-RBS:versprechen_X	75
-R:X:NX_versprechen:N1_commitment	-75
-RBS:X_versprechen	-30
-R:X:NX_,_für_NX:N1_,_N2	-138.5
-R:X:NX_versprechen_NX:N1_commitment_N2	45
-RBS:<r>_dass	-451
-RBS:dass_</r>	-91.5
-R:X:dass_NX:that_N1	-451
-RBS:dass_X	91.5
-R:X:NX_er_sein:N1_to_make_up_for_his	-91.5
-RBS:er_sein	-91.5
-R:X:seine_NX_und:a_N1_,	-15
-R:X:NX_,_NX_und:N1_N2_,	129
-RBS:,_dass	851.5
-R:X:NX_,_dass:N1_keep	-27
-R:X:NX_,_dass:N1_said_that	-0.5
-R:X:NX_,_dass:N1_to_let	-9.5
-R:X:NX_dass:that_N1	-8.5
-RBS:X_dass	-400.5
-R:X:NX_dass:N1_let	-51.5
-R:X:NX_dass:N1_see	-243.5
-R:X:NX_dass:N1_thought	-97
-R:X:NX_,_dass_NX:N1_that_N2	134
-Glue	-599
-PassThrough	-2016.5
-R:X:musharrafs:his	2
-RBS:musharrafs_</r>	-29
-R:X:NX_und_den:N1_and_the	22
-RBT:<r>_his	250.5
-RBT:his_</r>	160.5
-R:X:musharrafs:musharraf	-1.5
-RBT:<r>_musharraf	135.5
-RBT:musharraf_</r>	41.5
-R:X:NX_,_dass_NX_.:N1_N2_.	91.5
-R:X:musharrafs:musharrafs	-29.5
-RBT:<r>_musharrafs	-29.5
-RBT:musharrafs_</r>	-29.5
-RBS:sie_X	346
-RBS:<r>_X	-1369.5
-R:X:dies:so	-74.5
-RBS:X_</r>	-1743
-RBS:dies_</r>	-348
-R:X:dies:so_,_this	47
-RBT:so_,	47
-R:X:sie_NX:it_N1	22
-RBT:,_this	47
-R:X:dies:that	-256.5
-R:X:NX_?:N1_?	-134.5
-R:X:dies:these	-5.5
-RBS:X_?	-235
-RBT:<r>_these	-5.5
-RBT:these_</r>	-5.5
-R:X:NX_?:N1_consulting_?	-100.5
-R:X:dies:this	-58.5
-R:X:letzter_NX:last_N1	-14
-RBS:<r>_letzter	-20
-RBS:letzter_X	19.5
-RBT:<r>_last	-2
-R:X:letzter:last	7
-RBS:letzter_</r>	-19.5
-R:X:sein:be	1.5
-RBT:last_</r>	7
-R:X:letzter:late	11.5
-RBT:<r>_they	-6
-RBS:sein_</r>	68
-RBT:<r>_late	11.5
-R:X:ist_NX:be_N1	464.5
-RBT:<r>_be	-10.5
-RBT:late_</r>	11.5
-R:X:sie_NX:they_N1	-22
-RBS:<r>_ist	415.5
-RBT:be_</r>	120
-R:X:letzter:letzter	-24.5
-RBS:ist_X	8
-R:X:sein:being	-16
-RBT:<r>_letzter	-24.5
-R:X:ist_NX:has_N1	16
-RBT:<r>_being	-79
-RBT:letzter_</r>	-24.5
-R:X:ist_NX:is_at_N1	6
-RBT:being_</r>	-16
-R:X:musharrafs_NX:his_N1	-25
-R:X:sein:his	73
-RBS:musharrafs_X	29
-R:X:ist_NX:is_well_N1	6
-R:X:sein:its	-15.5
-R:X:musharrafs_NX:musharraf_'s_N1	77.5
-R:X:sein:sein	55
-RBT:musharraf_'s	55.5
-R:X:ist_NX:is_N1	23
-RBT:<r>_sein	55
-R:X:musharrafs_NX:musharraf_N1	-23.5
-R:X:ist_NX:more_N1	-130.5
-RBT:sein_</r>	55
-R:X:NX_letzter:N1_late	-26.5
-R:X:ist_NX:N1_be	176
-R:X:ziel:aim	-32.5
-RBS:X_letzter	20
-R:X:ist_NX:N1_has	-67
-RBS:<r>_ziel	-143
-R:X:NX_letzter:N1_'s_last	13
-R:X:ist_NX:N1_is	-19
-RBS:ziel_</r>	-219
-R:S:NS_NX:N1_N2	-599
-R:X:ist_NX:N1_,_is	18
-RBT:<r>_aim	-32.5
-RBS:<r>_S	-599
-R:X:ist_NX:N1_it_is	49
-RBT:aim_</r>	-32.5
-RBS:S_X	-599
-R:X:ist:are	-65.5
-R:X:ziel:goal	45
-R:X:NX_letzter_NX:N1_'s_last_N2	33.5
-RBS:ist_</r>	-8
-RBT:<r>_goal	45
-R:X:?:?	235
-RBT:goal_</r>	45
-RBS:<r>_?	235
-R:X:ziel:target	-22.5
-RBT:<r>_?	235
-RBS:X__	-347
-RBT:<r>_target	-22.5
-RBT:?_</r>	235
-RBT:target_</r>	-22.5
-R:X:ist:'s	-61
-R:X:ziel:targets	-18
-RBS:in_</r>	-22
-RBT:<r>_targets	-18
-RBT:targets_</r>	-18
-RBT:<r>_,	24.5
-R:X:ziel:ziel	-125
-RBT:,_</r>	-38
-R:X:NX___NX:N1___N2	-347
-R:X:dies_NX:so_N1	200
-RBT:<r>_ziel	-125
-RBS:dies_X	256
-RBT:ziel_</r>	-125
-RBT:<r>_at	23
-R:X:dies_NX:this_to_N1	156.5
-R:X:ziel_NX:goal_N1	49
-RBT:this_to	156.5
-RBS:ziel_X	219
-R:X:dies_NX:this_N1	-100.5
-R:X:ziel_NX:targets_N1	-19
-R:X:dies_ist:could_be	118.5
-R:X:ziel_NX:target_N1	-20
-RBS:dies_ist	92
-R:X:sein_NX:being_able_to_N1	-71.5
-RBT:in_</r>	-65.5
-R:X:in:for	31
-RBT:<r>_could	118.5
-RBS:sein_X	-68
-RBT:could_be	118.5
-RBT:being_able	-63
-RBT:<r>_for	14.5
-RBT:able_to	-63
-RBT:for_</r>	14.5
-R:X:sein_NX:be_N1	-10
-R:X:sein_NX:his_N1	184.5
-RBS:X_ist	-507.5
-R:X:sein_NX:its_N1	-26.5
-R:X:in:in	-53
-R:X:sein_NX:N1_be	-174.5
-R:X:NX_ziel:N1_aim	-32.5
-RBT:<r>_in	-75.5
-RBS:X_ziel	143
-R:X:NX_ziel:N1_goal	20
-R:X:NX_ziel:N1_target	-26.5
-R:X:NX_ziel:N1_targets	-27
-RBT:<r>_into	-270
-R:X:NX_ziel_NX:N1_goal_N2	60
-R:X:NX_ziel_NX:N1_targets_N2	-6
-R:X:NX_sie_NX_,_dass:N1_N2_that	346
-R:X:NX_ziel_NX:N1_target_N2	-6
-R:X:dies_ist_NX:this_is_N1	-26.5
-R:X:NX_ziel_NX:N2_N1_goal	161
-RBT:<r>_of	-38
-RBT:of_</r>	-17
-R:X:NX_ist_NX:is_N1_N2	-129
-RBS:<r>_die	428.5
-R:X:NX_ist_NX:is_N1_,_N2	16.5
-RBS:die_</r>	-116
-RBT:<r>_on	-653.5
-RBT:on_</r>	84.5
-R:X:NX_ist_NX:'s_N1_N2	-41.5
-R:X:die:,	-9
-RBT:<r>_over	45
-R:X:die:a	-5
-R:X:NX_ist_NX:N1_has_N2	-104.5
-R:X:blieben_NX:remained_N1	135
-R:X:die:an	-123
-R:X:NX_ist_NX:N1_is_at_N2	-5.5
-RBS:<r>_blieben	187.5
-R:X:NX_ist_NX:N1_is_well_N2	-5
-RBS:blieben_X	-13
-RBT:<r>_are	-65.5
-RBT:<r>_'s	16
-R:X:NX_ist_NX:N1_is_N2	-31
-RBT:are_</r>	-65.5
-RBT:'s_</r>	-28.5
-R:X:blieben_NX:N1_remained	81.5
-R:X:NX_ist_NX:N1_,_is_N2	59.5
-R:X:die:by	-10
-R:X:die:its	302.5
-RBS:<r>_pakistanis	57
-RBS:pakistanis_</r>	116.5
-RBT:<r>_to	93.5
-RBT:<r>_pakistanis	161
-R:X:NX_ist_NX:N1_N2_has	-75
-R:X:die:the	-28
-RBT:to_</r>	18
-R:X:NX_ist_NX:N1_N2_is	-97.5
-R:X:pakistanis_NX:pakistanis_N1	57
-R:X:NX_ist_NX:N1_,_N2_is	-1
-RBT:<r>_those	-6
-RBT:<r>_within	20
-RBT:within_</r>	20
-RBS:pakistanis_X	-116.5
-R:X:NX_blieben_NX:N1_,_N2_remained	-229.5
-R:X:NX_ist_NX:N2_is_N1	-47
-RBS:X_blieben	-187.5
-RBT:<unk>_is	-21
-R:X:NX_pakistanis:pakistanis_,_N1	235.5
-RBS:X_pakistanis	-57
-RBT:pakistanis_,	104
-R:X:NX_pakistanis:N1_pakistanis	-119
-R:X:NX_ist_NX:N2_N1_is	-46.5
-RBS:blieben_</r>	13
-RBT:<r>_is	-251
-R:X:blieben:blieben	-29
-RBT:<r>_blieben	-29
-RBT:blieben_</r>	-29
-R:X:NX_pakistanis_NX:pakistanis_,_N1_,_N2	-23
-RBS:<r>_zu	-560
-R:X:NX_pakistanis_NX:N1_pakistanis_N2	-150.5
-RBS:zu_X	-717.5
-R:X:NX_blieben:N1_,_remained	42
-RBS:<r>__	347
-RBS:<r>_ein	37.5
-RBS:ein_</r>	-9.5
-RBS:der_</r>	-88.5
-R:X:zu_NX:for_N1	43
-R:X:__NX:__N1	-97
-RBT:<r>_-	113
-RBT:-_</r>	-4
-R:X:__NX:,_N1	444
-R:X:zu_NX:in_N1	37.5
-RBT:<r>_a	-27.5
-RBT:a_</r>	-5
-RBS:sie_</r>	-346
-RBT:the_</r>	40
-R:X:zu_NX:to_N1	-716
-R:X:zu_NX:with_N1	40.5
-R:X:zu_NX:N1_on	30
-RBT:<r>_the	324.5
-R:X:NX_sie:but_N1	-346
-RBS:X_ein	-37.5
-RBT:be_transformed	-12
-R:X:medien:media	299.5
-RBS:<r>_medien	-71.5
-RBT:<r>_with	54.5
-RBS:medien_</r>	-156
-RBT:with_</r>	-19
-RBT:<r>_media	299.5
-R:X:NX_ein:N1_has_an	-3.5
-RBT:media_</r>	299.5
-R:X:NX_ein:N1_put_forward_a	-6
-R:X:medien:medien	-371
-RBT:<r>_medien	-371
-RBT:medien_</r>	-371
-RBS:der_X	45
-RBS:medien_X	156
-R:X:NX_zu_NX:in_N2_N1	-9.5
-RBS:X_zu	339
-RBT:in_<unk>	-2.5
-R:X:NX_zu_NX:of_N2_N1	-52.5
-RBT:to_<unk>	-102.5
-RBT:<unk>_to	30
-R:X:,_dass_NX:that_N1	317
-R:X:NX_zu_NX:to_N2_N1	19
-R:X:NX_zu_NX:N1_in_N2	-2
-R:X:NX_zu_NX:N1_is_N2	-2
-RBS:X_macht	-0.5
-R:X:NX_zu_NX:N1_to_N2	48
-R:X:NX_macht_NX:N1_N2_does	-0.5
-R:X:NX_zu_NX:N2_N1_to	-28
-R:X:NX_zu_NX_.:to_N2_N1_.	22.5
-RBS:an_</r>	28
-R:X:NX_zu_NX_.:N1_is_N2_.	-3.5
-R:X:NX_zu_NX_.:N1_to_N2_.	7.5
-R:X:NX_zu_NX_.:N1_with_N2_.	-3
-R:X:NX_zu_NX_.:N1_N2_.	-221.5
-R:X:NX_zu_NX_.:N2_N1_.	4.5
-R:X:freien:free	-83.5
-RBS:<r>_freien	-118
-RBS:freien_</r>	-201.5
-RBT:<r>_free	210
-RBT:free_</r>	-83.5
-R:X:freien:freien	-276
-RBT:<r>_freien	-276
-RBT:freien_</r>	-276
-RBT:<r>_an	31.5
-R:X:freien_NX:free_N1	248
-RBT:an_</r>	-123
-RBS:freien_X	201.5
-R:X:NX_medien:N1_media	-90
-RBS:X_medien	71.5
-R:X:amerika:america	193
-RBS:<r>_amerika	-36
-R:X:NX_medien_NX:N2_N1_media	5
-R:X:an_NX:in_N1	210
-R:X:freien_NX_.:free_N1_.	-6.5
-RBS:amerika_</r>	-131
-R:X:NX_medien_NX_.:N2_N1_media_.	151
-RBT:<r>_america	283.5
-RBT:america_</r>	193
-R:X:die_NX:an_N1	-7.5
-R:X:amerika:american	-3
-RBS:die_X	-45.5
-RBT:<r>_american	-3
-RBT:american_</r>	-3
-R:X:amerika:amerika	-321
-RBS:<r>_jener	62.5
-R:X:die_NX:a_N1	19
-RBT:<r>_amerika	-321
-RBS:jener_X	62.5
-RBT:amerika_</r>	-321
-R:X:jener_NX:the_N1	62.5
-R:X:an_NX:to_N1	-210
-RBS:X_jener	-62.5
-RBS:amerika_X	131
-R:X:amerika_NX:america_N1	107
-R:X:die_NX:is_N1	-2.5
-RBS:an_der	-28
-R:X:auf:,	-5
-R:X:die_NX:its_N1	-14
-RBS:auf_</r>	46.5
-R:X:die_NX:'s_N1	46.5
-RBS:X_der	71
-R:X:NX_der:N1_for	-74
-R:X:NX_der:N1_in	-43
-R:X:auf:in	-5.5
-RBT:<r>_choice	-103
-R:X:die_NX:the_N1	-86.5
-RBT:<r>_decision	103
-R:X:auf:on	60
-R:X:die_NX:those_N1	-6
-R:X:NX_der:N1_to	72
-R:X:entscheidung_NX:choice_is_N1	-103
-R:X:die_NX:with_N1	73.5
-R:X:auf:auf	-3
-RBT:choice_is	-103
-RBT:<r>_auf	-3
-R:X:entscheidung_NX:decision_N1	103
-R:X:die_NX:,_N1	57
-R:X:die_NX:N1_is	-0.5
-RBT:auf_</r>	-3
-R:X:die_NX:N1_'s	-1
-RBS:auf_X	-46.5
-R:X:die_NX:N1_the	-1
-R:X:NX_freien:N1_free	158
-RBT:of_<unk>	-13
-RBS:X_freien	118
-R:X:NX_der_NX:over_N2_N1	45
-R:X:NX_freien_NX:N1_free_N2	-34
-R:X:NX_freien_NX:N1_free_,_N2	-6
-RBT:over_<unk>	45
-R:X:die_NX_medien:the_N1_media	5.5
-R:X:auf_NX:in_N1	-46.5
-RBT:the_<unk>	-0.5
-R:X:auf_NX:on_N1	66
-R:X:auf_NX:to_N1	-2
-R:X:auf_NX:,_N1	-18
-RBS:X_amerika	36
-RBT:<r>_may	-177
-RBS:und_die	139.5
-RBT:may_</r>	-177
-RBT:<r>_<unk>	585.5
-RBT:<r>_would	-18.5
-RBS:X_die	-568
-RBT:would_</r>	-204
-R:X:NX_die:the_N1	34.5
-R:X:NX_amerika_NX:N2_N1_america	36
-R:X:terroranschläge:terrorist	-22
-R:X:NX_die:,_N1	-42
-R:X:NX_die:N1_,	-173
-RBS:<r>_terroranschläge	-161.5
-RBS:der_macht	0.5
-R:X:NX_die:-_N1	-5
-RBS:terroranschläge_</r>	-46
-R:X:NX_die:N1_a	-1
-RBT:<r>_terrorist	-119.5
-R:X:NX_der_macht_NX:N1_hold_N2_power	28
-RBT:terrorist_</r>	-22
-R:X:,:,	-2.5
-RBT:terrorist_attacks	77.5
-RBS:<r>_,	-182
-RBT:attacks_</r>	28
-RBS:,_</r>	-160.5
-R:X:terroranschläge:terroranschläge	-52
-RBT:<r>_terroranschläge	-52
-RBT:<r>__	-139
-RBT:terroranschläge_</r>	-52
-R:X:NX_die:N1_its	-128.5
-RBS:terroranschläge_X	46
-RBT:<r>_--	-64
-R:X:terroranschläge_NX:terrorist_attacks_N1	-87.5
-RBT:<r>_by	-10
-RBT:by_</r>	-10
-R:X:,:out	-3.5
-RBT:<r>_out	-3.5
-R:X:und_die_NX:and_N1	218
-RBT:out_</r>	-3.5
-RBT:<r>_that	-261.5
-R:X:NX_die_NX:the_N1_N2	-1
-RBT:that_</r>	-127.5
-R:X:NX_die_NX:the_N2_N1	-4
-RBS:,_X	-335
-RBT:,_as	-40
-R:X:,_NX:in_N1	-239
-R:X:,_NX:of_N1	-4
-R:X:,_NX:on_N1	-166
-R:X:,_NX:to_N1	649
-R:X:NX_die_NX:N1_the_N2	-4
-R:X:,_NX:,_N1	-399
-R:X:,_NX:__N1	-42
-R:X:,_NX:--_N1	-102
-R:X:,_an:to	28
-RBS:,_an	28
-R:X:NX_die_NX:N1_,_N2	-5
-R:X:NX_die_NX:N1_N2_the	-4
-RBS:X_an	-28
-RBS:die_terroranschläge	161.5
-R:X:die_terroranschläge:,_terrorist_attacks	28
-RBT:,_terrorist	175
-R:X:die_terroranschläge_NX:,_terrorist_attacks_N1	147
-R:X:NX_so:N1_as	-1.5
-R:X:justiz:judiciary	-90
-RBS:<r>_justiz	-1
-RBS:justiz_</r>	-220.5
-R:X:NX_so:N1_that	-14
-RBT:<r>_judiciary	215
-R:X:NX_so:N1_the	15.5
-RBT:judiciary_</r>	-90
-R:X:justiz:justiz	-216
-RBT:<r>_justiz	-216
-RBT:justiz_</r>	-216
-R:X:justiz_NX:judiciary_N1	305
-RBS:justiz_X	205
-RBS:<r>_brachten	-28
-RBS:justiz_und	15.5
-RBS:brachten_</r>	-175
-R:X:NX_und_die:'s_N1_and	-5
-R:X:brachten:brachten	-175
-RBT:<r>_brachten	-175
-RBT:brachten_</r>	-175
-R:X:NX_an_der:N1_the	-0.5
-R:X:brachten_NX:N1_brought	147
-RBS:brachten_X	175
-R:X:NX_die_terroranschläge_NX:,_terrorist_attacks_N2_N1	-13.5
-R:X:NX_und_die:N1_'s	-12
-R:X:NX_und_die_NX:'s_N2_N1	-16
-RBS:<r>_2001	-14.5
-RBS:2001_</r>	28
-RBT:<r>_2001	37.5
-R:X:NX_und_die_NX:N1_and_N2	-159
-RBT:2001_</r>	28
-R:X:2001_NX:2001_N1	147
-RBS:2001_X	-28
-R:X:NX_brachten_NX:N1_N2_brought	28
-RBS:X_brachten	28
-RBT:,_<unk>	-109.5
-R:X:2001_NX_die_NX:2001_,_N2_N1	-161.5
-R:X:unabhängige:independent	38
-RBT:2001_,	-109.5
-RBS:<r>_unabhängige	127
-RBS:unabhängige_</r>	-197
-RBT:<r>_independent	343
-RBT:independent_</r>	38
-RBT:<r>_september	-13.5
-R:X:unabhängige:unabhängige	-198
-RBT:<r>_unabhängige	-198
-RBS:ein_X	9.5
-RBT:unabhängige_</r>	-198
-RBS:september_X	-14.5
-R:X:unabhängige_NX:independent_N1	287
-R:X:ein_NX:an_N1	132
-R:X:ein_NX:any_N1	25
-RBS:unabhängige_X	197
-R:X:NX_justiz:N1_judiciary	85.5
-R:X:NX_an_der_macht_NX:N1_of_power_N2	-27.5
-RBS:X_justiz	1
-R:X:NX_justiz_NX:N1_judiciary_N2	-43
-R:X:NX_justiz_und:N1_judiciary_and	15.5
-RBS:<r>_11	-13.5
-R:X:NX_unabhängige:N1_independent	-37
-R:X:ein_NX:a_N1	-93
-RBS:X_unabhängige	-127
-R:X:ein_NX:one_N1	-15
-R:X:NX_unabhängige_NX:N1_independent_N2	-90
-R:X:ein_NX:-_N1	-11.5
-R:X:NX_ein_NX:an_N1_N2	-6
-R:X:NX_ein_NX:be_transformed_N1_N2	-22
-RBS:X_,	-3.5
-RBS:september_2001	14.5
-RBT:,_2001	14.5
-R:X:NX_,:to_N1	68
-R:X:NX_,:N1__	1
-R:X:NX_,:N1_--	-172.5
-R:X:11_._september_2001_NX:september_11_,_2001_N1	-13.5
-R:X:die_NX_und_NX:the_N1_N2	-10
-R:X:NX_,:N1_for	-127.5
-R:X:NX_,:N1_in	-13.5
-R:X:NX_,:N1_of	-55
-R:X:NX_,:N1_on	257.5
-R:X:NX_,:N1_out	-58
-RBS:am_11	13.5
-R:X:die_NX_justiz_NX_die:the_N1_judiciary_N2	-57
-R:X:NX_,:N1_refuses_to	-232.5
-R:X:die_NX_und_die:the_N1_and	148
-R:X:die_NX_und_die:the_N1_and_the	-2.5
-RBT:the_september	13.5
-R:X:die_NX_die_NX:the_N1_N2	-3
-R:X:am_11_._september_NX:the_september_11_,_N1	-14.5
-R:X:die_NX_und_die_NX:the_N1_and_N2	-32
-RBS:zu_</r>	672
-R:X:NX_,_NX:N1_,_N2	-78
-R:X:NX_,_NX:N1_N2_,	80
-R:X:am_11_._september_2001:the_september_11_,_2001	28
-R:X:zu:for	-5
-R:X:zu:in	-7
-R:X:zu:to	23
-R:X:taliban:taliban	-251.5
-RBS:<r>_taliban	-223.5
-RBS:taliban_</r>	-157.5
-R:X:zu:with	-6
-R:X:verzweifelten:desperate	28.5
-RBT:<r>_taliban	-205.5
-RBT:<r>_desperate	28.5
-RBT:taliban_</r>	-107
-RBT:desperate_</r>	28.5
-R:X:taliban_NX:taliban_N1	28
-R:X:verzweifelten:verzweifelten	-28.5
-RBS:taliban_X	157.5
-R:X:NX_zu:to_N1	-229
-RBT:<r>_verzweifelten	-28.5
-R:X:den_taliban:the_taliban	144.5
-RBT:verzweifelten_</r>	-28.5
-RBS:den_taliban	223.5
-RBT:the_taliban	144.5
-R:X:NX_zu:N1_for	-152
-R:X:NX_zu:N1_in	-6
-R:X:NX_zu:N1_is	251
-R:X:NX_zu:N1_of	-49.5
-RBS:<r>_dem	22
-RBT:<r>_its	458
-RBT:its_</r>	337
-R:X:NX_den_taliban:N1_taliban	-50.5
-R:X:NX_den_taliban_NX:N1_taliban_N2	-2.5
-R:X:NX_den_taliban_NX:N2_N1_taliban	132
-R:X:erklärte:declared	-8
-RBS:<r>_erklärte	-185.5
-RBS:erklärte_</r>	-124.5
-RBT:<r>_declaring	-9
-R:X:erklärte:erklärte	-116.5
-RBT:<r>_erklärte	-116.5
-RBT:erklärte_</r>	-116.5
-R:X:erklärte_NX:declared_N1	-52
-RBS:erklärte_X	-61
-RBS:jener_</r>	-62.5
-R:X:erklärte_NX:declaring_N1	-9
-RBS:erklärte_,	185.5
-R:X:NX_jener:N1_of	-62.5
-R:X:dem_NX:the_N1	22
-R:X:verkaufen:sell	-153
-RBS:<r>_verkaufen	-153
-RBS:verkaufen_</r>	-140.5
-RBT:sell_</r>	-153
-RBS:bereit_</r>	86
-RBS:zu_verkaufen	153
-RBS:<r>_bemühen	-2.5
-R:X:bereit:bereit	86
-RBT:<r>_bereit	86
-RBT:bereit_</r>	86
-R:X:bereit_NX:ready_N1	-31
-RBS:bereit_X	-86
-R:X:bereit_NX:N1_ready	-55
-RBS:X_zum	30
-R:X:bemühen:bemühen	-2.5
-R:X:NX_erklärte_,:N1_,	110
-RBT:<r>_bemühen	-2.5
-RBS:X_erklärte	185.5
-RBT:bemühen_</r>	-2.5
-R:X:NX_erklärte_,_NX:N1_,_N2	75.5
-RBS:in_X	22
-RBS:<r>_sich	-17.5
-R:X:NX_zu_verkaufen:sell_N1	12.5
-RBS:sich_</r>	-17.5
-R:X:NX_zum_NX:N2_to_further_N1	30
-RBS:<r>_das	45
-RBS:das_</r>	2.5
-RBT:to_further	30
-RBT:<r>_it	-381
-RBT:it_</r>	3
-RBT:<r>_so	172.5
-RBT:so_</r>	-74.5
-RBT:<r>_this	9.5
-RBT:this_</r>	-11.5
-RBS:X_dem	-22
-R:X:das_NX:a_growing_N1	77
-RBS:das_X	-2.5
-RBT:a_growing	-41
-R:X:das_NX:be_N1	169
-R:X:das_NX:its_N1	-95
-R:X:das_NX:so_N1	-38
-RBS:X_sein	91.5
-R:X:das_NX:the_N1	-80
 done
 
 ---
 Best iteration: 2 [SCORE 'stupid_bleu'=0.37119].
-This took 0.6 min.
+This took 0.55 min.
-- 
cgit v1.2.3


From f67d074917d61b0f255dab5ae6adf5781430c9fd Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Sun, 17 Mar 2013 23:26:24 -0400
Subject: fix possible utf8 bug

---
 corpus/lowercase.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/corpus/lowercase.pl b/corpus/lowercase.pl
index 688e493b..9fd91dac 100755
--- a/corpus/lowercase.pl
+++ b/corpus/lowercase.pl
@@ -2,7 +2,7 @@
 use strict;
 binmode(STDIN,":utf8");
 binmode(STDOUT,":utf8");
-while(<>) {
+while(<STDIN>) {
   $_ = lc $_;
   print;
 }
-- 
cgit v1.2.3


From 8acded145b7f23b2c57a6ad93487f727b8a19b3b Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Wed, 20 Mar 2013 12:24:01 -0400
Subject: n-gram word class features

---
 decoder/ff_ngrams.cc | 68 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 63 insertions(+), 5 deletions(-)

diff --git a/decoder/ff_ngrams.cc b/decoder/ff_ngrams.cc
index 9c13fdbb..d337b28b 100644
--- a/decoder/ff_ngrams.cc
+++ b/decoder/ff_ngrams.cc
@@ -60,7 +60,7 @@ namespace {
   }
 }
 
-static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, vector<string>& prefixes, string& target_separator) {
+static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, vector<string>& prefixes, string& target_separator, string* cluster_file) {
   vector<string> const& argv=SplitOnWhitespace(in);
   *explicit_markers = false;
   *order = 3;
@@ -103,6 +103,10 @@ static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order,
 	LMSPEC_NEXTARG;
 	prefixes[5] = *i;
 	break;
+      case 'c':
+        LMSPEC_NEXTARG;
+        *cluster_file = *i;
+        break;
       case 'S':
 	LMSPEC_NEXTARG;
 	target_separator = *i;
@@ -124,6 +128,7 @@ usage:
 
        << "NgramFeatures Usage: \n"			     
        << " feature_function=NgramFeatures filename.lm [-x] [-o <order>] \n"
+       << " [-c <cluster-file>]\n"
        << " [-U <unigram-prefix>] [-B <bigram-prefix>][-T <trigram-prefix>]\n"
        << " [-4 <4-gram-prefix>] [-5 <5-gram-prefix>] [-S <separator>]\n\n" 
     
@@ -203,6 +208,12 @@ class NgramDetectorImpl {
     SetFlag(flag, HAS_FULL_CONTEXT, state);
   }
 
+  WordID MapToClusterIfNecessary(WordID w) const {
+    if (cluster_map.size() == 0) return w;
+    if (w >= cluster_map.size()) return kCDEC_UNK;
+    return cluster_map[w];
+  }
+
   void FireFeatures(const State<5>& state, WordID cur, SparseVector<double>* feats) {
     FidTree* ft = &fidroot_;
     int n = 0;
@@ -285,7 +296,7 @@ class NgramDetectorImpl {
           context_complete = true;
         }
       } else {   // handle terminal
-        const WordID cur_word = e[j];
+        const WordID cur_word = MapToClusterIfNecessary(e[j]);
         SparseVector<double> p;
         if (cur_word == kSOS_) {
           state = BeginSentenceState();
@@ -348,9 +359,52 @@ class NgramDetectorImpl {
     }
   }
 
+  void ReadClusterFile(const string& clusters) {
+    ReadFile rf(clusters);
+    istream& in = *rf.stream();
+    string line;
+    int lc = 0;
+    string cluster;
+    string word;
+    while(getline(in, line)) {
+      ++lc;
+      if (line.size() == 0) continue;
+      if (line[0] == '#') continue;
+      unsigned cend = 1;
+      while((line[cend] != ' ' && line[cend] != '\t') && cend < line.size()) {
+        ++cend;
+      }
+      if (cend == line.size()) {
+        cerr << "Line " << lc << " in " << clusters << " malformed: " << line << endl;
+        abort();
+      }
+      unsigned wbeg = cend + 1;
+      while((line[wbeg] == ' ' || line[wbeg] == '\t') && wbeg < line.size()) {
+        ++wbeg;
+      }
+      if (wbeg == line.size()) {
+        cerr << "Line " << lc << " in " << clusters << " malformed: " << line << endl;
+        abort();
+      }
+      unsigned wend = wbeg + 1;
+      while((line[wend] != ' ' && line[wend] != '\t') && wend < line.size()) {
+        ++wend;
+      }
+      const WordID clusterid = TD::Convert(line.substr(0, cend));
+      const WordID wordid = TD::Convert(line.substr(wbeg, wend - wbeg));
+      if (wordid >= cluster_map.size())
+        cluster_map.resize(wordid + 10, kCDEC_UNK);
+      cluster_map[wordid] = clusterid;
+    }
+    cluster_map[kSOS_] = kSOS_;
+    cluster_map[kEOS_] = kEOS_;
+  }
+
+  vector<WordID> cluster_map;
+
  public:
   explicit NgramDetectorImpl(bool explicit_markers, unsigned order,
-			     vector<string>& prefixes, string& target_separator) :
+			     vector<string>& prefixes, string& target_separator, const string& clusters) :
       kCDEC_UNK(TD::Convert("<unk>")) ,
       add_sos_eos_(!explicit_markers) {
     order_ = order;
@@ -369,6 +423,9 @@ class NgramDetectorImpl {
     dummy_rule_.reset(new TRule("[DUMMY] ||| [BOS] [DUMMY] ||| [1] [2] </s> ||| X=0"));
     kSOS_ = TD::Convert("<s>");
     kEOS_ = TD::Convert("</s>");
+
+    if (clusters.size())
+      ReadClusterFile(clusters);
   }
 
   ~NgramDetectorImpl() {
@@ -409,9 +466,10 @@ NgramDetector::NgramDetector(const string& param) {
   vector<string> prefixes;
   bool explicit_markers = false;
   unsigned order = 3;
-  ParseArgs(param, &explicit_markers, &order, prefixes, target_separator);
+  string clusters;
+  ParseArgs(param, &explicit_markers, &order, prefixes, target_separator, &clusters);
   pimpl_ = new NgramDetectorImpl(explicit_markers, order, prefixes, 
-				 target_separator);
+				 target_separator, clusters);
   SetStateSize(pimpl_->ReserveStateSize());
 }
 
-- 
cgit v1.2.3


From a931d2df4bad5ecc220b62874fb63dc3b8d00ee9 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Wed, 20 Mar 2013 12:56:46 -0400
Subject: switch to new score interface for mira

---
 training/mira/kbest_mira.cc | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/training/mira/kbest_mira.cc b/training/mira/kbest_mira.cc
index 8b7993dd..bcb261c9 100644
--- a/training/mira/kbest_mira.cc
+++ b/training/mira/kbest_mira.cc
@@ -8,9 +8,11 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "stringlib.h"
 #include "hg_sampler.h"
 #include "sentence_metadata.h"
-#include "scorer.h"
+#include "ns.h"
+#include "ns_docscorer.h"
 #include "verbose.h"
 #include "viterbi.h"
 #include "hg.h"
@@ -91,8 +93,9 @@ struct GoodBadOracle {
 };
 
 struct TrainingObserver : public DecoderObserver {
-  TrainingObserver(const int k, const DocScorer& d, bool sf, vector<GoodBadOracle>* o) : ds(d), oracles(*o), kbest_size(k), sample_forest(sf) {}
-  const DocScorer& ds;
+  TrainingObserver(const int k, const DocumentScorer& d, const EvaluationMetric& m, bool sf, vector<GoodBadOracle>* o) : ds(d), metric(m), oracles(*o), kbest_size(k), sample_forest(sf) {}
+  const DocumentScorer& ds;
+  const EvaluationMetric& metric;
   vector<GoodBadOracle>& oracles;
   std::tr1::shared_ptr<HypothesisInfo> cur_best;
   const int kbest_size;
@@ -121,13 +124,16 @@ struct TrainingObserver : public DecoderObserver {
     if (sample_forest) {
       vector<WordID> cur_prediction;
       ViterbiESentence(forest, &cur_prediction);
-      float sentscore = ds[sent_id]->ScoreCandidate(cur_prediction)->ComputeScore();
+      SufficientStats sstats;
+      ds[sent_id]->Evaluate(cur_prediction, &sstats);
+      float sentscore = metric.ComputeScore(sstats);
       cur_best = MakeHypothesisInfo(ViterbiFeatures(forest), sentscore);
 
       vector<HypergraphSampler::Hypothesis> samples;
       HypergraphSampler::sample_hypotheses(forest, kbest_size, &*rng, &samples);
       for (unsigned i = 0; i < samples.size(); ++i) {
-        sentscore = ds[sent_id]->ScoreCandidate(samples[i].words)->ComputeScore();
+        ds[sent_id]->Evaluate(samples[i].words, &sstats);
+        float sentscore = metric.ComputeScore(sstats);
         if (invert_score) sentscore *= -1.0;
         if (!cur_good || sentscore > cur_good->mt_metric)
           cur_good = MakeHypothesisInfo(samples[i].fmap, sentscore);
@@ -136,11 +142,13 @@ struct TrainingObserver : public DecoderObserver {
       }
     } else {
       KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
+      SufficientStats sstats;
       for (int i = 0; i < kbest_size; ++i) {
         const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
           kbest.LazyKthBest(forest.nodes_.size() - 1, i);
         if (!d) break;
-        float sentscore = ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore();
+        ds[sent_id]->Evaluate(d->yield, &sstats);
+        float sentscore = metric.ComputeScore(sstats);
         if (invert_score) sentscore *= -1.0;
         // cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl;
         if (i == 0)
@@ -192,15 +200,20 @@ int main(int argc, char** argv) {
   }
   vector<string> corpus;
   ReadTrainingCorpus(conf["source"].as<string>(), &corpus);
-  const string metric_name = conf["mt_metric"].as<string>();
-  ScoreType type = ScoreTypeFromString(metric_name);
-  if (type == TER) {
-    invert_score = true;
-  } else {
-    invert_score = false;
+
+  string metric_name = UppercaseString(conf["evaluation_metric"].as<string>());
+  if (metric_name == "COMBI") {
+    cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
+    metric_name = "COMB:TER=-0.5;IBM_BLEU=0.5";
+  } else if (metric_name == "BLEU") {
+    cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n";
+    metric_name = "IBM_BLEU";
   }
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  EvaluationMetric* metric = EvaluationMetric::Instance(metric_name);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
   cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
+  invert_score = metric->IsErrorMetric();
+
   if (ds.size() != corpus.size()) {
     cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
     return 1;
@@ -221,7 +234,7 @@ int main(int argc, char** argv) {
   assert(corpus.size() > 0);
   vector<GoodBadOracle> oracles(corpus.size());
 
-  TrainingObserver observer(conf["k_best_size"].as<int>(), ds, sample_forest, &oracles);
+  TrainingObserver observer(conf["k_best_size"].as<int>(), ds, *metric, sample_forest, &oracles);
   int cur_sent = 0;
   int lcount = 0;
   int normalizer = 0;
-- 
cgit v1.2.3


From da52ee6fa4af02b811b8b558ec8437384d2ba5bd Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Wed, 20 Mar 2013 13:00:22 -0400
Subject: bug fix

---
 training/mira/kbest_mira.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/training/mira/kbest_mira.cc b/training/mira/kbest_mira.cc
index bcb261c9..d59b4224 100644
--- a/training/mira/kbest_mira.cc
+++ b/training/mira/kbest_mira.cc
@@ -201,7 +201,7 @@ int main(int argc, char** argv) {
   vector<string> corpus;
   ReadTrainingCorpus(conf["source"].as<string>(), &corpus);
 
-  string metric_name = UppercaseString(conf["evaluation_metric"].as<string>());
+  string metric_name = UppercaseString(conf["mt_metric"].as<string>());
   if (metric_name == "COMBI") {
     cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
     metric_name = "COMB:TER=-0.5;IBM_BLEU=0.5";
-- 
cgit v1.2.3


From 4db3a0164dcd97d7540ff4239548df39398fd23c Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Sat, 23 Mar 2013 23:09:37 -0400
Subject: fix rules features

---
 decoder/ff_rules.cc | 20 ++++++++++++++++----
 decoder/ff_rules.h  |  1 +
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc
index 6716d3da..410e083c 100644
--- a/decoder/ff_rules.cc
+++ b/decoder/ff_rules.cc
@@ -107,7 +107,12 @@ void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& sme
   (*features) += it->second;
 }
 
-RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) {
+RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) : inds(1000) {
+  for (unsigned i = 0; i < inds.size(); ++i) {
+    ostringstream os;
+    os << (i + 1);
+    inds[i] = os.str();
+  }
 }
 
 void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
@@ -126,11 +131,18 @@ void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& sme
     it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first;
     SparseVector<double>& f = it->second;
     string prev = "<r>";
+    vector<WordID> nt_types(rule.Arity());
+    unsigned ntc = 0;
+    for (int i = 0; i < rule.f_.size(); ++i)
+      if (rule.f_[i] < 0) nt_types[ntc++] = -rule.f_[i];
     for (int i = 0; i < rule.e_.size(); ++i) {
       WordID w = rule.e_[i];
-      if (w < 0) w = -w;
-      if (w == 0) return;
-      const string& cur = TD::Convert(w);
+      string cur;
+      if (w > 0) {
+        cur = TD::Convert(w);
+      } else {
+        cur = TD::Convert(nt_types[-w]) + inds[-w];
+      }
       ostringstream os;
       os << "RBT:" << prev << '_' << cur;
       const int fid = FD::Convert(Escape(os.str()));
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index b100ec34..f210dc65 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -51,6 +51,7 @@ class RuleTargetBigramFeatures : public FeatureFunction {
                                      void* context) const;
   virtual void PrepareForInput(const SentenceMetadata& smeta);
  private:
+  std::vector<std::string> inds;
   mutable std::map<const TRule*, SparseVector<double> > rule2_feats_;
 };
 
-- 
cgit v1.2.3


From b203f8c5dc8cff1b9c9c2073832b248fcad0765a Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Tue, 26 Mar 2013 10:44:45 -0400
Subject: swahili abbreviations

---
 corpus/support/token_list | 152 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/corpus/support/token_list b/corpus/support/token_list
index 366cd7ff..43dd80d9 100644
--- a/corpus/support/token_list
+++ b/corpus/support/token_list
@@ -294,3 +294,155 @@ Z.
 т.н.
 т.ч.
 н.э.
+# Swahili
+A.D.
+Afr.
+A.G.
+agh.
+A.H.
+A.M.
+a.s.
+B.A.
+B.C.
+Bi.
+B.J.
+B.K.
+B.O.M.
+Brig.
+Bro.
+bt.
+bw.
+Bw.
+Cap.
+C.C.
+cCM.
+C.I.A.
+cit.
+C.M.S.
+Co.
+Corp.
+C.S.Sp.
+C.W.
+D.C.
+Dk.
+Dkt.
+Dk.B.
+Dr.
+E.C.
+e.g.
+E.M.
+E.n.
+etc.
+Feb.
+F.F.U.
+F.M.
+Fr.
+F.W.
+I.C.O.
+i.e.
+I.L.C.
+Inc.
+Jan.
+J.F.
+Jr.
+J.S.
+J.V.W.A.
+K.A.R.
+K.A.U.
+K.C.M.C.
+K.k.
+K.K.
+k.m.
+km.
+K.m.
+K.N.C.U.
+K.O.
+K.S.
+Ksh.
+kt.
+kumb.
+k.v.
+kv.
+L.G.
+ltd.
+Ltd.
+M.A.
+M.D.
+mf.
+Mh.
+Mhe.
+mil.
+m.m.
+M.m.
+Mm.
+M.M.
+Mr.
+Mrs.
+M.S.
+Mt.
+Mw.
+M.W.
+Mwl.
+na.
+Na.
+N.F.
+N.J.
+n.k.
+nk.
+n.k.w.
+N.N.
+Nov.
+O.C.D.
+op.
+P.C.
+Phd.
+Ph.D.
+P.J.
+P.o.
+P.O.
+P.O.P.
+P.P.F.
+Prof.
+P.s.
+P.S.
+Q.C.
+Rd.
+s.a.w.
+S.A.W.
+S.D.
+Sept.
+sh.
+Sh.
+SH.
+shs.
+Shs.
+S.J.
+S.L.
+S.L.P.
+S.s.
+S.S.
+St.
+s.w.
+s.w.T.
+taz.
+Taz.
+T.C.
+T.E.C.
+T.L.P.
+T.O.H.S.
+Tsh.
+T.V.
+tz.
+uk.
+Uk.
+U.M.C.A.
+U.N.
+U.S.
+Ush.
+U.W.T.
+Viii.
+Vol.
+V.T.C.
+W.H.
+yamb.
+Y.M.C.A.
-- 
cgit v1.2.3


From d26d05bb60d0b9687c942a74a0f59cef632f9bf4 Mon Sep 17 00:00:00 2001
From: Avneesh Saluja <asaluja@gmail.com>
Date: Thu, 28 Mar 2013 18:57:58 -0700
Subject: re-organized latent SVM (sub-dir of training now)

---
 latent_svm/Makefile.am   |   6 -
 latent_svm/latent_svm.cc | 412 -----------------------------------------------
 2 files changed, 418 deletions(-)
 delete mode 100644 latent_svm/Makefile.am
 delete mode 100644 latent_svm/latent_svm.cc

diff --git a/latent_svm/Makefile.am b/latent_svm/Makefile.am
deleted file mode 100644
index 673b9159..00000000
--- a/latent_svm/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-bin_PROGRAMS = latent_svm
-
-latent_svm_SOURCES = latent_svm.cc
-latent_svm_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/latent_svm/latent_svm.cc b/latent_svm/latent_svm.cc
deleted file mode 100644
index ab9c1d5d..00000000
--- a/latent_svm/latent_svm.cc
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
-Points to note regarding variable names:
-total_loss and prev_loss actually refer not to loss, but the metric (usually BLEU)
-*/
-#include <sstream>
-#include <iostream>
-#include <vector>
-#include <cassert>
-#include <cmath>
-
-//boost libraries
-#include <boost/shared_ptr.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-//cdec libraries
-#include "config.h"
-#include "hg_sampler.h"
-#include "sentence_metadata.h"
-#include "scorer.h"
-#include "verbose.h"
-#include "viterbi.h"
-#include "hg.h"
-#include "prob.h"
-#include "kbest.h"
-#include "ff_register.h"
-#include "decoder.h"
-#include "filelib.h"
-#include "fdict.h"
-#include "weights.h"
-#include "sparse_vector.h"
-#include "sampler.h"
-
-using namespace std;
-using boost::shared_ptr;
-namespace po = boost::program_options;
-
-bool invert_score; 
-boost::shared_ptr<MT19937> rng; //random seed ptr
-
-void RandomPermutation(int len, vector<int>* p_ids) {
-  vector<int>& ids = *p_ids;
-  ids.resize(len);
-  for (int i = 0; i < len; ++i) ids[i] = i;
-  for (int i = len; i > 0; --i) {
-    int j = rng->next() * i;
-    if (j == i) i--;
-    swap(ids[i-1], ids[j]);
-  }  
-}
-
-bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("weights,w",po::value<string>(),"[REQD] Input feature weights file")
-        ("input,i",po::value<string>(),"[REQD] Input source file for development set")
-        ("passes,p", po::value<int>()->default_value(15), "Number of passes through the training data")
-        ("weights_write_interval,n", po::value<int>()->default_value(1000), "Number of lines between writing out weights")
-        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
-        ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
-        ("regularizer_strength,C", po::value<double>()->default_value(0.01), "regularization strength")
-        ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Cost function is -mt_metric_scale*BLEU")
-        ("costaug_log_bleu,l", "Flag converts BLEU to log space. Cost function is thus -mt_metric_scale*log(BLEU). Not on by default")
-        ("average,A", "Average the weights (this is a weighted average due to the scaling factor)")
-        ("mu,u", po::value<double>()->default_value(0.0), "weight (between 0 and 1) to scale model score by for oracle selection")
-        ("stepsize_param,a", po::value<double>()->default_value(0.01), "Stepsize parameter, during optimization")
-        ("stepsize_reduce,t", "Divide step size by sqrt(number of examples seen so far), as per Ratliff et al., 2007")
-	("metric_threshold,T", po::value<double>()->default_value(0.0), "Threshold for diff between oracle BLEU and cost-aug BLEU for updating the weights")
-	("check_positive,P", "Check that the loss is positive before updating")
-        ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles")
-        ("best_ever,b", "Keep track of the best hypothesis we've ever seen (metric score), and use that as the reference")
-        ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
-        ("decoder_config,c",po::value<string>(),"Decoder configuration file");
-  po::options_description clo("Command line options");
-  clo.add_options()
-        ("config", po::value<string>(), "Configuration file")
-        ("help,h", "Print this help message and exit");
-  po::options_description dconfig_options, dcmdline_options;
-  dconfig_options.add(opts);
-  dcmdline_options.add(opts).add(clo);
-  
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("config")) {
-    ifstream config((*conf)["config"].as<string>().c_str());
-    po::store(po::parse_config_file(config, dconfig_options), *conf);
-  }
-  po::notify(*conf);
-
-  if (conf->count("help") || !conf->count("weights") || !conf->count("input") || !conf->count("decoder_config") || !conf->count("reference")) {
-    cerr << dcmdline_options << endl;
-    return false;
-  }
-  return true;
-}
-
-double scaling_trick = 1; // see http://blog.smola.org/post/940672544/fast-quadratic-regularization-for-online-learning
-/*computes and returns cost augmented score for negative example selection*/
-double cost_augmented_score(const LogVal<double> model_score, const double mt_metric_score, const double mt_metric_scale, const bool logbleu) {
-  if(logbleu) {
-    if(mt_metric_score != 0)
-      // NOTE: log(model_score) is just the model score feature weights * features
-      return log(model_score) * scaling_trick + (- mt_metric_scale * log(mt_metric_score));
-    else
-      return -1000000;
-  }
-  // NOTE: log(model_score) is just the model score feature weights * features
-  return log(model_score) * scaling_trick + (- mt_metric_scale * mt_metric_score);
-}
-
-/*computes and returns mu score, for oracle selection*/
-double muscore(const vector<weight_t>& feature_weights, const SparseVector<double>& feature_values, const double mt_metric_score, const double mu, const bool logbleu) {
-  if(logbleu) {
-    if(mt_metric_score != 0)
-      return feature_values.dot(feature_weights) * mu + (1 - mu) * log(mt_metric_score);
-    else
-      return feature_values.dot(feature_weights) * mu + (1 - mu) * (-1000000);  // log(0) is -inf
-  }
-  return feature_values.dot(feature_weights) * mu + (1 - mu) * mt_metric_score;
-}
-
-static const double kMINUS_EPSILON = -1e-6;
-
-struct HypothesisInfo {
-  SparseVector<double> features;
-  double mt_metric_score;
-  // The model score changes when the feature weights change, so it is not stored here
-  // It must be recomputed every time
-};
-
-struct GoodOracle {
-  shared_ptr<HypothesisInfo> good;
-};
-
-struct TrainingObserver : public DecoderObserver {
-  TrainingObserver(const int k,
-                   const DocScorer& d,
-                   vector<GoodOracle>* o,
-                   const vector<weight_t>& feat_weights,
-                   const double metric_scale,
-                   const double Mu,
-                   const bool bestever,
-                   const bool LogBleu) : ds(d), feature_weights(feat_weights), oracles(*o), kbest_size(k), mt_metric_scale(metric_scale), mu(Mu), best_ever(bestever), log_bleu(LogBleu) {}
-  const DocScorer& ds;
-  const vector<weight_t>& feature_weights;
-  vector<GoodOracle>& oracles;
-  shared_ptr<HypothesisInfo> cur_best;
-  shared_ptr<HypothesisInfo> cur_costaug_best;
-  shared_ptr<HypothesisInfo> cur_ref; 
-  const int kbest_size;
-  const double mt_metric_scale;
-  const double mu;
-  const bool best_ever;
-  const bool log_bleu;
-
-  const HypothesisInfo& GetCurrentBestHypothesis() const {
-    return *cur_best;
-  }
-
-  const HypothesisInfo& GetCurrentCostAugmentedHypothesis() const {
-    return *cur_costaug_best;
-  }
-
-  const HypothesisInfo& GetCurrentReference() const {
-    return *cur_ref; 
-  }
-
-  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
-    UpdateOracles(smeta.GetSentenceID(), *hg);
-  }
-
-  shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double metric) {
-    shared_ptr<HypothesisInfo> h(new HypothesisInfo);
-    h->features = feats;
-    h->mt_metric_score = metric;
-    return h;
-  }
-
-  void UpdateOracles(int sent_id, const Hypergraph& forest) {
-    //shared_ptr<HypothesisInfo>& cur_ref = oracles[sent_id].good;
-    cur_ref = oracles[sent_id].good; 
-    if(!best_ever)
-      cur_ref.reset();
-
-    KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
-    double costaug_best_score = 0;
-
-    for (int i = 0; i < kbest_size; ++i) {
-      const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
-        kbest.LazyKthBest(forest.nodes_.size() - 1, i);
-      if (!d) break;
-      double mt_metric_score = ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore(); //this might need to change!!
-      const SparseVector<double>& feature_vals = d->feature_values; 
-      double costaugmented_score = cost_augmented_score(d->score, mt_metric_score, mt_metric_scale, log_bleu); //note that d->score, i.e., model score, is passed in
-      if (i == 0) { //i.e., setting up cur_best to be model score highest, and initializing costaug_best
-        cur_best = MakeHypothesisInfo(feature_vals, mt_metric_score);
-        cur_costaug_best = cur_best;
-        costaug_best_score = costaugmented_score; 
-      }
-      if (costaugmented_score > costaug_best_score) {   // kbest_mira's cur_bad, i.e., "fear" derivation
-        cur_costaug_best = MakeHypothesisInfo(feature_vals, mt_metric_score);
-        costaug_best_score = costaugmented_score;
-      }
-      double cur_muscore = mt_metric_score;
-      if (!cur_ref)   // kbest_mira's cur_good, i.e., "hope" derivation
-        cur_ref =  MakeHypothesisInfo(feature_vals, cur_muscore);
-      else {
-          double cur_ref_muscore = cur_ref->mt_metric_score;
-          if(mu > 0) { //select oracle with mixture of model score and BLEU
-              cur_ref_muscore =  muscore(feature_weights, cur_ref->features, cur_ref->mt_metric_score, mu, log_bleu);
-              cur_muscore = muscore(feature_weights, d->feature_values, mt_metric_score, mu, log_bleu);
-          }
-          if (cur_muscore > cur_ref_muscore) //replace oracle
-            cur_ref = MakeHypothesisInfo(feature_vals, mt_metric_score);
-      }
-    }
-  }
-};
-
-void ReadTrainingCorpus(const string& fname, vector<string>* c) {
-  ReadFile rf(fname);
-  istream& in = *rf.stream();
-  string line;
-  while(in) {
-    getline(in, line);
-    if (!in) break;
-    c->push_back(line);
-  }
-}
-
-bool ApproxEqual(double a, double b) {
-  if (a == b) return true;
-  return (fabs(a-b)/fabs(b)) < 0.000001;
-}
-
-int main(int argc, char** argv) {
-  register_feature_functions();
-  SetSilent(true);  // turn off verbose decoder output
-
-  po::variables_map conf;
-  if (!InitCommandLine(argc, argv, &conf)) return 1;
-
-  if (conf.count("random_seed"))
-    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
-  else
-    rng.reset(new MT19937);
-
-  const bool best_ever = conf.count("best_ever") > 0;
-  vector<string> corpus;
-  ReadTrainingCorpus(conf["input"].as<string>(), &corpus);
-
-  const string metric_name = conf["mt_metric"].as<string>(); //set up scoring; this may need to be changed!!
-  
-  ScoreType type = ScoreTypeFromString(metric_name);
-  if (type == TER) {
-    invert_score = true;
-  } else {
-    invert_score = false;
-  } 
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
-  cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
-  if (ds.size() != corpus.size()) {
-    cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
-    return 1;
-  }
-
-  ReadFile ini_rf(conf["decoder_config"].as<string>());
-  Decoder decoder(ini_rf.stream());
-
-  // load initial weights
-  vector<weight_t>& decoder_weights = decoder.CurrentWeightVector(); //equivalent to "dense_weights" vector in kbest_mira.cc
-  SparseVector<weight_t> sparse_weights; //equivaelnt to  kbest_mira.cc "lambdas"
-  Weights::InitFromFile(conf["weights"].as<string>(), &decoder_weights);
-  Weights::InitSparseVector(decoder_weights, &sparse_weights);
-
-  //initializing other algorithm and output parameters
-  const double c = conf["regularizer_strength"].as<double>();
-  const int weights_write_interval = conf["weights_write_interval"].as<int>();
-  const double mt_metric_scale = conf["mt_metric_scale"].as<double>();
-  const double mu = conf["mu"].as<double>();
-  const double metric_threshold = conf["metric_threshold"].as<double>();
-  const double stepsize_param = conf["stepsize_param"].as<double>(); //step size in structured SGD optimization step
-  const bool stepsize_reduce = conf.count("stepsize_reduce") > 0; 
-  const bool costaug_log_bleu = conf.count("costaug_log_bleu") > 0;
-  const bool average = conf.count("average") > 0;
-  const bool checkpositive = conf.count("check_positive") > 0;
-
-  assert(corpus.size() > 0);
-  vector<GoodOracle> oracles(corpus.size());
-  TrainingObserver observer(conf["k_best_size"].as<int>(),  // kbest size
-                            ds,                             // doc scorer
-                            &oracles,
-                            decoder_weights,
-                            mt_metric_scale,
-                            mu,
-                            best_ever,
-                            costaug_log_bleu);
-  int cur_sent = 0;
-  int line_count = 0;
-  int normalizer = 0; 
-  double total_loss = 0;
-  double prev_loss = 0;
-  int dots = 0;             // progess bar
-  int cur_pass = 0;
-  SparseVector<double> tot;
-  tot += sparse_weights; //add initial weights to total
-  normalizer++; //add 1 to normalizer
-  int max_iteration = conf["passes"].as<int>();
-  string msg = "# LatentSVM tuned weights";
-  vector<int> order;
-  int interval_counter = 0;
-  RandomPermutation(corpus.size(), &order); //shuffle corpus
-  while (line_count <= max_iteration * corpus.size()) { //loop over all (passes * num sentences) examples
-    //if ((interval_counter * 40 / weights_write_interval) > dots) { ++dots; cerr << '.'; } //check this
-    if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.';}
-    if (interval_counter == weights_write_interval) { //i.e., we need to write out weights
-      sparse_weights *= scaling_trick;
-      tot *= scaling_trick;
-      scaling_trick = 1;
-      cerr << " [SENTENCE NUMBER= " << cur_sent << "\n";
-      cerr << " [AVG METRIC LAST INTERVAL =" << ((total_loss - prev_loss) / weights_write_interval) << "]\n";
-      cerr << " [AVG METRIC THIS PASS THUS FAR =" << (total_loss / cur_sent) << "]\n";
-      cerr << " [TOTAL LOSS: =" << total_loss << "\n";
-      Weights::ShowLargestFeatures(decoder_weights);
-      //dots = 0;
-      interval_counter = 0;
-      prev_loss = total_loss;
-      if (average){
-	SparseVector<double> x = tot;
-	x /= normalizer;
-	ostringstream sa;
-	sa << "weights.latentsvm-" << line_count/weights_write_interval << "-avg.gz";
-	x.init_vector(&decoder_weights);
-	Weights::WriteToFile(sa.str(), decoder_weights, true, &msg); 
-      }
-      else {
-	ostringstream os;
-	os << "weights.latentsvm-" << line_count/weights_write_interval << ".gz";
-	sparse_weights.init_vector(&decoder_weights);
-	Weights::WriteToFile(os.str(), decoder_weights, true, &msg);
-      }
-    }
-    if (corpus.size() == cur_sent) { //i.e., finished a pass
-      //cerr << " [AVG METRIC LAST PASS=" << (document_metric_score / corpus.size()) << "]\n";
-      cerr << " [AVG METRIC LAST PASS=" << (total_loss / corpus.size()) << "]\n";
-      cerr << " TOTAL LOSS: " << total_loss << "\n";
-      Weights::ShowLargestFeatures(decoder_weights);
-      cur_sent = 0;
-      total_loss = 0;
-      dots = 0;
-      if(average) {
-        SparseVector<double> x = tot; 
-        x /= normalizer;
-        ostringstream sa;
-        sa << "weights.latentsvm-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "-avg.gz";
-        x.init_vector(&decoder_weights);
-        Weights::WriteToFile(sa.str(), decoder_weights, true, &msg);
-      }
-      else {
-	ostringstream os;
-	os << "weights.latentsvm-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz";
-	Weights::WriteToFile(os.str(), decoder_weights, true, &msg);	
-      }
-      cur_pass++;
-      RandomPermutation(corpus.size(), &order);
-    }
-    if (cur_sent == 0) { //i.e., starting a new pass
-      cerr << "PASS " << (line_count / corpus.size() + 1) << endl;
-    }
-    sparse_weights.init_vector(&decoder_weights);   // copy sparse_weights to the decoder weights
-    decoder.SetId(order[cur_sent]); //assign current sentence
-    decoder.Decode(corpus[order[cur_sent]], &observer);  // decode/update oracles
-
-    const HypothesisInfo& cur_best = observer.GetCurrentBestHypothesis(); //model score best
-    const HypothesisInfo& cur_costaug = observer.GetCurrentCostAugmentedHypothesis(); //(model + cost) best; cost = -metric_scale*log(BLEU) or -metric_scale*BLEU
-    //const HypothesisInfo& cur_ref = *oracles[order[cur_sent]].good; //this oracle-best line only picks based on BLEU
-    const HypothesisInfo& cur_ref = observer.GetCurrentReference();  //if mu > 0, this mu-mixed oracle will be picked; otherwise, only on BLEU
-    total_loss += cur_best.mt_metric_score; 
-
-    double step_size = stepsize_param;
-    if (stepsize_reduce){       // w_{t+1} = w_t - stepsize_t * grad(Loss) 
-        step_size  /= (sqrt(cur_sent+1.0)); 
-    }
-    //actual update step - compute gradient, and modify sparse_weights
-    if(cur_ref.mt_metric_score - cur_costaug.mt_metric_score > metric_threshold) {
-      const double loss = (cur_costaug.features.dot(decoder_weights) - cur_ref.features.dot(decoder_weights)) * scaling_trick + mt_metric_scale * (cur_ref.mt_metric_score - cur_costaug.mt_metric_score);
-      if (!checkpositive || loss > 0.0) { //can update either all the time if check positive is off, or only when loss > 0 if it's on
-	sparse_weights -= cur_costaug.features * step_size / ((1.0-2.0*step_size*c)*scaling_trick);    // cost augmented hyp orig -
-	sparse_weights += cur_ref.features * step_size / ((1.0-2.0*step_size*c)*scaling_trick);        // ref orig +
-      }
-    }
-    scaling_trick *= (1.0 - 2.0 * step_size * c);
-
-    tot += sparse_weights; //for averaging purposes
-    normalizer++; //for averaging purposes
-    line_count++;
-    interval_counter++;
-    cur_sent++;
-  }
-  cerr << endl;
-  if(average) {
-    tot /= normalizer;
-    tot.init_vector(decoder_weights);
-    msg = "# Latent SSVM tuned weights (averaged vector)";
-    Weights::WriteToFile("weights.latentsvm-final-avg.gz", decoder_weights, true, &msg); 
-    cerr << "Optimization complete.\n" << "AVERAGED WEIGHTS: weights.latentsvm-final-avg.gz\n";
-  } else {
-    Weights::WriteToFile("weights.latentsvm-final.gz", decoder_weights, true, &msg);    
-    cerr << "Optimization complete.\n";
-  }
-  return 0;
-}
-
-- 
cgit v1.2.3


From 2e589c5b297e27a82729084991841d8ab1e1d336 Mon Sep 17 00:00:00 2001
From: Avneesh Saluja <asaluja@gmail.com>
Date: Thu, 28 Mar 2013 18:58:31 -0700
Subject: latent SVM

---
 training/latent_svm/Makefile.am   |   6 +
 training/latent_svm/latent_svm.cc | 412 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 418 insertions(+)
 create mode 100644 training/latent_svm/Makefile.am
 create mode 100644 training/latent_svm/latent_svm.cc

diff --git a/training/latent_svm/Makefile.am b/training/latent_svm/Makefile.am
new file mode 100644
index 00000000..673b9159
--- /dev/null
+++ b/training/latent_svm/Makefile.am
@@ -0,0 +1,6 @@
+bin_PROGRAMS = latent_svm
+
+latent_svm_SOURCES = latent_svm.cc
+latent_svm_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/latent_svm/latent_svm.cc b/training/latent_svm/latent_svm.cc
new file mode 100644
index 00000000..ab9c1d5d
--- /dev/null
+++ b/training/latent_svm/latent_svm.cc
@@ -0,0 +1,412 @@
+/*
+Points to note regarding variable names:
+total_loss and prev_loss actually refer not to loss, but the metric (usually BLEU)
+*/
+#include <sstream>
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include <cmath>
+
+//boost libraries
+#include <boost/shared_ptr.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+//cdec libraries
+#include "config.h"
+#include "hg_sampler.h"
+#include "sentence_metadata.h"
+#include "scorer.h"
+#include "verbose.h"
+#include "viterbi.h"
+#include "hg.h"
+#include "prob.h"
+#include "kbest.h"
+#include "ff_register.h"
+#include "decoder.h"
+#include "filelib.h"
+#include "fdict.h"
+#include "weights.h"
+#include "sparse_vector.h"
+#include "sampler.h"
+
+using namespace std;
+using boost::shared_ptr;
+namespace po = boost::program_options;
+
+bool invert_score; 
+boost::shared_ptr<MT19937> rng; //random seed ptr
+
+void RandomPermutation(int len, vector<int>* p_ids) {
+  vector<int>& ids = *p_ids;
+  ids.resize(len);
+  for (int i = 0; i < len; ++i) ids[i] = i;
+  for (int i = len; i > 0; --i) {
+    int j = rng->next() * i;
+    if (j == i) i--;
+    swap(ids[i-1], ids[j]);
+  }  
+}
+
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("weights,w",po::value<string>(),"[REQD] Input feature weights file")
+        ("input,i",po::value<string>(),"[REQD] Input source file for development set")
+        ("passes,p", po::value<int>()->default_value(15), "Number of passes through the training data")
+        ("weights_write_interval,n", po::value<int>()->default_value(1000), "Number of lines between writing out weights")
+        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
+        ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+        ("regularizer_strength,C", po::value<double>()->default_value(0.01), "regularization strength")
+        ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Cost function is -mt_metric_scale*BLEU")
+        ("costaug_log_bleu,l", "Flag converts BLEU to log space. Cost function is thus -mt_metric_scale*log(BLEU). Not on by default")
+        ("average,A", "Average the weights (this is a weighted average due to the scaling factor)")
+        ("mu,u", po::value<double>()->default_value(0.0), "weight (between 0 and 1) to scale model score by for oracle selection")
+        ("stepsize_param,a", po::value<double>()->default_value(0.01), "Stepsize parameter, during optimization")
+        ("stepsize_reduce,t", "Divide step size by sqrt(number of examples seen so far), as per Ratliff et al., 2007")
+	("metric_threshold,T", po::value<double>()->default_value(0.0), "Threshold for diff between oracle BLEU and cost-aug BLEU for updating the weights")
+	("check_positive,P", "Check that the loss is positive before updating")
+        ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles")
+        ("best_ever,b", "Keep track of the best hypothesis we've ever seen (metric score), and use that as the reference")
+        ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
+        ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+  po::options_description clo("Command line options");
+  clo.add_options()
+        ("config", po::value<string>(), "Configuration file")
+        ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+  
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+
+  if (conf->count("help") || !conf->count("weights") || !conf->count("input") || !conf->count("decoder_config") || !conf->count("reference")) {
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+double scaling_trick = 1; // see http://blog.smola.org/post/940672544/fast-quadratic-regularization-for-online-learning
+/*computes and returns cost augmented score for negative example selection*/
+double cost_augmented_score(const LogVal<double> model_score, const double mt_metric_score, const double mt_metric_scale, const bool logbleu) {
+  if(logbleu) {
+    if(mt_metric_score != 0)
+      // NOTE: log(model_score) is just the model score feature weights * features
+      return log(model_score) * scaling_trick + (- mt_metric_scale * log(mt_metric_score));
+    else
+      return -1000000;
+  }
+  // NOTE: log(model_score) is just the model score feature weights * features
+  return log(model_score) * scaling_trick + (- mt_metric_scale * mt_metric_score);
+}
+
+/*computes and returns mu score, for oracle selection*/
+double muscore(const vector<weight_t>& feature_weights, const SparseVector<double>& feature_values, const double mt_metric_score, const double mu, const bool logbleu) {
+  if(logbleu) {
+    if(mt_metric_score != 0)
+      return feature_values.dot(feature_weights) * mu + (1 - mu) * log(mt_metric_score);
+    else
+      return feature_values.dot(feature_weights) * mu + (1 - mu) * (-1000000);  // log(0) is -inf
+  }
+  return feature_values.dot(feature_weights) * mu + (1 - mu) * mt_metric_score;
+}
+
+static const double kMINUS_EPSILON = -1e-6;
+
+struct HypothesisInfo {
+  SparseVector<double> features;
+  double mt_metric_score;
+  // The model score changes when the feature weights change, so it is not stored here
+  // It must be recomputed every time
+};
+
+struct GoodOracle {
+  shared_ptr<HypothesisInfo> good;
+};
+
+struct TrainingObserver : public DecoderObserver {
+  TrainingObserver(const int k,
+                   const DocScorer& d,
+                   vector<GoodOracle>* o,
+                   const vector<weight_t>& feat_weights,
+                   const double metric_scale,
+                   const double Mu,
+                   const bool bestever,
+                   const bool LogBleu) : ds(d), feature_weights(feat_weights), oracles(*o), kbest_size(k), mt_metric_scale(metric_scale), mu(Mu), best_ever(bestever), log_bleu(LogBleu) {}
+  const DocScorer& ds;
+  const vector<weight_t>& feature_weights;
+  vector<GoodOracle>& oracles;
+  shared_ptr<HypothesisInfo> cur_best;
+  shared_ptr<HypothesisInfo> cur_costaug_best;
+  shared_ptr<HypothesisInfo> cur_ref; 
+  const int kbest_size;
+  const double mt_metric_scale;
+  const double mu;
+  const bool best_ever;
+  const bool log_bleu;
+
+  const HypothesisInfo& GetCurrentBestHypothesis() const {
+    return *cur_best;
+  }
+
+  const HypothesisInfo& GetCurrentCostAugmentedHypothesis() const {
+    return *cur_costaug_best;
+  }
+
+  const HypothesisInfo& GetCurrentReference() const {
+    return *cur_ref; 
+  }
+
+  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
+    UpdateOracles(smeta.GetSentenceID(), *hg);
+  }
+
+  shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double metric) {
+    shared_ptr<HypothesisInfo> h(new HypothesisInfo);
+    h->features = feats;
+    h->mt_metric_score = metric;
+    return h;
+  }
+
+  void UpdateOracles(int sent_id, const Hypergraph& forest) {
+    //shared_ptr<HypothesisInfo>& cur_ref = oracles[sent_id].good;
+    cur_ref = oracles[sent_id].good; 
+    if(!best_ever)
+      cur_ref.reset();
+
+    KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
+    double costaug_best_score = 0;
+
+    for (int i = 0; i < kbest_size; ++i) {
+      const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+        kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+      if (!d) break;
+      double mt_metric_score = ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore(); //this might need to change!!
+      const SparseVector<double>& feature_vals = d->feature_values; 
+      double costaugmented_score = cost_augmented_score(d->score, mt_metric_score, mt_metric_scale, log_bleu); //note that d->score, i.e., model score, is passed in
+      if (i == 0) { //i.e., setting up cur_best to be model score highest, and initializing costaug_best
+        cur_best = MakeHypothesisInfo(feature_vals, mt_metric_score);
+        cur_costaug_best = cur_best;
+        costaug_best_score = costaugmented_score; 
+      }
+      if (costaugmented_score > costaug_best_score) {   // kbest_mira's cur_bad, i.e., "fear" derivation
+        cur_costaug_best = MakeHypothesisInfo(feature_vals, mt_metric_score);
+        costaug_best_score = costaugmented_score;
+      }
+      double cur_muscore = mt_metric_score;
+      if (!cur_ref)   // kbest_mira's cur_good, i.e., "hope" derivation
+        cur_ref =  MakeHypothesisInfo(feature_vals, cur_muscore);
+      else {
+          double cur_ref_muscore = cur_ref->mt_metric_score;
+          if(mu > 0) { //select oracle with mixture of model score and BLEU
+              cur_ref_muscore =  muscore(feature_weights, cur_ref->features, cur_ref->mt_metric_score, mu, log_bleu);
+              cur_muscore = muscore(feature_weights, d->feature_values, mt_metric_score, mu, log_bleu);
+          }
+          if (cur_muscore > cur_ref_muscore) //replace oracle
+            cur_ref = MakeHypothesisInfo(feature_vals, mt_metric_score);
+      }
+    }
+  }
+};
+
+void ReadTrainingCorpus(const string& fname, vector<string>* c) {
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  while(in) {
+    getline(in, line);
+    if (!in) break;
+    c->push_back(line);
+  }
+}
+
+bool ApproxEqual(double a, double b) {
+  if (a == b) return true;
+  return (fabs(a-b)/fabs(b)) < 0.000001;
+}
+
+int main(int argc, char** argv) {
+  register_feature_functions();
+  SetSilent(true);  // turn off verbose decoder output
+
+  po::variables_map conf;
+  if (!InitCommandLine(argc, argv, &conf)) return 1;
+
+  if (conf.count("random_seed"))
+    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    rng.reset(new MT19937);
+
+  const bool best_ever = conf.count("best_ever") > 0;
+  vector<string> corpus;
+  ReadTrainingCorpus(conf["input"].as<string>(), &corpus);
+
+  const string metric_name = conf["mt_metric"].as<string>(); //set up scoring; this may need to be changed!!
+  
+  ScoreType type = ScoreTypeFromString(metric_name);
+  if (type == TER) {
+    invert_score = true;
+  } else {
+    invert_score = false;
+  } 
+  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
+  if (ds.size() != corpus.size()) {
+    cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
+    return 1;
+  }
+
+  ReadFile ini_rf(conf["decoder_config"].as<string>());
+  Decoder decoder(ini_rf.stream());
+
+  // load initial weights
+  vector<weight_t>& decoder_weights = decoder.CurrentWeightVector(); //equivalent to "dense_weights" vector in kbest_mira.cc
+  SparseVector<weight_t> sparse_weights; //equivaelnt to  kbest_mira.cc "lambdas"
+  Weights::InitFromFile(conf["weights"].as<string>(), &decoder_weights);
+  Weights::InitSparseVector(decoder_weights, &sparse_weights);
+
+  //initializing other algorithm and output parameters
+  const double c = conf["regularizer_strength"].as<double>();
+  const int weights_write_interval = conf["weights_write_interval"].as<int>();
+  const double mt_metric_scale = conf["mt_metric_scale"].as<double>();
+  const double mu = conf["mu"].as<double>();
+  const double metric_threshold = conf["metric_threshold"].as<double>();
+  const double stepsize_param = conf["stepsize_param"].as<double>(); //step size in structured SGD optimization step
+  const bool stepsize_reduce = conf.count("stepsize_reduce") > 0; 
+  const bool costaug_log_bleu = conf.count("costaug_log_bleu") > 0;
+  const bool average = conf.count("average") > 0;
+  const bool checkpositive = conf.count("check_positive") > 0;
+
+  assert(corpus.size() > 0);
+  vector<GoodOracle> oracles(corpus.size());
+  TrainingObserver observer(conf["k_best_size"].as<int>(),  // kbest size
+                            ds,                             // doc scorer
+                            &oracles,
+                            decoder_weights,
+                            mt_metric_scale,
+                            mu,
+                            best_ever,
+                            costaug_log_bleu);
+  int cur_sent = 0;
+  int line_count = 0;
+  int normalizer = 0; 
+  double total_loss = 0;
+  double prev_loss = 0;
+  int dots = 0;             // progess bar
+  int cur_pass = 0;
+  SparseVector<double> tot;
+  tot += sparse_weights; //add initial weights to total
+  normalizer++; //add 1 to normalizer
+  int max_iteration = conf["passes"].as<int>();
+  string msg = "# LatentSVM tuned weights";
+  vector<int> order;
+  int interval_counter = 0;
+  RandomPermutation(corpus.size(), &order); //shuffle corpus
+  while (line_count <= max_iteration * corpus.size()) { //loop over all (passes * num sentences) examples
+    //if ((interval_counter * 40 / weights_write_interval) > dots) { ++dots; cerr << '.'; } //check this
+    if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.';}
+    if (interval_counter == weights_write_interval) { //i.e., we need to write out weights
+      sparse_weights *= scaling_trick;
+      tot *= scaling_trick;
+      scaling_trick = 1;
+      cerr << " [SENTENCE NUMBER= " << cur_sent << "\n";
+      cerr << " [AVG METRIC LAST INTERVAL =" << ((total_loss - prev_loss) / weights_write_interval) << "]\n";
+      cerr << " [AVG METRIC THIS PASS THUS FAR =" << (total_loss / cur_sent) << "]\n";
+      cerr << " [TOTAL LOSS: =" << total_loss << "\n";
+      Weights::ShowLargestFeatures(decoder_weights);
+      //dots = 0;
+      interval_counter = 0;
+      prev_loss = total_loss;
+      if (average){
+	SparseVector<double> x = tot;
+	x /= normalizer;
+	ostringstream sa;
+	sa << "weights.latentsvm-" << line_count/weights_write_interval << "-avg.gz";
+	x.init_vector(&decoder_weights);
+	Weights::WriteToFile(sa.str(), decoder_weights, true, &msg); 
+      }
+      else {
+	ostringstream os;
+	os << "weights.latentsvm-" << line_count/weights_write_interval << ".gz";
+	sparse_weights.init_vector(&decoder_weights);
+	Weights::WriteToFile(os.str(), decoder_weights, true, &msg);
+      }
+    }
+    if (corpus.size() == cur_sent) { //i.e., finished a pass
+      //cerr << " [AVG METRIC LAST PASS=" << (document_metric_score / corpus.size()) << "]\n";
+      cerr << " [AVG METRIC LAST PASS=" << (total_loss / corpus.size()) << "]\n";
+      cerr << " TOTAL LOSS: " << total_loss << "\n";
+      Weights::ShowLargestFeatures(decoder_weights);
+      cur_sent = 0;
+      total_loss = 0;
+      dots = 0;
+      if(average) {
+        SparseVector<double> x = tot; 
+        x /= normalizer;
+        ostringstream sa;
+        sa << "weights.latentsvm-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "-avg.gz";
+        x.init_vector(&decoder_weights);
+        Weights::WriteToFile(sa.str(), decoder_weights, true, &msg);
+      }
+      else {
+	ostringstream os;
+	os << "weights.latentsvm-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz";
+	Weights::WriteToFile(os.str(), decoder_weights, true, &msg);	
+      }
+      cur_pass++;
+      RandomPermutation(corpus.size(), &order);
+    }
+    if (cur_sent == 0) { //i.e., starting a new pass
+      cerr << "PASS " << (line_count / corpus.size() + 1) << endl;
+    }
+    sparse_weights.init_vector(&decoder_weights);   // copy sparse_weights to the decoder weights
+    decoder.SetId(order[cur_sent]); //assign current sentence
+    decoder.Decode(corpus[order[cur_sent]], &observer);  // decode/update oracles
+
+    const HypothesisInfo& cur_best = observer.GetCurrentBestHypothesis(); //model score best
+    const HypothesisInfo& cur_costaug = observer.GetCurrentCostAugmentedHypothesis(); //(model + cost) best; cost = -metric_scale*log(BLEU) or -metric_scale*BLEU
+    //const HypothesisInfo& cur_ref = *oracles[order[cur_sent]].good; //this oracle-best line only picks based on BLEU
+    const HypothesisInfo& cur_ref = observer.GetCurrentReference();  //if mu > 0, this mu-mixed oracle will be picked; otherwise, only on BLEU
+    total_loss += cur_best.mt_metric_score; 
+
+    double step_size = stepsize_param;
+    if (stepsize_reduce){       // w_{t+1} = w_t - stepsize_t * grad(Loss) 
+        step_size  /= (sqrt(cur_sent+1.0)); 
+    }
+    //actual update step - compute gradient, and modify sparse_weights
+    if(cur_ref.mt_metric_score - cur_costaug.mt_metric_score > metric_threshold) {
+      const double loss = (cur_costaug.features.dot(decoder_weights) - cur_ref.features.dot(decoder_weights)) * scaling_trick + mt_metric_scale * (cur_ref.mt_metric_score - cur_costaug.mt_metric_score);
+      if (!checkpositive || loss > 0.0) { //can update either all the time if check positive is off, or only when loss > 0 if it's on
+	sparse_weights -= cur_costaug.features * step_size / ((1.0-2.0*step_size*c)*scaling_trick);    // cost augmented hyp orig -
+	sparse_weights += cur_ref.features * step_size / ((1.0-2.0*step_size*c)*scaling_trick);        // ref orig +
+      }
+    }
+    scaling_trick *= (1.0 - 2.0 * step_size * c);
+
+    tot += sparse_weights; //for averaging purposes
+    normalizer++; //for averaging purposes
+    line_count++;
+    interval_counter++;
+    cur_sent++;
+  }
+  cerr << endl;
+  if(average) {
+    tot /= normalizer;
+    tot.init_vector(decoder_weights);
+    msg = "# Latent SSVM tuned weights (averaged vector)";
+    Weights::WriteToFile("weights.latentsvm-final-avg.gz", decoder_weights, true, &msg); 
+    cerr << "Optimization complete.\n" << "AVERAGED WEIGHTS: weights.latentsvm-final-avg.gz\n";
+  } else {
+    Weights::WriteToFile("weights.latentsvm-final.gz", decoder_weights, true, &msg);    
+    cerr << "Optimization complete.\n";
+  }
+  return 0;
+}
+
-- 
cgit v1.2.3


From 4c805062bf38fc1c97bf1f28adaa2dc8f160caad Mon Sep 17 00:00:00 2001
From: Avneesh Saluja <asaluja@gmail.com>
Date: Thu, 28 Mar 2013 19:01:24 -0700
Subject: updated Makefiles

---
 training/Makefile.am            | 1 +
 training/latent_svm/Makefile.am | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/training/Makefile.am b/training/Makefile.am
index e95e045f..8ef3c939 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -6,6 +6,7 @@ SUBDIRS = \
   dpmert \
   pro \
   dtrain \
+  latent_svm \
   mira \
   rampion
 
diff --git a/training/latent_svm/Makefile.am b/training/latent_svm/Makefile.am
index 673b9159..65c5e038 100644
--- a/training/latent_svm/Makefile.am
+++ b/training/latent_svm/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = latent_svm
 
 latent_svm_SOURCES = latent_svm.cc
-latent_svm_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+latent_svm_LDADD = ../..//decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
-- 
cgit v1.2.3


From f6ec15dd08b74130a2efd93fb4b0dcb4a2d9b7ae Mon Sep 17 00:00:00 2001
From: Avneesh Saluja <asaluja@gmail.com>
Date: Thu, 28 Mar 2013 22:42:04 -0700
Subject: updated configure.ac

---
 configure.ac | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure.ac b/configure.ac
index 98deac86..8632fb51 100644
--- a/configure.ac
+++ b/configure.ac
@@ -128,6 +128,7 @@ AC_CONFIG_FILES([training/pro/Makefile])
 AC_CONFIG_FILES([training/rampion/Makefile])
 AC_CONFIG_FILES([training/minrisk/Makefile])
 AC_CONFIG_FILES([training/mira/Makefile])
+AC_CONFIG_FILES([training/latent_svm/Makefile])
 AC_CONFIG_FILES([training/dtrain/Makefile])
 
 # external feature function example code
-- 
cgit v1.2.3


From 50bbf29fa49e695e721724a137ff4695eea87906 Mon Sep 17 00:00:00 2001
From: Michael Denkowski <michael.j.denkowski@gmail.com>
Date: Mon, 8 Apr 2013 13:08:04 -0400
Subject: online wasn't getting set

---
 python/pkg/cdec/sa/extract.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index 782bed8b..bf39d080 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -62,6 +62,7 @@ def extract(inp):
     return '<seg grammar="{}" id="{}">{}</seg>{}'.format(grammar_file, i, sentence, suffix)
 
 def main():
+    global online
     logging.basicConfig(level=logging.INFO)
     parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.')
     parser.add_argument('-c', '--config', required=True,
@@ -88,6 +89,8 @@ def main():
                     ' should be a python module\n'.format(featdef))
             sys.exit(1)
     
+    online = args.online
+
     if args.jobs > 1:
         logging.info('Starting %d workers; chunk size: %d', args.jobs, args.chunksize)
         pool = mp.Pool(args.jobs, make_extractor, (args,))
-- 
cgit v1.2.3


From 9a4f693870214e56d51aa22ceb97a67b34b7a0d0 Mon Sep 17 00:00:00 2001
From: vlade <vlad@nod.umiacs.umd.edu>
Date: Sat, 13 Apr 2013 00:48:10 -0400
Subject: inital commit of mira code

---
 training/mira/kbest_mirav5.cc | 1148 +++++++++++++++++++++++++++++++++++++++++
 training/mira/run_mira.pl     |  548 ++++++++++++++++++++
 2 files changed, 1696 insertions(+)
 create mode 100644 training/mira/kbest_mirav5.cc
 create mode 100755 training/mira/run_mira.pl

diff --git a/training/mira/kbest_mirav5.cc b/training/mira/kbest_mirav5.cc
new file mode 100644
index 00000000..cea5cf67
--- /dev/null
+++ b/training/mira/kbest_mirav5.cc
@@ -0,0 +1,1148 @@
+#include <sstream>
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include <cmath>
+#include <algorithm>
+
+#include "config.h"
+
+
+#include <boost/shared_ptr.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "sentence_metadata.h"
+#include "scorer.h"
+#include "verbose.h"
+#include "viterbi.h"
+#include "hg.h"
+#include "prob.h"
+#include "kbest.h"
+#include "ff_register.h"
+#include "decoder.h"
+#include "filelib.h"
+#include "fdict.h"
+#include "time.h"
+#include "sampler.h"
+
+#include "weights.h"
+#include "sparse_vector.h"
+
+using namespace std;
+using boost::shared_ptr;
+namespace po = boost::program_options;
+
+bool invert_score;  // when set, UpdateOracles negates every sentence-level metric score
+boost::shared_ptr<MT19937> rng;  // random number generator; presumably created in main from --random_seed (TODO confirm)
+bool approx_score;  // selects the approximate (corpus-gain / pseudo-document / sentence-level) scoring in UpdateOracles
+bool no_reweight;  // when set, CuttingPlane re-scores and re-sorts the hypotheses itself
+bool no_select;  // when set, SelectPair returns the first two constraints without searching
+bool unique_kbest;  // when set, the k-best list is extracted with KBest::FilterUnique
+int update_list_size;  // upper bound on the size of the cur_best / good / bad lists
+vector<weight_t> dense_weights_g;  // weights used for model scores in UpdateOracles and FearComparePred
+double mt_metric_scale;  // factor applied to every metric score in UpdateOracles
+int optimizer;  // optimizer id (see the --optimizer option); 2 makes SelectPair skip its search
+int fear_select;  // fear selection mode (see --fear); UpdateOracles handles 1, 2 and "anything else"
+int hope_select;  // hope selection mode (see --hope); 1 re-ranks by metric + model score in UpdateOracles
+
+bool pseudo_doc;  // enables the pseudo-document scoring path in TrainingObserver
+
+void SanityCheck(const vector<double>& w) {  // asserts that no weight is NaN or infinite
+  for (size_t i = 0; i != w.size(); ++i) {
+    assert(!isnan(w[i]));
+    assert(!isinf(w[i]));
+  }
+}
+
+struct FComp {  // strict ordering of feature ids by descending absolute weight
+  const vector<double>& w_;
+  FComp(const vector<double>& weights) : w_(weights) {}
+  bool operator()(int a, int b) const {
+    return fabs(w_[b]) < fabs(w_[a]);
+  }
+};
+
+// Prints to stderr the (at most) ten weights of largest magnitude with their feature names.
+void ShowLargestFeatures(const vector<double>& w) {
+  vector<int> ids(w.size());
+  for (size_t k = 0; k < ids.size(); ++k)
+    ids[k] = k;
+  const size_t shown = (w.size() > 10 ? 10 : w.size());
+  const vector<int>::iterator top_end = ids.begin() + shown;
+  partial_sort(ids.begin(), top_end, ids.end(), FComp(w));
+  cerr << "TOP FEATURES:";
+  for (vector<int>::iterator it = ids.begin(); it != top_end; ++it)
+    cerr << ' ' << FD::Convert(*it) << '=' << w[*it];
+  cerr << endl;
+}
+
+// Parses the command line (plus the optional --config file) into *conf; returns false when the caller should exit.
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+    ("input_weights,w",po::value<string>(),"Input feature weights file")
+    ("source,i",po::value<string>(),"Source file for development set")
+    ("passes,p", po::value<int>()->default_value(15), "Number of passes through the training data")
+    ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
+    ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+    ("optimizer,o",po::value<int>()->default_value(1), "Optimizer (sgd=1, mira 1-fear=2, full mira w/ cutting plane=3, full mira w/ nbest list=5, local update=4)")
+    ("fear,f",po::value<int>()->default_value(1), "Fear selection (model-cost=1, max-cost=2, pred-base=3)")
+    ("hope,h",po::value<int>()->default_value(1), "Hope selection (model+cost=1, max-cost=2, local-cost=3)")
+    ("max_step_size,C", po::value<double>()->default_value(0.01), "regularization strength (C)")
+    ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
+    ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Amount to scale MT loss function by")
+    ("approx_score,a", "Use smoothed sentence-level BLEU score for approximate scoring")
+    ("no_reweight,d","Do not reweight forest for cutting plane")
+    ("no_select,n", "Do not use selection heuristic")
+    ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles")
+    ("update_k_best,b", po::value<int>()->default_value(1), "Size of good, bad lists to perform update with")
+    ("unique_k_best,u", "Unique k-best translation list")
+    ("weights_output,O",po::value<string>(),"Directory to write weights to")
+    ("output_dir,D",po::value<string>(),"Directory to place output in")
+    ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+  po::options_description clo("Command line options");
+  clo.add_options()
+    ("config", po::value<string>(), "Configuration file")
+    ("help,H", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    if (!config) { cerr << "Cannot read configuration file " << (*conf)["config"].as<string>() << endl; return false; }  // an unreadable file would otherwise parse as empty
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+  if (conf->count("help") || !conf->count("input_weights") || !conf->count("decoder_config") || !conf->count("reference")) {
+    cerr << dcmdline_options << endl;  // --help or a missing required option: show usage
+    return false;
+  }
+  return true;
+}
+
+// Load the previous translation, store an array of each sentence's score, subtract it from the current sentence and replace it with the new translation's score.
+
+
+static const double kMINUS_EPSILON = -1e-6;  // NOTE(review): not referenced by any helper defined before main
+static const double EPSILON = 0.000001;  // relative tolerance used by ApproxEqual
+static const double SMO_EPSILON = 0.0001;  // slack when comparing fear scores in CuttingPlane and SelectPair
+static const double PSEUDO_SCALE = 0.95;  // decay applied to the pseudo-document stats and source length per sentence
+static const int MAX_SMO = 10;  // presumably an iteration cap for the pairwise optimisation in main (TODO confirm)
+int cur_pass;  // index of the current pass; read by TrainingObserver to pick the scoring mode
+
+struct HypothesisInfo {  // one k-best entry with the scores used during hope/fear selection
+  HypothesisInfo() : mt_metric(0), hope(0), fear(0), alpha(0), oracle_loss(0) {}  // no indeterminate scores
+  SparseVector<double> features;          // feature values of the derivation
+  vector<WordID> hyp;                     // target-side yield
+  double mt_metric;                       // (scaled) sentence-level metric score
+  double hope, fear;                      // selection scores; stay 0 until a selection pass sets them
+  double alpha;                           // compared/clipped against in SelectPair and ComputeDelta
+  double oracle_loss;                     // metric difference to the oracle (oracleN)
+  SparseVector<double> oracle_feat_diff;  // oracle features minus this hypothesis' features
+  shared_ptr<HypothesisInfo> oracleN;     // oracle this hypothesis was compared against
+};
+
+bool ApproxEqual(double a, double b) {  // exact match, or relative error (with respect to b) below EPSILON
+  const double rel_err = fabs(a - b) / fabs(b);
+  return a == b || rel_err < EPSILON;
+}
+
+typedef shared_ptr<HypothesisInfo> HI;
+// Sort predicate: hypotheses with the larger mt_metric come first.
+bool HypothesisCompareB(const HI& h1, const HI& h2) {
+  return h2->mt_metric < h1->mt_metric;
+}
+
+
+// Sort predicate: hypotheses with the larger hope score come first.
+bool HopeCompareB(const HI& h1, const HI& h2) {
+  return h2->hope < h1->hope;
+}
+
+// Sort predicate: hypotheses with the larger fear score come first.
+bool FearCompareB(const HI& h1, const HI& h2) {
+  return h2->fear < h1->fear;
+}
+
+// Sort predicate: hypotheses with the larger model score under dense_weights_g come first.
+bool FearComparePred(const HI& h1, const HI& h2) {
+  return h2->features.dot(dense_weights_g) < h1->features.dot(dense_weights_g);
+}
+
+// Sort predicate: hypotheses with the smaller mt_metric come first.
+bool HypothesisCompareG(const HI& h1, const HI& h2) {
+  return h2->mt_metric > h1->mt_metric;
+}
+
+
+// Cutting-plane step: decides whether the most-feared hypothesis must join the constraint set.
+//   cur_c          current constraint set; gains at most one element
+//   again          set to true when a constraint was added (never reset to false here)
+//   all_hyp        candidates; with no_reweight set they are re-scored and re-sorted in place,
+//                  otherwise all_hyp[0] is taken as the max-fear entry as-is
+//                  (presumably pre-sorted by the caller -- TODO confirm)
+//   dense_weights  weights used for the model scores in the no_reweight path
+void CuttingPlane(vector<shared_ptr<HypothesisInfo> >* cur_c, bool* again, vector<shared_ptr<HypothesisInfo> >& all_hyp, vector<weight_t> dense_weights)
+{
+  const bool DEBUG_CUT = false;
+  vector<shared_ptr<HypothesisInfo> >& cur_constraint = *cur_c;
+  if (all_hyp.empty()) return;  // no candidate at all; all_hyp[0] below would be out of bounds
+
+  if (no_reweight)
+    {
+      // hope = metric + model score under the given weights
+      for (size_t u = 0; u != all_hyp.size(); ++u)
+        all_hyp[u]->hope = 1 * all_hyp[u]->mt_metric + all_hyp[u]->features.dot(dense_weights);
+
+      // the hope-best hypothesis is the reference point for the relative loss below
+      sort(all_hyp.begin(), all_hyp.end(), HopeCompareB);
+      const shared_ptr<HypothesisInfo> hope_best = all_hyp[0];
+      const double hope_score = hope_best->features.dot(dense_weights);
+      if (DEBUG_CUT) cerr << "New hope derivation score " << hope_score << endl;
+
+      // fear = metric loss relative to hope_best + model-score advantage over hope_best
+      for (size_t u = 0; u != all_hyp.size(); ++u)
+        {
+          const double t_score = all_hyp[u]->features.dot(dense_weights);
+          all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*hope_best->mt_metric - hope_score + t_score;
+        }
+
+      sort(all_hyp.begin(), all_hyp.end(), FearCompareB);
+    }
+
+  // maximum-fear derivation among all candidates
+  const shared_ptr<HypothesisInfo> max_fear = all_hyp[0];
+  if (DEBUG_CUT) cerr << "Cutting Plane Max Fear " << max_fear->fear;
+
+  // maximal violator already in the constraint set (stays null when the set is empty)
+  shared_ptr<HypothesisInfo> max_fear_in_set;
+  for (size_t i = 0; i < cur_constraint.size(); ++i)
+    {
+      if (!max_fear_in_set || cur_constraint[i]->fear > max_fear_in_set->fear)
+        max_fear_in_set = cur_constraint[i];
+    }
+  if (DEBUG_CUT && max_fear_in_set) cerr << "Max Fear in constraint set " << max_fear_in_set->fear << endl;
+
+  // With an empty set there is nothing to compare against (and the pointer must not be
+  // dereferenced), so the candidate is added unconditionally.
+  if (!max_fear_in_set || max_fear->fear > max_fear_in_set->fear + SMO_EPSILON)
+    {
+      cur_constraint.push_back(max_fear);
+      *again = true;
+      if (DEBUG_CUT) cerr << "Optimize Again " << *again << endl;
+    }
+}
+
+
+// Computes the clipped step size (delta) for the constraint pair (*cur_p)[0], (*cur_p)[1].
+//   cur_p          the pair; both entries are read through their oracleN pointer, which must be set
+//   max_step_size  multiplies the squared feature distance in the denominator
+//   dense_weights  weights used for every model score
+// delta = (margin + loss) / (||f0 - f1||^2 * max_step_size), or 0 when that distance is 0,
+// then limited via max(-alpha0, min(delta, alpha1)).  Intermediate values always go to stderr.
+// Precondition: *cur_p holds at least two entries.
+double ComputeDelta(vector<shared_ptr<HypothesisInfo> >* cur_p, double max_step_size,vector<weight_t> dense_weights )
+{
+  vector<shared_ptr<HypothesisInfo> >& pr = *cur_p;
+  const shared_ptr<HypothesisInfo>& first = pr[0];
+  const shared_ptr<HypothesisInfo>& second = pr[1];
+
+  // difference of the two stored oracle losses
+  const double loss = first->oracle_loss - second->oracle_loss;
+
+  // difference of the two model-score gaps (oracle score minus own score), with the gap of
+  // the first hypothesis entering negatively
+  const double margin =
+    -(first->oracleN->features.dot(dense_weights) - first->features.dot(dense_weights))
+    + (second->oracleN->features.dot(dense_weights) - second->features.dot(dense_weights));
+  const double num = margin + loss;
+  cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " "
+       << second->features.dot(dense_weights) << " " << first->features.dot(dense_weights) <<endl;
+
+  // squared Euclidean distance between the two feature vectors
+  SparseVector<double> diff = first->features;
+  diff -= second->features;
+  const double diffsqnorm = diff.l2norm_sq();
+
+  // unclipped step; identical feature vectors give no direction to move in
+  double delta = 0;
+  if (diffsqnorm > 0)
+    delta = num / (diffsqnorm * max_step_size);
+  cerr << " D1:" << delta;
+
+  //clip delta (enforce margin constraints)
+  delta = max(-first->alpha, min(delta, second->alpha));
+  cerr << " D2:" << delta;
+  return delta;
+}
+
+
+// Chooses the next pair of constraints to optimize jointly; an empty result means "done".
+//   - no_select set or optimizer == 2: the first two constraints are returned without any search
+//     (empty if fewer than two exist).
+//   - otherwise: scan for a constraint u whose alpha/fear combination is inconsistent with the
+//     largest fear among the other constraints, and return {u, partner}.
+vector<shared_ptr<HypothesisInfo> > SelectPair(vector<shared_ptr<HypothesisInfo> >* cur_c)
+{
+  const bool DEBUG_SELECT = false;
+  vector<shared_ptr<HypothesisInfo> >& cur_constraint = *cur_c;
+  vector<shared_ptr<HypothesisInfo> > pair;
+
+  if (no_select || optimizer == 2) {  // skip heuristic search and return oracle and fear for 1-mira
+    if (cur_constraint.size() < 2) return pair;  // too few constraints to form a pair
+    pair.push_back(cur_constraint[0]);
+    pair.push_back(cur_constraint[1]);
+    return pair;
+  }
+
+  for (size_t u = 0; u != cur_constraint.size(); ++u)
+    {
+      // largest fear among all constraints other than u
+      shared_ptr<HypothesisInfo> max_fear;
+      if (DEBUG_SELECT) cerr << "cur alpha " << u << " " << cur_constraint[u]->alpha;
+      for (size_t i = 0; i < cur_constraint.size(); ++i)
+        {
+          if (i != u && (!max_fear || cur_constraint[i]->fear > max_fear->fear))
+            max_fear = cur_constraint[i];
+        }
+      if (!max_fear) return pair;  // single-element set: no partner exists
+      if (DEBUG_SELECT) cerr << " F" << max_fear->fear << endl;
+
+      // u has alpha == 0 yet its fear exceeds every other constraint's by more than
+      // SMO_EPSILON: pair it with the first constraint that has alpha > 0
+      if ((cur_constraint[u]->alpha == 0) && (cur_constraint[u]->fear > max_fear->fear + SMO_EPSILON))
+        {
+          for (size_t i = 0; i < cur_constraint.size(); ++i)
+            {
+              if (i != u && cur_constraint[i]->alpha > 0)
+                {
+                  pair.push_back(cur_constraint[u]);
+                  pair.push_back(cur_constraint[i]);
+                  if (DEBUG_SELECT) cerr << "RETURN from 1" << endl;
+                  return pair;
+                }
+            }
+        }
+      // u has alpha > 0 but its fear is below the largest other fear by more than
+      // SMO_EPSILON: pair it with the first constraint whose fear exceeds u's
+      if ((cur_constraint[u]->alpha > 0) && (cur_constraint[u]->fear < max_fear->fear - SMO_EPSILON))
+        {
+          for (size_t i = 0; i < cur_constraint.size(); ++i)
+            {
+              if (i != u && cur_constraint[i]->fear > cur_constraint[u]->fear)
+                {
+                  pair.push_back(cur_constraint[u]);
+                  pair.push_back(cur_constraint[i]);
+                  return pair;
+                }
+            }
+        }
+    }
+  return pair;  // no more constraints to optimize, we're done here
+}
+
+struct GoodBadOracle {  // per-sentence hope/fear lists, rebuilt by TrainingObserver::UpdateOracles
+  vector<shared_ptr<HypothesisInfo> > good;  // leading hypotheses by metric (or by hope score when hope_select == 1)
+  vector<shared_ptr<HypothesisInfo> > bad;  // hypotheses picked according to fear_select
+};
+
+// Decoder observer that, for every decoded sentence, extracts a k-best list from the translation
+// forest, scores each entry with the MT metric, and fills the hope ("good") and fear ("bad")
+// lists in oracles[sent_id] together with the model-best list cur_best.
+struct TrainingObserver : public DecoderObserver {
+  //   k    size of the k-best list extracted per sentence
+  //   d    per-sentence scorers, indexed by sentence id
+  //   o    per-sentence good/bad lists; overwritten by UpdateOracles
+  //   cbs  per-sentence score statistics of an earlier run (read here when cur_pass > 0)
+  // The scalar members start at zero: corpus_src_length is read by the pseudo-document
+  // scoring in UpdateOracles before anything assigns it.
+  TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o, vector<ScoreP>* cbs)
+    : ds(d), corpus_bleu_sent_stats(*cbs), oracles(*o), kbest_size(k), cur_sent(0),
+      corpus_bleu_score(0), corpus_src_length(0), curr_src_length(0) {
+    //calculate corpus bleu score from previous iterations 1-best for BLEU gain
+    if (!pseudo_doc && cur_pass > 0)
+      {
+        ScoreP acc;
+        for (size_t ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) {
+          if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); }
+          acc->PlusEquals(*corpus_bleu_sent_stats[ii]);
+        }
+        corpus_bleu_stats = acc;
+        if (acc) corpus_bleu_score = acc->ComputeScore();  // acc stays null when *cbs is empty
+      }
+  }
+
+  const DocScorer& ds;                           // reference scorers, indexed by sentence id
+  vector<ScoreP>& corpus_bleu_sent_stats;        // per-sentence stats handed to the constructor
+  vector<GoodBadOracle>& oracles;                // per-sentence hope/fear lists (output)
+  vector<shared_ptr<HypothesisInfo> > cur_best;  // model-best entries of the current sentence
+  shared_ptr<HypothesisInfo> cur_oracle;         // never assigned inside this struct
+  const int kbest_size;                          // number of derivations requested per sentence
+  Hypergraph forest;                             // copy of the most recent translation forest
+  int cur_sent;                                  // id of the most recently decoded sentence
+  ScoreP corpus_bleu_stats;                      // accumulated corpus / pseudo-document stats
+  float corpus_bleu_score;                       // score of corpus_bleu_stats at construction
+  float corpus_src_length;                       // decayed pseudo-document source length
+  float curr_src_length;                         // source length of the current sentence
+
+  const int GetCurrentSent() const {
+    return cur_sent;
+  }
+
+  // Precondition: the last UpdateOracles call produced at least one hypothesis.
+  const HypothesisInfo& GetCurrentBestHypothesis() const {
+    return *cur_best[0];
+  }
+
+  const vector<shared_ptr<HypothesisInfo> > GetCurrentBest() const {
+    return cur_best;
+  }
+
+  // NOTE(review): cur_oracle is never set inside this struct; a caller has to assign it first.
+  const HypothesisInfo& GetCurrentOracle() const {
+    return *cur_oracle;
+  }
+
+  const Hypergraph& GetCurrentForest() const {
+    return forest;
+  }
+
+  // Decoder callback: records sentence id and source length, rebuilds the oracle lists from
+  // the forest, then keeps a copy of the forest for GetCurrentForest().
+  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
+    cur_sent = smeta.GetSentenceID();
+    curr_src_length = (float) smeta.GetSourceLength();
+    if (unique_kbest)
+      UpdateOracles<KBest::FilterUnique>(smeta.GetSentenceID(), *hg);
+    else
+      UpdateOracles<KBest::NoFilter<std::vector<WordID> > >(smeta.GetSentenceID(), *hg);
+    forest = *hg;
+  }
+
+  // Builds a hypothesis record; the selection scores start at zero until they are computed.
+  shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double score, const vector<WordID>& hyp) {
+    shared_ptr<HypothesisInfo> h(new HypothesisInfo);
+    h->features = feats;
+    h->mt_metric = score;
+    h->hyp = hyp;
+    h->hope = h->fear = h->alpha = h->oracle_loss = 0;  // plain doubles, otherwise indeterminate
+    return h;
+  }
+
+  // Rebuilds cur_best and oracles[sent_id].{good,bad} from the k-best list of `forest`:
+  //   1. score every derivation (mode depends on approx_score, pseudo_doc and cur_pass);
+  //   2. with pseudo_doc, fold the model-best entry into the decayed pseudo-document stats;
+  //   3. good = leading entries by metric, or by metric + model score when hope_select == 1;
+  //   4. bad  = entries picked according to fear_select.
+  // Filter is the k-best filter policy (KBest::FilterUnique or KBest::NoFilter).
+  // If no hypothesis can be extracted, all three lists are left empty.
+  template <class Filter>
+  void UpdateOracles(int sent_id, const Hypergraph& forest) {
+    const bool PRINT_LIST = false;
+    vector<shared_ptr<HypothesisInfo> >& cur_good = oracles[sent_id].good;
+    vector<shared_ptr<HypothesisInfo> >& cur_bad = oracles[sent_id].bad;
+    //TODO: look at keeping previous iterations hypothesis lists around
+    cur_best.clear();
+    cur_good.clear();
+    cur_bad.clear();
+
+    vector<shared_ptr<HypothesisInfo> > all_hyp;
+
+    typedef KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,Filter> K;
+    K kbest(forest,kbest_size);
+
+    // walk the k-best list lazily; a null derivation ends the walk early
+    for (int i = 0; i < kbest_size; ++i) {
+      typename K::Derivation *d =
+        kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+      if (!d) break;
+
+      // metric score of this derivation, scaled by mt_metric_scale
+      float sentscore;
+      if (approx_score)
+        {
+          if (cur_pass > 0 && !pseudo_doc)
+            {
+              // score the candidate together with the stored corpus stats minus this
+              // sentence's stored stats (added with factor 0.5), relative to those stats alone
+              ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield);
+              ScoreP corpus_no_best = corpus_bleu_stats->GetZero();
+
+              corpus_bleu_stats->Subtract(*corpus_bleu_sent_stats[sent_id], &*corpus_no_best);
+              sent_stats->PlusEquals(*corpus_no_best, 0.5);
+
+              //compute gain from new sentence in 1-best corpus
+              sentscore = mt_metric_scale * (sent_stats->ComputeScore() - corpus_no_best->ComputeScore());
+            }
+          else if (pseudo_doc)
+            {
+              // pseudo-corpus approach: score the candidate together with the running
+              // pseudo-document and scale by the combined source length
+              float src_scale = corpus_src_length + curr_src_length;
+              ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield);
+              if (!corpus_bleu_stats) { corpus_bleu_stats = sent_stats->GetZero(); }
+
+              sent_stats->PlusEquals(*corpus_bleu_stats);
+              sentscore = mt_metric_scale * src_scale * sent_stats->ComputeScore();
+            }
+          else
+            {
+              //approx style of computation, used for 0th iteration
+              sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeSentScore());
+            }
+        }
+      else
+        {
+          sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore());
+        }
+
+      if (invert_score) sentscore *= -1.0;
+
+      // the first update_list_size derivations (in k-best order) form cur_best
+      if (i < update_list_size) {
+        if (PRINT_LIST) cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl;
+        cur_best.push_back(MakeHypothesisInfo(d->feature_values, sentscore, d->yield));
+      }
+
+      //store all hyp to extract oracle best and worst
+      all_hyp.push_back(MakeHypothesisInfo(d->feature_values, sentscore, d->yield));
+    }
+
+    // Without a single usable derivation there is nothing to rank, and every access to
+    // element [0] below would be out of bounds, so the lists are left empty.
+    if (all_hyp.empty() || cur_best.empty()) {
+      cerr << "WARNING: no usable hypotheses for sentence " << sent_id << endl;
+      return;
+    }
+
+    if (pseudo_doc) {
+      //update pseudo-doc stats with the current model-best hypothesis, then decay them
+      string details, details2;
+      ScoreP sent_stats = ds[sent_id]->ScoreCandidate(cur_best[0]->hyp);
+      // the scoring loop only creates corpus_bleu_stats when approx_score is set
+      if (!corpus_bleu_stats) { corpus_bleu_stats = sent_stats->GetZero(); }
+      corpus_bleu_stats->ScoreDetails(&details2);
+      corpus_bleu_stats->PlusEquals(*sent_stats);
+      sent_stats->ScoreDetails(&details);
+
+      sent_stats = corpus_bleu_stats;
+      corpus_bleu_stats = sent_stats->GetZero();
+      corpus_bleu_stats->PlusEquals(*sent_stats, PSEUDO_SCALE);
+
+      corpus_src_length = PSEUDO_SCALE * (corpus_src_length + curr_src_length);
+      cerr << "CORP S " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n " << details2 << endl;
+    }
+
+    //figure out how many hyps we can keep maximum
+    int temp_update_size = update_list_size;
+    if (all_hyp.size() < update_list_size) { temp_update_size = all_hyp.size(); }
+
+    //sort all hyps by sentscore (bleu)
+    sort(all_hyp.begin(), all_hyp.end(), HypothesisCompareB);
+
+    if (PRINT_LIST) { cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++) cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_weights_g) << endl; }
+
+    // optionally re-rank by hope = metric + model score
+    if (hope_select == 1)
+      {
+        //find hope hypothesis using model + bleu
+        if (PRINT_LIST) cerr << "HOPE " << endl;
+        for (size_t u = 0; u != all_hyp.size(); u++)
+          {
+            double t_score = all_hyp[u]->features.dot(dense_weights_g);
+            all_hyp[u]->hope = all_hyp[u]->mt_metric + t_score;
+            if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl;
+          }
+
+        //sort hyps by hope score
+        sort(all_hyp.begin(), all_hyp.end(), HopeCompareB);
+      }
+
+    //assign cur_good the sorted list
+    cur_good.insert(cur_good.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);
+    if (PRINT_LIST) { cerr << "GOOD" << endl;  for(int u=0;u!=cur_good.size();u++) cerr << cur_good[u]->mt_metric << " " << cur_good[u]->hope << endl;}
+
+    // the head of the good list serves as the oracle for every relative loss below
+    shared_ptr<HypothesisInfo>& oracleN = cur_good[0];
+    if (fear_select == 1) {
+      //compute fear hyps
+      if (PRINT_LIST) cerr << "FEAR " << endl;
+      double hope_score = oracleN->features.dot(dense_weights_g);
+      if (PRINT_LIST) cerr << "hope score " << hope_score << endl;
+      for (size_t u = 0; u != all_hyp.size(); u++)
+        {
+          double t_score = all_hyp[u]->features.dot(dense_weights_g);
+
+          // fear = (oracle metric - own metric) + (own model score - oracle model score);
+          // oracle_loss keeps the metric part, oracle_feat_diff the feature difference
+          all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric - hope_score + t_score; //relative loss
+          all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric;
+          all_hyp[u]->oracle_feat_diff = oracleN->features - all_hyp[u]->features;
+          // NOTE(review): oracleN is itself an element of all_hyp, so that entry ends up
+          // holding a shared_ptr to itself (a reference cycle that is never released).
+          all_hyp[u]->oracleN = oracleN;
+          if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl;
+        }
+
+      sort(all_hyp.begin(), all_hyp.end(), FearCompareB);
+
+      cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);
+    }
+    else if (fear_select == 2) //select fear based on cost
+      {
+        // tail of the current ordering (metric order, or hope order if re-sorted above), reversed
+        cur_bad.insert(cur_bad.begin(), all_hyp.end()-temp_update_size, all_hyp.end());
+        reverse(cur_bad.begin(), cur_bad.end());
+      }
+    else //pred-based, fear_select = 3
+      {
+        // highest model score first
+        sort(all_hyp.begin(), all_hyp.end(), FearComparePred);
+        cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);
+      }
+
+    if (PRINT_LIST) { cerr<< "BAD"<<endl; for(int u=0;u!=cur_bad.size();u++) cerr << cur_bad[u]->mt_metric << " H:" << cur_bad[u]->hope << " F:" << cur_bad[u]->fear << endl;}
+
+    // unconditional per-sentence trace of the three list heads
+    cerr << "GOOD (BEST): " << cur_good[0]->mt_metric << endl;
+    cerr << " CUR: " << cur_best[0]->mt_metric << endl;
+    cerr << " BAD (WORST): " << cur_bad[0]->mt_metric << endl;
+  }
+};
+
+// Appends each line of the file `fname` (opened via ReadFile) to *c, in file order.
+// Entries already present in *c are kept.
+void ReadTrainingCorpus(const string& fname, vector<string>* c) {
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  // getline() returns the stream, which tests false as soon as no further line
+  // could be extracted, so a failed read never pushes an entry. A last line
+  // without a trailing newline is still extracted and therefore still stored.
+  for (string line; getline(in, line); ) {
+    c->push_back(line);
+  }
+}
+
+// Scores the previous pass's 1-best translations against the references in `ds`: reads
+// <od>/run.raw.init (cur_pass == 0) or <od>/run.raw.<cur_pass-1>.B line by line, scores line i with
+// ds[i], appends the statistics to *c and reports their corpus-level sum on stderr.
+void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od)
+{
+  cerr << "Reading BLEU gain file ";
+  string fname;
+  if(cur_pass == 0)
+    fname = od + "/run.raw.init";
+  else
+    fname = od + "/run.raw."  +  boost::lexical_cast<std::string>(cur_pass - 1) + ".B";
+  cerr << fname << "\n";
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  ScoreP acc;
+  string line;
+  int lc = 0;
+  while(in) {
+    getline(in, line);
+    if (line.empty() && !in) break;
+    // ds is indexed by line number; never index past the number of loaded references.
+    if (static_cast<size_t>(lc) >= ds.size()) {
+      cerr << "WARNING: " << fname << " has more lines than the " << ds.size() << " references; ignoring the rest\n";
+      break;
+    }
+    vector<WordID> sent;
+    TD::ConvertSentence(line, &sent);
+    ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
+    c->push_back(sentscore);
+    if (!acc) { acc = sentscore->GetZero(); }
+    acc->PlusEquals(*sentscore);
+    ++lc;
+  }
+  assert(lc > 0);
+  if (!acc) return;  // nothing was scored; reachable only when assert() is compiled out
+  float score = acc->ComputeScore();
+  string details;
+  acc->ScoreDetails(&details);
+  cerr << "INIT RUN " << details << score << endl;
+}
+
+// Entry point: decodes every input sentence, updates the weights with the configured optimizer, and writes the final and averaged weights.
+int main(int argc, char** argv) {
+  register_feature_functions();
+  SetSilent(true);  // turn off verbose decoder output
+
+  po::variables_map conf;
+  if (!InitCommandLine(argc, argv, &conf)) return 1;
+
+  if (conf.count("random_seed"))
+    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    rng.reset(new MT19937);
+  
+  vector<string> corpus;
+  //ReadTrainingCorpus(conf["source"].as<string>(), &corpus);
+
+  const string metric_name = conf["mt_metric"].as<string>();
+  optimizer = conf["optimizer"].as<int>();
+  fear_select = conf["fear"].as<int>();
+  hope_select = conf["hope"].as<int>();
+  mt_metric_scale = conf["mt_metric_scale"].as<double>();
+  approx_score = conf.count("approx_score");
+  no_reweight = conf.count("no_reweight");
+  no_select = conf.count("no_select");
+  update_list_size = conf["update_k_best"].as<int>();
+  unique_kbest = conf.count("unique_k_best");
+  pseudo_doc = true;
+
+  const string weights_dir = conf["weights_output"].as<string>();
+  const string output_dir = conf["output_dir"].as<string>();
+  ScoreType type = ScoreTypeFromString(metric_name);
+
+  //establish metric used for tuning
+  if (type == TER) {
+    invert_score = true;
+    // approx_score = false;
+  } else {
+    invert_score = false;
+  }
+
+  //load references
+  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
+  vector<ScoreP> corpus_bleu_sent_stats;
+  
+  //check training pass,if >0, then use previous iterations corpus bleu stats
+  cur_pass = conf["passes"].as<int>();
+  if(cur_pass > 0)
+    {
+      ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, ds, output_dir);
+    }
+  /*  if (ds.size() != corpus.size()) {
+    cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
+    return 1;
+    }*/
+  cerr << "Optimizing with " << optimizer << endl;
+  // load initial weights
+  /*Weights weights;
+  weights.InitFromFile(conf["input_weights"].as<string>());
+  SparseVector<double> lambdas;
+  weights.InitSparseVector(&lambdas);
+  */
+
+  
+  // The decoder owns the dense weight vector; `lambdas` is the sparse copy that the optimizers update.
+  ReadFile ini_rf(conf["decoder_config"].as<string>());
+  Decoder decoder(ini_rf.stream());
+
+  vector<weight_t>& dense_weights = decoder.CurrentWeightVector();
+  
+  SparseVector<weight_t> lambdas;
+  Weights::InitFromFile(conf["input_weights"].as<string>(), &dense_weights);
+  Weights::InitSparseVector(dense_weights, &lambdas);
+
+  const string input = decoder.GetConf()["input"].as<string>();
+  //const bool show_feature_dictionary = decoder.GetConf().count("show_feature_dictionary");
+  if (!SILENT) cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
+  ReadFile in_read(input);
+  istream *in = in_read.stream();
+  assert(*in);  
+  string buf;
+  
+  const double max_step_size = conf["max_step_size"].as<double>();
+
+
+  //  assert(corpus.size() > 0);
+  vector<GoodBadOracle> oracles(ds.size());
+
+  TrainingObserver observer(conf["k_best_size"].as<int>(), ds, &oracles, &corpus_bleu_sent_stats);
+
+  int cur_sent = 0;
+  int lcount = 0;
+  double objective=0;
+  double tot_loss = 0;
+  int dots = 0;
+  //  int cur_pass = 1;
+  //  vector<double> dense_weights;
+  SparseVector<double> tot;
+  SparseVector<double> final_tot;
+  //  tot += lambdas;          // initial weights
+  //  lcount++;                // count for initial weights
+
+  //string msg = "# MIRA tuned weights";
+  // while (cur_pass <= max_iteration) {
+    SparseVector<double> old_lambdas = lambdas;
+    tot.clear();
+    tot += lambdas;
+    cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; 
+    ScoreP acc, acc_h, acc_f;
+    // One training step per non-empty input line.
+    while(*in) {
+      getline(*in, buf);
+      if (buf.empty()) continue;
+      //for (cur_sent = 0; cur_sent < corpus.size(); cur_sent++) {
+      
+      cerr << "SENT: " << cur_sent << endl;
+      //TODO: allow batch updating
+      //dense_weights.clear();
+      //weights.InitFromVector(lambdas);
+      //weights.InitVector(&dense_weights);
+      //decoder.SetWeights(dense_weights);  
+      lambdas.init_vector(&dense_weights);
+      dense_weights_g = dense_weights;
+      decoder.SetId(cur_sent);
+      decoder.Decode(buf, &observer);  // decode the sentence, calling Notify to get the hope,fear, and model best hyps. 
+      
+      cur_sent = observer.GetCurrentSent();
+      const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis();
+      const HypothesisInfo& cur_good = *oracles[cur_sent].good[0];
+      const HypothesisInfo& cur_bad = *oracles[cur_sent].bad[0];
+
+      vector<shared_ptr<HypothesisInfo> >& cur_good_v = oracles[cur_sent].good;
+      vector<shared_ptr<HypothesisInfo> >& cur_bad_v = oracles[cur_sent].bad;
+      vector<shared_ptr<HypothesisInfo> > cur_best_v = observer.GetCurrentBest();
+
+      tot_loss += cur_hyp.mt_metric;
+      
+      //score hyps to be able to compute corpus level bleu after we finish this iteration through the corpus
+      ScoreP sentscore = ds[cur_sent]->ScoreCandidate(cur_hyp.hyp);
+      if (!acc) { acc = sentscore->GetZero(); }
+      acc->PlusEquals(*sentscore);
+
+      ScoreP hope_sentscore = ds[cur_sent]->ScoreCandidate(cur_good.hyp);
+      if (!acc_h) { acc_h = hope_sentscore->GetZero(); }
+      acc_h->PlusEquals(*hope_sentscore);
+
+      ScoreP fear_sentscore = ds[cur_sent]->ScoreCandidate(cur_bad.hyp);
+      if (!acc_f) { acc_f = fear_sentscore->GetZero(); }
+      acc_f->PlusEquals(*fear_sentscore);
+      // Update lambdas with the optimizer selected by conf["optimizer"].
+      if(optimizer == 4) { //single dual coordinate update, cur_good selected on BLEU score only (not model+BLEU)
+	//	if (!ApproxEqual(cur_hyp.mt_metric, cur_good.mt_metric)) {
+      
+	  double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights);
+	  double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric);
+	  const double loss = margin +  mt_loss;
+	  cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) <<endl;
+	  //	  if (loss > 0.0) {
+	    SparseVector<double> diff = cur_good.features;
+	    diff -= cur_bad.features;	    
+
+	    double diffsqnorm = diff.l2norm_sq();
+	    double delta;
+	    if (diffsqnorm > 0)
+	      delta = loss / (diffsqnorm);
+	    else
+	      delta = 0;
+	    
+	    //double step_size = loss / diff.l2norm_sq();
+	    cerr << loss << " " << delta << " " << diff << endl;
+	    if (delta > max_step_size) delta = max_step_size;
+	    lambdas += (cur_good.features * delta);
+	    lambdas -= (cur_bad.features * delta);
+	    //cerr << "L: " << lambdas << endl;
+	    //	  }
+	    //	  }
+      }
+      else if(optimizer == 1) //sgd - nonadapted step size
+	{
+	   
+	  lambdas += (cur_good.features) * max_step_size;
+	  lambdas -= (cur_bad.features) * max_step_size;
+	}
+      //cerr << "L: " << lambdas << endl;
+      else if(optimizer == 5) //full mira with n-best list of constraints from oracle, fear, best
+	{
+	  vector<shared_ptr<HypothesisInfo> > cur_constraint;
+	  cur_constraint.insert(cur_constraint.begin(), cur_bad_v.begin(), cur_bad_v.end());
+	  cur_constraint.insert(cur_constraint.begin(), cur_best_v.begin(), cur_best_v.end());
+	  cur_constraint.insert(cur_constraint.begin(), cur_good_v.begin(), cur_good_v.end());
+
+	  bool optimize_again;
+	  vector<shared_ptr<HypothesisInfo> > cur_pair;
+	  //SMO 
+	  for(int u=0;u!=cur_constraint.size();u++)	
+	    cur_constraint[u]->alpha =0;	      
+	  
+	  cur_constraint[0]->alpha =1; //set oracle to alpha=1
+
+	  cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl;
+	  int smo_iter = 10, smo_iter2 = 10;
+	  int iter = 0, iter2 = 0;
+	  bool DEBUG_SMO = false;
+	  while (iter2 < smo_iter2)
+	    {
+	      iter =0;
+	      while (iter < smo_iter)
+		{
+		  optimize_again = true;
+		  for (int i = 0; i< cur_constraint.size(); i++)
+		    for (int j = i+1; j< cur_constraint.size(); j++)
+		      {
+			if(DEBUG_SMO) cerr << "start " << i << " " << j <<  endl;
+			cur_pair.clear();
+			cur_pair.push_back(cur_constraint[j]);
+			cur_pair.push_back(cur_constraint[i]);
+			double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights);
+			
+			if (delta == 0) optimize_again = false;
+			//			cur_pair[0]->alpha += delta;
+			//	cur_pair[1]->alpha -= delta;
+			cur_constraint[j]->alpha += delta;
+			cur_constraint[i]->alpha -= delta;
+			double step_size = delta * max_step_size;
+			/*lambdas += (cur_pair[1]->features) * step_size;
+			lambdas -= (cur_pair[0]->features) * step_size;*/
+			lambdas += (cur_constraint[i]->features) * step_size;
+			lambdas -= (cur_constraint[j]->features) * step_size;
+			if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << i << " " << j << " " <<  delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha <<  endl;		
+			
+			//reload weights based on update
+			/*dense_weights.clear();
+			weights.InitFromVector(lambdas);
+			weights.InitVector(&dense_weights);*/
+		      }
+		  iter++;
+		  
+		  if(!optimize_again)
+		    { 
+		      iter = 100;
+		      cerr << "Optimization stopped, delta =0" << endl;
+		    }
+		  
+		  
+		}
+	      iter2++;
+	    }
+
+	  
+	}
+      else if(optimizer == 2 || optimizer == 3) //1-fear and cutting plane mira
+	  {
+	    bool DEBUG_SMO= true;
+	    vector<shared_ptr<HypothesisInfo> > cur_constraint;
+	    cur_constraint.push_back(cur_good_v[0]); //add oracle to constraint set
+	    bool optimize_again = true;
+	    int cut_plane_calls = 0;
+	    while (optimize_again)
+	      { 
+		if(DEBUG_SMO) cerr<< "optimize again: " << optimize_again << endl;
+		if(optimizer == 2){ //1-fear
+		  cur_constraint.push_back(cur_bad_v[0]);
+
+		  //check if we have a violation
+		  if(!(cur_constraint[1]->fear > cur_constraint[0]->fear + SMO_EPSILON))
+		    {
+		      optimize_again = false;
+		      cerr << "Constraint not violated" << endl;
+		    }
+		}
+		else
+		  { //cutting plane to add constraints
+		    if(DEBUG_SMO) cerr<< "Cutting Plane " << cut_plane_calls << " with " << lambdas << endl;
+		    optimize_again = false;
+		    cut_plane_calls++;
+		    CuttingPlane(&cur_constraint, &optimize_again, oracles[cur_sent].bad, dense_weights);
+		    if (cut_plane_calls >= MAX_SMO) optimize_again = false;
+		  }
+
+		if(optimize_again)
+		  {
+		    //SMO 
+		    for(int u=0;u!=cur_constraint.size();u++)	
+		      { 
+			cur_constraint[u]->alpha =0;
+			//cur_good_v[0]->alpha = 1; cur_bad_v[0]->alpha = 0;
+		      }
+		    cur_constraint[0]->alpha = 1;
+		    cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl;
+		    int smo_iter = MAX_SMO;
+		    int iter =0;
+		    while (iter < smo_iter)
+		      {			
+			//select pair to optimize from constraint set
+			vector<shared_ptr<HypothesisInfo> > cur_pair = SelectPair(&cur_constraint);
+			
+			if(cur_pair.empty()){iter=MAX_SMO; cerr << "Undefined pair " << endl; continue;} //pair is undefined so we are done with this smo 
+
+			//double num = cur_good_v[0]->fear - cur_bad_v[0]->fear;
+			/*double loss = cur_good_v[0]->oracle_loss - cur_bad_v[0]->oracle_loss;
+			  double margin = cur_good_v[0]->oracle_feat_diff.dot(dense_weights) - cur_bad_v[0]->oracle_feat_diff.dot(dense_weights);
+			  double num = loss - margin;
+			  SparseVector<double> diff = cur_good_v[0]->features;
+			  diff -= cur_bad_v[0]->features;
+			  double delta = num / (diff.l2norm_sq() * max_step_size);
+			  delta = max(-cur_good_v[0]->alpha, min(delta, cur_bad_v[0]->alpha));
+			  cur_good_v[0]->alpha += delta;
+			  cur_bad_v[0]->alpha -= delta;
+			  double step_size = delta * max_step_size;
+			  lambdas += (cur_bad_v[0]->features) * step_size;
+			  lambdas -= (cur_good_v[0]->features) * step_size;
+			*/
+			
+			double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights);
+
+			cur_pair[0]->alpha += delta;
+			cur_pair[1]->alpha -= delta;
+			double step_size = delta * max_step_size;
+			/*			lambdas += (cur_pair[1]->oracle_feat_diff) * step_size;
+						lambdas -= (cur_pair[0]->oracle_feat_diff) * step_size;*/
+			
+			cerr << "step " << step_size << endl;
+			double alpha_sum=0;
+			SparseVector<double> temp_lambdas = lambdas;
+			
+			for(int u=0;u!=cur_constraint.size();u++)	
+			  { 
+			    cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << endl;
+			    temp_lambdas += (cur_constraint[u]->oracleN->features-cur_constraint[u]->features) * cur_constraint[u]->alpha * step_size;
+			    alpha_sum += cur_constraint[u]->alpha;
+			  }
+			cerr << "Alpha sum " << alpha_sum << " " << temp_lambdas << endl;
+						
+			lambdas += (cur_pair[1]->features) * step_size;
+			lambdas -= (cur_pair[0]->features) * step_size;
+			cerr << " Lambdas " << lambdas << endl;
+			//reload weights based on update
+			dense_weights.clear();
+			//weights.InitFromVector(lambdas);
+			//weights.InitVector(&dense_weights);
+			lambdas.init_vector(&dense_weights);
+			dense_weights_g = dense_weights;
+			iter++;
+					
+			if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha <<  endl;		
+			//		cerr << "SMO opt " << iter << " " << delta << " " << cur_good_v[0]->alpha << " " << cur_bad_v[0]->alpha <<  endl;
+			if(no_select) //don't use selection heuristic to determine when to stop SMO, rather just when delta =0 
+			  if (delta == 0) iter = MAX_SMO;
+			
+			//only perform one dual coordinate ascent step
+			if(optimizer == 2) 
+			  {
+			    optimize_again = false;
+			    iter = MAX_SMO;
+			  }		
+			
+		      }
+		    if(optimizer == 3)
+		      {
+			if(!no_reweight)
+			  {
+			    if(DEBUG_SMO) cerr<< "Decoding with new weights -- now orac are " << oracles[cur_sent].good.size() << endl;
+			    Hypergraph hg = observer.GetCurrentForest();
+			    hg.Reweight(dense_weights);
+			    //observer.UpdateOracles(cur_sent, hg);
+			    if(unique_kbest)
+                              observer.UpdateOracles<KBest::FilterUnique>(cur_sent, hg);
+                            else
+                              observer.UpdateOracles<KBest::NoFilter<std::vector<WordID> > >(cur_sent, hg);
+
+			    
+			  }
+		      }
+		  }
+		
+		
+	      }
+	   
+	    //print objective after this sentence
+	    double lambda_change = (lambdas - old_lambdas).l2norm_sq();
+	    double max_fear = cur_constraint[cur_constraint.size()-1]->fear;
+	    double temp_objective = 0.5 * lambda_change;// + max_step_size * max_fear;
+
+	    for(int u=0;u!=cur_constraint.size();u++)	
+	      { 
+		cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << " " << cur_constraint[u]->fear << endl;
+		temp_objective += cur_constraint[u]->alpha * cur_constraint[u]->fear;
+	      }
+	    objective += temp_objective;
+	    
+	    cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl;
+	  }
+      
+      // Accumulate the running sum of weight vectors; it is divided by lcount below to get the averaged weights.
+      if ((cur_sent * 40 / ds.size()) > dots) { ++dots; cerr << '.'; }
+      tot += lambdas;
+      ++lcount;
+      cur_sent++;
+      
+      cout << TD::GetString(cur_good_v[0]->hyp) << " ||| " << TD::GetString(cur_best_v[0]->hyp) << " ||| " << TD::GetString(cur_bad_v[0]->hyp) << endl;
+
+      //clear good/bad lists from oracles for this sentences  - you want to keep them around for things
+      
+      //      oracles[cur_sent].good.clear();
+      //oracles[cur_sent].bad.clear();
+    }
+
+    cerr << "FINAL OBJECTIVE: "<< objective << endl;
+    final_tot += tot;
+    cerr << "Translated " << lcount << " sentences " << endl;
+    cerr << " [AVG METRIC LAST PASS=" << (tot_loss / lcount) << "]\n";
+    tot_loss = 0;
+    /*
+      float corpus_score = acc->ComputeScore();
+      string corpus_details;
+      acc->ScoreDetails(&corpus_details);
+      cerr << "MODEL " << corpus_details << endl;
+      cout << corpus_score << endl;
+      
+      corpus_score = acc_h->ComputeScore();
+      acc_h->ScoreDetails(&corpus_details);
+      cerr << "HOPE " << corpus_details << endl;
+      cout << corpus_score << endl;
+      
+      corpus_score = acc_f->ComputeScore();
+      acc_f->ScoreDetails(&corpus_details);
+      cerr << "FEAR " << corpus_details << endl;
+      cout << corpus_score << endl;
+    */
+    int node_id = rng->next() * 100000;
+    cerr << " Writing weights to " << node_id << endl;
+    Weights::ShowLargestFeatures(dense_weights);
+    dots = 0;
+    ostringstream os;
+    os << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << ".gz";
+    string msg = "# MIRA tuned weights ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lcount);
+    //Weights.InitFromVector(lambdas);
+    lambdas.init_vector(&dense_weights);
+    Weights::WriteToFile(os.str(), dense_weights, true, &msg);
+
+    SparseVector<double> x = tot;
+    x /= lcount;
+    ostringstream sa;
+    string msga = "# MIRA tuned weights AVERAGED ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lcount);
+    sa << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << "-avg.gz";
+    //Weights ww;
+    //ww.InitFromVector(x);
+    x.init_vector(&dense_weights);
+    Weights::WriteToFile(sa.str(), dense_weights, true, &msga);
+
+    //assign averaged lambdas to initialize next iteration
+    //lambdas = x;
+
+    /*    double lambda_change = (old_lambdas - lambdas).l2norm_sq();
+    cerr << "Change in lambda " << lambda_change << endl;
+    
+    if ( lambda_change < EPSILON)
+      {
+	cur_pass = max_iteration;
+	cerr << "Weights converged - breaking" << endl;
+      }
+            
+    ++cur_pass;
+    */
+    
+    //} iteration while loop
+ 
+    /* cerr << endl;
+  weights.WriteToFile("weights.mira-final.gz", true, &msg);
+  final_tot /= (lcount + 1);//max_iteration);
+  tot /= (corpus.size() + 1);
+  weights.InitFromVector(final_tot);
+  cerr << tot << "||||" << final_tot << endl;
+  msg = "# MIRA tuned weights (averaged vector)";
+  weights.WriteToFile("weights.mira-final-avg.gz", true, &msg);
+    */
+  cerr << "Optimization complete.\nAVERAGED WEIGHTS: weights.mira-final-avg.gz\n";
+  return 0;
+}
+
diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl
new file mode 100755
index 00000000..f4d61407
--- /dev/null
+++ b/training/mira/run_mira.pl
@@ -0,0 +1,548 @@
+#!/usr/bin/env perl
+use strict;
+my @ORIG_ARGV=@ARGV;
+use Cwd qw(getcwd);
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0));
+push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+
+# Skip local config (used for distributing jobs) if we're running in local-only mode
+use LocalConfig;
+use Getopt::Long;
+use IPC::Open2;
+use POSIX ":sys_wait_h";
+my $QSUB_CMD = qsub_args(mert_memory());
+
+require "libcall.pl";
+
+
+my $srcFile;    # set by --source-file
+my $refFiles;   # set by --ref-files
+my $bin_dir = $SCRIPT_DIR;
+die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
+my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
+
+my $iteration = 0.0;      # printed as "ITER" and incremented once per pass of the optimization loop
+my $max_iterations = 6;   # --max-iterations
+my $metric = "ibm_bleu";  # --metric
+my $iniFile;              # decoder ini file, the single positional argument
+my $weights;
+my $initialWeights;       # --weights
+my $decode_nodes = 1;   # number of decode nodes
+my $pmem = "1g";          # --pmem
+my $dir;                  # --workdir
+
+my $SCORER = $FAST_SCORE;
+my $local_server = "$bin_dir/local_parallelize.pl";
+my $parallelize = "$bin_dir/../dpmert/parallelize.pl";
+my $libcall = "$bin_dir/../dpmert/libcall.pl";
+my $sentserver = "$bin_dir/../dpmert/sentserver";
+my $sentclient = "$bin_dir/../dpmert/sentclient";
+my $run_local_server = 0; # --local_server
+my $run_local = 0;        # --local
+my $usefork;              # --use-fork
+my $pass_suffix = '';
+
+my $cdec ="$bin_dir/kbest_mirav5"; #"$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mira_lv";
+
+#my $cdec ="$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mirav5"; #"$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mira_lv";
+die "Can't find decoder in $cdec" unless -x $cdec;
+my $decoder = $cdec;      # default trainer binary; replaced by --decoder when given
+my $decoderOpt;
+my $update_size=250;      # --update-size, passed to the decoder as -b
+my $approx_score;
+my $kbest_size=250;       # --kbest-size, passed to the decoder as -k
+my $metric_scale=1;       # --metric-scale, passed to the decoder as -s
+my $optimizer=2;          # --optimizer, passed to the decoder as -o
+my $disable_clean = 0;
+my $use_make;  # use make to parallelize line search
+my $density_prune;
+my $cpbin=1;
+my $help = 0;
+my $epsilon = 0.0001;
+my $step_size = 0.01;     # --step-size, passed to the decoder as -C
+my $gpref;                # --grammar-prefix
+my $unique_kbest;
+my $freeze;
+my $latent;
+my $sample_max;
+my $hopes=1;              # --hope-select, passed to the decoder as -h
+my $fears=1;              # --fear-select, passed to the decoder as -f
+
+my $range = 35000;
+my $minimum = 15000;
+my $portn = int(rand($range)) + $minimum;  # random port in [$minimum, $minimum + $range - 1], later passed as --baseport
+
+
+# Process command-line options; on a parse error, a wrong number of positional arguments, or --help, print the help text and exit
+Getopt::Long::Configure("no_auto_abbrev");
+if (GetOptions(
+        "decoder=s" => \$decoderOpt,
+        "decode-nodes=i" => \$decode_nodes,
+        "density-prune=f" => \$density_prune,
+        "dont-clean" => \$disable_clean,
+        "pass-suffix=s" => \$pass_suffix,
+        "use-fork" => \$usefork,
+        "epsilon=s" => \$epsilon,
+        "help" => \$help,
+        "local" => \$run_local,
+	"local_server" => \$run_local_server,
+        "use-make=i" => \$use_make,
+        "max-iterations=i" => \$max_iterations,
+        "pmem=s" => \$pmem,
+        "cpbin!" => \$cpbin,
+        "ref-files=s" => \$refFiles,
+        "metric=s" => \$metric,
+        "source-file=s" => \$srcFile,
+        "weights=s" => \$initialWeights,
+	"optimizer=i" => \$optimizer,
+	"metric-scale=i" => \$metric_scale,
+	"kbest-size=i" => \$kbest_size,
+	"update-size=i" => \$update_size,
+	"step-size=f" => \$step_size,
+	"hope-select=i" => \$hopes,
+	"fear-select=i" => \$fears,
+	"approx-score" => \$approx_score,
+	"unique-kbest" => \$unique_kbest,
+	"latent" => \$latent,
+	"sample-max=i" => \$sample_max,
+        "grammar-prefix=s" => \$gpref,
+	"freeze" => \$freeze,
+        "workdir=s" => \$dir,
+	) == 0 || @ARGV!=1 || $help) {
+        print_help();
+        exit;
+}
+$usefork = $usefork ? "--use-fork" : "";  # interpolated verbatim into the parallelizer command line, so it must hold the flag text rather than 1/undef
+($iniFile) = @ARGV;
+
+
+# Forward declarations for subs defined further down.
+sub write_config;
+sub enseg;
+sub print_help;
+
+my ($nodelist, $bleu, $logfile, $projected_score);
+my $interval_count = 0;
+my $host = check_output("hostname");
+chomp $host;
+
+# Reference files as repeated "-r FILE" arguments for the decoder and scorer command lines.
+#my $refs_comma_sep = get_comma_sep_refs($refFiles);
+my $refs_comma_sep = get_comma_sep_refs('r',$refFiles);
+
+#my $refs_comma_sep_4cdec = get_comma_sep_refs_4cdec($refFiles);
+
+# Working directory: defaults to "mira"; a relative path is made absolute.
+$dir = "mira" unless $dir;
+if ($dir !~ /^\//) {
+        my $basedir = check_output("pwd");
+        chomp $basedir;
+        $dir = "$basedir/$dir";
+}
+
+# --decoder replaces the default decoder binary.
+$decoder = $decoderOpt if $decoderOpt;
+
+
+# Initializations and helper functions
+srand;
+
+my @childpids = ();
+my @cleanupcmds = ();
+
+sub cleanup {  # kills every pid in @childpids, runs every command in @cleanupcmds, then exits with status 1
+        print STDERR "Cleanup...\n";
+        foreach my $child (@childpids) { unchecked_call("kill $child"); }
+        foreach my $command (@cleanupcmds) { unchecked_call("$command"); }
+        exit 1;
+};
+
+# Always call cleanup, no matter how we exit. NOTE(review): this glob assignment runs at run time; confirm it affects exit calls already compiled in this file.
+*CORE::GLOBAL::exit =
+    sub{ cleanup(); };
+$SIG{INT} = "cleanup";   # handler given by sub name
+$SIG{TERM} = "cleanup";
+$SIG{HUP} = "cleanup";
+
+
+my $decoderBase = check_output("basename $decoder"); chomp $decoderBase;
+my $newIniFile = "$dir/$decoderBase.ini";  # working-directory path the ini file is copied to later
+my $inputFileName = "$dir/input";
+my $user = $ENV{"USER"};
+
+# The ini file must exist before anything is written to the working directory.
+# process ini file
+-e $iniFile || die "Error: could not open $iniFile for reading\n";
+open(INI, $iniFile);  # NOTE(review): no read from INI is visible in this part of the script
+
+use File::Basename qw(basename);
+#pass bindir, refs to vars holding bin
+sub modbin {
+    # Copies each referenced binary into $bindir and repoints the caller's variable at the copy.
+    my ($bindir, @bin_refs) = @_;
+    check_call("mkdir -p $bindir");
+    die "couldn't make bindir $bindir" unless -d $bindir;
+    foreach my $ref (@bin_refs) {
+        my $src = $$ref;
+        $$ref = "$bindir/".basename($src);
+        check_call("cp -p $src $$ref");
+    }
+}
+sub dirsize {  # number of entries in directory $_[0] (including . and ..) minus one; -1 if it cannot be opened
+    opendir(ISEMPTY, $_[0]) or return -1;
+    my @entries = readdir(ISEMPTY); closedir(ISEMPTY); return scalar(@entries)-1;  # list context: a scalar-context readdir yields one name, not a count
+}
+
+
+
+
+if (-e $dir && dirsize($dir)>1 && -e "$dir/weights" ){ # allow preexisting logfile, binaries, but not dist-vest.pl outputs
+    die "ERROR: working dir $dir already exists\n\n";
+} else {
+    -e $dir || mkdir $dir;
+    mkdir "$dir/scripts";
+    my $cmdfile="$dir/rerun-mira.sh";   # records the working directory and command line of this run
+    open(CMD,'>',$cmdfile) or die "Can't write $cmdfile: $!";
+    print CMD "cd ",&getcwd,"\n";
+    my $cline=&cmdline."\n";
+    print CMD $cline;
+    close CMD;
+    print STDERR $cline;
+    chmod(0755,$cmdfile);
+    unless (defined $initialWeights && -e $initialWeights) {
+        print STDERR "Please specify an initial weights file with --weights\n";  # the option is declared as "weights=s"
+        print_help();
+        exit;
+    }
+    check_call("cp $initialWeights $dir/weights.0");
+    die "Can't find weights.0" unless (-e "$dir/weights.0");
+}
+write_config(*STDERR);
+
+# Generate initial files and values
+check_call("cp $iniFile $newIniFile");
+$iniFile = $newIniFile;  # from here on the copy in the working directory is used
+
+my $newsrc = "$dir/dev.input";
+enseg($srcFile, $newsrc, $gpref);  # writes the <seg>-wrapped dev source
+
+$srcFile = $newsrc;
+my $devSize = 0;  # number of lines in the wrapped dev source
+open F, "<$srcFile" or die "Can't read $srcFile: $!";
+while(<F>) { $devSize++; }
+close F;
+
+my $lastPScore = 0;
+my $lastWeightsFile;  # set in every loop pass and reported after the loop
+
+# main optimization loop: each pass runs the trainer over the dev set, scores its output, and averages the written weight files
+#while (1){
+for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
+
+	print STDERR "\n\nITERATION $opt_iter\n==========\n";
+	print STDERR "Using port $portn\n";
+
+	# iteration-specific files
+	my $runFile="$dir/run.raw.$opt_iter";
+	my $onebestFile="$dir/1best.$opt_iter";
+	my $logdir="$dir/logs.$opt_iter";
+	my $decoderLog="$logdir/decoder.sentserver.log.$opt_iter";
+	my $scorerLog="$logdir/scorer.log.$opt_iter";
+	my $weightdir="$dir/weights.pass$opt_iter/";
+	check_call("mkdir -p $logdir");
+	check_call("mkdir -p $weightdir");
+
+	#decode
+	print STDERR "RUNNING DECODER AT ";
+	print STDERR unchecked_output("date");
+#	my $im1 = $opt_iter - 1;
+	my $weightsFile="$dir/weights.$opt_iter";
+	print "ITER $iteration " ;  # NOTE(review): goes to STDOUT, which also carries the final weights path printed after the loop; confirm intended
+	my $cur_pass = "-p 0$opt_iter";  # pass number for the trainer, with a "0" prepended
+	my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile $refs_comma_sep -m $metric -s $metric_scale -a -b $update_size -k $kbest_size -o $optimizer $cur_pass -O $weightdir -D $dir  -h $hopes -f $fears -C $step_size";
+	if($unique_kbest){
+		$decoder_cmd .= " -u";
+	}
+	if($latent){
+		$decoder_cmd .= " -l";
+	}
+	if($sample_max){
+		$decoder_cmd .= " -t $sample_max";
+	}
+	if ($density_prune) {
+		$decoder_cmd .= " --density_prune $density_prune";
+	}
+	my $pcmd;  # command prefix that feeds the dev source to the trainer, directly or through a parallelizer
+	if ($run_local) {
+		$pcmd = "cat $srcFile |";
+	} elsif ($use_make) {
+	    # TODO: Throw error when decode_nodes is specified along with use_make
+		$pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --";
+	} elsif ($run_local_server){
+	    $pcmd = "cat $srcFile | $local_server $usefork -p $pmem -e $logdir -n $decode_nodes --";
+	}
+	else {
+	    $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --baseport $portn --";
+	}
+	my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
+	print STDERR "COMMAND:\n$cmd\n";
+	check_bash_call($cmd);
+	# Check that the run file has one line per dev sentence; retry up to 5 times, sleeping 3 seconds between attempts.
+	my $retries = 0;
+        my $num_topbest;
+        while($retries < 5) {
+            $num_topbest = check_output("wc -l < $runFile");
+            print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n";
+            if($devSize == $num_topbest) {
+                last;
+            } else {
+                print STDERR "Incorrect number of topbest. Waiting for distributed filesystem and retrying...\n";
+                sleep(3);
+            }
+            $retries++;
+        }
+	 die "Dev set contains $devSize sentences, but we don't have topbest for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_topbest);
+
+	# Each run-file line is split on " ||| " into three fields, written to the .H, .B and .F files respectively.
+	#score the output from this iteration
+	open RUN, "<$runFile" or die "Can't read $runFile: $!";
+	open H, ">$runFile.H" or die;
+	open F, ">$runFile.F" or die;
+	open B, ">$runFile.B" or die;
+	while(<RUN>) {
+	    chomp();
+	    (my $hope,my $best,my $fear) = split(/ \|\|\| /);
+	    print H "$hope \n"; 	    
+	    print B "$best \n";
+ 	    print F "$fear \n";
+	}
+	close RUN;
+	close F; close B; close H;
+	
+	my $dec_score = check_output("cat $runFile.B | $SCORER $refs_comma_sep -l $metric");
+	my $dec_score_h = check_output("cat $runFile.H | $SCORER $refs_comma_sep -l $metric");
+	my $dec_score_f = check_output("cat $runFile.F | $SCORER $refs_comma_sep -l $metric");
+	chomp $dec_score; chomp $dec_score_h; chomp $dec_score_f;
+	print STDERR "DECODER SCORE: $dec_score HOPE: $dec_score_h FEAR: $dec_score_f\n";
+
+	# save space
+	check_call("gzip -f $runFile");
+	check_call("gzip -f $decoderLog");
+		my $iter_filler="";
+	if($opt_iter < 10)
+	{$iter_filler="0";}
+
+	my $nextIter = $opt_iter + 1;
+	my $newWeightsFile = "$dir/weights.$nextIter";  # the next pass reads this path as its $weightsFile
+	$lastWeightsFile = "$dir/weights.$opt_iter";
+
+	average_weights("$weightdir/weights.mira-pass*.*[0-9].gz", $newWeightsFile, $logdir);
+#	check_call("cp $lastW $newWeightsFile");
+#	if ($icc < 2) {
+#		print STDERR "\nREACHED STOPPING CRITERION: score change too little\n";
+#		last;
+#	}
+	system("gzip -f $logdir/kbes*");
+	print STDERR "\n==========\n";
+	$iteration++;
+}
+#find 
+#my $cmd = `grep SCORE /fs/clip-galep5/lexical_tm/log.runmira.nist.20 | cat -n | sort -k +2 | tail -1`;
+#$cmd =~ m/([0-9]+)/;
+#$lastWeightsFile = "$dir/weights.$1";
+#check_call("ln -s $lastWeightsFile $dir/weights.tuned");
+print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
+# NOTE(review): $lastWeightsFile is the weights file the last pass started from, not the $newWeightsFile it produced; confirm intended.
+print STDOUT "$lastWeightsFile\n";
+
+sub get_lines {  # returns the number of lines in the file named by the first argument; dies if it cannot be opened
+  my ($fn) = @_;
+  open(my $fh, '<', $fn) or die "Couldn't read $fn: $!";  # lexical handle, 3-arg open: the name is never parsed as a mode
+  my $lc = 0;
+  while(<$fh>) { $lc++; }
+  close $fh;  # the handle was previously left open
+  return $lc; }
+
+sub get_comma_sep_refs {  # builds "-FLAG file1 -FLAG file2 ..." from a flag letter and a file pattern
+  my ($flag, $pattern) = @_;
+  my $expanded = check_output("echo $pattern");  # the pattern is passed through the shell's echo
+  chomp $expanded;
+  my @paths = split /\s+/, $expanded;
+  return "-$flag " . join(" -$flag ", @paths);
+}
+
+
+sub read_weights_file {  # returns the weight values of $file, in file order, joined by single spaces
+  my ($file) = @_;
+  open F, "<$file" or die "Couldn't read $file: $!";
+  my @r = ();
+  my $pm = -1;  # NOTE(review): never updated below, so the order check always compares against -1
+  while(<F>) {
+    next if /^#/;      # skip comment lines
+    next if /^\s*$/;   # skip blank lines
+    chomp;
+    if (/^(.+)\s+(.+)$/) {
+      my $m = $1;  # first field of the line
+      my $w = $2;  # second field; this is what gets collected
+      die "Weights out of order: $m <= $pm" unless $m > $pm;  # numeric comparison
+      push @r, $w;
+    } else {
+      warn "Unexpected feature name in weight file: $_";
+    }
+  }
+  close F;
+  return join ' ', @r;
+}
+
+sub write_config {
+	# Prints a summary of the run settings, read from the file-level variables, to filehandle $fh.
+	my ($fh) = @_;
+	my $cleanup = $disable_clean ? "no" : "yes";
+
+	print $fh "\n";
+	print $fh "DECODER:          $decoder\n";
+	print $fh "INI FILE:         $iniFile\n";
+	print $fh "WORKING DIR:      $dir\n";
+	print $fh "SOURCE (DEV):     $srcFile\n";
+	print $fh "REFS (DEV):       $refFiles\n";
+	print $fh "EVAL METRIC:      $metric\n";
+	print $fh "START ITERATION:  $iteration\n";
+	print $fh "MAX ITERATIONS:   $max_iterations\n";
+	print $fh "DECODE NODES:     $decode_nodes\n";
+	print $fh "HEAD NODE:        $host\n";
+	print $fh "PMEM (DECODING):  $pmem\n";
+	print $fh "CLEANUP:          $cleanup\n";
+	print $fh "INITIAL WEIGHTS:  $initialWeights\n";
+        print $fh "GRAMMAR PREFIX:   $gpref\n";
+}
+
+# Write a weights file pairing feature names with their weights.
+#   $neww - path of the file to (over)write
+#   $rfn  - array ref of feature names
+#   $rpts - array ref of weights, parallel to $rfn
+# Dies if the two arrays differ in length or the file cannot be written.
+sub update_weights_file {
+  my ($neww, $rfn, $rpts) = @_;
+  my @feats = @$rfn;
+  my @pts = @$rpts;
+  my $num_feats = scalar @feats;
+  my $num_pts = scalar @pts;
+  die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts;
+  # Report which file failed and why instead of a bare die.
+  open(my $out, '>', $neww) or die "Couldn't write $neww: $!";
+  for (my $i = 0; $i < $num_feats; $i++) {
+    my $f = $feats[$i];
+    my $lambda = $pts[$i];
+    print $out "$f $lambda\n";
+  }
+  # Buffered write errors (e.g. a full disk) only surface at close time.
+  close($out) or die "Couldn't close $neww: $!";
+}
+
+# Copy a source file, wrapping every line in a <seg> tag with a zero-based id.
+#   $src         - input file, one sentence per line
+#   $newsrc      - output file to create
+#   $grammarpref - optional prefix; when defined each generated tag also gets
+#                  grammar="<prefix>.<id>.gz"
+# Lines that already start with <seg are copied unchanged but must carry a
+# numeric id attribute, otherwise the sub dies.
+# Dies if either file cannot be opened (the original ignored open failures and
+# silently produced an empty or missing output).
+sub enseg {
+    my $src = shift;
+    my $newsrc = shift;
+    my $grammarpref = shift;
+
+    open(my $in, '<', $src) or die "Couldn't read $src: $!";
+    open(my $out, '>', $newsrc) or die "Couldn't write $newsrc: $!";
+    my $i=0;
+    while (my $line=<$in>){
+	chomp $line;
+	if ($line =~ /^\s*<seg/i) {
+	    if($line =~ /id="[0-9]+"/) {
+		print $out "$line\n";
+	    } else {
+		die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+	    }
+	}
+	elsif (defined $grammarpref) {
+	    print $out "<seg id=\"$i\" grammar=\"$grammarpref.$i.gz\">$line</seg>\n";}
+	else {
+	    print $out "<seg id=\"$i\">$line</seg>\n";
+	}
+	# The counter advances for pre-tagged lines as well.
+	$i++;
+    }
+    close $in;
+    close($out) or die "Couldn't close $newsrc: $!";
+}
+
+# Emit the (placeholder) usage message on the currently selected output handle.
+sub print_help {
+	print("Something wrong\n");
+}
+
+# Return the script name followed by its original arguments, space separated
+# and without any shell quoting.
+sub cmdline {
+    my @parts = ($0, @ORIG_ARGV);
+    return join(' ', @parts);
+}
+
+#buggy: last arg gets quoted sometimes?
+# Characters whose presence means an argument must be double-quoted.
+my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]};
+# Characters that get a backslash when they appear inside the double quotes.
+my $shell_escape_in_quote=qr{[\\"\$`!]};
+
+# Quote a single argument for use on a shell command line.
+#   $arg - the raw argument (may be undef)
+# Returns undef for undef, "" (two quote characters) for the empty string,
+# the argument in double quotes with \ " $ ` ! escaped when it contains a
+# shell-special character, and the argument unchanged otherwise.
+sub escape_shell {
+    my ($arg)=@_;
+    return undef unless defined $arg;
+    # An unquoted empty string disappears when the shell splits words, which
+    # would shift every following argument.
+    return '""' if $arg eq '';
+    if ($arg =~ /$is_shell_special/) {
+        $arg =~ s/($shell_escape_in_quote)/\\$1/g;
+        return "\"$arg\"";
+    }
+    return $arg;
+}
+
+# Shell-escape every argument, after stripping one trailing newline from a
+# private copy of each. The caller's values are not modified.
+sub escaped_shell_args {
+    my @escaped;
+    foreach my $original (@_) {
+        my $copy = $original;
+        chomp $copy;
+        push @escaped, escape_shell($copy);
+    }
+    return @escaped;
+}
+
+# Shell-escape the arguments and join them with single spaces.
+sub escaped_shell_args_str {
+    my @escaped = escaped_shell_args(@_);
+    return join(' ', @escaped);
+}
+
+# Return the script name followed by its original arguments, each one
+# shell-escaped.
+sub escaped_cmdline {
+    my $escaped_args = escaped_shell_args_str(@ORIG_ARGV);
+    return join(' ', $0, $escaped_args);
+}
+
+# Combine the gzipped per-pass weight files matching a glob into one weighted
+# average.
+#   $path    - glob pattern of the *.gz weight files
+#   $out     - path of the averaged weights file to write
+#   $logpath - accepted for call compatibility; not used
+# Inside each file a line starting with '#' is split on " ||| " and its third
+# field becomes the multiplier applied to the "<feature> <weight>" lines that
+# follow. Every feature's weighted sum is divided by the sum of the
+# multipliers. Each file is decompressed for reading and compressed again
+# afterwards (both via check_bash_call).
+# Dies if the output cannot be written or if features were read but the
+# multipliers sum to zero.
+sub average_weights {
+
+    my $path = shift;
+    my $out = shift;
+    my $logpath = shift;
+    print "AVERAGE $path $out\n";
+    my %feature_weights= ();
+    my $total_mult =0;
+    # NOTE(review): presumably gives the workers time to finish writing their
+    # weight files -- confirm before removing.
+    sleep(10);
+    foreach my $file (glob "$path")
+    {
+	my $cmd = "gzip -d $file";
+	# Strip only the trailing extension; the unanchored original would also
+	# hit a ".gz" in the middle of the path.
+	$file =~ s/\.gz$//;
+	check_bash_call($cmd);
+	my $mult = 0;
+	print "FILE $file \n";
+	my $score_fh;
+	unless (open($score_fh, '<', $file)) {
+	    warn "Couldn't read $file: $!";
+	    next;
+	}
+	while( <$score_fh> ) {
+	    my $line = $_;
+	    if ($line !~ m/^\#/)
+	    {
+		my @s = split(" ",$line);
+		$feature_weights{$s[0]}+= $mult * $s[1];
+	    }
+	    else
+	    {
+		(my $msg,my $ran,$mult) = split(/ \|\|\| /);
+		print "RAN $ran $mult\n";
+	    }
+	}
+	$total_mult += $mult;
+	
+	close $score_fh;
+	$cmd = "gzip $file"; check_bash_call($cmd);
+    }
+    
+    # Fail with a clear message before truncating the output file, instead of
+    # an "Illegal division by zero" halfway through writing it.
+    if (%feature_weights && $total_mult == 0) {
+	die "Cannot average weights from $path: multipliers sum to zero";
+    }
+
+#print out new averaged weights
+    # The original used "or next" here, which is not inside any loop of this
+    # sub and would have jumped into the caller's loop.
+    open(my $out_fh, '>', $out) or die "Couldn't write $out: $!";
+    for my $f ( keys %feature_weights ) {
+	print "$f $feature_weights{$f} $total_mult\n";
+	my $ave = $feature_weights{$f} / $total_mult;
+	
+	print "Printing $f $ave ||| ";
+	print $out_fh "$f $ave\n";
+    }
+    # Close explicitly so the averaged weights are flushed to disk before
+    # anything else reads the file.
+    close($out_fh) or die "Couldn't close $out: $!";
+    
+}
-- 
cgit v1.2.3


From 2d58182ec6c961fe2f08f4a88886f3e128fb0113 Mon Sep 17 00:00:00 2001
From: Vladimir Eidelman <vlad@umiacs.umd.edu>
Date: Sat, 13 Apr 2013 21:57:37 -0400
Subject: mira run script

---
 environment/LocalConfig.pm      |    2 +-
 training/mira/Makefile.am       |    7 +-
 training/mira/kbest_cut_mira.cc | 1010 ++++++++++++++++++++++++++++++++++
 training/mira/kbest_mirav5.cc   | 1148 ---------------------------------------
 training/mira/run_mira.pl       |  181 ++++--
 5 files changed, 1141 insertions(+), 1207 deletions(-)
 create mode 100644 training/mira/kbest_cut_mira.cc
 delete mode 100644 training/mira/kbest_mirav5.cc

diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm
index 627f7f8c..f7c3b1c7 100644
--- a/environment/LocalConfig.pm
+++ b/environment/LocalConfig.pm
@@ -34,7 +34,7 @@ my $CCONFIG = {
     #'QSubQueue' => '-q long',
   },
   'UMIACS' => {
-    'HOST_REGEXP' => qr/^d.*\.umiacs\.umd\.edu$/,
+    'HOST_REGEXP' => qr/^(n|s|d).*\.umiacs\.umd\.edu$/,
     'JobControl'  => 'qsub',
     'QSubMemFlag' => '-l pmem=',
     'QSubQueue' => '-q batch',
diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am
index fa4fb22d..8cddc2d7 100644
--- a/training/mira/Makefile.am
+++ b/training/mira/Makefile.am
@@ -1,6 +1,11 @@
-bin_PROGRAMS = kbest_mira
+bin_PROGRAMS = kbest_mira \
+		kbest_cut_mira 
 
 kbest_mira_SOURCES = kbest_mira.cc
 kbest_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a
 
+
+kbest_cut_mira_SOURCES = kbest_cut_mira.cc
+kbest_cut_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a
+
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc
new file mode 100644
index 00000000..34eb00dc
--- /dev/null
+++ b/training/mira/kbest_cut_mira.cc
@@ -0,0 +1,1010 @@
+#include <sstream>
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include <cmath>
+#include <algorithm>
+
+#include "config.h"
+
+
+#include <boost/shared_ptr.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "sentence_metadata.h"
+#include "scorer.h"
+#include "verbose.h"
+#include "viterbi.h"
+#include "hg.h"
+#include "prob.h"
+#include "kbest.h"
+#include "ff_register.h"
+#include "decoder.h"
+#include "filelib.h"
+#include "fdict.h"
+#include "time.h"
+#include "sampler.h"
+
+#include "weights.h"
+#include "sparse_vector.h"
+
+using namespace std;
+using boost::shared_ptr;
+namespace po = boost::program_options;
+
+// Process-wide settings. Unless noted otherwise they are assigned in main()
+// from the parsed command-line options.
+bool invert_score;                 // true when the metric is TER, false otherwise
+boost::shared_ptr<MT19937> rng;    // seeded from --random_seed when that option is given
+bool approx_score;                 // conf.count("approx_score"); no option of that name is registered in InitCommandLine
+bool no_reweight;                  // --no_reweight; read by CuttingPlane
+bool no_select;                    // --no_select; read by SelectPair
+bool unique_kbest;                 // --unique_k_best; picks the k-best filter in NotifyTranslationForest
+int update_list_size;              // --update_k_best; length of the good/bad/best lists
+vector<weight_t> dense_weights_g;  // copy of the decoder weights, refreshed before each Decode call
+double mt_metric_scale;            // --mt_metric_scale; forced to 1 when pseudo_doc is set
+int optimizer;                     // --optimizer
+int fear_select;                   // --fear
+int hope_select;                   // --hope
+bool pseudo_doc;                   // --pseudo_doc
+bool sent_approx;                  // --sent_approx
+bool checkloss;                    // not referenced in the visible part of this file
+
+// Assert that every entry of w is a finite number (neither NaN nor infinite).
+void SanityCheck(const vector<double>& w) {
+  for (vector<double>::const_iterator it = w.begin(); it != w.end(); ++it) {
+    const double value = *it;
+    assert(!isnan(value));
+    assert(!isinf(value));
+  }
+}
+
+// Strict ordering of feature indices by decreasing absolute weight.
+// Holds a reference to the weight vector, which must outlive the comparator.
+struct FComp {
+  const vector<double>& w_;
+  FComp(const vector<double>& w) : w_(w) {}
+  bool operator()(int a, int b) const {
+    const double magnitude_a = fabs(w_[a]);
+    const double magnitude_b = fabs(w_[b]);
+    return magnitude_b < magnitude_a;
+  }
+};
+
+// Print to stderr the (at most) ten features with the largest absolute
+// weight, as "name=value" pairs on one line.
+void ShowLargestFeatures(const vector<double>& w) {
+  const size_t num_features = w.size();
+  vector<int> fnums;
+  fnums.reserve(num_features);
+  for (size_t idx = 0; idx < num_features; ++idx)
+    fnums.push_back(static_cast<int>(idx));
+
+  // Only the head of the index list needs to be ordered.
+  const size_t shown = num_features > 10 ? 10 : num_features;
+  partial_sort(fnums.begin(), fnums.begin() + shown, fnums.end(), FComp(w));
+
+  cerr << "TOP FEATURES:";
+  for (size_t rank = 0; rank < shown; ++rank) {
+    const int fid = fnums[rank];
+    cerr << ' ' << FD::Convert(fid) << '=' << w[fid];
+  }
+  cerr << endl;
+}
+
+// Parse the command line (and the optional --config file) into *conf.
+//   argc, argv - program arguments as passed to main
+//   conf       - receives the parsed options
+// Returns true when parsing succeeded and every option main() reads
+// unconditionally is present. Returns false after printing the option
+// summary to stderr when --help was given, a required option is missing, or
+// the --config file cannot be opened.
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+    ("input_weights,w",po::value<string>(),"Input feature weights file")
+    ("source,i",po::value<string>(),"Source file for development set")
+    ("pass,p", po::value<int>()->default_value(15), "Current pass through the training data")
+    ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
+    ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+    ("optimizer,o",po::value<int>()->default_value(1), "Optimizer (SGD=1, PA MIRA w/Delta=2, Cutting Plane MIRA=3, PA MIRA=4, Triple nbest list MIRA=5)")
+    ("fear,f",po::value<int>()->default_value(1), "Fear selection (model-cost=1, maxcost=2, maxscore=3)")
+    ("hope,h",po::value<int>()->default_value(1), "Hope selection (model+cost=1, mincost=2)")
+    ("max_step_size,C", po::value<double>()->default_value(0.01), "regularization strength (C)")
+    ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
+    ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Amount to scale MT loss function by")
+    ("sent_approx,a", "Use smoothed sentence-level BLEU score for approximate scoring")
+    ("pseudo_doc,e", "Use pseudo-document BLEU score for approximate scoring")
+    ("no_reweight,d","Do not reweight forest for cutting plane")
+    ("no_select,n", "Do not use selection heuristic")
+    ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles")
+    ("update_k_best,b", po::value<int>()->default_value(1), "Size of good, bad lists to perform update with")
+    ("unique_k_best,u", "Unique k-best translation list")
+    ("weights_output,O",po::value<string>(),"Directory to write weights to")
+    ("output_dir,D",po::value<string>(),"Directory to place output in")
+    ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+  po::options_description clo("Command line options");
+  clo.add_options()
+    ("config", po::value<string>(), "Configuration file")
+    ("help,H", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+  
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    const string config_path = (*conf)["config"].as<string>();
+    ifstream config(config_path.c_str());
+    // A mistyped path used to be ignored silently, leaving defaults in place.
+    if (!config) {
+      cerr << "Cannot open configuration file " << config_path << endl;
+      return false;
+    }
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+
+  if (conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+
+  // main() calls .as<string>() on weights_output and output_dir without
+  // checking for them, so they are as mandatory as the other three.
+  static const char* const kRequired[] = {
+    "input_weights", "decoder_config", "reference", "weights_output", "output_dir"
+  };
+  bool all_present = true;
+  for (size_t i = 0; i < sizeof(kRequired) / sizeof(kRequired[0]); ++i) {
+    if (!conf->count(kRequired[i])) {
+      cerr << "Missing required option --" << kRequired[i] << endl;
+      all_present = false;
+    }
+  }
+  if (!all_present) {
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+// Approach for corpus-level score gain: load the previous pass's translations,
+// keep the score statistics of each sentence, and for the current sentence
+// subtract its old statistics and substitute those of the new translation.
+
+
+static const double kMINUS_EPSILON = -1e-6;   // not referenced in the visible part of this file
+static const double EPSILON = 0.000001;       // relative tolerance used by ApproxEqual
+static const double SMO_EPSILON = 0.0001;     // slack when comparing fear scores (CuttingPlane, SelectPair)
+static const double PSEUDO_SCALE = 0.95;      // scaling applied to the pseudo-document statistics after each sentence
+static const int MAX_SMO = 10;                // not referenced in the visible part of this file
+int cur_pass;                                 // set in main() from --pass
+
+// One scored translation hypothesis together with the quantities the
+// optimizers attach to it.
+struct HypothesisInfo {
+  // MakeHypothesisInfo fills only features, mt_metric and hyp; the remaining
+  // numeric fields are read by SelectPair / CuttingPlane, so give them a
+  // defined starting value instead of leaving them indeterminate.
+  HypothesisInfo() : mt_metric(0), hope(0), fear(0), alpha(0), oracle_loss(0) {}
+
+  SparseVector<double> features;          // feature values of the derivation
+  vector<WordID> hyp;                     // target-side yield
+  double mt_metric;                       // metric score of hyp (negated when invert_score is set)
+  double hope;                            // metric + model score, when hope selection computes it
+  double fear;                            // fear score, when fear selection computes it
+  double alpha;                           // bound used to clip delta in ComputeDelta
+  double oracle_loss;                     // metric difference to the hope hypothesis
+  SparseVector<double> oracle_feat_diff;  // hope features minus this hypothesis' features
+  // Qualified: plain shared_ptr is ambiguous between boost:: and std:: when
+  // the file is compiled as C++11 (using namespace std + using boost::shared_ptr).
+  boost::shared_ptr<HypothesisInfo> oracleN;  // hope hypothesis this one was compared against
+};
+
+// True when a and b are identical or differ by less than EPSILON relative to
+// the magnitude of b.
+bool ApproxEqual(double a, double b) {
+  const double relative_gap = fabs(a - b) / fabs(b);
+  return a == b || relative_gap < EPSILON;
+}
+
+// Shorthand for an owning hypothesis pointer. Qualified with boost:: because
+// plain shared_ptr is ambiguous with std::shared_ptr under C++11.
+typedef boost::shared_ptr<HypothesisInfo> HI;
+
+// Sort predicate: higher metric score first.
+bool HypothesisCompareB(const HI& h1, const HI& h2 ) 
+{
+  return h1->mt_metric > h2->mt_metric;
+}
+
+
+// Sort predicate: larger hope score first.
+bool HopeCompareB(const HI& h1, const HI& h2 ) 
+{
+  const double lhs_hope = h1->hope;
+  const double rhs_hope = h2->hope;
+  return rhs_hope < lhs_hope;
+}
+
+// Sort predicate: larger fear score first.
+bool FearCompareB(const HI& h1, const HI& h2 ) 
+{
+  const double lhs_fear = h1->fear;
+  const double rhs_fear = h2->fear;
+  return rhs_fear < lhs_fear;
+}
+
+// Sort predicate: larger model score first, where the model score is the dot
+// product of the hypothesis features with the global weight copy.
+bool FearComparePred(const HI& h1, const HI& h2 ) 
+{
+  return h2->features.dot(dense_weights_g) < h1->features.dot(dense_weights_g);
+}
+
+// Sort predicate: lower metric score first.
+bool HypothesisCompareG(const HI& h1, const HI& h2 ) 
+{
+  const double lhs_metric = h1->mt_metric;
+  const double rhs_metric = h2->mt_metric;
+  return rhs_metric > lhs_metric;
+}
+
+
+// Cutting-plane step: add the most violating hypothesis to the constraint set
+// when it beats everything already in the set.
+//   cur_c         - current constraint set; may gain one element
+//   again         - set to true when a constraint was added (never reset here)
+//   all_hyp       - all hypotheses of the sentence; re-sorted in place when
+//                   no_reweight is set
+//   dense_weights - current weight vector
+// When no_reweight is set, hope and fear scores of all_hyp are recomputed
+// under dense_weights first. Otherwise all_hyp[0] is taken as the maximum
+// fear hypothesis as it stands.
+// Does nothing when all_hyp is empty. An empty constraint set always accepts
+// the new violator (the original dereferenced a null pointer in that case).
+void CuttingPlane(vector<boost::shared_ptr<HypothesisInfo> >* cur_c, bool* again, vector<boost::shared_ptr<HypothesisInfo> >& all_hyp, vector<weight_t> dense_weights)
+{
+  bool DEBUG_CUT = false;
+  // boost:: is spelled out: plain shared_ptr is ambiguous under C++11.
+  boost::shared_ptr<HypothesisInfo> max_fear, max_fear_in_set;
+  vector<boost::shared_ptr<HypothesisInfo> >& cur_constraint = *cur_c;
+
+  // Without hypotheses there is no violator to pick; all_hyp[0] would be invalid.
+  if (all_hyp.empty()) return;
+
+  if(no_reweight)
+    {
+      //find new hope hypothesis
+      for(int u=0;u!=all_hyp.size();u++)
+        {
+          double t_score = all_hyp[u]->features.dot(dense_weights);
+          all_hyp[u]->hope = 1 * all_hyp[u]->mt_metric + t_score;
+        }
+
+      //sort hyps by hope score
+      sort(all_hyp.begin(),all_hyp.end(),HopeCompareB);
+
+      double hope_score = all_hyp[0]->features.dot(dense_weights);
+      if(DEBUG_CUT) cerr << "New hope derivation score " << hope_score << endl;
+
+      for(int u=0;u!=all_hyp.size();u++)
+        {
+          double t_score = all_hyp[u]->features.dot(dense_weights);
+          // loss relative to the hope hypothesis (all_hyp[0]) plus model score difference
+          all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*all_hyp[0]->mt_metric - hope_score + t_score; //relative loss
+        }
+
+      sort(all_hyp.begin(),all_hyp.end(),FearCompareB);
+    }
+  //assign maximum fear derivation from all derivations
+  max_fear = all_hyp[0];
+
+  if(DEBUG_CUT) cerr <<"Cutting Plane Max Fear "<<max_fear->fear ;
+  for(int i=0; i < cur_constraint.size();i++) //select maximal violator already in constraint set
+    {
+      if (!max_fear_in_set || cur_constraint[i]->fear > max_fear_in_set->fear)
+        max_fear_in_set = cur_constraint[i];
+    }
+
+  // max_fear_in_set stays null when the constraint set is empty.
+  const bool set_is_empty = !max_fear_in_set;
+  if(DEBUG_CUT && !set_is_empty) cerr << "Max Fear in constraint set " << max_fear_in_set->fear << endl;
+
+  if(set_is_empty || max_fear->fear > max_fear_in_set->fear + SMO_EPSILON)
+    {
+      cur_constraint.push_back(max_fear);
+      *again = true;
+      if(DEBUG_CUT) cerr << "Optimize Again " << *again << endl;
+    }
+}
+
+
+// Compute the clipped step size for a pair of constraints.
+//   cur_p         - the pair; element 0 and element 1 must both exist and
+//                   have oracleN set
+//   max_step_size - multiplied with the squared feature-difference norm in
+//                   the denominator
+//   dense_weights - current weight vector
+// Returns (margin + loss) / (|f0 - f1|^2 * max_step_size), or 0 when the two
+// feature vectors are identical, clipped to [-alpha of element 0, alpha of
+// element 1]. Intermediate values are logged to stderr.
+// The only code change from the original is the boost:: qualification of
+// shared_ptr, which is otherwise ambiguous with std::shared_ptr under C++11.
+double ComputeDelta(vector<boost::shared_ptr<HypothesisInfo> >* cur_p, double max_step_size,vector<weight_t> dense_weights )
+{
+  vector<boost::shared_ptr<HypothesisInfo> >& cur_pair = *cur_p;
+   double loss = cur_pair[0]->oracle_loss - cur_pair[1]->oracle_loss;
+   //double margin = -cur_pair[0]->oracle_feat_diff.dot(dense_weights) + cur_pair[1]->oracle_feat_diff.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff?
+   //double num = loss - margin;
+  
+
+   // Each term is (score of the stored hope hypothesis - score of the
+   // hypothesis itself); the first pair member enters negated.
+   double margin = -(cur_pair[0]->oracleN->features.dot(dense_weights)- cur_pair[0]->features.dot(dense_weights)) + (cur_pair[1]->oracleN->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights));
+   const double num = margin +  loss;
+   cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) <<endl;
+   
+
+/*  double num = 
+    (cur_pair[0]->oracle_loss - cur_pair[0]->oracle_feat_diff.dot(dense_weights))
+    - (cur_pair[1]->oracle_loss - cur_pair[1]->oracle_feat_diff.dot(dense_weights));
+  */
+
+  SparseVector<double> diff = cur_pair[0]->features;
+  diff -= cur_pair[1]->features;
+  /*  SparseVector<double> diff = cur_pair[0]->oracle_feat_diff;
+  diff -= cur_pair[1]->oracle_feat_diff;*/
+  double diffsqnorm = diff.l2norm_sq();
+  double delta;
+  // Identical feature vectors give no direction to move in.
+  if (diffsqnorm > 0)
+    delta = num / (diffsqnorm * max_step_size);
+  else
+    delta = 0;
+  cerr << " D1:" << delta;
+  //clip delta (enforce margin constraints)
+
+  delta = max(-cur_pair[0]->alpha, min(delta, cur_pair[1]->alpha));
+  cerr << " D2:" << delta;
+  return delta;
+}
+
+
+// Choose the next pair of constraints to optimize.
+//   cur_c - current constraint set
+// Returns a two-element vector, or an empty vector when no pair qualifies.
+// With no_select set or optimizer == 2 the first two constraints are
+// returned directly. Otherwise each constraint u is compared with the
+// highest-fear other constraint:
+//   * alpha == 0 and fear above that maximum by more than SMO_EPSILON:
+//     pair u with the first other constraint whose alpha is positive;
+//   * alpha > 0 and fear below that maximum by more than SMO_EPSILON:
+//     pair u with the first other constraint whose fear is larger.
+// A set with fewer than two constraints always yields an empty result; the
+// original read past the end of the set on the shortcut path.
+vector<boost::shared_ptr<HypothesisInfo> > SelectPair(vector<boost::shared_ptr<HypothesisInfo> >* cur_c)
+{
+  bool DEBUG_SELECT= false;
+  // boost:: is spelled out: plain shared_ptr is ambiguous under C++11.
+  vector<boost::shared_ptr<HypothesisInfo> >& cur_constraint = *cur_c;
+  
+  vector<boost::shared_ptr<HypothesisInfo> > pair;
+
+  if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for 1-mira
+      // Elements [0] and [1] exist only when the set holds two constraints.
+      if (cur_constraint.size() < 2) return pair;
+      pair.push_back(cur_constraint[0]);
+      pair.push_back(cur_constraint[1]);
+      return pair;
+    }
+  
+  for(int u=0;u != cur_constraint.size();u++)
+    {
+      boost::shared_ptr<HypothesisInfo> max_fear;
+      
+      if(DEBUG_SELECT) cerr<< "cur alpha " << u  << " " << cur_constraint[u]->alpha;
+      for(int i=0; i < cur_constraint.size();i++) //select maximal violator
+        {
+          if(i != u)
+            if (!max_fear || cur_constraint[i]->fear > max_fear->fear)
+              max_fear = cur_constraint[i];
+        }
+      if(!max_fear) return pair; // u is the only constraint
+      
+      if(DEBUG_SELECT) cerr << " F" << max_fear->fear << endl;
+
+      
+      if ((cur_constraint[u]->alpha == 0) && (cur_constraint[u]->fear > max_fear->fear + SMO_EPSILON))
+        {
+          for(int i=0; i < cur_constraint.size();i++) //select maximal violator
+            {
+              if(i != u)
+                if (cur_constraint[i]->alpha > 0)
+                  {
+                    pair.push_back(cur_constraint[u]);
+                    pair.push_back(cur_constraint[i]);
+                    cerr << "RETJURN from 1" << endl;
+                    return pair;
+                  }
+            }
+        }
+      if ((cur_constraint[u]->alpha > 0) && (cur_constraint[u]->fear < max_fear->fear - SMO_EPSILON))
+        {
+          for(int i=0; i < cur_constraint.size();i++) //select maximal violator
+            {
+              if(i != u)
+                if (cur_constraint[i]->fear > cur_constraint[u]->fear)
+                  {
+                    pair.push_back(cur_constraint[u]);
+                    pair.push_back(cur_constraint[i]);
+                    return pair;
+                  }
+            }
+        }
+    
+    } 
+  return pair; //no more constraints to optimize, we're done here
+
+}
+
+// Per-sentence hypothesis lists filled by TrainingObserver::UpdateOracles.
+// boost:: is spelled out because plain shared_ptr is ambiguous with
+// std::shared_ptr when the file is compiled as C++11.
+struct GoodBadOracle {
+  vector<boost::shared_ptr<HypothesisInfo> > good;  // hope hypotheses, best first
+  vector<boost::shared_ptr<HypothesisInfo> > bad;   // fear hypotheses, worst offender first
+};
+
+// Decoder observer that, for every translated sentence, extracts a k-best
+// list from the forest, scores each entry against the references and fills
+// the sentence's "good" (hope) and "bad" (fear) lists plus the model-best
+// list.
+//
+// Changes from the original: every scalar member now has a defined initial
+// value (corpus_src_length in particular was read uninitialised in
+// pseudo-document mode), the initialiser list follows declaration order, an
+// empty k-best list is caught by an assertion instead of indexing element 0
+// of an empty vector, and shared_ptr is qualified with boost:: because the
+// plain name is ambiguous with std::shared_ptr under C++11.
+struct TrainingObserver : public DecoderObserver {
+  // k   - number of derivations to request per sentence
+  // d   - reference scorer, indexed by sentence id
+  // o   - per-sentence good/bad lists to fill; must outlive the observer
+  // cbs - per-sentence score statistics of the previous pass; must outlive
+  //       the observer
+  TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o, vector<ScoreP>* cbs)
+      : ds(d),
+        corpus_bleu_sent_stats(*cbs),
+        oracles(*o),
+        kbest_size(k),
+        cur_sent(0),
+        corpus_bleu_score(0),
+        corpus_src_length(0),
+        curr_src_length(0) {
+    //calculate corpus bleu score from previous iterations 1-best for BLEU gain
+    // main() fills corpus_bleu_sent_stats (asserting at least one entry)
+    // whenever cur_pass > 0, so acc is set after the loop.
+    if (!pseudo_doc && !sent_approx && cur_pass > 0) {
+      ScoreP acc;
+      for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) {
+        if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); }
+        acc->PlusEquals(*corpus_bleu_sent_stats[ii]);
+      }
+      corpus_bleu_stats = acc;
+      corpus_bleu_score = acc->ComputeScore();
+    }
+  }
+
+  const DocScorer& ds;
+  vector<ScoreP>& corpus_bleu_sent_stats;
+  vector<GoodBadOracle>& oracles;
+  vector<boost::shared_ptr<HypothesisInfo> > cur_best;   // first update_list_size entries of the k-best list
+  boost::shared_ptr<HypothesisInfo> cur_oracle;          // never assigned in this struct
+  const int kbest_size;
+  Hypergraph forest;                                     // copy of the most recent forest
+  int cur_sent;                                          // id of the most recent sentence
+  ScoreP corpus_bleu_stats;                              // corpus / pseudo-document statistics
+  float corpus_bleu_score;
+
+  float corpus_src_length;                               // decayed running source length (pseudo_doc mode)
+  float curr_src_length;                                 // source length of the most recent sentence
+
+  const int GetCurrentSent() const {
+    return cur_sent;
+  }
+
+  // Requires that at least one sentence has been observed.
+  const HypothesisInfo& GetCurrentBestHypothesis() const {
+    return *cur_best[0];
+  }
+
+  const vector<boost::shared_ptr<HypothesisInfo> > GetCurrentBest() const {
+    return cur_best;
+  }
+
+  // NOTE(review): cur_oracle is never set inside this struct, so this
+  // dereferences a null pointer unless something else assigns it.
+  const HypothesisInfo& GetCurrentOracle() const {
+    return *cur_oracle;
+  }
+
+  const Hypergraph& GetCurrentForest() const {
+    return forest;
+  }
+
+  // Record sentence id and source length, rebuild the hypothesis lists for
+  // the sentence, and keep a copy of the forest.
+  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
+    cur_sent = smeta.GetSentenceID();
+    //cerr << "SOURCE " << smeta.GetSourceLength() << endl;
+    curr_src_length = (float) smeta.GetSourceLength();
+    //UpdateOracles(smeta.GetSentenceID(), *hg);
+    if(unique_kbest)
+      UpdateOracles<KBest::FilterUnique>(smeta.GetSentenceID(), *hg);
+    else
+      UpdateOracles<KBest::NoFilter<std::vector<WordID> > >(smeta.GetSentenceID(), *hg);
+    forest = *hg;
+  }
+
+  // Allocate a hypothesis record holding the given features, metric score
+  // and yield.
+  boost::shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double score, const vector<WordID>& hyp) {
+    boost::shared_ptr<HypothesisInfo> h(new HypothesisInfo);
+    h->features = feats;
+    h->mt_metric = score;
+    h->hyp = hyp;
+    return h;
+  }
+
+  // Rebuild cur_best and the good/bad lists of sentence sent_id from the
+  // k-best derivations of forest. Filter selects the k-best duplicate filter.
+  template <class Filter>
+  void UpdateOracles(int sent_id, const Hypergraph& forest) {
+
+    bool PRINT_LIST= false;
+    vector<boost::shared_ptr<HypothesisInfo> >& cur_good = oracles[sent_id].good;
+    vector<boost::shared_ptr<HypothesisInfo> >& cur_bad = oracles[sent_id].bad;
+    //TODO: look at keeping previous iterations hypothesis lists around
+    cur_best.clear();
+    cur_good.clear();
+    cur_bad.clear();
+
+    vector<boost::shared_ptr<HypothesisInfo> > all_hyp;
+
+    typedef KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,Filter> K;
+    K kbest(forest,kbest_size);
+
+    for (int i = 0; i < kbest_size; ++i) {
+      typename K::Derivation *d =
+        kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+      if (!d) break;
+
+      float sentscore;
+      if(cur_pass > 0 && !pseudo_doc && !sent_approx)
+        {
+          ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield);
+          ScoreP corpus_no_best = corpus_bleu_stats->GetZero();
+
+          // corpus statistics with this sentence's previous translation removed
+          corpus_bleu_stats->Subtract(*corpus_bleu_sent_stats[sent_id], &*corpus_no_best);
+          sent_stats->PlusEquals(*corpus_no_best, 0.5);
+
+          //compute gain from new sentence in 1-best corpus
+          sentscore = mt_metric_scale * (sent_stats->ComputeScore() - corpus_no_best->ComputeScore());// - corpus_bleu_score);
+        }
+      else if(pseudo_doc)   //pseudo-corpus smoothing
+        {
+          float src_scale = corpus_src_length + curr_src_length;
+          ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield);
+          if(!corpus_bleu_stats){ corpus_bleu_stats = sent_stats->GetZero();}
+
+          sent_stats->PlusEquals(*corpus_bleu_stats);
+          sentscore =  mt_metric_scale  * src_scale * sent_stats->ComputeScore();
+        }
+      else //use sentence-level smoothing ( used when cur_pass=0 if not pseudo_doc)
+        {
+          sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore());
+        }
+
+      if (invert_score) sentscore *= -1.0;
+
+      if (i < update_list_size){
+        if(PRINT_LIST)cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl;
+        cur_best.push_back( MakeHypothesisInfo(d->feature_values, sentscore, d->yield));
+      }
+
+      all_hyp.push_back(MakeHypothesisInfo(d->feature_values, sentscore,d->yield));   //store all hyp to extract hope and fear
+    }
+
+    // Everything below reads element 0 of the lists built from all_hyp.
+    assert(!all_hyp.empty());
+
+    if(pseudo_doc){
+      //update psuedo-doc stats
+      string details, details2;
+      corpus_bleu_stats->ScoreDetails(&details2);
+      ScoreP sent_stats = ds[sent_id]->ScoreCandidate(cur_best[0]->hyp);
+      corpus_bleu_stats->PlusEquals(*sent_stats);
+
+      sent_stats->ScoreDetails(&details);
+      // replace the running statistics by a copy scaled with PSEUDO_SCALE
+      sent_stats = corpus_bleu_stats;
+      corpus_bleu_stats = sent_stats->GetZero();
+      corpus_bleu_stats->PlusEquals(*sent_stats, PSEUDO_SCALE);
+
+      corpus_src_length = PSEUDO_SCALE * (corpus_src_length + curr_src_length);
+      cerr << "CORP S " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n" << details2 << endl;
+    }
+
+
+    //figure out how many hyps we can keep maximum
+    int temp_update_size = update_list_size;
+    if (all_hyp.size() < update_list_size){ temp_update_size = all_hyp.size();}
+
+    //sort all hyps by sentscore (eg. bleu)
+    sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareB);
+
+    if(PRINT_LIST){  cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++)	cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_weights_g) << endl; }
+
+    if(hope_select == 1)
+      {
+        //find hope hypothesis using model + bleu
+        if (PRINT_LIST) cerr << "HOPE " << endl;
+        for(int u=0;u!=all_hyp.size();u++)
+          {
+            double t_score = all_hyp[u]->features.dot(dense_weights_g);
+            all_hyp[u]->hope = all_hyp[u]->mt_metric + t_score;
+            if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl;
+          }
+
+        //sort hyps by hope score
+        sort(all_hyp.begin(),all_hyp.end(),HopeCompareB);
+      }
+
+    //assign cur_good the sorted list
+    cur_good.insert(cur_good.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);
+    if(PRINT_LIST) { cerr << "GOOD" << endl;  for(int u=0;u!=cur_good.size();u++) cerr << cur_good[u]->mt_metric << " " << cur_good[u]->hope << endl;}
+
+    boost::shared_ptr<HypothesisInfo>& oracleN = cur_good[0];
+
+
+    if(fear_select == 1){   //compute fear hyps with model - bleu
+      if (PRINT_LIST) cerr << "FEAR " << endl;
+      double hope_score = oracleN->features.dot(dense_weights_g);
+
+      if (PRINT_LIST) cerr << "hope score " << hope_score << endl;
+      for(int u=0;u!=all_hyp.size();u++)
+        {
+          double t_score = all_hyp[u]->features.dot(dense_weights_g);
+
+          all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric - hope_score + t_score; //relative loss
+          all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric;
+          all_hyp[u]->oracle_feat_diff = oracleN->features - all_hyp[u]->features;
+          all_hyp[u]->oracleN=oracleN;
+          if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl;
+        }
+
+      sort(all_hyp.begin(),all_hyp.end(),FearCompareB);
+
+      cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);
+    }
+    else if(fear_select == 2) //select fear based on cost
+      {
+        // take the tail of the current ordering, last element first
+        cur_bad.insert(cur_bad.begin(), all_hyp.end()-temp_update_size, all_hyp.end());
+        reverse(cur_bad.begin(),cur_bad.end());
+      }
+    else //pred-based, fear_select = 3
+      {
+        sort(all_hyp.begin(),all_hyp.end(),FearComparePred);
+        cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);
+      }
+
+
+    if(PRINT_LIST){ cerr<< "BAD"<<endl; for(int u=0;u!=cur_bad.size();u++) cerr << cur_bad[u]->mt_metric << " H:" << cur_bad[u]->hope << " F:" << cur_bad[u]->fear << endl;}
+
+    cerr << "GOOD (BEST): " << cur_good[0]->mt_metric << endl;
+    cerr << " CUR: " << cur_best[0]->mt_metric << endl;
+    cerr << " BAD (WORST): " << cur_bad[0]->mt_metric << endl;
+  }
+};
+
+// Append every line of the file fname to *c, in file order.
+void ReadTrainingCorpus(const string& fname, vector<string>* c) {
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  // getline leaves the stream in a failed state once nothing more can be
+  // read, which ends the loop.
+  while (getline(in, line)) {
+    c->push_back(line);
+  }
+}
+
+// Load the translations of an earlier run and collect their per-sentence
+// score statistics.
+//   cur_pass - current pass; selects "<od>/run.raw.init" for pass 0 and
+//              "<od>/run.raw.<cur_pass-1>.B" otherwise
+//   c        - receives one ScoreP per line read, in file order
+//   ds       - reference scorer; line N is scored against ds[N]
+//   od       - directory holding the run files
+// Logs the file name and the accumulated score details to stderr. Asserts
+// that at least one line was read.
+// Lines beyond the number of loaded references are reported and ignored;
+// the original indexed ds without a bound, so a trailing blank line or a
+// stale run file read out of range.
+void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od)
+{
+  cerr << "Reading BLEU gain file ";
+  string fname;
+  if(cur_pass == 0)
+    {
+      fname = od + "/run.raw.init";
+    }
+  else
+    {
+      int last_pass = cur_pass - 1; 
+      fname = od + "/run.raw."  +  boost::lexical_cast<std::string>(last_pass) + ".B";
+    }
+  cerr << fname << "\n";
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  ScoreP acc;
+  string line;
+  int lc = 0;
+  while(in) {
+    getline(in, line);
+    if (line.empty() && !in) break;
+    if (lc >= ds.size()) {
+      cerr << "WARNING: " << fname << " has more lines than the " << ds.size()
+           << " loaded references; ignoring the rest" << endl;
+      break;
+    }
+    vector<WordID> sent;
+    TD::ConvertSentence(line, &sent);
+    ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
+    c->push_back(sentscore);
+    if (!acc) { acc = sentscore->GetZero(); }
+    acc->PlusEquals(*sentscore);
+    ++lc;
+ 
+  }
+
+  
+  // acc is only set once a line has been scored.
+  assert(lc > 0);
+  float score = acc->ComputeScore();
+  string details;
+  acc->ScoreDetails(&details);
+  cerr << "INIT RUN " << details << score << endl;
+
+}
+
+
+int main(int argc, char** argv) {
+  register_feature_functions();
+  SetSilent(true);  // turn off verbose decoder output
+
+  po::variables_map conf;
+  if (!InitCommandLine(argc, argv, &conf)) return 1;
+
+  if (conf.count("random_seed"))
+    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    rng.reset(new MT19937);
+  
+  vector<string> corpus;
+  //ReadTrainingCorpus(conf["source"].as<string>(), &corpus);
+
+  const string metric_name = conf["mt_metric"].as<string>();
+  optimizer = conf["optimizer"].as<int>();
+  fear_select = conf["fear"].as<int>();
+  hope_select = conf["hope"].as<int>();
+  mt_metric_scale = conf["mt_metric_scale"].as<double>();
+  approx_score = conf.count("approx_score");
+  no_reweight = conf.count("no_reweight");
+  no_select = conf.count("no_select");
+  update_list_size = conf["update_k_best"].as<int>();
+  unique_kbest = conf.count("unique_k_best");
+  pseudo_doc = conf.count("pseudo_doc");
+  sent_approx = conf.count("sent_approx");
+  cerr << "PSEUDO " << pseudo_doc << " SENT " << sent_approx << endl;
+  if(pseudo_doc)
+    mt_metric_scale=1;
+
+  const string weights_dir = conf["weights_output"].as<string>();
+  const string output_dir = conf["output_dir"].as<string>();
+  ScoreType type = ScoreTypeFromString(metric_name);
+
+  //establish metric used for tuning
+  if (type == TER) {
+    invert_score = true;
+    // approx_score = false;
+  } else {
+    invert_score = false;
+  }
+
+  //load references
+  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
+  vector<ScoreP> corpus_bleu_sent_stats;
+  
+  //check training pass,if >0, then use previous iterations corpus bleu stats
+  cur_pass = conf["pass"].as<int>();
+  if(cur_pass > 0)
+    {
+      ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, ds, output_dir);
+    }
+  /*  if (ds.size() != corpus.size()) {
+    cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
+    return 1;
+    }*/
+  cerr << "Optimizing with " << optimizer << endl;
+  // load initial weights
+  /*Weights weights;
+  weights.InitFromFile(conf["input_weights"].as<string>());
+  SparseVector<double> lambdas;
+  weights.InitSparseVector(&lambdas);
+  */
+
+  
+  
+  ReadFile ini_rf(conf["decoder_config"].as<string>());
+  Decoder decoder(ini_rf.stream());
+
+  vector<weight_t>& dense_weights = decoder.CurrentWeightVector();
+  
+  SparseVector<weight_t> lambdas;
+  Weights::InitFromFile(conf["input_weights"].as<string>(), &dense_weights);
+  Weights::InitSparseVector(dense_weights, &lambdas);
+
+  const string input = decoder.GetConf()["input"].as<string>();
+  //const bool show_feature_dictionary = decoder.GetConf().count("show_feature_dictionary");
+  if (!SILENT) cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
+  ReadFile in_read(input);
+  istream *in = in_read.stream();
+  assert(*in);  
+  string buf;
+  
+  const double max_step_size = conf["max_step_size"].as<double>();
+
+
+  //  assert(corpus.size() > 0);
+  vector<GoodBadOracle> oracles(ds.size());
+
+  TrainingObserver observer(conf["k_best_size"].as<int>(), ds, &oracles, &corpus_bleu_sent_stats);
+
+  int cur_sent = 0;
+  int lcount = 0;
+  double objective=0;
+  double tot_loss = 0;
+  int dots = 0;
+  //  int cur_pass = 1;
+  //  vector<double> dense_weights;
+  SparseVector<double> tot;
+  SparseVector<double> final_tot;
+  //  tot += lambdas;          // initial weights
+  //  lcount++;                // count for initial weights
+
+  //string msg = "# MIRA tuned weights";
+  // while (cur_pass <= max_iteration) {
+    SparseVector<double> old_lambdas = lambdas;
+    tot.clear();
+    tot += lambdas;
+    cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; 
+    ScoreP acc, acc_h, acc_f;
+    
+    while(*in) {
+      getline(*in, buf);
+      if (buf.empty()) continue;
+      //TODO: allow batch updating
+      lambdas.init_vector(&dense_weights);
+      dense_weights_g = dense_weights;
+      decoder.SetId(cur_sent);
+      decoder.Decode(buf, &observer);  // decode the sentence, calling Notify to get the hope,fear, and model best hyps. 
+      
+      cur_sent = observer.GetCurrentSent();
+      cerr << "SENT: " << cur_sent << endl;
+      const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis();
+      const HypothesisInfo& cur_good = *oracles[cur_sent].good[0];
+      const HypothesisInfo& cur_bad = *oracles[cur_sent].bad[0];
+
+      vector<shared_ptr<HypothesisInfo> >& cur_good_v = oracles[cur_sent].good;
+      vector<shared_ptr<HypothesisInfo> >& cur_bad_v = oracles[cur_sent].bad;
+      vector<shared_ptr<HypothesisInfo> > cur_best_v = observer.GetCurrentBest();
+
+      tot_loss += cur_hyp.mt_metric;
+      
+      //score hyps to be able to compute corpus level bleu after we finish this iteration through the corpus
+      ScoreP sentscore = ds[cur_sent]->ScoreCandidate(cur_hyp.hyp);
+      if (!acc) { acc = sentscore->GetZero(); }
+      acc->PlusEquals(*sentscore);
+
+      ScoreP hope_sentscore = ds[cur_sent]->ScoreCandidate(cur_good.hyp);
+      if (!acc_h) { acc_h = hope_sentscore->GetZero(); }
+      acc_h->PlusEquals(*hope_sentscore);
+
+      ScoreP fear_sentscore = ds[cur_sent]->ScoreCandidate(cur_bad.hyp);
+      if (!acc_f) { acc_f = fear_sentscore->GetZero(); }
+      acc_f->PlusEquals(*fear_sentscore);
+      
+      if(optimizer == 4) { //passive-aggressive update (single dual coordinate step)
+      
+	  double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights);
+	  double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric);
+	  const double loss = margin +  mt_loss;
+	  cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) <<endl;
+	  if (loss > 0.0 || !checkloss) {
+	    SparseVector<double> diff = cur_good.features;
+	    diff -= cur_bad.features;	    
+
+	    double diffsqnorm = diff.l2norm_sq();
+	    double delta;
+	    if (diffsqnorm > 0)
+	      delta = loss / (diffsqnorm);
+	    else
+	      delta = 0;
+	    
+	    if (delta > max_step_size) delta = max_step_size;
+	    lambdas += (cur_good.features * delta);
+	    lambdas -= (cur_bad.features * delta);
+	    
+	  }
+      }
+      else if(optimizer == 1) //sgd - nonadapted step size
+	{
+	   
+	  lambdas += (cur_good.features) * max_step_size;
+	  lambdas -= (cur_bad.features) * max_step_size;
+	}
+      else if(optimizer == 5) //full mira with n-best list of constraints from hope, fear, model best
+	{
+	  vector<shared_ptr<HypothesisInfo> > cur_constraint;
+	  cur_constraint.insert(cur_constraint.begin(), cur_bad_v.begin(), cur_bad_v.end());
+	  cur_constraint.insert(cur_constraint.begin(), cur_best_v.begin(), cur_best_v.end());
+	  cur_constraint.insert(cur_constraint.begin(), cur_good_v.begin(), cur_good_v.end());
+
+	  bool optimize_again;
+	  vector<shared_ptr<HypothesisInfo> > cur_pair;
+	  //SMO 
+	  for(int u=0;u!=cur_constraint.size();u++)	
+	    cur_constraint[u]->alpha =0;	      
+	  
+	  cur_constraint[0]->alpha =1; //set oracle to alpha=1
+
+	  cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl;
+	  int smo_iter = MAX_SMO, smo_iter2 = MAX_SMO;
+	  int iter, iter2 =0;
+	  bool DEBUG_SMO = false;
+	  while (iter2 < smo_iter2)
+	    {
+	      iter =0;
+	      while (iter < smo_iter)
+		{
+		  optimize_again = true;
+		  for (int i = 0; i< cur_constraint.size(); i++)
+		    for (int j = i+1; j< cur_constraint.size(); j++)
+		      {
+			if(DEBUG_SMO) cerr << "start " << i << " " << j <<  endl;
+			cur_pair.clear();
+			cur_pair.push_back(cur_constraint[j]);
+			cur_pair.push_back(cur_constraint[i]);
+			double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights);
+			
+			if (delta == 0) optimize_again = false;
+			cur_constraint[j]->alpha += delta;
+			cur_constraint[i]->alpha -= delta;
+			double step_size = delta * max_step_size;
+			
+			lambdas += (cur_constraint[i]->features) * step_size;
+			lambdas -= (cur_constraint[j]->features) * step_size;
+			if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << i << " " << j << " " <<  delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha <<  endl;		
+		      }
+		  iter++;
+		  
+		  if(!optimize_again)
+		    { 
+		      iter = MAX_SMO;
+		      cerr << "Optimization stopped, delta =0" << endl;
+		    }		  
+		}
+	      iter2++;
+	    }	  
+	}
+      else if(optimizer == 2 || optimizer == 3) //PA and Cutting Plane MIRA update
+	  {
+	    bool DEBUG_SMO= true;
+	    vector<shared_ptr<HypothesisInfo> > cur_constraint;
+	    cur_constraint.push_back(cur_good_v[0]); //add oracle to constraint set
+	    bool optimize_again = true;
+	    int cut_plane_calls = 0;
+	    while (optimize_again)
+	      { 
+		if(DEBUG_SMO) cerr<< "optimize again: " << optimize_again << endl;
+		if(optimizer == 2){ //PA
+		  cur_constraint.push_back(cur_bad_v[0]);
+
+		  //check if we have a violation
+		  if(!(cur_constraint[1]->fear > cur_constraint[0]->fear + SMO_EPSILON))
+		    {
+		      optimize_again = false;
+		      cerr << "Constraint not violated" << endl;
+		    }
+		}
+		else
+		  { //cutting plane to add constraints
+		    if(DEBUG_SMO) cerr<< "Cutting Plane " << cut_plane_calls << " with " << lambdas << endl;
+		    optimize_again = false;
+		    cut_plane_calls++;
+		    CuttingPlane(&cur_constraint, &optimize_again, oracles[cur_sent].bad, dense_weights);
+		    if (cut_plane_calls >= MAX_SMO) optimize_again = false;
+		  }
+
+		if(optimize_again)
+		  {
+		    //SMO 
+		    for(int u=0;u!=cur_constraint.size();u++)	
+		      { 
+			cur_constraint[u]->alpha =0;
+		      }
+		    cur_constraint[0]->alpha = 1;
+		    cerr <<" Optimizing with " << cur_constraint.size() << " constraints" << endl;
+		    int smo_iter = MAX_SMO;
+		    int iter =0;
+		    while (iter < smo_iter)
+		      {			
+			//select pair to optimize from constraint set
+			vector<shared_ptr<HypothesisInfo> > cur_pair = SelectPair(&cur_constraint);
+			
+			if(cur_pair.empty()){
+			  iter=MAX_SMO; 
+			  cerr << "Undefined pair " << endl; 
+			  continue;
+			} //pair is undefined so we are done with this smo 
+
+			double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights);
+
+			cur_pair[0]->alpha += delta;
+			cur_pair[1]->alpha -= delta;
+			double step_size = delta * max_step_size;
+			cerr << "step " << step_size << endl;
+
+			lambdas += (cur_pair[1]->features) * step_size;
+			lambdas -= (cur_pair[0]->features) * step_size;
+			cerr << " Lambdas " << lambdas << endl;
+			//reload weights based on update
+
+			dense_weights.clear();
+			lambdas.init_vector(&dense_weights);
+			dense_weights_g = dense_weights;
+			iter++;
+					
+			if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha <<  endl;		
+			if(no_select) //don't use selection heuristic to determine when to stop SMO, rather just when delta =0 
+			  if (delta == 0) iter = MAX_SMO;
+			
+			//only perform one dual coordinate ascent step
+			if(optimizer == 2) 
+			  {
+			    optimize_again = false;
+			    iter = MAX_SMO;
+			  }					
+		      }
+		    if(optimizer == 3)
+		      {
+			if(!no_reweight) //reweight the forest and select a new k-best
+			  {
+			    if(DEBUG_SMO) cerr<< "Decoding with new weights -- now orac are " << oracles[cur_sent].good.size() << endl;
+			    Hypergraph hg = observer.GetCurrentForest();
+			    hg.Reweight(dense_weights);
+			    if(unique_kbest)
+                              observer.UpdateOracles<KBest::FilterUnique>(cur_sent, hg);
+                            else
+                              observer.UpdateOracles<KBest::NoFilter<std::vector<WordID> > >(cur_sent, hg);			    
+			  }
+		      }
+		  }
+		
+	      }
+	   
+	    //print objective after this sentence
+	    double lambda_change = (lambdas - old_lambdas).l2norm_sq();
+	    double max_fear = cur_constraint[cur_constraint.size()-1]->fear;
+	    double temp_objective = 0.5 * lambda_change;// + max_step_size * max_fear;
+
+	    for(int u=0;u!=cur_constraint.size();u++)	
+	      { 
+		cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << " " << cur_constraint[u]->fear << endl;
+		temp_objective += cur_constraint[u]->alpha * cur_constraint[u]->fear;
+	      }
+	    objective += temp_objective;
+	    
+	    cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl;
+	  }
+      
+    
+      if ((cur_sent * 40 / ds.size()) > dots) { ++dots; cerr << '.'; }
+      tot += lambdas;
+      ++lcount;
+      cur_sent++;
+      
+      cout << TD::GetString(cur_good_v[0]->hyp) << " ||| " << TD::GetString(cur_best_v[0]->hyp) << " ||| " << TD::GetString(cur_bad_v[0]->hyp) << endl;
+
+    }
+
+    cerr << "FINAL OBJECTIVE: "<< objective << endl;
+    final_tot += tot;
+    cerr << "Translated " << lcount << " sentences " << endl;
+    cerr << " [AVG METRIC LAST PASS=" << (tot_loss / lcount) << "]\n";
+    tot_loss = 0;
+    
+    int node_id = rng->next() * 100000;
+    cerr << " Writing weights to " << node_id << endl;
+    Weights::ShowLargestFeatures(dense_weights);
+    dots = 0;
+    ostringstream os;
+    os << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << ".gz";
+    string msg = "# MIRA tuned weights ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lcount);
+    //Weights.InitFromVector(lambdas);
+    lambdas.init_vector(&dense_weights);
+    Weights::WriteToFile(os.str(), dense_weights, true, &msg);
+
+    SparseVector<double> x = tot;
+    x /= lcount;
+    ostringstream sa;
+    string msga = "# MIRA tuned weights AVERAGED ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lcount);
+    sa << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << "-avg.gz";
+    x.init_vector(&dense_weights);
+    Weights::WriteToFile(sa.str(), dense_weights, true, &msga);
+    
+    
+    cerr << "Optimization complete.\n";
+    return 0;
+}
+
diff --git a/training/mira/kbest_mirav5.cc b/training/mira/kbest_mirav5.cc
deleted file mode 100644
index cea5cf67..00000000
--- a/training/mira/kbest_mirav5.cc
+++ /dev/null
@@ -1,1148 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <vector>
-#include <cassert>
-#include <cmath>
-#include <algorithm>
-
-#include "config.h"
-
-
-#include <boost/shared_ptr.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "sentence_metadata.h"
-#include "scorer.h"
-#include "verbose.h"
-#include "viterbi.h"
-#include "hg.h"
-#include "prob.h"
-#include "kbest.h"
-#include "ff_register.h"
-#include "decoder.h"
-#include "filelib.h"
-#include "fdict.h"
-#include "time.h"
-#include "sampler.h"
-
-#include "weights.h"
-#include "sparse_vector.h"
-
-using namespace std;
-using boost::shared_ptr;
-namespace po = boost::program_options;
-
-bool invert_score;
-boost::shared_ptr<MT19937> rng;
-bool approx_score;
-bool no_reweight;
-bool no_select;
-bool unique_kbest;
-int update_list_size;
-vector<weight_t> dense_weights_g;
-double mt_metric_scale;
-int optimizer;
-int fear_select;
-int hope_select;
-
-bool pseudo_doc;
-
-void SanityCheck(const vector<double>& w) {
-  for (int i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
-  }
-}
-
-struct FComp {
-  const vector<double>& w_;
-  FComp(const vector<double>& w) : w_(w) {}
-  bool operator()(int a, int b) const {
-    return fabs(w_[a]) > fabs(w_[b]);
-  }
-};
-
-void ShowLargestFeatures(const vector<double>& w) {
-  vector<int> fnums(w.size());
-  for (int i = 0; i < w.size(); ++i)
-    fnums[i] = i;
-  vector<int>::iterator mid = fnums.begin();
-  mid += (w.size() > 10 ? 10 : w.size());
-  partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
-  cerr << "TOP FEATURES:";
-  for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
-    cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
-  }
-  cerr << endl;
-}
-
-bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-    ("input_weights,w",po::value<string>(),"Input feature weights file")
-    ("source,i",po::value<string>(),"Source file for development set")
-    ("passes,p", po::value<int>()->default_value(15), "Number of passes through the training data")
-    ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
-    ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
-    ("optimizer,o",po::value<int>()->default_value(1), "Optimizer (sgd=1, mira 1-fear=2, full mira w/ cutting plane=3, full mira w/ nbest list=5, local update=4)")
-    ("fear,f",po::value<int>()->default_value(1), "Fear selection (model-cost=1, max-cost=2, pred-base=3)")
-    ("hope,h",po::value<int>()->default_value(1), "Hope selection (model+cost=1, max-cost=2, local-cost=3)")
-    ("max_step_size,C", po::value<double>()->default_value(0.01), "regularization strength (C)")
-    ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
-    ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Amount to scale MT loss function by")
-    ("approx_score,a", "Use smoothed sentence-level BLEU score for approximate scoring")
-    ("no_reweight,d","Do not reweight forest for cutting plane")
-    ("no_select,n", "Do not use selection heuristic")
-    ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles")
-    ("update_k_best,b", po::value<int>()->default_value(1), "Size of good, bad lists to perform update with")
-    ("unique_k_best,u", "Unique k-best translation list")
-    ("weights_output,O",po::value<string>(),"Directory to write weights to")
-    ("output_dir,D",po::value<string>(),"Directory to place output in")
-    ("decoder_config,c",po::value<string>(),"Decoder configuration file");
-  po::options_description clo("Command line options");
-  clo.add_options()
-    ("config", po::value<string>(), "Configuration file")
-    ("help,H", "Print this help message and exit");
-  po::options_description dconfig_options, dcmdline_options;
-  dconfig_options.add(opts);
-  dcmdline_options.add(opts).add(clo);
-  
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("config")) {
-    ifstream config((*conf)["config"].as<string>().c_str());
-    po::store(po::parse_config_file(config, dconfig_options), *conf);
-  }
-  po::notify(*conf);
-
-  if (conf->count("help") || !conf->count("input_weights") || !conf->count("decoder_config") || !conf->count("reference")) {
-    cerr << dcmdline_options << endl;
-    return false;
-  }
-  return true;
-}
-
-//load previous translation, store array of each sentences score, subtract it from current sentence and replace with new translation score
-
-
-static const double kMINUS_EPSILON = -1e-6;
-static const double EPSILON = 0.000001;
-static const double SMO_EPSILON = 0.0001;
-static const double PSEUDO_SCALE = 0.95;
-static const int MAX_SMO = 10;
-int cur_pass;
-
-struct HypothesisInfo {
-  SparseVector<double> features;
-  vector<WordID> hyp;
-  double mt_metric;
-  double hope;
-  double fear;
-  double alpha;
-  double oracle_loss;
-  SparseVector<double> oracle_feat_diff;
-  shared_ptr<HypothesisInfo> oracleN;
-};
-
-bool ApproxEqual(double a, double b) {
-  if (a == b) return true;
-  return (fabs(a-b)/fabs(b)) < EPSILON;
-}
-
-typedef shared_ptr<HypothesisInfo> HI;
-bool HypothesisCompareB(const HI& h1, const HI& h2 ) 
-{
-  return h1->mt_metric > h2->mt_metric;
-};
-
-
-bool HopeCompareB(const HI& h1, const HI& h2 ) 
-{
-  return h1->hope > h2->hope;
-};
-
-bool FearCompareB(const HI& h1, const HI& h2 ) 
-{
-  return h1->fear > h2->fear;
-};
-
-bool FearComparePred(const HI& h1, const HI& h2 ) 
-{
-  return h1->features.dot(dense_weights_g) > h2->features.dot(dense_weights_g);
-};
-
-bool HypothesisCompareG(const HI& h1, const HI& h2 ) 
-{
-  return h1->mt_metric < h2->mt_metric;
-};
-
-
-void CuttingPlane(vector<shared_ptr<HypothesisInfo> >* cur_c, bool* again, vector<shared_ptr<HypothesisInfo> >& all_hyp, vector<weight_t> dense_weights)
-{
-  bool DEBUG_CUT = false;
-  shared_ptr<HypothesisInfo> max_fear, max_fear_in_set;
-  vector<shared_ptr<HypothesisInfo> >& cur_constraint = *cur_c;
-
-  if(no_reweight)
-    {
-      //find new hope hypothesis
-      for(int u=0;u!=all_hyp.size();u++)	
-	{ 
-	  double t_score = all_hyp[u]->features.dot(dense_weights);
-	  all_hyp[u]->hope = 1 * all_hyp[u]->mt_metric + t_score;
-	  //if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl; 
-	  
-	}
-      
-      //sort hyps by hope score
-      sort(all_hyp.begin(),all_hyp.end(),HopeCompareB);    
-      
-      double hope_score = all_hyp[0]->features.dot(dense_weights);
-      if(DEBUG_CUT) cerr << "New hope derivation score " << hope_score << endl;
-     
-      for(int u=0;u!=all_hyp.size();u++)	
-	{ 
-	  double t_score = all_hyp[u]->features.dot(dense_weights);
-	  //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score;
-	  
-	  all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*all_hyp[0]->mt_metric - hope_score + t_score; //relative loss
-	  //      all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*all_hyp[0]->mt_metric;
-	  //all_hyp[u]->oracle_feat_diff = all_hyp[0]->features - all_hyp[u]->features;
-	  //	all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score;
-	  //if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl; 
-	  
-	}
-    
-      sort(all_hyp.begin(),all_hyp.end(),FearCompareB);
-      
-    }
-  //assign maximum fear derivation from all derivations
-  max_fear = all_hyp[0];
-  
-  if(DEBUG_CUT) cerr <<"Cutting Plane Max Fear "<<max_fear->fear ;
-  for(int i=0; i < cur_constraint.size();i++) //select maximal violator already in constraint set
-    {
-      if (!max_fear_in_set || cur_constraint[i]->fear > max_fear_in_set->fear)
-	max_fear_in_set = cur_constraint[i];
-    }
-  if(DEBUG_CUT) cerr << "Max Fear in constraint set " << max_fear_in_set->fear << endl;
-  
-  if(max_fear->fear > max_fear_in_set->fear + SMO_EPSILON)
-    {
-      cur_constraint.push_back(max_fear);
-      *again = true;
-      if(DEBUG_CUT) cerr << "Optimize Again " << *again << endl;
-    }
-}
-
-
-double ComputeDelta(vector<shared_ptr<HypothesisInfo> >* cur_p, double max_step_size,vector<weight_t> dense_weights )
-{
-  vector<shared_ptr<HypothesisInfo> >& cur_pair = *cur_p;
-   double loss = cur_pair[0]->oracle_loss - cur_pair[1]->oracle_loss;
-   //double margin = -cur_pair[0]->oracle_feat_diff.dot(dense_weights) + cur_pair[1]->oracle_feat_diff.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff?
-   //double num = loss - margin;
-  
-
-   double margin = -(cur_pair[0]->oracleN->features.dot(dense_weights)- cur_pair[0]->features.dot(dense_weights)) + (cur_pair[1]->oracleN->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights));
-   const double num = margin +  loss;
-   cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) <<endl;
-   
-   // double margin = cur_pair[1]->features.dot(dense_weights) - cur_pair[0]->features.dot(dense_weights);
-   // double loss =  cur_pair[1]->oracle_loss; //good.mt_metric - cur_bad.mt_metric);
-   //const double num = margin +  loss;
-  
-   //cerr << "Compute Delta " << loss << " " << margin << " ";
-
-  //  double margin = cur_pair[0]->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff?
-/*  double num = 
-    (cur_pair[0]->oracle_loss - cur_pair[0]->oracle_feat_diff.dot(dense_weights))
-    - (cur_pair[1]->oracle_loss - cur_pair[1]->oracle_feat_diff.dot(dense_weights));
-  */
-
-  SparseVector<double> diff = cur_pair[0]->features;
-  diff -= cur_pair[1]->features;
-  /*  SparseVector<double> diff = cur_pair[0]->oracle_feat_diff;
-  diff -= cur_pair[1]->oracle_feat_diff;*/
-  double diffsqnorm = diff.l2norm_sq();
-  double delta;
-  if (diffsqnorm > 0)
-    delta = num / (diffsqnorm * max_step_size);
-  else
-    delta = 0;
-  cerr << " D1:" << delta;
-  //clip delta (enforce margin constraints)
-
-  delta = max(-cur_pair[0]->alpha, min(delta, cur_pair[1]->alpha));
-  cerr << " D2:" << delta;
-  return delta;
-}
-
-
-vector<shared_ptr<HypothesisInfo> > SelectPair(vector<shared_ptr<HypothesisInfo> >* cur_c)
-{
-  bool DEBUG_SELECT= false;
-  vector<shared_ptr<HypothesisInfo> >& cur_constraint = *cur_c;
-  
-  vector<shared_ptr<HypothesisInfo> > pair;
-
-  if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for 1-mira
-  //    if(optimizer == 2)      {
-      pair.push_back(cur_constraint[0]);
-      pair.push_back(cur_constraint[1]);
-      return pair;
-      //   }
-    }
-  
-  for(int u=0;u != cur_constraint.size();u++)	
-    {
-      shared_ptr<HypothesisInfo> max_fear;
-      
-      if(DEBUG_SELECT) cerr<< "cur alpha " << u  << " " << cur_constraint[u]->alpha;
-      for(int i=0; i < cur_constraint.size();i++) //select maximal violator
-	{
-	  if(i != u)
-	    if (!max_fear || cur_constraint[i]->fear > max_fear->fear)
-	      max_fear = cur_constraint[i];
-	}
-      if(!max_fear) return pair; //
-      
-      if(DEBUG_SELECT) cerr << " F" << max_fear->fear << endl;
-
-      
-      if ((cur_constraint[u]->alpha == 0) && (cur_constraint[u]->fear > max_fear->fear + SMO_EPSILON))
-	{
-	  for(int i=0; i < cur_constraint.size();i++) //select maximal violator
-	    {
-	      if(i != u)
-		if (cur_constraint[i]->alpha > 0)
-		  {
-		    pair.push_back(cur_constraint[u]);
-		    pair.push_back(cur_constraint[i]);
-		    cerr << "RETJURN from 1" << endl;
-		    return pair;
-		  }
-	    }
-	}	       
-      if ((cur_constraint[u]->alpha > 0) && (cur_constraint[u]->fear < max_fear->fear - SMO_EPSILON))
-	{
-	  for(int i=0; i < cur_constraint.size();i++) //select maximal violator
-	    {
-	      if(i != u)	
-		if (cur_constraint[i]->fear > cur_constraint[u]->fear)
-		  {
-		    pair.push_back(cur_constraint[u]);
-		    pair.push_back(cur_constraint[i]);
-		    return pair;
-		  }
-	    }  
-	}
-    
-    } 
-  return pair; //no more constraints to optimize, we're done here
-
-}
-
-struct GoodBadOracle {
-  vector<shared_ptr<HypothesisInfo> > good;
-  vector<shared_ptr<HypothesisInfo> > bad;
-};
-
-struct TrainingObserver : public DecoderObserver {
-  TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o, vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) {
-  // TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o) : ds(d), oracles(*o), kbest_size(k) {
-    
-    //calculate corpus bleu score from previous iterations 1-best for BLEU gain
-    if(!pseudo_doc)
-    if(cur_pass > 0)
-      {
-	ScoreP acc;
-	for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) {
-	  if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); }
-	  acc->PlusEquals(*corpus_bleu_sent_stats[ii]);
-	  
-	}
-	corpus_bleu_stats = acc;
-	corpus_bleu_score = acc->ComputeScore();
-      }
-    //corpus_src_length = 0;
-}
-  const DocScorer& ds;
-  vector<ScoreP>& corpus_bleu_sent_stats;
-  vector<GoodBadOracle>& oracles;
-  vector<shared_ptr<HypothesisInfo> > cur_best;
-  shared_ptr<HypothesisInfo> cur_oracle;
-  const int kbest_size;
-  Hypergraph forest;
-  int cur_sent;
-  ScoreP corpus_bleu_stats;
-  float corpus_bleu_score;
-
-  float corpus_src_length;
-  float curr_src_length;
-
-  const int GetCurrentSent() const {
-    return cur_sent;
-  }
-
-  const HypothesisInfo& GetCurrentBestHypothesis() const {
-    return *cur_best[0];
-  }
-
-  const vector<shared_ptr<HypothesisInfo> > GetCurrentBest() const {
-    return cur_best;
-  }
-  
- const HypothesisInfo& GetCurrentOracle() const {
-    return *cur_oracle;
-  }
-  
-  const Hypergraph& GetCurrentForest() const {
-    return forest;
-  }
-  
-
-  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
-    cur_sent = smeta.GetSentenceID();
-    //cerr << "SOURCE " << smeta.GetSourceLength() << endl;
-    curr_src_length = (float) smeta.GetSourceLength();
-    //UpdateOracles(smeta.GetSentenceID(), *hg);
-    if(unique_kbest)
-      UpdateOracles<KBest::FilterUnique>(smeta.GetSentenceID(), *hg);
-    else
-      UpdateOracles<KBest::NoFilter<std::vector<WordID> > >(smeta.GetSentenceID(), *hg);
-    forest = *hg;
-    
-  }
-
-  shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double score, const vector<WordID>& hyp) {
-    shared_ptr<HypothesisInfo> h(new HypothesisInfo);
-    h->features = feats;
-    h->mt_metric = score;
-    h->hyp = hyp;
-    return h;
-  }
-
-  template <class Filter>  
-  void UpdateOracles(int sent_id, const Hypergraph& forest) {
-
-    bool PRINT_LIST= false;    
-    vector<shared_ptr<HypothesisInfo> >& cur_good = oracles[sent_id].good;
-    vector<shared_ptr<HypothesisInfo> >& cur_bad = oracles[sent_id].bad;
-    //TODO: look at keeping previous iterations hypothesis lists around
-    cur_best.clear();
-    cur_good.clear();
-    cur_bad.clear();
-
-    vector<shared_ptr<HypothesisInfo> > all_hyp;
-
-    typedef KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,Filter> K;
-    K kbest(forest,kbest_size);
-    
-    //KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
-    for (int i = 0; i < kbest_size; ++i) {
-      //const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
-      typename K::Derivation *d =
-        kbest.LazyKthBest(forest.nodes_.size() - 1, i);
-      if (!d) break;
-
-      float sentscore;
-      if(approx_score)
-	{
-
-	  if(cur_pass > 0 && !pseudo_doc)
-	    {
-	      ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield);
-	      ScoreP corpus_no_best = corpus_bleu_stats->GetZero();
-
-	      corpus_bleu_stats->Subtract(*corpus_bleu_sent_stats[sent_id], &*corpus_no_best);
-	      sent_stats->PlusEquals(*corpus_no_best, 0.5);
-	      
-	      //compute gain from new sentence in 1-best corpus
-	      sentscore = mt_metric_scale * (sent_stats->ComputeScore() - corpus_no_best->ComputeScore());// - corpus_bleu_score);
-	    }
-	  else if(pseudo_doc)
-	    {
-	      //cerr << "CORP:" << corpus_bleu_score << " NEW:" << sent_stats->ComputeScore() << " sentscore:" << sentscore << endl;
-
-	  //-----pseudo-corpus approach
-	      float src_scale = corpus_src_length + curr_src_length;
-	      ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield);
-	      if(!corpus_bleu_stats){ corpus_bleu_stats = sent_stats->GetZero();}
-	      
-	      sent_stats->PlusEquals(*corpus_bleu_stats);
-	      sentscore =  mt_metric_scale  * src_scale * sent_stats->ComputeScore();
-
-	    }
-	  else
-	    {
-	      //cerr << "Using sentence-level approximation - PASS - " << boost::lexical_cast<std::string>(cur_pass) << endl;
-	      //approx style of computation, used for 0th iteration
-	      sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeSentScore());
-
-	      //use pseudo-doc
-	    }
-	  
-	 
-	}
-      else
-	{
-	  sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore());
-	}
-     
-      if (invert_score) sentscore *= -1.0;
-      //cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << " " << approx_sentscore << endl;
-
-      if (i < update_list_size){ 
-	if (i == 0) //take cur best and add its bleu statistics counts to the pseudo-doc
-	  {  }
-	if(PRINT_LIST)cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl; 
-	cur_best.push_back( MakeHypothesisInfo(d->feature_values, sentscore, d->yield));
-      }
-      
-      all_hyp.push_back(MakeHypothesisInfo(d->feature_values, sentscore,d->yield));   //store all hyp to extract oracle best and worst
-         
-    }
-    
-    if(pseudo_doc){
-    //update psuedo-doc stats
-      string details, details2;     
-      corpus_bleu_stats->ScoreDetails(&details2);   
-      ScoreP sent_stats = ds[sent_id]->ScoreCandidate(cur_best[0]->hyp);
-      corpus_bleu_stats->PlusEquals(*sent_stats);
-      
-     
-      sent_stats->ScoreDetails(&details);
-      
-      
-      sent_stats = corpus_bleu_stats;
-      corpus_bleu_stats = sent_stats->GetZero();
-      corpus_bleu_stats->PlusEquals(*sent_stats, PSEUDO_SCALE);
-      
-      
-      corpus_src_length = PSEUDO_SCALE * (corpus_src_length + curr_src_length);
-      cerr << "CORP S " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n " << details2 << endl;
-      
-
-    }
-
-
-    //figure out how many hyps we can keep maximum
-    int temp_update_size = update_list_size;
-    if (all_hyp.size() < update_list_size){ temp_update_size = all_hyp.size();}
-
-    //sort all hyps by sentscore (bleu)
-    sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareB);
-    
-    if(PRINT_LIST){  cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++)	cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_weights_g) << endl; }
-    
-    //if(optimizer != 4 )
-    if(hope_select == 1)
-      {
-	//find hope hypothesis using model + bleu
-	if (PRINT_LIST) cerr << "HOPE " << endl;
-	for(int u=0;u!=all_hyp.size();u++)	
-	  { 
-	    double t_score = all_hyp[u]->features.dot(dense_weights_g);
-	    all_hyp[u]->hope = all_hyp[u]->mt_metric + t_score;
-	    if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl; 
-	    
-	  }
-	
-	//sort hyps by hope score
-	sort(all_hyp.begin(),all_hyp.end(),HopeCompareB);
-      }
-        
-
-    //assign cur_good the sorted list
-    cur_good.insert(cur_good.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);    
-    if(PRINT_LIST) { cerr << "GOOD" << endl;  for(int u=0;u!=cur_good.size();u++) cerr << cur_good[u]->mt_metric << " " << cur_good[u]->hope << endl;}     
-    /*    if (!cur_oracle) {      cur_oracle = cur_good[0];
-      cerr << "Set oracle " << cur_oracle->hope << " " << cur_oracle->fear << " " << cur_oracle->mt_metric << endl;      }
-    else      {
-	cerr << "Stay oracle " << cur_oracle->hope << " " << cur_oracle->fear << " " << cur_oracle->mt_metric << endl;      }    */
-
-    shared_ptr<HypothesisInfo>& oracleN = cur_good[0];
-    //if(optimizer != 4){
-    if(fear_select == 1){
-      //compute fear hyps
-      if (PRINT_LIST) cerr << "FEAR " << endl;
-      double hope_score = oracleN->features.dot(dense_weights_g);
-      //double hope_score = cur_oracle->features.dot(dense_weights);
-      if (PRINT_LIST) cerr << "hope score " << hope_score << endl;
-      for(int u=0;u!=all_hyp.size();u++)	
-	{ 
-	  double t_score = all_hyp[u]->features.dot(dense_weights_g);
-	  //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score;
-	  
-	  /*	  all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric - hope_score + t_score; //relative loss
-	  all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric;
-	  all_hyp[u]->oracle_feat_diff = cur_oracle->features - all_hyp[u]->features;*/
-
-	  all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric - hope_score + t_score; //relative loss
-	  all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric;
-	  all_hyp[u]->oracle_feat_diff = oracleN->features - all_hyp[u]->features;
-	  all_hyp[u]->oracleN=oracleN;
-	  //	all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score;
-	  if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl; 
-	  
-	}
-      
-      sort(all_hyp.begin(),all_hyp.end(),FearCompareB);
-      
-      cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);    
-    }
-    else if(fear_select == 2) //select fear based on cost
-      {
-	cur_bad.insert(cur_bad.begin(), all_hyp.end()-temp_update_size, all_hyp.end()); 
-	reverse(cur_bad.begin(),cur_bad.end());
-      }
-    else //pred-based, fear_select = 3
-      {
-	sort(all_hyp.begin(),all_hyp.end(),FearComparePred);
-	cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); 
-      }
-
-
-    if(PRINT_LIST){ cerr<< "BAD"<<endl; for(int u=0;u!=cur_bad.size();u++) cerr << cur_bad[u]->mt_metric << " H:" << cur_bad[u]->hope << " F:" << cur_bad[u]->fear << endl;}
-    
-    cerr << "GOOD (BEST): " << cur_good[0]->mt_metric << endl;
-    cerr << " CUR: " << cur_best[0]->mt_metric << endl;
-    cerr << " BAD (WORST): " << cur_bad[0]->mt_metric << endl;
-  }
-};
-
-void ReadTrainingCorpus(const string& fname, vector<string>* c) {
-
-
-  ReadFile rf(fname);
-  istream& in = *rf.stream();
-  string line;
-  while(in) {
-    getline(in, line);
-    if (!in) break;
-    c->push_back(line);
-  }
-}
-
-void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od)
-{
-  cerr << "Reading BLEU gain file ";
-  string fname;
-  if(cur_pass == 0)
-    {
-      fname = od + "/run.raw.init";
-    }
-  else
-    {
-      int last_pass = cur_pass - 1; 
-      fname = od + "/run.raw."  +  boost::lexical_cast<std::string>(last_pass) + ".B";
-    }
-  cerr << fname << "\n";
-  ReadFile rf(fname);
-  istream& in = *rf.stream();
-  ScoreP acc;
-  string line;
-  int lc = 0;
-  while(in) {
-    getline(in, line);
-    if (line.empty() && !in) break;
-    vector<WordID> sent;
-    TD::ConvertSentence(line, &sent);
-    ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
-    c->push_back(sentscore);
-    if (!acc) { acc = sentscore->GetZero(); }
-    acc->PlusEquals(*sentscore);
-    ++lc;
- 
-  }
-
-  
-  assert(lc > 0);
-  float score = acc->ComputeScore();
-  string details;
-  acc->ScoreDetails(&details);
-  cerr << "INIT RUN " << details << score << endl;
-
-}
-
-
-int main(int argc, char** argv) {
-  register_feature_functions();
-  SetSilent(true);  // turn off verbose decoder output
-
-  po::variables_map conf;
-  if (!InitCommandLine(argc, argv, &conf)) return 1;
-
-  if (conf.count("random_seed"))
-    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
-  else
-    rng.reset(new MT19937);
-  
-  vector<string> corpus;
-  //ReadTrainingCorpus(conf["source"].as<string>(), &corpus);
-
-  const string metric_name = conf["mt_metric"].as<string>();
-  optimizer = conf["optimizer"].as<int>();
-  fear_select = conf["fear"].as<int>();
-  hope_select = conf["hope"].as<int>();
-  mt_metric_scale = conf["mt_metric_scale"].as<double>();
-  approx_score = conf.count("approx_score");
-  no_reweight = conf.count("no_reweight");
-  no_select = conf.count("no_select");
-  update_list_size = conf["update_k_best"].as<int>();
-  unique_kbest = conf.count("unique_k_best");
-  pseudo_doc = true;
-
-  const string weights_dir = conf["weights_output"].as<string>();
-  const string output_dir = conf["output_dir"].as<string>();
-  ScoreType type = ScoreTypeFromString(metric_name);
-
-  //establish metric used for tuning
-  if (type == TER) {
-    invert_score = true;
-    // approx_score = false;
-  } else {
-    invert_score = false;
-  }
-
-  //load references
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
-  cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
-  vector<ScoreP> corpus_bleu_sent_stats;
-  
-  //check training pass,if >0, then use previous iterations corpus bleu stats
-  cur_pass = conf["passes"].as<int>();
-  if(cur_pass > 0)
-    {
-      ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, ds, output_dir);
-    }
-  /*  if (ds.size() != corpus.size()) {
-    cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
-    return 1;
-    }*/
-  cerr << "Optimizing with " << optimizer << endl;
-  // load initial weights
-  /*Weights weights;
-  weights.InitFromFile(conf["input_weights"].as<string>());
-  SparseVector<double> lambdas;
-  weights.InitSparseVector(&lambdas);
-  */
-
-  
-  
-  ReadFile ini_rf(conf["decoder_config"].as<string>());
-  Decoder decoder(ini_rf.stream());
-
-  vector<weight_t>& dense_weights = decoder.CurrentWeightVector();
-  
-  SparseVector<weight_t> lambdas;
-  Weights::InitFromFile(conf["input_weights"].as<string>(), &dense_weights);
-  Weights::InitSparseVector(dense_weights, &lambdas);
-
-  const string input = decoder.GetConf()["input"].as<string>();
-  //const bool show_feature_dictionary = decoder.GetConf().count("show_feature_dictionary");
-  if (!SILENT) cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
-  ReadFile in_read(input);
-  istream *in = in_read.stream();
-  assert(*in);  
-  string buf;
-  
-  const double max_step_size = conf["max_step_size"].as<double>();
-
-
-  //  assert(corpus.size() > 0);
-  vector<GoodBadOracle> oracles(ds.size());
-
-  TrainingObserver observer(conf["k_best_size"].as<int>(), ds, &oracles, &corpus_bleu_sent_stats);
-
-  int cur_sent = 0;
-  int lcount = 0;
-  double objective=0;
-  double tot_loss = 0;
-  int dots = 0;
-  //  int cur_pass = 1;
-  //  vector<double> dense_weights;
-  SparseVector<double> tot;
-  SparseVector<double> final_tot;
-  //  tot += lambdas;          // initial weights
-  //  lcount++;                // count for initial weights
-
-  //string msg = "# MIRA tuned weights";
-  // while (cur_pass <= max_iteration) {
-    SparseVector<double> old_lambdas = lambdas;
-    tot.clear();
-    tot += lambdas;
-    cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; 
-    ScoreP acc, acc_h, acc_f;
-    
-    while(*in) {
-      getline(*in, buf);
-      if (buf.empty()) continue;
-      //for (cur_sent = 0; cur_sent < corpus.size(); cur_sent++) {
-      
-      cerr << "SENT: " << cur_sent << endl;
-      //TODO: allow batch updating
-      //dense_weights.clear();
-      //weights.InitFromVector(lambdas);
-      //weights.InitVector(&dense_weights);
-      //decoder.SetWeights(dense_weights);  
-      lambdas.init_vector(&dense_weights);
-      dense_weights_g = dense_weights;
-      decoder.SetId(cur_sent);
-      decoder.Decode(buf, &observer);  // decode the sentence, calling Notify to get the hope,fear, and model best hyps. 
-      
-      cur_sent = observer.GetCurrentSent();
-      const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis();
-      const HypothesisInfo& cur_good = *oracles[cur_sent].good[0];
-      const HypothesisInfo& cur_bad = *oracles[cur_sent].bad[0];
-
-      vector<shared_ptr<HypothesisInfo> >& cur_good_v = oracles[cur_sent].good;
-      vector<shared_ptr<HypothesisInfo> >& cur_bad_v = oracles[cur_sent].bad;
-      vector<shared_ptr<HypothesisInfo> > cur_best_v = observer.GetCurrentBest();
-
-      tot_loss += cur_hyp.mt_metric;
-      
-      //score hyps to be able to compute corpus level bleu after we finish this iteration through the corpus
-      ScoreP sentscore = ds[cur_sent]->ScoreCandidate(cur_hyp.hyp);
-      if (!acc) { acc = sentscore->GetZero(); }
-      acc->PlusEquals(*sentscore);
-
-      ScoreP hope_sentscore = ds[cur_sent]->ScoreCandidate(cur_good.hyp);
-      if (!acc_h) { acc_h = hope_sentscore->GetZero(); }
-      acc_h->PlusEquals(*hope_sentscore);
-
-      ScoreP fear_sentscore = ds[cur_sent]->ScoreCandidate(cur_bad.hyp);
-      if (!acc_f) { acc_f = fear_sentscore->GetZero(); }
-      acc_f->PlusEquals(*fear_sentscore);
-      
-      if(optimizer == 4) { //single dual coordinate update, cur_good selected on BLEU score only (not model+BLEU)
-	//	if (!ApproxEqual(cur_hyp.mt_metric, cur_good.mt_metric)) {
-      
-	  double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights);
-	  double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric);
-	  const double loss = margin +  mt_loss;
-	  cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) <<endl;
-	  //	  if (loss > 0.0) {
-	    SparseVector<double> diff = cur_good.features;
-	    diff -= cur_bad.features;	    
-
-	    double diffsqnorm = diff.l2norm_sq();
-	    double delta;
-	    if (diffsqnorm > 0)
-	      delta = loss / (diffsqnorm);
-	    else
-	      delta = 0;
-	    
-	    //double step_size = loss / diff.l2norm_sq();
-	    cerr << loss << " " << delta << " " << diff << endl;
-	    if (delta > max_step_size) delta = max_step_size;
-	    lambdas += (cur_good.features * delta);
-	    lambdas -= (cur_bad.features * delta);
-	    //cerr << "L: " << lambdas << endl;
-	    //	  }
-	    //	  }
-      }
-      else if(optimizer == 1) //sgd - nonadapted step size
-	{
-	   
-	  lambdas += (cur_good.features) * max_step_size;
-	  lambdas -= (cur_bad.features) * max_step_size;
-	}
-      //cerr << "L: " << lambdas << endl;
-      else if(optimizer == 5) //full mira with n-best list of constraints from oracle, fear, best
-	{
-	  vector<shared_ptr<HypothesisInfo> > cur_constraint;
-	  cur_constraint.insert(cur_constraint.begin(), cur_bad_v.begin(), cur_bad_v.end());
-	  cur_constraint.insert(cur_constraint.begin(), cur_best_v.begin(), cur_best_v.end());
-	  cur_constraint.insert(cur_constraint.begin(), cur_good_v.begin(), cur_good_v.end());
-
-	  bool optimize_again;
-	  vector<shared_ptr<HypothesisInfo> > cur_pair;
-	  //SMO 
-	  for(int u=0;u!=cur_constraint.size();u++)	
-	    cur_constraint[u]->alpha =0;	      
-	  
-	  cur_constraint[0]->alpha =1; //set oracle to alpha=1
-
-	  cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl;
-	  int smo_iter = 10, smo_iter2 = 10;
-	  int iter, iter2 =0;
-	  bool DEBUG_SMO = false;
-	  while (iter2 < smo_iter2)
-	    {
-	      iter =0;
-	      while (iter < smo_iter)
-		{
-		  optimize_again = true;
-		  for (int i = 0; i< cur_constraint.size(); i++)
-		    for (int j = i+1; j< cur_constraint.size(); j++)
-		      {
-			if(DEBUG_SMO) cerr << "start " << i << " " << j <<  endl;
-			cur_pair.clear();
-			cur_pair.push_back(cur_constraint[j]);
-			cur_pair.push_back(cur_constraint[i]);
-			double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights);
-			
-			if (delta == 0) optimize_again = false;
-			//			cur_pair[0]->alpha += delta;
-			//	cur_pair[1]->alpha -= delta;
-			cur_constraint[j]->alpha += delta;
-			cur_constraint[i]->alpha -= delta;
-			double step_size = delta * max_step_size;
-			/*lambdas += (cur_pair[1]->features) * step_size;
-			lambdas -= (cur_pair[0]->features) * step_size;*/
-			lambdas += (cur_constraint[i]->features) * step_size;
-			lambdas -= (cur_constraint[j]->features) * step_size;
-			if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << i << " " << j << " " <<  delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha <<  endl;		
-			
-			//reload weights based on update
-			/*dense_weights.clear();
-			weights.InitFromVector(lambdas);
-			weights.InitVector(&dense_weights);*/
-		      }
-		  iter++;
-		  
-		  if(!optimize_again)
-		    { 
-		      iter = 100;
-		      cerr << "Optimization stopped, delta =0" << endl;
-		    }
-		  
-		  
-		}
-	      iter2++;
-	    }
-
-	  
-	}
-      else if(optimizer == 2 || optimizer == 3) //1-fear and cutting plane mira
-	  {
-	    bool DEBUG_SMO= true;
-	    vector<shared_ptr<HypothesisInfo> > cur_constraint;
-	    cur_constraint.push_back(cur_good_v[0]); //add oracle to constraint set
-	    bool optimize_again = true;
-	    int cut_plane_calls = 0;
-	    while (optimize_again)
-	      { 
-		if(DEBUG_SMO) cerr<< "optimize again: " << optimize_again << endl;
-		if(optimizer == 2){ //1-fear
-		  cur_constraint.push_back(cur_bad_v[0]);
-
-		  //check if we have a violation
-		  if(!(cur_constraint[1]->fear > cur_constraint[0]->fear + SMO_EPSILON))
-		    {
-		      optimize_again = false;
-		      cerr << "Constraint not violated" << endl;
-		    }
-		}
-		else
-		  { //cutting plane to add constraints
-		    if(DEBUG_SMO) cerr<< "Cutting Plane " << cut_plane_calls << " with " << lambdas << endl;
-		    optimize_again = false;
-		    cut_plane_calls++;
-		    CuttingPlane(&cur_constraint, &optimize_again, oracles[cur_sent].bad, dense_weights);
-		    if (cut_plane_calls >= MAX_SMO) optimize_again = false;
-		  }
-
-		if(optimize_again)
-		  {
-		    //SMO 
-		    for(int u=0;u!=cur_constraint.size();u++)	
-		      { 
-			cur_constraint[u]->alpha =0;
-			//cur_good_v[0]->alpha = 1; cur_bad_v[0]->alpha = 0;
-		      }
-		    cur_constraint[0]->alpha = 1;
-		    cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl;
-		    int smo_iter = MAX_SMO;
-		    int iter =0;
-		    while (iter < smo_iter)
-		      {			
-			//select pair to optimize from constraint set
-			vector<shared_ptr<HypothesisInfo> > cur_pair = SelectPair(&cur_constraint);
-			
-			if(cur_pair.empty()){iter=MAX_SMO; cerr << "Undefined pair " << endl; continue;} //pair is undefined so we are done with this smo 
-
-			//double num = cur_good_v[0]->fear - cur_bad_v[0]->fear;
-			/*double loss = cur_good_v[0]->oracle_loss - cur_bad_v[0]->oracle_loss;
-			  double margin = cur_good_v[0]->oracle_feat_diff.dot(dense_weights) - cur_bad_v[0]->oracle_feat_diff.dot(dense_weights);
-			  double num = loss - margin;
-			  SparseVector<double> diff = cur_good_v[0]->features;
-			  diff -= cur_bad_v[0]->features;
-			  double delta = num / (diff.l2norm_sq() * max_step_size);
-			  delta = max(-cur_good_v[0]->alpha, min(delta, cur_bad_v[0]->alpha));
-			  cur_good_v[0]->alpha += delta;
-			  cur_bad_v[0]->alpha -= delta;
-			  double step_size = delta * max_step_size;
-			  lambdas += (cur_bad_v[0]->features) * step_size;
-			  lambdas -= (cur_good_v[0]->features) * step_size;
-			*/
-			
-			double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights);
-
-			cur_pair[0]->alpha += delta;
-			cur_pair[1]->alpha -= delta;
-			double step_size = delta * max_step_size;
-			/*			lambdas += (cur_pair[1]->oracle_feat_diff) * step_size;
-						lambdas -= (cur_pair[0]->oracle_feat_diff) * step_size;*/
-			
-			cerr << "step " << step_size << endl;
-			double alpha_sum=0;
-			SparseVector<double> temp_lambdas = lambdas;
-			
-			for(int u=0;u!=cur_constraint.size();u++)	
-			  { 
-			    cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << endl;
-			    temp_lambdas += (cur_constraint[u]->oracleN->features-cur_constraint[u]->features) * cur_constraint[u]->alpha * step_size;
-			    alpha_sum += cur_constraint[u]->alpha;
-			  }
-			cerr << "Alpha sum " << alpha_sum << " " << temp_lambdas << endl;
-						
-			lambdas += (cur_pair[1]->features) * step_size;
-			lambdas -= (cur_pair[0]->features) * step_size;
-			cerr << " Lambdas " << lambdas << endl;
-			//reload weights based on update
-			dense_weights.clear();
-			//weights.InitFromVector(lambdas);
-			//weights.InitVector(&dense_weights);
-			lambdas.init_vector(&dense_weights);
-			dense_weights_g = dense_weights;
-			iter++;
-					
-			if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha <<  endl;		
-			//		cerr << "SMO opt " << iter << " " << delta << " " << cur_good_v[0]->alpha << " " << cur_bad_v[0]->alpha <<  endl;
-			if(no_select) //don't use selection heuristic to determine when to stop SMO, rather just when delta =0 
-			  if (delta == 0) iter = MAX_SMO;
-			
-			//only perform one dual coordinate ascent step
-			if(optimizer == 2) 
-			  {
-			    optimize_again = false;
-			    iter = MAX_SMO;
-			  }		
-			
-		      }
-		    if(optimizer == 3)
-		      {
-			if(!no_reweight)
-			  {
-			    if(DEBUG_SMO) cerr<< "Decoding with new weights -- now orac are " << oracles[cur_sent].good.size() << endl;
-			    Hypergraph hg = observer.GetCurrentForest();
-			    hg.Reweight(dense_weights);
-			    //observer.UpdateOracles(cur_sent, hg);
-			    if(unique_kbest)
-                              observer.UpdateOracles<KBest::FilterUnique>(cur_sent, hg);
-                            else
-                              observer.UpdateOracles<KBest::NoFilter<std::vector<WordID> > >(cur_sent, hg);
-
-			    
-			  }
-		      }
-		  }
-		
-		
-	      }
-	   
-	    //print objective after this sentence
-	    double lambda_change = (lambdas - old_lambdas).l2norm_sq();
-	    double max_fear = cur_constraint[cur_constraint.size()-1]->fear;
-	    double temp_objective = 0.5 * lambda_change;// + max_step_size * max_fear;
-
-	    for(int u=0;u!=cur_constraint.size();u++)	
-	      { 
-		cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << " " << cur_constraint[u]->fear << endl;
-		temp_objective += cur_constraint[u]->alpha * cur_constraint[u]->fear;
-	      }
-	    objective += temp_objective;
-	    
-	    cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl;
-	  }
-      
-    
-      if ((cur_sent * 40 / ds.size()) > dots) { ++dots; cerr << '.'; }
-      tot += lambdas;
-      ++lcount;
-      cur_sent++;
-      
-      cout << TD::GetString(cur_good_v[0]->hyp) << " ||| " << TD::GetString(cur_best_v[0]->hyp) << " ||| " << TD::GetString(cur_bad_v[0]->hyp) << endl;
-
-      //clear good/bad lists from oracles for this sentences  - you want to keep them around for things
-      
-      //      oracles[cur_sent].good.clear();
-      //oracles[cur_sent].bad.clear();
-    }
-
-    cerr << "FINAL OBJECTIVE: "<< objective << endl;
-    final_tot += tot;
-    cerr << "Translated " << lcount << " sentences " << endl;
-    cerr << " [AVG METRIC LAST PASS=" << (tot_loss / lcount) << "]\n";
-    tot_loss = 0;
-    /*
-      float corpus_score = acc->ComputeScore();
-      string corpus_details;
-      acc->ScoreDetails(&corpus_details);
-      cerr << "MODEL " << corpus_details << endl;
-      cout << corpus_score << endl;
-      
-      corpus_score = acc_h->ComputeScore();
-      acc_h->ScoreDetails(&corpus_details);
-      cerr << "HOPE " << corpus_details << endl;
-      cout << corpus_score << endl;
-      
-      corpus_score = acc_f->ComputeScore();
-      acc_f->ScoreDetails(&corpus_details);
-      cerr << "FEAR " << corpus_details << endl;
-      cout << corpus_score << endl;
-    */
-    int node_id = rng->next() * 100000;
-    cerr << " Writing weights to " << node_id << endl;
-    Weights::ShowLargestFeatures(dense_weights);
-    dots = 0;
-    ostringstream os;
-    os << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << ".gz";
-    string msg = "# MIRA tuned weights ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lcount);
-    //Weights.InitFromVector(lambdas);
-    lambdas.init_vector(&dense_weights);
-    Weights::WriteToFile(os.str(), dense_weights, true, &msg);
-
-    SparseVector<double> x = tot;
-    x /= lcount;
-    ostringstream sa;
-    string msga = "# MIRA tuned weights AVERAGED ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lcount);
-    sa << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << "-avg.gz";
-    //Weights ww;
-    //ww.InitFromVector(x);
-    x.init_vector(&dense_weights);
-    Weights::WriteToFile(sa.str(), dense_weights, true, &msga);
-
-    //assign averaged lambdas to initialize next iteration
-    //lambdas = x;
-
-    /*    double lambda_change = (old_lambdas - lambdas).l2norm_sq();
-    cerr << "Change in lambda " << lambda_change << endl;
-    
-    if ( lambda_change < EPSILON)
-      {
-	cur_pass = max_iteration;
-	cerr << "Weights converged - breaking" << endl;
-      }
-            
-    ++cur_pass;
-    */
-    
-    //} iteration while loop
- 
-    /* cerr << endl;
-  weights.WriteToFile("weights.mira-final.gz", true, &msg);
-  final_tot /= (lcount + 1);//max_iteration);
-  tot /= (corpus.size() + 1);
-  weights.InitFromVector(final_tot);
-  cerr << tot << "||||" << final_tot << endl;
-  msg = "# MIRA tuned weights (averaged vector)";
-  weights.WriteToFile("weights.mira-final-avg.gz", true, &msg);
-    */
-  cerr << "Optimization complete.\\AVERAGED WEIGHTS: weights.mira-final-avg.gz\n";
-  return 0;
-}
-
diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl
index f4d61407..90a4da0e 100755
--- a/training/mira/run_mira.pl
+++ b/training/mira/run_mira.pl
@@ -3,7 +3,7 @@ use strict;
 my @ORIG_ARGV=@ARGV;
 use Cwd qw(getcwd);
 my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0));
-push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; }
 
 # Skip local config (used for distributing jobs) if we're running in local-only mode
 use LocalConfig;
@@ -11,51 +11,50 @@ use Getopt::Long;
 use IPC::Open2;
 use POSIX ":sys_wait_h";
 my $QSUB_CMD = qsub_args(mert_memory());
-
-require "libcall.pl";
-
+my $default_jobs = env_default_jobs();
 
 my $srcFile;
 my $refFiles;
 my $bin_dir = $SCRIPT_DIR;
 die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
-my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+my $FAST_SCORE="$bin_dir/../../mteval/fast_score";
 die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
 
 my $iteration = 0.0;
-my $max_iterations = 6;
+my $max_iterations = 10;
 my $metric = "ibm_bleu";
 my $iniFile;
 my $weights;
 my $initialWeights;
-my $decode_nodes = 1;   # number of decode nodes
+my $jobs = $default_jobs;   # number of decode nodes
 my $pmem = "1g";
 my $dir;
 
 my $SCORER = $FAST_SCORE;
-my $local_server = "$bin_dir/local_parallelize.pl";
-my $parallelize = "$bin_dir/../dpmert/parallelize.pl";
-my $libcall = "$bin_dir/../dpmert/libcall.pl";
-my $sentserver = "$bin_dir/../dpmert/sentserver";
-my $sentclient = "$bin_dir/../dpmert/sentclient";
-my $run_local_server = 0;
+
+my $UTILS_DIR="$SCRIPT_DIR/../utils";
+require "$UTILS_DIR/libcall.pl";
+
+my $parallelize = "$UTILS_DIR/parallelize.pl";
+my $libcall = "$UTILS_DIR/libcall.pl";
+my $sentserver = "$UTILS_DIR/sentserver";
+my $sentclient = "$UTILS_DIR/sentclient";
+
 my $run_local = 0;
-my $usefork;
 my $pass_suffix = '';
 
-my $cdec ="$bin_dir/kbest_mirav5"; #"$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mira_lv";
+my $cdec ="$bin_dir/kbest_cut_mira"; 
 
-#my $cdec ="$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mirav5"; #"$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mira_lv";
 die "Can't find decoder in $cdec" unless -x $cdec;
 my $decoder = $cdec;
 my $decoderOpt;
-my $update_size=250;
+my $update_size;
 my $approx_score;
 my $kbest_size=250;
 my $metric_scale=1;
 my $optimizer=2;
 my $disable_clean = 0;
-my $use_make;  # use make to parallelize line search
+my $use_make=0;  
 my $density_prune;
 my $cpbin=1;
 my $help = 0;
@@ -64,10 +63,10 @@ my $step_size = 0.01;
 my $gpref;
 my $unique_kbest;
 my $freeze;
-my $latent;
-my $sample_max;
 my $hopes=1;
 my $fears=1;
+my $sent_approx=0;
+my $pseudo_doc=0;
 
 my $range = 35000;
 my $minimum = 15000;
@@ -78,15 +77,13 @@ my $portn = int(rand($range)) + $minimum;
 Getopt::Long::Configure("no_auto_abbrev");
 if (GetOptions(
         "decoder=s" => \$decoderOpt,
-        "decode-nodes=i" => \$decode_nodes,
+        "jobs=i" => \$jobs,
         "density-prune=f" => \$density_prune,
         "dont-clean" => \$disable_clean,
         "pass-suffix=s" => \$pass_suffix,
-        "use-fork" => \$usefork,
         "epsilon=s" => \$epsilon,
         "help" => \$help,
         "local" => \$run_local,
-	"local_server" => \$run_local_server,
         "use-make=i" => \$use_make,
         "max-iterations=i" => \$max_iterations,
         "pmem=s" => \$pmem,
@@ -102,10 +99,9 @@ if (GetOptions(
 	"step-size=f" => \$step_size,
 	"hope-select=i" => \$hopes,
 	"fear-select=i" => \$fears,
-	"approx-score" => \$approx_score,
+	"sent-approx" => \$sent_approx,
+        "pseudo-doc" => \$pseudo_doc,
 	"unique-kbest" => \$unique_kbest,
-	"latent" => \$latent,
-	"sample-max=i" => \$sample_max,
         "grammar-prefix=s" => \$gpref,
 	"freeze" => \$freeze,
         "workdir=s" => \$dir,
@@ -235,7 +231,9 @@ close F;
 
 my $lastPScore = 0;
 my $lastWeightsFile;
-
+my $bestScoreIter=-1;
+my $bestScore=-1;
+unless ($update_size){$update_size = $kbest_size;}
 # main optimization loop
 #while (1){
 for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
@@ -260,16 +258,16 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
 	my $weightsFile="$dir/weights.$opt_iter";
 	print "ITER $iteration " ;
 	my $cur_pass = "-p 0$opt_iter";
-	my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile $refs_comma_sep -m $metric -s $metric_scale -a -b $update_size -k $kbest_size -o $optimizer $cur_pass -O $weightdir -D $dir  -h $hopes -f $fears -C $step_size";
+	my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile $refs_comma_sep -m $metric -s $metric_scale -b $update_size -k $kbest_size -o $optimizer $cur_pass -O $weightdir -D $dir  -h $hopes -f $fears -C $step_size";
 	if($unique_kbest){
 		$decoder_cmd .= " -u";
 	}
-	if($latent){
-		$decoder_cmd .= " -l";
-	}
-	if($sample_max){
-		$decoder_cmd .= " -t $sample_max";
+	if($sent_approx){
+		$decoder_cmd .= " -a";
 	}
+	if($pseudo_doc){
+                $decoder_cmd .= " -e";
+        }
 	if ($density_prune) {
 		$decoder_cmd .= " --density_prune $density_prune";
 	}
@@ -277,13 +275,11 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
 	if ($run_local) {
 		$pcmd = "cat $srcFile |";
 	} elsif ($use_make) {
-	    # TODO: Throw error when decode_nodes is specified along with use_make
+	    # TODO: Throw error when jobs is specified along with use_make
 		$pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --";
-	} elsif ($run_local_server){
-	    $pcmd = "cat $srcFile | $local_server $usefork -p $pmem -e $logdir -n $decode_nodes --";
-	}
+	} 
 	else {
-	    $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --baseport $portn --";
+	    $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --baseport $portn --";
 	}
 	my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
 	print STDERR "COMMAND:\n$cmd\n";
@@ -291,14 +287,14 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
 
 	my $retries = 0;
         my $num_topbest;
-        while($retries < 5) {
+        while($retries < 6) {
             $num_topbest = check_output("wc -l < $runFile");
             print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n";
             if($devSize == $num_topbest) {
                 last;
             } else {
                 print STDERR "Incorrect number of topbest. Waiting for distributed filesystem and retrying...\n";
-                sleep(3);
+                sleep(10);
             }
             $retries++;
         }
@@ -320,12 +316,15 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
 	close RUN;
 	close F; close B; close H;
 	
-	my $dec_score = check_output("cat $runFile.B | $SCORER $refs_comma_sep -l $metric");
-	my $dec_score_h = check_output("cat $runFile.H | $SCORER $refs_comma_sep -l $metric");
-	my $dec_score_f = check_output("cat $runFile.F | $SCORER $refs_comma_sep -l $metric");
+	my $dec_score = check_output("cat $runFile.B | $SCORER $refs_comma_sep -m $metric");
+	my $dec_score_h = check_output("cat $runFile.H | $SCORER $refs_comma_sep -m $metric");
+	my $dec_score_f = check_output("cat $runFile.F | $SCORER $refs_comma_sep -m $metric");
 	chomp $dec_score; chomp $dec_score_h; chomp $dec_score_f;
 	print STDERR "DECODER SCORE: $dec_score HOPE: $dec_score_h FEAR: $dec_score_f\n";
-
+	if ($dec_score> $bestScore){
+		$bestScoreIter=$opt_iter; 
+		$bestScore=$dec_score;
+	}
 	# save space
 	check_call("gzip -f $runFile");
 	check_call("gzip -f $decoderLog");
@@ -338,21 +337,11 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
 	$lastWeightsFile = "$dir/weights.$opt_iter";
 
 	average_weights("$weightdir/weights.mira-pass*.*[0-9].gz", $newWeightsFile, $logdir);
-#	check_call("cp $lastW $newWeightsFile");
-#	if ($icc < 2) {
-#		print STDERR "\nREACHED STOPPING CRITERION: score change too little\n";
-#		last;
-#	}
 	system("gzip -f $logdir/kbes*");
 	print STDERR "\n==========\n";
 	$iteration++;
 }
-#find 
-#my $cmd = `grep SCORE /fs/clip-galep5/lexical_tm/log.runmira.nist.20 | cat -n | sort -k +2 | tail -1`;
-#$cmd =~ m/([0-9]+)/;
-#$lastWeightsFile = "$dir/weights.$1";
-#check_call("ln -s $lastWeightsFile $dir/weights.tuned");
-print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
+print STDERR "\nBEST ITER: $bestScoreIter :: $bestScore\n\n\n";
 
 print STDOUT "$lastWeightsFile\n";
 
@@ -409,7 +398,7 @@ sub write_config {
 	print $fh "EVAL METRIC:      $metric\n";
 	print $fh "START ITERATION:  $iteration\n";
 	print $fh "MAX ITERATIONS:   $max_iterations\n";
-	print $fh "DECODE NODES:     $decode_nodes\n";
+	print $fh "DECODE NODES:     $jobs\n";
 	print $fh "HEAD NODE:        $host\n";
 	print $fh "PMEM (DECODING):  $pmem\n";
 	print $fh "CLEANUP:          $cleanup\n";
@@ -462,9 +451,87 @@ sub enseg {
 }
 
 sub print_help {
-	print "Something wrong\n";
+ my $executable = check_output("basename $0"); chomp $executable;
+        print << "Help";
+
+Usage: $executable [options] <ini file>
+
+        $executable [options] <ini file>
+                Runs a complete MIRA optimization using the ini file specified.
+
+Required:
+
+        --ref-files <files>
+                Dev set ref files.  This option takes only a single string argument.
+                To use multiple files (including file globbing), this argument should
+                be quoted.
+        --source-file <file>
+                Dev set source file.
+        --weights <file>
+                Initial weights file
+
+General options:
+
+        --help
+                Print this message and exit.
+
+       --max-iterations <M>
+                Maximum number of iterations to run.  If not specified, defaults
+                to $max_iterations.
+
+        --metric <method>
+                Metric to optimize.
+                Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
+
+        --workdir <dir>
+                Directory for intermediate and output files.  If not specified, the
+                name is derived from the ini filename.  Assuming that the ini
+                filename begins with the decoder name and ends with ini, the default
+                name of the working directory is inferred from the middle part of
+                the filename.  E.g. an ini file named decoder.foo.ini would have
+                a default working directory name foo.
+	--optimizer <I>
+		Learning method to use for weight update. Choice are 1) SGD, 2) PA MIRA with Selection from Cutting Plane, 3) Cutting Plane MIRA, 4) PA MIRA,5) nbest MIRA with hope, fear, and model constraints
+	--metric-scale <I>
+		Scale MT loss by this amount when computing hope/fear candidates
+	--kbest-size <I>
+		Size of k-best list to extract from forest
+	--update-size <I>
+		Size of k-best list to use for update (applies to optimizer 5)
+	--step-size <F>
+		Controls aggresiveness of update (C) 
+	--hope-select<I>
+		How to select hope candidate. Choices are 1) model score - cost, 2) min cost
+	--fear-select <I>
+		How to select fear candodate. Choices are 1) model score + cost, 2) max cost, 3) max score
+	--sent-approx
+		Use smoothed sentence-level MT metric
+	--pseudo-doc
+		Use pseudo document to approximate MT metric
+	--unique-kbest
+		Extract unique k-best from forest
+	--grammar-prefix <path>
+		Path to sentence-specific grammar files
+
+Job control options:
+
+        --jobs <I>
+                Number of decoder processes to run in parallel. [default=$default_jobs]
+
+        --pmem <N>
+                Amount of physical memory requested for parallel decoding jobs
+                (used with qsub requests only)
+
+	--local 
+		Run single learner
+	--use-make <I>
+		Run parallel learners on a single machine through fork.
+
+
+Help
 }
 
+
 sub cmdline {
     return join ' ',($0,@ORIG_ARGV);
 }
-- 
cgit v1.2.3


From 14a82a5c9116d5e30dbfa33561851fdee28a0925 Mon Sep 17 00:00:00 2001
From: Vladimir Eidelman <vlad@umiacs.umd.edu>
Date: Sat, 13 Apr 2013 22:21:04 -0400
Subject: cleanup mira

---
 training/mira/kbest_cut_mira.cc | 128 +++++++++++-----------------------------
 1 file changed, 36 insertions(+), 92 deletions(-)

diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc
index 34eb00dc..7df9a18f 100644
--- a/training/mira/kbest_cut_mira.cc
+++ b/training/mira/kbest_cut_mira.cc
@@ -40,7 +40,7 @@ bool no_reweight;
 bool no_select;
 bool unique_kbest;
 int update_list_size;
-vector<weight_t> dense_weights_g;
+vector<weight_t> dense_w_local;
 double mt_metric_scale;
 int optimizer;
 int fear_select;
@@ -170,7 +170,7 @@ bool FearCompareB(const HI& h1, const HI& h2 )
 
 bool FearComparePred(const HI& h1, const HI& h2 ) 
 {
-  return h1->features.dot(dense_weights_g) > h2->features.dot(dense_weights_g);
+  return h1->features.dot(dense_w_local) > h2->features.dot(dense_w_local);
 };
 
 bool HypothesisCompareG(const HI& h1, const HI& h2 ) 
@@ -203,12 +203,7 @@ void CuttingPlane(vector<shared_ptr<HypothesisInfo> >* cur_c, bool* again, vecto
       for(int u=0;u!=all_hyp.size();u++)	
 	{ 
 	  double t_score = all_hyp[u]->features.dot(dense_weights);
-	  //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score;
-	  
 	  all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*all_hyp[0]->mt_metric - hope_score + t_score; //relative loss
-	  //      all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*all_hyp[0]->mt_metric;
-	  //all_hyp[u]->oracle_feat_diff = all_hyp[0]->features - all_hyp[u]->features;
-	  //	all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score;
 	}
     
       sort(all_hyp.begin(),all_hyp.end(),FearCompareB);
@@ -238,24 +233,14 @@ double ComputeDelta(vector<shared_ptr<HypothesisInfo> >* cur_p, double max_step_
 {
   vector<shared_ptr<HypothesisInfo> >& cur_pair = *cur_p;
    double loss = cur_pair[0]->oracle_loss - cur_pair[1]->oracle_loss;
-   //double margin = -cur_pair[0]->oracle_feat_diff.dot(dense_weights) + cur_pair[1]->oracle_feat_diff.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff?
-   //double num = loss - margin;
-  
 
    double margin = -(cur_pair[0]->oracleN->features.dot(dense_weights)- cur_pair[0]->features.dot(dense_weights)) + (cur_pair[1]->oracleN->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights));
    const double num = margin +  loss;
    cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) <<endl;
    
 
-/*  double num = 
-    (cur_pair[0]->oracle_loss - cur_pair[0]->oracle_feat_diff.dot(dense_weights))
-    - (cur_pair[1]->oracle_loss - cur_pair[1]->oracle_feat_diff.dot(dense_weights));
-  */
-
   SparseVector<double> diff = cur_pair[0]->features;
   diff -= cur_pair[1]->features;
-  /*  SparseVector<double> diff = cur_pair[0]->oracle_feat_diff;
-  diff -= cur_pair[1]->oracle_feat_diff;*/
   double diffsqnorm = diff.l2norm_sq();
   double delta;
   if (diffsqnorm > 0)
@@ -264,7 +249,6 @@ double ComputeDelta(vector<shared_ptr<HypothesisInfo> >* cur_p, double max_step_
     delta = 0;
   cerr << " D1:" << delta;
   //clip delta (enforce margin constraints)
-
   delta = max(-cur_pair[0]->alpha, min(delta, cur_pair[1]->alpha));
   cerr << " D2:" << delta;
   return delta;
@@ -278,12 +262,12 @@ vector<shared_ptr<HypothesisInfo> > SelectPair(vector<shared_ptr<HypothesisInfo>
   
   vector<shared_ptr<HypothesisInfo> > pair;
 
-  if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for 1-mira
-  //    if(optimizer == 2)      {
+  if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for pa-mira
+
       pair.push_back(cur_constraint[0]);
       pair.push_back(cur_constraint[1]);
       return pair;
-      //   }
+
     }
   
   for(int u=0;u != cur_constraint.size();u++)	
@@ -299,8 +283,6 @@ vector<shared_ptr<HypothesisInfo> > SelectPair(vector<shared_ptr<HypothesisInfo>
 	}
       if(!max_fear) return pair; //
       
-      if(DEBUG_SELECT) cerr << " F" << max_fear->fear << endl;
-
       
       if ((cur_constraint[u]->alpha == 0) && (cur_constraint[u]->fear > max_fear->fear + SMO_EPSILON))
 	{
@@ -310,8 +292,7 @@ vector<shared_ptr<HypothesisInfo> > SelectPair(vector<shared_ptr<HypothesisInfo>
 		if (cur_constraint[i]->alpha > 0)
 		  {
 		    pair.push_back(cur_constraint[u]);
-		    pair.push_back(cur_constraint[i]);
-		    cerr << "RETJURN from 1" << endl;
+		    pair.push_back(cur_constraint[i]);		    
 		    return pair;
 		  }
 	    }
@@ -342,11 +323,10 @@ struct GoodBadOracle {
 
 struct TrainingObserver : public DecoderObserver {
   TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o, vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) {
-  // TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o) : ds(d), oracles(*o), kbest_size(k) {
     
-    //calculate corpus bleu score from previous iterations 1-best for BLEU gain
+
     if(!pseudo_doc && !sent_approx)
-    if(cur_pass > 0)
+    if(cur_pass > 0)     //calculate corpus bleu score from previous iterations 1-best for BLEU gain
       {
 	ScoreP acc;
 	for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) {
@@ -357,7 +337,7 @@ struct TrainingObserver : public DecoderObserver {
 	corpus_bleu_stats = acc;
 	corpus_bleu_score = acc->ComputeScore();
       }
-    //corpus_src_length = 0;
+
 }
   const DocScorer& ds;
   vector<ScoreP>& corpus_bleu_sent_stats;
@@ -396,9 +376,8 @@ struct TrainingObserver : public DecoderObserver {
 
   virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
     cur_sent = smeta.GetSentenceID();
-    //cerr << "SOURCE " << smeta.GetSourceLength() << endl;
     curr_src_length = (float) smeta.GetSourceLength();
-    //UpdateOracles(smeta.GetSentenceID(), *hg);
+
     if(unique_kbest)
       UpdateOracles<KBest::FilterUnique>(smeta.GetSentenceID(), *hg);
     else
@@ -431,9 +410,8 @@ struct TrainingObserver : public DecoderObserver {
     typedef KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,Filter> K;
     K kbest(forest,kbest_size);
     
-    //KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
     for (int i = 0; i < kbest_size; ++i) {
-      //const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+
       typename K::Derivation *d =
         kbest.LazyKthBest(forest.nodes_.size() - 1, i);
       if (!d) break;
@@ -489,10 +467,9 @@ struct TrainingObserver : public DecoderObserver {
       corpus_bleu_stats->PlusEquals(*sent_stats, PSEUDO_SCALE);
             
       corpus_src_length = PSEUDO_SCALE * (corpus_src_length + curr_src_length);
-      cerr << "CORP S " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n" << details2 << endl;
+      cerr << "ps corpus size: " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n" << details2 << endl;
     }
 
-
     //figure out how many hyps we can keep maximum
     int temp_update_size = update_list_size;
     if (all_hyp.size() < update_list_size){ temp_update_size = all_hyp.size();}
@@ -500,7 +477,8 @@ struct TrainingObserver : public DecoderObserver {
     //sort all hyps by sentscore (eg. bleu)
     sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareB);
     
-    if(PRINT_LIST){  cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++)	cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_weights_g) << endl; }
+    if(PRINT_LIST){  cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++)  
+						   cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_w_local) << endl; }
     
     if(hope_select == 1)
       {
@@ -508,7 +486,7 @@ struct TrainingObserver : public DecoderObserver {
 	if (PRINT_LIST) cerr << "HOPE " << endl;
 	for(int u=0;u!=all_hyp.size();u++)	
 	  { 
-	    double t_score = all_hyp[u]->features.dot(dense_weights_g);
+	    double t_score = all_hyp[u]->features.dot(dense_w_local);
 	    all_hyp[u]->hope = all_hyp[u]->mt_metric + t_score;
 	    if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl; 
 	    
@@ -522,47 +500,38 @@ struct TrainingObserver : public DecoderObserver {
     cur_good.insert(cur_good.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);    
     if(PRINT_LIST) { cerr << "GOOD" << endl;  for(int u=0;u!=cur_good.size();u++) cerr << cur_good[u]->mt_metric << " " << cur_good[u]->hope << endl;}     
 
+    //use hope for fear selection
     shared_ptr<HypothesisInfo>& oracleN = cur_good[0];
 
-
     if(fear_select == 1){   //compute fear hyps with model - bleu
       if (PRINT_LIST) cerr << "FEAR " << endl;
-      double hope_score = oracleN->features.dot(dense_weights_g);
+      double hope_score = oracleN->features.dot(dense_w_local);
 
       if (PRINT_LIST) cerr << "hope score " << hope_score << endl;
       for(int u=0;u!=all_hyp.size();u++)	
 	{ 
-	  double t_score = all_hyp[u]->features.dot(dense_weights_g);
-	  //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score;
-	  
-	  /*	  all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric - hope_score + t_score; //relative loss
-	  all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric;
-	  all_hyp[u]->oracle_feat_diff = cur_oracle->features - all_hyp[u]->features;*/
+	  double t_score = all_hyp[u]->features.dot(dense_w_local);
 
 	  all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric - hope_score + t_score; //relative loss
 	  all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric;
 	  all_hyp[u]->oracle_feat_diff = oracleN->features - all_hyp[u]->features;
 	  all_hyp[u]->oracleN=oracleN;
-	  //	all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score;
 	  if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl; 
 	  
 	}
       
       sort(all_hyp.begin(),all_hyp.end(),FearCompareB);
       
-      cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size);    
     }
     else if(fear_select == 2) //select fear based on cost
       {
-	cur_bad.insert(cur_bad.begin(), all_hyp.end()-temp_update_size, all_hyp.end()); 
-	reverse(cur_bad.begin(),cur_bad.end());
+	sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareG);
       }
-    else //pred-based, fear_select = 3
+    else //max model score, also known as prediction-based
       {
 	sort(all_hyp.begin(),all_hyp.end(),FearComparePred);
-	cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); 
       }
-
+    cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); 
 
     if(PRINT_LIST){ cerr<< "BAD"<<endl; for(int u=0;u!=cur_bad.size();u++) cerr << cur_bad[u]->mt_metric << " H:" << cur_bad[u]->hope << " F:" << cur_bad[u]->fear << endl;}
     
@@ -616,13 +585,12 @@ void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScore
     ++lc;
  
   }
-
   
   assert(lc > 0);
   float score = acc->ComputeScore();
   string details;
   acc->ScoreDetails(&details);
-  cerr << "INIT RUN " << details << score << endl;
+  cerr << "Previous run: " << details << score << endl;
 
 }
 
@@ -640,7 +608,6 @@ int main(int argc, char** argv) {
     rng.reset(new MT19937);
   
   vector<string> corpus;
-  //ReadTrainingCorpus(conf["source"].as<string>(), &corpus);
 
   const string metric_name = conf["mt_metric"].as<string>();
   optimizer = conf["optimizer"].as<int>();
@@ -654,7 +621,7 @@ int main(int argc, char** argv) {
   unique_kbest = conf.count("unique_k_best");
   pseudo_doc = conf.count("pseudo_doc");
   sent_approx = conf.count("sent_approx");
-  cerr << "PSEUDO " << pseudo_doc << " SENT " << sent_approx << endl;
+  cerr << "Using pseudo-doc:" << pseudo_doc << " Sent:" << sent_approx << endl;
   if(pseudo_doc)
     mt_metric_scale=1;
 
@@ -665,7 +632,6 @@ int main(int argc, char** argv) {
   //establish metric used for tuning
   if (type == TER) {
     invert_score = true;
-    // approx_score = false;
   } else {
     invert_score = false;
   }
@@ -681,20 +647,9 @@ int main(int argc, char** argv) {
     {
       ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, ds, output_dir);
     }
-  /*  if (ds.size() != corpus.size()) {
-    cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
-    return 1;
-    }*/
-  cerr << "Optimizing with " << optimizer << endl;
-  // load initial weights
-  /*Weights weights;
-  weights.InitFromFile(conf["input_weights"].as<string>());
-  SparseVector<double> lambdas;
-  weights.InitSparseVector(&lambdas);
-  */
-
-  
   
+  cerr << "Using optimizer:" << optimizer << endl;
+    
   ReadFile ini_rf(conf["decoder_config"].as<string>());
   Decoder decoder(ini_rf.stream());
 
@@ -705,7 +660,6 @@ int main(int argc, char** argv) {
   Weights::InitSparseVector(dense_weights, &lambdas);
 
   const string input = decoder.GetConf()["input"].as<string>();
-  //const bool show_feature_dictionary = decoder.GetConf().count("show_feature_dictionary");
   if (!SILENT) cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
   ReadFile in_read(input);
   istream *in = in_read.stream();
@@ -714,8 +668,6 @@ int main(int argc, char** argv) {
   
   const double max_step_size = conf["max_step_size"].as<double>();
 
-
-  //  assert(corpus.size() > 0);
   vector<GoodBadOracle> oracles(ds.size());
 
   TrainingObserver observer(conf["k_best_size"].as<int>(), ds, &oracles, &corpus_bleu_sent_stats);
@@ -725,27 +677,21 @@ int main(int argc, char** argv) {
   double objective=0;
   double tot_loss = 0;
   int dots = 0;
-  //  int cur_pass = 1;
-  //  vector<double> dense_weights;
   SparseVector<double> tot;
   SparseVector<double> final_tot;
-  //  tot += lambdas;          // initial weights
-  //  lcount++;                // count for initial weights
-
-  //string msg = "# MIRA tuned weights";
-  // while (cur_pass <= max_iteration) {
-    SparseVector<double> old_lambdas = lambdas;
-    tot.clear();
-    tot += lambdas;
-    cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; 
-    ScoreP acc, acc_h, acc_f;
-    
-    while(*in) {
+
+  SparseVector<double> old_lambdas = lambdas;
+  tot.clear();
+  tot += lambdas;
+  cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; 
+  ScoreP acc, acc_h, acc_f;
+  
+  while(*in) {
       getline(*in, buf);
       if (buf.empty()) continue;
       //TODO: allow batch updating
       lambdas.init_vector(&dense_weights);
-      dense_weights_g = dense_weights;
+      dense_w_local = dense_weights;
       decoder.SetId(cur_sent);
       decoder.Decode(buf, &observer);  // decode the sentence, calling Notify to get the hope,fear, and model best hyps. 
       
@@ -922,7 +868,7 @@ int main(int argc, char** argv) {
 
 			dense_weights.clear();
 			lambdas.init_vector(&dense_weights);
-			dense_weights_g = dense_weights;
+			dense_w_local = dense_weights;
 			iter++;
 					
 			if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha <<  endl;		
@@ -991,19 +937,17 @@ int main(int argc, char** argv) {
     ostringstream os;
     os << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << ".gz";
     string msg = "# MIRA tuned weights ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lcount);
-    //Weights.InitFromVector(lambdas);
     lambdas.init_vector(&dense_weights);
     Weights::WriteToFile(os.str(), dense_weights, true, &msg);
 
     SparseVector<double> x = tot;
-    x /= lcount;
+    x /= lcount+1;
     ostringstream sa;
     string msga = "# MIRA tuned weights AVERAGED ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lcount);
     sa << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << "-avg.gz";
     x.init_vector(&dense_weights);
     Weights::WriteToFile(sa.str(), dense_weights, true, &msga);
     
-    
     cerr << "Optimization complete.\n";
     return 0;
 }
-- 
cgit v1.2.3


From 61524f041158daa3b5fbfadf4e1494bb8623add3 Mon Sep 17 00:00:00 2001
From: Vladimir Eidelman <vlad@umiacs.umd.edu>
Date: Sat, 13 Apr 2013 23:35:06 -0400
Subject: cleanup script

---
 training/mira/run_mira.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl
index 90a4da0e..e72c02e0 100755
--- a/training/mira/run_mira.pl
+++ b/training/mira/run_mira.pl
@@ -593,7 +593,7 @@ sub average_weights {
 	    else
 	    {
 		(my $msg,my $ran,$mult) = split(/ \|\|\| /);
-		print "RAN $ran $mult\n";
+		print "Processing $ran $mult\n";
 	    }
 	}
 	$total_mult += $mult;
-- 
cgit v1.2.3


From 5daf7c9c53bf842721f7bbcbeb235279aa950bcf Mon Sep 17 00:00:00 2001
From: Vladimir Eidelman <vladimir.eidelman@gmail.com>
Date: Sun, 14 Apr 2013 00:00:43 -0400
Subject: cleanup script

---
 training/mira/run_mira.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl
index 90a4da0e..e72c02e0 100755
--- a/training/mira/run_mira.pl
+++ b/training/mira/run_mira.pl
@@ -593,7 +593,7 @@ sub average_weights {
 	    else
 	    {
 		(my $msg,my $ran,$mult) = split(/ \|\|\| /);
-		print "RAN $ran $mult\n";
+		print "Processing $ran $mult\n";
 	    }
 	}
 	$total_mult += $mult;
-- 
cgit v1.2.3


From 2613a9673263a4442b4a8f7fc28a820f8d071157 Mon Sep 17 00:00:00 2001
From: Vladimir Eidelman <vladimir.eidelman@gmail.com>
Date: Sun, 14 Apr 2013 00:02:00 -0400
Subject: add example to run script

---
 training/mira/run_mira.pl | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl
index e72c02e0..d71590ba 100755
--- a/training/mira/run_mira.pl
+++ b/training/mira/run_mira.pl
@@ -455,9 +455,24 @@ sub print_help {
         print << "Help";
 
 Usage: $executable [options] <ini file>
-
-        $executable [options] <ini file>
-                Runs a complete MIRA optimization using the ini file specified.
+        Runs a complete MIRA optimization using the ini file specified.
+	Example invocation:
+	run_mira.pl \\
+        --pmem 3g \\
+        --max-iterations 20 \\
+        --optimizer 2 \\
+        --unique-kbest \\
+        --jobs 15 \\
+        --kbest-size 500 \\
+        --hope-select 1 \\
+        --fear-select 1  \\
+        --ref-files "ref.0.soseos ref.1.soseos" \\
+        --source-file src.soseos \\
+        --weights weights.init \\
+        --workdir workdir \\
+        --grammar-prefix grammars/grammar \\
+        --step-size 0.01 \\
+        --metric-scale 10000
 
 Required:
 
-- 
cgit v1.2.3


From a42fa0eb8b75ebac19b1c3c68f4b435b949bf184 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Fri, 19 Apr 2013 17:06:35 -0400
Subject: hindi

---
 corpus/support/tokenizer.pl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index 0350a894..acc537fb 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -226,7 +226,7 @@ sub proc_token {
     }
 
     ## step 1: check the most common case
-    if($token =~ /^[a-z0-9\p{Cyrillic}\p{Greek}\p{Hebrew}\p{Han}\p{Arabic}]+$/i){
+    if($token =~ /^[a-z0-9\p{Cyrillic}\p{Greek}\p{Hebrew}\p{Han}\p{Arabic}\p{Devanagari}]+$/i){
 	### most common cases
 	return $token;
     }
@@ -246,7 +246,7 @@ sub proc_token {
 	## number
 	return $token;
     }
-    if($token =~ /^(@|#)[A-Za-z0-9_\p{Cyrillic}\p{Greek}\p{Hebrew}\p{Han}\p{Arabic}]+.*$/){
+    if($token =~ /^(@|#)[A-Za-z0-9_\p{Cyrillic}\p{Greek}\p{Hebrew}\p{Han}\p{Arabic}\p{Devanagari}]+.*$/){
         ## twitter hashtag or address
         return proc_rightpunc($token);
     }
@@ -277,7 +277,7 @@ sub proc_token {
     }
 
     #my $t1 = '[\x{0600}-\x{06ff}a-z\d\_\.\-]';
-    my $t1 = '[a-z\d\_\-\.\p{Cyrillic}\p{Greek}\p{Hebrew}\p{Han}\p{Arabic}]';
+    my $t1 = '[a-z\d\_\-\.\p{Cyrillic}\p{Greek}\p{Hebrew}\p{Han}\p{Arabic}\p{Devanagari}]';
     if($token =~ /^\/(($t1)+\/)+($t1)+\/?$/i){
 	### /nls/p/....
 	return $token;
@@ -361,7 +361,7 @@ sub deep_proc_token {
     }
 
     ##### step 0: if it mades up of all puncts, remove one punct at a time.
-    if($line !~ /[\p{Cyrillic}\p{Greek}\p{Hebrew}\p{Han}\p{Arabic}a-zA-Z\d]/){
+    if($line !~ /[\p{Cyrillic}\p{Greek}\p{Hebrew}\p{Han}\p{Arabic}\p{Devanagari}a-zA-Z\d]/){
 	if($line =~ /^(\!+|\@+|\++|\=+|\*+|\<+|\>+|\|+|\?+|\.+|\-+|\_+|\&+)$/){
 	    ## ++ @@@@ !!! ....
 	    return $line;
-- 
cgit v1.2.3


From 0e46089cafa4e8e2f060e370d7afaceeda6b90a9 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Mon, 22 Apr 2013 22:50:14 -0400
Subject: support emission probabilities in class-based LMs

---
 decoder/ff_klm.cc | 49 ++++++++++++++++++++++++++++++-------------------
 decoder/ff_klm.h  |  5 +++--
 2 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc
index fefa90bd..c8ca917a 100644
--- a/decoder/ff_klm.cc
+++ b/decoder/ff_klm.cc
@@ -1,6 +1,7 @@
 #include "ff_klm.h"
 
 #include <cstring>
+#include <cstdlib>
 #include <iostream>
 
 #include <boost/scoped_ptr.hpp>
@@ -151,8 +152,9 @@ template <class Model> class BoundaryRuleScore {
 template <class Model>
 class KLanguageModelImpl {
  public:
-  double LookupWords(const TRule& rule, const vector<const void*>& ant_states, double* oovs, void* remnant) {
+  double LookupWords(const TRule& rule, const vector<const void*>& ant_states, double* oovs, double* emit, void* remnant) {
     *oovs = 0;
+    *emit = 0;
     const vector<WordID>& e = rule.e();
     BoundaryRuleScore<Model> ruleScore(*ngram_, *static_cast<BoundaryAnnotatedState*>(remnant));
     unsigned i = 0;
@@ -169,8 +171,9 @@ class KLanguageModelImpl {
       if (e[i] <= 0) {
         ruleScore.NonTerminal(*static_cast<const BoundaryAnnotatedState*>(ant_states[-e[i]]));
       } else {
-        const WordID cdec_word_or_class = ClassifyWordIfNecessary(e[i]);  // in future,
-                                                                          // maybe handle emission
+        float ep = 0.f;
+        const WordID cdec_word_or_class = ClassifyWordIfNecessary(e[i], &ep);
+        if (ep) { *emit += ep; }
         const lm::WordIndex cur_word = MapWord(cdec_word_or_class); // map to LM's id
         if (cur_word == 0) (*oovs) += 1.0;
         ruleScore.Terminal(cur_word);
@@ -205,12 +208,14 @@ class KLanguageModelImpl {
   // if this is not a class-based LM, returns w untransformed,
   // otherwise returns a word class mapping of w,
   // returns TD::Convert("<unk>") if there is no mapping for w
-  WordID ClassifyWordIfNecessary(WordID w) const {
+  WordID ClassifyWordIfNecessary(WordID w, float* emitp) const {
     if (word2class_map_.empty()) return w;
     if (w >= word2class_map_.size())
       return kCDEC_UNK;
-    else
-      return word2class_map_[w];
+    else {
+      *emitp = word2class_map_[w].second;
+      return word2class_map_[w].first;
+    }
   }
 
   // converts to cdec word id's to KenLM's id space, OOVs and <unk> end up at 0
@@ -256,32 +261,32 @@ class KLanguageModelImpl {
     int lc = 0;
     if (!SILENT)
       cerr << "  Loading word classes from " << file << " ...\n";
-    AddWordToClassMapping_(TD::Convert("<s>"), TD::Convert("<s>"));
-    AddWordToClassMapping_(TD::Convert("</s>"), TD::Convert("</s>"));
-    while(in) {
-      getline(in, line);
-      if (!in) continue;
+    AddWordToClassMapping_(TD::Convert("<s>"), TD::Convert("<s>"), 0.0);
+    AddWordToClassMapping_(TD::Convert("</s>"), TD::Convert("</s>"), 0.0);
+    while(getline(in, line)) {
       dummy.clear();
       TD::ConvertSentence(line, &dummy);
       ++lc;
-      if (dummy.size() != 2) {
+      if (dummy.size() != 3) {
+        cerr << "    Class map file expects: CLASS WORD logp(WORD|CLASS)\n";
         cerr << "    Format error in " << file << ", line " << lc << ": " << line << endl;
         abort();
       }
-      AddWordToClassMapping_(dummy[0], dummy[1]);
+      AddWordToClassMapping_(dummy[1], dummy[0], strtof(TD::Convert(dummy[2]).c_str(), NULL));
     }
   }
 
-  void AddWordToClassMapping_(WordID word, WordID cls) {
+  void AddWordToClassMapping_(WordID word, WordID cls, float emit) {
     if (word2class_map_.size() <= word) {
-      word2class_map_.resize((word + 10) * 1.1, kCDEC_UNK);
+      word2class_map_.resize((word + 10) * 1.1, pair<WordID,float>(kCDEC_UNK,0.f));
       assert(word2class_map_.size() > word);
     }
-    if(word2class_map_[word] != kCDEC_UNK) {
+    if(word2class_map_[word].first != kCDEC_UNK) {
       cerr << "Multiple classes for symbol " << TD::Convert(word) << endl;
       abort();
     }
-    word2class_map_[word] = cls;
+    word2class_map_[word].first = cls;
+    word2class_map_[word].second = emit;
   }
 
   ~KLanguageModelImpl() {
@@ -304,7 +309,9 @@ class KLanguageModelImpl {
 
   int order_;
   vector<lm::WordIndex> cdec2klm_map_;
-  vector<WordID> word2class_map_;        // if this is a class-based LM, this is the word->class mapping
+  vector<pair<WordID,float> > word2class_map_; // if this is a class-based LM,
+          // .first is the word->class mapping
+          // .second is the emission log probability
 };
 
 template <class Model>
@@ -322,6 +329,7 @@ KLanguageModel<Model>::KLanguageModel(const string& param) {
   }
   fid_ = FD::Convert(featname);
   oov_fid_ = FD::Convert(featname+"_OOV");
+  emit_fid_ = FD::Convert(featname+"_Emit");
   // cerr << "FID: " << oov_fid_ << endl;
   SetStateSize(pimpl_->ReserveStateSize());
 }
@@ -340,9 +348,12 @@ void KLanguageModel<Model>::TraversalFeaturesImpl(const SentenceMetadata& /* sme
                                           void* state) const {
   double est = 0;
   double oovs = 0;
-  features->set_value(fid_, pimpl_->LookupWords(*edge.rule_, ant_states, &oovs, state));
+  double emit = 0;
+  features->set_value(fid_, pimpl_->LookupWords(*edge.rule_, ant_states, &oovs, &emit, state));
   if (oovs && oov_fid_)
     features->set_value(oov_fid_, oovs);
+  if (emit && emit_fid_)
+    features->set_value(emit_fid_, emit);
 }
 
 template <class Model>
diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h
index b5ceffd0..db4032f7 100644
--- a/decoder/ff_klm.h
+++ b/decoder/ff_klm.h
@@ -28,8 +28,9 @@ class KLanguageModel : public FeatureFunction {
                                      SparseVector<double>* estimated_features,
                                      void* out_context) const;
  private:
-  int fid_; // conceptually const; mutable only to simplify constructor
-  int oov_fid_; // will be zero if extra OOV feature is not configured by decoder
+  int fid_;        // LanguageModel
+  int oov_fid_;    // LanguageModel_OOV
+  int emit_fid_;   // LanguageModel_Emit [only used for class-based LMs]
   KLanguageModelImpl<Model>* pimpl_;
 };
 
-- 
cgit v1.2.3


From 4a8ab10734b202aa8744985820ed5eb68d2598e0 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Tue, 23 Apr 2013 19:50:18 -0400
Subject: fix build

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index d2d25903..1f0f2eee 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,7 @@ language: python
 python:
  - "2.7"
 before_script:
+ - sudo apt-get install libboost-filesystem1.48-dev
  - sudo apt-get install libboost-program-options1.48-dev
  - sudo apt-get install libboost-serialization1.48-dev
  - sudo apt-get install libboost-regex1.48-dev
-- 
cgit v1.2.3


From 9957c8a43354fe0a81b83659de965d6d0934adf8 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Tue, 23 Apr 2013 23:59:02 -0400
Subject: configure c++11 if available

---
 configure.ac                   |   1 +
 m4/ax_cxx_compile_stdcxx_11.m4 | 135 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 m4/ax_cxx_compile_stdcxx_11.m4

diff --git a/configure.ac b/configure.ac
index eb09676e..8cbdb4fa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -10,6 +10,7 @@ esac
 # CPPFLAGS="$CPPFLAGS -std=c++0x"
 AC_PROG_CC
 AC_PROG_CXX
+AX_CXX_COMPILE_STDCXX_11
 AC_LANG_CPLUSPLUS
 AC_OPENMP
 BOOST_REQUIRE([1.44])
diff --git a/m4/ax_cxx_compile_stdcxx_11.m4 b/m4/ax_cxx_compile_stdcxx_11.m4
new file mode 100644
index 00000000..1bc31128
--- /dev/null
+++ b/m4/ax_cxx_compile_stdcxx_11.m4
@@ -0,0 +1,135 @@
+# ============================================================================
+#  http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html
+# ============================================================================
+#
+# SYNOPSIS
+#
+#   AX_CXX_COMPILE_STDCXX_11([ext|noext],[mandatory|optional])
+#
+# DESCRIPTION
+#
+#   Check for baseline language coverage in the compiler for the C++11
+#   standard; if necessary, add switches to CXXFLAGS to enable support.
+#
+#   The first argument, if specified, indicates whether you insist on an
+#   extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
+#   -std=c++11).  If neither is specified, you get whatever works, with
+#   preference for an extended mode.
+#
+#   The second argument, if specified 'mandatory' or if left unspecified,
+#   indicates that baseline C++11 support is required and that the macro
+#   should error out if no mode with that support is found.  If specified
+#   'optional', then configuration proceeds regardless, after defining
+#   HAVE_CXX11 if and only if a supporting mode is found.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Benjamin Kosnik <bkoz@redhat.com>
+#   Copyright (c) 2012 Zack Weinberg <zackw@panix.com>
+#   Copyright (c) 2013 Roy Stogner <roystgnr@ices.utexas.edu>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 3
+
+m4_define([_AX_CXX_COMPILE_STDCXX_11_testbody], [
+  template <typename T>
+    struct check
+    {
+      static_assert(sizeof(int) <= sizeof(T), "not big enough");
+    };
+
+    typedef check<check<bool>> right_angle_brackets;
+
+    int a;
+    decltype(a) b;
+
+    typedef check<int> check_type;
+    check_type c;
+    check_type&& cr = static_cast<check_type&&>(c);
+
+    auto d = a;
+])
+
+AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl
+  m4_if([$1], [], [],
+        [$1], [ext], [],
+        [$1], [noext], [],
+        [m4_fatal([invalid argument `$1' to AX_CXX_COMPILE_STDCXX_11])])dnl
+  m4_if([$2], [], [ax_cxx_compile_cxx11_required=true],
+        [$2], [mandatory], [ax_cxx_compile_cxx11_required=true],
+        [$2], [optional], [ax_cxx_compile_cxx11_required=false],
+        [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX_11])])dnl
+  AC_LANG_PUSH([C++])dnl
+  ac_success=no
+  AC_CACHE_CHECK(whether $CXX supports C++11 features by default,
+  ax_cv_cxx_compile_cxx11,
+  [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
+    [ax_cv_cxx_compile_cxx11=yes],
+    [ax_cv_cxx_compile_cxx11=no])])
+  if test x$ax_cv_cxx_compile_cxx11 = xyes; then
+    ac_success=yes
+  fi
+
+  m4_if([$1], [noext], [], [dnl
+  if test x$ac_success = xno; then
+    for switch in -std=gnu++11 -std=gnu++0x; do
+      cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch])
+      AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch,
+                     $cachevar,
+        [ac_save_CXXFLAGS="$CXXFLAGS"
+         CXXFLAGS="$CXXFLAGS $switch"
+         AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
+          [eval $cachevar=yes],
+          [eval $cachevar=no])
+         CXXFLAGS="$ac_save_CXXFLAGS"])
+      if eval test x\$$cachevar = xyes; then
+        CXXFLAGS="$CXXFLAGS $switch"
+        ac_success=yes
+        break
+      fi
+    done
+  fi])
+
+  m4_if([$1], [ext], [], [dnl
+  if test x$ac_success = xno; then
+    for switch in -std=c++11 -std=c++0x; do
+      cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch])
+      AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch,
+                     $cachevar,
+        [ac_save_CXXFLAGS="$CXXFLAGS"
+         CXXFLAGS="$CXXFLAGS $switch"
+         AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
+          [eval $cachevar=yes],
+          [eval $cachevar=no])
+         CXXFLAGS="$ac_save_CXXFLAGS"])
+      if eval test x\$$cachevar = xyes; then
+        CXXFLAGS="$CXXFLAGS $switch"
+        ac_success=yes
+        break
+      fi
+    done
+  fi])
+  AC_LANG_POP([C++])
+  if test x$ax_cxx_compile_cxx11_required = xtrue; then
+    if test x$ac_success = xno; then
+      AC_MSG_ERROR([*** A compiler with support for C++11 language features is required.])
+    fi
+  else
+    if test x$ac_success = xno; then
+      HAVE_CXX11=0
+      AC_MSG_NOTICE([No compiler with C++11 support was found])
+    else
+      HAVE_CXX11=1
+      AC_DEFINE(HAVE_CXX11,1,
+                [define if the compiler supports basic C++11 syntax])
+    fi
+    AM_CONDITIONAL([HAVE_CXX11],[test "x$HAVE_CXX11" = "x1"])
+
+    AC_SUBST(HAVE_CXX11)
+  fi
+])
+
-- 
cgit v1.2.3


From 5aee54869aa19cfe9be965e67a472e94449d16da Mon Sep 17 00:00:00 2001
From: Kenneth Heafield <github@kheafield.com>
Date: Wed, 24 Apr 2013 10:12:41 +0100
Subject: KenLM 0831569c3137536165b107c6841603c725dfa2b1

---
 klm/lm/builder/corpus_count.cc      |  82 ++++++++++++++++++++---------
 klm/lm/builder/corpus_count.hh      |   5 ++
 klm/lm/builder/corpus_count_test.cc |   2 +-
 klm/lm/builder/lmplz_main.cc        |  17 +++++-
 klm/lm/builder/pipeline.cc          |   7 +--
 klm/lm/builder/pipeline.hh          |   9 ++--
 klm/lm/builder/print.cc             |  74 ++------------------------
 klm/lm/builder/print.hh             |   3 +-
 klm/lm/filter/filter_main.cc        |   4 +-
 klm/lm/kenlm_max_order_main.cc      |   6 ---
 klm/lm/query_main.cc                |   1 +
 klm/util/fake_ofstream.hh           |  94 +++++++++++++++++++++++++++++++++
 klm/util/file.cc                    |  37 +++++++++----
 klm/util/file_piece.cc              |  32 ++----------
 klm/util/file_piece.hh              |   5 +-
 klm/util/mmap.cc                    |  14 +++--
 klm/util/probing_hash_table.hh      |  92 +++++++++++++++++++++++++++++++--
 klm/util/probing_hash_table_test.cc |  52 +++++++++++++++++++
 klm/util/read_compressed.cc         | 100 ++++++++++++++++++++++++++----------
 klm/util/scoped.cc                  |  28 +++++++---
 klm/util/scoped.hh                  |   1 +
 klm/util/sized_iterator.hh          |   8 +++
 klm/util/usage.cc                   |  12 +++--
 23 files changed, 484 insertions(+), 201 deletions(-)
 delete mode 100644 klm/lm/kenlm_max_order_main.cc
 create mode 100644 klm/util/fake_ofstream.hh

diff --git a/klm/lm/builder/corpus_count.cc b/klm/lm/builder/corpus_count.cc
index abea4ed0..aea93ad1 100644
--- a/klm/lm/builder/corpus_count.cc
+++ b/klm/lm/builder/corpus_count.cc
@@ -3,6 +3,7 @@
 #include "lm/builder/ngram.hh"
 #include "lm/lm_exception.hh"
 #include "lm/word_index.hh"
+#include "util/fake_ofstream.hh"
 #include "util/file.hh"
 #include "util/file_piece.hh"
 #include "util/murmur_hash.hh"
@@ -23,39 +24,71 @@ namespace lm {
 namespace builder {
 namespace {
 
+#pragma pack(push)
+#pragma pack(4)
+struct VocabEntry {
+  typedef uint64_t Key;
+
+  uint64_t GetKey() const { return key; }
+  void SetKey(uint64_t to) { key = to; }
+
+  uint64_t key;
+  lm::WordIndex value;
+};
+#pragma pack(pop)
+
+const float kProbingMultiplier = 1.5;
+
 class VocabHandout {
   public:
-    explicit VocabHandout(int fd) {
-      util::scoped_fd duped(util::DupOrThrow(fd));
-      word_list_.reset(util::FDOpenOrThrow(duped));
-      
+    static std::size_t MemUsage(WordIndex initial_guess) {
+      if (initial_guess < 2) initial_guess = 2;
+      return util::CheckOverflow(Table::Size(initial_guess, kProbingMultiplier));
+    }
+
+    explicit VocabHandout(int fd, WordIndex initial_guess) :
+        table_backing_(util::CallocOrThrow(MemUsage(initial_guess))),
+        table_(table_backing_.get(), MemUsage(initial_guess)),
+        double_cutoff_(std::max<std::size_t>(initial_guess * 1.1, 1)),
+        word_list_(fd) {
       Lookup("<unk>"); // Force 0
       Lookup("<s>"); // Force 1
       Lookup("</s>"); // Force 2
     }
 
     WordIndex Lookup(const StringPiece &word) {
-      uint64_t hashed = util::MurmurHashNative(word.data(), word.size());
-      std::pair<Seen::iterator, bool> ret(seen_.insert(std::pair<uint64_t, lm::WordIndex>(hashed, seen_.size())));
-      if (ret.second) {
-        char null_delimit = 0;
-        util::WriteOrThrow(word_list_.get(), word.data(), word.size());
-        util::WriteOrThrow(word_list_.get(), &null_delimit, 1);
-        UTIL_THROW_IF(seen_.size() >= std::numeric_limits<lm::WordIndex>::max(), VocabLoadException, "Too many vocabulary words.  Change WordIndex to uint64_t in lm/word_index.hh.");
+      VocabEntry entry;
+      entry.key = util::MurmurHashNative(word.data(), word.size());
+      entry.value = table_.SizeNoSerialization();
+
+      Table::MutableIterator it;
+      if (table_.FindOrInsert(entry, it))
+        return it->value;
+      word_list_ << word << '\0';
+      UTIL_THROW_IF(Size() >= std::numeric_limits<lm::WordIndex>::max(), VocabLoadException, "Too many vocabulary words.  Change WordIndex to uint64_t in lm/word_index.hh.");
+      if (Size() >= double_cutoff_) {
+        table_backing_.call_realloc(table_.DoubleTo());
+        table_.Double(table_backing_.get());
+        double_cutoff_ *= 2;
       }
-      return ret.first->second;
+      return entry.value;
     }
 
     WordIndex Size() const {
-      return seen_.size();
+      return table_.SizeNoSerialization();
     }
 
   private:
-    typedef boost::unordered_map<uint64_t, lm::WordIndex> Seen;
+    // TODO: factor out a resizable probing hash table.
+    // TODO: use mremap on linux to get all zeros on resizes.
+    util::scoped_malloc table_backing_;
 
-    Seen seen_;
+    typedef util::ProbingHashTable<VocabEntry, util::IdentityHash> Table;
+    Table table_;
 
-    util::scoped_FILE word_list_;
+    std::size_t double_cutoff_;
+    
+    util::FakeOFStream word_list_;
 };
 
 class DedupeHash : public std::unary_function<const WordIndex *, bool> {
@@ -85,6 +118,7 @@ class DedupeEquals : public std::binary_function<const WordIndex *, const WordIn
 struct DedupeEntry {
   typedef WordIndex *Key;
   Key GetKey() const { return key; }
+  void SetKey(WordIndex *to) { key = to; }
   Key key;
   static DedupeEntry Construct(WordIndex *at) {
     DedupeEntry ret;
@@ -95,8 +129,6 @@ struct DedupeEntry {
 
 typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;
 
-const float kProbingMultiplier = 1.5;
-
 class Writer {
   public:
     Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size) 
@@ -105,7 +137,7 @@ class Writer {
         dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
         buffer_(new WordIndex[order - 1]),
         block_size_(position.GetChain().BlockSize()) {
-      dedupe_.Clear(DedupeEntry::Construct(&dedupe_invalid_[0]));
+      dedupe_.Clear();
       assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
       if (order == 1) {
         // Add special words.  AdjustCounts is responsible if order != 1.    
@@ -149,7 +181,7 @@ class Writer {
       }
       // Block end.  Need to store the context in a temporary buffer.  
       std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
-      dedupe_.Clear(DedupeEntry::Construct(&dedupe_invalid_[0]));
+      dedupe_.Clear();
       block_->SetValidSize(block_size_);
       gram_.ReBase((++block_)->Get());
       std::copy(buffer_.get(), buffer_.get() + gram_.Order() - 1, gram_.begin());
@@ -187,18 +219,22 @@ float CorpusCount::DedupeMultiplier(std::size_t order) {
   return kProbingMultiplier * static_cast<float>(sizeof(DedupeEntry)) / static_cast<float>(NGram::TotalSize(order));
 }
 
+std::size_t CorpusCount::VocabUsage(std::size_t vocab_estimate) {
+  return VocabHandout::MemUsage(vocab_estimate);
+}
+
 CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block) 
   : from_(from), vocab_write_(vocab_write), token_count_(token_count), type_count_(type_count),
     dedupe_mem_size_(Dedupe::Size(entries_per_block, kProbingMultiplier)),
     dedupe_mem_(util::MallocOrThrow(dedupe_mem_size_)) {
-  token_count_ = 0;
-  type_count_ = 0;
 }
 
 void CorpusCount::Run(const util::stream::ChainPosition &position) {
   UTIL_TIMER("(%w s) Counted n-grams\n");
 
-  VocabHandout vocab(vocab_write_);
+  VocabHandout vocab(vocab_write_, type_count_);
+  token_count_ = 0;
+  type_count_ = 0;
   const WordIndex end_sentence = vocab.Lookup("</s>");
   Writer writer(NGram::OrderFromSize(position.GetChain().EntrySize()), position, dedupe_mem_.get(), dedupe_mem_size_);
   uint64_t count = 0;
diff --git a/klm/lm/builder/corpus_count.hh b/klm/lm/builder/corpus_count.hh
index e255bad1..aa0ed8ed 100644
--- a/klm/lm/builder/corpus_count.hh
+++ b/klm/lm/builder/corpus_count.hh
@@ -23,6 +23,11 @@ class CorpusCount {
     // Memory usage will be DedupeMultipler(order) * block_size + total_chain_size + unknown vocab_hash_size
     static float DedupeMultiplier(std::size_t order);
 
+    // How much memory vocabulary will use based on estimated size of the vocab.
+    static std::size_t VocabUsage(std::size_t vocab_estimate);
+
+    // token_count: out.
+    // type_count aka vocabulary size.  Initialize to an estimate.  It is set to the exact value.
     CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block);
 
     void Run(const util::stream::ChainPosition &position);
diff --git a/klm/lm/builder/corpus_count_test.cc b/klm/lm/builder/corpus_count_test.cc
index 8d53ca9d..6d325ef5 100644
--- a/klm/lm/builder/corpus_count_test.cc
+++ b/klm/lm/builder/corpus_count_test.cc
@@ -44,7 +44,7 @@ BOOST_AUTO_TEST_CASE(Short) {
   util::stream::Chain chain(config);
   NGramStream stream;
   uint64_t token_count;
-  WordIndex type_count;
+  WordIndex type_count = 10;
   CorpusCount counter(input_piece, vocab.get(), token_count, type_count, chain.BlockSize() / chain.EntrySize());
   chain >> boost::ref(counter) >> stream >> util::stream::kRecycle;
 
diff --git a/klm/lm/builder/lmplz_main.cc b/klm/lm/builder/lmplz_main.cc
index 90b9dca2..1e086dcc 100644
--- a/klm/lm/builder/lmplz_main.cc
+++ b/klm/lm/builder/lmplz_main.cc
@@ -6,6 +6,7 @@
 #include <iostream>
 
 #include <boost/program_options.hpp>
+#include <boost/version.hpp>
 
 namespace {
 class SizeNotify {
@@ -33,13 +34,17 @@ int main(int argc, char *argv[]) {
     lm::builder::PipelineConfig pipeline;
 
     options.add_options()
-      ("order,o", po::value<std::size_t>(&pipeline.order)->required(), "Order of the model")
+      ("order,o", po::value<std::size_t>(&pipeline.order)
+#if BOOST_VERSION >= 104200
+         ->required()
+#endif
+         , "Order of the model")
       ("interpolate_unigrams", po::bool_switch(&pipeline.initial_probs.interpolate_unigrams), "Interpolate the unigrams (default: emulate SRILM by not interpolating)")
       ("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
       ("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
-      ("vocab_memory", SizeOption(pipeline.assume_vocab_hash_size, "50M"), "Assume that the vocabulary hash table will use this much memory for purposes of calculating total memory in the count step")
       ("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
       ("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
+      ("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
       ("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
       ("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write vocabulary file")
       ("verbose_header", po::bool_switch(&pipeline.verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.");
@@ -68,6 +73,14 @@ int main(int argc, char *argv[]) {
     po::store(po::parse_command_line(argc, argv, options), vm);
     po::notify(vm);
 
+    // required() appeared in Boost 1.42.0.
+#if BOOST_VERSION < 104200
+    if (!vm.count("order")) {
+      std::cerr << "the option '--order' is required but missing" << std::endl;
+      return 1;
+    }
+#endif
+
     util::NormalizeTempPrefix(pipeline.sort.temp_prefix);
 
     lm::builder::InitialProbabilitiesConfig &initial = pipeline.initial_probs;
diff --git a/klm/lm/builder/pipeline.cc b/klm/lm/builder/pipeline.cc
index 14a1f721..b89ea6ba 100644
--- a/klm/lm/builder/pipeline.cc
+++ b/klm/lm/builder/pipeline.cc
@@ -207,17 +207,18 @@ void CountText(int text_file /* input */, int vocab_file /* output */, Master &m
   const PipelineConfig &config = master.Config();
   std::cerr << "=== 1/5 Counting and sorting n-grams ===" << std::endl;
 
-  UTIL_THROW_IF(config.TotalMemory() < config.assume_vocab_hash_size, util::Exception, "Vocab hash size estimate " << config.assume_vocab_hash_size << " exceeds total memory " << config.TotalMemory());
+  const std::size_t vocab_usage = CorpusCount::VocabUsage(config.vocab_estimate);
+  UTIL_THROW_IF(config.TotalMemory() < vocab_usage, util::Exception, "Vocab hash size estimate " << vocab_usage << " exceeds total memory " << config.TotalMemory());
   std::size_t memory_for_chain = 
     // This much memory to work with after vocab hash table.
-    static_cast<float>(config.TotalMemory() - config.assume_vocab_hash_size) /
+    static_cast<float>(config.TotalMemory() - vocab_usage) /
     // Solve for block size including the dedupe multiplier for one block.
     (static_cast<float>(config.block_count) + CorpusCount::DedupeMultiplier(config.order)) *
     // Chain likes memory expressed in terms of total memory.
     static_cast<float>(config.block_count);
   util::stream::Chain chain(util::stream::ChainConfig(NGram::TotalSize(config.order), config.block_count, memory_for_chain));
 
-  WordIndex type_count;
+  WordIndex type_count = config.vocab_estimate;
   util::FilePiece text(text_file, NULL, &std::cerr);
   text_file_name = text.FileName();
   CorpusCount counter(text, vocab_file, token_count, type_count, chain.BlockSize() / chain.EntrySize());
diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh
index f1d6c5f6..845e5481 100644
--- a/klm/lm/builder/pipeline.hh
+++ b/klm/lm/builder/pipeline.hh
@@ -3,6 +3,7 @@
 
 #include "lm/builder/initial_probabilities.hh"
 #include "lm/builder/header_info.hh"
+#include "lm/word_index.hh"
 #include "util/stream/config.hh"
 #include "util/file_piece.hh"
 
@@ -19,9 +20,9 @@ struct PipelineConfig {
   util::stream::ChainConfig read_backoffs;
   bool verbose_header;
 
-  // Amount of memory to assume that the vocabulary hash table will use.  This
-  // is subtracted from total memory for CorpusCount.
-  std::size_t assume_vocab_hash_size;
+  // Estimated vocabulary size.  Used for sizing CorpusCount memory and
+  // initial probing hash table sizing, also in CorpusCount.
+  lm::WordIndex vocab_estimate;
 
   // Minimum block size to tolerate.
   std::size_t minimum_block;
@@ -33,7 +34,7 @@ struct PipelineConfig {
   std::size_t TotalMemory() const { return sort.total_memory; }
 };
 
-// Takes ownership of text_file.
+// Takes ownership of text_file and out_arpa.
 void Pipeline(PipelineConfig config, int text_file, int out_arpa);
 
 }} // namespaces
diff --git a/klm/lm/builder/print.cc b/klm/lm/builder/print.cc
index b0323221..84bd81ca 100644
--- a/klm/lm/builder/print.cc
+++ b/klm/lm/builder/print.cc
@@ -1,15 +1,11 @@
 #include "lm/builder/print.hh"
 
-#include "util/double-conversion/double-conversion.h"
-#include "util/double-conversion/utils.h"
+#include "util/fake_ofstream.hh"
 #include "util/file.hh"
 #include "util/mmap.hh"
 #include "util/scoped.hh"
 #include "util/stream/timer.hh"
 
-#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
-#include <boost/lexical_cast.hpp>
-
 #include <sstream>
 
 #include <string.h>
@@ -28,71 +24,6 @@ VocabReconstitute::VocabReconstitute(int fd) {
   map_.push_back(i);
 }
 
-namespace {
-class OutputManager {
-  public:
-    static const std::size_t kOutBuf = 1048576;
-
-    // Does not take ownership of out.
-    explicit OutputManager(int out)
-      : buf_(util::MallocOrThrow(kOutBuf)),
-        builder_(static_cast<char*>(buf_.get()), kOutBuf),
-        // Mostly the default but with inf instead.  And no flags.
-        convert_(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0),
-        fd_(out) {}
-
-    ~OutputManager() {
-      Flush();
-    }
-
-    OutputManager &operator<<(float value) {
-      // Odd, but this is the largest number found in the comments.
-      EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8);
-      convert_.ToShortestSingle(value, &builder_);
-      return *this;
-    }
-
-    OutputManager &operator<<(StringPiece str) {
-      if (str.size() > kOutBuf) {
-        Flush();
-        util::WriteOrThrow(fd_, str.data(), str.size());
-      } else {
-        EnsureRemaining(str.size());
-        builder_.AddSubstring(str.data(), str.size());
-      }
-      return *this;
-    }
-
-    // Inefficient!
-    OutputManager &operator<<(unsigned val) {
-      return *this << boost::lexical_cast<std::string>(val);
-    }
-
-    OutputManager &operator<<(char c) {
-      EnsureRemaining(1);
-      builder_.AddCharacter(c);
-      return *this;
-    }
-
-    void Flush() {
-      util::WriteOrThrow(fd_, buf_.get(), builder_.position());
-      builder_.Reset();
-    }
-
-  private:
-    void EnsureRemaining(std::size_t amount) {
-      if (static_cast<std::size_t>(builder_.size() - builder_.position()) < amount) {
-        Flush();
-      }
-    }
-
-    util::scoped_malloc buf_;
-    double_conversion::StringBuilder builder_;
-    double_conversion::DoubleToStringConverter convert_;
-    int fd_;
-};
-} // namespace
-
 PrintARPA::PrintARPA(const VocabReconstitute &vocab, const std::vector<uint64_t> &counts, const HeaderInfo* header_info, int out_fd) 
   : vocab_(vocab), out_fd_(out_fd) {
   std::stringstream stream;
@@ -112,8 +43,9 @@ PrintARPA::PrintARPA(const VocabReconstitute &vocab, const std::vector<uint64_t>
 }
 
 void PrintARPA::Run(const ChainPositions &positions) {
+  util::scoped_fd closer(out_fd_);
   UTIL_TIMER("(%w s) Wrote ARPA file\n");
-  OutputManager out(out_fd_);
+  util::FakeOFStream out(out_fd_);
   for (unsigned order = 1; order <= positions.size(); ++order) {
     out << "\\" << order << "-grams:" << '\n';
     for (NGramStream stream(positions[order - 1]); stream; ++stream) {
diff --git a/klm/lm/builder/print.hh b/klm/lm/builder/print.hh
index aa932e75..adbbb94a 100644
--- a/klm/lm/builder/print.hh
+++ b/klm/lm/builder/print.hh
@@ -88,7 +88,8 @@ template <class V> class Print {
 
 class PrintARPA {
   public:
-    // header_info may be NULL to disable the header
+    // header_info may be NULL to disable the header.
+    // Takes ownership of out_fd upon Run().
     explicit PrintARPA(const VocabReconstitute &vocab, const std::vector<uint64_t> &counts, const HeaderInfo* header_info, int out_fd);
 
     void Run(const ChainPositions &positions);
diff --git a/klm/lm/filter/filter_main.cc b/klm/lm/filter/filter_main.cc
index 1a4ba84f..1736bc40 100644
--- a/klm/lm/filter/filter_main.cc
+++ b/klm/lm/filter/filter_main.cc
@@ -25,8 +25,8 @@ void DisplayHelp(const char *name) {
     "    parser.\n"
     "single mode treats the entire input as a single sentence.\n"
     "multiple mode filters to multiple sentences in parallel.  Each sentence is on\n"
-    "    a separate line.  A separate file is created for each file by appending the\n"
-    "    0-indexed line number to the output file name.\n"
+    "    a separate line.  A separate file is created for each sentence by appending\n"
+    "    the 0-indexed line number to the output file name.\n"
     "union mode produces one filtered model that is the union of models created by\n"
     "    multiple mode.\n\n"
     "context means only the context (all but last word) has to pass the filter, but\n"
diff --git a/klm/lm/kenlm_max_order_main.cc b/klm/lm/kenlm_max_order_main.cc
deleted file mode 100644
index 94221201..00000000
--- a/klm/lm/kenlm_max_order_main.cc
+++ /dev/null
@@ -1,6 +0,0 @@
-#include "lm/max_order.hh"
-#include <iostream>
-
-int main(int argc, char *argv[]) {
-  std::cerr << "KenLM was compiled with a maximum supported n-gram order set to " << KENLM_MAX_ORDER << "." << std::endl;
-}
diff --git a/klm/lm/query_main.cc b/klm/lm/query_main.cc
index 49757d9a..27d3a1a5 100644
--- a/klm/lm/query_main.cc
+++ b/klm/lm/query_main.cc
@@ -2,6 +2,7 @@
 
 int main(int argc, char *argv[]) {
   if (!(argc == 2 || (argc == 3 && !strcmp(argv[2], "null")))) {
+    std::cerr << "KenLM was compiled with maximum order " << KENLM_MAX_ORDER << "." << std::endl;
     std::cerr << "Usage: " << argv[0] << " lm_file [null]" << std::endl;
     std::cerr << "Input is wrapped in <s> and </s> unless null is passed." << std::endl;
     return 1;
diff --git a/klm/util/fake_ofstream.hh b/klm/util/fake_ofstream.hh
new file mode 100644
index 00000000..bcdebe45
--- /dev/null
+++ b/klm/util/fake_ofstream.hh
@@ -0,0 +1,94 @@
+/* Like std::ofstream but without being incredibly slow.  Backed by a raw fd.
+ * Does not support many data types.  Currently, it's targeted at writing ARPA
+ * files quickly.
+ */
+#include "util/double-conversion/double-conversion.h"
+#include "util/double-conversion/utils.h"
+#include "util/file.hh"
+#include "util/scoped.hh"
+#include "util/string_piece.hh"
+
+#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
+#include <boost/lexical_cast.hpp>
+
+namespace util {
+class FakeOFStream {
+  public:
+    static const std::size_t kOutBuf = 1048576;
+
+    // Does not take ownership of out.
+    explicit FakeOFStream(int out)
+      : buf_(util::MallocOrThrow(kOutBuf)),
+        builder_(static_cast<char*>(buf_.get()), kOutBuf),
+        // Mostly the default but with inf instead.  And no flags.
+        convert_(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0),
+        fd_(out) {}
+
+    ~FakeOFStream() {
+      if (buf_.get()) Flush();
+    }
+
+    FakeOFStream &operator<<(float value) {
+      // Odd, but this is the largest number found in the comments.
+      EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8);
+      convert_.ToShortestSingle(value, &builder_);
+      return *this;
+    }
+
+    FakeOFStream &operator<<(double value) {
+      EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8);
+      convert_.ToShortest(value, &builder_);
+      return *this;
+    }
+
+    FakeOFStream &operator<<(StringPiece str) {
+      if (str.size() > kOutBuf) {
+        Flush();
+        util::WriteOrThrow(fd_, str.data(), str.size());
+      } else {
+        EnsureRemaining(str.size());
+        builder_.AddSubstring(str.data(), str.size());
+      }
+      return *this;
+    }
+
+    // Inefficient!  TODO: more efficient implementation
+    FakeOFStream &operator<<(unsigned value) {
+      return *this << boost::lexical_cast<std::string>(value);
+    }
+
+    FakeOFStream &operator<<(char c) {
+      EnsureRemaining(1);
+      builder_.AddCharacter(c);
+      return *this;
+    }
+
+    // Note this does not sync.
+    void Flush() {
+      util::WriteOrThrow(fd_, buf_.get(), builder_.position());
+      builder_.Reset();
+    }
+
+    // Not necessary, but does assure the data is cleared.
+    void Finish() {
+      Flush();
+      // It will segfault trying to null terminate otherwise.
+      builder_.Finalize();
+      buf_.reset();
+      util::FSyncOrThrow(fd_);
+    }
+
+  private:
+    void EnsureRemaining(std::size_t amount) {
+      if (static_cast<std::size_t>(builder_.size() - builder_.position()) <= amount) {
+        Flush();
+      }
+    }
+
+    util::scoped_malloc buf_;
+    double_conversion::StringBuilder builder_;
+    double_conversion::DoubleToStringConverter convert_;
+    int fd_;
+};
+
+} // namespace
diff --git a/klm/util/file.cc b/klm/util/file.cc
index 86d9b12d..c7d8e23b 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -111,15 +111,26 @@ void ResizeOrThrow(int fd, uint64_t to) {
   UTIL_THROW_IF_ARG(ret, FDException, (fd), "while resizing to " << to << " bytes");
 }
 
+namespace {
+std::size_t GuardLarge(std::size_t size) {
+  // The following operating systems have broken read/write/pread/pwrite that
+  // only supports up to 2^31.
+#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID)
+  return std::min(static_cast<std::size_t>(INT_MAX), size);
+#else
+  return size;
+#endif
+}
+}
+
 std::size_t PartialRead(int fd, void *to, std::size_t amount) {
 #if defined(_WIN32) || defined(_WIN64)
-  amount = min(static_cast<std::size_t>(INT_MAX), amount);
-  int ret = _read(fd, to, amount); 
+  int ret = _read(fd, to, GuardLarge(amount));
 #else
   errno = 0;
   ssize_t ret;
   do {
-    ret = read(fd, to, amount);
+    ret = read(fd, to, GuardLarge(amount));
   } while (ret == -1 && errno == EINTR);
 #endif
   UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
@@ -169,11 +180,13 @@ void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
     ssize_t ret;
     errno = 0;
     do {
+      ret =
 #ifdef OS_ANDROID
-      ret = pread64(fd, to, size, off);
+        pread64
 #else
-      ret = pread(fd, to, size, off);
+        pread
 #endif
+        (fd, to, GuardLarge(size), off);
     } while (ret == -1 && errno == EINTR);
     if (ret <= 0) {
       UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
@@ -190,14 +203,20 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
   const uint8_t *data = static_cast<const uint8_t*>(data_void);
   while (size) {
 #if defined(_WIN32) || defined(_WIN64)
-    int ret = write(fd, data, min(static_cast<std::size_t>(INT_MAX), size));
+    int ret;
 #else
-    errno = 0;
     ssize_t ret;
+#endif
+    errno = 0;
     do {
-      ret = write(fd, data, size);
-    } while (ret == -1 && errno == EINTR);
+      ret = 
+#if defined(_WIN32) || defined(_WIN64)
+        _write
+#else
+        write
 #endif
+        (fd, data, GuardLarge(size));
+    } while (ret == -1 && errno == EINTR);
     UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
     data += ret;
     size -= ret;
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index 9de30fc4..b5961bea 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -51,7 +51,7 @@ FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std:
 
 FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) :
   total_size_(kBadSize), page_(SizePage()) {
-  InitializeNoRead(name ? name : "istream", min_buffer);
+  InitializeNoRead("istream", min_buffer);
 
   fallback_to_read_ = true;
   data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
@@ -95,32 +95,6 @@ unsigned long int FilePiece::ReadULong() {
   return ReadNumber<unsigned long int>();
 }
 
-std::size_t FilePiece::Raw(void *to, std::size_t limit) {
-  if (!limit) return 0;
-  std::size_t in_buf = static_cast<std::size_t>(position_end_ - position_);
-  if (in_buf) {
-    std::size_t amount = std::min(in_buf, limit);
-    memcpy(to, position_, amount);
-    position_ += amount;
-    return amount;
-  }
-
-  std::size_t read_return;
-  if (fallback_to_read_) {
-    read_return = fell_back_.Read(to, limit);
-    progress_.Set(fell_back_.RawAmount());
-  } else {
-    uint64_t desired_begin = mapped_offset_ + static_cast<uint64_t>(position_ - data_.begin());
-    SeekOrThrow(file_.get(), desired_begin);
-    read_return = ReadOrEOF(file_.get(), to, limit);
-    // Good thing we never rewind.  This makes desired_begin calculate the right way the next time.
-    mapped_offset_ += static_cast<uint64_t>(read_return);
-    progress_ += read_return;
-  }
-  at_end_ |= (read_return == 0);
-  return read_return;
-}
-
 // Factored out so that istream can call this.
 void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
   file_name_ = name;
@@ -146,7 +120,7 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s
   }
   Shift();
   // gzip detect.
-  if ((position_end_ - position_) >= ReadCompressed::kMagicSize && ReadCompressed::DetectCompressedMagic(position_)) {
+  if ((position_end_ >= position_ + ReadCompressed::kMagicSize) && ReadCompressed::DetectCompressedMagic(position_)) {
     if (!fallback_to_read_) {
       at_end_ = false;
       TransitionToRead();
@@ -244,7 +218,7 @@ void FilePiece::MMapShift(uint64_t desired_begin) {
   // Use mmap.  
   uint64_t ignore = desired_begin % page_;
   // Duplicate request for Shift means give more data.  
-  if (position_ == data_.begin() + ignore) {
+  if (position_ == data_.begin() + ignore && position_) {
     default_map_size_ *= 2;
   }
   // Local version so that in case of failure it doesn't overwrite the class variable.  
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh
index 1b110287..c07c6011 100644
--- a/klm/util/file_piece.hh
+++ b/klm/util/file_piece.hh
@@ -64,10 +64,7 @@ class FilePiece {
     long int ReadLong();
     unsigned long int ReadULong();
 
-    // Fake read() function.  Reads up to limit bytes, returning the amount read.  Returns 0 on EOF || limit == 0. 
-    std::size_t Raw(void *to, std::size_t limit);
-
-    // Skip spaces defined by being in delim.
+    // Skip spaces defined by isspace.
     void SkipSpaces(const bool *delim = kSpaces) {
       for (; ; ++position_) {
         if (position_ == position_end_) Shift();
diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc
index bc9e3f81..6f79f26f 100644
--- a/klm/util/mmap.cc
+++ b/klm/util/mmap.cc
@@ -6,6 +6,7 @@
 
 #include "util/exception.hh"
 #include "util/file.hh"
+#include "util/scoped.hh"
 
 #include <iostream>
 
@@ -110,8 +111,14 @@ void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int
   UTIL_THROW_IF(!ret, ErrnoException, "MapViewOfFile failed");
 #else
   int protect = for_write ? (PROT_READ | PROT_WRITE) : PROT_READ;
-  void *ret = mmap(NULL, size, protect, flags, fd, offset);
-  UTIL_THROW_IF(ret == MAP_FAILED, ErrnoException, "mmap failed for size " << size << " at offset " << offset);
+  void *ret;
+  UTIL_THROW_IF((ret = mmap(NULL, size, protect, flags, fd, offset)) == MAP_FAILED, ErrnoException, "mmap failed for size " << size << " at offset " << offset);
+#  ifdef MADV_HUGEPAGE
+  /* We like huge pages but it's fine if we can't have them.  Note that huge
+   * pages are not supported for file-backed mmap on linux.
+   */
+  madvise(ret, size, MADV_HUGEPAGE);
+#  endif
 #endif
   return ret;
 }
@@ -141,8 +148,7 @@ void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scope
     case POPULATE_OR_READ:
 #endif
     case READ:
-      out.reset(malloc(size), size, scoped_memory::MALLOC_ALLOCATED);
-      if (!out.get()) UTIL_THROW(util::ErrnoException, "Allocating " << size << " bytes with malloc");
+      out.reset(MallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED);
       SeekOrThrow(fd, offset);
       ReadOrThrow(fd, out.get(), size);
       break;
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index 6780489d..57866ff9 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -6,6 +6,7 @@
 #include <algorithm>
 #include <cstddef>
 #include <functional>
+#include <vector>
 
 #include <assert.h>
 #include <stdint.h>
@@ -73,10 +74,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
       assert(initialized_);
 #endif
       UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
-      for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) {
-        if (equal_(i->GetKey(), invalid_)) { *i = t; return i; }
-        if (++i == end_) { i = begin_; }
-      }
+      return UncheckedInsert(t);
     }
 
     // Return true if the value was found (and not inserted).  This is consistent with Find but the opposite if hash_map!
@@ -126,12 +124,96 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
       }    
     }
 
-    void Clear(Entry invalid) {
+    void Clear() {
+      Entry invalid;
+      invalid.SetKey(invalid_);
       std::fill(begin_, end_, invalid);
       entries_ = 0;
     }
 
+    // Return number of entries assuming no serialization went on.
+    std::size_t SizeNoSerialization() const {
+      return entries_;
+    }
+
+    // Return memory size expected by Double.
+    std::size_t DoubleTo() const {
+      return buckets_ * 2 * sizeof(Entry);
+    }
+
+    // Inform the table that it has double the amount of memory.
+    // Pass clear_new = false if you are sure the new memory is initialized
+    // properly (to invalid_), e.g. by mremap.
+    void Double(void *new_base, bool clear_new = true) {
+      begin_ = static_cast<MutableIterator>(new_base);
+      MutableIterator old_end = begin_ + buckets_;
+      buckets_ *= 2;
+      end_ = begin_ + buckets_;
+      if (clear_new) {
+        Entry invalid;
+        invalid.SetKey(invalid_);
+        std::fill(old_end, end_, invalid);
+      }
+      std::vector<Entry> rolled_over;
+      // Move roll-over entries to a buffer because they might not roll over anymore.  This should be small.
+      for (MutableIterator i = begin_; i != old_end && !equal_(i->GetKey(), invalid_); ++i) {
+        rolled_over.push_back(*i);
+        i->SetKey(invalid_);
+      }
+      /* Re-insert everything.  Entries might go backwards to take over a
+       * recently opened gap, stay, move to new territory, or wrap around.   If
+       * an entry wraps around, it might go to a pointer greater than i (which
+       * can happen at the beginning) and it will be revisited to possibly fill
+       * in a gap created later.
+       */
+      Entry temp;
+      for (MutableIterator i = begin_; i != old_end; ++i) {
+        if (!equal_(i->GetKey(), invalid_)) {
+          temp = *i;
+          i->SetKey(invalid_);
+          UncheckedInsert(temp);
+        }
+      }
+      // Put the roll-over entries back in.
+      for (typename std::vector<Entry>::const_iterator i(rolled_over.begin()); i != rolled_over.end(); ++i) {
+        UncheckedInsert(*i);
+      }
+    }
+
+    // Mostly for tests: throws unless every entry is stored at or after its ideal bucket with no empty bucket in between.
+    void CheckConsistency() {
+      MutableIterator last;  // After the scan: the last empty bucket, or one before begin_ when no bucket is empty.
+      for (last = end_ - 1; last >= begin_ && !equal_(last->GetKey(), invalid_); --last) {}
+      UTIL_THROW_IF(last < begin_, ProbingSizeException, "Completely full");  // "== begin_" misfired when only begin_ was empty and missed a truly full table.
+      MutableIterator i;
+      // Entries before the first gap can be wrap-arounds, so an ideal bucket after the last gap is also legal for them.
+      for (i = begin_; !equal_(i->GetKey(), invalid_); ++i) {
+        MutableIterator ideal = Ideal(*i);
+        UTIL_THROW_IF(ideal > i && ideal <= last, Exception, "Inconsistency at position " << (i - begin_) << " should be at " << (ideal - begin_));
+      }
+      MutableIterator pre_gap = i;  // Most recent empty bucket seen so far.
+      for (; i != end_; ++i) {
+        if (equal_(i->GetKey(), invalid_)) {
+          pre_gap = i;
+          continue;
+        }
+        MutableIterator ideal = Ideal(*i);  // Must satisfy pre_gap < ideal <= i.
+        UTIL_THROW_IF(ideal > i || ideal <= pre_gap, Exception, "Inconsistency at position " << (i - begin_) << " with ideal " << (ideal - begin_));
+      }
+    }
+
   private:
+    template <class T> MutableIterator Ideal(const T &t) {
+      return begin_ + (hash_(t.GetKey()) % buckets_);
+    }
+
+    template <class T> MutableIterator UncheckedInsert(const T &t) {
+      for (MutableIterator i(Ideal(t));;) {
+        if (equal_(i->GetKey(), invalid_)) { *i = t; return i; }
+        if (++i == end_) { i = begin_; }
+      }
+    }
+
     MutableIterator begin_;
     std::size_t buckets_;
     MutableIterator end_;
diff --git a/klm/util/probing_hash_table_test.cc b/klm/util/probing_hash_table_test.cc
index be0fa859..9f7948ce 100644
--- a/klm/util/probing_hash_table_test.cc
+++ b/klm/util/probing_hash_table_test.cc
@@ -1,10 +1,14 @@
 #include "util/probing_hash_table.hh"
 
+#include "util/murmur_hash.hh"
+#include "util/scoped.hh"
+
 #define BOOST_TEST_MODULE ProbingHashTableTest
 #include <boost/test/unit_test.hpp>
 #include <boost/scoped_array.hpp>
 #include <boost/functional/hash.hpp>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
 
@@ -19,6 +23,10 @@ struct Entry {
     return key;
   }
 
+  void SetKey(unsigned char to) {
+    key = to;
+  }
+
   uint64_t GetValue() const {
     return value;
   }
@@ -46,5 +54,49 @@ BOOST_AUTO_TEST_CASE(simple) {
   BOOST_CHECK(!table.Find(2, i));
 }
 
+struct Entry64 {
+  uint64_t key;
+  typedef uint64_t Key;
+
+  Entry64() {}
+
+  explicit Entry64(uint64_t key_in) {
+    key = key_in;
+  }
+
+  Key GetKey() const { return key; }
+  void SetKey(uint64_t to) { key = to; }
+};
+
+struct MurmurHashEntry64 {
+  std::size_t operator()(uint64_t value) const {
+    return util::MurmurHash64A(&value, 8);
+  }
+};
+
+typedef ProbingHashTable<Entry64, MurmurHashEntry64> Table64;
+
+BOOST_AUTO_TEST_CASE(Double) {
+  for (std::size_t initial = 19; initial < 30; ++initial) {
+    size_t size = Table64::Size(initial, 1.2);
+    scoped_malloc mem(MallocOrThrow(size));
+    Table64 table(mem.get(), size, std::numeric_limits<uint64_t>::max());
+    table.Clear();
+    for (uint64_t i = 0; i < 19; ++i) {
+      table.Insert(Entry64(i));
+    }
+    table.CheckConsistency();
+    mem.call_realloc(table.DoubleTo());
+    table.Double(mem.get());
+    table.CheckConsistency();
+    for (uint64_t i = 20; i < 40 ; ++i) {
+      table.Insert(Entry64(i));
+    }
+    mem.call_realloc(table.DoubleTo());
+    table.Double(mem.get());
+    table.CheckConsistency();
+  }
+}
+
 } // namespace
 } // namespace util
diff --git a/klm/util/read_compressed.cc b/klm/util/read_compressed.cc
index b81549e4..b62a6e83 100644
--- a/klm/util/read_compressed.cc
+++ b/klm/util/read_compressed.cc
@@ -180,12 +180,73 @@ class GZip : public ReadBase {
 };
 #endif // HAVE_ZLIB
 
+const uint8_t kBZMagic[3] = {'B', 'Z', 'h'};
+
 #ifdef HAVE_BZLIB
 class BZip : public ReadBase {
   public:
-    explicit BZip(int fd, void *already_data, std::size_t already_size) {
+    BZip(int fd, void *already_data, std::size_t already_size) {
       scoped_fd hold(fd);
       closer_.reset(FDOpenReadOrThrow(hold));
+      file_ = NULL;
+      Open(already_data, already_size);
+    }
+
+    BZip(FILE *file, void *already_data, std::size_t already_size) {
+      closer_.reset(file);
+      file_ = NULL;
+      Open(already_data, already_size);
+    }
+
+    ~BZip() {
+      Close(file_);
+    }
+
+    std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+      assert(file_);
+      int bzerror = BZ_OK;
+      int ret = BZ2_bzRead(&bzerror, file_, to, std::min<std::size_t>(static_cast<std::size_t>(INT_MAX), amount));
+      long pos = ftell(closer_.get());
+      if (pos != -1) ReadCount(thunk) = pos;
+      switch (bzerror) {
+        case BZ_STREAM_END:
+          /* bzip2 files can be concatenated by e.g. pbzip2.  Annoyingly, the
+           * library doesn't handle this internally.  This gets the trailing
+           * data, grows it up to magic as needed, validates the magic, and
+           * reopens.
+           */
+          {
+            bzerror = BZ_OK;
+            void *trailing_data;
+            int trailing_size;
+            BZ2_bzReadGetUnused(&bzerror, file_, &trailing_data, &trailing_size);
+            UTIL_THROW_IF(bzerror != BZ_OK, BZException, "bzip2 error in BZ2_bzReadGetUnused " << BZ2_bzerror(file_, &bzerror) << " code " << bzerror);
+            std::string trailing(static_cast<const char*>(trailing_data), trailing_size);
+            Close(file_);
+
+            if (trailing_size < (int)sizeof(kBZMagic)) {
+              trailing.resize(sizeof(kBZMagic));
+              if (1 != fread(&trailing[trailing_size], sizeof(kBZMagic) - trailing_size, 1, closer_.get())) {
+                UTIL_THROW_IF(trailing_size, BZException, "File has trailing cruft");
+                // Legitimate end of file.
+                ReplaceThis(new Complete(), thunk);
+                return ret;
+              }
+            }
+            UTIL_THROW_IF(memcmp(trailing.data(), kBZMagic, sizeof(kBZMagic)), BZException, "Trailing cruft is not another bzip2 stream");
+            Open(&trailing[0], trailing.size());
+          }
+          return ret;
+        case BZ_OK:
+          return ret;
+        default:
+          UTIL_THROW(BZException, "bzip2 error " << BZ2_bzerror(file_, &bzerror) << " code " << bzerror);
+      }
+    }
+
+  private:
+    void Open(void *already_data, std::size_t already_size) {
+      assert(!file_);
       int bzerror = BZ_OK;
       file_ = BZ2_bzReadOpen(&bzerror, closer_.get(), 0, 0, already_data, already_size);
       switch (bzerror) {
@@ -199,38 +260,23 @@ class BZip : public ReadBase {
           UTIL_THROW(BZException, "IO error reading file");
         case BZ_MEM_ERROR:
           throw std::bad_alloc();
+        default:
+          UTIL_THROW(BZException, "Unknown bzip2 error code " << bzerror);
       }
+      assert(file_);
     }
 
-    ~BZip() {
+    static void Close(BZFILE *&file) {
+      if (file == NULL) return;
       int bzerror = BZ_OK;
-      BZ2_bzReadClose(&bzerror, file_);
+      BZ2_bzReadClose(&bzerror, file);
       if (bzerror != BZ_OK) {
-        std::cerr << "bz2 readclose error" << std::endl;
+        std::cerr << "bz2 readclose error number " << bzerror << std::endl;
         abort();
       }
+      file = NULL;
     }
 
-    std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
-      int bzerror = BZ_OK;
-      int ret = BZ2_bzRead(&bzerror, file_, to, std::min<std::size_t>(static_cast<std::size_t>(INT_MAX), amount));
-      long pos;
-      switch (bzerror) {
-        case BZ_STREAM_END:
-          pos = ftell(closer_.get());
-          if (pos != -1) ReadCount(thunk) = pos;
-          ReplaceThis(new Complete(), thunk);
-          return ret;
-        case BZ_OK:
-          pos = ftell(closer_.get());
-          if (pos != -1) ReadCount(thunk) = pos;
-          return ret;
-        default:
-          UTIL_THROW(BZException, "bzip2 error " << BZ2_bzerror(file_, &bzerror) << " code " << bzerror);
-      }
-    }
-
-  private:
     scoped_FILE closer_;
     BZFILE *file_;
 };
@@ -346,11 +392,11 @@ MagicResult DetectMagic(const void *from_void) {
   if (header[0] == 0x1f && header[1] == 0x8b) {
     return GZIP;
   }
-  if (header[0] == 'B' && header[1] == 'Z' && header[2] == 'h') {
+  if (!memcmp(header, kBZMagic, sizeof(kBZMagic))) {
     return BZIP;
   }
-  const uint8_t xzmagic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
-  if (!memcmp(header, xzmagic, 6)) {
+  const uint8_t kXZMagic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
+  if (!memcmp(header, kXZMagic, sizeof(kXZMagic))) {
     return XZIP;
   }
   return UNKNOWN;
diff --git a/klm/util/scoped.cc b/klm/util/scoped.cc
index e7066ee4..6c5b0c2d 100644
--- a/klm/util/scoped.cc
+++ b/klm/util/scoped.cc
@@ -1,6 +1,9 @@
 #include "util/scoped.hh"
 
 #include <cstdlib>
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <sys/mman.h>
+#endif
 
 namespace util {
 
@@ -10,20 +13,31 @@ MallocException::MallocException(std::size_t requested) throw() {
 
 MallocException::~MallocException() throw() {}
 
+namespace {
+void *InspectAddr(void *addr, std::size_t requested, const char *func_name) {  // Shared result check for the allocation wrappers; returns addr.
+  UTIL_THROW_IF_ARG(!addr && requested, MallocException, (requested), "in " << func_name);  // NULL is accepted only for a zero-byte request.
+  // These routines are often used for large chunks of memory where huge pages help.  Best effort: the madvise result is ignored.
+#ifdef MADV_HUGEPAGE
+  madvise(addr, requested, MADV_HUGEPAGE);
+#endif
+  return addr;
+}
+} // namespace
+
 void *MallocOrThrow(std::size_t requested) {
-  void *ret;
-  UTIL_THROW_IF_ARG(!(ret = std::malloc(requested)), MallocException, (requested), "in malloc");
-  return ret;
+  return InspectAddr(std::malloc(requested), requested, "malloc");
+}
+
+void *CallocOrThrow(std::size_t requested) {
+  return InspectAddr(std::calloc(1, requested), requested, "calloc");
 }
 
 scoped_malloc::~scoped_malloc() {
   std::free(p_);
 }
 
-void scoped_malloc::call_realloc(std::size_t to) {
-  void *ret;
-  UTIL_THROW_IF_ARG(!(ret = std::realloc(p_, to)) && to, MallocException, (to), "in realloc");
-  p_ = ret;
+void scoped_malloc::call_realloc(std::size_t requested) {
+  p_ = InspectAddr(std::realloc(p_, requested), requested, "realloc");
 }
 
 } // namespace util
diff --git a/klm/util/scoped.hh b/klm/util/scoped.hh
index d0a5aabd..b642d064 100644
--- a/klm/util/scoped.hh
+++ b/klm/util/scoped.hh
@@ -14,6 +14,7 @@ class MallocException : public ErrnoException {
 };
 
 void *MallocOrThrow(std::size_t requested);
+void *CallocOrThrow(std::size_t requested);
 
 class scoped_malloc {
   public:
diff --git a/klm/util/sized_iterator.hh b/klm/util/sized_iterator.hh
index aabcc531..cf998953 100644
--- a/klm/util/sized_iterator.hh
+++ b/klm/util/sized_iterator.hh
@@ -3,6 +3,7 @@
 
 #include "util/proxy_iterator.hh"
 
+#include <algorithm>
 #include <functional>
 #include <string>
 
@@ -63,6 +64,13 @@ class SizedProxy {
     const void *Data() const { return inner_.Data(); }
     void *Data() { return inner_.Data(); }
 
+    friend void swap(SizedProxy &first, SizedProxy &second) {
+      std::swap_ranges(
+          static_cast<char*>(first.inner_.Data()), 
+          static_cast<char*>(first.inner_.Data()) + first.inner_.EntrySize(),
+          static_cast<char*>(second.inner_.Data()));
+    }
+
   private:
     friend class util::ProxyIterator<SizedProxy>;
 
diff --git a/klm/util/usage.cc b/klm/util/usage.cc
index b8e125d0..ad4dc7b4 100644
--- a/klm/util/usage.cc
+++ b/klm/util/usage.cc
@@ -22,6 +22,11 @@ float FloatSec(const struct timeval &tv) {
   return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0);
 }
 #endif
+
+const char *SkipSpaces(const char *at) {
+  for (; *at == ' '; ++at) {}
+  return at;
+}
 } // namespace
 
 void PrintUsage(std::ostream &out) {
@@ -32,18 +37,19 @@ void PrintUsage(std::ostream &out) {
     return;
   }
   out << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n';
-
+  out << "CPU\t" << (FloatSec(usage.ru_utime) + FloatSec(usage.ru_stime)) << '\n';
   // Linux doesn't set memory usage :-(.  
   std::ifstream status("/proc/self/status", std::ios::in);
   std::string line;
   while (getline(status, line)) {
     if (!strncmp(line.c_str(), "VmRSS:\t", 7)) {
-      out << "VmRSS:  " << (line.c_str() + 7) << '\n';
+      out << "RSSCur\t" << SkipSpaces(line.c_str() + 7) << '\n';
       break;
     } else if (!strncmp(line.c_str(), "VmPeak:\t", 8)) {
-      out << "VmPeak: " << (line.c_str() + 8) << '\n';
+      out << "VmPeak\t" << SkipSpaces(line.c_str() + 8) << '\n';
     }
   }
+  out << "RSSMax\t" << usage.ru_maxrss << " kB" << '\n';
 #endif
 }
 
-- 
cgit v1.2.3