From 925087356b853e2099c1b60d8b757d7aa02121a9 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cab.ark.cs.cmu.edu>
Date: Tue, 2 Oct 2012 00:19:43 -0400
Subject: cdec cleanup, remove bayesian stuff, parsing stuff

---
 utils/Jamfile              |  32 ----
 utils/Makefile.am          |   7 +-
 utils/ccrp.h               | 270 ---------------------------------
 utils/ccrp_nt.h            | 164 --------------------
 utils/ccrp_onetable.h      | 253 -------------------------------
 utils/crp_table_manager.h  | 114 --------------
 utils/crp_test.cc          |  91 -----------
 utils/fast_sparse_vector.h |   2 +-
 utils/gamma_poisson.h      |  33 ----
 utils/mfcr.h               | 370 ---------------------------------------------
 utils/mfcr_test.cc         |  72 ---------
 utils/sampler.h            |   2 +-
 utils/slice_sampler.h      | 191 -----------------------
 utils/small_vector.h       |   2 +-
 utils/stringlib.h          |   2 +-
 utils/unigram_pyp_lm.cc    | 214 --------------------------
 16 files changed, 6 insertions(+), 1813 deletions(-)
 delete mode 100644 utils/Jamfile
 delete mode 100644 utils/ccrp.h
 delete mode 100644 utils/ccrp_nt.h
 delete mode 100644 utils/ccrp_onetable.h
 delete mode 100644 utils/crp_table_manager.h
 delete mode 100644 utils/crp_test.cc
 delete mode 100644 utils/gamma_poisson.h
 delete mode 100644 utils/mfcr.h
 delete mode 100644 utils/mfcr_test.cc
 delete mode 100644 utils/slice_sampler.h
 delete mode 100644 utils/unigram_pyp_lm.cc

(limited to 'utils')
diff --git a/utils/Jamfile b/utils/Jamfile
deleted file mode 100644
index 4444b25f..00000000
--- a/utils/Jamfile
+++ /dev/null
@@ -1,32 +0,0 @@
-import testing ;
-import option ;
-
-additional = ;
-if [ option.get with-cmph ] {
-  additional += perfect_hash.cc ;
-}
-
-lib utils :
-  alignment_io.cc 
-  b64tools.cc 
-  corpus_tools.cc 
-  dict.cc 
-  tdict.cc 
-  fdict.cc 
-  gzstream.cc 
-  filelib.cc 
-  stringlib.cc 
-  sparse_vector.cc 
-  timing_stats.cc 
-  verbose.cc 
-  weights.cc
-  $(additional)
-  ..//z
-  : <include>.. <include>. : : <include>.. <include>. ;
-
-exe atools : atools.cc utils ..//boost_program_options ;
-exe reconstruct_weights : reconstruct_weights.cc utils ..//boost_program_options ;
-
-alias programs : reconstruct_weights atools ;
-
-all_tests [ glob *_test.cc phmt.cc ts.cc ] : utils : <testing.arg>$(TOP)/utils/test_data ;
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 799ec879..55d97354 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -3,16 +3,13 @@ bin_PROGRAMS = reconstruct_weights atools
 noinst_PROGRAMS = \
   ts \
   phmt \
-  mfcr_test \
-  crp_test \
   dict_test \
   m_test \
   weights_test \
   logval_test \
-  small_vector_test \
-  unigram_pyp_lm
+  small_vector_test
 
-TESTS = ts mfcr_test crp_test small_vector_test logval_test weights_test dict_test m_test
+TESTS = ts small_vector_test logval_test weights_test dict_test m_test
 
 noinst_LIBRARIES = libutils.a
 
diff --git a/utils/ccrp.h b/utils/ccrp.h
deleted file mode 100644
index f5d3fc78..00000000
--- a/utils/ccrp.h
+++ /dev/null
@@ -1,270 +0,0 @@
-#ifndef _CCRP_H_
-#define _CCRP_H_
-
-#include <numeric>
-#include <cassert>
-#include <cmath>
-#include <list>
-#include <iostream>
-#include <vector>
-#include <tr1/unordered_map>
-#include <boost/functional/hash.hpp>
-#include "sampler.h"
-#include "slice_sampler.h"
-#include "crp_table_manager.h"
-#include "m.h"
-
-// Chinese restaurant process (Pitman-Yor parameters) with table tracking.
-
-template <typename Dish, typename DishHash = boost::hash<Dish> >
-class CCRP {
- public:
-  CCRP(double disc, double strength) :
-      num_tables_(),
-      num_customers_(),
-      discount_(disc),
-      strength_(strength),
-      discount_prior_strength_(std::numeric_limits<double>::quiet_NaN()),
-      discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()),
-      strength_prior_shape_(std::numeric_limits<double>::quiet_NaN()),
-      strength_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {
-    check_hyperparameters();
-  }
-
-  CCRP(double d_strength, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) :
-      num_tables_(),
-      num_customers_(),
-      discount_(d),
-      strength_(c),
-      discount_prior_strength_(d_strength),
-      discount_prior_beta_(d_beta),
-      strength_prior_shape_(c_shape),
-      strength_prior_rate_(c_rate) {
-    check_hyperparameters();
-  }
-
-  void check_hyperparameters() {
-    if (discount_ < 0.0 || discount_ >= 1.0) {
-      std::cerr << "Bad discount: " << discount_ << std::endl;
-      abort();
-    }
-    if (strength_ <= -discount_) {
-      std::cerr << "Bad strength: " << strength_ << " (discount=" << discount_ << ")" << std::endl;
-      abort();
-    }
-  }
-
-  double discount() const { return discount_; }
-  double strength() const { return strength_; }
-  void set_hyperparameters(double d, double s) {
-    discount_ = d; strength_ = s;
-    check_hyperparameters();
-  }
-  void set_discount(double d) { discount_ = d; check_hyperparameters(); }
-  void set_strength(double a) { strength_ = a; check_hyperparameters(); }
-
-  bool has_discount_prior() const {
-    return !std::isnan(discount_prior_strength_);
-  }
-
-  bool has_strength_prior() const {
-    return !std::isnan(strength_prior_shape_);
-  }
-
-  void clear() {
-    num_tables_ = 0;
-    num_customers_ = 0;
-    dish_locs_.clear();
-  }
-
-  unsigned num_tables() const {
-    return num_tables_;
-  }
-
-  unsigned num_tables(const Dish& dish) const {
-    const typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.find(dish);
-    if (it == dish_locs_.end()) return 0;
-    return it->second.num_tables();
-  }
-
-  unsigned num_customers() const {
-    return num_customers_;
-  }
-
-  unsigned num_customers(const Dish& dish) const {
-    const typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.find(dish);
-    if (it == dish_locs_.end()) return 0;
-    return it->num_customers();
-  }
-
-  // returns +1 or 0 indicating whether a new table was opened
-  //   p = probability with which the particular table was selected
-  //       excluding p0
-  template <typename T>
-  int increment(const Dish& dish, const T& p0, MT19937* rng, T* p = NULL) {
-    CRPTableManager& loc = dish_locs_[dish];
-    bool share_table = false;
-    if (loc.num_customers()) {
-      const T p_empty = T(strength_ + num_tables_ * discount_) * p0;
-      const T p_share = T(loc.num_customers() - loc.num_tables() * discount_);
-      share_table = rng->SelectSample(p_empty, p_share);
-    }
-    if (share_table) {
-      loc.share_table(discount_, rng);
-    } else {
-      loc.create_table();
-      ++num_tables_;
-    }
-    ++num_customers_;
-    return (share_table ? 0 : 1);
-  }
-
-  // returns -1 or 0, indicating whether a table was closed
-  int decrement(const Dish& dish, MT19937* rng) {
-    CRPTableManager& loc = dish_locs_[dish];
-    assert(loc.num_customers());
-    if (loc.num_customers() == 1) {
-      dish_locs_.erase(dish);
-      --num_tables_;
-      --num_customers_;
-      return -1;
-    } else {
-      int delta = loc.remove_customer(rng);
-      --num_customers_;
-      if (delta) --num_tables_;
-      return delta;
-    }
-  }
-
-  template <typename T>
-  T prob(const Dish& dish, const T& p0) const {
-    const typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.find(dish);
-    const T r = T(num_tables_ * discount_ + strength_);
-    if (it == dish_locs_.end()) {
-      return r * p0 / T(num_customers_ + strength_);
-    } else {
-      return (T(it->second.num_customers() - discount_ * it->second.num_tables()) + r * p0) /
-               T(num_customers_ + strength_);
-    }
-  }
-
-  double log_crp_prob() const {
-    return log_crp_prob(discount_, strength_);
-  }
-
-  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
-  // does not include P_0's
-  double log_crp_prob(const double& discount, const double& strength) const {
-    double lp = 0.0;
-    if (has_discount_prior())
-      lp = Md::log_beta_density(discount, discount_prior_strength_, discount_prior_beta_);
-    if (has_strength_prior())
-      lp += Md::log_gamma_density(strength + discount, strength_prior_shape_, strength_prior_rate_);
-    assert(lp <= 0.0);
-    if (num_customers_) {
-      if (discount > 0.0) {
-        const double r = lgamma(1.0 - discount);
-        if (strength)
-          lp += lgamma(strength) - lgamma(strength / discount);
-        lp += - lgamma(strength + num_customers_)
-             + num_tables_ * log(discount) + lgamma(strength / discount + num_tables_);
-        assert(std::isfinite(lp));
-        for (typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.begin();
-             it != dish_locs_.end(); ++it) {
-          const CRPTableManager& cur = it->second;  // TODO check
-          for (CRPTableManager::const_iterator ti = cur.begin(); ti != cur.end(); ++ti) {
-            lp += (lgamma(ti->first - discount) - r) * ti->second;
-          }
-        }
-      } else if (!discount) { // discount == 0.0
-        lp += lgamma(strength) + num_tables_ * log(strength) - lgamma(strength + num_tables_);
-        assert(std::isfinite(lp));
-        for (typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.begin();
-             it != dish_locs_.end(); ++it) {
-          const CRPTableManager& cur = it->second;
-          lp += lgamma(cur.num_tables());
-        }
-      } else {
-        assert(!"discount less than 0 detected!");
-      }
-    }
-    assert(std::isfinite(lp));
-    return lp;
-  }
-
-  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
-    assert(has_discount_prior() || has_strength_prior());
-    if (num_customers() == 0) return;
-    DiscountResampler dr(*this);
-    StrengthResampler sr(*this);
-    for (unsigned iter = 0; iter < nloop; ++iter) {
-      if (has_strength_prior()) {
-        strength_ = slice_sampler1d(sr, strength_, *rng, -discount_ + std::numeric_limits<double>::min(),
-                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-      }
-      if (has_discount_prior()) {
-        double min_discount = std::numeric_limits<double>::min();
-        if (strength_ < 0.0) min_discount -= strength_;
-        discount_ = slice_sampler1d(dr, discount_, *rng, min_discount,
-                               1.0, 0.0, niterations, 100*niterations);
-      }
-    }
-    strength_ = slice_sampler1d(sr, strength_, *rng, -discount_,
-                             std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-  }
-
-  struct DiscountResampler {
-    DiscountResampler(const CCRP& crp) : crp_(crp) {}
-    const CCRP& crp_;
-    double operator()(const double& proposed_discount) const {
-      return crp_.log_crp_prob(proposed_discount, crp_.strength_);
-    }
-  };
-
-  struct StrengthResampler {
-    StrengthResampler(const CCRP& crp) : crp_(crp) {}
-    const CCRP& crp_;
-    double operator()(const double& proposed_strength) const {
-      return crp_.log_crp_prob(crp_.discount_, proposed_strength);
-    }
-  };
-
-  void Print(std::ostream* out) const {
-    std::cerr << "PYP(d=" << discount_ << ",c=" << strength_ << ") customers=" << num_customers_ << std::endl;
-    for (typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.begin();
-         it != dish_locs_.end(); ++it) {
-      (*out) << it->first << " : " << it->second << std::endl;
-    }
-  }
-
-  typedef typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator const_iterator;
-  const_iterator begin() const {
-    return dish_locs_.begin();
-  }
-  const_iterator end() const {
-    return dish_locs_.end();
-  }
-
-  unsigned num_tables_;
-  unsigned num_customers_;
-  std::tr1::unordered_map<Dish, CRPTableManager, DishHash> dish_locs_;
-
-  double discount_;
-  double strength_;
-
-  // optional beta prior on discount_ (NaN if no prior)
-  double discount_prior_strength_;
-  double discount_prior_beta_;
-
-  // optional gamma prior on strength_ (NaN if no prior)
-  double strength_prior_shape_;
-  double strength_prior_rate_;
-};
-
-template <typename T,typename H>
-std::ostream& operator<<(std::ostream& o, const CCRP<T,H>& c) {
-  c.Print(&o);
-  return o;
-}
-
-#endif
diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h
deleted file mode 100644
index 724b11bd..00000000
--- a/utils/ccrp_nt.h
+++ /dev/null
@@ -1,164 +0,0 @@
-#ifndef _CCRP_NT_H_
-#define _CCRP_NT_H_
-
-#include <numeric>
-#include <cassert>
-#include <cmath>
-#include <list>
-#include <iostream>
-#include <vector>
-#include <tr1/unordered_map>
-#include <boost/functional/hash.hpp>
-#include "sampler.h"
-#include "slice_sampler.h"
-#include "m.h"
-
-// Chinese restaurant process (1 parameter)
-template <typename Dish, typename DishHash = boost::hash<Dish> >
-class CCRP_NoTable {
- public:
-  explicit CCRP_NoTable(double conc) :
-    num_customers_(),
-    alpha_(conc),
-    alpha_prior_shape_(std::numeric_limits<double>::quiet_NaN()),
-    alpha_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {}
-
-  CCRP_NoTable(double c_shape, double c_rate, double c = 10.0) :
-    num_customers_(),
-    alpha_(c),
-    alpha_prior_shape_(c_shape),
-    alpha_prior_rate_(c_rate) {}
-
-  double alpha() const { return alpha_; }
-  void set_alpha(const double& alpha) { alpha_ = alpha; assert(alpha_ > 0.0); }
-
-  bool has_alpha_prior() const {
-    return !std::isnan(alpha_prior_shape_);
-  }
-
-  void clear() {
-    num_customers_ = 0;
-    custs_.clear();
-  }
-
-  unsigned num_customers() const {
-    return num_customers_;
-  }
-
-  unsigned num_customers(const Dish& dish) const {
-    const typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.find(dish);
-    if (it == custs_.end()) return 0;
-    return it->second;
-  }
-
-  int increment(const Dish& dish) {
-    int table_diff = 0;
-    if (++custs_[dish] == 1)
-      table_diff = 1;
-    ++num_customers_;
-    return table_diff;
-  }
-
-  int decrement(const Dish& dish) {
-    int table_diff = 0;
-    int nc = --custs_[dish];
-    if (nc == 0) {
-      custs_.erase(dish);
-      table_diff = -1;
-    } else if (nc < 0) {
-      std::cerr << "Dish counts dropped below zero for: " << dish << std::endl;
-      abort();
-    }
-    --num_customers_;
-    return table_diff;
-  }
-
-  template <typename F>
-  F prob(const Dish& dish, const F& p0) const {
-    const unsigned at_table = num_customers(dish);
-    return (F(at_table) + p0 * F(alpha_)) / F(num_customers_ + alpha_);
-  }
-
-  double logprob(const Dish& dish, const double& logp0) const {
-    const unsigned at_table = num_customers(dish);
-    return log(at_table + exp(logp0 + log(alpha_))) - log(num_customers_ + alpha_);
-  }
-
-  double log_crp_prob() const {
-    return log_crp_prob(alpha_);
-  }
-
-  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
-  // does not include P_0's
-  double log_crp_prob(const double& alpha) const {
-    double lp = 0.0;
-    if (has_alpha_prior())
-      lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
-    assert(lp <= 0.0);
-    if (num_customers_) {
-      lp += lgamma(alpha) - lgamma(alpha + num_customers_) +
-        custs_.size() * log(alpha);
-      assert(std::isfinite(lp));
-      for (typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.begin();
-             it != custs_.end(); ++it) {
-          lp += lgamma(it->second);
-      }
-    }
-    assert(std::isfinite(lp));
-    return lp;
-  }
-
-  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
-    assert(has_alpha_prior());
-    ConcentrationResampler cr(*this);
-    for (unsigned iter = 0; iter < nloop; ++iter) {
-        alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0,
-                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-    }
-  }
-
-  struct ConcentrationResampler {
-    ConcentrationResampler(const CCRP_NoTable& crp) : crp_(crp) {}
-    const CCRP_NoTable& crp_;
-    double operator()(const double& proposed_alpha) const {
-      return crp_.log_crp_prob(proposed_alpha);
-    }
-  };
-
-  void Print(std::ostream* out) const {
-    (*out) << "DP(alpha=" << alpha_ << ") customers=" << num_customers_ << std::endl;
-    int cc = 0;
-    for (typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.begin();
-         it != custs_.end(); ++it) {
-      (*out) << " " << it->first << "(" << it->second << " eating)";
-      ++cc;
-      if (cc > 10) { (*out) << " ..."; break; }
-    }
-    (*out) << std::endl;
-  }
-
-  unsigned num_customers_;
-  std::tr1::unordered_map<Dish, unsigned, DishHash> custs_;
-
-  typedef typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator const_iterator;
-  const_iterator begin() const {
-    return custs_.begin();
-  }
-  const_iterator end() const {
-    return custs_.end();
-  }
-
-  double alpha_;
-
-  // optional gamma prior on alpha_ (NaN if no prior)
-  double alpha_prior_shape_;
-  double alpha_prior_rate_;
-};
-
-template <typename T,typename H>
-std::ostream& operator<<(std::ostream& o, const CCRP_NoTable<T,H>& c) {
-  c.Print(&o);
-  return o;
-}
-
-#endif
diff --git a/utils/ccrp_onetable.h b/utils/ccrp_onetable.h
deleted file mode 100644
index abe399ea..00000000
--- a/utils/ccrp_onetable.h
+++ /dev/null
@@ -1,253 +0,0 @@
-#ifndef _CCRP_ONETABLE_H_
-#define _CCRP_ONETABLE_H_
-
-#include <numeric>
-#include <cassert>
-#include <cmath>
-#include <list>
-#include <iostream>
-#include <tr1/unordered_map>
-#include <boost/functional/hash.hpp>
-#include "sampler.h"
-#include "slice_sampler.h"
-
-// Chinese restaurant process (Pitman-Yor parameters) with one table approximation
-
-template <typename Dish, typename DishHash = boost::hash<Dish> >
-class CCRP_OneTable {
-  typedef std::tr1::unordered_map<Dish, unsigned, DishHash> DishMapType;
- public:
-  CCRP_OneTable(double disc, double conc) :
-    num_tables_(),
-    num_customers_(),
-    discount_(disc),
-    alpha_(conc),
-    discount_prior_alpha_(std::numeric_limits<double>::quiet_NaN()),
-    discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()),
-    alpha_prior_shape_(std::numeric_limits<double>::quiet_NaN()),
-    alpha_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {}
-
-  CCRP_OneTable(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) :
-    num_tables_(),
-    num_customers_(),
-    discount_(d),
-    alpha_(c),
-    discount_prior_alpha_(d_alpha),
-    discount_prior_beta_(d_beta),
-    alpha_prior_shape_(c_shape),
-    alpha_prior_rate_(c_rate) {}
-
-  double discount() const { return discount_; }
-  double alpha() const { return alpha_; }
-  void set_alpha(double c) { alpha_ = c; }
-  void set_discount(double d) { discount_ = d; }
-
-  bool has_discount_prior() const {
-    return !std::isnan(discount_prior_alpha_);
-  }
-
-  bool has_alpha_prior() const {
-    return !std::isnan(alpha_prior_shape_);
-  }
-
-  void clear() {
-    num_tables_ = 0;
-    num_customers_ = 0;
-    dish_counts_.clear();
-  }
-
-  unsigned num_tables() const {
-    return num_tables_;
-  }
-
-  unsigned num_tables(const Dish& dish) const {
-    const typename DishMapType::const_iterator it = dish_counts_.find(dish);
-    if (it == dish_counts_.end()) return 0;
-    return 1;
-  }
-
-  unsigned num_customers() const {
-    return num_customers_;
-  }
-
-  unsigned num_customers(const Dish& dish) const {
-    const typename DishMapType::const_iterator it = dish_counts_.find(dish);
-    if (it == dish_counts_.end()) return 0;
-    return it->second;
-  }
-
-  // returns +1 or 0 indicating whether a new table was opened
-  int increment(const Dish& dish) {
-    unsigned& dc = dish_counts_[dish];
-    ++dc;
-    ++num_customers_;
-    if (dc == 1) {
-      ++num_tables_;
-      return 1;
-    } else {
-      return 0;
-    }
-  }
-
-  // returns -1 or 0, indicating whether a table was closed
-  int decrement(const Dish& dish) {
-    unsigned& dc = dish_counts_[dish];
-    assert(dc > 0);
-    if (dc == 1) {
-      dish_counts_.erase(dish);
-      --num_tables_;
-      --num_customers_;
-      return -1;
-    } else {
-      assert(dc > 1);
-      --dc;
-      --num_customers_;
-      return 0;
-    }
-  }
-
-  double prob(const Dish& dish, const double& p0) const {
-    const typename DishMapType::const_iterator it = dish_counts_.find(dish);
-    const double r = num_tables_ * discount_ + alpha_;
-    if (it == dish_counts_.end()) {
-      return r * p0 / (num_customers_ + alpha_);
-    } else {
-      return (it->second - discount_ + r * p0) /
-               (num_customers_ + alpha_);
-    }
-  }
-
-  template <typename T>
-  T probT(const Dish& dish, const T& p0) const {
-    const typename DishMapType::const_iterator it = dish_counts_.find(dish);
-    const T r(num_tables_ * discount_ + alpha_);
-    if (it == dish_counts_.end()) {
-      return r * p0 / T(num_customers_ + alpha_);
-    } else {
-      return (T(it->second - discount_) + r * p0) /
-               T(num_customers_ + alpha_);
-    }
-  }
-
-  double log_crp_prob() const {
-    return log_crp_prob(discount_, alpha_);
-  }
-
-  static double log_beta_density(const double& x, const double& alpha, const double& beta) {
-    assert(x > 0.0);
-    assert(x < 1.0);
-    assert(alpha > 0.0);
-    assert(beta > 0.0);
-    const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
-    return lp;
-  }
-
-  static double log_gamma_density(const double& x, const double& shape, const double& rate) {
-    assert(x >= 0.0);
-    assert(shape > 0.0);
-    assert(rate > 0.0);
-    const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
-    return lp;
-  }
-
-  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
-  // does not include P_0's
-  double log_crp_prob(const double& discount, const double& alpha) const {
-    double lp = 0.0;
-    if (has_discount_prior())
-      lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_);
-    if (has_alpha_prior())
-      lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
-    assert(lp <= 0.0);
-    if (num_customers_) {
-      if (discount > 0.0) {
-        const double r = lgamma(1.0 - discount);
-        lp += lgamma(alpha) - lgamma(alpha + num_customers_)
-             + num_tables_ * log(discount) + lgamma(alpha / discount + num_tables_)
-             - lgamma(alpha / discount);
-        assert(std::isfinite(lp));
-        for (typename DishMapType::const_iterator it = dish_counts_.begin();
-             it != dish_counts_.end(); ++it) {
-          const unsigned& cur = it->second;
-          lp += lgamma(cur - discount) - r;
-        }
-      } else {
-        assert(!"not implemented yet");
-      }
-    }
-    assert(std::isfinite(lp));
-    return lp;
-  }
-
-  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
-    assert(has_discount_prior() || has_alpha_prior());
-    DiscountResampler dr(*this);
-    ConcentrationResampler cr(*this);
-    for (unsigned iter = 0; iter < nloop; ++iter) {
-      if (has_alpha_prior()) {
-        alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0,
-                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-      }
-      if (has_discount_prior()) {
-        discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits<double>::min(),
-                               1.0, 0.0, niterations, 100*niterations);
-      }
-    }
-    alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0,
-                             std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-  }
-
-  struct DiscountResampler {
-    DiscountResampler(const CCRP_OneTable& crp) : crp_(crp) {}
-    const CCRP_OneTable& crp_;
-    double operator()(const double& proposed_discount) const {
-      return crp_.log_crp_prob(proposed_discount, crp_.alpha_);
-    }
-  };
-
-  struct ConcentrationResampler {
-    ConcentrationResampler(const CCRP_OneTable& crp) : crp_(crp) {}
-    const CCRP_OneTable& crp_;
-    double operator()(const double& proposed_alpha) const {
-      return crp_.log_crp_prob(crp_.discount_, proposed_alpha);
-    }
-  };
-
-  void Print(std::ostream* out) const {
-    (*out) << "PYP(d=" << discount_ << ",c=" << alpha_ << ") customers=" << num_customers_ << std::endl;
-    for (typename DishMapType::const_iterator it = dish_counts_.begin(); it != dish_counts_.end(); ++it) {
-      (*out) << "  " << it->first << " = " << it->second << std::endl;
-    }
-  }
-
-  typedef typename DishMapType::const_iterator const_iterator;
-  const_iterator begin() const {
-    return dish_counts_.begin();
-  }
-  const_iterator end() const {
-    return dish_counts_.end();
-  }
-
-  unsigned num_tables_;
-  unsigned num_customers_;
-  DishMapType dish_counts_;
-
-  double discount_;
-  double alpha_;
-
-  // optional beta prior on discount_ (NaN if no prior)
-  double discount_prior_alpha_;
-  double discount_prior_beta_;
-
-  // optional gamma prior on alpha_ (NaN if no prior)
-  double alpha_prior_shape_;
-  double alpha_prior_rate_;
-};
-
-template <typename T,typename H>
-std::ostream& operator<<(std::ostream& o, const CCRP_OneTable<T,H>& c) {
-  c.Print(&o);
-  return o;
-}
-
-#endif
diff --git a/utils/crp_table_manager.h b/utils/crp_table_manager.h
deleted file mode 100644
index 753e721f..00000000
--- a/utils/crp_table_manager.h
+++ /dev/null
@@ -1,114 +0,0 @@
-#ifndef _CRP_TABLE_MANAGER_H_
-#define _CRP_TABLE_MANAGER_H_
-
-#include <iostream>
-#include "sparse_vector.h"
-#include "sampler.h"
-
-// these are helper classes for implementing token-based CRP samplers
-// basically the data structures recommended by Blunsom et al. in the Note.
-
-struct CRPHistogram {
-  //typedef std::map<unsigned, unsigned> MAPTYPE;
-  typedef SparseVector<unsigned> MAPTYPE;
-  typedef MAPTYPE::const_iterator const_iterator;
-
-  inline void increment(unsigned bin, unsigned delta = 1u) {
-    data[bin] += delta;
-  }
-  inline void decrement(unsigned bin, unsigned delta = 1u) {
-    unsigned r = data[bin] -= delta;
-    if (!r) data.erase(bin);
-  }
-  inline void move(unsigned from_bin, unsigned to_bin, unsigned delta = 1u) {
-    decrement(from_bin, delta);
-    increment(to_bin, delta);
-  }
-  inline const_iterator begin() const { return data.begin(); }
-  inline const_iterator end() const { return data.end(); }
-
- private:
-  MAPTYPE data;
-};
-
-// A CRPTableManager tracks statistics about all customers
-// and tables serving some dish in a CRP and can correctly sample what
-// table to remove a customer from and what table to join
-struct CRPTableManager {
-  CRPTableManager() : customers(), tables() {}
-
-  inline unsigned num_tables() const {
-    return tables;
-  }
-
-  inline unsigned num_customers() const {
-    return customers;
-  }
-
-  inline void create_table() {
-    h.increment(1);
-    ++tables;
-    ++customers;
-  }
-
-  // seat a customer at a table proportional to the number of customers seated at a table, less the discount
-  // *new tables are never created by this function!
-  inline void share_table(const double discount, MT19937* rng) {
-    const double z = customers - discount * num_tables();
-    double r = z * rng->next();
-    const CRPHistogram::const_iterator end = h.end();
-    CRPHistogram::const_iterator it = h.begin();
-    for (; it != end; ++it) {
-      // it->first = number of customers at table
-      // it->second = number of such tables
-      double thresh = (it->first - discount) * it->second;
-      if (thresh > r) break;
-      r -= thresh;
-    }
-    h.move(it->first, it->first + 1);
-    ++customers;
-  }
-
-  // randomly sample a customer
-  // *tables may be removed
-  // returns -1 if a table is removed, 0 otherwise
-  inline int remove_customer(MT19937* rng) {
-    int r = rng->next() * num_customers();
-    const CRPHistogram::const_iterator end = h.end();
-    CRPHistogram::const_iterator it = h.begin();
-    for (; it != end; ++it) {
-      int thresh = it->first * it->second;
-      if (thresh > r) break;
-      r -= thresh;
-    }
-    --customers;
-    const unsigned tc = it->first;
-    if (tc == 1) {
-      h.decrement(1);
-      --tables;
-      return -1;
-    } else {
-      h.move(tc, tc - 1);
-      return 0;
-    }
-  }
-
-  typedef CRPHistogram::const_iterator const_iterator;
-  const_iterator begin() const { return h.begin(); }
-  const_iterator end() const { return h.end(); }
-
-  unsigned customers;
-  unsigned tables;
-  CRPHistogram h;
-};
-
-std::ostream& operator<<(std::ostream& os, const CRPTableManager& tm) {
-  os << '[' << tm.num_customers() << " total customers at " << tm.num_tables() << " tables ||| ";
-  for (CRPHistogram::const_iterator it = tm.begin(); it != tm.end(); ++it) {
-    if (it != tm.h.begin()) os << "  --  ";
-    os << '(' << it->first << ") x " << it->second;
-  }
-  return os << ']';
-}
-
-#endif
diff --git a/utils/crp_test.cc b/utils/crp_test.cc
deleted file mode 100644
index 0cdb7afd..00000000
--- a/utils/crp_test.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <string>
-
-#define BOOST_TEST_MODULE CrpTest
-#include <boost/test/unit_test.hpp>
-#include <boost/test/floating_point_comparison.hpp>
-
-#include "ccrp.h"
-#include "sampler.h"
-
-using namespace std;
-
-MT19937 rng;
-
-BOOST_AUTO_TEST_CASE(Dist) {
-  CCRP<string> crp(0.1, 5);
-  double un = 0.25;
-  int tt = 0;
-  tt += crp.increment("hi", un, &rng);
-  tt += crp.increment("foo", un, &rng);
-  tt += crp.increment("bar", un, &rng);
-  tt += crp.increment("bar", un, &rng);
-  tt += crp.increment("bar", un, &rng);
-  tt += crp.increment("bar", un, &rng);
-  tt += crp.increment("bar", un, &rng);
-  tt += crp.increment("bar", un, &rng);
-  tt += crp.increment("bar", un, &rng);
-  cout << "tt=" << tt << endl;
-  cout << crp << endl;
-  cout << "  P(bar)=" << crp.prob("bar", un) << endl;
-  cout << "  P(hi)=" << crp.prob("hi", un) << endl;
-  cout << "  P(baz)=" << crp.prob("baz", un) << endl;
-  cout << "  P(foo)=" << crp.prob("foo", un) << endl;
-  double x = crp.prob("bar", un) + crp.prob("hi", un) + crp.prob("baz", un) + crp.prob("foo", un);
-  cout << "    tot=" << x << endl;
-  BOOST_CHECK_CLOSE(1.0, x, 1e-6);
-  tt += crp.decrement("hi", &rng);
-  tt += crp.decrement("bar", &rng);
-  cout << crp << endl;
-  tt += crp.decrement("bar", &rng);
-  cout << crp << endl;
-  cout << "tt=" << tt << endl;
-}
-
-BOOST_AUTO_TEST_CASE(Exchangability) {
-    double tot = 0;
-    double xt = 0;
-    CCRP<int> crp(0.5, 1.0);
-    int cust = 10;
-    vector<int> hist(cust + 1, 0);
-    for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); }
-    const int samples = 100000;
-    const bool simulate = true;
-    for (int k = 0; k < samples; ++k) {
-      if (!simulate) {
-        crp.clear();
-        for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); }
-      } else {
-        int da = rng.next() * cust;
-        bool a = rng.next() < 0.5;
-        if (a) {
-          for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); }
-          for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); }
-          xt += 1.0;
-        } else {
-          for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); }
-          for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); }
-        }
-      }
-      int c = crp.num_tables(1);
-      ++hist[c];
-      tot += c;
-    }
-    BOOST_CHECK_EQUAL(cust, crp.num_customers());
-    cerr << "P(a) = " << (xt / samples) << endl;
-    cerr << "E[num tables] = " << (tot / samples) << endl;
-    double error = fabs((tot / samples) - 5.4);
-    cerr << "  error = " << error << endl;
-    BOOST_CHECK_MESSAGE(error < 0.1, "error is too big = " << error);  // it's possible for this to fail, but
-                            // very, very unlikely
-    for (int i = 1; i <= cust; ++i)
-      cerr << i << ' ' << (hist[i]) << endl;
-}
-
-BOOST_AUTO_TEST_CASE(LP) {
-  CCRP<string> crp(1,1,1,1,0.1,50.0);
-  crp.increment("foo", 1.0, &rng);
-  cerr << crp.log_crp_prob() << endl;
-}
-
diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
index 9fe00459..590a60c4 100644
--- a/utils/fast_sparse_vector.h
+++ b/utils/fast_sparse_vector.h
@@ -522,7 +522,7 @@ const FastSparseVector<T> operator-(const FastSparseVector<T>& x, const FastSpar
 }
 
 template <class T>
-std::size_t hash_value(FastSparseVector<T> const& x) {
+std::size_t hash_value(FastSparseVector<T> const&) {
   assert(!"not implemented");
   return 0;
 }
diff --git a/utils/gamma_poisson.h b/utils/gamma_poisson.h
deleted file mode 100644
index fec763f6..00000000
--- a/utils/gamma_poisson.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _GAMMA_POISSON_H_
-#define _GAMMA_POISSON_H_
-
-#include <m.h>
-
-// http://en.wikipedia.org/wiki/Conjugate_prior
-struct GammaPoisson {
-  GammaPoisson(double shape, double rate) :
-    a(shape), b(rate), n(), marginal() {}
-
-  double prob(unsigned x) const {
-    return exp(Md::log_negative_binom(x, a + marginal, 1.0 - (b + n) / (1 + b + n)));
-  }
-
-  void increment(unsigned x) {
-    ++n;
-    marginal += x;
-  }
-
-  void decrement(unsigned x) {
-    --n;
-    marginal -= x;
-  }
-
-  double log_likelihood() const {
-    return 0;
-  }
-
-  double a, b;
-  int n, marginal;
-};
-
-#endif
diff --git a/utils/mfcr.h b/utils/mfcr.h
deleted file mode 100644
index 4aacb567..00000000
--- a/utils/mfcr.h
+++ /dev/null
@@ -1,370 +0,0 @@
-#ifndef _MFCR_H_
-#define _MFCR_H_
-
-#include <algorithm>
-#include <numeric>
-#include <cassert>
-#include <cmath>
-#include <list>
-#include <iostream>
-#include <vector>
-#include <iterator>
-#include <tr1/unordered_map>
-#include <boost/functional/hash.hpp>
-#include "sampler.h"
-#include "slice_sampler.h"
-#include "m.h"
-
-struct TableCount {
-  TableCount() : count(), floor() {}
-  TableCount(int c, int f) : count(c), floor(f) {
-    assert(f >= 0);
-  }
-  int count;               // count or delta (may be 0, <0, or >0)
-  unsigned char floor;     // from which floor?
-};
- 
-std::ostream& operator<<(std::ostream& o, const TableCount& tc) {
-  return o << "[c=" << tc.count << " floor=" << static_cast<unsigned int>(tc.floor) << ']';
-}
-
-// Multi-Floor Chinese Restaurant as proposed by Wood & Teh (AISTATS, 2009) to simulate
-// graphical Pitman-Yor processes.
-// http://jmlr.csail.mit.edu/proceedings/papers/v5/wood09a/wood09a.pdf
-//
-// Implementation is based on Blunsom, Cohn, Goldwater, & Johnson (ACL 2009) and code
-// referenced therein.
-// http://www.aclweb.org/anthology/P/P09/P09-2085.pdf
-//
-template <unsigned Floors, typename Dish, typename DishHash = boost::hash<Dish> >
-class MFCR {
- public:
-
-  MFCR(double d, double strength) :
-    num_tables_(),
-    num_customers_(),
-    discount_(d),
-    strength_(strength),
-    discount_prior_strength_(std::numeric_limits<double>::quiet_NaN()),
-    discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()),
-    strength_prior_shape_(std::numeric_limits<double>::quiet_NaN()),
-    strength_prior_rate_(std::numeric_limits<double>::quiet_NaN()) { check_hyperparameters(); }
-
-  MFCR(double discount_strength, double discount_beta, double strength_shape, double strength_rate, double d = 0.9, double strength = 10.0) :
-    num_tables_(),
-    num_customers_(),
-    discount_(d),
-    strength_(strength),
-    discount_prior_strength_(discount_strength),
-    discount_prior_beta_(discount_beta),
-    strength_prior_shape_(strength_shape),
-    strength_prior_rate_(strength_rate) { check_hyperparameters(); }
-
-  void check_hyperparameters() {
-    if (discount_ < 0.0 || discount_ >= 1.0) {
-      std::cerr << "Bad discount: " << discount_ << std::endl;
-      abort();
-    }
-    if (strength_ <= -discount_) {
-      std::cerr << "Bad strength: " << strength_ << " (discount=" << discount_ << ")" << std::endl;
-      abort();
-    }
-  }
-
-  double discount() const { return discount_; }
-  double strength() const { return strength_; }
-  void set_hyperparameters(double d, double s) {
-    discount_ = d; strength_ = s;
-    check_hyperparameters();
-  }
-  void set_discount(double d) { discount_ = d; check_hyperparameters(); }
-  void set_strength(double a) { strength_ = a; check_hyperparameters(); }
-
-  bool has_discount_prior() const {
-    return !std::isnan(discount_prior_strength_);
-  }
-
-  bool has_strength_prior() const {
-    return !std::isnan(strength_prior_shape_);
-  }
-
-  void clear() {
-    num_tables_ = 0;
-    num_customers_ = 0;
-    dish_locs_.clear();
-  }
-
-  unsigned num_tables() const {
-    return num_tables_;
-  }
-
-  unsigned num_tables(const Dish& dish) const {
-    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
-    if (it == dish_locs_.end()) return 0;
-    return it->second.table_counts_.size();
-  }
-
-  // this is not terribly efficient but it should not typically be necessary to execute this query
-  unsigned num_tables(const Dish& dish, const unsigned floor) const {
-    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
-    if (it == dish_locs_.end()) return 0;
-    unsigned c = 0;
-    for (typename std::list<TableCount>::const_iterator i = it->second.table_counts_.begin();
-         i != it->second.table_counts_.end(); ++i) {
-      if (i->floor == floor) ++c;
-    }
-    return c;
-  }
-
-  unsigned num_customers() const {
-    return num_customers_;
-  }
-
-  unsigned num_customers(const Dish& dish) const {
-    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
-    if (it == dish_locs_.end()) return 0;
-    return it->total_dish_count_;
-  }
-
-  // returns (delta, floor) indicating whether a new table (delta) was opened and on which floor
-  template <class InputIterator, class InputIterator2>
-  TableCount increment(const Dish& dish, InputIterator p0s, InputIterator2 lambdas, MT19937* rng) {
-    DishLocations& loc = dish_locs_[dish];
-    // marg_p0 = marginal probability of opening a new table on any floor with label dish
-    typedef typename std::iterator_traits<InputIterator>::value_type F;
-    const F marg_p0 = std::inner_product(p0s, p0s + Floors, lambdas, F(0.0));
-    assert(marg_p0 <= F(1.0001));
-    int floor = -1;
-    bool share_table = false;
-    if (loc.total_dish_count_) {
-      const F p_empty = F(strength_ + num_tables_ * discount_) * marg_p0;
-      const F p_share = F(loc.total_dish_count_ - loc.table_counts_.size() * discount_);
-      share_table = rng->SelectSample(p_empty, p_share);
-    }
-    if (share_table) {
-      // this can be done with doubles since P0 (which may be tiny) is not involved
-      double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_);
-      for (typename std::list<TableCount>::iterator ti = loc.table_counts_.begin();
-           ti != loc.table_counts_.end(); ++ti) {
-        r -= ti->count - discount_;
-        if (r <= 0.0) {
-          ++ti->count;
-          floor = ti->floor;
-          break;
-        }
-      }
-      if (r > 0.0) {
-        std::cerr << "Serious error: r=" << r << std::endl;
-        Print(&std::cerr);
-        assert(r <= 0.0);
-      }
-    } else { // sit at currently empty table -- must sample what floor
-      if (Floors == 1) {
-        floor = 0;
-      } else {
-        F r = F(rng->next()) * marg_p0;
-        for (unsigned i = 0; i < Floors; ++i) {
-          r -= (*p0s) * (*lambdas);
-          ++p0s;
-          ++lambdas;
-          if (r <= F(0.0)) {
-            floor = i;
-            break;
-          }
-        }
-      }
-      assert(floor >= 0);
-      loc.table_counts_.push_back(TableCount(1, floor));
-      ++num_tables_;
-    }
-    ++loc.total_dish_count_;
-    ++num_customers_;
-    return (share_table ? TableCount(0, floor) : TableCount(1, floor));
-  }
-
-  // returns first = -1 or 0, indicating whether a table was closed, and on what floor (second)
-  TableCount decrement(const Dish& dish, MT19937* rng) {
-    DishLocations& loc = dish_locs_[dish];
-    assert(loc.total_dish_count_);
-    int floor = -1;
-    int delta = 0;
-    if (loc.total_dish_count_ == 1) {
-      floor = loc.table_counts_.front().floor;
-      dish_locs_.erase(dish);
-      --num_tables_;
-      --num_customers_;
-      delta = -1;
-    } else {
-      // sample customer to remove UNIFORMLY. that is, do NOT use the d
-      // here. if you do, it will introduce (unwanted) bias!
-      double r = rng->next() * loc.total_dish_count_;
-      --loc.total_dish_count_;
-      --num_customers_;
-      for (typename std::list<TableCount>::iterator ti = loc.table_counts_.begin();
-           ti != loc.table_counts_.end(); ++ti) {
-        r -= ti->count;
-        if (r <= 0.0) {
-          floor = ti->floor;
-          if ((--ti->count) == 0) {
-            --num_tables_;
-            delta = -1;
-            loc.table_counts_.erase(ti);
-          }
-          break;
-        }
-      }
-      if (r > 0.0) {
-        std::cerr << "Serious error: r=" << r << std::endl;
-        Print(&std::cerr);
-        assert(r <= 0.0);
-      }
-    }
-    return TableCount(delta, floor);
-  }
-
-  template <class InputIterator, class InputIterator2>
-  typename std::iterator_traits<InputIterator>::value_type prob(const Dish& dish, InputIterator p0s, InputIterator2 lambdas) const {
-    typedef typename std::iterator_traits<InputIterator>::value_type F;
-    const F marg_p0 = std::inner_product(p0s, p0s + Floors, lambdas, F(0.0));
-    assert(marg_p0 <= F(1.0001));
-    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
-    const F r = F(num_tables_ * discount_ + strength_);
-    if (it == dish_locs_.end()) {
-      return r * marg_p0 / F(num_customers_ + strength_);
-    } else {
-      return (F(it->second.total_dish_count_ - discount_ * it->second.table_counts_.size()) + F(r * marg_p0)) /
-               F(num_customers_ + strength_);
-    }
-  }
-
-  double log_crp_prob() const {
-    return log_crp_prob(discount_, strength_);
-  }
-
-  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
-  // does not include draws from G_w's
-  double log_crp_prob(const double& discount, const double& strength) const {
-    double lp = 0.0;
-    if (has_discount_prior())
-      lp = Md::log_beta_density(discount, discount_prior_strength_, discount_prior_beta_);
-    if (has_strength_prior())
-      lp += Md::log_gamma_density(strength + discount, strength_prior_shape_, strength_prior_rate_);
-    assert(lp <= 0.0);
-    if (num_customers_) {
-      if (discount > 0.0) {
-        const double r = lgamma(1.0 - discount);
-        if (strength)
-          lp += lgamma(strength) - lgamma(strength / discount);
-        lp += - lgamma(strength + num_customers_)
-             + num_tables_ * log(discount) + lgamma(strength / discount + num_tables_);
-        assert(std::isfinite(lp));
-        for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
-             it != dish_locs_.end(); ++it) {
-          const DishLocations& cur = it->second;
-          for (std::list<TableCount>::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) {
-            lp += lgamma(ti->count - discount) - r;
-          }
-        }
-      } else if (!discount) { // discount == 0.0
-        lp += lgamma(strength) + num_tables_ * log(strength) - lgamma(strength + num_tables_);
-        assert(std::isfinite(lp));
-        for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
-             it != dish_locs_.end(); ++it) {
-          const DishLocations& cur = it->second;
-          lp += lgamma(cur.table_counts_.size());
-        }
-      } else {
-        assert(!"discount less than 0 detected!");
-      }
-    }
-    assert(std::isfinite(lp));
-    return lp;
-  }
-
-  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
-    assert(has_discount_prior() || has_strength_prior());
-    DiscountResampler dr(*this);
-    StrengthResampler sr(*this);
-    for (int iter = 0; iter < nloop; ++iter) {
-      if (has_strength_prior()) {
-        strength_ = slice_sampler1d(sr, strength_, *rng, -discount_,
-                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-      }
-      if (has_discount_prior()) {
-        double min_discount = std::numeric_limits<double>::min();
-        if (strength_ < 0.0) min_discount -= strength_;
-        discount_ = slice_sampler1d(dr, discount_, *rng, min_discount,
-                               1.0, 0.0, niterations, 100*niterations);
-      }
-    }
-    strength_ = slice_sampler1d(sr, strength_, *rng, -discount_,
-                             std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-  }
-
-  struct DiscountResampler {
-    DiscountResampler(const MFCR& crp) : crp_(crp) {}
-    const MFCR& crp_;
-    double operator()(const double& proposed_d) const {
-      return crp_.log_crp_prob(proposed_d, crp_.strength_);
-    }
-  };
-
-  struct StrengthResampler {
-    StrengthResampler(const MFCR& crp) : crp_(crp) {}
-    const MFCR& crp_;
-    double operator()(const double& proposediscount_strength) const {
-      return crp_.log_crp_prob(crp_.discount_, proposediscount_strength);
-    }
-  };
-
-  struct DishLocations {
-    DishLocations() : total_dish_count_() {}
-    unsigned total_dish_count_;          // customers at all tables with this dish
-    std::list<TableCount> table_counts_; // list<> gives O(1) deletion and insertion, which we want
-                                         // .size() is the number of tables for this dish
-  };
-
-  void Print(std::ostream* out) const {
-    (*out) << "MFCR<" << Floors << ">(d=" << discount_ << ",strength=" << strength_ << ") customers=" << num_customers_ << std::endl;
-    for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
-         it != dish_locs_.end(); ++it) {
-      (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): ";
-      for (typename std::list<TableCount>::const_iterator i = it->second.table_counts_.begin();
-           i != it->second.table_counts_.end(); ++i) {
-        (*out) << " " << *i;
-      }
-      (*out) << std::endl;
-    }
-  }
-
-  typedef typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator const_iterator;
-  const_iterator begin() const {
-    return dish_locs_.begin();
-  }
-  const_iterator end() const {
-    return dish_locs_.end();
-  }
-
-  unsigned num_tables_;
-  unsigned num_customers_;
-  std::tr1::unordered_map<Dish, DishLocations, DishHash> dish_locs_;
-
-  double discount_;
-  double strength_;
-
-  // optional beta prior on discount_ (NaN if no prior)
-  double discount_prior_strength_;
-  double discount_prior_beta_;
-
-  // optional gamma prior on strength_ (NaN if no prior)
-  double strength_prior_shape_;
-  double strength_prior_rate_;
-};
-
-template <unsigned N,typename T,typename H>
-std::ostream& operator<<(std::ostream& o, const MFCR<N,T,H>& c) {
-  c.Print(&o);
-  return o;
-}
-
-#endif
diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc
deleted file mode 100644
index 29a1a2ce..00000000
--- a/utils/mfcr_test.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-#include "mfcr.h"
-
-#include <iostream>
-#include <cassert>
-#include <cmath>
-
-#define BOOST_TEST_MODULE MFCRTest
-#include <boost/test/unit_test.hpp>
-#include <boost/test/floating_point_comparison.hpp>
-
-#include "sampler.h"
-
-using namespace std;
-
-BOOST_AUTO_TEST_CASE(Exchangability) {
-  MT19937 r;
-  MT19937* rng = &r;
-  MFCR<2, int> crp(0.5, 3.0);
-  vector<double> lambdas(2);
-  vector<double> p0s(2);
-  lambdas[0] = 0.2;
-  lambdas[1] = 0.8;
-  p0s[0] = 1.0;
-  p0s[1] = 1.0;
-
-  double tot = 0;
-  double tot2 = 0;
-  double xt = 0;
-  int cust = 10;
-  vector<int> hist(cust + 1, 0), hist2(cust + 1, 0);
-  for (int i = 0; i < cust; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); }
-  const int samples = 100000;
-  const bool simulate = true;
-  for (int k = 0; k < samples; ++k) {
-    if (!simulate) {
-      crp.clear();
-      for (int i = 0; i < cust; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); }
-    } else {
-      int da = rng->next() * cust;
-      bool a = rng->next() < 0.45;
-      if (a) {
-        for (int i = 0; i < da; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); }
-        for (int i = 0; i < da; ++i) { crp.decrement(1, rng); }
-        xt += 1.0;
-      } else {
-        for (int i = 0; i < da; ++i) { crp.decrement(1, rng); }
-        for (int i = 0; i < da; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); }
-      }
-    }
-    int c = crp.num_tables(1);
-    ++hist[c];
-    tot += c;
-    int c2 = crp.num_tables(1,0);  // tables on floor 0 with dish 1
-    ++hist2[c2];
-    tot2 += c2;
-  }
-  cerr << cust << " = " << crp.num_customers() << endl;
-  cerr << "P(a) = " << (xt / samples) << endl;
-  cerr << "E[num tables] = " << (tot / samples) << endl;
-  double error = fabs((tot / samples) - 6.894);
-  cerr << "   error = " << error << endl;
-  for (int i = 1; i <= cust; ++i)
-    cerr << i << ' ' << (hist[i]) << endl;
-  cerr << "E[num tables on floor 0] = " << (tot2 / samples) << endl;
-  double error2 = fabs((tot2 / samples) - 1.379);
-  cerr << "  error2 = " << error2 << endl;
-  for (int i = 1; i <= cust; ++i)
-    cerr << i << ' ' << (hist2[i]) << endl;
-  assert(error < 0.05);   // these can fail with very low probability
-  assert(error2 < 0.05);
-};
-
diff --git a/utils/sampler.h b/utils/sampler.h
index 3e4a4086..88e1856c 100644
--- a/utils/sampler.h
+++ b/utils/sampler.h
@@ -19,7 +19,7 @@
 
 #include "prob.h"
 
-template <typename F> struct SampleSet;
+template <typename F> class SampleSet;
 
 template <typename RNG>
 struct RandomNumberGenerator {
diff --git a/utils/slice_sampler.h b/utils/slice_sampler.h
deleted file mode 100644
index aa48a169..00000000
--- a/utils/slice_sampler.h
+++ /dev/null
@@ -1,191 +0,0 @@
-//! slice-sampler.h is an MCMC slice sampler
-//!
-//! Mark Johnson, 1st August 2008
-
-#ifndef SLICE_SAMPLER_H
-#define SLICE_SAMPLER_H
-
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <iostream>
-#include <limits>
-
-//! slice_sampler_rfc_type{} returns the value of a user-specified
-//! function if the argument is within range, or - infinity otherwise
-//
-template <typename F, typename Fn, typename U>
-struct slice_sampler_rfc_type {
-  F min_x, max_x;
-  const Fn& f;
-  U max_nfeval, nfeval;
-  slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval) 
-    : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { }
-    
-  F operator() (F x) {
-    if (min_x < x && x < max_x) {
-      assert(++nfeval <= max_nfeval);
-      F fx = f(x);
-      assert(std::isfinite(fx));
-      return fx;
-    }
-      return -std::numeric_limits<F>::infinity();
-  }
-};  // slice_sampler_rfc_type{}
-
-//! slice_sampler1d() implements the univariate "range doubling" slice sampler
-//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767.
-//
-template <typename F, typename LogF, typename Uniform01>
-F slice_sampler1d(const LogF& logF0,               //!< log of function to sample
-		  F x,                             //!< starting point
-		  Uniform01& u01,                  //!< uniform [0,1) random number generator
-		  F min_x = -std::numeric_limits<F>::infinity(),  //!< minimum value of support
-		  F max_x = std::numeric_limits<F>::infinity(),   //!< maximum value of support
-		  F w = 0.0,                       //!< guess at initial width
-		  unsigned nsamples=1,             //!< number of samples to draw
-		  unsigned max_nfeval=200)         //!< max number of function evaluations
-{
-  typedef unsigned U;
-  slice_sampler_rfc_type<F,LogF,U> logF(min_x, max_x, logF0, max_nfeval);
-
-  assert(std::isfinite(x));
-
-  if (w <= 0.0) {                           // set w to a default width 
-    if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
-      w = (max_x - min_x)/4;
-    else
-      w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1);
-  }
-  assert(std::isfinite(w));
-
-  F logFx = logF(x);
-  for (U sample = 0; sample < nsamples; ++sample) {
-    F logY = logFx + log(u01()+1e-100);     //! slice logFx at this value
-    assert(std::isfinite(logY));
-
-    F xl = x - w*u01();                     //! lower bound on slice interval
-    F logFxl = logF(xl);
-    F xr = xl + w;                          //! upper bound on slice interval
-    F logFxr = logF(xr);
-
-    while (logY < logFxl || logY < logFxr)  // doubling procedure
-      if (u01() < 0.5) 
-	logFxl = logF(xl -= xr - xl);
-      else
-	logFxr = logF(xr += xr - xl);
-	
-    F xl1 = xl;
-    F xr1 = xr;
-    while (true) {                          // shrinking procedure
-      F x1 = xl1 + u01()*(xr1 - xl1);
-      if (logY < logF(x1)) {
-	F xl2 = xl;                         // acceptance procedure
-	F xr2 = xr; 
-	bool d = false;
-	while (xr2 - xl2 > 1.1*w) {
-	  F xm = (xl2 + xr2)/2;
-	  if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm))
-	    d = true;
-	  if (x1 < xm)
-	    xr2 = xm;
-	  else
-	    xl2 = xm;
-	  if (d && logY >= logF(xl2) && logY >= logF(xr2))
-	    goto unacceptable;
-	}
-	x = x1;
-	goto acceptable;
-      }
-      goto acceptable;
-    unacceptable:
-      if (x1 < x)                           // rest of shrinking procedure
-	xl1 = x1;
-      else 
-	xr1 = x1;
-    }
-  acceptable:
-    w = (4*w + (xr1 - xl1))/5;              // update width estimate
-  }
-  return x;
-}
-
-/*
-//! slice_sampler1d() implements a 1-d MCMC slice sampler.
-//! It should be correct for unimodal distributions, but
-//! not for multimodal ones.
-//
-template <typename F, typename LogP, typename Uniform01>
-F slice_sampler1d(const LogP& logP,     //!< log of distribution to sample
-		  F x,                  //!< initial sample
-		  Uniform01& u01,       //!< uniform random number generator
-		  F min_x = -std::numeric_limits<F>::infinity(),  //!< minimum value of support
-		  F max_x = std::numeric_limits<F>::infinity(),   //!< maximum value of support
-		  F w = 0.0,            //!< guess at initial width
-		  unsigned nsamples=1,  //!< number of samples to draw
-		  unsigned max_nfeval=200)  //!< max number of function evaluations
-{
-  typedef unsigned U;
-  assert(std::isfinite(x));
-  if (w <= 0.0) {
-    if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
-      w = (max_x - min_x)/4;
-    else
-      w = std::max(((x < 0.0) ? -x : x)/4, 0.1);
-  }
-  // TRACE4(x, min_x, max_x, w);
-  F logPx = logP(x);
-  assert(std::isfinite(logPx));
-  U nfeval = 1;
-  for (U sample = 0; sample < nsamples; ++sample) {
-    F x0 = x;
-    F logU = logPx + log(u01()+1e-100);
-    assert(std::isfinite(logU));
-    F r = u01();
-    F xl = std::max(min_x, x - r*w);
-    F xr = std::min(max_x, x + (1-r)*w);
-    // TRACE3(x, logPx, logU);
-    while (xl > min_x && logP(xl) > logU) {
-      xl -= w;
-      w *= 2;
-      ++nfeval;
-      if (nfeval >= max_nfeval)
-	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl;
-      assert(nfeval < max_nfeval);
-    }
-    xl = std::max(xl, min_x);
-    while (xr < max_x && logP(xr) > logU) {
-      xr += w;
-      w *= 2;
-      ++nfeval;
-      if (nfeval >= max_nfeval)
-	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl;
-      assert(nfeval < max_nfeval);
-    }
-    xr = std::min(xr, max_x);
-    while (true) {
-      r = u01();
-      x = r*xl + (1-r)*xr;
-      assert(std::isfinite(x));
-      logPx = logP(x);
-      // TRACE4(logPx, x, xl, xr);
-      assert(std::isfinite(logPx));
-      ++nfeval;
-      if (nfeval >= max_nfeval)
-	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl;
-      assert(nfeval < max_nfeval);
-      if (logPx > logU)
-        break;
-      else if (x > x0)
-          xr = x;
-        else
-          xl = x;
-    }
-    // w = (4*w + (xr-xl))/5;   // gradually adjust w
-  }
-  // TRACE2(logPx, x);
-  return x;
-}  // slice_sampler1d()
-*/
-
-#endif  // SLICE_SAMPLER_H
diff --git a/utils/small_vector.h b/utils/small_vector.h
index 894b1b32..c8a69927 100644
--- a/utils/small_vector.h
+++ b/utils/small_vector.h
@@ -66,7 +66,7 @@ class SmallVector {
   //TODO: figure out iterator traits to allow this to be selcted for any iterator range
   template <class I>
   SmallVector(I const* begin,I const* end) {
-    int s=end-begin;
+    unsigned s=end-begin;
     Alloc(s);
     if (s <= SV_MAX) {
       for (unsigned i = 0; i < s; ++i,++begin) data_.vals[i] = *begin;
diff --git a/utils/stringlib.h b/utils/stringlib.h
index 75772c4d..ff5dc89d 100644
--- a/utils/stringlib.h
+++ b/utils/stringlib.h
@@ -86,7 +86,7 @@ bool match_begin(Str const& str,Prefix const& prefix)
 // source will be returned as a string, target must be a sentence or
 // a lattice (in PLF format) and will be returned as a Lattice object
 void ParseTranslatorInput(const std::string& line, std::string* input, std::string* ref);
-struct Lattice;
+class Lattice;
 void ParseTranslatorInputLattice(const std::string& line, std::string* input, Lattice* ref);
 
 inline std::string Trim(const std::string& str, const std::string& dropChars = " \t") {
diff --git a/utils/unigram_pyp_lm.cc b/utils/unigram_pyp_lm.cc
deleted file mode 100644
index 30b9fde1..00000000
--- a/utils/unigram_pyp_lm.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "corpus_tools.h"
-#include "m.h"
-#include "tdict.h"
-#include "sampler.h"
-#include "ccrp.h"
-#include "gamma_poisson.h"
-
-// A not very memory-efficient implementation of an 1-gram LM based on PYPs
-// as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model
-// based on Pitman-Yor Processes. In Proc. ACL.
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-boost::shared_ptr<MT19937> prng;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("samples,n",po::value<unsigned>()->default_value(50),"Number of samples")
-        ("train,i",po::value<string>(),"Training data file")
-        ("test,T",po::value<string>(),"Test data file")
-        ("discount_prior_a,a",po::value<double>()->default_value(1.0), "discount ~ Beta(a,b): a=this")
-        ("discount_prior_b,b",po::value<double>()->default_value(1.0), "discount ~ Beta(a,b): b=this")
-        ("strength_prior_s,s",po::value<double>()->default_value(1.0), "strength ~ Gamma(s,r): s=this")
-        ("strength_prior_r,r",po::value<double>()->default_value(1.0), "strength ~ Gamma(s,r): r=this")
-        ("random_seed,S",po::value<uint32_t>(), "Random seed");
-  po::options_description clo("Command line options");
-  clo.add_options()
-        ("config", po::value<string>(), "Configuration file")
-        ("help", "Print this help message and exit");
-  po::options_description dconfig_options, dcmdline_options;
-  dconfig_options.add(opts);
-  dcmdline_options.add(opts).add(clo);
-  
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("config")) {
-    ifstream config((*conf)["config"].as<string>().c_str());
-    po::store(po::parse_config_file(config, dconfig_options), *conf);
-  }
-  po::notify(*conf);
-
-  if (conf->count("help") || (conf->count("train") == 0)) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-struct Histogram {
-  void increment(unsigned bin, unsigned delta = 1u) {
-    data[bin] += delta;
-  }
-  void decrement(unsigned bin, unsigned delta = 1u) {
-    data[bin] -= delta;
-  }
-  void move(unsigned from_bin, unsigned to_bin, unsigned delta = 1u) {
-    decrement(from_bin, delta);
-    increment(to_bin, delta);
-  }
-  map<unsigned, unsigned> data;
-  // SparseVector<unsigned> data;
-};
-
-// Lord Rothschild. 1986. THE DISTRIBUTION OF ENGLISH DICTIONARY WORD LENGTHS.
-// Journal of Statistical Planning and Inference 14 (1986) 311-322
-struct PoissonLengthUniformCharWordModel {
-  explicit PoissonLengthUniformCharWordModel(unsigned vocab_size) : plen(5,5), uc(-log(50)), llh() {}
-  void increment(WordID w, MT19937*) {
-    llh += log(prob(w)); // this isn't quite right
-    plen.increment(TD::Convert(w).size() - 1);
-  }
-  void decrement(WordID w, MT19937*) {
-    plen.decrement(TD::Convert(w).size() - 1);
-    llh -= log(prob(w)); // this isn't quite right
-  }
-  double prob(WordID w) const {
-    size_t len = TD::Convert(w).size();
-    return plen.prob(len - 1) * exp(uc * len);
-  }
-  double log_likelihood() const { return llh; }
-  void resample_hyperparameters(MT19937*) {}
-  GammaPoisson plen;
-  const double uc;
-  double llh;
-};
-
-// uniform base distribution (0-gram model)
-struct UniformWordModel {
-  explicit UniformWordModel(unsigned vocab_size) : p0(1.0 / vocab_size), draws() {}
-  void increment(WordID, MT19937*) { ++draws; }
-  void decrement(WordID, MT19937*) { --draws; assert(draws >= 0); }
-  double prob(WordID) const { return p0; } // all words have equal prob
-  double log_likelihood() const { return draws * log(p0); }
-  void resample_hyperparameters(MT19937*) {}
-  const double p0;
-  int draws;
-};
-
-// represents an Unigram LM
-template <class BaseGenerator>
-struct UnigramLM {
-  UnigramLM(unsigned vs, double da, double db, double ss, double sr) :
-      base(vs),
-      crp(da, db, ss, sr, 0.8, 1.0) {}
-  void increment(WordID w, MT19937* rng) {
-    const double backoff = base.prob(w);
-    if (crp.increment(w, backoff, rng))
-      base.increment(w, rng);
-  }
-  void decrement(WordID w, MT19937* rng) {
-    if (crp.decrement(w, rng))
-      base.decrement(w, rng);
-  }
-  double prob(WordID w) const {
-    const double backoff = base.prob(w);
-    return crp.prob(w, backoff);
-  }
-
-  double log_likelihood() const {
-    double llh = base.log_likelihood();
-    llh += crp.log_crp_prob();
-    return llh;
-  }
-
-  void resample_hyperparameters(MT19937* rng) {
-    crp.resample_hyperparameters(rng);
-    base.resample_hyperparameters(rng);
-  }
-
-  double discount_a, discount_b, strength_s, strength_r;
-  double d, strength;
-  BaseGenerator base;
-  CCRP<WordID> crp;
-};
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-
-  InitCommandLine(argc, argv, &conf);
-  const unsigned samples = conf["samples"].as<unsigned>();
-  if (conf.count("random_seed"))
-    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
-  else
-    prng.reset(new MT19937);
-  MT19937& rng = *prng;
-  vector<vector<WordID> > corpuse;
-  set<WordID> vocabe;
-  const WordID kEOS = TD::Convert("</s>");
-  cerr << "Reading corpus...\n";
-  CorpusTools::ReadFromFile(conf["train"].as<string>(), &corpuse, &vocabe);
-  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
-  vector<vector<WordID> > test;
-  if (conf.count("test"))
-    CorpusTools::ReadFromFile(conf["test"].as<string>(), &test);
-  else
-    test = corpuse;
-#if 1
-  UnigramLM<PoissonLengthUniformCharWordModel> lm(vocabe.size(),
-#else
-  UnigramLM<UniformWordModel> lm(vocabe.size(),
-#endif
-                                 conf["discount_prior_a"].as<double>(),
-                                 conf["discount_prior_b"].as<double>(),
-                                 conf["strength_prior_s"].as<double>(),
-                                 conf["strength_prior_r"].as<double>());
-  for (unsigned SS=0; SS < samples; ++SS) {
-    for (unsigned ci = 0; ci < corpuse.size(); ++ci) {
-      const vector<WordID>& s = corpuse[ci];
-      for (unsigned i = 0; i <= s.size(); ++i) {
-        WordID w = (i < s.size() ? s[i] : kEOS);
-        if (SS > 0) lm.decrement(w, &rng);
-        lm.increment(w, &rng);
-      }
-      if (SS > 0) lm.decrement(kEOS, &rng);
-      lm.increment(kEOS, &rng);
-    }
-    cerr << "LLH=" << lm.log_likelihood() << "\t tables=" << lm.crp.num_tables() << " " << endl;
-    if (SS % 10 == 9) lm.resample_hyperparameters(&rng);
-  }
-  double llh = 0;
-  unsigned cnt = 0;
-  unsigned oovs = 0;
-  for (unsigned ci = 0; ci < test.size(); ++ci) {
-    const vector<WordID>& s = test[ci];
-    for (unsigned i = 0; i <= s.size(); ++i) {
-      WordID w = (i < s.size() ? s[i] : kEOS);
-      double lp = log(lm.prob(w)) / log(2);
-      if (i < s.size() && vocabe.count(w) == 0) {
-        cerr << "**OOV ";
-        ++oovs;
-        //lp = 0;
-      }
-      cerr << "p(" << TD::Convert(w) << ") = " << lp << endl;
-      llh -= lp;
-      cnt++;
-    }
-  }
-  cerr << "  Log_10 prob: " << (-llh * log(2) / log(10)) << endl;
-  cerr << "        Count: " << cnt << endl;
-  cerr << "         OOVs: " << oovs << endl;
-  cerr << "Cross-entropy: " << (llh / cnt) << endl;
-  cerr << "   Perplexity: " << pow(2, llh / cnt) << endl;
-  return 0;
-}
-
-- 
cgit v1.2.3


From c9c7536ebd387479c2d39c8f1fa91bc047e0cac5 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cab.ark.cs.cmu.edu>
Date: Tue, 2 Oct 2012 00:27:21 -0400
Subject: fix build

---
 configure.ac      | 1 -
 utils/Makefile.am | 6 ------
 2 files changed, 7 deletions(-)

(limited to 'utils')

diff --git a/configure.ac b/configure.ac
index 07ef9fe1..70e8e932 100644
--- a/configure.ac
+++ b/configure.ac
@@ -124,7 +124,6 @@ AC_CONFIG_FILES([klm/util/Makefile])
 AC_CONFIG_FILES([klm/lm/Makefile])
 AC_CONFIG_FILES([mira/Makefile])
 AC_CONFIG_FILES([dtrain/Makefile])
-AC_CONFIG_FILES([rst_parser/Makefile])
 
 AC_CONFIG_FILES([python/setup.py])
 
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 55d97354..3ad9d69e 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -45,18 +45,12 @@ m_test_SOURCES = m_test.cc
 m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
 dict_test_SOURCES = dict_test.cc
 dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
-mfcr_test_SOURCES = mfcr_test.cc
-mfcr_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
 weights_test_SOURCES = weights_test.cc
 weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
-crp_test_SOURCES = crp_test.cc
-crp_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
 logval_test_SOURCES = logval_test.cc
 logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
 small_vector_test_SOURCES = small_vector_test.cc
 small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
-unigram_pyp_lm_SOURCES = unigram_pyp_lm.cc
-unigram_pyp_lm_LDADD = libutils.a -lz
 
 ################################################################
 # do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
-- 
cgit v1.2.3


From 21a8287a2e1451db41c35494647c7b8c3e7e5adc Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cab.ark.cs.cmu.edu>
Date: Mon, 15 Oct 2012 23:20:41 -0400
Subject: get rid of nested class that was causing header pollution

---
 decoder/decoder.cc |  14 ++---
 decoder/ff.cc      |   4 +-
 decoder/hg.h       | 180 ++++++++++++++++++++---------------------------------
 decoder/hg_io.cc   |   4 +-
 utils/weights.cc   |   8 +--
 5 files changed, 83 insertions(+), 127 deletions(-)

(limited to 'utils')

diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index a69a6d05..47b298b9 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -871,13 +871,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
     if (rp.fid_summary) {
       if (summary_feature_type == kEDGE_PROB) {
         const prob_t z = forest.PushWeightsToGoal(1.0);
-        if (!isfinite(log(z)) || isnan(log(z))) {
+        if (!std::isfinite(log(z)) || std::isnan(log(z))) {
           cerr << "  " << passtr << " !!! Invalid partition detected, abandoning.\n";
         } else {
           for (int i = 0; i < forest.edges_.size(); ++i) {
             const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge
                                                                               // head node by forest.PushWeightsToGoal
-            if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) {
+            if (!std::isfinite(log_prob_transition) || std::isnan(log_prob_transition)) {
               cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl;
               abort();
             }
@@ -889,7 +889,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
       } else if (summary_feature_type == kNODE_RISK) {
         Hypergraph::EdgeProbs posts;
         const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
-        if (!isfinite(log(z)) || isnan(log(z))) {
+        if (!std::isfinite(log(z)) || std::isnan(log(z))) {
           cerr << "  " << passtr << " !!! Invalid partition detected, abandoning.\n";
         } else {
           for (int i = 0; i < forest.nodes_.size(); ++i) {
@@ -898,7 +898,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
             for (int j = 0; j < in_edges.size(); ++j)
               node_post += (posts[in_edges[j]] / z);
             const double log_np = log(node_post);
-            if (!isfinite(log_np) || isnan(log_np)) {
+            if (!std::isfinite(log_np) || std::isnan(log_np)) {
               cerr << "got bad posterior prob for node " << i << endl;
               abort();
             }
@@ -913,13 +913,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
       } else if (summary_feature_type == kEDGE_RISK) {
         Hypergraph::EdgeProbs posts;
         const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
-        if (!isfinite(log(z)) || isnan(log(z))) {
+        if (!std::isfinite(log(z)) || std::isnan(log(z))) {
           cerr << "  " << passtr << " !!! Invalid partition detected, abandoning.\n";
         } else {
           assert(posts.size() == forest.edges_.size());
           for (int i = 0; i < posts.size(); ++i) {
             const double log_np = log(posts[i] / z);
-            if (!isfinite(log_np) || isnan(log_np)) {
+            if (!std::isfinite(log_np) || std::isnan(log_np)) {
               cerr << "got bad posterior prob for node " << i << endl;
               abort();
             }
@@ -1090,7 +1090,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
           cerr << "DIFF. ERR! log_z < log_ref_z: " << log_z << " " << log_ref_z << endl;
           exit(1);
         }
-        assert(!isnan(log_ref_z));
+        assert(!std::isnan(log_ref_z));
         ref_exp -= full_exp;
         acc_vec += ref_exp;
         acc_obj += (log_z - log_ref_z);
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 557e0b5f..008fcad4 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -175,7 +175,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
                                  Hypergraph::Edge* edge,
                                  FFState* context,
                                  prob_t* combination_cost_estimate) const {
-  edge->reset_info();
+  //edge->reset_info();
   context->resize(state_size_);
   if (state_size_ > 0) {
     memset(&(*context)[0], 0, state_size_);
@@ -203,7 +203,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
 
 void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const {
   assert(1 == edge->rule_->Arity());
-  edge->reset_info();
+  //edge->reset_info();
   for (int i = 0; i < models_.size(); ++i) {
     const FeatureFunction& ff = *models_[i];
     const void* ant_state = NULL;
diff --git a/decoder/hg.h b/decoder/hg.h
index 6d67f2fa..f53d2fd2 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -33,47 +33,20 @@
 // slow
 #undef HG_EDGES_TOPO_SORTED
 
-class Hypergraph;
-typedef boost::shared_ptr<Hypergraph> HypergraphP;
-
-// class representing an acyclic hypergraph
-//  - edges have 1 head, 0..n tails
-class Hypergraph {
-public:
-  Hypergraph() : is_linear_chain_(false) {}
+// SmallVector is a fast, small vector<int> implementation for sizes <= 2
+typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
+typedef std::vector<int> EdgesVector; // indices in edges_
 
-  // SmallVector is a fast, small vector<int> implementation for sizes <= 2
-  typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
-  typedef std::vector<int> EdgesVector; // indices in edges_
-
-  // TODO get rid of cat_?
-  // TODO keep cat_ and add span and/or state? :)
-  struct Node {
-    Node() : id_(), cat_() {}
-    int id_; // equal to this object's position in the nodes_ vector
-    WordID cat_;  // non-terminal category if <0, 0 if not set
-    WordID NT() const { return -cat_; }
-    EdgesVector in_edges_;   // an in edge is an edge with this node as its head.  (in edges come from the bottom up to us)  indices in edges_
-    EdgesVector out_edges_;  // an out edge is an edge with this node as its tail.  (out edges leave us up toward the top/goal). indices in edges_
-    void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
-      cat_=o.cat_;
-    }
-    void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
-      copy_fixed(o);
-      id_=n2[id_];
-      e2.reindex_push_back(o.in_edges_,in_edges_);
-      e2.reindex_push_back(o.out_edges_,out_edges_);
-    }
-  };
+enum {
+  NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+};
 
+namespace HG {
 
-  // TODO get rid of edge_prob_? (can be computed on the fly as the dot
-  // product of the weight vector and the feature values)
   struct Edge {
-//    int poplimit; //TODO: cube pruning per edge limit?  per node didn't work well at all.  also, inside cost + outside(node) is the same information i'd use to set a per-edge limit anyway - and nonmonotonicity in cube pruning may mean it's good to favor edge (in same node) w/ relatively worse score
     Edge() : i_(-1), j_(-1), prev_i_(-1), prev_j_(-1) {}
     Edge(int id,Edge const& copy_pod_from) : id_(id) { copy_pod(copy_pod_from); } // call copy_features yourself later.
-    Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after
+    Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after
       : tail_nodes_(tail),id_(id) { copy_pod(copy_from);copy_features(copy_from); }
     inline int Arity() const { return tail_nodes_.size(); }
     int head_node_;               // refers to a position in nodes_
@@ -83,8 +56,6 @@ public:
     prob_t edge_prob_;             // dot product of weights and feat_values
     int id_;   // equal to this object's position in the edges_ vector
 
-    //FIXME: these span ids belong in Node, not Edge, right?  every node should have the same spans.
-
     // span info. typically, i_ and j_ refer to indices in the source sentence.
     // In synchronous parsing, i_ and j_ will refer to target sentence/lattice indices
     // while prev_i_ prev_j_ will refer to positions in the source.
@@ -97,54 +68,6 @@ public:
     short int j_;
     short int prev_i_;
     short int prev_j_;
-
-    void copy_info(Edge const& o) {
-#if USE_INFO_EDGE
-      set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space).  it's empty if nobody put any info there.
-#else
-      (void) o;
-#endif
-    }
-    void copy_pod(Edge const& o) {
-      rule_=o.rule_;
-      i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
-    }
-    void copy_features(Edge const& o) {
-      feature_values_=o.feature_values_;
-      copy_info(o);
-    }
-    void copy_fixed(Edge const& o) {
-      copy_pod(o);
-      copy_features(o);
-      edge_prob_ = o.edge_prob_;
-    }
-    void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
-      copy_fixed(o);
-      head_node_=n2[o.head_node_];
-      id_=e2[o.id_];
-      n2.reindex_push_back(o.tail_nodes_,tail_nodes_);
-    }
-
-#if USE_INFO_EDGE
-    std::ostringstream info_;
-    void set_info(std::string const& s) {
-      info_.str(s);
-      info_.seekp(0,std::ios_base::end);
-    }
-    Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str(),std::ios_base::ate) {
-//      info_.seekp(0,std::ios_base::end);
- }
-    void operator=(Edge const& o) {
-      head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
-      set_info(o.info_.str());
-    }
-    std::string info() const { return info_.str(); }
-    void reset_info() { info_.str(""); info_.clear(); }
-#else
-    std::string info() const { return std::string(); }
-    void reset_info() {  }
-    void set_info(std::string const& ) {  }
-#endif
     void show(std::ostream &o,unsigned mask=SPAN|RULE) const {
       o<<'{';
       if (mask&CATEGORY)
@@ -159,10 +82,6 @@ public:
         o<<' '<<feature_values_;
       if (mask&RULE)
         o<<' '<<rule_->AsString(mask&RULE_LHS);
-      if (USE_INFO_EDGE) {
-        std::string const& i=info();
-        if (mask&&!i.empty()) o << " |||"<<i; // remember, the initial space is expected as part of i
-      }
       o<<'}';
     }
     std::string show(unsigned mask=SPAN|RULE) const {
@@ -170,12 +89,28 @@ public:
       show(o,mask);
       return o.str();
     }
-    /* generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
-
-       FIXME: make kbest create a simple derivation-tree structure (could be a
-       hg), and replace the list-of-edges viterbi.h with a tree-structured one.
-       CreateViterbiHypergraph can do for 1best, though.
-    */
+    void copy_pod(Edge const& o) {
+      rule_=o.rule_;
+      i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
+    }
+    void copy_features(Edge const& o) {
+      feature_values_=o.feature_values_;
+    }
+    void copy_fixed(Edge const& o) {
+      copy_pod(o);
+      copy_features(o);
+      edge_prob_ = o.edge_prob_;
+    }
+    void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
+      copy_fixed(o);
+      head_node_=n2[o.head_node_];
+      id_=e2[o.id_];
+      n2.reindex_push_back(o.tail_nodes_,tail_nodes_);
+    }
+    // generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
+    //   FIXME: make kbest create a simple derivation-tree structure (could be a
+    //   hg), and replace the list-of-edges viterbi.h with a tree-structured one.
+    //   CreateViterbiHypergraph can do for 1best, though.
     template <class EdgeRecurse,class TEdgeHandle>
     std::string derivation_tree(EdgeRecurse const& re,TEdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
       std::ostringstream o;
@@ -203,7 +138,43 @@ public:
     }
   };
 
-  // all this info ought to live in Node, but for some reason it's on Edges.
+  // TODO get rid of cat_?
+  // TODO keep cat_ and add span and/or state? :)
+  struct Node {
+    Node() : id_(), cat_() {}
+    int id_; // equal to this object's position in the nodes_ vector
+    WordID cat_;  // non-terminal category if <0, 0 if not set
+    WordID NT() const { return -cat_; }
+    EdgesVector in_edges_;   // an in edge is an edge with this node as its head.  (in edges come from the bottom up to us)  indices in edges_
+    EdgesVector out_edges_;  // an out edge is an edge with this node as its tail.  (out edges leave us up toward the top/goal). indices in edges_
+    void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
+      cat_=o.cat_;
+    }
+    void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
+      copy_fixed(o);
+      id_=n2[id_];
+      e2.reindex_push_back(o.in_edges_,in_edges_);
+      e2.reindex_push_back(o.out_edges_,out_edges_);
+    }
+  };
+
+} // namespace HG
+
+class Hypergraph;
+typedef boost::shared_ptr<Hypergraph> HypergraphP;
+// class representing an acyclic hypergraph
+//  - edges have 1 head, 0..n tails
+class Hypergraph {
+public:
+  Hypergraph() : is_linear_chain_(false) {}
+  typedef HG::Node Node;
+  typedef HG::Edge Edge;
+  typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
+  typedef std::vector<int> EdgesVector; // indices in edges_
+  enum {
+    NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+  };
+
   // except for stateful models that have split nt,span, this should identify the node
   void SetNodeOrigin(int nodeid,NTSpan &r) const {
     Node const &n=nodes_[nodeid];
@@ -230,18 +201,9 @@ public:
     }
     return s;
   }
-  // 0 if none, -TD index otherwise (just like in rule)
   WordID NodeLHS(int nodeid) const {
     Node const &n=nodes_[nodeid];
     return n.NT();
-    /*
-    if (!n.in_edges_.empty()) {
-      Edge const& e=edges_[n.in_edges_.front()];
-      if (e.rule_)
-        return -e.rule_->lhs_;
-    }
-    return 0;
-    */
   }
 
   typedef std::vector<prob_t> EdgeProbs;
@@ -250,14 +212,8 @@ public:
   typedef std::vector<bool> NodeMask;
 
   std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
-// builds viterbi hg and returns it formatted as a pretty string
-
-  enum {
-    NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
-  };
 
   std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
-  // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first
 
   typedef Edge const* EdgeHandle;
   EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const {
@@ -334,7 +290,7 @@ public:
   Edge* AddEdge(Edge const& in_edge, const TailNodeVector& tail) {
     edges_.push_back(Edge(edges_.size(),in_edge));
     Edge* edge = &edges_.back();
-    edge->copy_features(in_edge);
+    edge->feature_values_ = in_edge.feature_values_;
     edge->tail_nodes_ = tail; // possibly faster than copying to Edge() constructed above then copying via push_back.  perhaps optimized it's the same.
     index_tails(*edge);
     return edge;
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index 3a68a429..8f604c89 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -392,8 +392,8 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses
         const Hypergraph::Edge& e = hg.edges_[hg.nodes_[i].out_edges_[j]];
         const string output = e.rule_->e_.size() ==2 ? Escape(TD::Convert(e.rule_->e_[1])) : EPS;
         double prob = log(e.edge_prob_);
-        if (isinf(prob)) { prob = -9e20; }
-        if (isnan(prob)) { prob = 0; }
+        if (std::isinf(prob)) { prob = -9e20; }
+        if (std::isnan(prob)) { prob = 0; }
         os << "('" << output << "'," << prob << "," << e.head_node_ - i << "),";
       }
       os << "),";
diff --git a/utils/weights.cc b/utils/weights.cc
index f56e2a20..575877b6 100644
--- a/utils/weights.cc
+++ b/utils/weights.cc
@@ -34,7 +34,7 @@ void Weights::InitFromFile(const string& filename,
     int weight_count = 0;
     bool fl = false;
     string buf;
-    weight_t val = 0;
+    double val = 0;
     while (in) {
       getline(in, buf);
       if (buf.size() == 0) continue;
@@ -53,7 +53,7 @@ void Weights::InitFromFile(const string& filename,
       if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); }
       while(end < buf.size() && buf[end] == ' ') ++end;
       val = strtod(&buf.c_str()[end], NULL);
-      if (isnan(val)) {
+      if (std::isnan(val)) {
         cerr << FD::Convert(fid) << " has weight NaN!\n";
         abort();
       }
@@ -127,8 +127,8 @@ void Weights::InitSparseVector(const vector<weight_t>& dv,
 
 void Weights::SanityCheck(const vector<weight_t>& w) {
   for (unsigned i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
+    assert(!std::isnan(w[i]));
+    assert(!std::isinf(w[i]));
   }
 }
 
-- 
cgit v1.2.3


From c7b1dc8eabd50eefb7403ce36d2746f2df39e30e Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Sat, 15 Dec 2012 02:53:56 -0500
Subject: enable kenlm compression

---
 configure.ac                 | 26 ++++++++++++++++++--------
 decoder/Makefile.am          | 11 +++++------
 example_extff/Makefile.am    |  2 +-
 klm/util/have.hh             |  3 +--
 mteval/Makefile.am           |  6 +++---
 python/setup.py.in           |  2 +-
 training/dpmert/Makefile.am  | 10 +++++-----
 training/dtrain/Makefile.am  |  2 +-
 training/minrisk/Makefile.am |  2 +-
 training/mira/Makefile.am    |  2 +-
 training/pro/Makefile.am     |  4 ++--
 training/rampion/Makefile.am |  2 +-
 training/utils/Makefile.am   |  4 ++--
 utils/Makefile.am            | 18 +++++++++---------
 word-aligner/Makefile.am     |  2 +-
 15 files changed, 52 insertions(+), 44 deletions(-)

(limited to 'utils')

diff --git a/configure.ac b/configure.ac
index f4650ca4..eabb8645 100644
--- a/configure.ac
+++ b/configure.ac
@@ -18,6 +18,23 @@ BOOST_TEST
 AM_PATH_PYTHON
 AC_CHECK_HEADER(dlfcn.h,AC_DEFINE(HAVE_DLFCN_H))
 AC_CHECK_LIB(dl, dlopen)
+AC_CHECK_HEADERS(zlib.h,
+    AC_CHECK_LIB(z, gzread,[
+        AC_DEFINE(HAVE_ZLIB,[],[Do we have zlib])
+        ZLIBS="$ZLIBS -lz"
+    ]))
+
+AC_CHECK_HEADERS(bzlib.h,
+    AC_CHECK_LIB(bz2, BZ2_bzReadOpen,[
+        AC_DEFINE(HAVE_BZLIB,[],[Do we have bzlib])
+        ZLIBS="$ZLIBS -lbz2"
+    ]))
+
+AC_CHECK_HEADERS(lzma.h,
+    AC_CHECK_LIB(lzma, lzma_code,[
+        AC_DEFINE(HAVE_XZLIB,[],[Do we have lzma])
+        ZLIBS="$ZLIBS -llzma"
+    ]))
 
 AC_ARG_ENABLE(mpi,
  [ --enable-mpi  Build MPI binaries, assumes mpi.h is present ],
@@ -72,19 +89,12 @@ fi
 CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
 LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS $BOOST_SERIALIZATION_LDFLAGS $BOOST_SYSTEM_LDFLAGS"
 # $BOOST_THREAD_LDFLAGS"
-LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS"
+LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS $ZLIBS"
 # $BOOST_THREAD_LIBS"
 
 AC_CHECK_HEADER(google/dense_hash_map,
                [AC_DEFINE([HAVE_SPARSEHASH], [1], [flag for google::dense_hash_map])])
 
-AC_CHECK_HEADER(zlib.h,
-               [AC_DEFINE([HAVE_ZLIB], [1], [zlib])])
-AC_CHECK_HEADER(bzlib.h,
-               [AC_DEFINE([HAVE_BZLIB], [1], [bzlib])])
-AC_CHECK_HEADER(lzma.h,
-               [AC_DEFINE([HAVE_XZLIB], [1], [xzlib])])
-
 AC_PROG_INSTALL
 
 CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H -DKENLM_MAX_ORDER=6"
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 6914fa0f..88a6116c 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -8,16 +8,16 @@ noinst_PROGRAMS = \
  
 TESTS = trule_test parser_test grammar_test hg_test
 parser_test_SOURCES = parser_test.cc
-parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
+parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a
 grammar_test_SOURCES = grammar_test.cc
-grammar_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
+grammar_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a
 hg_test_SOURCES = hg_test.cc
-hg_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
+hg_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a
 trule_test_SOURCES = trule_test.cc
-trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
+trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a
 
 cdec_SOURCES = cdec.cc
-cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a
 
 AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm
 
@@ -82,4 +82,3 @@ libcdec_a_SOURCES = \
   JSON_parser.c \
   json_parse.cc \
   grammar.cc
-
diff --git a/example_extff/Makefile.am b/example_extff/Makefile.am
index ac2694ca..7b7c34b5 100644
--- a/example_extff/Makefile.am
+++ b/example_extff/Makefile.am
@@ -1,4 +1,4 @@
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm -I../decoder
+AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I.. -I../mteval -I../utils -I../klm -I../decoder
 
 lib_LTLIBRARIES = libff_example.la
 libff_example_la_SOURCES = ff_example.cc
diff --git a/klm/util/have.hh b/klm/util/have.hh
index b86ba11e..85b838e4 100644
--- a/klm/util/have.hh
+++ b/klm/util/have.hh
@@ -11,8 +11,7 @@
 #endif
 
 #ifdef HAVE_CONFIG_H
-// Chris; uncomment this line.  
-//#include "config.h"
+#include "config.h"
 #endif
 
 #endif // UTIL_HAVE__
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 5e9bba91..4444285f 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -23,12 +23,12 @@ libmteval_a_SOURCES = \
   ter.cc
 
 fast_score_SOURCES = fast_score.cc
-fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
+fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a
 
 mbr_kbest_SOURCES = mbr_kbest.cc
-mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
+mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a
 
 scorer_test_SOURCES = scorer_test.cc
-scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 
 AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils
diff --git a/python/setup.py.in b/python/setup.py.in
index dac72903..fa8a9f5e 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -17,7 +17,7 @@ ext_modules = [
         sources=['src/_cdec.cpp'],
         include_dirs=INC,
         library_dirs=LIB,
-        libraries=LIBS + ['z', 'cdec', 'utils', 'mteval', 'training_utils', 'klm', 'klm_util', 'ksearch'],
+        libraries=['cdec', 'utils', 'mteval', 'training_utils', 'klm', 'klm_util', 'ksearch'] + LIBS,
         extra_compile_args=CPPFLAGS,
         extra_link_args=LDFLAGS),
     Extension(name='cdec.sa._sa',
diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am
index ff318bef..3dbdfa69 100644
--- a/training/dpmert/Makefile.am
+++ b/training/dpmert/Makefile.am
@@ -8,18 +8,18 @@ noinst_PROGRAMS = \
 TESTS = lo_test
 
 mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc
-mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
 # nbest2hg_SOURCES = nbest2hg.cc
-# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz
+# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst
 
 mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc
-mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
 mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc
-mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
 lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc
-lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
 AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am
index 5b48e756..4f51b0c8 100644
--- a/training/dtrain/Makefile.am
+++ b/training/dtrain/Makefile.am
@@ -1,7 +1,7 @@
 bin_PROGRAMS = dtrain
 
 dtrain_SOURCES = dtrain.cc score.cc
-dtrain_LDADD   = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+dtrain_LDADD   = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
 
diff --git a/training/minrisk/Makefile.am b/training/minrisk/Makefile.am
index a15e821e..821730c2 100644
--- a/training/minrisk/Makefile.am
+++ b/training/minrisk/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = minrisk_optimize
 
 minrisk_optimize_SOURCES = minrisk_optimize.cc
-minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a -lz
+minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a
 
 AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils
diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am
index ae609ede..c8f404fb 100644
--- a/training/mira/Makefile.am
+++ b/training/mira/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = kbest_mira
 
 kbest_mira_SOURCES = kbest_mira.cc
-kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/pro/Makefile.am b/training/pro/Makefile.am
index 1916b6b2..e0a45a33 100644
--- a/training/pro/Makefile.am
+++ b/training/pro/Makefile.am
@@ -3,9 +3,9 @@ bin_PROGRAMS = \
   mr_pro_reduce
 
 mr_pro_map_SOURCES = mr_pro_map.cc
-mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
 mr_pro_reduce_SOURCES = mr_pro_reduce.cc
-mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz
+mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils -I$(top_srcdir)/training
diff --git a/training/rampion/Makefile.am b/training/rampion/Makefile.am
index 1633d0f7..ef0ca147 100644
--- a/training/rampion/Makefile.am
+++ b/training/rampion/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = rampion_cccp
 
 rampion_cccp_SOURCES = rampion_cccp.cc
-rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
 AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils
diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am
index 189d9a76..c9405d4e 100644
--- a/training/utils/Makefile.am
+++ b/training/utils/Makefile.am
@@ -24,10 +24,10 @@ libtraining_utils_a_SOURCES = \
   risk.cc
 
 optimize_test_SOURCES = optimize_test.cc
-optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a -lz
+optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a
 
 lbfgs_test_SOURCES = lbfgs_test.cc
-lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a -lz
+lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I$(top_srcdir)/klm
 
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 3ad9d69e..639c30b8 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -33,24 +33,24 @@ if HAVE_CMPH
 endif
 
 reconstruct_weights_SOURCES = reconstruct_weights.cc
-reconstruct_weights_LDADD = libutils.a -lz
+reconstruct_weights_LDADD = libutils.a
 atools_SOURCES = atools.cc
-atools_LDADD = libutils.a -lz
+atools_LDADD = libutils.a
 
 phmt_SOURCES = phmt.cc
-phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 ts_SOURCES = ts.cc
-ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 m_test_SOURCES = m_test.cc
-m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 dict_test_SOURCES = dict_test.cc
-dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 weights_test_SOURCES = weights_test.cc
-weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 logval_test_SOURCES = logval_test.cc
-logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 small_vector_test_SOURCES = small_vector_test.cc
-small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 
 ################################################################
 # do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am
index 280d3ae7..2dcb688e 100644
--- a/word-aligner/Makefile.am
+++ b/word-aligner/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = fast_align
 
 fast_align_SOURCES = fast_align.cc ttables.cc
-fast_align_LDADD = $(top_srcdir)/utils/libutils.a -lz
+fast_align_LDADD = $(top_srcdir)/utils/libutils.a
 
 AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training
-- 
cgit v1.2.3


From 41bc60a856dc2d0bf9659b443c0cd03be8016db7 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Tue, 8 Jan 2013 15:44:45 -0500
Subject: add header files to sources to create correct distributions

---
 configure.ac                  |  5 ++--
 decoder/Makefile.am           | 70 +++++++++++++++++++++++++++++++++++++++++++
 klm/lm/Makefile.am            | 27 +++++++++++++++++
 klm/search/Makefile.am        | 14 ++++++++-
 klm/util/Makefile.am          | 20 +++++++++++++
 mteval/Makefile.am            | 11 +++++++
 training/crf/Makefile.am      |  4 +--
 training/dpmert/Makefile.am   |  6 ++--
 training/dtrain/Makefile.am   |  2 +-
 training/liblbfgs/Makefile.am |  9 +++++-
 training/utils/Makefile.am    |  7 +++++
 utils/Makefile.am             | 48 +++++++++++++++++++++++++++++
 word-aligner/Makefile.am      |  2 +-
 13 files changed, 214 insertions(+), 11 deletions(-)

(limited to 'utils')

diff --git a/configure.ac b/configure.ac
index eabb8645..dcd0a0d8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,6 @@
-AC_INIT
-AM_INIT_AUTOMAKE(cdec,0.1)
+AC_INIT([cdec],[1.0])
+AC_CONFIG_SRCDIR([decoder/cdec.cc])
+AM_INIT_AUTOMAKE
 AC_CONFIG_HEADERS(config.h)
 AC_PROG_LIBTOOL
 AC_PROG_LEX
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 88a6116c..21187da8 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -26,7 +26,77 @@ rule_lexer.cc: rule_lexer.ll
 
 noinst_LIBRARIES = libcdec.a
 
+EXTRA_DIST = rule_lexer.ll
+
 libcdec_a_SOURCES = \
+  JSON_parser.h \
+  aligner.h \
+  apply_fsa_models.h \
+  apply_models.h \
+  bottom_up_parser.h \
+  cfg.h \
+  cfg_binarize.h \
+  cfg_format.h \
+  cfg_options.h \
+  csplit.h \
+  decoder.h \
+  dwarf.h \
+  earley_composer.h \
+  exp_semiring.h \
+  factored_lexicon_helper.h \
+  ff.h \
+  ff_basic.h \
+  ff_bleu.h \
+  ff_charset.h \
+  ff_context.h \
+  ff_csplit.h \
+  ff_dwarf.h \
+  ff_external.h \
+  ff_factory.h \
+  ff_klm.h \
+  ff_lm.h \
+  ff_ngrams.h \
+  ff_register.h \
+  ff_rules.h \
+  ff_ruleshape.h \
+  ff_sample_fsa.h \
+  ff_source_syntax.h \
+  ff_spans.h \
+  ff_tagger.h \
+  ff_wordalign.h \
+  ff_wordset.h \
+  ffset.h \
+  forest_writer.h \
+  freqdict.h \
+  grammar.h \
+  hg.h \
+  hg_cfg.h \
+  hg_intersect.h \
+  hg_io.h \
+  hg_remove_eps.h \
+  hg_sampler.h \
+  hg_test.h \
+  hg_union.h \
+  incremental.h \
+  inside_outside.h \
+  json_parse.h \
+  kbest.h \
+  lattice.h \
+  lexalign.h \
+  lextrans.h \
+  nt_span.h \
+  oracle_bleu.h \
+  phrasebased_translator.h \
+  phrasetable_fst.h \
+  program_options.h \
+  rule_lexer.h \
+  sentence_metadata.h \
+  sentences.h \
+  tagger.h \
+  translator.h \
+  tromble_loss.h \
+  trule.h \
+  viterbi.h \
   forest_writer.cc \
   maxtrans_blunsom.cc \
   cdec_ff.cc \
diff --git a/klm/lm/Makefile.am b/klm/lm/Makefile.am
index a12c5f03..436cfd08 100644
--- a/klm/lm/Makefile.am
+++ b/klm/lm/Makefile.am
@@ -12,6 +12,33 @@ build_binary_LDADD = libklm.a ../util/libklm_util.a -lz
 noinst_LIBRARIES = libklm.a
 
 libklm_a_SOURCES = \
+  bhiksha.hh \
+  binary_format.hh \
+  blank.hh \
+  config.hh \
+  enumerate_vocab.hh \
+  facade.hh \
+  left.hh \
+  lm_exception.hh \
+  max_order.hh \
+  model.hh \
+  model_type.hh \
+  ngram_query.hh \
+  partial.hh \
+  quantize.hh \
+  read_arpa.hh \
+  return.hh \
+  search_hashed.hh \
+  search_trie.hh \
+  state.hh \
+  trie.hh \
+  trie_sort.hh \
+  value.hh \
+  value_build.hh \
+  virtual_interface.hh \
+  vocab.hh \
+  weights.hh \
+  word_index.hh \
   bhiksha.cc \
   binary_format.cc \
   config.cc \
diff --git a/klm/search/Makefile.am b/klm/search/Makefile.am
index 5aea33c2..a34f6cea 100644
--- a/klm/search/Makefile.am
+++ b/klm/search/Makefile.am
@@ -1,11 +1,23 @@
 noinst_LIBRARIES = libksearch.a
 
 libksearch_a_SOURCES = \
+  applied.hh \
+  config.hh \
+  context.hh \
+  dedupe.hh \
+  edge.hh \
+  edge_generator.hh \
+  header.hh \
+  nbest.hh \
+  rule.hh \
+  types.hh \
+  vertex.hh \
+  vertex_generator.hh \
   edge_generator.cc \
 	nbest.cc \
   rule.cc \
   vertex.cc \
   vertex_generator.cc
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I..
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I..
 
diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am
index a676bdb3..bb441432 100644
--- a/klm/util/Makefile.am
+++ b/klm/util/Makefile.am
@@ -19,6 +19,26 @@
 noinst_LIBRARIES = libklm_util.a
 
 libklm_util_a_SOURCES = \
+  bit_packing.hh \
+  ersatz_progress.hh \
+  exception.hh \
+  file.hh \
+  file_piece.hh \
+  getopt.hh \
+  have.hh \
+  joint_sort.hh \
+  mmap.hh \
+  murmur_hash.hh \
+  pool.hh \
+  probing_hash_table.hh \
+  proxy_iterator.hh \
+  read_compressed.hh \
+  scoped.hh \
+  sized_iterator.hh \
+  sorted_uniform.hh \
+  string_piece.hh \
+  tokenize_piece.hh \
+  usage.hh \
   ersatz_progress.cc \
   bit_packing.cc \
   exception.cc \
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 4444285f..b19e4bb1 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -9,6 +9,17 @@ TESTS = scorer_test
 noinst_LIBRARIES = libmteval.a
 
 libmteval_a_SOURCES = \
+  aer_scorer.h \
+  comb_scorer.h \
+  external_scorer.h \
+  ns.h \
+  ns_cer.h \
+  ns_comb.h \
+  ns_docscorer.h \
+  ns_ext.h \
+  ns_ter.h \
+  scorer.h \
+  ter.h \
   aer_scorer.cc \
   comb_scorer.cc \
   external_scorer.cc \
diff --git a/training/crf/Makefile.am b/training/crf/Makefile.am
index d203df25..f72d8f92 100644
--- a/training/crf/Makefile.am
+++ b/training/crf/Makefile.am
@@ -18,10 +18,10 @@ mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/
 mpi_extract_features_SOURCES = mpi_extract_features.cc
 mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
 
-mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc
+mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc cllh_observer.h
 mpi_batch_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
 
-mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc
+mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc cllh_observer.h
 mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
 
 AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/training -I$(top_srcdir)/training/utils -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am
index 3dbdfa69..e5f13944 100644
--- a/training/dpmert/Makefile.am
+++ b/training/dpmert/Makefile.am
@@ -13,13 +13,13 @@ mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_sr
 # nbest2hg_SOURCES = nbest2hg.cc
 # nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst
 
-mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc
+mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h
 mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
-mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc
+mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc ces.h error_surface.h line_optimizer.h mert_geometry.h
 mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
-lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc
+lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h
 lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
 
 AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am
index 4f51b0c8..ee337ca8 100644
--- a/training/dtrain/Makefile.am
+++ b/training/dtrain/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = dtrain
 
-dtrain_SOURCES = dtrain.cc score.cc
+dtrain_SOURCES = dtrain.cc score.cc dtrain.h kbestget.h ksampler.h pairsampling.h score.h
 dtrain_LDADD   = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/liblbfgs/Makefile.am b/training/liblbfgs/Makefile.am
index 64a3794d..f0d5c8aa 100644
--- a/training/liblbfgs/Makefile.am
+++ b/training/liblbfgs/Makefile.am
@@ -6,10 +6,17 @@ ll_test_LDADD = liblbfgs.a -lz
 
 noinst_LIBRARIES = liblbfgs.a
 
-liblbfgs_a_SOURCES = lbfgs.c
+liblbfgs_a_SOURCES = \
+  lbfgs.c \
+  arithmetic_ansi.h \
+  arithmetic_sse_double.h \
+  arithmetic_sse_float.h \
+  lbfgs++.h \
+  lbfgs.h
 
 ################################################################
 # do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
 AM_LDFLAGS = liblbfgs.a -lz
 AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I..
 ################################################################
+
diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am
index d708a9f5..a2ab86fd 100644
--- a/training/utils/Makefile.am
+++ b/training/utils/Makefile.am
@@ -18,6 +18,13 @@ sentclient_LDFLAGS = -pthread
 TESTS = lbfgs_test optimize_test
 
 libtraining_utils_a_SOURCES = \
+  candidate_set.h \
+  entropy.h \
+  lbfgs.h \
+  online_optimizer.h \
+  optimize.h \
+  risk.h \
+  sentserver.h \
   candidate_set.cc \
   entropy.cc \
   optimize.cc \
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 639c30b8..3177325b 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -14,6 +14,53 @@ TESTS = ts small_vector_test logval_test weights_test dict_test m_test
 noinst_LIBRARIES = libutils.a
 
 libutils_a_SOURCES = \
+  alias_sampler.h \
+  alignment_io.h \
+  array2d.h \
+  b64tools.h \
+  batched_append.h \
+  city.h \
+  citycrc.h \
+  corpus_tools.h \
+  dict.h \
+  fast_sparse_vector.h \
+  fdict.h \
+  feature_vector.h \
+  filelib.h \
+  gzstream.h \
+  hash.h \
+  have_64_bits.h \
+  indices_after.h \
+  kernel_string_subseq.h \
+  logval.h \
+  m.h \
+  murmur_hash.h \
+  named_enum.h \
+  null_deleter.h \
+  null_traits.h \
+  perfect_hash.h \
+  prob.h \
+  sampler.h \
+  semiring.h \
+  show.h \
+  small_vector.h \
+  sparse_vector.h \
+  static_utoa.h \
+  stringlib.h \
+  swap_pod.h \
+  tdict.h \
+  timing_stats.h \
+  utoa.h \
+  value_array.h \
+  verbose.h \
+  warning_compiler.h \
+  warning_pop.h \
+  warning_push.h \
+  weights.h \
+  wordid.h \
+  writer.h \
+  fast_lexical_cast.hpp \
+  intrusive_refcount.hpp \
   alignment_io.cc \
   b64tools.cc \
   corpus_tools.cc \
@@ -56,3 +103,4 @@ small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOS
 # do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
 AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I.
 ################################################################
+
diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am
index 2dcb688e..e274b209 100644
--- a/word-aligner/Makefile.am
+++ b/word-aligner/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = fast_align
 
-fast_align_SOURCES = fast_align.cc ttables.cc
+fast_align_SOURCES = fast_align.cc ttables.cc da.h ttables.h
 fast_align_LDADD = $(top_srcdir)/utils/libutils.a
 
 AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training
-- 
cgit v1.2.3


From abb935570a86c6c21c5bb3a95e07763247bd4963 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Tue, 8 Jan 2013 22:22:28 -0500
Subject: fixes for dist

---
 decoder/Makefile.am           |  6 +++---
 decoder/cfg_test.cc           |  2 +-
 decoder/grammar_test.cc       |  4 ++--
 decoder/hg_test.cc            |  2 +-
 example_extff/Makefile.am     |  2 +-
 klm/lm/Makefile.am            |  2 +-
 klm/search/Makefile.am        |  2 +-
 klm/util/Makefile.am          |  2 +-
 mteval/Makefile.am            | 10 ++++++----
 mteval/scorer_test.cc         |  2 +-
 training/crf/Makefile.am      | 12 ++++++------
 training/dpmert/Makefile.am   | 12 +++++++-----
 training/dpmert/lo_test.cc    |  2 +-
 training/dtrain/Makefile.am   |  2 +-
 training/liblbfgs/Makefile.am |  2 +-
 training/minrisk/Makefile.am  |  4 ++--
 training/mira/Makefile.am     |  2 +-
 training/pro/Makefile.am      |  4 ++--
 training/rampion/Makefile.am  |  2 +-
 training/utils/Makefile.am    |  6 +++---
 utils/Makefile.am             |  5 ++++-
 utils/weights_test.cc         |  2 +-
 word-aligner/Makefile.am      |  4 ++--
 23 files changed, 50 insertions(+), 43 deletions(-)

(limited to 'utils')

diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 21187da8..558aeaed 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -5,7 +5,7 @@ noinst_PROGRAMS = \
   hg_test \
   parser_test \
   grammar_test
- 
+
 TESTS = trule_test parser_test grammar_test hg_test
 parser_test_SOURCES = parser_test.cc
 parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a
@@ -19,14 +19,14 @@ trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEW
 cdec_SOURCES = cdec.cc
 cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a
 
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm
+AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm
 
 rule_lexer.cc: rule_lexer.ll
 	$(LEX) -s -CF -8 -o$@ $<
 
 noinst_LIBRARIES = libcdec.a
 
-EXTRA_DIST = rule_lexer.ll
+EXTRA_DIST = test_data rule_lexer.ll
 
 libcdec_a_SOURCES = \
   JSON_parser.h \
diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc
index 316c6d16..cbe7d0be 100644
--- a/decoder/cfg_test.cc
+++ b/decoder/cfg_test.cc
@@ -33,7 +33,7 @@ struct CFGTest : public TestWithParam<HgW> {
     istringstream ws(wts);
     EXPECT_TRUE(ws>>featw);
     CSHOW(featw)
-    std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+    std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
     HGSetup::JsonTestFile(&hg,path,file);
     hg.Reweight(featw);
     cfg.Init(hg,true,true,false);
diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc
index 912f4f12..6d2c6e67 100644
--- a/decoder/grammar_test.cc
+++ b/decoder/grammar_test.cc
@@ -19,7 +19,7 @@ using namespace std;
 
 struct GrammarTest {
   GrammarTest() {
-    std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+    std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
     Weights::InitFromFile(path + "/weights.gt", &wts);
   }
   vector<weight_t> wts;
@@ -43,7 +43,7 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) {
 }
 
 BOOST_AUTO_TEST_CASE(TestTextGrammarFile) {
-  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
   GrammarPtr g(new TextGrammar(path + "/grammar.prune"));
   vector<GrammarPtr> grammars(1, g);
 
diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc
index 37469748..8519e559 100644
--- a/decoder/hg_test.cc
+++ b/decoder/hg_test.cc
@@ -339,7 +339,7 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) {
 
 BOOST_AUTO_TEST_CASE(Small) {
   Hypergraph hg;
-  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
   CreateSmallHG(&hg, path);
   SparseVector<double> wts;
   wts.set_value(FD::Convert("Model_0"), -2.0);
diff --git a/example_extff/Makefile.am b/example_extff/Makefile.am
index 7b7c34b5..6abfe6c5 100644
--- a/example_extff/Makefile.am
+++ b/example_extff/Makefile.am
@@ -1,4 +1,4 @@
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I.. -I../mteval -I../utils -I../klm -I../decoder
+AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm -I$(top_srcdir)/decoder
 
 lib_LTLIBRARIES = libff_example.la
 libff_example_la_SOURCES = ff_example.cc
diff --git a/klm/lm/Makefile.am b/klm/lm/Makefile.am
index 436cfd08..870f7128 100644
--- a/klm/lm/Makefile.am
+++ b/klm/lm/Makefile.am
@@ -55,5 +55,5 @@ libklm_a_SOURCES = \
   virtual_interface.cc \
   vocab.cc
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I..
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm
 
diff --git a/klm/search/Makefile.am b/klm/search/Makefile.am
index a34f6cea..03554276 100644
--- a/klm/search/Makefile.am
+++ b/klm/search/Makefile.am
@@ -19,5 +19,5 @@ libksearch_a_SOURCES = \
   vertex.cc \
   vertex_generator.cc
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I..
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm
 
diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am
index bb441432..3ab7560f 100644
--- a/klm/util/Makefile.am
+++ b/klm/util/Makefile.am
@@ -51,4 +51,4 @@ libklm_util_a_SOURCES = \
   string_piece.cc \
 	usage.cc
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I..
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index b19e4bb1..83adee17 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -8,6 +8,8 @@ TESTS = scorer_test
 
 noinst_LIBRARIES = libmteval.a
 
+EXTRA_DIST = test_data
+
 libmteval_a_SOURCES = \
   aer_scorer.h \
   comb_scorer.h \
@@ -34,12 +36,12 @@ libmteval_a_SOURCES = \
   ter.cc
 
 fast_score_SOURCES = fast_score.cc
-fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a
+fast_score_LDADD = libmteval.a ../utils/libutils.a
 
 mbr_kbest_SOURCES = mbr_kbest.cc
-mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a
+mbr_kbest_LDADD = libmteval.a ../utils/libutils.a
 
 scorer_test_SOURCES = scorer_test.cc
-scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
+scorer_test_LDADD = libmteval.a ../utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils
+AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/mteval/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/utils
diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc
index 9b765d0f..da07f154 100644
--- a/mteval/scorer_test.cc
+++ b/mteval/scorer_test.cc
@@ -36,7 +36,7 @@ struct Stuff {
 BOOST_FIXTURE_TEST_SUITE( s, Stuff );
 
 BOOST_AUTO_TEST_CASE(TestCreateFromFiles) {
-  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
   vector<string> files;
   files.push_back(path + "/re.txt.0");
   files.push_back(path + "/re.txt.1");
diff --git a/training/crf/Makefile.am b/training/crf/Makefile.am
index f72d8f92..d37b224c 100644
--- a/training/crf/Makefile.am
+++ b/training/crf/Makefile.am
@@ -7,21 +7,21 @@ bin_PROGRAMS = \
   mpi_online_optimize
 
 mpi_online_optimize_SOURCES = mpi_online_optimize.cc
-mpi_online_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+mpi_online_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz
 
 mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc
-mpi_flex_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+mpi_flex_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz
 
 mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc
-mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+mpi_extract_reachable_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz
 
 mpi_extract_features_SOURCES = mpi_extract_features.cc
-mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+mpi_extract_features_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz
 
 mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc cllh_observer.h
-mpi_batch_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+mpi_batch_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz
 
 mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc cllh_observer.h
-mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
+mpi_compute_cllh_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz
 
 AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/training -I$(top_srcdir)/training/utils -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am
index e5f13944..cba3e30f 100644
--- a/training/dpmert/Makefile.am
+++ b/training/dpmert/Makefile.am
@@ -8,18 +8,20 @@ noinst_PROGRAMS = \
 TESTS = lo_test
 
 mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc
-mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
+mr_dpmert_generate_mapper_input_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a
 
 # nbest2hg_SOURCES = nbest2hg.cc
 # nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst
 
 mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h
-mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
+mr_dpmert_map_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a
 
 mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc ces.h error_surface.h line_optimizer.h mert_geometry.h
-mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
+mr_dpmert_reduce_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a
 
 lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h
-lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
+lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a
 
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
+EXTRA_DIST = test_data
+
+AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/training/dpmert/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/dpmert/lo_test.cc b/training/dpmert/lo_test.cc
index 95a08d3d..d89bcd99 100644
--- a/training/dpmert/lo_test.cc
+++ b/training/dpmert/lo_test.cc
@@ -118,7 +118,7 @@ BOOST_AUTO_TEST_CASE( TestS1) {
   to_optimize.push_back(fPhraseModel_1);
   to_optimize.push_back(fPhraseModel_2);
 
-  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
 
   Hypergraph hg;
   ReadFile rf(path + "/0.json.gz");
diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am
index ee337ca8..8cf71078 100644
--- a/training/dtrain/Makefile.am
+++ b/training/dtrain/Makefile.am
@@ -1,7 +1,7 @@
 bin_PROGRAMS = dtrain
 
 dtrain_SOURCES = dtrain.cc score.cc dtrain.h kbestget.h ksampler.h pairsampling.h score.h
-dtrain_LDADD   = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a
+dtrain_LDADD   = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
 
diff --git a/training/liblbfgs/Makefile.am b/training/liblbfgs/Makefile.am
index f0d5c8aa..272d6f56 100644
--- a/training/liblbfgs/Makefile.am
+++ b/training/liblbfgs/Makefile.am
@@ -17,6 +17,6 @@ liblbfgs_a_SOURCES = \
 ################################################################
 # do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
 AM_LDFLAGS = liblbfgs.a -lz
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I..
+AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I$(top_srcdir)/training -I$(top_srcdir)/training/liblbfgs
 ################################################################
 
diff --git a/training/minrisk/Makefile.am b/training/minrisk/Makefile.am
index 821730c2..2be17498 100644
--- a/training/minrisk/Makefile.am
+++ b/training/minrisk/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = minrisk_optimize
 
 minrisk_optimize_SOURCES = minrisk_optimize.cc
-minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a
+minrisk_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a ../../training/liblbfgs/liblbfgs.a
 
-AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils
+AM_CPPFLAGS = -W -Wall -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils
diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am
index c8f404fb..0084603d 100644
--- a/training/mira/Makefile.am
+++ b/training/mira/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = kbest_mira
 
 kbest_mira_SOURCES = kbest_mira.cc
-kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a
+kbest_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/pro/Makefile.am b/training/pro/Makefile.am
index e0a45a33..93889f34 100644
--- a/training/pro/Makefile.am
+++ b/training/pro/Makefile.am
@@ -3,9 +3,9 @@ bin_PROGRAMS = \
   mr_pro_reduce
 
 mr_pro_map_SOURCES = mr_pro_map.cc
-mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
+mr_pro_map_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a
 
 mr_pro_reduce_SOURCES = mr_pro_reduce.cc
-mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a
+mr_pro_reduce_LDADD = ../../training/liblbfgs/liblbfgs.a ../../utils/libutils.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils -I$(top_srcdir)/training
diff --git a/training/rampion/Makefile.am b/training/rampion/Makefile.am
index ef0ca147..6a1a97cb 100644
--- a/training/rampion/Makefile.am
+++ b/training/rampion/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = rampion_cccp
 
 rampion_cccp_SOURCES = rampion_cccp.cc
-rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
+rampion_cccp_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a
 
 AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils
diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am
index a2ab86fd..29078aed 100644
--- a/training/utils/Makefile.am
+++ b/training/utils/Makefile.am
@@ -32,13 +32,13 @@ libtraining_utils_a_SOURCES = \
   risk.cc
 
 optimize_test_SOURCES = optimize_test.cc
-optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a
+optimize_test_LDADD = libtraining_utils.a ../../utils/libutils.a
 
 grammar_convert_SOURCES = grammar_convert.cc
-grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a
+grammar_convert_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a
 
 lbfgs_test_SOURCES = lbfgs_test.cc
-lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a
+lbfgs_test_LDADD = ../../utils/libutils.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I$(top_srcdir)/klm
 
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 3177325b..c5fedb78 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -13,7 +13,10 @@ TESTS = ts small_vector_test logval_test weights_test dict_test m_test
 
 noinst_LIBRARIES = libutils.a
 
+# EXTRA_DIST = test_data
+
 libutils_a_SOURCES = \
+  test_data \
   alias_sampler.h \
   alignment_io.h \
   array2d.h \
@@ -101,6 +104,6 @@ small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOS
 
 ################################################################
 # do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I.
+AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I$(top_srcdir) -DTEST_DATA=\"$(top_srcdir)/utils/test_data\"
 ################################################################
 
diff --git a/utils/weights_test.cc b/utils/weights_test.cc
index 4be4c40f..0d5d8512 100644
--- a/utils/weights_test.cc
+++ b/utils/weights_test.cc
@@ -7,6 +7,6 @@ using namespace std;
 
 BOOST_AUTO_TEST_CASE(Load) {
   vector<weight_t> v;
-  Weights::InitFromFile("test_data/weights", &v);
+  Weights::InitFromFile(TEST_DATA "/weights", &v);
   Weights::WriteToFile("-", v);
 }
diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am
index e274b209..a195cc5a 100644
--- a/word-aligner/Makefile.am
+++ b/word-aligner/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = fast_align
 
 fast_align_SOURCES = fast_align.cc ttables.cc da.h ttables.h
-fast_align_LDADD = $(top_srcdir)/utils/libutils.a
+fast_align_LDADD = ../utils/libutils.a
 
-AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training
+AM_CPPFLAGS = -W -Wall -I$(top_srcdir) -I$(top_srcdir)/utils -I$(top_srcdir)/training
-- 
cgit v1.2.3