diff options
Diffstat (limited to 'utils')
| -rw-r--r-- | utils/Jamfile | 32 | ||||
| -rw-r--r-- | utils/Makefile.am | 13 | ||||
| -rw-r--r-- | utils/ccrp.h | 270 | ||||
| -rw-r--r-- | utils/ccrp_nt.h | 164 | ||||
| -rw-r--r-- | utils/ccrp_onetable.h | 253 | ||||
| -rw-r--r-- | utils/crp_table_manager.h | 114 | ||||
| -rw-r--r-- | utils/crp_test.cc | 91 | ||||
| -rw-r--r-- | utils/fast_sparse_vector.h | 2 | ||||
| -rw-r--r-- | utils/gamma_poisson.h | 33 | ||||
| -rw-r--r-- | utils/mfcr.h | 370 | ||||
| -rw-r--r-- | utils/mfcr_test.cc | 72 | ||||
| -rw-r--r-- | utils/sampler.h | 2 | ||||
| -rw-r--r-- | utils/slice_sampler.h | 191 | ||||
| -rw-r--r-- | utils/small_vector.h | 2 | ||||
| -rw-r--r-- | utils/stringlib.h | 2 | ||||
| -rw-r--r-- | utils/unigram_pyp_lm.cc | 214 | ||||
| -rw-r--r-- | utils/weights.cc | 8 | 
17 files changed, 10 insertions, 1823 deletions
| diff --git a/utils/Jamfile b/utils/Jamfile deleted file mode 100644 index 4444b25f..00000000 --- a/utils/Jamfile +++ /dev/null @@ -1,32 +0,0 @@ -import testing ; -import option ; - -additional = ; -if [ option.get with-cmph ] { -  additional += perfect_hash.cc ; -} - -lib utils : -  alignment_io.cc  -  b64tools.cc  -  corpus_tools.cc  -  dict.cc  -  tdict.cc  -  fdict.cc  -  gzstream.cc  -  filelib.cc  -  stringlib.cc  -  sparse_vector.cc  -  timing_stats.cc  -  verbose.cc  -  weights.cc -  $(additional) -  ..//z -  : <include>.. <include>. : : <include>.. <include>. ; - -exe atools : atools.cc utils ..//boost_program_options ; -exe reconstruct_weights : reconstruct_weights.cc utils ..//boost_program_options ; - -alias programs : reconstruct_weights atools ; - -all_tests [ glob *_test.cc phmt.cc ts.cc ] : utils : <testing.arg>$(TOP)/utils/test_data ; diff --git a/utils/Makefile.am b/utils/Makefile.am index 799ec879..3ad9d69e 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -3,16 +3,13 @@ bin_PROGRAMS = reconstruct_weights atools  noinst_PROGRAMS = \    ts \    phmt \ -  mfcr_test \ -  crp_test \    dict_test \    m_test \    weights_test \    logval_test \ -  small_vector_test \ -  unigram_pyp_lm +  small_vector_test -TESTS = ts mfcr_test crp_test small_vector_test logval_test weights_test dict_test m_test +TESTS = ts small_vector_test logval_test weights_test dict_test m_test  noinst_LIBRARIES = libutils.a @@ -48,18 +45,12 @@ m_test_SOURCES = m_test.cc  m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz  dict_test_SOURCES = dict_test.cc  dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -mfcr_test_SOURCES = mfcr_test.cc -mfcr_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz  weights_test_SOURCES = weights_test.cc  weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -crp_test_SOURCES = crp_test.cc -crp_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz  logval_test_SOURCES = logval_test.cc  logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz  small_vector_test_SOURCES = small_vector_test.cc  small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -unigram_pyp_lm_SOURCES = unigram_pyp_lm.cc -unigram_pyp_lm_LDADD = libutils.a -lz  ################################################################  # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### diff --git a/utils/ccrp.h b/utils/ccrp.h deleted file mode 100644 index f5d3fc78..00000000 --- a/utils/ccrp.h +++ /dev/null @@ -1,270 +0,0 @@ -#ifndef _CCRP_H_ -#define _CCRP_H_ - -#include <numeric> -#include <cassert> -#include <cmath> -#include <list> -#include <iostream> -#include <vector> -#include <tr1/unordered_map> -#include <boost/functional/hash.hpp> -#include "sampler.h" -#include "slice_sampler.h" -#include "crp_table_manager.h" -#include "m.h" - -// Chinese restaurant process (Pitman-Yor parameters) with table tracking. - -template <typename Dish, typename DishHash = boost::hash<Dish> > -class CCRP { - public: -  CCRP(double disc, double strength) : -      num_tables_(), -      num_customers_(), -      discount_(disc), -      strength_(strength), -      discount_prior_strength_(std::numeric_limits<double>::quiet_NaN()), -      discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()), -      strength_prior_shape_(std::numeric_limits<double>::quiet_NaN()), -      strength_prior_rate_(std::numeric_limits<double>::quiet_NaN()) { -    check_hyperparameters(); -  } - -  CCRP(double d_strength, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : -      num_tables_(), -      num_customers_(), -      discount_(d), -      strength_(c), -      discount_prior_strength_(d_strength), -      discount_prior_beta_(d_beta), -      strength_prior_shape_(c_shape), -      strength_prior_rate_(c_rate) { -    check_hyperparameters(); -  } - -  void check_hyperparameters() { -    if (discount_ < 0.0 || discount_ >= 1.0) { -      std::cerr << "Bad discount: " << discount_ << std::endl; -      abort(); -    } -    if (strength_ <= -discount_) { -      std::cerr << "Bad strength: " << strength_ << " (discount=" << discount_ << ")" << std::endl; -      abort(); -    } -  } - -  double discount() const { return discount_; } -  double strength() const { return strength_; } -  void set_hyperparameters(double d, double s) { -    discount_ = d; strength_ = s; -    check_hyperparameters(); -  } -  void set_discount(double d) { discount_ = d; check_hyperparameters(); } -  void set_strength(double a) { strength_ = a; check_hyperparameters(); } - -  bool has_discount_prior() const { -    return !std::isnan(discount_prior_strength_); -  } - -  bool has_strength_prior() const { -    return !std::isnan(strength_prior_shape_); -  } - -  void clear() { -    num_tables_ = 0; -    num_customers_ = 0; -    dish_locs_.clear(); -  } - -  unsigned num_tables() const { -    return num_tables_; -  } - -  unsigned num_tables(const Dish& dish) const { -    const typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.find(dish); -    if (it == dish_locs_.end()) return 0; -    return it->second.num_tables(); -  } - -  unsigned num_customers() const { -    return num_customers_; -  } - -  unsigned num_customers(const Dish& dish) const { -    const typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.find(dish); -    if (it == dish_locs_.end()) return 0; -    return it->num_customers(); -  } - -  // returns +1 or 0 indicating whether a new table was opened -  //   p = probability with which the particular table was selected -  //       excluding p0 -  template <typename T> -  int increment(const Dish& dish, const T& p0, MT19937* rng, T* p = NULL) { -    CRPTableManager& loc = dish_locs_[dish]; -    bool share_table = false; -    if (loc.num_customers()) { -      const T p_empty = T(strength_ + num_tables_ * discount_) * p0; -      const T p_share = T(loc.num_customers() - loc.num_tables() * discount_); -      share_table = rng->SelectSample(p_empty, p_share); -    } -    if (share_table) { -      loc.share_table(discount_, rng); -    } else { -      loc.create_table(); -      ++num_tables_; -    } -    ++num_customers_; -    return (share_table ? 0 : 1); -  } - -  // returns -1 or 0, indicating whether a table was closed -  int decrement(const Dish& dish, MT19937* rng) { -    CRPTableManager& loc = dish_locs_[dish]; -    assert(loc.num_customers()); -    if (loc.num_customers() == 1) { -      dish_locs_.erase(dish); -      --num_tables_; -      --num_customers_; -      return -1; -    } else { -      int delta = loc.remove_customer(rng); -      --num_customers_; -      if (delta) --num_tables_; -      return delta; -    } -  } - -  template <typename T> -  T prob(const Dish& dish, const T& p0) const { -    const typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.find(dish); -    const T r = T(num_tables_ * discount_ + strength_); -    if (it == dish_locs_.end()) { -      return r * p0 / T(num_customers_ + strength_); -    } else { -      return (T(it->second.num_customers() - discount_ * it->second.num_tables()) + r * p0) / -               T(num_customers_ + strength_); -    } -  } - -  double log_crp_prob() const { -    return log_crp_prob(discount_, strength_); -  } - -  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process -  // does not include P_0's -  double log_crp_prob(const double& discount, const double& strength) const { -    double lp = 0.0; -    if (has_discount_prior()) -      lp = Md::log_beta_density(discount, discount_prior_strength_, discount_prior_beta_); -    if (has_strength_prior()) -      lp += Md::log_gamma_density(strength + discount, strength_prior_shape_, strength_prior_rate_); -    assert(lp <= 0.0); -    if (num_customers_) { -      if (discount > 0.0) { -        const double r = lgamma(1.0 - discount); -        if (strength) -          lp += lgamma(strength) - lgamma(strength / discount); -        lp += - lgamma(strength + num_customers_) -             + num_tables_ * log(discount) + lgamma(strength / discount + num_tables_); -        assert(std::isfinite(lp)); -        for (typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.begin(); -             it != dish_locs_.end(); ++it) { -          const CRPTableManager& cur = it->second;  // TODO check -          for (CRPTableManager::const_iterator ti = cur.begin(); ti != cur.end(); ++ti) { -            lp += (lgamma(ti->first - discount) - r) * ti->second; -          } -        } -      } else if (!discount) { // discount == 0.0 -        lp += lgamma(strength) + num_tables_ * log(strength) - lgamma(strength + num_tables_); -        assert(std::isfinite(lp)); -        for (typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.begin(); -             it != dish_locs_.end(); ++it) { -          const CRPTableManager& cur = it->second; -          lp += lgamma(cur.num_tables()); -        } -      } else { -        assert(!"discount less than 0 detected!"); -      } -    } -    assert(std::isfinite(lp)); -    return lp; -  } - -  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { -    assert(has_discount_prior() || has_strength_prior()); -    if (num_customers() == 0) return; -    DiscountResampler dr(*this); -    StrengthResampler sr(*this); -    for (unsigned iter = 0; iter < nloop; ++iter) { -      if (has_strength_prior()) { -        strength_ = slice_sampler1d(sr, strength_, *rng, -discount_ + std::numeric_limits<double>::min(), -                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -      } -      if (has_discount_prior()) { -        double min_discount = std::numeric_limits<double>::min(); -        if (strength_ < 0.0) min_discount -= strength_; -        discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, -                               1.0, 0.0, niterations, 100*niterations); -      } -    } -    strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, -                             std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -  } - -  struct DiscountResampler { -    DiscountResampler(const CCRP& crp) : crp_(crp) {} -    const CCRP& crp_; -    double operator()(const double& proposed_discount) const { -      return crp_.log_crp_prob(proposed_discount, crp_.strength_); -    } -  }; - -  struct StrengthResampler { -    StrengthResampler(const CCRP& crp) : crp_(crp) {} -    const CCRP& crp_; -    double operator()(const double& proposed_strength) const { -      return crp_.log_crp_prob(crp_.discount_, proposed_strength); -    } -  }; - -  void Print(std::ostream* out) const { -    std::cerr << "PYP(d=" << discount_ << ",c=" << strength_ << ") customers=" << num_customers_ << std::endl; -    for (typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator it = dish_locs_.begin(); -         it != dish_locs_.end(); ++it) { -      (*out) << it->first << " : " << it->second << std::endl; -    } -  } - -  typedef typename std::tr1::unordered_map<Dish, CRPTableManager, DishHash>::const_iterator const_iterator; -  const_iterator begin() const { -    return dish_locs_.begin(); -  } -  const_iterator end() const { -    return dish_locs_.end(); -  } - -  unsigned num_tables_; -  unsigned num_customers_; -  std::tr1::unordered_map<Dish, CRPTableManager, DishHash> dish_locs_; - -  double discount_; -  double strength_; - -  // optional beta prior on discount_ (NaN if no prior) -  double discount_prior_strength_; -  double discount_prior_beta_; - -  // optional gamma prior on strength_ (NaN if no prior) -  double strength_prior_shape_; -  double strength_prior_rate_; -}; - -template <typename T,typename H> -std::ostream& operator<<(std::ostream& o, const CCRP<T,H>& c) { -  c.Print(&o); -  return o; -} - -#endif diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h deleted file mode 100644 index 724b11bd..00000000 --- a/utils/ccrp_nt.h +++ /dev/null @@ -1,164 +0,0 @@ -#ifndef _CCRP_NT_H_ -#define _CCRP_NT_H_ - -#include <numeric> -#include <cassert> -#include <cmath> -#include <list> -#include <iostream> -#include <vector> -#include <tr1/unordered_map> -#include <boost/functional/hash.hpp> -#include "sampler.h" -#include "slice_sampler.h" -#include "m.h" - -// Chinese restaurant process (1 parameter) -template <typename Dish, typename DishHash = boost::hash<Dish> > -class CCRP_NoTable { - public: -  explicit CCRP_NoTable(double conc) : -    num_customers_(), -    alpha_(conc), -    alpha_prior_shape_(std::numeric_limits<double>::quiet_NaN()), -    alpha_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {} - -  CCRP_NoTable(double c_shape, double c_rate, double c = 10.0) : -    num_customers_(), -    alpha_(c), -    alpha_prior_shape_(c_shape), -    alpha_prior_rate_(c_rate) {} - -  double alpha() const { return alpha_; } -  void set_alpha(const double& alpha) { alpha_ = alpha; assert(alpha_ > 0.0); } - -  bool has_alpha_prior() const { -    return !std::isnan(alpha_prior_shape_); -  } - -  void clear() { -    num_customers_ = 0; -    custs_.clear(); -  } - -  unsigned num_customers() const { -    return num_customers_; -  } - -  unsigned num_customers(const Dish& dish) const { -    const typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.find(dish); -    if (it == custs_.end()) return 0; -    return it->second; -  } - -  int increment(const Dish& dish) { -    int table_diff = 0; -    if (++custs_[dish] == 1) -      table_diff = 1; -    ++num_customers_; -    return table_diff; -  } - -  int decrement(const Dish& dish) { -    int table_diff = 0; -    int nc = --custs_[dish]; -    if (nc == 0) { -      custs_.erase(dish); -      table_diff = -1; -    } else if (nc < 0) { -      std::cerr << "Dish counts dropped below zero for: " << dish << std::endl; -      abort(); -    } -    --num_customers_; -    return table_diff; -  } - -  template <typename F> -  F prob(const Dish& dish, const F& p0) const { -    const unsigned at_table = num_customers(dish); -    return (F(at_table) + p0 * F(alpha_)) / F(num_customers_ + alpha_); -  } - -  double logprob(const Dish& dish, const double& logp0) const { -    const unsigned at_table = num_customers(dish); -    return log(at_table + exp(logp0 + log(alpha_))) - log(num_customers_ + alpha_); -  } - -  double log_crp_prob() const { -    return log_crp_prob(alpha_); -  } - -  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process -  // does not include P_0's -  double log_crp_prob(const double& alpha) const { -    double lp = 0.0; -    if (has_alpha_prior()) -      lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); -    assert(lp <= 0.0); -    if (num_customers_) { -      lp += lgamma(alpha) - lgamma(alpha + num_customers_) + -        custs_.size() * log(alpha); -      assert(std::isfinite(lp)); -      for (typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.begin(); -             it != custs_.end(); ++it) { -          lp += lgamma(it->second); -      } -    } -    assert(std::isfinite(lp)); -    return lp; -  } - -  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { -    assert(has_alpha_prior()); -    ConcentrationResampler cr(*this); -    for (unsigned iter = 0; iter < nloop; ++iter) { -        alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, -                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -    } -  } - -  struct ConcentrationResampler { -    ConcentrationResampler(const CCRP_NoTable& crp) : crp_(crp) {} -    const CCRP_NoTable& crp_; -    double operator()(const double& proposed_alpha) const { -      return crp_.log_crp_prob(proposed_alpha); -    } -  }; - -  void Print(std::ostream* out) const { -    (*out) << "DP(alpha=" << alpha_ << ") customers=" << num_customers_ << std::endl; -    int cc = 0; -    for (typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.begin(); -         it != custs_.end(); ++it) { -      (*out) << " " << it->first << "(" << it->second << " eating)"; -      ++cc; -      if (cc > 10) { (*out) << " ..."; break; } -    } -    (*out) << std::endl; -  } - -  unsigned num_customers_; -  std::tr1::unordered_map<Dish, unsigned, DishHash> custs_; - -  typedef typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator const_iterator; -  const_iterator begin() const { -    return custs_.begin(); -  } -  const_iterator end() const { -    return custs_.end(); -  } - -  double alpha_; - -  // optional gamma prior on alpha_ (NaN if no prior) -  double alpha_prior_shape_; -  double alpha_prior_rate_; -}; - -template <typename T,typename H> -std::ostream& operator<<(std::ostream& o, const CCRP_NoTable<T,H>& c) { -  c.Print(&o); -  return o; -} - -#endif diff --git a/utils/ccrp_onetable.h b/utils/ccrp_onetable.h deleted file mode 100644 index abe399ea..00000000 --- a/utils/ccrp_onetable.h +++ /dev/null @@ -1,253 +0,0 @@ -#ifndef _CCRP_ONETABLE_H_ -#define _CCRP_ONETABLE_H_ - -#include <numeric> -#include <cassert> -#include <cmath> -#include <list> -#include <iostream> -#include <tr1/unordered_map> -#include <boost/functional/hash.hpp> -#include "sampler.h" -#include "slice_sampler.h" - -// Chinese restaurant process (Pitman-Yor parameters) with one table approximation - -template <typename Dish, typename DishHash = boost::hash<Dish> > -class CCRP_OneTable { -  typedef std::tr1::unordered_map<Dish, unsigned, DishHash> DishMapType; - public: -  CCRP_OneTable(double disc, double conc) : -    num_tables_(), -    num_customers_(), -    discount_(disc), -    alpha_(conc), -    discount_prior_alpha_(std::numeric_limits<double>::quiet_NaN()), -    discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()), -    alpha_prior_shape_(std::numeric_limits<double>::quiet_NaN()), -    alpha_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {} - -  CCRP_OneTable(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : -    num_tables_(), -    num_customers_(), -    discount_(d), -    alpha_(c), -    discount_prior_alpha_(d_alpha), -    discount_prior_beta_(d_beta), -    alpha_prior_shape_(c_shape), -    alpha_prior_rate_(c_rate) {} - -  double discount() const { return discount_; } -  double alpha() const { return alpha_; } -  void set_alpha(double c) { alpha_ = c; } -  void set_discount(double d) { discount_ = d; } - -  bool has_discount_prior() const { -    return !std::isnan(discount_prior_alpha_); -  } - -  bool has_alpha_prior() const { -    return !std::isnan(alpha_prior_shape_); -  } - -  void clear() { -    num_tables_ = 0; -    num_customers_ = 0; -    dish_counts_.clear(); -  } - -  unsigned num_tables() const { -    return num_tables_; -  } - -  unsigned num_tables(const Dish& dish) const { -    const typename DishMapType::const_iterator it = dish_counts_.find(dish); -    if (it == dish_counts_.end()) return 0; -    return 1; -  } - -  unsigned num_customers() const { -    return num_customers_; -  } - -  unsigned num_customers(const Dish& dish) const { -    const typename DishMapType::const_iterator it = dish_counts_.find(dish); -    if (it == dish_counts_.end()) return 0; -    return it->second; -  } - -  // returns +1 or 0 indicating whether a new table was opened -  int increment(const Dish& dish) { -    unsigned& dc = dish_counts_[dish]; -    ++dc; -    ++num_customers_; -    if (dc == 1) { -      ++num_tables_; -      return 1; -    } else { -      return 0; -    } -  } - -  // returns -1 or 0, indicating whether a table was closed -  int decrement(const Dish& dish) { -    unsigned& dc = dish_counts_[dish]; -    assert(dc > 0); -    if (dc == 1) { -      dish_counts_.erase(dish); -      --num_tables_; -      --num_customers_; -      return -1; -    } else { -      assert(dc > 1); -      --dc; -      --num_customers_; -      return 0; -    } -  } - -  double prob(const Dish& dish, const double& p0) const { -    const typename DishMapType::const_iterator it = dish_counts_.find(dish); -    const double r = num_tables_ * discount_ + alpha_; -    if (it == dish_counts_.end()) { -      return r * p0 / (num_customers_ + alpha_); -    } else { -      return (it->second - discount_ + r * p0) / -               (num_customers_ + alpha_); -    } -  } - -  template <typename T> -  T probT(const Dish& dish, const T& p0) const { -    const typename DishMapType::const_iterator it = dish_counts_.find(dish); -    const T r(num_tables_ * discount_ + alpha_); -    if (it == dish_counts_.end()) { -      return r * p0 / T(num_customers_ + alpha_); -    } else { -      return (T(it->second - discount_) + r * p0) / -               T(num_customers_ + alpha_); -    } -  } - -  double log_crp_prob() const { -    return log_crp_prob(discount_, alpha_); -  } - -  static double log_beta_density(const double& x, const double& alpha, const double& beta) { -    assert(x > 0.0); -    assert(x < 1.0); -    assert(alpha > 0.0); -    assert(beta > 0.0); -    const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); -    return lp; -  } - -  static double log_gamma_density(const double& x, const double& shape, const double& rate) { -    assert(x >= 0.0); -    assert(shape > 0.0); -    assert(rate > 0.0); -    const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); -    return lp; -  } - -  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process -  // does not include P_0's -  double log_crp_prob(const double& discount, const double& alpha) const { -    double lp = 0.0; -    if (has_discount_prior()) -      lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); -    if (has_alpha_prior()) -      lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); -    assert(lp <= 0.0); -    if (num_customers_) { -      if (discount > 0.0) { -        const double r = lgamma(1.0 - discount); -        lp += lgamma(alpha) - lgamma(alpha + num_customers_) -             + num_tables_ * log(discount) + lgamma(alpha / discount + num_tables_) -             - lgamma(alpha / discount); -        assert(std::isfinite(lp)); -        for (typename DishMapType::const_iterator it = dish_counts_.begin(); -             it != dish_counts_.end(); ++it) { -          const unsigned& cur = it->second; -          lp += lgamma(cur - discount) - r; -        } -      } else { -        assert(!"not implemented yet"); -      } -    } -    assert(std::isfinite(lp)); -    return lp; -  } - -  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { -    assert(has_discount_prior() || has_alpha_prior()); -    DiscountResampler dr(*this); -    ConcentrationResampler cr(*this); -    for (unsigned iter = 0; iter < nloop; ++iter) { -      if (has_alpha_prior()) { -        alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, -                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -      } -      if (has_discount_prior()) { -        discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits<double>::min(), -                               1.0, 0.0, niterations, 100*niterations); -      } -    } -    alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, -                             std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -  } - -  struct DiscountResampler { -    DiscountResampler(const CCRP_OneTable& crp) : crp_(crp) {} -    const CCRP_OneTable& crp_; -    double operator()(const double& proposed_discount) const { -      return crp_.log_crp_prob(proposed_discount, crp_.alpha_); -    } -  }; - -  struct ConcentrationResampler { -    ConcentrationResampler(const CCRP_OneTable& crp) : crp_(crp) {} -    const CCRP_OneTable& crp_; -    double operator()(const double& proposed_alpha) const { -      return crp_.log_crp_prob(crp_.discount_, proposed_alpha); -    } -  }; - -  void Print(std::ostream* out) const { -    (*out) << "PYP(d=" << discount_ << ",c=" << alpha_ << ") customers=" << num_customers_ << std::endl; -    for (typename DishMapType::const_iterator it = dish_counts_.begin(); it != dish_counts_.end(); ++it) { -      (*out) << "  " << it->first << " = " << it->second << std::endl; -    } -  } - -  typedef typename DishMapType::const_iterator const_iterator; -  const_iterator begin() const { -    return dish_counts_.begin(); -  } -  const_iterator end() const { -    return dish_counts_.end(); -  } - -  unsigned num_tables_; -  unsigned num_customers_; -  DishMapType dish_counts_; - -  double discount_; -  double alpha_; - -  // optional beta prior on discount_ (NaN if no prior) -  double discount_prior_alpha_; -  double discount_prior_beta_; - -  // optional gamma prior on alpha_ (NaN if no prior) -  double alpha_prior_shape_; -  double alpha_prior_rate_; -}; - -template <typename T,typename H> -std::ostream& operator<<(std::ostream& o, const CCRP_OneTable<T,H>& c) { -  c.Print(&o); -  return o; -} - -#endif diff --git a/utils/crp_table_manager.h b/utils/crp_table_manager.h deleted file mode 100644 index 753e721f..00000000 --- a/utils/crp_table_manager.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef _CRP_TABLE_MANAGER_H_ -#define _CRP_TABLE_MANAGER_H_ - -#include <iostream> -#include "sparse_vector.h" -#include "sampler.h" - -// these are helper classes for implementing token-based CRP samplers -// basically the data structures recommended by Blunsom et al. in the Note. - -struct CRPHistogram { -  //typedef std::map<unsigned, unsigned> MAPTYPE; -  typedef SparseVector<unsigned> MAPTYPE; -  typedef MAPTYPE::const_iterator const_iterator; - -  inline void increment(unsigned bin, unsigned delta = 1u) { -    data[bin] += delta; -  } -  inline void decrement(unsigned bin, unsigned delta = 1u) { -    unsigned r = data[bin] -= delta; -    if (!r) data.erase(bin); -  } -  inline void move(unsigned from_bin, unsigned to_bin, unsigned delta = 1u) { -    decrement(from_bin, delta); -    increment(to_bin, delta); -  } -  inline const_iterator begin() const { return data.begin(); } -  inline const_iterator end() const { return data.end(); } - - private: -  MAPTYPE data; -}; - -// A CRPTableManager tracks statistics about all customers -// and tables serving some dish in a CRP and can correctly sample what -// table to remove a customer from and what table to join -struct CRPTableManager { -  CRPTableManager() : customers(), tables() {} - -  inline unsigned num_tables() const { -    return tables; -  } - -  inline unsigned num_customers() const { -    return customers; -  } - -  inline void create_table() { -    h.increment(1); -    ++tables; -    ++customers; -  } - -  // seat a customer at a table proportional to the number of customers seated at a table, less the discount -  // *new tables are never created by this function! -  inline void share_table(const double discount, MT19937* rng) { -    const double z = customers - discount * num_tables(); -    double r = z * rng->next(); -    const CRPHistogram::const_iterator end = h.end(); -    CRPHistogram::const_iterator it = h.begin(); -    for (; it != end; ++it) { -      // it->first = number of customers at table -      // it->second = number of such tables -      double thresh = (it->first - discount) * it->second; -      if (thresh > r) break; -      r -= thresh; -    } -    h.move(it->first, it->first + 1); -    ++customers; -  } - -  // randomly sample a customer -  // *tables may be removed -  // returns -1 if a table is removed, 0 otherwise -  inline int remove_customer(MT19937* rng) { -    int r = rng->next() * num_customers(); -    const CRPHistogram::const_iterator end = h.end(); -    CRPHistogram::const_iterator it = h.begin(); -    for (; it != end; ++it) { -      int thresh = it->first * it->second; -      if (thresh > r) break; -      r -= thresh; -    } -    --customers; -    const unsigned tc = it->first; -    if (tc == 1) { -      h.decrement(1); -      --tables; -      return -1; -    } else { -      h.move(tc, tc - 1); -      return 0; -    } -  } - -  typedef CRPHistogram::const_iterator const_iterator; -  const_iterator begin() const { return h.begin(); } -  const_iterator end() const { return h.end(); } - -  unsigned customers; -  unsigned tables; -  CRPHistogram h; -}; - -std::ostream& operator<<(std::ostream& os, const CRPTableManager& tm) { -  os << '[' << tm.num_customers() << " total customers at " << tm.num_tables() << " tables ||| "; -  for (CRPHistogram::const_iterator it = tm.begin(); it != tm.end(); ++it) { -    if (it != tm.h.begin()) os << "  --  "; -    os << '(' << it->first << ") x " << it->second; -  } -  return os << ']'; -} - -#endif diff --git a/utils/crp_test.cc b/utils/crp_test.cc deleted file mode 100644 index 0cdb7afd..00000000 --- a/utils/crp_test.cc +++ /dev/null @@ -1,91 +0,0 @@ -#include <iostream> -#include <vector> -#include <string> - -#define BOOST_TEST_MODULE CrpTest -#include <boost/test/unit_test.hpp> -#include <boost/test/floating_point_comparison.hpp> - -#include "ccrp.h" -#include "sampler.h" - -using namespace std; - -MT19937 rng; - -BOOST_AUTO_TEST_CASE(Dist) { -  CCRP<string> crp(0.1, 5); -  double un = 0.25; -  int tt = 0; -  tt += crp.increment("hi", un, &rng); -  tt += crp.increment("foo", un, &rng); -  tt += crp.increment("bar", un, &rng); -  tt += crp.increment("bar", un, &rng); -  tt += crp.increment("bar", un, &rng); -  tt += crp.increment("bar", un, &rng); -  tt += crp.increment("bar", un, &rng); -  tt += crp.increment("bar", un, &rng); -  tt += crp.increment("bar", un, &rng); -  cout << "tt=" << tt << endl; -  cout << crp << endl; -  cout << "  P(bar)=" << crp.prob("bar", un) << endl; -  cout << "  P(hi)=" << crp.prob("hi", un) << endl; -  cout << "  P(baz)=" << crp.prob("baz", un) << endl; -  cout << "  P(foo)=" << crp.prob("foo", un) << endl; -  double x = crp.prob("bar", un) + crp.prob("hi", un) + crp.prob("baz", un) + crp.prob("foo", un); -  cout << "    tot=" << x << endl; -  BOOST_CHECK_CLOSE(1.0, x, 1e-6); -  tt += crp.decrement("hi", &rng); -  tt += crp.decrement("bar", &rng); -  cout << crp << endl; -  tt += crp.decrement("bar", &rng); -  cout << crp << endl; -  cout << "tt=" << tt << endl; -} - -BOOST_AUTO_TEST_CASE(Exchangability) { -    double tot = 0; -    double xt = 0; -    CCRP<int> crp(0.5, 1.0); -    int cust = 10; -    vector<int> hist(cust + 1, 0); -    for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); } -    const int samples = 100000; -    const bool simulate = true; -    for (int k = 0; k < samples; ++k) { -      if (!simulate) { -        crp.clear(); -        for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); } -      } else { -        int da = rng.next() * cust; -        bool a = rng.next() < 0.5; -        if (a) { -          for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); } -          for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); } -          xt += 1.0; -        } else { -          for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); } -          for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); } -        } -      } -      int c = crp.num_tables(1); -      ++hist[c]; -      tot += c; -    } -    BOOST_CHECK_EQUAL(cust, crp.num_customers()); -    cerr << "P(a) = " << (xt / samples) << endl; -    cerr << "E[num tables] = " << (tot / samples) << endl; -    double error = fabs((tot / samples) - 5.4); -    cerr << "  error = " << error << endl; -    BOOST_CHECK_MESSAGE(error < 0.1, "error is too big = " << error);  // it's possible for this to fail, but -                            // very, very unlikely -    for (int i = 1; i <= cust; ++i) -      cerr << i << ' ' << (hist[i]) << endl; -} - -BOOST_AUTO_TEST_CASE(LP) { -  CCRP<string> crp(1,1,1,1,0.1,50.0); -  crp.increment("foo", 1.0, &rng); -  cerr << crp.log_crp_prob() << endl; -} - diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 9fe00459..590a60c4 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -522,7 +522,7 @@ const FastSparseVector<T> operator-(const FastSparseVector<T>& x, const FastSpar  }  template <class T> -std::size_t hash_value(FastSparseVector<T> const& x) { +std::size_t hash_value(FastSparseVector<T> const&) {    assert(!"not implemented");    return 0;  } diff --git a/utils/gamma_poisson.h b/utils/gamma_poisson.h deleted file mode 100644 index fec763f6..00000000 --- a/utils/gamma_poisson.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _GAMMA_POISSON_H_ -#define _GAMMA_POISSON_H_ - -#include <m.h> - -// http://en.wikipedia.org/wiki/Conjugate_prior -struct GammaPoisson { -  GammaPoisson(double shape, double rate) : -    a(shape), b(rate), n(), marginal() {} - -  double prob(unsigned x) const { -    return exp(Md::log_negative_binom(x, a + marginal, 1.0 - (b + n) / (1 + b + n))); -  } - -  void increment(unsigned x) { -    ++n; -    marginal += x; -  } - -  void decrement(unsigned x) { -    --n; -    marginal -= x; -  } - -  double log_likelihood() const { -    return 0; -  } - -  double a, b; -  int n, marginal; -}; - -#endif diff --git a/utils/mfcr.h b/utils/mfcr.h deleted file mode 100644 index 4aacb567..00000000 --- a/utils/mfcr.h +++ /dev/null @@ -1,370 +0,0 @@ -#ifndef _MFCR_H_ -#define _MFCR_H_ - -#include <algorithm> -#include <numeric> -#include <cassert> -#include <cmath> -#include <list> -#include <iostream> -#include <vector> -#include <iterator> -#include <tr1/unordered_map> -#include <boost/functional/hash.hpp> -#include "sampler.h" -#include "slice_sampler.h" -#include "m.h" - -struct TableCount { -  TableCount() : count(), floor() {} -  TableCount(int c, int f) : count(c), floor(f) { -    assert(f >= 0); -  } -  int count;               // count or delta (may be 0, <0, or >0) -  unsigned char floor;     // from which floor? -}; -  -std::ostream& operator<<(std::ostream& o, const TableCount& tc) { -  return o << "[c=" << tc.count << " floor=" << static_cast<unsigned int>(tc.floor) << ']'; -} - -// Multi-Floor Chinese Restaurant as proposed by Wood & Teh (AISTATS, 2009) to simulate -// graphical Pitman-Yor processes. -// http://jmlr.csail.mit.edu/proceedings/papers/v5/wood09a/wood09a.pdf -// -// Implementation is based on Blunsom, Cohn, Goldwater, & Johnson (ACL 2009) and code -// referenced therein. -// http://www.aclweb.org/anthology/P/P09/P09-2085.pdf -// -template <unsigned Floors, typename Dish, typename DishHash = boost::hash<Dish> > -class MFCR { - public: - -  MFCR(double d, double strength) : -    num_tables_(), -    num_customers_(), -    discount_(d), -    strength_(strength), -    discount_prior_strength_(std::numeric_limits<double>::quiet_NaN()), -    discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()), -    strength_prior_shape_(std::numeric_limits<double>::quiet_NaN()), -    strength_prior_rate_(std::numeric_limits<double>::quiet_NaN()) { check_hyperparameters(); } - -  MFCR(double discount_strength, double discount_beta, double strength_shape, double strength_rate, double d = 0.9, double strength = 10.0) : -    num_tables_(), -    num_customers_(), -    discount_(d), -    strength_(strength), -    discount_prior_strength_(discount_strength), -    discount_prior_beta_(discount_beta), -    strength_prior_shape_(strength_shape), -    strength_prior_rate_(strength_rate) { check_hyperparameters(); } - -  void check_hyperparameters() { -    if (discount_ < 0.0 || discount_ >= 1.0) { -      std::cerr << "Bad discount: " << discount_ << std::endl; -      abort(); -    } -    if (strength_ <= -discount_) { -      std::cerr << "Bad strength: " << strength_ << " (discount=" << discount_ << ")" << std::endl; -      abort(); -    } -  } - -  double discount() const { return discount_; } -  double strength() const { return strength_; } -  void set_hyperparameters(double d, double s) { -    discount_ = d; strength_ = s; -    check_hyperparameters(); -  } -  void set_discount(double d) { discount_ = d; check_hyperparameters(); } -  void set_strength(double a) { strength_ = a; check_hyperparameters(); } - -  bool has_discount_prior() const { -    return !std::isnan(discount_prior_strength_); -  } - -  bool has_strength_prior() const { -    return !std::isnan(strength_prior_shape_); -  } - -  void clear() { -    num_tables_ = 0; -    num_customers_ = 0; -    dish_locs_.clear(); -  } - -  unsigned num_tables() const { -    return num_tables_; -  } - -  unsigned num_tables(const Dish& dish) const { -    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish); -    if (it == dish_locs_.end()) return 0; -    return it->second.table_counts_.size(); -  } - -  // this is not terribly efficient but it should not typically be necessary to execute this query -  unsigned num_tables(const Dish& dish, const unsigned floor) const { -    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish); -    if (it == dish_locs_.end()) return 0; -    unsigned c = 0; -    for (typename std::list<TableCount>::const_iterator i = it->second.table_counts_.begin(); -         i != it->second.table_counts_.end(); ++i) { -      if (i->floor == floor) ++c; -    } -    return c; -  } - -  unsigned num_customers() const { -    return num_customers_; -  } - -  unsigned num_customers(const Dish& dish) const { -    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish); -    if (it == dish_locs_.end()) return 0; -    return it->total_dish_count_; -  } - -  // returns (delta, floor) indicating whether a new table (delta) was opened and on which floor -  template <class InputIterator, class InputIterator2> -  TableCount increment(const Dish& dish, InputIterator p0s, InputIterator2 lambdas, MT19937* rng) { -    DishLocations& loc = dish_locs_[dish]; -    // marg_p0 = marginal probability of opening a new table on any floor with label dish -    typedef typename std::iterator_traits<InputIterator>::value_type F; -    const F marg_p0 = std::inner_product(p0s, p0s + Floors, lambdas, F(0.0)); -    assert(marg_p0 <= F(1.0001)); -    int floor = -1; -    bool share_table = false; -    if (loc.total_dish_count_) { -      const F p_empty = F(strength_ + num_tables_ * discount_) * marg_p0; -      const F p_share = F(loc.total_dish_count_ - loc.table_counts_.size() * discount_); -      share_table = rng->SelectSample(p_empty, p_share); -    } -    if (share_table) { -      // this can be done with doubles since P0 (which may be tiny) is not involved -      double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_); -      for (typename std::list<TableCount>::iterator ti = loc.table_counts_.begin(); -           ti != loc.table_counts_.end(); ++ti) { -        r -= ti->count - discount_; -        if (r <= 0.0) { -          ++ti->count; -          floor = ti->floor; -          break; -        } -      } -      if (r > 0.0) { -        std::cerr << "Serious error: r=" << r << std::endl; -        Print(&std::cerr); -        assert(r <= 0.0); -      } -    } else { // sit at currently empty table -- must sample what floor -      if (Floors == 1) { -        floor = 0; -      } else { -        F r = F(rng->next()) * marg_p0; -        for (unsigned i = 0; i < Floors; ++i) { -          r -= (*p0s) * (*lambdas); -          ++p0s; -          ++lambdas; -          if (r <= F(0.0)) { -            floor = i; -            break; -          } -        } -      } -      assert(floor >= 0); -      loc.table_counts_.push_back(TableCount(1, floor)); -      ++num_tables_; -    } -    ++loc.total_dish_count_; -    ++num_customers_; -    return (share_table ? TableCount(0, floor) : TableCount(1, floor)); -  } - -  // returns first = -1 or 0, indicating whether a table was closed, and on what floor (second) -  TableCount decrement(const Dish& dish, MT19937* rng) { -    DishLocations& loc = dish_locs_[dish]; -    assert(loc.total_dish_count_); -    int floor = -1; -    int delta = 0; -    if (loc.total_dish_count_ == 1) { -      floor = loc.table_counts_.front().floor; -      dish_locs_.erase(dish); -      --num_tables_; -      --num_customers_; -      delta = -1; -    } else { -      // sample customer to remove UNIFORMLY. that is, do NOT use the d -      // here. if you do, it will introduce (unwanted) bias! -      double r = rng->next() * loc.total_dish_count_; -      --loc.total_dish_count_; -      --num_customers_; -      for (typename std::list<TableCount>::iterator ti = loc.table_counts_.begin(); -           ti != loc.table_counts_.end(); ++ti) { -        r -= ti->count; -        if (r <= 0.0) { -          floor = ti->floor; -          if ((--ti->count) == 0) { -            --num_tables_; -            delta = -1; -            loc.table_counts_.erase(ti); -          } -          break; -        } -      } -      if (r > 0.0) { -        std::cerr << "Serious error: r=" << r << std::endl; -        Print(&std::cerr); -        assert(r <= 0.0); -      } -    } -    return TableCount(delta, floor); -  } - -  template <class InputIterator, class InputIterator2> -  typename std::iterator_traits<InputIterator>::value_type prob(const Dish& dish, InputIterator p0s, InputIterator2 lambdas) const { -    typedef typename std::iterator_traits<InputIterator>::value_type F; -    const F marg_p0 = std::inner_product(p0s, p0s + Floors, lambdas, F(0.0)); -    assert(marg_p0 <= F(1.0001)); -    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish); -    const F r = F(num_tables_ * discount_ + strength_); -    if (it == dish_locs_.end()) { -      return r * marg_p0 / F(num_customers_ + strength_); -    } else { -      return (F(it->second.total_dish_count_ - discount_ * it->second.table_counts_.size()) + F(r * marg_p0)) / -               F(num_customers_ + strength_); -    } -  } - -  double log_crp_prob() const { -    return log_crp_prob(discount_, strength_); -  } - -  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process -  // does not include draws from G_w's -  double log_crp_prob(const double& discount, const double& strength) const { -    double lp = 0.0; -    if (has_discount_prior()) -      lp = Md::log_beta_density(discount, discount_prior_strength_, discount_prior_beta_); -    if (has_strength_prior()) -      lp += Md::log_gamma_density(strength + discount, strength_prior_shape_, strength_prior_rate_); -    assert(lp <= 0.0); -    if (num_customers_) { -      if (discount > 0.0) { -        const double r = lgamma(1.0 - discount); -        if (strength) -          lp += lgamma(strength) - lgamma(strength / discount); -        lp += - lgamma(strength + num_customers_) -             + num_tables_ * log(discount) + lgamma(strength / discount + num_tables_); -        assert(std::isfinite(lp)); -        for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin(); -             it != dish_locs_.end(); ++it) { -          const DishLocations& cur = it->second; -          for (std::list<TableCount>::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) { -            lp += lgamma(ti->count - discount) - r; -          } -        } -      } else if (!discount) { // discount == 0.0 -        lp += lgamma(strength) + num_tables_ * log(strength) - lgamma(strength + num_tables_); -        assert(std::isfinite(lp)); -        for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin(); -             it != dish_locs_.end(); ++it) { -          const DishLocations& cur = it->second; -          lp += lgamma(cur.table_counts_.size()); -        } -      } else { -        assert(!"discount less than 0 detected!"); -      } -    } -    assert(std::isfinite(lp)); -    return lp; -  } - -  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { -    assert(has_discount_prior() || has_strength_prior()); -    DiscountResampler dr(*this); -    StrengthResampler sr(*this); -    for (int iter = 0; iter < nloop; ++iter) { -      if (has_strength_prior()) { -        strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, -                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -      } -      if (has_discount_prior()) { -        double min_discount = std::numeric_limits<double>::min(); -        if (strength_ < 0.0) min_discount -= strength_; -        discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, -                               1.0, 0.0, niterations, 100*niterations); -      } -    } -    strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, -                             std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -  } - -  struct DiscountResampler { -    DiscountResampler(const MFCR& crp) : crp_(crp) {} -    const MFCR& crp_; -    double operator()(const double& proposed_d) const { -      return crp_.log_crp_prob(proposed_d, crp_.strength_); -    } -  }; - -  struct StrengthResampler { -    StrengthResampler(const MFCR& crp) : crp_(crp) {} -    const MFCR& crp_; -    double operator()(const double& proposediscount_strength) const { -      return crp_.log_crp_prob(crp_.discount_, proposediscount_strength); -    } -  }; - -  struct DishLocations { -    DishLocations() : total_dish_count_() {} -    unsigned total_dish_count_;          // customers at all tables with this dish -    std::list<TableCount> table_counts_; // list<> gives O(1) deletion and insertion, which we want -                                         // .size() is the number of tables for this dish -  }; - -  void Print(std::ostream* out) const { -    (*out) << "MFCR<" << Floors << ">(d=" << discount_ << ",strength=" << strength_ << ") customers=" << num_customers_ << std::endl; -    for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin(); -         it != dish_locs_.end(); ++it) { -      (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; -      for (typename std::list<TableCount>::const_iterator i = it->second.table_counts_.begin(); -           i != it->second.table_counts_.end(); ++i) { -        (*out) << " " << *i; -      } -      (*out) << std::endl; -    } -  } - -  typedef typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator const_iterator; -  const_iterator begin() const { -    return dish_locs_.begin(); -  } -  const_iterator end() const { -    return dish_locs_.end(); -  } - -  unsigned num_tables_; -  unsigned num_customers_; -  std::tr1::unordered_map<Dish, DishLocations, DishHash> dish_locs_; - -  double discount_; -  double strength_; - -  // optional beta prior on discount_ (NaN if no prior) -  double discount_prior_strength_; -  double discount_prior_beta_; - -  // optional gamma prior on strength_ (NaN if no prior) -  double strength_prior_shape_; -  double strength_prior_rate_; -}; - -template <unsigned N,typename T,typename H> -std::ostream& operator<<(std::ostream& o, const MFCR<N,T,H>& c) { -  c.Print(&o); -  return o; -} - -#endif diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc deleted file mode 100644 index 29a1a2ce..00000000 --- a/utils/mfcr_test.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include "mfcr.h" - -#include <iostream> -#include <cassert> -#include <cmath> - -#define BOOST_TEST_MODULE MFCRTest -#include <boost/test/unit_test.hpp> -#include <boost/test/floating_point_comparison.hpp> - -#include "sampler.h" - -using namespace std; - -BOOST_AUTO_TEST_CASE(Exchangability) { -  MT19937 r; -  MT19937* rng = &r; -  MFCR<2, int> crp(0.5, 3.0); -  vector<double> lambdas(2); -  vector<double> p0s(2); -  lambdas[0] = 0.2; -  lambdas[1] = 0.8; -  p0s[0] = 1.0; -  p0s[1] = 1.0; - -  double tot = 0; -  double tot2 = 0; -  double xt = 0; -  int cust = 10; -  vector<int> hist(cust + 1, 0), hist2(cust + 1, 0); -  for (int i = 0; i < cust; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } -  const int samples = 100000; -  const bool simulate = true; -  for (int k = 0; k < samples; ++k) { -    if (!simulate) { -      crp.clear(); -      for (int i = 0; i < cust; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } -    } else { -      int da = rng->next() * cust; -      bool a = rng->next() < 0.45; -      if (a) { -        for (int i = 0; i < da; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } -        for (int i = 0; i < da; ++i) { crp.decrement(1, rng); } -        xt += 1.0; -      } else { -        for (int i = 0; i < da; ++i) { crp.decrement(1, rng); } -        for (int i = 0; i < da; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } -      } -    } -    int c = crp.num_tables(1); -    ++hist[c]; -    tot += c; -    int c2 = crp.num_tables(1,0);  // tables on floor 0 with dish 1 -    ++hist2[c2]; -    tot2 += c2; -  } -  cerr << cust << " = " << crp.num_customers() << endl; -  cerr << "P(a) = " << (xt / samples) << endl; -  cerr << "E[num tables] = " << (tot / samples) << endl; -  double error = fabs((tot / samples) - 6.894); -  cerr << "   error = " << error << endl; -  for (int i = 1; i <= cust; ++i) -    cerr << i << ' ' << (hist[i]) << endl; -  cerr << "E[num tables on floor 0] = " << (tot2 / samples) << endl; -  double error2 = fabs((tot2 / samples) - 1.379); -  cerr << "  error2 = " << error2 << endl; -  for (int i = 1; i <= cust; ++i) -    cerr << i << ' ' << (hist2[i]) << endl; -  assert(error < 0.05);   // these can fail with very low probability -  assert(error2 < 0.05); -}; - diff --git a/utils/sampler.h b/utils/sampler.h index 3e4a4086..88e1856c 100644 --- a/utils/sampler.h +++ b/utils/sampler.h @@ -19,7 +19,7 @@  #include "prob.h" -template <typename F> struct SampleSet; +template <typename F> class SampleSet;  template <typename RNG>  struct RandomNumberGenerator { diff --git a/utils/slice_sampler.h b/utils/slice_sampler.h deleted file mode 100644 index aa48a169..00000000 --- a/utils/slice_sampler.h +++ /dev/null @@ -1,191 +0,0 @@ -//! slice-sampler.h is an MCMC slice sampler -//! -//! Mark Johnson, 1st August 2008 - -#ifndef SLICE_SAMPLER_H -#define SLICE_SAMPLER_H - -#include <algorithm> -#include <cassert> -#include <cmath> -#include <iostream> -#include <limits> - -//! slice_sampler_rfc_type{} returns the value of a user-specified -//! function if the argument is within range, or - infinity otherwise -// -template <typename F, typename Fn, typename U> -struct slice_sampler_rfc_type { -  F min_x, max_x; -  const Fn& f; -  U max_nfeval, nfeval; -  slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval)  -    : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { } -     -  F operator() (F x) { -    if (min_x < x && x < max_x) { -      assert(++nfeval <= max_nfeval); -      F fx = f(x); -      assert(std::isfinite(fx)); -      return fx; -    } -      return -std::numeric_limits<F>::infinity(); -  } -};  // slice_sampler_rfc_type{} - -//! slice_sampler1d() implements the univariate "range doubling" slice sampler -//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767. -// -template <typename F, typename LogF, typename Uniform01> -F slice_sampler1d(const LogF& logF0,               //!< log of function to sample -		  F x,                             //!< starting point -		  Uniform01& u01,                  //!< uniform [0,1) random number generator -		  F min_x = -std::numeric_limits<F>::infinity(),  //!< minimum value of support -		  F max_x = std::numeric_limits<F>::infinity(),   //!< maximum value of support -		  F w = 0.0,                       //!< guess at initial width -		  unsigned nsamples=1,             //!< number of samples to draw -		  unsigned max_nfeval=200)         //!< max number of function evaluations -{ -  typedef unsigned U; -  slice_sampler_rfc_type<F,LogF,U> logF(min_x, max_x, logF0, max_nfeval); - -  assert(std::isfinite(x)); - -  if (w <= 0.0) {                           // set w to a default width  -    if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity()) -      w = (max_x - min_x)/4; -    else -      w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1); -  } -  assert(std::isfinite(w)); - -  F logFx = logF(x); -  for (U sample = 0; sample < nsamples; ++sample) { -    F logY = logFx + log(u01()+1e-100);     //! slice logFx at this value -    assert(std::isfinite(logY)); - -    F xl = x - w*u01();                     //! lower bound on slice interval -    F logFxl = logF(xl); -    F xr = xl + w;                          //! upper bound on slice interval -    F logFxr = logF(xr); - -    while (logY < logFxl || logY < logFxr)  // doubling procedure -      if (u01() < 0.5)  -	logFxl = logF(xl -= xr - xl); -      else -	logFxr = logF(xr += xr - xl); -	 -    F xl1 = xl; -    F xr1 = xr; -    while (true) {                          // shrinking procedure -      F x1 = xl1 + u01()*(xr1 - xl1); -      if (logY < logF(x1)) { -	F xl2 = xl;                         // acceptance procedure -	F xr2 = xr;  -	bool d = false; -	while (xr2 - xl2 > 1.1*w) { -	  F xm = (xl2 + xr2)/2; -	  if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm)) -	    d = true; -	  if (x1 < xm) -	    xr2 = xm; -	  else -	    xl2 = xm; -	  if (d && logY >= logF(xl2) && logY >= logF(xr2)) -	    goto unacceptable; -	} -	x = x1; -	goto acceptable; -      } -      goto acceptable; -    unacceptable: -      if (x1 < x)                           // rest of shrinking procedure -	xl1 = x1; -      else  -	xr1 = x1; -    } -  acceptable: -    w = (4*w + (xr1 - xl1))/5;              // update width estimate -  } -  return x; -} - -/* -//! slice_sampler1d() implements a 1-d MCMC slice sampler. -//! It should be correct for unimodal distributions, but -//! not for multimodal ones. -// -template <typename F, typename LogP, typename Uniform01> -F slice_sampler1d(const LogP& logP,     //!< log of distribution to sample -		  F x,                  //!< initial sample -		  Uniform01& u01,       //!< uniform random number generator -		  F min_x = -std::numeric_limits<F>::infinity(),  //!< minimum value of support -		  F max_x = std::numeric_limits<F>::infinity(),   //!< maximum value of support -		  F w = 0.0,            //!< guess at initial width -		  unsigned nsamples=1,  //!< number of samples to draw -		  unsigned max_nfeval=200)  //!< max number of function evaluations -{ -  typedef unsigned U; -  assert(std::isfinite(x)); -  if (w <= 0.0) { -    if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity()) -      w = (max_x - min_x)/4; -    else -      w = std::max(((x < 0.0) ? -x : x)/4, 0.1); -  } -  // TRACE4(x, min_x, max_x, w); -  F logPx = logP(x); -  assert(std::isfinite(logPx)); -  U nfeval = 1; -  for (U sample = 0; sample < nsamples; ++sample) { -    F x0 = x; -    F logU = logPx + log(u01()+1e-100); -    assert(std::isfinite(logU)); -    F r = u01(); -    F xl = std::max(min_x, x - r*w); -    F xr = std::min(max_x, x + (1-r)*w); -    // TRACE3(x, logPx, logU); -    while (xl > min_x && logP(xl) > logU) { -      xl -= w; -      w *= 2; -      ++nfeval; -      if (nfeval >= max_nfeval) -	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl; -      assert(nfeval < max_nfeval); -    } -    xl = std::max(xl, min_x); -    while (xr < max_x && logP(xr) > logU) { -      xr += w; -      w *= 2; -      ++nfeval; -      if (nfeval >= max_nfeval) -	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl; -      assert(nfeval < max_nfeval); -    } -    xr = std::min(xr, max_x); -    while (true) { -      r = u01(); -      x = r*xl + (1-r)*xr; -      assert(std::isfinite(x)); -      logPx = logP(x); -      // TRACE4(logPx, x, xl, xr); -      assert(std::isfinite(logPx)); -      ++nfeval; -      if (nfeval >= max_nfeval) -	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl; -      assert(nfeval < max_nfeval); -      if (logPx > logU) -        break; -      else if (x > x0) -          xr = x; -        else -          xl = x; -    } -    // w = (4*w + (xr-xl))/5;   // gradually adjust w -  } -  // TRACE2(logPx, x); -  return x; -}  // slice_sampler1d() -*/ - -#endif  // SLICE_SAMPLER_H diff --git a/utils/small_vector.h b/utils/small_vector.h index 894b1b32..c8a69927 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -66,7 +66,7 @@ class SmallVector {    //TODO: figure out iterator traits to allow this to be selcted for any iterator range    template <class I>    SmallVector(I const* begin,I const* end) { -    int s=end-begin; +    unsigned s=end-begin;      Alloc(s);      if (s <= SV_MAX) {        for (unsigned i = 0; i < s; ++i,++begin) data_.vals[i] = *begin; diff --git a/utils/stringlib.h b/utils/stringlib.h index 75772c4d..ff5dc89d 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -86,7 +86,7 @@ bool match_begin(Str const& str,Prefix const& prefix)  // source will be returned as a string, target must be a sentence or  // a lattice (in PLF format) and will be returned as a Lattice object  void ParseTranslatorInput(const std::string& line, std::string* input, std::string* ref); -struct Lattice; +class Lattice;  void ParseTranslatorInputLattice(const std::string& line, std::string* input, Lattice* ref);  inline std::string Trim(const std::string& str, const std::string& dropChars = " \t") { diff --git a/utils/unigram_pyp_lm.cc b/utils/unigram_pyp_lm.cc deleted file mode 100644 index 30b9fde1..00000000 --- a/utils/unigram_pyp_lm.cc +++ /dev/null @@ -1,214 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "corpus_tools.h" -#include "m.h" -#include "tdict.h" -#include "sampler.h" -#include "ccrp.h" -#include "gamma_poisson.h" - -// A not very memory-efficient implementation of an 1-gram LM based on PYPs -// as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model -// based on Pitman-Yor Processes. In Proc. ACL. - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr<MT19937> prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,n",po::value<unsigned>()->default_value(50),"Number of samples") -        ("train,i",po::value<string>(),"Training data file") -        ("test,T",po::value<string>(),"Test data file") -        ("discount_prior_a,a",po::value<double>()->default_value(1.0), "discount ~ Beta(a,b): a=this") -        ("discount_prior_b,b",po::value<double>()->default_value(1.0), "discount ~ Beta(a,b): b=this") -        ("strength_prior_s,s",po::value<double>()->default_value(1.0), "strength ~ Gamma(s,r): s=this") -        ("strength_prior_r,r",po::value<double>()->default_value(1.0), "strength ~ Gamma(s,r): r=this") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("train") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -struct Histogram { -  void increment(unsigned bin, unsigned delta = 1u) { -    data[bin] += delta; -  } -  void decrement(unsigned bin, unsigned delta = 1u) { -    data[bin] -= delta; -  } -  void move(unsigned from_bin, unsigned to_bin, unsigned delta = 1u) { -    decrement(from_bin, delta); -    increment(to_bin, delta); -  } -  map<unsigned, unsigned> data; -  // SparseVector<unsigned> data; -}; - -// Lord Rothschild. 1986. THE DISTRIBUTION OF ENGLISH DICTIONARY WORD LENGTHS. -// Journal of Statistical Planning and Inference 14 (1986) 311-322 -struct PoissonLengthUniformCharWordModel { -  explicit PoissonLengthUniformCharWordModel(unsigned vocab_size) : plen(5,5), uc(-log(50)), llh() {} -  void increment(WordID w, MT19937*) { -    llh += log(prob(w)); // this isn't quite right -    plen.increment(TD::Convert(w).size() - 1); -  } -  void decrement(WordID w, MT19937*) { -    plen.decrement(TD::Convert(w).size() - 1); -    llh -= log(prob(w)); // this isn't quite right -  } -  double prob(WordID w) const { -    size_t len = TD::Convert(w).size(); -    return plen.prob(len - 1) * exp(uc * len); -  } -  double log_likelihood() const { return llh; } -  void resample_hyperparameters(MT19937*) {} -  GammaPoisson plen; -  const double uc; -  double llh; -}; - -// uniform base distribution (0-gram model) -struct UniformWordModel { -  explicit UniformWordModel(unsigned vocab_size) : p0(1.0 / vocab_size), draws() {} -  void increment(WordID, MT19937*) { ++draws; } -  void decrement(WordID, MT19937*) { --draws; assert(draws >= 0); } -  double prob(WordID) const { return p0; } // all words have equal prob -  double log_likelihood() const { return draws * log(p0); } -  void resample_hyperparameters(MT19937*) {} -  const double p0; -  int draws; -}; - -// represents an Unigram LM -template <class BaseGenerator> -struct UnigramLM { -  UnigramLM(unsigned vs, double da, double db, double ss, double sr) : -      base(vs), -      crp(da, db, ss, sr, 0.8, 1.0) {} -  void increment(WordID w, MT19937* rng) { -    const double backoff = base.prob(w); -    if (crp.increment(w, backoff, rng)) -      base.increment(w, rng); -  } -  void decrement(WordID w, MT19937* rng) { -    if (crp.decrement(w, rng)) -      base.decrement(w, rng); -  } -  double prob(WordID w) const { -    const double backoff = base.prob(w); -    return crp.prob(w, backoff); -  } - -  double log_likelihood() const { -    double llh = base.log_likelihood(); -    llh += crp.log_crp_prob(); -    return llh; -  } - -  void resample_hyperparameters(MT19937* rng) { -    crp.resample_hyperparameters(rng); -    base.resample_hyperparameters(rng); -  } - -  double discount_a, discount_b, strength_s, strength_r; -  double d, strength; -  BaseGenerator base; -  CCRP<WordID> crp; -}; - -int main(int argc, char** argv) { -  po::variables_map conf; - -  InitCommandLine(argc, argv, &conf); -  const unsigned samples = conf["samples"].as<unsigned>(); -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; -  vector<vector<WordID> > corpuse; -  set<WordID> vocabe; -  const WordID kEOS = TD::Convert("</s>"); -  cerr << "Reading corpus...\n"; -  CorpusTools::ReadFromFile(conf["train"].as<string>(), &corpuse, &vocabe); -  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -  vector<vector<WordID> > test; -  if (conf.count("test")) -    CorpusTools::ReadFromFile(conf["test"].as<string>(), &test); -  else -    test = corpuse; -#if 1 -  UnigramLM<PoissonLengthUniformCharWordModel> lm(vocabe.size(), -#else -  UnigramLM<UniformWordModel> lm(vocabe.size(), -#endif -                                 conf["discount_prior_a"].as<double>(), -                                 conf["discount_prior_b"].as<double>(), -                                 conf["strength_prior_s"].as<double>(), -                                 conf["strength_prior_r"].as<double>()); -  for (unsigned SS=0; SS < samples; ++SS) { -    for (unsigned ci = 0; ci < corpuse.size(); ++ci) { -      const vector<WordID>& s = corpuse[ci]; -      for (unsigned i = 0; i <= s.size(); ++i) { -        WordID w = (i < s.size() ? s[i] : kEOS); -        if (SS > 0) lm.decrement(w, &rng); -        lm.increment(w, &rng); -      } -      if (SS > 0) lm.decrement(kEOS, &rng); -      lm.increment(kEOS, &rng); -    } -    cerr << "LLH=" << lm.log_likelihood() << "\t tables=" << lm.crp.num_tables() << " " << endl; -    if (SS % 10 == 9) lm.resample_hyperparameters(&rng); -  } -  double llh = 0; -  unsigned cnt = 0; -  unsigned oovs = 0; -  for (unsigned ci = 0; ci < test.size(); ++ci) { -    const vector<WordID>& s = test[ci]; -    for (unsigned i = 0; i <= s.size(); ++i) { -      WordID w = (i < s.size() ? s[i] : kEOS); -      double lp = log(lm.prob(w)) / log(2); -      if (i < s.size() && vocabe.count(w) == 0) { -        cerr << "**OOV "; -        ++oovs; -        //lp = 0; -      } -      cerr << "p(" << TD::Convert(w) << ") = " << lp << endl; -      llh -= lp; -      cnt++; -    } -  } -  cerr << "  Log_10 prob: " << (-llh * log(2) / log(10)) << endl; -  cerr << "        Count: " << cnt << endl; -  cerr << "         OOVs: " << oovs << endl; -  cerr << "Cross-entropy: " << (llh / cnt) << endl; -  cerr << "   Perplexity: " << pow(2, llh / cnt) << endl; -  return 0; -} - diff --git a/utils/weights.cc b/utils/weights.cc index f56e2a20..575877b6 100644 --- a/utils/weights.cc +++ b/utils/weights.cc @@ -34,7 +34,7 @@ void Weights::InitFromFile(const string& filename,      int weight_count = 0;      bool fl = false;      string buf; -    weight_t val = 0; +    double val = 0;      while (in) {        getline(in, buf);        if (buf.size() == 0) continue; @@ -53,7 +53,7 @@ void Weights::InitFromFile(const string& filename,        if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); }        while(end < buf.size() && buf[end] == ' ') ++end;        val = strtod(&buf.c_str()[end], NULL); -      if (isnan(val)) { +      if (std::isnan(val)) {          cerr << FD::Convert(fid) << " has weight NaN!\n";          abort();        } @@ -127,8 +127,8 @@ void Weights::InitSparseVector(const vector<weight_t>& dv,  void Weights::SanityCheck(const vector<weight_t>& w) {    for (unsigned i = 0; i < w.size(); ++i) { -    assert(!isnan(w[i])); -    assert(!isinf(w[i])); +    assert(!std::isnan(w[i])); +    assert(!std::isinf(w[i]));    }  } | 
