From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- utils/Jamfile | 32 ---- utils/Makefile.am | 7 +- utils/ccrp.h | 270 --------------------------------- utils/ccrp_nt.h | 164 -------------------- utils/ccrp_onetable.h | 253 ------------------------------- utils/crp_table_manager.h | 114 -------------- utils/crp_test.cc | 91 ----------- utils/fast_sparse_vector.h | 2 +- utils/gamma_poisson.h | 33 ---- utils/mfcr.h | 370 --------------------------------------------- utils/mfcr_test.cc | 72 --------- utils/sampler.h | 2 +- utils/slice_sampler.h | 191 ----------------------- utils/small_vector.h | 2 +- utils/stringlib.h | 2 +- utils/unigram_pyp_lm.cc | 214 -------------------------- 16 files changed, 6 insertions(+), 1813 deletions(-) delete mode 100644 utils/Jamfile delete mode 100644 utils/ccrp.h delete mode 100644 utils/ccrp_nt.h delete mode 100644 utils/ccrp_onetable.h delete mode 100644 utils/crp_table_manager.h delete mode 100644 utils/crp_test.cc delete mode 100644 utils/gamma_poisson.h delete mode 100644 utils/mfcr.h delete mode 100644 utils/mfcr_test.cc delete mode 100644 utils/slice_sampler.h delete mode 100644 utils/unigram_pyp_lm.cc (limited to 'utils') diff --git a/utils/Jamfile b/utils/Jamfile deleted file mode 100644 index 4444b25f..00000000 --- a/utils/Jamfile +++ /dev/null @@ -1,32 +0,0 @@ -import testing ; -import option ; - -additional = ; -if [ option.get with-cmph ] { - additional += perfect_hash.cc ; -} - -lib utils : - alignment_io.cc - b64tools.cc - corpus_tools.cc - dict.cc - tdict.cc - fdict.cc - gzstream.cc - filelib.cc - stringlib.cc - sparse_vector.cc - timing_stats.cc - verbose.cc - weights.cc - $(additional) - ..//z - : .. . : : .. . ; - -exe atools : atools.cc utils ..//boost_program_options ; -exe reconstruct_weights : reconstruct_weights.cc utils ..//boost_program_options ; - -alias programs : reconstruct_weights atools ; - -all_tests [ glob *_test.cc phmt.cc ts.cc ] : utils : $(TOP)/utils/test_data ; diff --git a/utils/Makefile.am b/utils/Makefile.am index 799ec879..55d97354 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -3,16 +3,13 @@ bin_PROGRAMS = reconstruct_weights atools noinst_PROGRAMS = \ ts \ phmt \ - mfcr_test \ - crp_test \ dict_test \ m_test \ weights_test \ logval_test \ - small_vector_test \ - unigram_pyp_lm + small_vector_test -TESTS = ts mfcr_test crp_test small_vector_test logval_test weights_test dict_test m_test +TESTS = ts small_vector_test logval_test weights_test dict_test m_test noinst_LIBRARIES = libutils.a diff --git a/utils/ccrp.h b/utils/ccrp.h deleted file mode 100644 index f5d3fc78..00000000 --- a/utils/ccrp.h +++ /dev/null @@ -1,270 +0,0 @@ -#ifndef _CCRP_H_ -#define _CCRP_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "sampler.h" -#include "slice_sampler.h" -#include "crp_table_manager.h" -#include "m.h" - -// Chinese restaurant process (Pitman-Yor parameters) with table tracking. - -template > -class CCRP { - public: - CCRP(double disc, double strength) : - num_tables_(), - num_customers_(), - discount_(disc), - strength_(strength), - discount_prior_strength_(std::numeric_limits::quiet_NaN()), - discount_prior_beta_(std::numeric_limits::quiet_NaN()), - strength_prior_shape_(std::numeric_limits::quiet_NaN()), - strength_prior_rate_(std::numeric_limits::quiet_NaN()) { - check_hyperparameters(); - } - - CCRP(double d_strength, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : - num_tables_(), - num_customers_(), - discount_(d), - strength_(c), - discount_prior_strength_(d_strength), - discount_prior_beta_(d_beta), - strength_prior_shape_(c_shape), - strength_prior_rate_(c_rate) { - check_hyperparameters(); - } - - void check_hyperparameters() { - if (discount_ < 0.0 || discount_ >= 1.0) { - std::cerr << "Bad discount: " << discount_ << std::endl; - abort(); - } - if (strength_ <= -discount_) { - std::cerr << "Bad strength: " << strength_ << " (discount=" << discount_ << ")" << std::endl; - abort(); - } - } - - double discount() const { return discount_; } - double strength() const { return strength_; } - void set_hyperparameters(double d, double s) { - discount_ = d; strength_ = s; - check_hyperparameters(); - } - void set_discount(double d) { discount_ = d; check_hyperparameters(); } - void set_strength(double a) { strength_ = a; check_hyperparameters(); } - - bool has_discount_prior() const { - return !std::isnan(discount_prior_strength_); - } - - bool has_strength_prior() const { - return !std::isnan(strength_prior_shape_); - } - - void clear() { - num_tables_ = 0; - num_customers_ = 0; - dish_locs_.clear(); - } - - unsigned num_tables() const { - return num_tables_; - } - - unsigned num_tables(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->second.num_tables(); - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->num_customers(); - } - - // returns +1 or 0 indicating whether a new table was opened - // p = probability with which the particular table was selected - // excluding p0 - template - int increment(const Dish& dish, const T& p0, MT19937* rng, T* p = NULL) { - CRPTableManager& loc = dish_locs_[dish]; - bool share_table = false; - if (loc.num_customers()) { - const T p_empty = T(strength_ + num_tables_ * discount_) * p0; - const T p_share = T(loc.num_customers() - loc.num_tables() * discount_); - share_table = rng->SelectSample(p_empty, p_share); - } - if (share_table) { - loc.share_table(discount_, rng); - } else { - loc.create_table(); - ++num_tables_; - } - ++num_customers_; - return (share_table ? 0 : 1); - } - - // returns -1 or 0, indicating whether a table was closed - int decrement(const Dish& dish, MT19937* rng) { - CRPTableManager& loc = dish_locs_[dish]; - assert(loc.num_customers()); - if (loc.num_customers() == 1) { - dish_locs_.erase(dish); - --num_tables_; - --num_customers_; - return -1; - } else { - int delta = loc.remove_customer(rng); - --num_customers_; - if (delta) --num_tables_; - return delta; - } - } - - template - T prob(const Dish& dish, const T& p0) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const T r = T(num_tables_ * discount_ + strength_); - if (it == dish_locs_.end()) { - return r * p0 / T(num_customers_ + strength_); - } else { - return (T(it->second.num_customers() - discount_ * it->second.num_tables()) + r * p0) / - T(num_customers_ + strength_); - } - } - - double log_crp_prob() const { - return log_crp_prob(discount_, strength_); - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& discount, const double& strength) const { - double lp = 0.0; - if (has_discount_prior()) - lp = Md::log_beta_density(discount, discount_prior_strength_, discount_prior_beta_); - if (has_strength_prior()) - lp += Md::log_gamma_density(strength + discount, strength_prior_shape_, strength_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - if (discount > 0.0) { - const double r = lgamma(1.0 - discount); - if (strength) - lp += lgamma(strength) - lgamma(strength / discount); - lp += - lgamma(strength + num_customers_) - + num_tables_ * log(discount) + lgamma(strength / discount + num_tables_); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - const CRPTableManager& cur = it->second; // TODO check - for (CRPTableManager::const_iterator ti = cur.begin(); ti != cur.end(); ++ti) { - lp += (lgamma(ti->first - discount) - r) * ti->second; - } - } - } else if (!discount) { // discount == 0.0 - lp += lgamma(strength) + num_tables_ * log(strength) - lgamma(strength + num_tables_); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - const CRPTableManager& cur = it->second; - lp += lgamma(cur.num_tables()); - } - } else { - assert(!"discount less than 0 detected!"); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_discount_prior() || has_strength_prior()); - if (num_customers() == 0) return; - DiscountResampler dr(*this); - StrengthResampler sr(*this); - for (unsigned iter = 0; iter < nloop; ++iter) { - if (has_strength_prior()) { - strength_ = slice_sampler1d(sr, strength_, *rng, -discount_ + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - if (has_discount_prior()) { - double min_discount = std::numeric_limits::min(); - if (strength_ < 0.0) min_discount -= strength_; - discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } - } - strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - - struct DiscountResampler { - DiscountResampler(const CCRP& crp) : crp_(crp) {} - const CCRP& crp_; - double operator()(const double& proposed_discount) const { - return crp_.log_crp_prob(proposed_discount, crp_.strength_); - } - }; - - struct StrengthResampler { - StrengthResampler(const CCRP& crp) : crp_(crp) {} - const CCRP& crp_; - double operator()(const double& proposed_strength) const { - return crp_.log_crp_prob(crp_.discount_, proposed_strength); - } - }; - - void Print(std::ostream* out) const { - std::cerr << "PYP(d=" << discount_ << ",c=" << strength_ << ") customers=" << num_customers_ << std::endl; - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - (*out) << it->first << " : " << it->second << std::endl; - } - } - - typedef typename std::tr1::unordered_map::const_iterator const_iterator; - const_iterator begin() const { - return dish_locs_.begin(); - } - const_iterator end() const { - return dish_locs_.end(); - } - - unsigned num_tables_; - unsigned num_customers_; - std::tr1::unordered_map dish_locs_; - - double discount_; - double strength_; - - // optional beta prior on discount_ (NaN if no prior) - double discount_prior_strength_; - double discount_prior_beta_; - - // optional gamma prior on strength_ (NaN if no prior) - double strength_prior_shape_; - double strength_prior_rate_; -}; - -template -std::ostream& operator<<(std::ostream& o, const CCRP& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h deleted file mode 100644 index 724b11bd..00000000 --- a/utils/ccrp_nt.h +++ /dev/null @@ -1,164 +0,0 @@ -#ifndef _CCRP_NT_H_ -#define _CCRP_NT_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "sampler.h" -#include "slice_sampler.h" -#include "m.h" - -// Chinese restaurant process (1 parameter) -template > -class CCRP_NoTable { - public: - explicit CCRP_NoTable(double conc) : - num_customers_(), - alpha_(conc), - alpha_prior_shape_(std::numeric_limits::quiet_NaN()), - alpha_prior_rate_(std::numeric_limits::quiet_NaN()) {} - - CCRP_NoTable(double c_shape, double c_rate, double c = 10.0) : - num_customers_(), - alpha_(c), - alpha_prior_shape_(c_shape), - alpha_prior_rate_(c_rate) {} - - double alpha() const { return alpha_; } - void set_alpha(const double& alpha) { alpha_ = alpha; assert(alpha_ > 0.0); } - - bool has_alpha_prior() const { - return !std::isnan(alpha_prior_shape_); - } - - void clear() { - num_customers_ = 0; - custs_.clear(); - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = custs_.find(dish); - if (it == custs_.end()) return 0; - return it->second; - } - - int increment(const Dish& dish) { - int table_diff = 0; - if (++custs_[dish] == 1) - table_diff = 1; - ++num_customers_; - return table_diff; - } - - int decrement(const Dish& dish) { - int table_diff = 0; - int nc = --custs_[dish]; - if (nc == 0) { - custs_.erase(dish); - table_diff = -1; - } else if (nc < 0) { - std::cerr << "Dish counts dropped below zero for: " << dish << std::endl; - abort(); - } - --num_customers_; - return table_diff; - } - - template - F prob(const Dish& dish, const F& p0) const { - const unsigned at_table = num_customers(dish); - return (F(at_table) + p0 * F(alpha_)) / F(num_customers_ + alpha_); - } - - double logprob(const Dish& dish, const double& logp0) const { - const unsigned at_table = num_customers(dish); - return log(at_table + exp(logp0 + log(alpha_))) - log(num_customers_ + alpha_); - } - - double log_crp_prob() const { - return log_crp_prob(alpha_); - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& alpha) const { - double lp = 0.0; - if (has_alpha_prior()) - lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - lp += lgamma(alpha) - lgamma(alpha + num_customers_) + - custs_.size() * log(alpha); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = custs_.begin(); - it != custs_.end(); ++it) { - lp += lgamma(it->second); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_alpha_prior()); - ConcentrationResampler cr(*this); - for (unsigned iter = 0; iter < nloop; ++iter) { - alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - } - - struct ConcentrationResampler { - ConcentrationResampler(const CCRP_NoTable& crp) : crp_(crp) {} - const CCRP_NoTable& crp_; - double operator()(const double& proposed_alpha) const { - return crp_.log_crp_prob(proposed_alpha); - } - }; - - void Print(std::ostream* out) const { - (*out) << "DP(alpha=" << alpha_ << ") customers=" << num_customers_ << std::endl; - int cc = 0; - for (typename std::tr1::unordered_map::const_iterator it = custs_.begin(); - it != custs_.end(); ++it) { - (*out) << " " << it->first << "(" << it->second << " eating)"; - ++cc; - if (cc > 10) { (*out) << " ..."; break; } - } - (*out) << std::endl; - } - - unsigned num_customers_; - std::tr1::unordered_map custs_; - - typedef typename std::tr1::unordered_map::const_iterator const_iterator; - const_iterator begin() const { - return custs_.begin(); - } - const_iterator end() const { - return custs_.end(); - } - - double alpha_; - - // optional gamma prior on alpha_ (NaN if no prior) - double alpha_prior_shape_; - double alpha_prior_rate_; -}; - -template -std::ostream& operator<<(std::ostream& o, const CCRP_NoTable& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/utils/ccrp_onetable.h b/utils/ccrp_onetable.h deleted file mode 100644 index abe399ea..00000000 --- a/utils/ccrp_onetable.h +++ /dev/null @@ -1,253 +0,0 @@ -#ifndef _CCRP_ONETABLE_H_ -#define _CCRP_ONETABLE_H_ - -#include -#include -#include -#include -#include -#include -#include -#include "sampler.h" -#include "slice_sampler.h" - -// Chinese restaurant process (Pitman-Yor parameters) with one table approximation - -template > -class CCRP_OneTable { - typedef std::tr1::unordered_map DishMapType; - public: - CCRP_OneTable(double disc, double conc) : - num_tables_(), - num_customers_(), - discount_(disc), - alpha_(conc), - discount_prior_alpha_(std::numeric_limits::quiet_NaN()), - discount_prior_beta_(std::numeric_limits::quiet_NaN()), - alpha_prior_shape_(std::numeric_limits::quiet_NaN()), - alpha_prior_rate_(std::numeric_limits::quiet_NaN()) {} - - CCRP_OneTable(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : - num_tables_(), - num_customers_(), - discount_(d), - alpha_(c), - discount_prior_alpha_(d_alpha), - discount_prior_beta_(d_beta), - alpha_prior_shape_(c_shape), - alpha_prior_rate_(c_rate) {} - - double discount() const { return discount_; } - double alpha() const { return alpha_; } - void set_alpha(double c) { alpha_ = c; } - void set_discount(double d) { discount_ = d; } - - bool has_discount_prior() const { - return !std::isnan(discount_prior_alpha_); - } - - bool has_alpha_prior() const { - return !std::isnan(alpha_prior_shape_); - } - - void clear() { - num_tables_ = 0; - num_customers_ = 0; - dish_counts_.clear(); - } - - unsigned num_tables() const { - return num_tables_; - } - - unsigned num_tables(const Dish& dish) const { - const typename DishMapType::const_iterator it = dish_counts_.find(dish); - if (it == dish_counts_.end()) return 0; - return 1; - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename DishMapType::const_iterator it = dish_counts_.find(dish); - if (it == dish_counts_.end()) return 0; - return it->second; - } - - // returns +1 or 0 indicating whether a new table was opened - int increment(const Dish& dish) { - unsigned& dc = dish_counts_[dish]; - ++dc; - ++num_customers_; - if (dc == 1) { - ++num_tables_; - return 1; - } else { - return 0; - } - } - - // returns -1 or 0, indicating whether a table was closed - int decrement(const Dish& dish) { - unsigned& dc = dish_counts_[dish]; - assert(dc > 0); - if (dc == 1) { - dish_counts_.erase(dish); - --num_tables_; - --num_customers_; - return -1; - } else { - assert(dc > 1); - --dc; - --num_customers_; - return 0; - } - } - - double prob(const Dish& dish, const double& p0) const { - const typename DishMapType::const_iterator it = dish_counts_.find(dish); - const double r = num_tables_ * discount_ + alpha_; - if (it == dish_counts_.end()) { - return r * p0 / (num_customers_ + alpha_); - } else { - return (it->second - discount_ + r * p0) / - (num_customers_ + alpha_); - } - } - - template - T probT(const Dish& dish, const T& p0) const { - const typename DishMapType::const_iterator it = dish_counts_.find(dish); - const T r(num_tables_ * discount_ + alpha_); - if (it == dish_counts_.end()) { - return r * p0 / T(num_customers_ + alpha_); - } else { - return (T(it->second - discount_) + r * p0) / - T(num_customers_ + alpha_); - } - } - - double log_crp_prob() const { - return log_crp_prob(discount_, alpha_); - } - - static double log_beta_density(const double& x, const double& alpha, const double& beta) { - assert(x > 0.0); - assert(x < 1.0); - assert(alpha > 0.0); - assert(beta > 0.0); - const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); - return lp; - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& discount, const double& alpha) const { - double lp = 0.0; - if (has_discount_prior()) - lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); - if (has_alpha_prior()) - lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - if (discount > 0.0) { - const double r = lgamma(1.0 - discount); - lp += lgamma(alpha) - lgamma(alpha + num_customers_) - + num_tables_ * log(discount) + lgamma(alpha / discount + num_tables_) - - lgamma(alpha / discount); - assert(std::isfinite(lp)); - for (typename DishMapType::const_iterator it = dish_counts_.begin(); - it != dish_counts_.end(); ++it) { - const unsigned& cur = it->second; - lp += lgamma(cur - discount) - r; - } - } else { - assert(!"not implemented yet"); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_discount_prior() || has_alpha_prior()); - DiscountResampler dr(*this); - ConcentrationResampler cr(*this); - for (unsigned iter = 0; iter < nloop; ++iter) { - if (has_alpha_prior()) { - alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - if (has_discount_prior()) { - discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits::min(), - 1.0, 0.0, niterations, 100*niterations); - } - } - alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - - struct DiscountResampler { - DiscountResampler(const CCRP_OneTable& crp) : crp_(crp) {} - const CCRP_OneTable& crp_; - double operator()(const double& proposed_discount) const { - return crp_.log_crp_prob(proposed_discount, crp_.alpha_); - } - }; - - struct ConcentrationResampler { - ConcentrationResampler(const CCRP_OneTable& crp) : crp_(crp) {} - const CCRP_OneTable& crp_; - double operator()(const double& proposed_alpha) const { - return crp_.log_crp_prob(crp_.discount_, proposed_alpha); - } - }; - - void Print(std::ostream* out) const { - (*out) << "PYP(d=" << discount_ << ",c=" << alpha_ << ") customers=" << num_customers_ << std::endl; - for (typename DishMapType::const_iterator it = dish_counts_.begin(); it != dish_counts_.end(); ++it) { - (*out) << " " << it->first << " = " << it->second << std::endl; - } - } - - typedef typename DishMapType::const_iterator const_iterator; - const_iterator begin() const { - return dish_counts_.begin(); - } - const_iterator end() const { - return dish_counts_.end(); - } - - unsigned num_tables_; - unsigned num_customers_; - DishMapType dish_counts_; - - double discount_; - double alpha_; - - // optional beta prior on discount_ (NaN if no prior) - double discount_prior_alpha_; - double discount_prior_beta_; - - // optional gamma prior on alpha_ (NaN if no prior) - double alpha_prior_shape_; - double alpha_prior_rate_; -}; - -template -std::ostream& operator<<(std::ostream& o, const CCRP_OneTable& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/utils/crp_table_manager.h b/utils/crp_table_manager.h deleted file mode 100644 index 753e721f..00000000 --- a/utils/crp_table_manager.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef _CRP_TABLE_MANAGER_H_ -#define _CRP_TABLE_MANAGER_H_ - -#include -#include "sparse_vector.h" -#include "sampler.h" - -// these are helper classes for implementing token-based CRP samplers -// basically the data structures recommended by Blunsom et al. in the Note. - -struct CRPHistogram { - //typedef std::map MAPTYPE; - typedef SparseVector MAPTYPE; - typedef MAPTYPE::const_iterator const_iterator; - - inline void increment(unsigned bin, unsigned delta = 1u) { - data[bin] += delta; - } - inline void decrement(unsigned bin, unsigned delta = 1u) { - unsigned r = data[bin] -= delta; - if (!r) data.erase(bin); - } - inline void move(unsigned from_bin, unsigned to_bin, unsigned delta = 1u) { - decrement(from_bin, delta); - increment(to_bin, delta); - } - inline const_iterator begin() const { return data.begin(); } - inline const_iterator end() const { return data.end(); } - - private: - MAPTYPE data; -}; - -// A CRPTableManager tracks statistics about all customers -// and tables serving some dish in a CRP and can correctly sample what -// table to remove a customer from and what table to join -struct CRPTableManager { - CRPTableManager() : customers(), tables() {} - - inline unsigned num_tables() const { - return tables; - } - - inline unsigned num_customers() const { - return customers; - } - - inline void create_table() { - h.increment(1); - ++tables; - ++customers; - } - - // seat a customer at a table proportional to the number of customers seated at a table, less the discount - // *new tables are never created by this function! - inline void share_table(const double discount, MT19937* rng) { - const double z = customers - discount * num_tables(); - double r = z * rng->next(); - const CRPHistogram::const_iterator end = h.end(); - CRPHistogram::const_iterator it = h.begin(); - for (; it != end; ++it) { - // it->first = number of customers at table - // it->second = number of such tables - double thresh = (it->first - discount) * it->second; - if (thresh > r) break; - r -= thresh; - } - h.move(it->first, it->first + 1); - ++customers; - } - - // randomly sample a customer - // *tables may be removed - // returns -1 if a table is removed, 0 otherwise - inline int remove_customer(MT19937* rng) { - int r = rng->next() * num_customers(); - const CRPHistogram::const_iterator end = h.end(); - CRPHistogram::const_iterator it = h.begin(); - for (; it != end; ++it) { - int thresh = it->first * it->second; - if (thresh > r) break; - r -= thresh; - } - --customers; - const unsigned tc = it->first; - if (tc == 1) { - h.decrement(1); - --tables; - return -1; - } else { - h.move(tc, tc - 1); - return 0; - } - } - - typedef CRPHistogram::const_iterator const_iterator; - const_iterator begin() const { return h.begin(); } - const_iterator end() const { return h.end(); } - - unsigned customers; - unsigned tables; - CRPHistogram h; -}; - -std::ostream& operator<<(std::ostream& os, const CRPTableManager& tm) { - os << '[' << tm.num_customers() << " total customers at " << tm.num_tables() << " tables ||| "; - for (CRPHistogram::const_iterator it = tm.begin(); it != tm.end(); ++it) { - if (it != tm.h.begin()) os << " -- "; - os << '(' << it->first << ") x " << it->second; - } - return os << ']'; -} - -#endif diff --git a/utils/crp_test.cc b/utils/crp_test.cc deleted file mode 100644 index 0cdb7afd..00000000 --- a/utils/crp_test.cc +++ /dev/null @@ -1,91 +0,0 @@ -#include -#include -#include - -#define BOOST_TEST_MODULE CrpTest -#include -#include - -#include "ccrp.h" -#include "sampler.h" - -using namespace std; - -MT19937 rng; - -BOOST_AUTO_TEST_CASE(Dist) { - CCRP crp(0.1, 5); - double un = 0.25; - int tt = 0; - tt += crp.increment("hi", un, &rng); - tt += crp.increment("foo", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - cout << "tt=" << tt << endl; - cout << crp << endl; - cout << " P(bar)=" << crp.prob("bar", un) << endl; - cout << " P(hi)=" << crp.prob("hi", un) << endl; - cout << " P(baz)=" << crp.prob("baz", un) << endl; - cout << " P(foo)=" << crp.prob("foo", un) << endl; - double x = crp.prob("bar", un) + crp.prob("hi", un) + crp.prob("baz", un) + crp.prob("foo", un); - cout << " tot=" << x << endl; - BOOST_CHECK_CLOSE(1.0, x, 1e-6); - tt += crp.decrement("hi", &rng); - tt += crp.decrement("bar", &rng); - cout << crp << endl; - tt += crp.decrement("bar", &rng); - cout << crp << endl; - cout << "tt=" << tt << endl; -} - -BOOST_AUTO_TEST_CASE(Exchangability) { - double tot = 0; - double xt = 0; - CCRP crp(0.5, 1.0); - int cust = 10; - vector hist(cust + 1, 0); - for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); } - const int samples = 100000; - const bool simulate = true; - for (int k = 0; k < samples; ++k) { - if (!simulate) { - crp.clear(); - for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); } - } else { - int da = rng.next() * cust; - bool a = rng.next() < 0.5; - if (a) { - for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); } - for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); } - xt += 1.0; - } else { - for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); } - for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); } - } - } - int c = crp.num_tables(1); - ++hist[c]; - tot += c; - } - BOOST_CHECK_EQUAL(cust, crp.num_customers()); - cerr << "P(a) = " << (xt / samples) << endl; - cerr << "E[num tables] = " << (tot / samples) << endl; - double error = fabs((tot / samples) - 5.4); - cerr << " error = " << error << endl; - BOOST_CHECK_MESSAGE(error < 0.1, "error is too big = " << error); // it's possible for this to fail, but - // very, very unlikely - for (int i = 1; i <= cust; ++i) - cerr << i << ' ' << (hist[i]) << endl; -} - -BOOST_AUTO_TEST_CASE(LP) { - CCRP crp(1,1,1,1,0.1,50.0); - crp.increment("foo", 1.0, &rng); - cerr << crp.log_crp_prob() << endl; -} - diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 9fe00459..590a60c4 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -522,7 +522,7 @@ const FastSparseVector operator-(const FastSparseVector& x, const FastSpar } template -std::size_t hash_value(FastSparseVector const& x) { +std::size_t hash_value(FastSparseVector const&) { assert(!"not implemented"); return 0; } diff --git a/utils/gamma_poisson.h b/utils/gamma_poisson.h deleted file mode 100644 index fec763f6..00000000 --- a/utils/gamma_poisson.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _GAMMA_POISSON_H_ -#define _GAMMA_POISSON_H_ - -#include - -// http://en.wikipedia.org/wiki/Conjugate_prior -struct GammaPoisson { - GammaPoisson(double shape, double rate) : - a(shape), b(rate), n(), marginal() {} - - double prob(unsigned x) const { - return exp(Md::log_negative_binom(x, a + marginal, 1.0 - (b + n) / (1 + b + n))); - } - - void increment(unsigned x) { - ++n; - marginal += x; - } - - void decrement(unsigned x) { - --n; - marginal -= x; - } - - double log_likelihood() const { - return 0; - } - - double a, b; - int n, marginal; -}; - -#endif diff --git a/utils/mfcr.h b/utils/mfcr.h deleted file mode 100644 index 4aacb567..00000000 --- a/utils/mfcr.h +++ /dev/null @@ -1,370 +0,0 @@ -#ifndef _MFCR_H_ -#define _MFCR_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "sampler.h" -#include "slice_sampler.h" -#include "m.h" - -struct TableCount { - TableCount() : count(), floor() {} - TableCount(int c, int f) : count(c), floor(f) { - assert(f >= 0); - } - int count; // count or delta (may be 0, <0, or >0) - unsigned char floor; // from which floor? -}; - -std::ostream& operator<<(std::ostream& o, const TableCount& tc) { - return o << "[c=" << tc.count << " floor=" << static_cast(tc.floor) << ']'; -} - -// Multi-Floor Chinese Restaurant as proposed by Wood & Teh (AISTATS, 2009) to simulate -// graphical Pitman-Yor processes. -// http://jmlr.csail.mit.edu/proceedings/papers/v5/wood09a/wood09a.pdf -// -// Implementation is based on Blunsom, Cohn, Goldwater, & Johnson (ACL 2009) and code -// referenced therein. -// http://www.aclweb.org/anthology/P/P09/P09-2085.pdf -// -template > -class MFCR { - public: - - MFCR(double d, double strength) : - num_tables_(), - num_customers_(), - discount_(d), - strength_(strength), - discount_prior_strength_(std::numeric_limits::quiet_NaN()), - discount_prior_beta_(std::numeric_limits::quiet_NaN()), - strength_prior_shape_(std::numeric_limits::quiet_NaN()), - strength_prior_rate_(std::numeric_limits::quiet_NaN()) { check_hyperparameters(); } - - MFCR(double discount_strength, double discount_beta, double strength_shape, double strength_rate, double d = 0.9, double strength = 10.0) : - num_tables_(), - num_customers_(), - discount_(d), - strength_(strength), - discount_prior_strength_(discount_strength), - discount_prior_beta_(discount_beta), - strength_prior_shape_(strength_shape), - strength_prior_rate_(strength_rate) { check_hyperparameters(); } - - void check_hyperparameters() { - if (discount_ < 0.0 || discount_ >= 1.0) { - std::cerr << "Bad discount: " << discount_ << std::endl; - abort(); - } - if (strength_ <= -discount_) { - std::cerr << "Bad strength: " << strength_ << " (discount=" << discount_ << ")" << std::endl; - abort(); - } - } - - double discount() const { return discount_; } - double strength() const { return strength_; } - void set_hyperparameters(double d, double s) { - discount_ = d; strength_ = s; - check_hyperparameters(); - } - void set_discount(double d) { discount_ = d; check_hyperparameters(); } - void set_strength(double a) { strength_ = a; check_hyperparameters(); } - - bool has_discount_prior() const { - return !std::isnan(discount_prior_strength_); - } - - bool has_strength_prior() const { - return !std::isnan(strength_prior_shape_); - } - - void clear() { - num_tables_ = 0; - num_customers_ = 0; - dish_locs_.clear(); - } - - unsigned num_tables() const { - return num_tables_; - } - - unsigned num_tables(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->second.table_counts_.size(); - } - - // this is not terribly efficient but it should not typically be necessary to execute this query - unsigned num_tables(const Dish& dish, const unsigned floor) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - unsigned c = 0; - for (typename std::list::const_iterator i = it->second.table_counts_.begin(); - i != it->second.table_counts_.end(); ++i) { - if (i->floor == floor) ++c; - } - return c; - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->total_dish_count_; - } - - // returns (delta, floor) indicating whether a new table (delta) was opened and on which floor - template - TableCount increment(const Dish& dish, InputIterator p0s, InputIterator2 lambdas, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - // marg_p0 = marginal probability of opening a new table on any floor with label dish - typedef typename std::iterator_traits::value_type F; - const F marg_p0 = std::inner_product(p0s, p0s + Floors, lambdas, F(0.0)); - assert(marg_p0 <= F(1.0001)); - int floor = -1; - bool share_table = false; - if (loc.total_dish_count_) { - const F p_empty = F(strength_ + num_tables_ * discount_) * marg_p0; - const F p_share = F(loc.total_dish_count_ - loc.table_counts_.size() * discount_); - share_table = rng->SelectSample(p_empty, p_share); - } - if (share_table) { - // this can be done with doubles since P0 (which may be tiny) is not involved - double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= ti->count - discount_; - if (r <= 0.0) { - ++ti->count; - floor = ti->floor; - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - } else { // sit at currently empty table -- must sample what floor - if (Floors == 1) { - floor = 0; - } else { - F r = F(rng->next()) * marg_p0; - for (unsigned i = 0; i < Floors; ++i) { - r -= (*p0s) * (*lambdas); - ++p0s; - ++lambdas; - if (r <= F(0.0)) { - floor = i; - break; - } - } - } - assert(floor >= 0); - loc.table_counts_.push_back(TableCount(1, floor)); - ++num_tables_; - } - ++loc.total_dish_count_; - ++num_customers_; - return (share_table ? TableCount(0, floor) : TableCount(1, floor)); - } - - // returns first = -1 or 0, indicating whether a table was closed, and on what floor (second) - TableCount decrement(const Dish& dish, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - assert(loc.total_dish_count_); - int floor = -1; - int delta = 0; - if (loc.total_dish_count_ == 1) { - floor = loc.table_counts_.front().floor; - dish_locs_.erase(dish); - --num_tables_; - --num_customers_; - delta = -1; - } else { - // sample customer to remove UNIFORMLY. that is, do NOT use the d - // here. if you do, it will introduce (unwanted) bias! - double r = rng->next() * loc.total_dish_count_; - --loc.total_dish_count_; - --num_customers_; - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= ti->count; - if (r <= 0.0) { - floor = ti->floor; - if ((--ti->count) == 0) { - --num_tables_; - delta = -1; - loc.table_counts_.erase(ti); - } - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - } - return TableCount(delta, floor); - } - - template - typename std::iterator_traits::value_type prob(const Dish& dish, InputIterator p0s, InputIterator2 lambdas) const { - typedef typename std::iterator_traits::value_type F; - const F marg_p0 = std::inner_product(p0s, p0s + Floors, lambdas, F(0.0)); - assert(marg_p0 <= F(1.0001)); - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const F r = F(num_tables_ * discount_ + strength_); - if (it == dish_locs_.end()) { - return r * marg_p0 / F(num_customers_ + strength_); - } else { - return (F(it->second.total_dish_count_ - discount_ * it->second.table_counts_.size()) + F(r * marg_p0)) / - F(num_customers_ + strength_); - } - } - - double log_crp_prob() const { - return log_crp_prob(discount_, strength_); - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include draws from G_w's - double log_crp_prob(const double& discount, const double& strength) const { - double lp = 0.0; - if (has_discount_prior()) - lp = Md::log_beta_density(discount, discount_prior_strength_, discount_prior_beta_); - if (has_strength_prior()) - lp += Md::log_gamma_density(strength + discount, strength_prior_shape_, strength_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - if (discount > 0.0) { - const double r = lgamma(1.0 - discount); - if (strength) - lp += lgamma(strength) - lgamma(strength / discount); - lp += - lgamma(strength + num_customers_) - + num_tables_ * log(discount) + lgamma(strength / discount + num_tables_); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - const DishLocations& cur = it->second; - for (std::list::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) { - lp += lgamma(ti->count - discount) - r; - } - } - } else if (!discount) { // discount == 0.0 - lp += lgamma(strength) + num_tables_ * log(strength) - lgamma(strength + num_tables_); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - const DishLocations& cur = it->second; - lp += lgamma(cur.table_counts_.size()); - } - } else { - assert(!"discount less than 0 detected!"); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_discount_prior() || has_strength_prior()); - DiscountResampler dr(*this); - StrengthResampler sr(*this); - for (int iter = 0; iter < nloop; ++iter) { - if (has_strength_prior()) { - strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - if (has_discount_prior()) { - double min_discount = std::numeric_limits::min(); - if (strength_ < 0.0) min_discount -= strength_; - discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } - } - strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - - struct DiscountResampler { - DiscountResampler(const MFCR& crp) : crp_(crp) {} - const MFCR& crp_; - double operator()(const double& proposed_d) const { - return crp_.log_crp_prob(proposed_d, crp_.strength_); - } - }; - - struct StrengthResampler { - StrengthResampler(const MFCR& crp) : crp_(crp) {} - const MFCR& crp_; - double operator()(const double& proposediscount_strength) const { - return crp_.log_crp_prob(crp_.discount_, proposediscount_strength); - } - }; - - struct DishLocations { - DishLocations() : total_dish_count_() {} - unsigned total_dish_count_; // customers at all tables with this dish - std::list table_counts_; // list<> gives O(1) deletion and insertion, which we want - // .size() is the number of tables for this dish - }; - - void Print(std::ostream* out) const { - (*out) << "MFCR<" << Floors << ">(d=" << discount_ << ",strength=" << strength_ << ") customers=" << num_customers_ << std::endl; - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; - for (typename std::list::const_iterator i = it->second.table_counts_.begin(); - i != it->second.table_counts_.end(); ++i) { - (*out) << " " << *i; - } - (*out) << std::endl; - } - } - - typedef typename std::tr1::unordered_map::const_iterator const_iterator; - const_iterator begin() const { - return dish_locs_.begin(); - } - const_iterator end() const { - return dish_locs_.end(); - } - - unsigned num_tables_; - unsigned num_customers_; - std::tr1::unordered_map dish_locs_; - - double discount_; - double strength_; - - // optional beta prior on discount_ (NaN if no prior) - double discount_prior_strength_; - double discount_prior_beta_; - - // optional gamma prior on strength_ (NaN if no prior) - double strength_prior_shape_; - double strength_prior_rate_; -}; - -template -std::ostream& operator<<(std::ostream& o, const MFCR& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc deleted file mode 100644 index 29a1a2ce..00000000 --- a/utils/mfcr_test.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include "mfcr.h" - -#include -#include -#include - -#define BOOST_TEST_MODULE MFCRTest -#include -#include - -#include "sampler.h" - -using namespace std; - -BOOST_AUTO_TEST_CASE(Exchangability) { - MT19937 r; - MT19937* rng = &r; - MFCR<2, int> crp(0.5, 3.0); - vector lambdas(2); - vector p0s(2); - lambdas[0] = 0.2; - lambdas[1] = 0.8; - p0s[0] = 1.0; - p0s[1] = 1.0; - - double tot = 0; - double tot2 = 0; - double xt = 0; - int cust = 10; - vector hist(cust + 1, 0), hist2(cust + 1, 0); - for (int i = 0; i < cust; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } - const int samples = 100000; - const bool simulate = true; - for (int k = 0; k < samples; ++k) { - if (!simulate) { - crp.clear(); - for (int i = 0; i < cust; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } - } else { - int da = rng->next() * cust; - bool a = rng->next() < 0.45; - if (a) { - for (int i = 0; i < da; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } - for (int i = 0; i < da; ++i) { crp.decrement(1, rng); } - xt += 1.0; - } else { - for (int i = 0; i < da; ++i) { crp.decrement(1, rng); } - for (int i = 0; i < da; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } - } - } - int c = crp.num_tables(1); - ++hist[c]; - tot += c; - int c2 = crp.num_tables(1,0); // tables on floor 0 with dish 1 - ++hist2[c2]; - tot2 += c2; - } - cerr << cust << " = " << crp.num_customers() << endl; - cerr << "P(a) = " << (xt / samples) << endl; - cerr << "E[num tables] = " << (tot / samples) << endl; - double error = fabs((tot / samples) - 6.894); - cerr << " error = " << error << endl; - for (int i = 1; i <= cust; ++i) - cerr << i << ' ' << (hist[i]) << endl; - cerr << "E[num tables on floor 0] = " << (tot2 / samples) << endl; - double error2 = fabs((tot2 / samples) - 1.379); - cerr << " error2 = " << error2 << endl; - for (int i = 1; i <= cust; ++i) - cerr << i << ' ' << (hist2[i]) << endl; - assert(error < 0.05); // these can fail with very low probability - assert(error2 < 0.05); -}; - diff --git a/utils/sampler.h b/utils/sampler.h index 3e4a4086..88e1856c 100644 --- a/utils/sampler.h +++ b/utils/sampler.h @@ -19,7 +19,7 @@ #include "prob.h" -template struct SampleSet; +template class SampleSet; template struct RandomNumberGenerator { diff --git a/utils/slice_sampler.h b/utils/slice_sampler.h deleted file mode 100644 index aa48a169..00000000 --- a/utils/slice_sampler.h +++ /dev/null @@ -1,191 +0,0 @@ -//! slice-sampler.h is an MCMC slice sampler -//! -//! Mark Johnson, 1st August 2008 - -#ifndef SLICE_SAMPLER_H -#define SLICE_SAMPLER_H - -#include -#include -#include -#include -#include - -//! slice_sampler_rfc_type{} returns the value of a user-specified -//! function if the argument is within range, or - infinity otherwise -// -template -struct slice_sampler_rfc_type { - F min_x, max_x; - const Fn& f; - U max_nfeval, nfeval; - slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval) - : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { } - - F operator() (F x) { - if (min_x < x && x < max_x) { - assert(++nfeval <= max_nfeval); - F fx = f(x); - assert(std::isfinite(fx)); - return fx; - } - return -std::numeric_limits::infinity(); - } -}; // slice_sampler_rfc_type{} - -//! slice_sampler1d() implements the univariate "range doubling" slice sampler -//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767. -// -template -F slice_sampler1d(const LogF& logF0, //!< log of function to sample - F x, //!< starting point - Uniform01& u01, //!< uniform [0,1) random number generator - F min_x = -std::numeric_limits::infinity(), //!< minimum value of support - F max_x = std::numeric_limits::infinity(), //!< maximum value of support - F w = 0.0, //!< guess at initial width - unsigned nsamples=1, //!< number of samples to draw - unsigned max_nfeval=200) //!< max number of function evaluations -{ - typedef unsigned U; - slice_sampler_rfc_type logF(min_x, max_x, logF0, max_nfeval); - - assert(std::isfinite(x)); - - if (w <= 0.0) { // set w to a default width - if (min_x > -std::numeric_limits::infinity() && max_x < std::numeric_limits::infinity()) - w = (max_x - min_x)/4; - else - w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1); - } - assert(std::isfinite(w)); - - F logFx = logF(x); - for (U sample = 0; sample < nsamples; ++sample) { - F logY = logFx + log(u01()+1e-100); //! slice logFx at this value - assert(std::isfinite(logY)); - - F xl = x - w*u01(); //! lower bound on slice interval - F logFxl = logF(xl); - F xr = xl + w; //! upper bound on slice interval - F logFxr = logF(xr); - - while (logY < logFxl || logY < logFxr) // doubling procedure - if (u01() < 0.5) - logFxl = logF(xl -= xr - xl); - else - logFxr = logF(xr += xr - xl); - - F xl1 = xl; - F xr1 = xr; - while (true) { // shrinking procedure - F x1 = xl1 + u01()*(xr1 - xl1); - if (logY < logF(x1)) { - F xl2 = xl; // acceptance procedure - F xr2 = xr; - bool d = false; - while (xr2 - xl2 > 1.1*w) { - F xm = (xl2 + xr2)/2; - if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm)) - d = true; - if (x1 < xm) - xr2 = xm; - else - xl2 = xm; - if (d && logY >= logF(xl2) && logY >= logF(xr2)) - goto unacceptable; - } - x = x1; - goto acceptable; - } - goto acceptable; - unacceptable: - if (x1 < x) // rest of shrinking procedure - xl1 = x1; - else - xr1 = x1; - } - acceptable: - w = (4*w + (xr1 - xl1))/5; // update width estimate - } - return x; -} - -/* -//! slice_sampler1d() implements a 1-d MCMC slice sampler. -//! It should be correct for unimodal distributions, but -//! not for multimodal ones. -// -template -F slice_sampler1d(const LogP& logP, //!< log of distribution to sample - F x, //!< initial sample - Uniform01& u01, //!< uniform random number generator - F min_x = -std::numeric_limits::infinity(), //!< minimum value of support - F max_x = std::numeric_limits::infinity(), //!< maximum value of support - F w = 0.0, //!< guess at initial width - unsigned nsamples=1, //!< number of samples to draw - unsigned max_nfeval=200) //!< max number of function evaluations -{ - typedef unsigned U; - assert(std::isfinite(x)); - if (w <= 0.0) { - if (min_x > -std::numeric_limits::infinity() && max_x < std::numeric_limits::infinity()) - w = (max_x - min_x)/4; - else - w = std::max(((x < 0.0) ? -x : x)/4, 0.1); - } - // TRACE4(x, min_x, max_x, w); - F logPx = logP(x); - assert(std::isfinite(logPx)); - U nfeval = 1; - for (U sample = 0; sample < nsamples; ++sample) { - F x0 = x; - F logU = logPx + log(u01()+1e-100); - assert(std::isfinite(logU)); - F r = u01(); - F xl = std::max(min_x, x - r*w); - F xr = std::min(max_x, x + (1-r)*w); - // TRACE3(x, logPx, logU); - while (xl > min_x && logP(xl) > logU) { - xl -= w; - w *= 2; - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl; - assert(nfeval < max_nfeval); - } - xl = std::max(xl, min_x); - while (xr < max_x && logP(xr) > logU) { - xr += w; - w *= 2; - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl; - assert(nfeval < max_nfeval); - } - xr = std::min(xr, max_x); - while (true) { - r = u01(); - x = r*xl + (1-r)*xr; - assert(std::isfinite(x)); - logPx = logP(x); - // TRACE4(logPx, x, xl, xr); - assert(std::isfinite(logPx)); - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl; - assert(nfeval < max_nfeval); - if (logPx > logU) - break; - else if (x > x0) - xr = x; - else - xl = x; - } - // w = (4*w + (xr-xl))/5; // gradually adjust w - } - // TRACE2(logPx, x); - return x; -} // slice_sampler1d() -*/ - -#endif // SLICE_SAMPLER_H diff --git a/utils/small_vector.h b/utils/small_vector.h index 894b1b32..c8a69927 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -66,7 +66,7 @@ class SmallVector { //TODO: figure out iterator traits to allow this to be selcted for any iterator range template SmallVector(I const* begin,I const* end) { - int s=end-begin; + unsigned s=end-begin; Alloc(s); if (s <= SV_MAX) { for (unsigned i = 0; i < s; ++i,++begin) data_.vals[i] = *begin; diff --git a/utils/stringlib.h b/utils/stringlib.h index 75772c4d..ff5dc89d 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -86,7 +86,7 @@ bool match_begin(Str const& str,Prefix const& prefix) // source will be returned as a string, target must be a sentence or // a lattice (in PLF format) and will be returned as a Lattice object void ParseTranslatorInput(const std::string& line, std::string* input, std::string* ref); -struct Lattice; +class Lattice; void ParseTranslatorInputLattice(const std::string& line, std::string* input, Lattice* ref); inline std::string Trim(const std::string& str, const std::string& dropChars = " \t") { diff --git a/utils/unigram_pyp_lm.cc b/utils/unigram_pyp_lm.cc deleted file mode 100644 index 30b9fde1..00000000 --- a/utils/unigram_pyp_lm.cc +++ /dev/null @@ -1,214 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "corpus_tools.h" -#include "m.h" -#include "tdict.h" -#include "sampler.h" -#include "ccrp.h" -#include "gamma_poisson.h" - -// A not very memory-efficient implementation of an 1-gram LM based on PYPs -// as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model -// based on Pitman-Yor Processes. In Proc. ACL. - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,n",po::value()->default_value(50),"Number of samples") - ("train,i",po::value(),"Training data file") - ("test,T",po::value(),"Test data file") - ("discount_prior_a,a",po::value()->default_value(1.0), "discount ~ Beta(a,b): a=this") - ("discount_prior_b,b",po::value()->default_value(1.0), "discount ~ Beta(a,b): b=this") - ("strength_prior_s,s",po::value()->default_value(1.0), "strength ~ Gamma(s,r): s=this") - ("strength_prior_r,r",po::value()->default_value(1.0), "strength ~ Gamma(s,r): r=this") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("train") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct Histogram { - void increment(unsigned bin, unsigned delta = 1u) { - data[bin] += delta; - } - void decrement(unsigned bin, unsigned delta = 1u) { - data[bin] -= delta; - } - void move(unsigned from_bin, unsigned to_bin, unsigned delta = 1u) { - decrement(from_bin, delta); - increment(to_bin, delta); - } - map data; - // SparseVector data; -}; - -// Lord Rothschild. 1986. THE DISTRIBUTION OF ENGLISH DICTIONARY WORD LENGTHS. -// Journal of Statistical Planning and Inference 14 (1986) 311-322 -struct PoissonLengthUniformCharWordModel { - explicit PoissonLengthUniformCharWordModel(unsigned vocab_size) : plen(5,5), uc(-log(50)), llh() {} - void increment(WordID w, MT19937*) { - llh += log(prob(w)); // this isn't quite right - plen.increment(TD::Convert(w).size() - 1); - } - void decrement(WordID w, MT19937*) { - plen.decrement(TD::Convert(w).size() - 1); - llh -= log(prob(w)); // this isn't quite right - } - double prob(WordID w) const { - size_t len = TD::Convert(w).size(); - return plen.prob(len - 1) * exp(uc * len); - } - double log_likelihood() const { return llh; } - void resample_hyperparameters(MT19937*) {} - GammaPoisson plen; - const double uc; - double llh; -}; - -// uniform base distribution (0-gram model) -struct UniformWordModel { - explicit UniformWordModel(unsigned vocab_size) : p0(1.0 / vocab_size), draws() {} - void increment(WordID, MT19937*) { ++draws; } - void decrement(WordID, MT19937*) { --draws; assert(draws >= 0); } - double prob(WordID) const { return p0; } // all words have equal prob - double log_likelihood() const { return draws * log(p0); } - void resample_hyperparameters(MT19937*) {} - const double p0; - int draws; -}; - -// represents an Unigram LM -template -struct UnigramLM { - UnigramLM(unsigned vs, double da, double db, double ss, double sr) : - base(vs), - crp(da, db, ss, sr, 0.8, 1.0) {} - void increment(WordID w, MT19937* rng) { - const double backoff = base.prob(w); - if (crp.increment(w, backoff, rng)) - base.increment(w, rng); - } - void decrement(WordID w, MT19937* rng) { - if (crp.decrement(w, rng)) - base.decrement(w, rng); - } - double prob(WordID w) const { - const double backoff = base.prob(w); - return crp.prob(w, backoff); - } - - double log_likelihood() const { - double llh = base.log_likelihood(); - llh += crp.log_crp_prob(); - return llh; - } - - void resample_hyperparameters(MT19937* rng) { - crp.resample_hyperparameters(rng); - base.resample_hyperparameters(rng); - } - - double discount_a, discount_b, strength_s, strength_r; - double d, strength; - BaseGenerator base; - CCRP crp; -}; - -int main(int argc, char** argv) { - po::variables_map conf; - - InitCommandLine(argc, argv, &conf); - const unsigned samples = conf["samples"].as(); - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - vector > corpuse; - set vocabe; - const WordID kEOS = TD::Convert(""); - cerr << "Reading corpus...\n"; - CorpusTools::ReadFromFile(conf["train"].as(), &corpuse, &vocabe); - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - vector > test; - if (conf.count("test")) - CorpusTools::ReadFromFile(conf["test"].as(), &test); - else - test = corpuse; -#if 1 - UnigramLM lm(vocabe.size(), -#else - UnigramLM lm(vocabe.size(), -#endif - conf["discount_prior_a"].as(), - conf["discount_prior_b"].as(), - conf["strength_prior_s"].as(), - conf["strength_prior_r"].as()); - for (unsigned SS=0; SS < samples; ++SS) { - for (unsigned ci = 0; ci < corpuse.size(); ++ci) { - const vector& s = corpuse[ci]; - for (unsigned i = 0; i <= s.size(); ++i) { - WordID w = (i < s.size() ? s[i] : kEOS); - if (SS > 0) lm.decrement(w, &rng); - lm.increment(w, &rng); - } - if (SS > 0) lm.decrement(kEOS, &rng); - lm.increment(kEOS, &rng); - } - cerr << "LLH=" << lm.log_likelihood() << "\t tables=" << lm.crp.num_tables() << " " << endl; - if (SS % 10 == 9) lm.resample_hyperparameters(&rng); - } - double llh = 0; - unsigned cnt = 0; - unsigned oovs = 0; - for (unsigned ci = 0; ci < test.size(); ++ci) { - const vector& s = test[ci]; - for (unsigned i = 0; i <= s.size(); ++i) { - WordID w = (i < s.size() ? s[i] : kEOS); - double lp = log(lm.prob(w)) / log(2); - if (i < s.size() && vocabe.count(w) == 0) { - cerr << "**OOV "; - ++oovs; - //lp = 0; - } - cerr << "p(" << TD::Convert(w) << ") = " << lp << endl; - llh -= lp; - cnt++; - } - } - cerr << " Log_10 prob: " << (-llh * log(2) / log(10)) << endl; - cerr << " Count: " << cnt << endl; - cerr << " OOVs: " << oovs << endl; - cerr << "Cross-entropy: " << (llh / cnt) << endl; - cerr << " Perplexity: " << pow(2, llh / cnt) << endl; - return 0; -} - -- cgit v1.2.3 From d9918e2a47edf5887a179a566243a37e7b9a1c03 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:27:21 -0400 Subject: fix build --- configure.ac | 1 - utils/Makefile.am | 6 ------ 2 files changed, 7 deletions(-) (limited to 'utils') diff --git a/configure.ac b/configure.ac index 07ef9fe1..70e8e932 100644 --- a/configure.ac +++ b/configure.ac @@ -124,7 +124,6 @@ AC_CONFIG_FILES([klm/util/Makefile]) AC_CONFIG_FILES([klm/lm/Makefile]) AC_CONFIG_FILES([mira/Makefile]) AC_CONFIG_FILES([dtrain/Makefile]) -AC_CONFIG_FILES([rst_parser/Makefile]) AC_CONFIG_FILES([python/setup.py]) diff --git a/utils/Makefile.am b/utils/Makefile.am index 55d97354..3ad9d69e 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -45,18 +45,12 @@ m_test_SOURCES = m_test.cc m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz dict_test_SOURCES = dict_test.cc dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -mfcr_test_SOURCES = mfcr_test.cc -mfcr_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz weights_test_SOURCES = weights_test.cc weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -crp_test_SOURCES = crp_test.cc -crp_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz logval_test_SOURCES = logval_test.cc logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz small_vector_test_SOURCES = small_vector_test.cc small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -unigram_pyp_lm_SOURCES = unigram_pyp_lm.cc -unigram_pyp_lm_LDADD = libutils.a -lz ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### -- cgit v1.2.3 From dc435732dbe8929aac900fef8b1d454291769862 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 15 Oct 2012 23:20:41 -0400 Subject: get rid of nested class that was causing header polution --- decoder/decoder.cc | 14 ++--- decoder/ff.cc | 4 +- decoder/hg.h | 180 ++++++++++++++++++++--------------------------------- decoder/hg_io.cc | 4 +- utils/weights.cc | 8 +-- 5 files changed, 83 insertions(+), 127 deletions(-) (limited to 'utils') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index a69a6d05..47b298b9 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -871,13 +871,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { if (rp.fid_summary) { if (summary_feature_type == kEDGE_PROB) { const prob_t z = forest.PushWeightsToGoal(1.0); - if (!isfinite(log(z)) || isnan(log(z))) { + if (!std::isfinite(log(z)) || std::isnan(log(z))) { cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; } else { for (int i = 0; i < forest.edges_.size(); ++i) { const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge // head node by forest.PushWeightsToGoal - if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) { + if (!std::isfinite(log_prob_transition) || std::isnan(log_prob_transition)) { cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl; abort(); } @@ -889,7 +889,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } else if (summary_feature_type == kNODE_RISK) { Hypergraph::EdgeProbs posts; const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts); - if (!isfinite(log(z)) || isnan(log(z))) { + if (!std::isfinite(log(z)) || std::isnan(log(z))) { cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; } else { for (int i = 0; i < forest.nodes_.size(); ++i) { @@ -898,7 +898,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { for (int j = 0; j < in_edges.size(); ++j) node_post += (posts[in_edges[j]] / z); const double log_np = log(node_post); - if (!isfinite(log_np) || isnan(log_np)) { + if (!std::isfinite(log_np) || std::isnan(log_np)) { cerr << "got bad posterior prob for node " << i << endl; abort(); } @@ -913,13 +913,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } else if (summary_feature_type == kEDGE_RISK) { Hypergraph::EdgeProbs posts; const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts); - if (!isfinite(log(z)) || isnan(log(z))) { + if (!std::isfinite(log(z)) || std::isnan(log(z))) { cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; } else { assert(posts.size() == forest.edges_.size()); for (int i = 0; i < posts.size(); ++i) { const double log_np = log(posts[i] / z); - if (!isfinite(log_np) || isnan(log_np)) { + if (!std::isfinite(log_np) || std::isnan(log_np)) { cerr << "got bad posterior prob for node " << i << endl; abort(); } @@ -1090,7 +1090,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { cerr << "DIFF. ERR! log_z < log_ref_z: " << log_z << " " << log_ref_z << endl; exit(1); } - assert(!isnan(log_ref_z)); + assert(!std::isnan(log_ref_z)); ref_exp -= full_exp; acc_vec += ref_exp; acc_obj += (log_z - log_ref_z); diff --git a/decoder/ff.cc b/decoder/ff.cc index 557e0b5f..008fcad4 100644 --- a/decoder/ff.cc +++ b/decoder/ff.cc @@ -175,7 +175,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, Hypergraph::Edge* edge, FFState* context, prob_t* combination_cost_estimate) const { - edge->reset_info(); + //edge->reset_info(); context->resize(state_size_); if (state_size_ > 0) { memset(&(*context)[0], 0, state_size_); @@ -203,7 +203,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const { assert(1 == edge->rule_->Arity()); - edge->reset_info(); + //edge->reset_info(); for (int i = 0; i < models_.size(); ++i) { const FeatureFunction& ff = *models_[i]; const void* ant_state = NULL; diff --git a/decoder/hg.h b/decoder/hg.h index 6d67f2fa..f53d2fd2 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -33,47 +33,20 @@ // slow #undef HG_EDGES_TOPO_SORTED -class Hypergraph; -typedef boost::shared_ptr HypergraphP; - -// class representing an acyclic hypergraph -// - edges have 1 head, 0..n tails -class Hypergraph { -public: - Hypergraph() : is_linear_chain_(false) {} +// SmallVector is a fast, small vector implementation for sizes <= 2 +typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ +typedef std::vector EdgesVector; // indices in edges_ - // SmallVector is a fast, small vector implementation for sizes <= 2 - typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ - typedef std::vector EdgesVector; // indices in edges_ - - // TODO get rid of cat_? - // TODO keep cat_ and add span and/or state? :) - struct Node { - Node() : id_(), cat_() {} - int id_; // equal to this object's position in the nodes_ vector - WordID cat_; // non-terminal category if <0, 0 if not set - WordID NT() const { return -cat_; } - EdgesVector in_edges_; // an in edge is an edge with this node as its head. (in edges come from the bottom up to us) indices in edges_ - EdgesVector out_edges_; // an out edge is an edge with this node as its tail. (out edges leave us up toward the top/goal). indices in edges_ - void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting - cat_=o.cat_; - } - void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) { - copy_fixed(o); - id_=n2[id_]; - e2.reindex_push_back(o.in_edges_,in_edges_); - e2.reindex_push_back(o.out_edges_,out_edges_); - } - }; +enum { + NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF +}; +namespace HG { - // TODO get rid of edge_prob_? (can be computed on the fly as the dot - // product of the weight vector and the feature values) struct Edge { -// int poplimit; //TODO: cube pruning per edge limit? per node didn't work well at all. also, inside cost + outside(node) is the same information i'd use to set a per-edge limit anyway - and nonmonotonicity in cube pruning may mean it's good to favor edge (in same node) w/ relatively worse score Edge() : i_(-1), j_(-1), prev_i_(-1), prev_j_(-1) {} Edge(int id,Edge const& copy_pod_from) : id_(id) { copy_pod(copy_pod_from); } // call copy_features yourself later. - Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after + Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than sett : tail_nodes_(tail),id_(id) { copy_pod(copy_from);copy_features(copy_from); } inline int Arity() const { return tail_nodes_.size(); } int head_node_; // refers to a position in nodes_ @@ -83,8 +56,6 @@ public: prob_t edge_prob_; // dot product of weights and feat_values int id_; // equal to this object's position in the edges_ vector - //FIXME: these span ids belong in Node, not Edge, right? every node should have the same spans. - // span info. typically, i_ and j_ refer to indices in the source sentence. // In synchronous parsing, i_ and j_ will refer to target sentence/lattice indices // while prev_i_ prev_j_ will refer to positions in the source. @@ -97,54 +68,6 @@ public: short int j_; short int prev_i_; short int prev_j_; - - void copy_info(Edge const& o) { -#if USE_INFO_EDGE - set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space). it's empty if nobody put any info there. -#else - (void) o; -#endif - } - void copy_pod(Edge const& o) { - rule_=o.rule_; - i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; - } - void copy_features(Edge const& o) { - feature_values_=o.feature_values_; - copy_info(o); - } - void copy_fixed(Edge const& o) { - copy_pod(o); - copy_features(o); - edge_prob_ = o.edge_prob_; - } - void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) { - copy_fixed(o); - head_node_=n2[o.head_node_]; - id_=e2[o.id_]; - n2.reindex_push_back(o.tail_nodes_,tail_nodes_); - } - -#if USE_INFO_EDGE - std::ostringstream info_; - void set_info(std::string const& s) { - info_.str(s); - info_.seekp(0,std::ios_base::end); - } - Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str(),std::ios_base::ate) { -// info_.seekp(0,std::ios_base::end); - } - void operator=(Edge const& o) { - head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; - set_info(o.info_.str()); - } - std::string info() const { return info_.str(); } - void reset_info() { info_.str(""); info_.clear(); } -#else - std::string info() const { return std::string(); } - void reset_info() { } - void set_info(std::string const& ) { } -#endif void show(std::ostream &o,unsigned mask=SPAN|RULE) const { o<<'{'; if (mask&CATEGORY) @@ -159,10 +82,6 @@ public: o<<' '<AsString(mask&RULE_LHS); - if (USE_INFO_EDGE) { - std::string const& i=info(); - if (mask&&!i.empty()) o << " |||"< std::string derivation_tree(EdgeRecurse const& re,TEdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const { std::ostringstream o; @@ -203,7 +138,43 @@ public: } }; - // all this info ought to live in Node, but for some reason it's on Edges. + // TODO get rid of cat_? + // TODO keep cat_ and add span and/or state? :) + struct Node { + Node() : id_(), cat_() {} + int id_; // equal to this object's position in the nodes_ vector + WordID cat_; // non-terminal category if <0, 0 if not set + WordID NT() const { return -cat_; } + EdgesVector in_edges_; // an in edge is an edge with this node as its head. (in edges come from the bottom up to us) indices in edges_ + EdgesVector out_edges_; // an out edge is an edge with this node as its tail. (out edges leave us up toward the top/goal). indices in edges_ + void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting + cat_=o.cat_; + } + void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) { + copy_fixed(o); + id_=n2[id_]; + e2.reindex_push_back(o.in_edges_,in_edges_); + e2.reindex_push_back(o.out_edges_,out_edges_); + } + }; + +} // namespace HG + +class Hypergraph; +typedef boost::shared_ptr HypergraphP; +// class representing an acyclic hypergraph +// - edges have 1 head, 0..n tails +class Hypergraph { +public: + Hypergraph() : is_linear_chain_(false) {} + typedef HG::Node Node; + typedef HG::Edge Edge; + typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ + typedef std::vector EdgesVector; // indices in edges_ + enum { + NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF + }; + // except for stateful models that have split nt,span, this should identify the node void SetNodeOrigin(int nodeid,NTSpan &r) const { Node const &n=nodes_[nodeid]; @@ -230,18 +201,9 @@ public: } return s; } - // 0 if none, -TD index otherwise (just like in rule) WordID NodeLHS(int nodeid) const { Node const &n=nodes_[nodeid]; return n.NT(); - /* - if (!n.in_edges_.empty()) { - Edge const& e=edges_[n.in_edges_.front()]; - if (e.rule_) - return -e.rule_->lhs_; - } - return 0; - */ } typedef std::vector EdgeProbs; @@ -250,14 +212,8 @@ public: typedef std::vector NodeMask; std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; -// builds viterbi hg and returns it formatted as a pretty string - - enum { - NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF - }; std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; - // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first typedef Edge const* EdgeHandle; EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const { @@ -334,7 +290,7 @@ public: Edge* AddEdge(Edge const& in_edge, const TailNodeVector& tail) { edges_.push_back(Edge(edges_.size(),in_edge)); Edge* edge = &edges_.back(); - edge->copy_features(in_edge); + edge->feature_values_ = in_edge.feature_values_; edge->tail_nodes_ = tail; // possibly faster than copying to Edge() constructed above then copying via push_back. perhaps optimized it's the same. index_tails(*edge); return edge; diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 3a68a429..8f604c89 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -392,8 +392,8 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses const Hypergraph::Edge& e = hg.edges_[hg.nodes_[i].out_edges_[j]]; const string output = e.rule_->e_.size() ==2 ? Escape(TD::Convert(e.rule_->e_[1])) : EPS; double prob = log(e.edge_prob_); - if (isinf(prob)) { prob = -9e20; } - if (isnan(prob)) { prob = 0; } + if (std::isinf(prob)) { prob = -9e20; } + if (std::isnan(prob)) { prob = 0; } os << "('" << output << "'," << prob << "," << e.head_node_ - i << "),"; } os << "),"; diff --git a/utils/weights.cc b/utils/weights.cc index f56e2a20..575877b6 100644 --- a/utils/weights.cc +++ b/utils/weights.cc @@ -34,7 +34,7 @@ void Weights::InitFromFile(const string& filename, int weight_count = 0; bool fl = false; string buf; - weight_t val = 0; + double val = 0; while (in) { getline(in, buf); if (buf.size() == 0) continue; @@ -53,7 +53,7 @@ void Weights::InitFromFile(const string& filename, if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); } while(end < buf.size() && buf[end] == ' ') ++end; val = strtod(&buf.c_str()[end], NULL); - if (isnan(val)) { + if (std::isnan(val)) { cerr << FD::Convert(fid) << " has weight NaN!\n"; abort(); } @@ -127,8 +127,8 @@ void Weights::InitSparseVector(const vector& dv, void Weights::SanityCheck(const vector& w) { for (unsigned i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); + assert(!std::isnan(w[i])); + assert(!std::isinf(w[i])); } } -- cgit v1.2.3 From 29a47a94bfc09450802484e5cd3f835d39c9f66c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 15 Dec 2012 02:53:56 -0500 Subject: enable kenlm compression --- configure.ac | 26 ++++++++++++++++++-------- decoder/Makefile.am | 11 +++++------ example_extff/Makefile.am | 2 +- klm/util/have.hh | 3 +-- mteval/Makefile.am | 6 +++--- python/setup.py.in | 2 +- training/dpmert/Makefile.am | 10 +++++----- training/dtrain/Makefile.am | 2 +- training/minrisk/Makefile.am | 2 +- training/mira/Makefile.am | 2 +- training/pro/Makefile.am | 4 ++-- training/rampion/Makefile.am | 2 +- training/utils/Makefile.am | 4 ++-- utils/Makefile.am | 18 +++++++++--------- word-aligner/Makefile.am | 2 +- 15 files changed, 52 insertions(+), 44 deletions(-) (limited to 'utils') diff --git a/configure.ac b/configure.ac index f4650ca4..eabb8645 100644 --- a/configure.ac +++ b/configure.ac @@ -18,6 +18,23 @@ BOOST_TEST AM_PATH_PYTHON AC_CHECK_HEADER(dlfcn.h,AC_DEFINE(HAVE_DLFCN_H)) AC_CHECK_LIB(dl, dlopen) +AC_CHECK_HEADERS(zlib.h, + AC_CHECK_LIB(z, gzread,[ + AC_DEFINE(HAVE_ZLIB,[],[Do we have zlib]) + ZLIBS="$ZLIBS -lz" + ])) + +AC_CHECK_HEADERS(bzlib.h, + AC_CHECK_LIB(bz2, BZ2_bzReadOpen,[ + AC_DEFINE(HAVE_BZLIB,[],[Do we have bzlib]) + ZLIBS="$ZLIBS -lbz2" + ])) + +AC_CHECK_HEADERS(lzma.h, + AC_CHECK_LIB(lzma, lzma_code,[ + AC_DEFINE(HAVE_XZLIB,[],[Do we have lzma]) + ZLIBS="$ZLIBS -llzma" + ])) AC_ARG_ENABLE(mpi, [ --enable-mpi Build MPI binaries, assumes mpi.h is present ], @@ -72,19 +89,12 @@ fi CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS $BOOST_SERIALIZATION_LDFLAGS $BOOST_SYSTEM_LDFLAGS" # $BOOST_THREAD_LDFLAGS" -LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS" +LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS $ZLIBS" # $BOOST_THREAD_LIBS" AC_CHECK_HEADER(google/dense_hash_map, [AC_DEFINE([HAVE_SPARSEHASH], [1], [flag for google::dense_hash_map])]) -AC_CHECK_HEADER(zlib.h, - [AC_DEFINE([HAVE_ZLIB], [1], [zlib])]) -AC_CHECK_HEADER(bzlib.h, - [AC_DEFINE([HAVE_BZLIB], [1], [bzlib])]) -AC_CHECK_HEADER(lzma.h, - [AC_DEFINE([HAVE_XZLIB], [1], [xzlib])]) - AC_PROG_INSTALL CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H -DKENLM_MAX_ORDER=6" diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 6914fa0f..88a6116c 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -8,16 +8,16 @@ noinst_PROGRAMS = \ TESTS = trule_test parser_test grammar_test hg_test parser_test_SOURCES = parser_test.cc -parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a grammar_test_SOURCES = grammar_test.cc -grammar_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +grammar_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a hg_test_SOURCES = hg_test.cc -hg_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +hg_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a trule_test_SOURCES = trule_test.cc -trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a cdec_SOURCES = cdec.cc -cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm @@ -82,4 +82,3 @@ libcdec_a_SOURCES = \ JSON_parser.c \ json_parse.cc \ grammar.cc - diff --git a/example_extff/Makefile.am b/example_extff/Makefile.am index ac2694ca..7b7c34b5 100644 --- a/example_extff/Makefile.am +++ b/example_extff/Makefile.am @@ -1,4 +1,4 @@ -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm -I../decoder +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I.. -I../mteval -I../utils -I../klm -I../decoder lib_LTLIBRARIES = libff_example.la libff_example_la_SOURCES = ff_example.cc diff --git a/klm/util/have.hh b/klm/util/have.hh index b86ba11e..85b838e4 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -11,8 +11,7 @@ #endif #ifdef HAVE_CONFIG_H -// Chris; uncomment this line. -//#include "config.h" +#include "config.h" #endif #endif // UTIL_HAVE__ diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 5e9bba91..4444285f 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -23,12 +23,12 @@ libmteval_a_SOURCES = \ ter.cc fast_score_SOURCES = fast_score.cc -fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz +fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a mbr_kbest_SOURCES = mbr_kbest.cc -mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz +mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a scorer_test_SOURCES = scorer_test.cc -scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils diff --git a/python/setup.py.in b/python/setup.py.in index dac72903..fa8a9f5e 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -17,7 +17,7 @@ ext_modules = [ sources=['src/_cdec.cpp'], include_dirs=INC, library_dirs=LIB, - libraries=LIBS + ['z', 'cdec', 'utils', 'mteval', 'training_utils', 'klm', 'klm_util', 'ksearch'], + libraries=['cdec', 'utils', 'mteval', 'training_utils', 'klm', 'klm_util', 'ksearch'] + LIBS, extra_compile_args=CPPFLAGS, extra_link_args=LDFLAGS), Extension(name='cdec.sa._sa', diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am index ff318bef..3dbdfa69 100644 --- a/training/dpmert/Makefile.am +++ b/training/dpmert/Makefile.am @@ -8,18 +8,18 @@ noinst_PROGRAMS = \ TESTS = lo_test mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc -mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a # nbest2hg_SOURCES = nbest2hg.cc -# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz +# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc -mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc -mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc -lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am index 5b48e756..4f51b0c8 100644 --- a/training/dtrain/Makefile.am +++ b/training/dtrain/Makefile.am @@ -1,7 +1,7 @@ bin_PROGRAMS = dtrain dtrain_SOURCES = dtrain.cc score.cc -dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/minrisk/Makefile.am b/training/minrisk/Makefile.am index a15e821e..821730c2 100644 --- a/training/minrisk/Makefile.am +++ b/training/minrisk/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = minrisk_optimize minrisk_optimize_SOURCES = minrisk_optimize.cc -minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a -lz +minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am index ae609ede..c8f404fb 100644 --- a/training/mira/Makefile.am +++ b/training/mira/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = kbest_mira kbest_mira_SOURCES = kbest_mira.cc -kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/pro/Makefile.am b/training/pro/Makefile.am index 1916b6b2..e0a45a33 100644 --- a/training/pro/Makefile.am +++ b/training/pro/Makefile.am @@ -3,9 +3,9 @@ bin_PROGRAMS = \ mr_pro_reduce mr_pro_map_SOURCES = mr_pro_map.cc -mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a mr_pro_reduce_SOURCES = mr_pro_reduce.cc -mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz +mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils -I$(top_srcdir)/training diff --git a/training/rampion/Makefile.am b/training/rampion/Makefile.am index 1633d0f7..ef0ca147 100644 --- a/training/rampion/Makefile.am +++ b/training/rampion/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = rampion_cccp rampion_cccp_SOURCES = rampion_cccp.cc -rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am index 189d9a76..c9405d4e 100644 --- a/training/utils/Makefile.am +++ b/training/utils/Makefile.am @@ -24,10 +24,10 @@ libtraining_utils_a_SOURCES = \ risk.cc optimize_test_SOURCES = optimize_test.cc -optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a -lz +optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a lbfgs_test_SOURCES = lbfgs_test.cc -lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a -lz +lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I$(top_srcdir)/klm diff --git a/utils/Makefile.am b/utils/Makefile.am index 3ad9d69e..639c30b8 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -33,24 +33,24 @@ if HAVE_CMPH endif reconstruct_weights_SOURCES = reconstruct_weights.cc -reconstruct_weights_LDADD = libutils.a -lz +reconstruct_weights_LDADD = libutils.a atools_SOURCES = atools.cc -atools_LDADD = libutils.a -lz +atools_LDADD = libutils.a phmt_SOURCES = phmt.cc -phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) ts_SOURCES = ts.cc -ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) m_test_SOURCES = m_test.cc -m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) dict_test_SOURCES = dict_test.cc -dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) weights_test_SOURCES = weights_test.cc -weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) logval_test_SOURCES = logval_test.cc -logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) small_vector_test_SOURCES = small_vector_test.cc -small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am index 280d3ae7..2dcb688e 100644 --- a/word-aligner/Makefile.am +++ b/word-aligner/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = fast_align fast_align_SOURCES = fast_align.cc ttables.cc -fast_align_LDADD = $(top_srcdir)/utils/libutils.a -lz +fast_align_LDADD = $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training -- cgit v1.2.3 From 2538c4052e844ebf6f5615becd087f2d6658c587 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 8 Jan 2013 15:44:45 -0500 Subject: add header files to sources to create correct distributions --- configure.ac | 5 ++-- decoder/Makefile.am | 70 +++++++++++++++++++++++++++++++++++++++++++ klm/lm/Makefile.am | 27 +++++++++++++++++ klm/search/Makefile.am | 14 ++++++++- klm/util/Makefile.am | 20 +++++++++++++ mteval/Makefile.am | 11 +++++++ training/crf/Makefile.am | 4 +-- training/dpmert/Makefile.am | 6 ++-- training/dtrain/Makefile.am | 2 +- training/liblbfgs/Makefile.am | 9 +++++- training/utils/Makefile.am | 7 +++++ utils/Makefile.am | 48 +++++++++++++++++++++++++++++ word-aligner/Makefile.am | 2 +- 13 files changed, 214 insertions(+), 11 deletions(-) (limited to 'utils') diff --git a/configure.ac b/configure.ac index eabb8645..dcd0a0d8 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,6 @@ -AC_INIT -AM_INIT_AUTOMAKE(cdec,0.1) +AC_INIT([cdec],[1.0]) +AC_CONFIG_SRCDIR([decoder/cdec.cc]) +AM_INIT_AUTOMAKE AC_CONFIG_HEADERS(config.h) AC_PROG_LIBTOOL AC_PROG_LEX diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 88a6116c..21187da8 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -26,7 +26,77 @@ rule_lexer.cc: rule_lexer.ll noinst_LIBRARIES = libcdec.a +EXTRA_DIST = rule_lexer.ll + libcdec_a_SOURCES = \ + JSON_parser.h \ + aligner.h \ + apply_fsa_models.h \ + apply_models.h \ + bottom_up_parser.h \ + cfg.h \ + cfg_binarize.h \ + cfg_format.h \ + cfg_options.h \ + csplit.h \ + decoder.h \ + dwarf.h \ + earley_composer.h \ + exp_semiring.h \ + factored_lexicon_helper.h \ + ff.h \ + ff_basic.h \ + ff_bleu.h \ + ff_charset.h \ + ff_context.h \ + ff_csplit.h \ + ff_dwarf.h \ + ff_external.h \ + ff_factory.h \ + ff_klm.h \ + ff_lm.h \ + ff_ngrams.h \ + ff_register.h \ + ff_rules.h \ + ff_ruleshape.h \ + ff_sample_fsa.h \ + ff_source_syntax.h \ + ff_spans.h \ + ff_tagger.h \ + ff_wordalign.h \ + ff_wordset.h \ + ffset.h \ + forest_writer.h \ + freqdict.h \ + grammar.h \ + hg.h \ + hg_cfg.h \ + hg_intersect.h \ + hg_io.h \ + hg_remove_eps.h \ + hg_sampler.h \ + hg_test.h \ + hg_union.h \ + incremental.h \ + inside_outside.h \ + json_parse.h \ + kbest.h \ + lattice.h \ + lexalign.h \ + lextrans.h \ + nt_span.h \ + oracle_bleu.h \ + phrasebased_translator.h \ + phrasetable_fst.h \ + program_options.h \ + rule_lexer.h \ + sentence_metadata.h \ + sentences.h \ + tagger.h \ + translator.h \ + tromble_loss.h \ + trule.h \ + viterbi.h \ forest_writer.cc \ maxtrans_blunsom.cc \ cdec_ff.cc \ diff --git a/klm/lm/Makefile.am b/klm/lm/Makefile.am index a12c5f03..436cfd08 100644 --- a/klm/lm/Makefile.am +++ b/klm/lm/Makefile.am @@ -12,6 +12,33 @@ build_binary_LDADD = libklm.a ../util/libklm_util.a -lz noinst_LIBRARIES = libklm.a libklm_a_SOURCES = \ + bhiksha.hh \ + binary_format.hh \ + blank.hh \ + config.hh \ + enumerate_vocab.hh \ + facade.hh \ + left.hh \ + lm_exception.hh \ + max_order.hh \ + model.hh \ + model_type.hh \ + ngram_query.hh \ + partial.hh \ + quantize.hh \ + read_arpa.hh \ + return.hh \ + search_hashed.hh \ + search_trie.hh \ + state.hh \ + trie.hh \ + trie_sort.hh \ + value.hh \ + value_build.hh \ + virtual_interface.hh \ + vocab.hh \ + weights.hh \ + word_index.hh \ bhiksha.cc \ binary_format.cc \ config.cc \ diff --git a/klm/search/Makefile.am b/klm/search/Makefile.am index 5aea33c2..a34f6cea 100644 --- a/klm/search/Makefile.am +++ b/klm/search/Makefile.am @@ -1,11 +1,23 @@ noinst_LIBRARIES = libksearch.a libksearch_a_SOURCES = \ + applied.hh \ + config.hh \ + context.hh \ + dedupe.hh \ + edge.hh \ + edge_generator.hh \ + header.hh \ + nbest.hh \ + rule.hh \ + types.hh \ + vertex.hh \ + vertex_generator.hh \ edge_generator.cc \ nbest.cc \ rule.cc \ vertex.cc \ vertex_generator.cc -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I.. diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am index a676bdb3..bb441432 100644 --- a/klm/util/Makefile.am +++ b/klm/util/Makefile.am @@ -19,6 +19,26 @@ noinst_LIBRARIES = libklm_util.a libklm_util_a_SOURCES = \ + bit_packing.hh \ + ersatz_progress.hh \ + exception.hh \ + file.hh \ + file_piece.hh \ + getopt.hh \ + have.hh \ + joint_sort.hh \ + mmap.hh \ + murmur_hash.hh \ + pool.hh \ + probing_hash_table.hh \ + proxy_iterator.hh \ + read_compressed.hh \ + scoped.hh \ + sized_iterator.hh \ + sorted_uniform.hh \ + string_piece.hh \ + tokenize_piece.hh \ + usage.hh \ ersatz_progress.cc \ bit_packing.cc \ exception.cc \ diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 4444285f..b19e4bb1 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -9,6 +9,17 @@ TESTS = scorer_test noinst_LIBRARIES = libmteval.a libmteval_a_SOURCES = \ + aer_scorer.h \ + comb_scorer.h \ + external_scorer.h \ + ns.h \ + ns_cer.h \ + ns_comb.h \ + ns_docscorer.h \ + ns_ext.h \ + ns_ter.h \ + scorer.h \ + ter.h \ aer_scorer.cc \ comb_scorer.cc \ external_scorer.cc \ diff --git a/training/crf/Makefile.am b/training/crf/Makefile.am index d203df25..f72d8f92 100644 --- a/training/crf/Makefile.am +++ b/training/crf/Makefile.am @@ -18,10 +18,10 @@ mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/ mpi_extract_features_SOURCES = mpi_extract_features.cc mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz -mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc +mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc cllh_observer.h mpi_batch_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz -mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc +mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc cllh_observer.h mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/training -I$(top_srcdir)/training/utils -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am index 3dbdfa69..e5f13944 100644 --- a/training/dpmert/Makefile.am +++ b/training/dpmert/Makefile.am @@ -13,13 +13,13 @@ mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_sr # nbest2hg_SOURCES = nbest2hg.cc # nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc +mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc +mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc ces.h error_surface.h line_optimizer.h mert_geometry.h mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc +lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am index 4f51b0c8..ee337ca8 100644 --- a/training/dtrain/Makefile.am +++ b/training/dtrain/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = dtrain -dtrain_SOURCES = dtrain.cc score.cc +dtrain_SOURCES = dtrain.cc score.cc dtrain.h kbestget.h ksampler.h pairsampling.h score.h dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/liblbfgs/Makefile.am b/training/liblbfgs/Makefile.am index 64a3794d..f0d5c8aa 100644 --- a/training/liblbfgs/Makefile.am +++ b/training/liblbfgs/Makefile.am @@ -6,10 +6,17 @@ ll_test_LDADD = liblbfgs.a -lz noinst_LIBRARIES = liblbfgs.a -liblbfgs_a_SOURCES = lbfgs.c +liblbfgs_a_SOURCES = \ + lbfgs.c \ + arithmetic_ansi.h \ + arithmetic_sse_double.h \ + arithmetic_sse_float.h \ + lbfgs++.h \ + lbfgs.h ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### AM_LDFLAGS = liblbfgs.a -lz AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I.. ################################################################ + diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am index d708a9f5..a2ab86fd 100644 --- a/training/utils/Makefile.am +++ b/training/utils/Makefile.am @@ -18,6 +18,13 @@ sentclient_LDFLAGS = -pthread TESTS = lbfgs_test optimize_test libtraining_utils_a_SOURCES = \ + candidate_set.h \ + entropy.h \ + lbfgs.h \ + online_optimizer.h \ + optimize.h \ + risk.h \ + sentserver.h \ candidate_set.cc \ entropy.cc \ optimize.cc \ diff --git a/utils/Makefile.am b/utils/Makefile.am index 639c30b8..3177325b 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -14,6 +14,53 @@ TESTS = ts small_vector_test logval_test weights_test dict_test m_test noinst_LIBRARIES = libutils.a libutils_a_SOURCES = \ + alias_sampler.h \ + alignment_io.h \ + array2d.h \ + b64tools.h \ + batched_append.h \ + city.h \ + citycrc.h \ + corpus_tools.h \ + dict.h \ + fast_sparse_vector.h \ + fdict.h \ + feature_vector.h \ + filelib.h \ + gzstream.h \ + hash.h \ + have_64_bits.h \ + indices_after.h \ + kernel_string_subseq.h \ + logval.h \ + m.h \ + murmur_hash.h \ + named_enum.h \ + null_deleter.h \ + null_traits.h \ + perfect_hash.h \ + prob.h \ + sampler.h \ + semiring.h \ + show.h \ + small_vector.h \ + sparse_vector.h \ + static_utoa.h \ + stringlib.h \ + swap_pod.h \ + tdict.h \ + timing_stats.h \ + utoa.h \ + value_array.h \ + verbose.h \ + warning_compiler.h \ + warning_pop.h \ + warning_push.h \ + weights.h \ + wordid.h \ + writer.h \ + fast_lexical_cast.hpp \ + intrusive_refcount.hpp \ alignment_io.cc \ b64tools.cc \ corpus_tools.cc \ @@ -56,3 +103,4 @@ small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOS # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. ################################################################ + diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am index 2dcb688e..e274b209 100644 --- a/word-aligner/Makefile.am +++ b/word-aligner/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = fast_align -fast_align_SOURCES = fast_align.cc ttables.cc +fast_align_SOURCES = fast_align.cc ttables.cc da.h ttables.h fast_align_LDADD = $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training -- cgit v1.2.3 From 5de78361e67a1750ca2d163b846d5fffdeb09a04 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 8 Jan 2013 22:22:28 -0500 Subject: fixes for dist --- decoder/Makefile.am | 6 +++--- decoder/cfg_test.cc | 2 +- decoder/grammar_test.cc | 4 ++-- decoder/hg_test.cc | 2 +- example_extff/Makefile.am | 2 +- klm/lm/Makefile.am | 2 +- klm/search/Makefile.am | 2 +- klm/util/Makefile.am | 2 +- mteval/Makefile.am | 10 ++++++---- mteval/scorer_test.cc | 2 +- training/crf/Makefile.am | 12 ++++++------ training/dpmert/Makefile.am | 12 +++++++----- training/dpmert/lo_test.cc | 2 +- training/dtrain/Makefile.am | 2 +- training/liblbfgs/Makefile.am | 2 +- training/minrisk/Makefile.am | 4 ++-- training/mira/Makefile.am | 2 +- training/pro/Makefile.am | 4 ++-- training/rampion/Makefile.am | 2 +- training/utils/Makefile.am | 6 +++--- utils/Makefile.am | 5 ++++- utils/weights_test.cc | 2 +- word-aligner/Makefile.am | 4 ++-- 23 files changed, 50 insertions(+), 43 deletions(-) (limited to 'utils') diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 21187da8..558aeaed 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -5,7 +5,7 @@ noinst_PROGRAMS = \ hg_test \ parser_test \ grammar_test - + TESTS = trule_test parser_test grammar_test hg_test parser_test_SOURCES = parser_test.cc parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a @@ -19,14 +19,14 @@ trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEW cdec_SOURCES = cdec.cc cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm +AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm rule_lexer.cc: rule_lexer.ll $(LEX) -s -CF -8 -o$@ $< noinst_LIBRARIES = libcdec.a -EXTRA_DIST = rule_lexer.ll +EXTRA_DIST = test_data rule_lexer.ll libcdec_a_SOURCES = \ JSON_parser.h \ diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index 316c6d16..cbe7d0be 100644 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc @@ -33,7 +33,7 @@ struct CFGTest : public TestWithParam { istringstream ws(wts); EXPECT_TRUE(ws>>featw); CSHOW(featw) - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); HGSetup::JsonTestFile(&hg,path,file); hg.Reweight(featw); cfg.Init(hg,true,true,false); diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc index 912f4f12..6d2c6e67 100644 --- a/decoder/grammar_test.cc +++ b/decoder/grammar_test.cc @@ -19,7 +19,7 @@ using namespace std; struct GrammarTest { GrammarTest() { - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Weights::InitFromFile(path + "/weights.gt", &wts); } vector wts; @@ -43,7 +43,7 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) { } BOOST_AUTO_TEST_CASE(TestTextGrammarFile) { - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); GrammarPtr g(new TextGrammar(path + "/grammar.prune")); vector grammars(1, g); diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 37469748..8519e559 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -339,7 +339,7 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) { BOOST_AUTO_TEST_CASE(Small) { Hypergraph hg; - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); CreateSmallHG(&hg, path); SparseVector wts; wts.set_value(FD::Convert("Model_0"), -2.0); diff --git a/example_extff/Makefile.am b/example_extff/Makefile.am index 7b7c34b5..6abfe6c5 100644 --- a/example_extff/Makefile.am +++ b/example_extff/Makefile.am @@ -1,4 +1,4 @@ -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I.. -I../mteval -I../utils -I../klm -I../decoder +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm -I$(top_srcdir)/decoder lib_LTLIBRARIES = libff_example.la libff_example_la_SOURCES = ff_example.cc diff --git a/klm/lm/Makefile.am b/klm/lm/Makefile.am index 436cfd08..870f7128 100644 --- a/klm/lm/Makefile.am +++ b/klm/lm/Makefile.am @@ -55,5 +55,5 @@ libklm_a_SOURCES = \ virtual_interface.cc \ vocab.cc -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm diff --git a/klm/search/Makefile.am b/klm/search/Makefile.am index a34f6cea..03554276 100644 --- a/klm/search/Makefile.am +++ b/klm/search/Makefile.am @@ -19,5 +19,5 @@ libksearch_a_SOURCES = \ vertex.cc \ vertex_generator.cc -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I.. +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am index bb441432..3ab7560f 100644 --- a/klm/util/Makefile.am +++ b/klm/util/Makefile.am @@ -51,4 +51,4 @@ libklm_util_a_SOURCES = \ string_piece.cc \ usage.cc -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm diff --git a/mteval/Makefile.am b/mteval/Makefile.am index b19e4bb1..83adee17 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -8,6 +8,8 @@ TESTS = scorer_test noinst_LIBRARIES = libmteval.a +EXTRA_DIST = test_data + libmteval_a_SOURCES = \ aer_scorer.h \ comb_scorer.h \ @@ -34,12 +36,12 @@ libmteval_a_SOURCES = \ ter.cc fast_score_SOURCES = fast_score.cc -fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a +fast_score_LDADD = libmteval.a ../utils/libutils.a mbr_kbest_SOURCES = mbr_kbest.cc -mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a +mbr_kbest_LDADD = libmteval.a ../utils/libutils.a scorer_test_SOURCES = scorer_test.cc -scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +scorer_test_LDADD = libmteval.a ../utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils +AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/mteval/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/utils diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc index 9b765d0f..da07f154 100644 --- a/mteval/scorer_test.cc +++ b/mteval/scorer_test.cc @@ -36,7 +36,7 @@ struct Stuff { BOOST_FIXTURE_TEST_SUITE( s, Stuff ); BOOST_AUTO_TEST_CASE(TestCreateFromFiles) { - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); vector files; files.push_back(path + "/re.txt.0"); files.push_back(path + "/re.txt.1"); diff --git a/training/crf/Makefile.am b/training/crf/Makefile.am index f72d8f92..d37b224c 100644 --- a/training/crf/Makefile.am +++ b/training/crf/Makefile.am @@ -7,21 +7,21 @@ bin_PROGRAMS = \ mpi_online_optimize mpi_online_optimize_SOURCES = mpi_online_optimize.cc -mpi_online_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +mpi_online_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc -mpi_flex_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +mpi_flex_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc -mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +mpi_extract_reachable_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz mpi_extract_features_SOURCES = mpi_extract_features.cc -mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +mpi_extract_features_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc cllh_observer.h -mpi_batch_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +mpi_batch_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc cllh_observer.h -mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +mpi_compute_cllh_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a -lz AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/training -I$(top_srcdir)/training/utils -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am index e5f13944..cba3e30f 100644 --- a/training/dpmert/Makefile.am +++ b/training/dpmert/Makefile.am @@ -8,18 +8,20 @@ noinst_PROGRAMS = \ TESTS = lo_test mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc -mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a +mr_dpmert_generate_mapper_input_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a # nbest2hg_SOURCES = nbest2hg.cc # nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h -mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a +mr_dpmert_map_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc ces.h error_surface.h line_optimizer.h mert_geometry.h -mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a +mr_dpmert_reduce_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc ces.h error_surface.h line_optimizer.h mert_geometry.h -lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a +lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval +EXTRA_DIST = test_data + +AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/training/dpmert/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dpmert/lo_test.cc b/training/dpmert/lo_test.cc index 95a08d3d..d89bcd99 100644 --- a/training/dpmert/lo_test.cc +++ b/training/dpmert/lo_test.cc @@ -118,7 +118,7 @@ BOOST_AUTO_TEST_CASE( TestS1) { to_optimize.push_back(fPhraseModel_1); to_optimize.push_back(fPhraseModel_2); - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; ReadFile rf(path + "/0.json.gz"); diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am index ee337ca8..8cf71078 100644 --- a/training/dtrain/Makefile.am +++ b/training/dtrain/Makefile.am @@ -1,7 +1,7 @@ bin_PROGRAMS = dtrain dtrain_SOURCES = dtrain.cc score.cc dtrain.h kbestget.h ksampler.h pairsampling.h score.h -dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a +dtrain_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/liblbfgs/Makefile.am b/training/liblbfgs/Makefile.am index f0d5c8aa..272d6f56 100644 --- a/training/liblbfgs/Makefile.am +++ b/training/liblbfgs/Makefile.am @@ -17,6 +17,6 @@ liblbfgs_a_SOURCES = \ ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### AM_LDFLAGS = liblbfgs.a -lz -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I.. +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I$(top_srcdir)/training -I$(top_srcdir)/training/liblbfgs ################################################################ diff --git a/training/minrisk/Makefile.am b/training/minrisk/Makefile.am index 821730c2..2be17498 100644 --- a/training/minrisk/Makefile.am +++ b/training/minrisk/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = minrisk_optimize minrisk_optimize_SOURCES = minrisk_optimize.cc -minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a +minrisk_optimize_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a ../../training/liblbfgs/liblbfgs.a -AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils +AM_CPPFLAGS = -W -Wall -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am index c8f404fb..0084603d 100644 --- a/training/mira/Makefile.am +++ b/training/mira/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = kbest_mira kbest_mira_SOURCES = kbest_mira.cc -kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a +kbest_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/pro/Makefile.am b/training/pro/Makefile.am index e0a45a33..93889f34 100644 --- a/training/pro/Makefile.am +++ b/training/pro/Makefile.am @@ -3,9 +3,9 @@ bin_PROGRAMS = \ mr_pro_reduce mr_pro_map_SOURCES = mr_pro_map.cc -mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a +mr_pro_map_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a mr_pro_reduce_SOURCES = mr_pro_reduce.cc -mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a +mr_pro_reduce_LDADD = ../../training/liblbfgs/liblbfgs.a ../../utils/libutils.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils -I$(top_srcdir)/training diff --git a/training/rampion/Makefile.am b/training/rampion/Makefile.am index ef0ca147..6a1a97cb 100644 --- a/training/rampion/Makefile.am +++ b/training/rampion/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = rampion_cccp rampion_cccp_SOURCES = rampion_cccp.cc -rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a +rampion_cccp_LDADD = ../../training/utils/libtraining_utils.a ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am index a2ab86fd..29078aed 100644 --- a/training/utils/Makefile.am +++ b/training/utils/Makefile.am @@ -32,13 +32,13 @@ libtraining_utils_a_SOURCES = \ risk.cc optimize_test_SOURCES = optimize_test.cc -optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a +optimize_test_LDADD = libtraining_utils.a ../../utils/libutils.a grammar_convert_SOURCES = grammar_convert.cc -grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a +grammar_convert_LDADD = ../../decoder/libcdec.a ../../mteval/libmteval.a ../../utils/libutils.a lbfgs_test_SOURCES = lbfgs_test.cc -lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a +lbfgs_test_LDADD = ../../utils/libutils.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I$(top_srcdir)/klm diff --git a/utils/Makefile.am b/utils/Makefile.am index 3177325b..c5fedb78 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -13,7 +13,10 @@ TESTS = ts small_vector_test logval_test weights_test dict_test m_test noinst_LIBRARIES = libutils.a +# EXTRA_DIST = test_data + libutils_a_SOURCES = \ + test_data \ alias_sampler.h \ alignment_io.h \ array2d.h \ @@ -101,6 +104,6 @@ small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOS ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I$(top_srcdir) -DTEST_DATA=\"$(top_srcdir)/utils/test_data\" ################################################################ diff --git a/utils/weights_test.cc b/utils/weights_test.cc index 4be4c40f..0d5d8512 100644 --- a/utils/weights_test.cc +++ b/utils/weights_test.cc @@ -7,6 +7,6 @@ using namespace std; BOOST_AUTO_TEST_CASE(Load) { vector v; - Weights::InitFromFile("test_data/weights", &v); + Weights::InitFromFile(TEST_DATA "/weights", &v); Weights::WriteToFile("-", v); } diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am index e274b209..a195cc5a 100644 --- a/word-aligner/Makefile.am +++ b/word-aligner/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = fast_align fast_align_SOURCES = fast_align.cc ttables.cc da.h ttables.h -fast_align_LDADD = $(top_srcdir)/utils/libutils.a +fast_align_LDADD = ../utils/libutils.a -AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training +AM_CPPFLAGS = -W -Wall -I$(top_srcdir) -I$(top_srcdir)/utils -I$(top_srcdir)/training -- cgit v1.2.3