From 648fd70ec05997003e801e113d825c84e55e01ca Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 8 Feb 2012 16:22:55 -0500 Subject: move widely duplicated math functions into m.h header --- utils/Makefile.am | 5 +++- utils/m.h | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ utils/m_test.cc | 75 ++++++++++++++++++++++++++++++++++++++++++++++ utils/mfcr.h | 22 ++------------ 4 files changed, 171 insertions(+), 20 deletions(-) create mode 100644 utils/m.h create mode 100644 utils/m_test.cc (limited to 'utils') diff --git a/utils/Makefile.am b/utils/Makefile.am index 3e559c75..a1ea8270 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -7,11 +7,12 @@ TESTS = ts phmt mfcr_test if HAVE_GTEST noinst_PROGRAMS += \ dict_test \ + m_test \ weights_test \ logval_test \ small_vector_test -TESTS += small_vector_test logval_test weights_test dict_test +TESTS += small_vector_test logval_test weights_test dict_test m_test endif reconstruct_weights_SOURCES = reconstruct_weights.cc @@ -38,6 +39,8 @@ endif phmt_SOURCES = phmt.cc ts_SOURCES = ts.cc +m_test_SOURCES = m_test.cc +m_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) dict_test_SOURCES = dict_test.cc dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) mfcr_test_SOURCES = mfcr_test.cc diff --git a/utils/m.h b/utils/m.h new file mode 100644 index 00000000..b25248c2 --- /dev/null +++ b/utils/m.h @@ -0,0 +1,89 @@ +#ifndef _M_H_ +#define _M_H_ + +#include +#include + +template +struct M { + // support [0, 1, 2 ...) + static inline F log_poisson(unsigned x, const F& lambda) { + assert(lambda > 0.0); + return std::log(lambda) * x - lgamma(x + 1) - lambda; + } + + // support [0, 1, 2 ...) + static inline F log_geometric(unsigned x, const F& p) { + assert(p > 0.0); + assert(p < 1.0); + return std::log(1 - p) * x + std::log(p); + } + + // log of the binomial coefficient + static inline F log_binom_coeff(unsigned n, unsigned k) { + assert(n >= k); + if (n == k) return 0.0; + return lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1); + } + + // http://en.wikipedia.org/wiki/Negative_binomial_distribution + // support [0, 1, 2 ...) + static inline F log_negative_binom(unsigned x, unsigned r, const F& p) { + assert(p > 0.0); + assert(p < 1.0); + return log_binom_coeff(x + r - 1u, x) + r * std::log(F(1) - p) + x * std::log(p); + } + + // this is the Beta function, *not* the beta probability density + // http://mathworld.wolfram.com/BetaFunction.html + static inline F log_beta_fn(const F& x, const F& y) { + return lgamma(x) + lgamma(y) - lgamma(x + y); + } + + // support x >= 0.0 + static F log_gamma_density(const F& x, const F& shape, const F& rate) { + assert(x >= 0.0); + assert(shape > 0.0); + assert(rate > 0.0); + return (shape-1)*std::log(x) - shape*std::log(rate) - x/rate - lgamma(shape); + } + + // this is the Beta *density* p(x ; alpha, beta) + // support x \in (0,1) + static inline F log_beta_density(const F& x, const F& alpha, const F& beta) { + assert(x > 0.0); + assert(x < 1.0); + assert(alpha > 0.0); + assert(beta > 0.0); + return (alpha-1)*std::log(x)+(beta-1)*std::log(1-x) - log_beta_fn(alpha, beta); + } + + // note: this has been adapted so that 0 is in the support of the distribution + // support [0, 1, 2 ...) + static inline F log_yule_simon(unsigned x, const F& rho) { + assert(rho > 0.0); + return std::log(rho) + log_beta_fn(x + 1, rho + 1); + } + + // see http://www.gatsby.ucl.ac.uk/~ywteh/research/compling/hpylm.pdf + // when y=1, sometimes written x^{\overline{n}} or x^{(n)} "Pochhammer symbol" + static inline F log_generalized_factorial(const F& x, const F& n, const F& y = 1.0) { + assert(x > 0.0); + assert(y >= 0.0); + assert(n > 0.0); + if (!n) return 0.0; + if (y == F(1)) { + return lgamma(x + n) - lgamma(x); + } else if (y) { + return n * std::log(y) + lgamma(x/y + n) - lgamma(x/y); + } else { // y == 0.0 + return n * std::log(x); + } + } + +}; + +typedef M Md; +typedef M Mf; + +#endif diff --git a/utils/m_test.cc b/utils/m_test.cc new file mode 100644 index 00000000..fca8f895 --- /dev/null +++ b/utils/m_test.cc @@ -0,0 +1,75 @@ +#include "m.h" + +#include +#include +#include + +using namespace std; + +class MTest : public testing::Test { + public: + MTest() {} + protected: + virtual void SetUp() { } + virtual void TearDown() { } +}; + +TEST_F(MTest, Poisson) { + double prev = 1.0; + double tot = 0; + for (int i = 0; i < 10; ++i) { + double p = Md::log_poisson(i, 0.99); + cerr << "p(i=" << i << ") = " << exp(p) << endl; + EXPECT_LT(p, prev); + tot += exp(p); + prev = p; + } + cerr << " tot=" << tot << endl; + EXPECT_LE(tot, 1.0); +} + +TEST_F(MTest, YuleSimon) { + double prev = 1.0; + double tot = 0; + for (int i = 0; i < 10; ++i) { + double p = Md::log_yule_simon(i, 1.0); + cerr << "p(i=" << i << ") = " << exp(p) << endl; + EXPECT_LT(p, prev); + tot += exp(p); + prev = p; + } + cerr << " tot=" << tot << endl; + EXPECT_LE(tot, 1.0); +} + +TEST_F(MTest, LogGeometric) { + double prev = 1.0; + double tot = 0; + for (int i = 0; i < 10; ++i) { + double p = Md::log_geometric(i, 0.5); + cerr << "p(i=" << i << ") = " << exp(p) << endl; + EXPECT_LT(p, prev); + tot += exp(p); + prev = p; + } + cerr << " tot=" << tot << endl; + EXPECT_LE(tot, 1.0); +} + +TEST_F(MTest, GeneralizedFactorial) { + for (double i = 0.3; i < 10000; i += 0.4) { + double a = Md::log_generalized_factorial(1.0, i); + double b = lgamma(1.0 + i); + EXPECT_FLOAT_EQ(a,b); + } + double gf_3_6 = 3.0 * 4.0 * 5.0 * 6.0 * 7.0 * 8.0; + EXPECT_FLOAT_EQ(Md::log_generalized_factorial(3.0, 6.0), std::log(gf_3_6)); + double gf_314_6 = 3.14 * 4.14 * 5.14 * 6.14 * 7.14 * 8.14; + EXPECT_FLOAT_EQ(Md::log_generalized_factorial(3.14, 6.0), std::log(gf_314_6)); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/utils/mfcr.h b/utils/mfcr.h index 3eb133fc..396d0205 100644 --- a/utils/mfcr.h +++ b/utils/mfcr.h @@ -12,6 +12,7 @@ #include #include "sampler.h" #include "slice_sampler.h" +#include "m.h" struct TableCount { TableCount() : count(), floor() {} @@ -218,31 +219,14 @@ class MFCR { return log_crp_prob(d_, alpha_); } - static double log_beta_density(const double& x, const double& alpha, const double& beta) { - assert(x > 0.0); - assert(x < 1.0); - assert(alpha > 0.0); - assert(beta > 0.0); - const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); - return lp; - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process // does not include draws from G_w's double log_crp_prob(const double& d, const double& alpha) const { double lp = 0.0; if (has_d_prior()) - lp = log_beta_density(d, d_prior_alpha_, d_prior_beta_); + lp = Md::log_beta_density(d, d_prior_alpha_, d_prior_beta_); if (has_alpha_prior()) - lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); + lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); assert(lp <= 0.0); if (num_customers_) { if (d > 0.0) { -- cgit v1.2.3