summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-02-08 16:22:55 -0500
committerChris Dyer <cdyer@cs.cmu.edu>2012-02-08 16:22:55 -0500
commit400d60b20e9e480b0eff9843404a4cb9f8bd02cc (patch)
treeb19dad517c1f9a6ed68720be2e88b814f1d66aa5 /utils
parent2c3ee44cea2c46c6c1cdd21bc20568142181937b (diff)
move widely duplicated math functions into m.h header
Diffstat (limited to 'utils')
-rw-r--r--utils/Makefile.am5
-rw-r--r--utils/m.h89
-rw-r--r--utils/m_test.cc75
-rw-r--r--utils/mfcr.h22
4 files changed, 171 insertions, 20 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 3e559c75..a1ea8270 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -7,11 +7,12 @@ TESTS = ts phmt mfcr_test
if HAVE_GTEST
noinst_PROGRAMS += \
dict_test \
+ m_test \
weights_test \
logval_test \
small_vector_test
-TESTS += small_vector_test logval_test weights_test dict_test
+TESTS += small_vector_test logval_test weights_test dict_test m_test
endif
reconstruct_weights_SOURCES = reconstruct_weights.cc
@@ -38,6 +39,8 @@ endif
phmt_SOURCES = phmt.cc
ts_SOURCES = ts.cc
+m_test_SOURCES = m_test.cc
+m_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
dict_test_SOURCES = dict_test.cc
dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
mfcr_test_SOURCES = mfcr_test.cc
diff --git a/utils/m.h b/utils/m.h
new file mode 100644
index 00000000..b25248c2
--- /dev/null
+++ b/utils/m.h
@@ -0,0 +1,89 @@
+#ifndef _M_H_
+#define _M_H_
+
+#include <cassert>
+#include <cmath>
+
+template <typename F>
+struct M {
+ // support [0, 1, 2 ...)
+ static inline F log_poisson(unsigned x, const F& lambda) {
+ assert(lambda > 0.0);
+ return std::log(lambda) * x - lgamma(x + 1) - lambda;
+ }
+
+ // support [0, 1, 2 ...)
+ static inline F log_geometric(unsigned x, const F& p) {
+ assert(p > 0.0);
+ assert(p < 1.0);
+ return std::log(1 - p) * x + std::log(p);
+ }
+
+ // log of the binomial coefficient
+ static inline F log_binom_coeff(unsigned n, unsigned k) {
+ assert(n >= k);
+ if (n == k) return 0.0;
+ return lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1);
+ }
+
+ // http://en.wikipedia.org/wiki/Negative_binomial_distribution
+ // support [0, 1, 2 ...)
+ static inline F log_negative_binom(unsigned x, unsigned r, const F& p) {
+ assert(p > 0.0);
+ assert(p < 1.0);
+ return log_binom_coeff(x + r - 1u, x) + r * std::log(F(1) - p) + x * std::log(p);
+ }
+
+ // this is the Beta function, *not* the beta probability density
+ // http://mathworld.wolfram.com/BetaFunction.html
+ static inline F log_beta_fn(const F& x, const F& y) {
+ return lgamma(x) + lgamma(y) - lgamma(x + y);
+ }
+
+ // support x >= 0.0
+ static F log_gamma_density(const F& x, const F& shape, const F& rate) {
+ assert(x >= 0.0);
+ assert(shape > 0.0);
+ assert(rate > 0.0);
+ return (shape-1)*std::log(x) - shape*std::log(rate) - x/rate - lgamma(shape);
+ }
+
+ // this is the Beta *density* p(x ; alpha, beta)
+ // support x \in (0,1)
+ static inline F log_beta_density(const F& x, const F& alpha, const F& beta) {
+ assert(x > 0.0);
+ assert(x < 1.0);
+ assert(alpha > 0.0);
+ assert(beta > 0.0);
+ return (alpha-1)*std::log(x)+(beta-1)*std::log(1-x) - log_beta_fn(alpha, beta);
+ }
+
+ // note: this has been adapted so that 0 is in the support of the distribution
+ // support [0, 1, 2 ...)
+ static inline F log_yule_simon(unsigned x, const F& rho) {
+ assert(rho > 0.0);
+ return std::log(rho) + log_beta_fn(x + 1, rho + 1);
+ }
+
+ // see http://www.gatsby.ucl.ac.uk/~ywteh/research/compling/hpylm.pdf
+ // when y=1, sometimes written x^{\overline{n}} or x^{(n)} "Pochhammer symbol"
+ static inline F log_generalized_factorial(const F& x, const F& n, const F& y = 1.0) {
+ assert(x > 0.0);
+ assert(y >= 0.0);
+ assert(n > 0.0);
+ if (!n) return 0.0;
+ if (y == F(1)) {
+ return lgamma(x + n) - lgamma(x);
+ } else if (y) {
+ return n * std::log(y) + lgamma(x/y + n) - lgamma(x/y);
+ } else { // y == 0.0
+ return n * std::log(x);
+ }
+ }
+
+};
+
+typedef M<double> Md;
+typedef M<double> Mf;
+
+#endif
diff --git a/utils/m_test.cc b/utils/m_test.cc
new file mode 100644
index 00000000..fca8f895
--- /dev/null
+++ b/utils/m_test.cc
@@ -0,0 +1,75 @@
+#include "m.h"
+
+#include <iostream>
+#include <gtest/gtest.h>
+#include <cassert>
+
+using namespace std;
+
+class MTest : public testing::Test {
+ public:
+ MTest() {}
+ protected:
+ virtual void SetUp() { }
+ virtual void TearDown() { }
+};
+
+TEST_F(MTest, Poisson) {
+ double prev = 1.0;
+ double tot = 0;
+ for (int i = 0; i < 10; ++i) {
+ double p = Md::log_poisson(i, 0.99);
+ cerr << "p(i=" << i << ") = " << exp(p) << endl;
+ EXPECT_LT(p, prev);
+ tot += exp(p);
+ prev = p;
+ }
+ cerr << " tot=" << tot << endl;
+ EXPECT_LE(tot, 1.0);
+}
+
+TEST_F(MTest, YuleSimon) {
+ double prev = 1.0;
+ double tot = 0;
+ for (int i = 0; i < 10; ++i) {
+ double p = Md::log_yule_simon(i, 1.0);
+ cerr << "p(i=" << i << ") = " << exp(p) << endl;
+ EXPECT_LT(p, prev);
+ tot += exp(p);
+ prev = p;
+ }
+ cerr << " tot=" << tot << endl;
+ EXPECT_LE(tot, 1.0);
+}
+
+TEST_F(MTest, LogGeometric) {
+ double prev = 1.0;
+ double tot = 0;
+ for (int i = 0; i < 10; ++i) {
+ double p = Md::log_geometric(i, 0.5);
+ cerr << "p(i=" << i << ") = " << exp(p) << endl;
+ EXPECT_LT(p, prev);
+ tot += exp(p);
+ prev = p;
+ }
+ cerr << " tot=" << tot << endl;
+ EXPECT_LE(tot, 1.0);
+}
+
+TEST_F(MTest, GeneralizedFactorial) {
+ for (double i = 0.3; i < 10000; i += 0.4) {
+ double a = Md::log_generalized_factorial(1.0, i);
+ double b = lgamma(1.0 + i);
+ EXPECT_FLOAT_EQ(a,b);
+ }
+ double gf_3_6 = 3.0 * 4.0 * 5.0 * 6.0 * 7.0 * 8.0;
+ EXPECT_FLOAT_EQ(Md::log_generalized_factorial(3.0, 6.0), std::log(gf_3_6));
+ double gf_314_6 = 3.14 * 4.14 * 5.14 * 6.14 * 7.14 * 8.14;
+ EXPECT_FLOAT_EQ(Md::log_generalized_factorial(3.14, 6.0), std::log(gf_314_6));
+}
+
+int main(int argc, char** argv) {
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
+
diff --git a/utils/mfcr.h b/utils/mfcr.h
index 3eb133fc..396d0205 100644
--- a/utils/mfcr.h
+++ b/utils/mfcr.h
@@ -12,6 +12,7 @@
#include <boost/functional/hash.hpp>
#include "sampler.h"
#include "slice_sampler.h"
+#include "m.h"
struct TableCount {
TableCount() : count(), floor() {}
@@ -218,31 +219,14 @@ class MFCR {
return log_crp_prob(d_, alpha_);
}
- static double log_beta_density(const double& x, const double& alpha, const double& beta) {
- assert(x > 0.0);
- assert(x < 1.0);
- assert(alpha > 0.0);
- assert(beta > 0.0);
- const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
- return lp;
- }
-
- static double log_gamma_density(const double& x, const double& shape, const double& rate) {
- assert(x >= 0.0);
- assert(shape > 0.0);
- assert(rate > 0.0);
- const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
- return lp;
- }
-
// taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
// does not include draws from G_w's
double log_crp_prob(const double& d, const double& alpha) const {
double lp = 0.0;
if (has_d_prior())
- lp = log_beta_density(d, d_prior_alpha_, d_prior_beta_);
+ lp = Md::log_beta_density(d, d_prior_alpha_, d_prior_beta_);
if (has_alpha_prior())
- lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
+ lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
assert(lp <= 0.0);
if (num_customers_) {
if (d > 0.0) {