From 481a120564fdb73c8c6833e2102acb533683261c Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 27 Jan 2012 02:31:00 -0500
Subject: migrate mert to the new scorer interface

---
 gi/pf/base_distributions.cc | 241 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 241 insertions(+)
 create mode 100644 gi/pf/base_distributions.cc

(limited to 'gi/pf/base_distributions.cc')
diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc
new file mode 100644
index 00000000..4b1863fa
--- /dev/null
+++ b/gi/pf/base_distributions.cc
@@ -0,0 +1,241 @@
+#include "base_measures.h"
+
+#include <iostream>
+
+#include "filelib.h"
+
+using namespace std;
+
+TableLookupBase::TableLookupBase(const string& fname) {
+  cerr << "TableLookupBase reading from " << fname << " ..." << endl;
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  unsigned lc = 0;
+  const WordID kDIV = TD::Convert("|||");
+  vector<WordID> tmp;
+  vector<int> le, lf;
+  TRule x;
+  x.lhs_ = -TD::Convert("X");
+  bool flag = false;
+  while(getline(in, line)) {
+    ++lc;
+    if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; }
+    else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; }
+    tmp.clear();
+    TD::ConvertSentence(line, &tmp);
+    x.f_.clear();
+    x.e_.clear();
+    size_t pos = 0;
+    int cc = 0;
+    while(pos < tmp.size()) {
+      const WordID cur = tmp[pos++];
+      if (cur == kDIV) {
+        ++cc;
+      } else if (cc == 0) {
+        x.f_.push_back(cur);    
+      } else if (cc == 1) {
+        x.e_.push_back(cur);
+      } else if (cc == 2) {
+        table[x].logeq(atof(TD::Convert(cur)));
+        ++cc;
+      } else {
+        if (flag) cerr << endl;
+        cerr << "Bad format in " << lc << ": " << line << endl; abort();
+      }
+    }
+    if (cc != 3) {
+      if (flag) cerr << endl;
+      cerr << "Bad format in " << lc << ": " << line << endl; abort();
+    }
+  }
+  if (flag) cerr << endl;
+  cerr << " read " << lc << " entries\n";
+}
+
+prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
+                                                     const vector<WordID>& vtrg,
+                                                     int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;
+  const int elen = vtrg.size() - start_trg;
+  prob_t p;
+  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
+  //p.logeq(log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
+  for (int i = 0; i < elen; ++i)
+    p *= u(vtrg[i + start_trg]);                        // draw e_i             ~Uniform
+  return p;
+}
+
+prob_t PhraseConditionalUninformativeBase::p0(const vector<WordID>& vsrc,
+                                              const vector<WordID>& vtrg,
+                                              int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;
+  const int elen = vtrg.size() - start_trg;
+  prob_t p;
+  //p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
+  p.logeq(log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
+  for (int i = 0; i < elen; ++i)
+    p *= kUNIFORM_TARGET;                        // draw e_i             ~Uniform
+  return p;
+}
+
+void Model1::LoadModel1(const string& fname) {
+  cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  unsigned lc = 0;
+  while(getline(in, line)) {
+    ++lc;
+    int cur = 0;
+    int start = 0;
+    while(cur < line.size() && line[cur] != ' ') { ++cur; }
+    assert(cur != line.size());
+    line[cur] = 0;
+    const WordID src = TD::Convert(&line[0]);
+    ++cur;
+    start = cur;
+    while(cur < line.size() && line[cur] != ' ') { ++cur; }
+    assert(cur != line.size());
+    line[cur] = 0;
+    WordID trg = TD::Convert(&line[start]);
+    const double logprob = strtod(&line[cur + 1], NULL);
+    if (src >= ttable.size()) ttable.resize(src + 1);
+    ttable[src][trg].logeq(logprob);
+  }
+  cerr << "  read " << lc << " parameters.\n";
+}
+
+prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc,
+                                 const vector<WordID>& vtrg,
+                                 int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;
+  const int elen = vtrg.size() - start_trg;
+  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
+  prob_t p;
+  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
+  for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS
+    const WordID trg = vtrg[i + start_trg];
+    prob_t tp = prob_t::Zero();
+    for (int j = -1; j < flen; ++j) {
+      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+      tp += kM1MIXTURE * model1(src, trg);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
+    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+  }
+  if (p.is_0()) {
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+  return p;
+}
+
+prob_t PhraseJointBase::p0(const vector<WordID>& vsrc,
+                           const vector<WordID>& vtrg,
+                           int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;
+  const int elen = vtrg.size() - start_trg;
+  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
+  prob_t p;
+  p.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
+                                                 // elen | flen          ~Pois(flen + 0.01)
+  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
+  p *= ptrglen;
+  p *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
+  for (int i = 0; i < elen; ++i) {               // for each position i in E
+    const WordID trg = vtrg[i + start_trg];
+    prob_t tp = prob_t::Zero();
+    for (int j = -1; j < flen; ++j) {
+      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+      tp += kM1MIXTURE * model1(src, trg);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
+    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+  }
+  if (p.is_0()) {
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+  return p;
+}
+
+prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc,
+                                 const vector<WordID>& vtrg,
+                                 int start_src, int start_trg) const {
+  const int flen = vsrc.size() - start_src;
+  const int elen = vtrg.size() - start_trg;
+  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
+  prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1));
+
+  prob_t p1;
+  p1.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
+                                                 // elen | flen          ~Pois(flen + 0.01)
+  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
+  p1 *= ptrglen;
+  p1 *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
+  for (int i = 0; i < elen; ++i) {               // for each position i in E
+    const WordID trg = vtrg[i + start_trg];
+    prob_t tp = prob_t::Zero();
+    for (int j = -1; j < flen; ++j) {
+      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+      tp += kM1MIXTURE * model1(src, trg);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
+    p1 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+  }
+  if (p1.is_0()) {
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+
+  prob_t p2;
+  p2.logeq(log_poisson(elen, 1.0));               // elen                 ~Pois(1)
+                                                 // flen | elen          ~Pois(flen + 0.01)
+  prob_t psrclen; psrclen.logeq(log_poisson(flen, elen + 0.01));
+  p2 *= psrclen;
+  p2 *= kUNIFORM_TARGET.pow(elen);                // each f in F ~Uniform
+  for (int i = 0; i < flen; ++i) {               // for each position i in E
+    const WordID src = vsrc[i + start_src];
+    prob_t tp = prob_t::Zero();
+    for (int j = -1; j < elen; ++j) {
+      const WordID trg = j < 0 ? 0 : vtrg[j + start_trg];
+      tp += kM1MIXTURE * invmodel1(trg, src);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE;
+    }
+    tp *= uniform_trg_alignment;                 //     draw a_i         ~uniform
+    p2 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+  }
+  if (p2.is_0()) {
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+
+  static const prob_t kHALF(0.5);
+  return (p1 + p2) * kHALF;
+}
+
+JumpBase::JumpBase() : p(200) {
+  for (unsigned src_len = 1; src_len < 200; ++src_len) {
+    map<int, prob_t>& cpd = p[src_len];
+    int min_jump = 1 - src_len;
+    int max_jump = src_len;
+    prob_t z;
+    for (int j = min_jump; j <= max_jump; ++j) {
+      prob_t& cp = cpd[j];
+      if (j < 0)
+        cp.logeq(log_poisson(1.5-j, 1));
+      else if (j > 0)
+        cp.logeq(log_poisson(j, 1));
+      cp.poweq(0.2);
+      z += cp;
+    }
+    for (int j = min_jump; j <= max_jump; ++j) {
+      cpd[j] /= z;
+    }
+  }
+}
+
-- 
cgit v1.2.3


From e5831933364a4cab5084db49281a855baea8e09c Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Thu, 2 Feb 2012 12:33:13 -0500
Subject: fix broken build

---
 gi/pf/Makefile.am           | 2 +-
 gi/pf/base_distributions.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'gi/pf/base_distributions.cc')

diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am
index 28367e67..8d43f36d 100644
--- a/gi/pf/Makefile.am
+++ b/gi/pf/Makefile.am
@@ -1,7 +1,7 @@
 bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly align-lexonly-pyp
 
 noinst_LIBRARIES = libpf.a
-libpf_a_SOURCES = base_measures.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc
+libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc
 
 align_lexonly_SOURCES = align-lexonly.cc
 
diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc
index 4b1863fa..d362fd76 100644
--- a/gi/pf/base_distributions.cc
+++ b/gi/pf/base_distributions.cc
@@ -1,4 +1,4 @@
-#include "base_measures.h"
+#include "base_distributions.h"
 
 #include <iostream>
 
-- 
cgit v1.2.3


From 648fd70ec05997003e801e113d825c84e55e01ca Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Wed, 8 Feb 2012 16:22:55 -0500
Subject: move widely duplicated math functions into m.h header

---
 .gitignore                     |  1 +
 gi/pf/base_distributions.cc    | 22 +++++------
 gi/pf/base_distributions.h     | 21 +---------
 gi/pf/conditional_pseg.h       |  3 +-
 gi/pf/pfdist.cc                |  6 +--
 gi/pf/pfnaive.cc               |  4 +-
 phrasinator/gibbs_train_plm.cc |  8 +---
 utils/Makefile.am              |  5 ++-
 utils/m.h                      | 89 ++++++++++++++++++++++++++++++++++++++++++
 utils/m_test.cc                | 75 +++++++++++++++++++++++++++++++++++
 utils/mfcr.h                   | 22 ++---------
 11 files changed, 194 insertions(+), 62 deletions(-)
 create mode 100644 utils/m.h
 create mode 100644 utils/m_test.cc

(limited to 'gi/pf/base_distributions.cc')

diff --git a/.gitignore b/.gitignore
index ab8bf2c7..4f75d153 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 mira/kbest_mira
+utils/m_test
 sa-extract/calignment.c
 sa-extract/calignment.so
 sa-extract/cdat.c
diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc
index d362fd76..d9761005 100644
--- a/gi/pf/base_distributions.cc
+++ b/gi/pf/base_distributions.cc
@@ -59,7 +59,7 @@ prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
   const int flen = vsrc.size() - start_src;
   const int elen = vtrg.size() - start_trg;
   prob_t p;
-  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
+  p.logeq(Md::log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
   //p.logeq(log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
   for (int i = 0; i < elen; ++i)
     p *= u(vtrg[i + start_trg]);                        // draw e_i             ~Uniform
@@ -73,7 +73,7 @@ prob_t PhraseConditionalUninformativeBase::p0(const vector<WordID>& vsrc,
   const int elen = vtrg.size() - start_trg;
   prob_t p;
   //p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  p.logeq(log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
+  p.logeq(Md::log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
   for (int i = 0; i < elen; ++i)
     p *= kUNIFORM_TARGET;                        // draw e_i             ~Uniform
   return p;
@@ -113,7 +113,7 @@ prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc,
   const int elen = vtrg.size() - start_trg;
   prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
   prob_t p;
-  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
+  p.logeq(Md::log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
   for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS
     const WordID trg = vtrg[i + start_trg];
     prob_t tp = prob_t::Zero();
@@ -139,9 +139,9 @@ prob_t PhraseJointBase::p0(const vector<WordID>& vsrc,
   const int elen = vtrg.size() - start_trg;
   prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
   prob_t p;
-  p.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
+  p.logeq(Md::log_poisson(flen, 1.0));               // flen                 ~Pois(1)
                                                  // elen | flen          ~Pois(flen + 0.01)
-  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
+  prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01));
   p *= ptrglen;
   p *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
   for (int i = 0; i < elen; ++i) {               // for each position i in E
@@ -171,9 +171,9 @@ prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc,
   prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1));
 
   prob_t p1;
-  p1.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
+  p1.logeq(Md::log_poisson(flen, 1.0));               // flen                 ~Pois(1)
                                                  // elen | flen          ~Pois(flen + 0.01)
-  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
+  prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01));
   p1 *= ptrglen;
   p1 *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
   for (int i = 0; i < elen; ++i) {               // for each position i in E
@@ -193,9 +193,9 @@ prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc,
   }
 
   prob_t p2;
-  p2.logeq(log_poisson(elen, 1.0));               // elen                 ~Pois(1)
+  p2.logeq(Md::log_poisson(elen, 1.0));               // elen                 ~Pois(1)
                                                  // flen | elen          ~Pois(flen + 0.01)
-  prob_t psrclen; psrclen.logeq(log_poisson(flen, elen + 0.01));
+  prob_t psrclen; psrclen.logeq(Md::log_poisson(flen, elen + 0.01));
   p2 *= psrclen;
   p2 *= kUNIFORM_TARGET.pow(elen);                // each f in F ~Uniform
   for (int i = 0; i < flen; ++i) {               // for each position i in E
@@ -227,9 +227,9 @@ JumpBase::JumpBase() : p(200) {
     for (int j = min_jump; j <= max_jump; ++j) {
       prob_t& cp = cpd[j];
       if (j < 0)
-        cp.logeq(log_poisson(1.5-j, 1));
+        cp.logeq(Md::log_poisson(1.5-j, 1));
       else if (j > 0)
-        cp.logeq(log_poisson(j, 1));
+        cp.logeq(Md::log_poisson(j, 1));
       cp.poweq(0.2);
       z += cp;
     }
diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h
index a23ac32b..0d597c5c 100644
--- a/gi/pf/base_distributions.h
+++ b/gi/pf/base_distributions.h
@@ -13,24 +13,7 @@
 #include "prob.h"
 #include "tdict.h"
 #include "sampler.h"
-
-inline double log_poisson(unsigned x, const double& lambda) {
-  assert(lambda > 0.0);
-  return log(lambda) * x - lgamma(x + 1) - lambda;
-}
-
-inline double log_binom_coeff(unsigned n, unsigned k) {
-  assert(n >= k);
-  if (n == k) return 0.0;
-  return lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1);
-}
-
-// http://en.wikipedia.org/wiki/Negative_binomial_distribution
-inline double log_negative_binom(unsigned x, unsigned r, double p) {
-  assert(p > 0.0);
-  assert(p < 1.0);
-  return log_binom_coeff(x + r - 1, x) + r * log(1 - p) + x * log(p);
-}
+#include "m.h"
 
 inline std::ostream& operator<<(std::ostream& os, const std::vector<WordID>& p) {
   os << '[';
@@ -68,7 +51,7 @@ struct Model1 {
 struct PoissonUniformUninformativeBase {
   explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}
   prob_t operator()(const TRule& r) const {
-    prob_t p; p.logeq(log_poisson(r.e_.size(), 1.0));
+    prob_t p; p.logeq(Md::log_poisson(r.e_.size(), 1.0));
     prob_t q = kUNIFORM; q.poweq(r.e_.size());
     p *= q;
     return p;
diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h
index 0aa5e8e0..2e9e38fc 100644
--- a/gi/pf/conditional_pseg.h
+++ b/gi/pf/conditional_pseg.h
@@ -6,6 +6,7 @@
 #include <boost/functional/hash.hpp>
 #include <iostream>
 
+#include "m.h"
 #include "prob.h"
 #include "ccrp_nt.h"
 #include "mfcr.h"
@@ -210,7 +211,7 @@ struct ConditionalParallelSegementationModel {
 
   prob_t AlignProbability(unsigned span) const {
     prob_t p;
-    p.logeq(aligns.logprob(span, log_poisson(span, 1.0)));
+    p.logeq(aligns.logprob(span, Md::log_poisson(span, 1.0)));
     return p;
   }
 
diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc
index ef08a165..3d578db2 100644
--- a/gi/pf/pfdist.cc
+++ b/gi/pf/pfdist.cc
@@ -315,7 +315,7 @@ struct BackwardEstimate {
       for (int i = 0; i < src_cov.size(); ++i)
         if (!src_cov[i]) r.push_back(src_[i]);
       const prob_t uniform_alignment(1.0 / r.size());
-      e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
+      e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
       for (unsigned j = trg_cov; j < trg_.size(); ++j) {
         prob_t p;
         for (unsigned i = 0; i < r.size(); ++i)
@@ -352,7 +352,7 @@ struct BackwardEstimateSym {
         if (!src_cov[i]) r.push_back(src_[i]);
       r.push_back(0);  // NULL word
       const prob_t uniform_alignment(1.0 / r.size());
-      e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
+      e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
       for (unsigned j = trg_cov; j < trg_.size(); ++j) {
         prob_t p;
         for (unsigned i = 0; i < r.size(); ++i)
@@ -367,7 +367,7 @@ struct BackwardEstimateSym {
       r.pop_back();
       const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0));
       prob_t inv;
-      inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov));
+      inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov));
       for (unsigned i = 0; i < r.size(); ++i) {
         prob_t p;
         for (unsigned j = trg_cov - 1; j < trg_.size(); ++j)
diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc
index acba9d22..e1a53f5c 100644
--- a/gi/pf/pfnaive.cc
+++ b/gi/pf/pfnaive.cc
@@ -77,7 +77,7 @@ struct BackwardEstimateSym {
         r.push_back(src_[i]);
       r.push_back(0);  // NULL word
       const prob_t uniform_alignment(1.0 / r.size());
-      e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
+      e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
       for (unsigned j = trg_cov; j < trg_.size(); ++j) {
         prob_t p;
         for (unsigned i = 0; i < r.size(); ++i)
@@ -92,7 +92,7 @@ struct BackwardEstimateSym {
       r.pop_back();
       const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0));
       prob_t inv;
-      inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov));
+      inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov));
       for (unsigned i = 0; i < r.size(); ++i) {
         prob_t p;
         for (unsigned j = trg_cov - 1; j < trg_.size(); ++j)
diff --git a/phrasinator/gibbs_train_plm.cc b/phrasinator/gibbs_train_plm.cc
index 29b3d7ea..66b46011 100644
--- a/phrasinator/gibbs_train_plm.cc
+++ b/phrasinator/gibbs_train_plm.cc
@@ -8,6 +8,7 @@
 #include "dict.h"
 #include "sampler.h"
 #include "ccrp.h"
+#include "m.h"
 
 using namespace std;
 using namespace std::tr1;
@@ -95,11 +96,6 @@ void ReadCorpus(const string& filename, vector<vector<int> >* c, set<int>* vocab
   if (in != &cin) delete in;
 }
 
-double log_poisson(unsigned x, const double& lambda) {
-  assert(lambda > 0.0);
-  return log(lambda) * x - lgamma(x + 1) - lambda;
-}
-
 struct UniphraseLM {
   UniphraseLM(const vector<vector<int> >& corpus,
               const set<int>& vocab,
@@ -128,7 +124,7 @@ struct UniphraseLM {
   double log_p0(const vector<int>& phrase) const {
     double len_logprob;
     if (use_poisson_)
-      len_logprob = log_poisson(phrase.size(), 1.0);
+      len_logprob = Md::log_poisson(phrase.size(), 1.0);
     else
       len_logprob = log(1 - p_end_) * (phrase.size() -1) + log(p_end_);
     return log(uniform_word_) * phrase.size() + len_logprob;
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 3e559c75..a1ea8270 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -7,11 +7,12 @@ TESTS = ts phmt mfcr_test
 if HAVE_GTEST
 noinst_PROGRAMS += \
   dict_test \
+  m_test \
   weights_test \
   logval_test \
   small_vector_test
 
-TESTS += small_vector_test logval_test weights_test dict_test
+TESTS += small_vector_test logval_test weights_test dict_test m_test
 endif
 
 reconstruct_weights_SOURCES = reconstruct_weights.cc
@@ -38,6 +39,8 @@ endif
 
 phmt_SOURCES = phmt.cc
 ts_SOURCES = ts.cc
+m_test_SOURCES = m_test.cc
+m_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
 dict_test_SOURCES = dict_test.cc
 dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
 mfcr_test_SOURCES = mfcr_test.cc
diff --git a/utils/m.h b/utils/m.h
new file mode 100644
index 00000000..b25248c2
--- /dev/null
+++ b/utils/m.h
@@ -0,0 +1,89 @@
+#ifndef _M_H_
+#define _M_H_
+
+#include <cassert>
+#include <cmath>
+
+template <typename F>
+struct M {
+  // support [0, 1, 2 ...)
+  static inline F log_poisson(unsigned x, const F& lambda) {
+    assert(lambda > 0.0);
+    return std::log(lambda) * x - lgamma(x + 1) - lambda;
+  }
+
+  // support [0, 1, 2 ...)
+  static inline F log_geometric(unsigned x, const F& p) {
+    assert(p > 0.0);
+    assert(p < 1.0);
+    return std::log(1 - p) * x + std::log(p);
+  }
+
+  // log of the binomial coefficient
+  static inline F log_binom_coeff(unsigned n, unsigned k) {
+    assert(n >= k);
+    if (n == k) return 0.0;
+    return lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1);
+  }
+
+  // http://en.wikipedia.org/wiki/Negative_binomial_distribution
+  // support [0, 1, 2 ...)
+  static inline F log_negative_binom(unsigned x, unsigned r, const F& p) {
+    assert(p > 0.0);
+    assert(p < 1.0);
+    return log_binom_coeff(x + r - 1u, x) + r * std::log(F(1) - p) + x * std::log(p);
+  }
+
+  // this is the Beta function, *not* the beta probability density
+  // http://mathworld.wolfram.com/BetaFunction.html
+  static inline F log_beta_fn(const F& x, const F& y) {
+    return lgamma(x) + lgamma(y) - lgamma(x + y);
+  }
+
+  // support x >= 0.0
+  static F log_gamma_density(const F& x, const F& shape, const F& rate) {
+    assert(x >= 0.0);
+    assert(shape > 0.0);
+    assert(rate > 0.0);
+    return (shape-1)*std::log(x) - shape*std::log(rate) - x/rate - lgamma(shape);
+  }
+
+  // this is the Beta *density* p(x ; alpha, beta)
+  // support x \in (0,1)
+  static inline F log_beta_density(const F& x, const F& alpha, const F& beta) {
+    assert(x > 0.0);
+    assert(x < 1.0);
+    assert(alpha > 0.0);
+    assert(beta > 0.0);
+    return (alpha-1)*std::log(x)+(beta-1)*std::log(1-x) - log_beta_fn(alpha, beta);
+  }
+
+  // note: this has been adapted so that 0 is in the support of the distribution
+  // support [0, 1, 2 ...)
+  static inline F log_yule_simon(unsigned x, const F& rho) {
+    assert(rho > 0.0);
+    return std::log(rho) + log_beta_fn(x + 1, rho + 1);
+  }
+
+  // see http://www.gatsby.ucl.ac.uk/~ywteh/research/compling/hpylm.pdf
+  // when y=1, sometimes written x^{\overline{n}} or x^{(n)} "Pochhammer symbol"
+  static inline F log_generalized_factorial(const F& x, const F& n, const F& y = 1.0) {
+    assert(x > 0.0);
+    assert(y >= 0.0);
+    assert(n > 0.0);
+    if (!n) return 0.0;
+    if (y == F(1)) {
+      return lgamma(x + n) - lgamma(x);
+    } else if (y) {
+      return n * std::log(y) + lgamma(x/y + n) - lgamma(x/y);
+    } else {  // y == 0.0
+      return n * std::log(x);
+    }
+  }
+
+};
+
+typedef M<double> Md;
+typedef M<double> Mf;
+
+#endif
diff --git a/utils/m_test.cc b/utils/m_test.cc
new file mode 100644
index 00000000..fca8f895
--- /dev/null
+++ b/utils/m_test.cc
@@ -0,0 +1,75 @@
+#include "m.h"
+
+#include <iostream>
+#include <gtest/gtest.h>
+#include <cassert>
+
+using namespace std;
+
+class MTest : public testing::Test {
+ public:
+  MTest() {}
+ protected:
+  virtual void SetUp() { }
+  virtual void TearDown() { }
+};
+
+TEST_F(MTest, Poisson) {
+  double prev = 1.0;
+  double tot = 0;
+  for (int i = 0; i < 10; ++i) {
+    double p = Md::log_poisson(i, 0.99);
+    cerr << "p(i=" << i << ") = " << exp(p) << endl;
+    EXPECT_LT(p, prev);
+    tot += exp(p);
+    prev = p;
+  }
+  cerr << "  tot=" << tot << endl;
+  EXPECT_LE(tot, 1.0);
+}
+
+TEST_F(MTest, YuleSimon) {
+  double prev = 1.0;
+  double tot = 0;
+  for (int i = 0; i < 10; ++i) {
+    double p = Md::log_yule_simon(i, 1.0);
+    cerr << "p(i=" << i << ") = " << exp(p) << endl;
+    EXPECT_LT(p, prev);
+    tot += exp(p);
+    prev = p;
+  }
+  cerr << "  tot=" << tot << endl;
+  EXPECT_LE(tot, 1.0);
+}
+
+TEST_F(MTest, LogGeometric) {
+  double prev = 1.0;
+  double tot = 0;
+  for (int i = 0; i < 10; ++i) {
+    double p = Md::log_geometric(i, 0.5);
+    cerr << "p(i=" << i << ") = " << exp(p) << endl;
+    EXPECT_LT(p, prev);
+    tot += exp(p);
+    prev = p;
+  }
+  cerr << "  tot=" << tot << endl;
+  EXPECT_LE(tot, 1.0);
+}
+
+TEST_F(MTest, GeneralizedFactorial) {
+  for (double i = 0.3; i < 10000; i += 0.4) {
+    double a = Md::log_generalized_factorial(1.0, i);
+    double b = lgamma(1.0 + i);
+    EXPECT_FLOAT_EQ(a,b);
+  }
+  double gf_3_6 = 3.0 * 4.0 * 5.0 * 6.0 * 7.0 * 8.0;
+  EXPECT_FLOAT_EQ(Md::log_generalized_factorial(3.0, 6.0), std::log(gf_3_6));
+  double gf_314_6 = 3.14 * 4.14 * 5.14 * 6.14 * 7.14 * 8.14;
+  EXPECT_FLOAT_EQ(Md::log_generalized_factorial(3.14, 6.0), std::log(gf_314_6));
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
diff --git a/utils/mfcr.h b/utils/mfcr.h
index 3eb133fc..396d0205 100644
--- a/utils/mfcr.h
+++ b/utils/mfcr.h
@@ -12,6 +12,7 @@
 #include <boost/functional/hash.hpp>
 #include "sampler.h"
 #include "slice_sampler.h"
+#include "m.h"
 
 struct TableCount {
   TableCount() : count(), floor() {}
@@ -218,31 +219,14 @@ class MFCR {
     return log_crp_prob(d_, alpha_);
   }
 
-  static double log_beta_density(const double& x, const double& alpha, const double& beta) {
-    assert(x > 0.0);
-    assert(x < 1.0);
-    assert(alpha > 0.0);
-    assert(beta > 0.0);
-    const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
-    return lp;
-  }
-
-  static double log_gamma_density(const double& x, const double& shape, const double& rate) {
-    assert(x >= 0.0);
-    assert(shape > 0.0);
-    assert(rate > 0.0);
-    const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
-    return lp;
-  }
-
   // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
   // does not include draws from G_w's
   double log_crp_prob(const double& d, const double& alpha) const {
     double lp = 0.0;
     if (has_d_prior())
-      lp = log_beta_density(d, d_prior_alpha_, d_prior_beta_);
+      lp = Md::log_beta_density(d, d_prior_alpha_, d_prior_beta_);
     if (has_alpha_prior())
-      lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
+      lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
     assert(lp <= 0.0);
     if (num_customers_) {
       if (d > 0.0) {
-- 
cgit v1.2.3