tie params

author: Chris Dyer <cdyer@cs.cmu.edu> 2012-03-10 01:08:23 -0500
committer: Chris Dyer <cdyer@cs.cmu.edu> 2012-03-10 01:08:23 -0500
commit: 1c92df11360cda4be57183bfb4efa2d62107c651 (patch)
tree: 20e09886710c404239150ba7d73b24a203afeea1
parent: 2e9006a5b153dfe3c0fcedf9f1eaea8866f518a8 (diff)
3 files changed, 99 insertions, 51 deletions
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc
index 52e6be2c..85635b8f 100644
--- a/gi/pf/pyp_lm.cc
+++ b/gi/pf/pyp_lm.cc
@@ -11,6 +11,7 @@
 #include "tdict.h"
 #include "sampler.h"
 #include "ccrp.h"
+#include "tied_resampler.h"
 
 // A not very memory-efficient implementation of an N-gram LM based on PYPs
 // as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model
@@ -66,7 +67,7 @@ template<> struct PYPLM<0> {
   void increment(WordID, const vector<WordID>&, MT19937*) { ++draws; }
   void decrement(WordID, const vector<WordID>&, MT19937*) { --draws; assert(draws >= 0); }
   double prob(WordID, const vector<WordID>&) const { return p0; }
-  void resample_hyperparameters(MT19937*, const unsigned, const unsigned) {}
+  void resample_hyperparameters(MT19937*) {}
   double log_likelihood() const { return draws * log(p0); }
   const double p0;
   int draws;
@@ -76,16 +77,17 @@ template<> struct PYPLM<0> {
 template <unsigned N> struct PYPLM {
   PYPLM(unsigned vs, double da, double db, double ss, double sr) :
       backoff(vs, da, db, ss, sr),
-      discount_a(da), discount_b(db),
-      strength_s(ss), strength_r(sr),
-      d(0.8), strength(1.0), lookup(N-1) {}
+      tr(da, db, ss, sr, 0.8, 1.0),
+      lookup(N-1) {}
   void increment(WordID w, const vector<WordID>& context, MT19937* rng) {
     const double bo = backoff.prob(w, context);
     for (unsigned i = 0; i < N-1; ++i)
       lookup[i] = context[context.size() - 1 - i];
     typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup);
-    if (it == p.end())
-      it = p.insert(make_pair(lookup, CCRP<WordID>(d,strength))).first;
+    if (it == p.end()) {
+      it = p.insert(make_pair(lookup, CCRP<WordID>(0.5,1))).first;
+      tr.Add(&it->second);  // add to resampler
+    }
     if (it->second.increment(w, bo, rng))
       backoff.increment(w, context, rng);
   }
@@ -107,59 +109,21 @@ template <unsigned N> struct PYPLM {
   }
 
   double log_likelihood() const {
-    return log_likelihood(d, strength) + backoff.log_likelihood();
-  }
-
-  double log_likelihood(const double& dd, const double& aa) const {
-    if (aa <= -dd) return -std::numeric_limits<double>::infinity();
-    //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1);
-    double llh = Md::log_beta_density(dd, discount_a, discount_b) +
-                 Md::log_gamma_density(aa + dd, strength_s, strength_r);
+    double llh = backoff.log_likelihood();
     typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it;
     for (it = p.begin(); it != p.end(); ++it)
-      llh += it->second.log_crp_prob(dd, aa);
+      llh += it->second.log_crp_prob();
+    // TODO parametric likelihood from TiedResampler
     return llh;
   }
 
-  struct DiscountResampler {
-    DiscountResampler(const PYPLM& m) : m_(m) {}
-    const PYPLM& m_;
-    double operator()(const double& proposed_discount) const {
-      return m_.log_likelihood(proposed_discount, m_.strength);
-    }
-  };
-
-  struct AlphaResampler {
-    AlphaResampler(const PYPLM& m) : m_(m) {}
-    const PYPLM& m_;
-    double operator()(const double& proposed_strength) const {
-      return m_.log_likelihood(m_.d, proposed_strength);
-    }
-  };
-
-  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
-    DiscountResampler dr(*this);
-    AlphaResampler ar(*this);
-    for (int iter = 0; iter < nloop; ++iter) {
-      strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
-                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-      double min_discount = std::numeric_limits<double>::min();
-      if (strength < 0.0) min_discount -= strength;
-      d = slice_sampler1d(dr, d, *rng, min_discount,
-                          1.0, 0.0, niterations, 100*niterations);
-    }
-    strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
-                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-    typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it;
-    cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << strength << ") = " << log_likelihood(d, strength) << endl;
-    for (it = p.begin(); it != p.end(); ++it) {
-      it->second.set_discount(d);
-      it->second.set_strength(strength);
-    }
-    backoff.resample_hyperparameters(rng, nloop, niterations);
+  void resample_hyperparameters(MT19937* rng) {
+    tr.ResampleHyperparameters(rng);
+    backoff.resample_hyperparameters(rng);
   }
 
   PYPLM<N-1> backoff;
+  TiedResampler<CCRP<WordID> > tr;
   double discount_a, discount_b, strength_s, strength_r;
   double d, strength;
   mutable vector<WordID> lookup;  // thread-local
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc
index b5262f47..73104fe9 100644
--- a/gi/pf/pyp_tm.cc
+++ b/gi/pf/pyp_tm.cc
@@ -11,6 +11,8 @@
 #include "ccrp.h"
 #include "pyp_word_model.h"
 
+#include "tied_resampler.h"
+
 using namespace std;
 using namespace std::tr1;
 
diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h
new file mode 100644
index 00000000..208fb9c7
--- /dev/null
+++ b/gi/pf/tied_resampler.h
@@ -0,0 +1,82 @@
+#ifndef _TIED_RESAMPLER_H_
+#define _TIED_RESAMPLER_H_
+
+#include <set>
+#include "sampler.h"
+#include "slice_sampler.h"
+#include "m.h"
+
+template <class CRP>
+struct TiedResampler {
+  explicit TiedResampler(double da, double db, double ss, double sr, double d=0.5, double s=1.0) :
+      d_alpha(da),
+      d_beta(db),
+      s_shape(ss),
+      s_rate(sr),
+      discount(d),
+      strength(s) {}
+
+  void Add(CRP* crp) {
+    crps.insert(crp);
+    crp->set_discount(discount);
+    crp->set_strength(strength);
+    assert(!crp->has_discount_prior());
+    assert(!crp->has_strength_prior());
+  }
+
+  void Remove(CRP* crp) {
+    crps.erase(crp);
+  }
+
+  double LogLikelihood(double d, double s) const {
+    if (s <= -d) return -std::numeric_limits<double>::infinity();
+    double llh = Md::log_beta_density(d, d_alpha, d_beta) +
+                 Md::log_gamma_density(d + s, s_shape, s_rate);
+    for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it)
+      llh += (*it)->log_crp_prob(d, s);
+    return llh;
+  }
+
+  struct DiscountResampler {
+    DiscountResampler(const TiedResampler& m) : m_(m) {}
+    const TiedResampler& m_;
+    double operator()(const double& proposed_discount) const {
+      return m_.LogLikelihood(proposed_discount, m_.strength);
+    }
+  };
+
+  struct AlphaResampler {
+    AlphaResampler(const TiedResampler& m) : m_(m) {}
+    const TiedResampler& m_;
+    double operator()(const double& proposed_strength) const {
+      return m_.LogLikelihood(m_.discount, proposed_strength);
+    }
+  };
+
+  void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
+    const DiscountResampler dr(*this);
+    const AlphaResampler ar(*this);
+    for (int iter = 0; iter < nloop; ++iter) {
+      strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(),
+                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+      double min_discount = std::numeric_limits<double>::min();
+      if (strength < 0.0) min_discount -= strength;
+      discount = slice_sampler1d(dr, discount, *rng, min_discount,
+                          1.0, 0.0, niterations, 100*niterations);
+    }
+    strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(),
+                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+    std::cerr << "TiedCRPs(d=" << discount << ",s="
+              << strength << ") = " << LogLikelihood(discount, strength) << std::endl;
+    for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) {
+      (*it)->set_discount(discount);
+      (*it)->set_strength(strength);
+    }
+  }
+ private:
+  std::set<CRP*> crps;
+  const double d_alpha, d_beta, s_shape, s_rate;
+  double discount, strength;
+};
+
+#endif
author	Chris Dyer <cdyer@cs.cmu.edu>	2012-03-10 01:08:23 -0500
committer	Chris Dyer <cdyer@cs.cmu.edu>	2012-03-10 01:08:23 -0500
commit	1c92df11360cda4be57183bfb4efa2d62107c651 (patch)
tree	20e09886710c404239150ba7d73b24a203afeea1
parent	2e9006a5b153dfe3c0fcedf9f1eaea8866f518a8 (diff)