summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-03-10 01:08:23 -0500
committerChris Dyer <cdyer@cs.cmu.edu>2012-03-10 01:08:23 -0500
commit1c92df11360cda4be57183bfb4efa2d62107c651 (patch)
tree20e09886710c404239150ba7d73b24a203afeea1
parent2e9006a5b153dfe3c0fcedf9f1eaea8866f518a8 (diff)
tie params
-rw-r--r--gi/pf/pyp_lm.cc66
-rw-r--r--gi/pf/pyp_tm.cc2
-rw-r--r--gi/pf/tied_resampler.h82
3 files changed, 99 insertions, 51 deletions
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc
index 52e6be2c..85635b8f 100644
--- a/gi/pf/pyp_lm.cc
+++ b/gi/pf/pyp_lm.cc
@@ -11,6 +11,7 @@
#include "tdict.h"
#include "sampler.h"
#include "ccrp.h"
+#include "tied_resampler.h"
// A not very memory-efficient implementation of an N-gram LM based on PYPs
// as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model
@@ -66,7 +67,7 @@ template<> struct PYPLM<0> {
void increment(WordID, const vector<WordID>&, MT19937*) { ++draws; }
void decrement(WordID, const vector<WordID>&, MT19937*) { --draws; assert(draws >= 0); }
double prob(WordID, const vector<WordID>&) const { return p0; }
- void resample_hyperparameters(MT19937*, const unsigned, const unsigned) {}
+ void resample_hyperparameters(MT19937*) {}
double log_likelihood() const { return draws * log(p0); }
const double p0;
int draws;
@@ -76,16 +77,17 @@ template<> struct PYPLM<0> {
template <unsigned N> struct PYPLM {
PYPLM(unsigned vs, double da, double db, double ss, double sr) :
backoff(vs, da, db, ss, sr),
- discount_a(da), discount_b(db),
- strength_s(ss), strength_r(sr),
- d(0.8), strength(1.0), lookup(N-1) {}
+ tr(da, db, ss, sr, 0.8, 1.0),
+ lookup(N-1) {}
void increment(WordID w, const vector<WordID>& context, MT19937* rng) {
const double bo = backoff.prob(w, context);
for (unsigned i = 0; i < N-1; ++i)
lookup[i] = context[context.size() - 1 - i];
typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup);
- if (it == p.end())
- it = p.insert(make_pair(lookup, CCRP<WordID>(d,strength))).first;
+ if (it == p.end()) {
+ it = p.insert(make_pair(lookup, CCRP<WordID>(0.5,1))).first;
+ tr.Add(&it->second); // add to resampler
+ }
if (it->second.increment(w, bo, rng))
backoff.increment(w, context, rng);
}
@@ -107,59 +109,21 @@ template <unsigned N> struct PYPLM {
}
double log_likelihood() const {
- return log_likelihood(d, strength) + backoff.log_likelihood();
- }
-
- double log_likelihood(const double& dd, const double& aa) const {
- if (aa <= -dd) return -std::numeric_limits<double>::infinity();
- //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1);
- double llh = Md::log_beta_density(dd, discount_a, discount_b) +
- Md::log_gamma_density(aa + dd, strength_s, strength_r);
+ double llh = backoff.log_likelihood();
typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it;
for (it = p.begin(); it != p.end(); ++it)
- llh += it->second.log_crp_prob(dd, aa);
+ llh += it->second.log_crp_prob();
+ // TODO parametric likelihood from TiedResampler
return llh;
}
- struct DiscountResampler {
- DiscountResampler(const PYPLM& m) : m_(m) {}
- const PYPLM& m_;
- double operator()(const double& proposed_discount) const {
- return m_.log_likelihood(proposed_discount, m_.strength);
- }
- };
-
- struct AlphaResampler {
- AlphaResampler(const PYPLM& m) : m_(m) {}
- const PYPLM& m_;
- double operator()(const double& proposed_strength) const {
- return m_.log_likelihood(m_.d, proposed_strength);
- }
- };
-
- void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
- DiscountResampler dr(*this);
- AlphaResampler ar(*this);
- for (int iter = 0; iter < nloop; ++iter) {
- strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- double min_discount = std::numeric_limits<double>::min();
- if (strength < 0.0) min_discount -= strength;
- d = slice_sampler1d(dr, d, *rng, min_discount,
- 1.0, 0.0, niterations, 100*niterations);
- }
- strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it;
- cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << strength << ") = " << log_likelihood(d, strength) << endl;
- for (it = p.begin(); it != p.end(); ++it) {
- it->second.set_discount(d);
- it->second.set_strength(strength);
- }
- backoff.resample_hyperparameters(rng, nloop, niterations);
+ void resample_hyperparameters(MT19937* rng) {
+ tr.ResampleHyperparameters(rng);
+ backoff.resample_hyperparameters(rng);
}
PYPLM<N-1> backoff;
+ TiedResampler<CCRP<WordID> > tr;
double discount_a, discount_b, strength_s, strength_r;
double d, strength;
mutable vector<WordID> lookup; // thread-local
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc
index b5262f47..73104fe9 100644
--- a/gi/pf/pyp_tm.cc
+++ b/gi/pf/pyp_tm.cc
@@ -11,6 +11,8 @@
#include "ccrp.h"
#include "pyp_word_model.h"
+#include "tied_resampler.h"
+
using namespace std;
using namespace std::tr1;
diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h
new file mode 100644
index 00000000..208fb9c7
--- /dev/null
+++ b/gi/pf/tied_resampler.h
@@ -0,0 +1,82 @@
+#ifndef _TIED_RESAMPLER_H_
+#define _TIED_RESAMPLER_H_
+
+#include <set>
+#include "sampler.h"
+#include "slice_sampler.h"
+#include "m.h"
+
+template <class CRP>
+struct TiedResampler {
+ explicit TiedResampler(double da, double db, double ss, double sr, double d=0.5, double s=1.0) :
+ d_alpha(da),
+ d_beta(db),
+ s_shape(ss),
+ s_rate(sr),
+ discount(d),
+ strength(s) {}
+
+ void Add(CRP* crp) {
+ crps.insert(crp);
+ crp->set_discount(discount);
+ crp->set_strength(strength);
+ assert(!crp->has_discount_prior());
+ assert(!crp->has_strength_prior());
+ }
+
+ void Remove(CRP* crp) {
+ crps.erase(crp);
+ }
+
+ double LogLikelihood(double d, double s) const {
+ if (s <= -d) return -std::numeric_limits<double>::infinity();
+ double llh = Md::log_beta_density(d, d_alpha, d_beta) +
+ Md::log_gamma_density(d + s, s_shape, s_rate);
+ for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it)
+ llh += (*it)->log_crp_prob(d, s);
+ return llh;
+ }
+
+ struct DiscountResampler {
+ DiscountResampler(const TiedResampler& m) : m_(m) {}
+ const TiedResampler& m_;
+ double operator()(const double& proposed_discount) const {
+ return m_.LogLikelihood(proposed_discount, m_.strength);
+ }
+ };
+
+ struct AlphaResampler {
+ AlphaResampler(const TiedResampler& m) : m_(m) {}
+ const TiedResampler& m_;
+ double operator()(const double& proposed_strength) const {
+ return m_.LogLikelihood(m_.discount, proposed_strength);
+ }
+ };
+
+ void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
+ const DiscountResampler dr(*this);
+ const AlphaResampler ar(*this);
+ for (int iter = 0; iter < nloop; ++iter) {
+ strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(),
+ std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+ double min_discount = std::numeric_limits<double>::min();
+ if (strength < 0.0) min_discount -= strength;
+ discount = slice_sampler1d(dr, discount, *rng, min_discount,
+ 1.0, 0.0, niterations, 100*niterations);
+ }
+ strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(),
+ std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+ std::cerr << "TiedCRPs(d=" << discount << ",s="
+ << strength << ") = " << LogLikelihood(discount, strength) << std::endl;
+ for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) {
+ (*it)->set_discount(discount);
+ (*it)->set_strength(strength);
+ }
+ }
+ private:
+ std::set<CRP*> crps;
+ const double d_alpha, d_beta, s_shape, s_rate;
+ double discount, strength;
+};
+
+#endif