diff options
-rw-r--r-- | gi/pf/pyp_lm.cc | 66 | ||||
-rw-r--r-- | gi/pf/pyp_tm.cc | 2 | ||||
-rw-r--r-- | gi/pf/tied_resampler.h | 82 |
3 files changed, 99 insertions, 51 deletions
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc index 52e6be2c..85635b8f 100644 --- a/gi/pf/pyp_lm.cc +++ b/gi/pf/pyp_lm.cc @@ -11,6 +11,7 @@ #include "tdict.h" #include "sampler.h" #include "ccrp.h" +#include "tied_resampler.h" // A not very memory-efficient implementation of an N-gram LM based on PYPs // as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model @@ -66,7 +67,7 @@ template<> struct PYPLM<0> { void increment(WordID, const vector<WordID>&, MT19937*) { ++draws; } void decrement(WordID, const vector<WordID>&, MT19937*) { --draws; assert(draws >= 0); } double prob(WordID, const vector<WordID>&) const { return p0; } - void resample_hyperparameters(MT19937*, const unsigned, const unsigned) {} + void resample_hyperparameters(MT19937*) {} double log_likelihood() const { return draws * log(p0); } const double p0; int draws; @@ -76,16 +77,17 @@ template<> struct PYPLM<0> { template <unsigned N> struct PYPLM { PYPLM(unsigned vs, double da, double db, double ss, double sr) : backoff(vs, da, db, ss, sr), - discount_a(da), discount_b(db), - strength_s(ss), strength_r(sr), - d(0.8), strength(1.0), lookup(N-1) {} + tr(da, db, ss, sr, 0.8, 1.0), + lookup(N-1) {} void increment(WordID w, const vector<WordID>& context, MT19937* rng) { const double bo = backoff.prob(w, context); for (unsigned i = 0; i < N-1; ++i) lookup[i] = context[context.size() - 1 - i]; typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup); - if (it == p.end()) - it = p.insert(make_pair(lookup, CCRP<WordID>(d,strength))).first; + if (it == p.end()) { + it = p.insert(make_pair(lookup, CCRP<WordID>(0.5,1))).first; + tr.Add(&it->second); // add to resampler + } if (it->second.increment(w, bo, rng)) backoff.increment(w, context, rng); } @@ -107,59 +109,21 @@ template <unsigned N> struct PYPLM { } double log_likelihood() const { - return log_likelihood(d, strength) + backoff.log_likelihood(); - } - - double log_likelihood(const double& dd, const double& aa) const { - if (aa <= -dd) return -std::numeric_limits<double>::infinity(); - //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); - double llh = Md::log_beta_density(dd, discount_a, discount_b) + - Md::log_gamma_density(aa + dd, strength_s, strength_r); + double llh = backoff.log_likelihood(); typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it; for (it = p.begin(); it != p.end(); ++it) - llh += it->second.log_crp_prob(dd, aa); + llh += it->second.log_crp_prob(); + // TODO parametric likelihood from TiedResampler return llh; } - struct DiscountResampler { - DiscountResampler(const PYPLM& m) : m_(m) {} - const PYPLM& m_; - double operator()(const double& proposed_discount) const { - return m_.log_likelihood(proposed_discount, m_.strength); - } - }; - - struct AlphaResampler { - AlphaResampler(const PYPLM& m) : m_(m) {} - const PYPLM& m_; - double operator()(const double& proposed_strength) const { - return m_.log_likelihood(m_.d, proposed_strength); - } - }; - - void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - DiscountResampler dr(*this); - AlphaResampler ar(*this); - for (int iter = 0; iter < nloop; ++iter) { - strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(), - std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); - double min_discount = std::numeric_limits<double>::min(); - if (strength < 0.0) min_discount -= strength; - d = slice_sampler1d(dr, d, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } - strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(), - std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); - typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it; - cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << strength << ") = " << log_likelihood(d, strength) << endl; - for (it = p.begin(); it != p.end(); ++it) { - it->second.set_discount(d); - it->second.set_strength(strength); - } - backoff.resample_hyperparameters(rng, nloop, niterations); + void resample_hyperparameters(MT19937* rng) { + tr.ResampleHyperparameters(rng); + backoff.resample_hyperparameters(rng); } PYPLM<N-1> backoff; + TiedResampler<CCRP<WordID> > tr; double discount_a, discount_b, strength_s, strength_r; double d, strength; mutable vector<WordID> lookup; // thread-local diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc index b5262f47..73104fe9 100644 --- a/gi/pf/pyp_tm.cc +++ b/gi/pf/pyp_tm.cc @@ -11,6 +11,8 @@ #include "ccrp.h" #include "pyp_word_model.h" +#include "tied_resampler.h" + using namespace std; using namespace std::tr1; diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h new file mode 100644 index 00000000..208fb9c7 --- /dev/null +++ b/gi/pf/tied_resampler.h @@ -0,0 +1,82 @@ +#ifndef _TIED_RESAMPLER_H_ +#define _TIED_RESAMPLER_H_ + +#include <set> +#include "sampler.h" +#include "slice_sampler.h" +#include "m.h" + +template <class CRP> +struct TiedResampler { + explicit TiedResampler(double da, double db, double ss, double sr, double d=0.5, double s=1.0) : + d_alpha(da), + d_beta(db), + s_shape(ss), + s_rate(sr), + discount(d), + strength(s) {} + + void Add(CRP* crp) { + crps.insert(crp); + crp->set_discount(discount); + crp->set_strength(strength); + assert(!crp->has_discount_prior()); + assert(!crp->has_strength_prior()); + } + + void Remove(CRP* crp) { + crps.erase(crp); + } + + double LogLikelihood(double d, double s) const { + if (s <= -d) return -std::numeric_limits<double>::infinity(); + double llh = Md::log_beta_density(d, d_alpha, d_beta) + + Md::log_gamma_density(d + s, s_shape, s_rate); + for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) + llh += (*it)->log_crp_prob(d, s); + return llh; + } + + struct DiscountResampler { + DiscountResampler(const TiedResampler& m) : m_(m) {} + const TiedResampler& m_; + double operator()(const double& proposed_discount) const { + return m_.LogLikelihood(proposed_discount, m_.strength); + } + }; + + struct AlphaResampler { + AlphaResampler(const TiedResampler& m) : m_(m) {} + const TiedResampler& m_; + double operator()(const double& proposed_strength) const { + return m_.LogLikelihood(m_.discount, proposed_strength); + } + }; + + void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { + const DiscountResampler dr(*this); + const AlphaResampler ar(*this); + for (int iter = 0; iter < nloop; ++iter) { + strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(), + std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); + double min_discount = std::numeric_limits<double>::min(); + if (strength < 0.0) min_discount -= strength; + discount = slice_sampler1d(dr, discount, *rng, min_discount, + 1.0, 0.0, niterations, 100*niterations); + } + strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(), + std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); + std::cerr << "TiedCRPs(d=" << discount << ",s=" + << strength << ") = " << LogLikelihood(discount, strength) << std::endl; + for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) { + (*it)->set_discount(discount); + (*it)->set_strength(strength); + } + } + private: + std::set<CRP*> crps; + const double d_alpha, d_beta, s_shape, s_rate; + double discount, strength; +}; + +#endif |