diff options
Diffstat (limited to 'gi')
| -rw-r--r-- | gi/pf/pyp_lm.cc | 66 | ||||
| -rw-r--r-- | gi/pf/pyp_tm.cc | 2 | ||||
| -rw-r--r-- | gi/pf/tied_resampler.h | 82 | 
3 files changed, 99 insertions, 51 deletions
| diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc index 52e6be2c..85635b8f 100644 --- a/gi/pf/pyp_lm.cc +++ b/gi/pf/pyp_lm.cc @@ -11,6 +11,7 @@  #include "tdict.h"  #include "sampler.h"  #include "ccrp.h" +#include "tied_resampler.h"  // A not very memory-efficient implementation of an N-gram LM based on PYPs  // as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model @@ -66,7 +67,7 @@ template<> struct PYPLM<0> {    void increment(WordID, const vector<WordID>&, MT19937*) { ++draws; }    void decrement(WordID, const vector<WordID>&, MT19937*) { --draws; assert(draws >= 0); }    double prob(WordID, const vector<WordID>&) const { return p0; } -  void resample_hyperparameters(MT19937*, const unsigned, const unsigned) {} +  void resample_hyperparameters(MT19937*) {}    double log_likelihood() const { return draws * log(p0); }    const double p0;    int draws; @@ -76,16 +77,17 @@ template<> struct PYPLM<0> {  template <unsigned N> struct PYPLM {    PYPLM(unsigned vs, double da, double db, double ss, double sr) :        backoff(vs, da, db, ss, sr), -      discount_a(da), discount_b(db), -      strength_s(ss), strength_r(sr), -      d(0.8), strength(1.0), lookup(N-1) {} +      tr(da, db, ss, sr, 0.8, 1.0), +      lookup(N-1) {}    void increment(WordID w, const vector<WordID>& context, MT19937* rng) {      const double bo = backoff.prob(w, context);      for (unsigned i = 0; i < N-1; ++i)        lookup[i] = context[context.size() - 1 - i];      typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup); -    if (it == p.end()) -      it = p.insert(make_pair(lookup, CCRP<WordID>(d,strength))).first; +    if (it == p.end()) { +      it = p.insert(make_pair(lookup, CCRP<WordID>(0.5,1))).first; +      tr.Add(&it->second);  // add to resampler +    }      if (it->second.increment(w, bo, rng))        backoff.increment(w, context, rng);    } @@ -107,59 +109,21 @@ template <unsigned N> struct PYPLM {    }    double log_likelihood() const { -    return log_likelihood(d, strength) + backoff.log_likelihood(); -  } - -  double log_likelihood(const double& dd, const double& aa) const { -    if (aa <= -dd) return -std::numeric_limits<double>::infinity(); -    //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); -    double llh = Md::log_beta_density(dd, discount_a, discount_b) + -                 Md::log_gamma_density(aa + dd, strength_s, strength_r); +    double llh = backoff.log_likelihood();      typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it;      for (it = p.begin(); it != p.end(); ++it) -      llh += it->second.log_crp_prob(dd, aa); +      llh += it->second.log_crp_prob(); +    // TODO parametric likelihood from TiedResampler      return llh;    } -  struct DiscountResampler { -    DiscountResampler(const PYPLM& m) : m_(m) {} -    const PYPLM& m_; -    double operator()(const double& proposed_discount) const { -      return m_.log_likelihood(proposed_discount, m_.strength); -    } -  }; - -  struct AlphaResampler { -    AlphaResampler(const PYPLM& m) : m_(m) {} -    const PYPLM& m_; -    double operator()(const double& proposed_strength) const { -      return m_.log_likelihood(m_.d, proposed_strength); -    } -  }; - -  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { -    DiscountResampler dr(*this); -    AlphaResampler ar(*this); -    for (int iter = 0; iter < nloop; ++iter) { -      strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(), -                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -      double min_discount = std::numeric_limits<double>::min(); -      if (strength < 0.0) min_discount -= strength; -      d = slice_sampler1d(dr, d, *rng, min_discount, -                          1.0, 0.0, niterations, 100*niterations); -    } -    strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(), -                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -    typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it; -    cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << strength << ") = " << log_likelihood(d, strength) << endl; -    for (it = p.begin(); it != p.end(); ++it) { -      it->second.set_discount(d); -      it->second.set_strength(strength); -    } -    backoff.resample_hyperparameters(rng, nloop, niterations); +  void resample_hyperparameters(MT19937* rng) { +    tr.ResampleHyperparameters(rng); +    backoff.resample_hyperparameters(rng);    }    PYPLM<N-1> backoff; +  TiedResampler<CCRP<WordID> > tr;    double discount_a, discount_b, strength_s, strength_r;    double d, strength;    mutable vector<WordID> lookup;  // thread-local diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc index b5262f47..73104fe9 100644 --- a/gi/pf/pyp_tm.cc +++ b/gi/pf/pyp_tm.cc @@ -11,6 +11,8 @@  #include "ccrp.h"  #include "pyp_word_model.h" +#include "tied_resampler.h" +  using namespace std;  using namespace std::tr1; diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h new file mode 100644 index 00000000..208fb9c7 --- /dev/null +++ b/gi/pf/tied_resampler.h @@ -0,0 +1,82 @@ +#ifndef _TIED_RESAMPLER_H_ +#define _TIED_RESAMPLER_H_ + +#include <set> +#include "sampler.h" +#include "slice_sampler.h" +#include "m.h" + +template <class CRP> +struct TiedResampler { +  explicit TiedResampler(double da, double db, double ss, double sr, double d=0.5, double s=1.0) : +      d_alpha(da), +      d_beta(db), +      s_shape(ss), +      s_rate(sr), +      discount(d), +      strength(s) {} + +  void Add(CRP* crp) { +    crps.insert(crp); +    crp->set_discount(discount); +    crp->set_strength(strength); +    assert(!crp->has_discount_prior()); +    assert(!crp->has_strength_prior()); +  } + +  void Remove(CRP* crp) { +    crps.erase(crp); +  } + +  double LogLikelihood(double d, double s) const { +    if (s <= -d) return -std::numeric_limits<double>::infinity(); +    double llh = Md::log_beta_density(d, d_alpha, d_beta) + +                 Md::log_gamma_density(d + s, s_shape, s_rate); +    for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) +      llh += (*it)->log_crp_prob(d, s); +    return llh; +  } + +  struct DiscountResampler { +    DiscountResampler(const TiedResampler& m) : m_(m) {} +    const TiedResampler& m_; +    double operator()(const double& proposed_discount) const { +      return m_.LogLikelihood(proposed_discount, m_.strength); +    } +  }; + +  struct AlphaResampler { +    AlphaResampler(const TiedResampler& m) : m_(m) {} +    const TiedResampler& m_; +    double operator()(const double& proposed_strength) const { +      return m_.LogLikelihood(m_.discount, proposed_strength); +    } +  }; + +  void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { +    const DiscountResampler dr(*this); +    const AlphaResampler ar(*this); +    for (int iter = 0; iter < nloop; ++iter) { +      strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(), +                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); +      double min_discount = std::numeric_limits<double>::min(); +      if (strength < 0.0) min_discount -= strength; +      discount = slice_sampler1d(dr, discount, *rng, min_discount, +                          1.0, 0.0, niterations, 100*niterations); +    } +    strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(), +                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); +    std::cerr << "TiedCRPs(d=" << discount << ",s=" +              << strength << ") = " << LogLikelihood(discount, strength) << std::endl; +    for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) { +      (*it)->set_discount(discount); +      (*it)->set_strength(strength); +    } +  } + private: +  std::set<CRP*> crps; +  const double d_alpha, d_beta, s_shape, s_rate; +  double discount, strength; +}; + +#endif | 
