diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-03-10 14:10:04 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-03-10 14:10:04 -0500 |
commit | 289f96779e665ba24adca3461a624c68aa37bd99 (patch) | |
tree | 8de8847d19c897351324461aef66dc318eaa2772 /gi/pf | |
parent | 38f28be7cd2bada87ebad78994e3c938e10c2cce (diff) |
do Bayesian inference on quasimodel2 hyperparameters
Diffstat (limited to 'gi/pf')
-rw-r--r-- | gi/pf/align-lexonly-pyp.cc | 5 | ||||
-rw-r--r-- | gi/pf/pyp_lm.cc | 2 | ||||
-rw-r--r-- | gi/pf/pyp_tm.cc | 11 | ||||
-rw-r--r-- | gi/pf/quasi_model2.h | 57 | ||||
-rw-r--r-- | gi/pf/tied_resampler.h | 11 |
5 files changed, 75 insertions, 11 deletions
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc index 68cb9192..6c054753 100644 --- a/gi/pf/align-lexonly-pyp.cc +++ b/gi/pf/align-lexonly-pyp.cc @@ -74,6 +74,7 @@ struct Aligner { void ResampleHyperparameters() { model.ResampleHyperparameters(prng); + paj_model.ResampleHyperparameters(prng); } void InitializeRandom() { @@ -216,9 +217,9 @@ int main(int argc, char** argv) { const unsigned samples = conf["samples"].as<unsigned>(); for (int i = 0; i < samples; ++i) { for (int j = 65; j < 67; ++j) Debug(corpus[j]); - if (i % 7 == 6) aligner.ResampleHyperparameters(); + if (i % 10 == 9) aligner.ResampleHyperparameters(); aligner.ResampleCorpus(); - if (i > (samples / 5) && (i % 10 == 9)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); + if (i > (samples / 5) && (i % 6 == 5)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); } for (unsigned i = 0; i < corpus.size(); ++i) WriteAlignments(corpus[i]); diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc index 85635b8f..91029688 100644 --- a/gi/pf/pyp_lm.cc +++ b/gi/pf/pyp_lm.cc @@ -113,7 +113,7 @@ template <unsigned N> struct PYPLM { typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it; for (it = p.begin(); it != p.end(); ++it) llh += it->second.log_crp_prob(); - // TODO parametric likelihood from TiedResampler + llh += tr.LogLikelihood(); return llh; } diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc index bf5a6497..34ef0ba2 100644 --- a/gi/pf/pyp_tm.cc +++ b/gi/pf/pyp_tm.cc @@ -17,7 +17,7 @@ using namespace std::tr1; template <typename Base> struct ConditionalPYPWordModel { - ConditionalPYPWordModel(Base* b) : base(*b), btr(3) {} + ConditionalPYPWordModel(Base* b) : base(*b), btr(2) {} void Summary() const { cerr << "Number of conditioning contexts: " << r.size() << endl; @@ -29,8 +29,6 @@ struct ConditionalPYPWordModel { } void ResampleHyperparameters(MT19937* rng) { - for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it) - it->second.resample_hyperparameters(rng); btr.ResampleHyperparameters(rng); } @@ -45,8 +43,11 @@ struct ConditionalPYPWordModel { void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) { RuleModelHash::iterator it = r.find(src); - if (it == r.end()) - it = r.insert(make_pair(src, CCRP<vector<WordID> >(1,1,1,1,0.5,1.0))).first; + if (it == r.end()) { + it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first; + static const WordID kNULL = TD::Convert("NULL"); + btr.Add(src == kNULL ? 0 : 1, &it->second); + } if (it->second.increment(trglets, base(trglets), rng)) base.Increment(trglets, rng); } diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h index 8ec0a400..588c8f84 100644 --- a/gi/pf/quasi_model2.h +++ b/gi/pf/quasi_model2.h @@ -7,6 +7,8 @@ #include "boost/functional.hpp" #include "prob.h" #include "array2d.h" +#include "slice_sampler.h" +#include "m.h" struct AlignmentObservation { AlignmentObservation() : src_len(), trg_len(), j(), a_j() {} @@ -53,6 +55,37 @@ struct QuasiModel2 { if (!cc) obs_.erase(ao); } + struct PNullResampler { + PNullResampler(const QuasiModel2& m) : m_(m) {} + const QuasiModel2& m_; + double operator()(const double& proposed_pnull) const { + return log(m_.Likelihood(m_.alpha_, proposed_pnull)); + } + }; + + struct AlphaResampler { + AlphaResampler(const QuasiModel2& m) : m_(m) {} + const QuasiModel2& m_; + double operator()(const double& proposed_alpha) const { + return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float())); + } + }; + + void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { + const PNullResampler dr(*this); + const AlphaResampler ar(*this); + for (unsigned i = 0; i < nloop; ++i) { + double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001, + 1.0, 0.0, niterations, 100*niterations); + pnull_ = prob_t(pnull); + alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001, + std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); + } + std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null=" + << pnull_.as_float() << ") = " << Likelihood() << std::endl; + zcache_.clear(); + } + prob_t Likelihood() const { return Likelihood(alpha_, pnull_.as_float()); } @@ -61,12 +94,17 @@ struct QuasiModel2 { const prob_t pnull(ppnull); const prob_t pnotnull(1 - ppnull); - prob_t p = prob_t::One(); + prob_t p; + p.logeq(Md::log_gamma_density(alpha, 0.1, 25)); // TODO configure + assert(!p.is_0()); + prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10)); + assert(!prob_of_ppnull.is_0()); + p *= prob_of_ppnull; for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) { const AlignmentObservation& ao = it->first; if (ao.a_j) { - double u = UnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha); - double z = ComputeZ(ao.j, ao.src_len, ao.trg_len, alpha); + prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha); + prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha); prob_t pa(u / z); pa *= pnotnull; pa.poweq(it->second); @@ -79,6 +117,19 @@ struct QuasiModel2 { } private: + static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { + prob_t p; + p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); + return p; + } + + static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { + prob_t z = prob_t::Zero(); + for (int a_j = 1; a_j <= src_len; ++a_j) + z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha); + return z; + } + static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); } diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h index 5a262f9d..6f45fbce 100644 --- a/gi/pf/tied_resampler.h +++ b/gi/pf/tied_resampler.h @@ -42,6 +42,10 @@ struct TiedResampler { return llh; } + double LogLikelihood() const { + return LogLikelihood(discount, strength); + } + struct DiscountResampler { DiscountResampler(const TiedResampler& m) : m_(m) {} const TiedResampler& m_; @@ -106,6 +110,13 @@ struct BinTiedResampler { } } + double LogLikelihood() const { + double llh = 0; + for (unsigned i = 0; i < resamplers.size(); ++i) + llh += resamplers[i].LogLikelihood(); + return llh; + } + private: std::vector<TiedResampler<CRP> > resamplers; }; |