From 289f96779e665ba24adca3461a624c68aa37bd99 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 10 Mar 2012 14:10:04 -0500 Subject: do Bayesian inference on quasimodel2 hyperparameters --- gi/pf/align-lexonly-pyp.cc | 5 ++-- gi/pf/pyp_lm.cc | 2 +- gi/pf/pyp_tm.cc | 11 +++++---- gi/pf/quasi_model2.h | 57 +++++++++++++++++++++++++++++++++++++++++++--- gi/pf/tied_resampler.h | 11 +++++++++ 5 files changed, 75 insertions(+), 11 deletions(-) (limited to 'gi') diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc index 68cb9192..6c054753 100644 --- a/gi/pf/align-lexonly-pyp.cc +++ b/gi/pf/align-lexonly-pyp.cc @@ -74,6 +74,7 @@ struct Aligner { void ResampleHyperparameters() { model.ResampleHyperparameters(prng); + paj_model.ResampleHyperparameters(prng); } void InitializeRandom() { @@ -216,9 +217,9 @@ int main(int argc, char** argv) { const unsigned samples = conf["samples"].as(); for (int i = 0; i < samples; ++i) { for (int j = 65; j < 67; ++j) Debug(corpus[j]); - if (i % 7 == 6) aligner.ResampleHyperparameters(); + if (i % 10 == 9) aligner.ResampleHyperparameters(); aligner.ResampleCorpus(); - if (i > (samples / 5) && (i % 10 == 9)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); + if (i > (samples / 5) && (i % 6 == 5)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); } for (unsigned i = 0; i < corpus.size(); ++i) WriteAlignments(corpus[i]); diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc index 85635b8f..91029688 100644 --- a/gi/pf/pyp_lm.cc +++ b/gi/pf/pyp_lm.cc @@ -113,7 +113,7 @@ template struct PYPLM { typename unordered_map, CCRP, boost::hash > >::const_iterator it; for (it = p.begin(); it != p.end(); ++it) llh += it->second.log_crp_prob(); - // TODO parametric likelihood from TiedResampler + llh += tr.LogLikelihood(); return llh; } diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc index bf5a6497..34ef0ba2 100644 --- a/gi/pf/pyp_tm.cc +++ b/gi/pf/pyp_tm.cc @@ -17,7 +17,7 @@ using namespace std::tr1; template struct ConditionalPYPWordModel { - ConditionalPYPWordModel(Base* b) : base(*b), btr(3) {} + ConditionalPYPWordModel(Base* b) : base(*b), btr(2) {} void Summary() const { cerr << "Number of conditioning contexts: " << r.size() << endl; @@ -29,8 +29,6 @@ struct ConditionalPYPWordModel { } void ResampleHyperparameters(MT19937* rng) { - for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it) - it->second.resample_hyperparameters(rng); btr.ResampleHyperparameters(rng); } @@ -45,8 +43,11 @@ struct ConditionalPYPWordModel { void Increment(const WordID src, const vector& trglets, MT19937* rng) { RuleModelHash::iterator it = r.find(src); - if (it == r.end()) - it = r.insert(make_pair(src, CCRP >(1,1,1,1,0.5,1.0))).first; + if (it == r.end()) { + it = r.insert(make_pair(src, CCRP >(0.5,1.0))).first; + static const WordID kNULL = TD::Convert("NULL"); + btr.Add(src == kNULL ? 0 : 1, &it->second); + } if (it->second.increment(trglets, base(trglets), rng)) base.Increment(trglets, rng); } diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h index 8ec0a400..588c8f84 100644 --- a/gi/pf/quasi_model2.h +++ b/gi/pf/quasi_model2.h @@ -7,6 +7,8 @@ #include "boost/functional.hpp" #include "prob.h" #include "array2d.h" +#include "slice_sampler.h" +#include "m.h" struct AlignmentObservation { AlignmentObservation() : src_len(), trg_len(), j(), a_j() {} @@ -53,6 +55,37 @@ struct QuasiModel2 { if (!cc) obs_.erase(ao); } + struct PNullResampler { + PNullResampler(const QuasiModel2& m) : m_(m) {} + const QuasiModel2& m_; + double operator()(const double& proposed_pnull) const { + return log(m_.Likelihood(m_.alpha_, proposed_pnull)); + } + }; + + struct AlphaResampler { + AlphaResampler(const QuasiModel2& m) : m_(m) {} + const QuasiModel2& m_; + double operator()(const double& proposed_alpha) const { + return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float())); + } + }; + + void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { + const PNullResampler dr(*this); + const AlphaResampler ar(*this); + for (unsigned i = 0; i < nloop; ++i) { + double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001, + 1.0, 0.0, niterations, 100*niterations); + pnull_ = prob_t(pnull); + alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001, + std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); + } + std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null=" + << pnull_.as_float() << ") = " << Likelihood() << std::endl; + zcache_.clear(); + } + prob_t Likelihood() const { return Likelihood(alpha_, pnull_.as_float()); } @@ -61,12 +94,17 @@ struct QuasiModel2 { const prob_t pnull(ppnull); const prob_t pnotnull(1 - ppnull); - prob_t p = prob_t::One(); + prob_t p; + p.logeq(Md::log_gamma_density(alpha, 0.1, 25)); // TODO configure + assert(!p.is_0()); + prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10)); + assert(!prob_of_ppnull.is_0()); + p *= prob_of_ppnull; for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) { const AlignmentObservation& ao = it->first; if (ao.a_j) { - double u = UnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha); - double z = ComputeZ(ao.j, ao.src_len, ao.trg_len, alpha); + prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha); + prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha); prob_t pa(u / z); pa *= pnotnull; pa.poweq(it->second); @@ -79,6 +117,19 @@ struct QuasiModel2 { } private: + static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { + prob_t p; + p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); + return p; + } + + static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { + prob_t z = prob_t::Zero(); + for (int a_j = 1; a_j <= src_len; ++a_j) + z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha); + return z; + } + static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); } diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h index 5a262f9d..6f45fbce 100644 --- a/gi/pf/tied_resampler.h +++ b/gi/pf/tied_resampler.h @@ -42,6 +42,10 @@ struct TiedResampler { return llh; } + double LogLikelihood() const { + return LogLikelihood(discount, strength); + } + struct DiscountResampler { DiscountResampler(const TiedResampler& m) : m_(m) {} const TiedResampler& m_; @@ -106,6 +110,13 @@ struct BinTiedResampler { } } + double LogLikelihood() const { + double llh = 0; + for (unsigned i = 0; i < resamplers.size(); ++i) + llh += resamplers[i].LogLikelihood(); + return llh; + } + private: std::vector > resamplers; }; -- cgit v1.2.3