summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-03-10 14:10:04 -0500
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-03-10 14:10:04 -0500
commit 280d5aa74b6a41f8f6deb5dd374140b7e3ab2703 (patch)
tree a6bd63927128a48006a0634fbd4832b0f06db2a7
parent f06c3f8d9dc2ce66153890809a7fc9b296ee625e (diff)
do Bayesian inference on quasimodel2 hyperparameters
-rw-r--r-- gi/pf/align-lexonly-pyp.cc 5
-rw-r--r-- gi/pf/pyp_lm.cc 2
-rw-r--r-- gi/pf/pyp_tm.cc 11
-rw-r--r-- gi/pf/quasi_model2.h 57
-rw-r--r-- gi/pf/tied_resampler.h 11
5 files changed, 75 insertions, 11 deletions
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index 68cb9192..6c054753 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -74,6 +74,7 @@ struct Aligner {
void ResampleHyperparameters() {
model.ResampleHyperparameters(prng);
+ paj_model.ResampleHyperparameters(prng);
}
void InitializeRandom() {
@@ -216,9 +217,9 @@ int main(int argc, char** argv) {
const unsigned samples = conf["samples"].as<unsigned>();
for (int i = 0; i < samples; ++i) {
for (int j = 65; j < 67; ++j) Debug(corpus[j]);
- if (i % 7 == 6) aligner.ResampleHyperparameters();
+ if (i % 10 == 9) aligner.ResampleHyperparameters();
aligner.ResampleCorpus();
- if (i > (samples / 5) && (i % 10 == 9)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]);
+ if (i > (samples / 5) && (i % 6 == 5)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]);
}
for (unsigned i = 0; i < corpus.size(); ++i)
WriteAlignments(corpus[i]);
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc
index 85635b8f..91029688 100644
--- a/gi/pf/pyp_lm.cc
+++ b/gi/pf/pyp_lm.cc
@@ -113,7 +113,7 @@ template <unsigned N> struct PYPLM {
typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it;
for (it = p.begin(); it != p.end(); ++it)
llh += it->second.log_crp_prob();
- // TODO parametric likelihood from TiedResampler
+ llh += tr.LogLikelihood();
return llh;
}
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc
index bf5a6497..34ef0ba2 100644
--- a/gi/pf/pyp_tm.cc
+++ b/gi/pf/pyp_tm.cc
@@ -17,7 +17,7 @@ using namespace std::tr1;
template <typename Base>
struct ConditionalPYPWordModel {
- ConditionalPYPWordModel(Base* b) : base(*b), btr(3) {}
+ ConditionalPYPWordModel(Base* b) : base(*b), btr(2) {}
void Summary() const {
cerr << "Number of conditioning contexts: " << r.size() << endl;
@@ -29,8 +29,6 @@ struct ConditionalPYPWordModel {
}
void ResampleHyperparameters(MT19937* rng) {
- for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it)
- it->second.resample_hyperparameters(rng);
btr.ResampleHyperparameters(rng);
}
@@ -45,8 +43,11 @@ struct ConditionalPYPWordModel {
void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
RuleModelHash::iterator it = r.find(src);
- if (it == r.end())
- it = r.insert(make_pair(src, CCRP<vector<WordID> >(1,1,1,1,0.5,1.0))).first;
+ if (it == r.end()) {
+ it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first;
+ static const WordID kNULL = TD::Convert("NULL");
+ btr.Add(src == kNULL ? 0 : 1, &it->second);
+ }
if (it->second.increment(trglets, base(trglets), rng))
base.Increment(trglets, rng);
}
diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h
index 8ec0a400..588c8f84 100644
--- a/gi/pf/quasi_model2.h
+++ b/gi/pf/quasi_model2.h
@@ -7,6 +7,8 @@
#include "boost/functional.hpp"
#include "prob.h"
#include "array2d.h"
+#include "slice_sampler.h"
+#include "m.h"
struct AlignmentObservation {
AlignmentObservation() : src_len(), trg_len(), j(), a_j() {}
@@ -53,6 +55,37 @@ struct QuasiModel2 {
if (!cc) obs_.erase(ao);
}
+ struct PNullResampler {
+ PNullResampler(const QuasiModel2& m) : m_(m) {}
+ const QuasiModel2& m_;
+ double operator()(const double& proposed_pnull) const {
+ return log(m_.Likelihood(m_.alpha_, proposed_pnull));
+ }
+ };
+
+ struct AlphaResampler {
+ AlphaResampler(const QuasiModel2& m) : m_(m) {}
+ const QuasiModel2& m_;
+ double operator()(const double& proposed_alpha) const {
+ return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float()));
+ }
+ };
+
+ void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
+ const PNullResampler dr(*this);
+ const AlphaResampler ar(*this);
+ for (unsigned i = 0; i < nloop; ++i) {
+ double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001,
+ 1.0, 0.0, niterations, 100*niterations);
+ pnull_ = prob_t(pnull);
+ alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001,
+ std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+ }
+ std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null="
+ << pnull_.as_float() << ") = " << Likelihood() << std::endl;
+ zcache_.clear();
+ }
+
prob_t Likelihood() const {
return Likelihood(alpha_, pnull_.as_float());
}
@@ -61,12 +94,17 @@ struct QuasiModel2 {
const prob_t pnull(ppnull);
const prob_t pnotnull(1 - ppnull);
- prob_t p = prob_t::One();
+ prob_t p;
+ p.logeq(Md::log_gamma_density(alpha, 0.1, 25)); // TODO configure
+ assert(!p.is_0());
+ prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10));
+ assert(!prob_of_ppnull.is_0());
+ p *= prob_of_ppnull;
for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) {
const AlignmentObservation& ao = it->first;
if (ao.a_j) {
- double u = UnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha);
- double z = ComputeZ(ao.j, ao.src_len, ao.trg_len, alpha);
+ prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha);
+ prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha);
prob_t pa(u / z);
pa *= pnotnull;
pa.poweq(it->second);
@@ -79,6 +117,19 @@ struct QuasiModel2 {
}
private:
+ static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
+ prob_t p;
+ p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha);
+ return p;
+ }
+
+ static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
+ prob_t z = prob_t::Zero();
+ for (int a_j = 1; a_j <= src_len; ++a_j)
+ z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha);
+ return z;
+ }
+
static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha);
}
diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h
index 5a262f9d..6f45fbce 100644
--- a/gi/pf/tied_resampler.h
+++ b/gi/pf/tied_resampler.h
@@ -42,6 +42,10 @@ struct TiedResampler {
return llh;
}
+ double LogLikelihood() const {
+ return LogLikelihood(discount, strength);
+ }
+
struct DiscountResampler {
DiscountResampler(const TiedResampler& m) : m_(m) {}
const TiedResampler& m_;
@@ -106,6 +110,13 @@ struct BinTiedResampler {
}
}
+ double LogLikelihood() const {
+ double llh = 0;
+ for (unsigned i = 0; i < resamplers.size(); ++i)
+ llh += resamplers[i].LogLikelihood();
+ return llh;
+ }
+
private:
std::vector<TiedResampler<CRP> > resamplers;
};