do Bayesian inference on quasimodel2 hyperparameters

author: Chris Dyer <cdyer@cs.cmu.edu> 2012-03-10 14:10:04 -0500
committer: Chris Dyer <cdyer@cs.cmu.edu> 2012-03-10 14:10:04 -0500
commit: 280d5aa74b6a41f8f6deb5dd374140b7e3ab2703 (patch)
tree: a6bd63927128a48006a0634fbd4832b0f06db2a7
parent: f06c3f8d9dc2ce66153890809a7fc9b296ee625e (diff)
5 files changed, 75 insertions, 11 deletions
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index 68cb9192..6c054753 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -74,6 +74,7 @@ struct Aligner {
 
   void ResampleHyperparameters() {
     model.ResampleHyperparameters(prng);
+    paj_model.ResampleHyperparameters(prng);
   }
 
   void InitializeRandom() {
@@ -216,9 +217,9 @@ int main(int argc, char** argv) {
   const unsigned samples = conf["samples"].as<unsigned>();
   for (int i = 0; i < samples; ++i) {
     for (int j = 65; j < 67; ++j) Debug(corpus[j]);
-    if (i % 7 == 6) aligner.ResampleHyperparameters();
+    if (i % 10 == 9) aligner.ResampleHyperparameters();
     aligner.ResampleCorpus();
-    if (i > (samples / 5) && (i % 10 == 9)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]);
+    if (i > (samples / 5) && (i % 6 == 5)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]);
   }
   for (unsigned i = 0; i < corpus.size(); ++i)
     WriteAlignments(corpus[i]);
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc
index 85635b8f..91029688 100644
--- a/gi/pf/pyp_lm.cc
+++ b/gi/pf/pyp_lm.cc
@@ -113,7 +113,7 @@ template <unsigned N> struct PYPLM {
     typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it;
     for (it = p.begin(); it != p.end(); ++it)
       llh += it->second.log_crp_prob();
-    // TODO parametric likelihood from TiedResampler
+    llh += tr.LogLikelihood();
     return llh;
   }
 
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc
index bf5a6497..34ef0ba2 100644
--- a/gi/pf/pyp_tm.cc
+++ b/gi/pf/pyp_tm.cc
@@ -17,7 +17,7 @@ using namespace std::tr1;
 
 template <typename Base>
 struct ConditionalPYPWordModel {
-  ConditionalPYPWordModel(Base* b) : base(*b), btr(3) {}
+  ConditionalPYPWordModel(Base* b) : base(*b), btr(2) {}
 
   void Summary() const {
     cerr << "Number of conditioning contexts: " << r.size() << endl;
@@ -29,8 +29,6 @@ struct ConditionalPYPWordModel {
   }
 
   void ResampleHyperparameters(MT19937* rng) {
-    for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it)
-      it->second.resample_hyperparameters(rng);
     btr.ResampleHyperparameters(rng);
   } 
 
@@ -45,8 +43,11 @@ struct ConditionalPYPWordModel {
 
   void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
     RuleModelHash::iterator it = r.find(src);
-    if (it == r.end())
-      it = r.insert(make_pair(src, CCRP<vector<WordID> >(1,1,1,1,0.5,1.0))).first;
+    if (it == r.end()) {
+      it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first;
+      static const WordID kNULL = TD::Convert("NULL");
+      btr.Add(src == kNULL ? 0 : 1, &it->second);
+    }
     if (it->second.increment(trglets, base(trglets), rng))
       base.Increment(trglets, rng);
   }
diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h
index 8ec0a400..588c8f84 100644
--- a/gi/pf/quasi_model2.h
+++ b/gi/pf/quasi_model2.h
@@ -7,6 +7,8 @@
 #include "boost/functional.hpp"
 #include "prob.h"
 #include "array2d.h"
+#include "slice_sampler.h"
+#include "m.h"
 
 struct AlignmentObservation {
   AlignmentObservation() : src_len(), trg_len(), j(), a_j() {}
@@ -53,6 +55,37 @@ struct QuasiModel2 {
     if (!cc) obs_.erase(ao);
   }
 
+  struct PNullResampler {
+    PNullResampler(const QuasiModel2& m) : m_(m) {}
+    const QuasiModel2& m_;
+    double operator()(const double& proposed_pnull) const {
+      return log(m_.Likelihood(m_.alpha_, proposed_pnull));
+    }
+  };
+
+  struct AlphaResampler {
+    AlphaResampler(const QuasiModel2& m) : m_(m) {}
+    const QuasiModel2& m_;
+    double operator()(const double& proposed_alpha) const {
+      return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float()));
+    }
+  };
+
+  void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
+    const PNullResampler dr(*this);
+    const AlphaResampler ar(*this);
+    for (unsigned i = 0; i < nloop; ++i) {
+      double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001,
+                            1.0, 0.0, niterations, 100*niterations);
+      pnull_ = prob_t(pnull);
+      alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001,
+                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+    }
+    std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null="
+              << pnull_.as_float() << ") = " << Likelihood() << std::endl;
+    zcache_.clear();
+  }
+
   prob_t Likelihood() const {
     return Likelihood(alpha_, pnull_.as_float());
   }
@@ -61,12 +94,17 @@ struct QuasiModel2 {
     const prob_t pnull(ppnull);
     const prob_t pnotnull(1 - ppnull);
 
-    prob_t p = prob_t::One();
+    prob_t p;
+    p.logeq(Md::log_gamma_density(alpha, 0.1, 25));  // TODO configure
+    assert(!p.is_0());
+    prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10));
+    assert(!prob_of_ppnull.is_0());
+    p *= prob_of_ppnull;
     for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) {
       const AlignmentObservation& ao = it->first;
       if (ao.a_j) {
-        double u = UnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha);
-        double z = ComputeZ(ao.j, ao.src_len, ao.trg_len, alpha);
+        prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha);
+        prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha);
         prob_t pa(u / z);
         pa *= pnotnull;
         pa.poweq(it->second);
@@ -79,6 +117,19 @@ struct QuasiModel2 {
   }
 
  private:
+  static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
+    prob_t p;
+    p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha);
+    return p;
+  }
+
+  static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
+    prob_t z = prob_t::Zero();
+    for (int a_j = 1; a_j <= src_len; ++a_j)
+      z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha);
+    return z;
+  }
+
   static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
     return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha);
   }
diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h
index 5a262f9d..6f45fbce 100644
--- a/gi/pf/tied_resampler.h
+++ b/gi/pf/tied_resampler.h
@@ -42,6 +42,10 @@ struct TiedResampler {
     return llh;
   }
 
+  double LogLikelihood() const {
+    return LogLikelihood(discount, strength);
+  }
+
   struct DiscountResampler {
     DiscountResampler(const TiedResampler& m) : m_(m) {}
     const TiedResampler& m_;
@@ -106,6 +110,13 @@ struct BinTiedResampler {
     }
   }
 
+  double LogLikelihood() const {
+    double llh = 0;
+    for (unsigned i = 0; i < resamplers.size(); ++i)
+      llh += resamplers[i].LogLikelihood();
+    return llh;
+  }
+
  private:
   std::vector<TiedResampler<CRP> > resamplers;
 };
author	Chris Dyer <cdyer@cs.cmu.edu>	2012-03-10 14:10:04 -0500
committer	Chris Dyer <cdyer@cs.cmu.edu>	2012-03-10 14:10:04 -0500
commit	280d5aa74b6a41f8f6deb5dd374140b7e3ab2703 (patch)
tree	a6bd63927128a48006a0634fbd4832b0f06db2a7
parent	f06c3f8d9dc2ce66153890809a7fc9b296ee625e (diff)