1 files changed, 64 insertions, 6 deletions
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc
index 2837e33c..0d85536c 100644
--- a/gi/pf/pyp_lm.cc
+++ b/gi/pf/pyp_lm.cc
@@ -50,16 +50,19 @@ template <unsigned N> struct PYPLM;
 
 // uniform base distribution
 template<> struct PYPLM<0> {
-  PYPLM(unsigned vs) : p0(1.0 / vs) {}
-  void increment(WordID w, const vector<WordID>& context, MT19937* rng) const {}
-  void decrement(WordID w, const vector<WordID>& context, MT19937* rng) const {}
+  PYPLM(unsigned vs) : p0(1.0 / vs), draws() {}  // every word gets probability 1/vs; draws is value-initialized to 0
+  void increment(WordID w, const vector<WordID>& context, MT19937* rng) { ++draws; }  // arguments unused: only the number of draws is tracked
+  void decrement(WordID w, const vector<WordID>& context, MT19937* rng) { --draws; assert(draws >= 0); }  // undoes one increment; the count must never go negative
   double prob(WordID w, const vector<WordID>& context) const { return p0; }
+  void resample_hyperparameters(MT19937* rng, const unsigned nloop, const unsigned niterations) {}  // no-op: nothing is resampled at this level
+  double log_likelihood() const { return draws * log(p0); }  // log(p0) summed once per outstanding draw
   const double p0;
+  int draws;  // increments minus decrements so far
 };
 
 // represents an N-gram LM
 template <unsigned N> struct PYPLM {
-  PYPLM(unsigned vs) : backoff(vs) {}
+  PYPLM(unsigned vs) : backoff(vs), d(0.8), alpha(1.0) {}  // vs is forwarded to the backoff model; d and alpha start at 0.8 and 1.0
   void increment(WordID w, const vector<WordID>& context, MT19937* rng) {
     const double bo = backoff.prob(w, context);
     static vector<WordID> lookup(N-1);
@@ -67,7 +70,7 @@ template <unsigned N> struct PYPLM {
       lookup[i] = context[context.size() - 1 - i];
     typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup);
     if (it == p.end())
-      it = p.insert(make_pair(lookup, CCRP<WordID>(1,1,1,1))).first;
+      it = p.insert(make_pair(lookup, CCRP<WordID>(d,alpha))).first;
     if (it->second.increment(w, bo, rng))
       backoff.increment(w, context, rng);
   }
@@ -89,7 +92,58 @@ template <unsigned N> struct PYPLM {
     if (it == p.end()) return bo;
     return it->second.prob(w, bo);
   }
+
+  double log_likelihood() const {  // backoff model's log-likelihood plus this order's term at the current (d, alpha)
+    return backoff.log_likelihood() + log_likelihood(d, alpha);
+  }
+
+  double log_likelihood(const double& dd, const double& aa) const {
+    typedef typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator CtxIter;
+    if (aa <= -dd) return -std::numeric_limits<double>::infinity();  // reject this (dd, aa) pair outright
+    double total = Md::log_beta_density(dd, 1, 1) + Md::log_gamma_density(aa, 1, 1);  // density terms for dd and aa
+    for (CtxIter ctx = p.begin(); ctx != p.end(); ++ctx)  // plus one log_crp_prob term per stored context
+      total += ctx->second.log_crp_prob(dd, aa);
+    return total;
+  }
+
+  struct DiscountResampler {  // functor: log_likelihood as a function of the discount, with alpha held at m_.alpha
+    explicit DiscountResampler(const PYPLM& m) : m_(m) {}  // explicit: never build one implicitly from a PYPLM
+    const PYPLM& m_;  // reference to the model being scored; not copied
+    double operator()(const double& proposed_discount) const {
+      return m_.log_likelihood(proposed_discount, m_.alpha);
+    }
+  };
+
+  struct AlphaResampler {  // functor: log_likelihood as a function of alpha, with the discount held at m_.d
+    explicit AlphaResampler(const PYPLM& m) : m_(m) {}  // explicit: never build one implicitly from a PYPLM
+    const PYPLM& m_;  // reference to the model being scored; not copied
+    double operator()(const double& proposed_alpha) const {
+      return m_.log_likelihood(m_.d, proposed_alpha);
+    }
+  };
+
+  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {  // resample alpha and d via slice_sampler1d, push them into every CCRP, then do the same for backoff
+    DiscountResampler dr(*this);  // scores a candidate d against the current alpha
+    AlphaResampler ar(*this);     // scores a candidate alpha against the current d
+    for (unsigned iter = 0; iter < nloop; ++iter) {  // unsigned counter to match nloop (avoids a signed/unsigned comparison)
+      alpha = slice_sampler1d(ar, alpha, *rng, 0.0,
+                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+      d = slice_sampler1d(dr, d, *rng, std::numeric_limits<double>::min(),
+                          1.0, 0.0, niterations, 100*niterations);
+    }
+    alpha = slice_sampler1d(ar, alpha, *rng, 0.0,  // one more alpha update after the last d update
+                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+    typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it;
+    cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << alpha << ") = " << log_likelihood(d, alpha) << endl;
+    for (it = p.begin(); it != p.end(); ++it) {  // hand the new values to every existing CCRP
+      it->second.set_discount(d);
+      it->second.set_alpha(alpha);
+    }
+    backoff.resample_hyperparameters(rng, nloop, niterations);  // same nloop/niterations for the lower-order model
+  }
+
   PYPLM<N-1> backoff;
+  double d, alpha;
   unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > > p;
 };
 
@@ -109,7 +163,7 @@ int main(int argc, char** argv) {
   cerr << "Reading corpus...\n";
   CorpusTools::ReadFromFile(conf["input"].as<string>(), &corpuse, &vocabe);
   cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
-#define kORDER 5
+#define kORDER 3
   PYPLM<kORDER> lm(vocabe.size());
   vector<WordID> ctx(kORDER - 1, TD::Convert("<s>"));
   int mci = corpuse.size() * 99 / 100;
@@ -126,6 +180,10 @@ int main(int argc, char** argv) {
       if (SS > 0) lm.decrement(kEOS, ctx, &rng);
       lm.increment(kEOS, ctx, &rng);
     }
+    if (SS % 10 == 9) {
+      cerr << " [LLH=" << lm.log_likelihood() << "]" << endl;
+      if (SS % 20 == 19) lm.resample_hyperparameters(&rng);
+    } else { cerr << '.' << flush; }
   }
   double llh = 0;
   unsigned cnt = 0;