From 4a129e055387baf922f30f1502c6b6efad7dd8eb Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 15 Mar 2012 22:47:04 -0400 Subject: bayes bayes bayes --- gi/pf/poisson_uniform_word_model.h | 50 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 gi/pf/poisson_uniform_word_model.h (limited to 'gi/pf/poisson_uniform_word_model.h') diff --git a/gi/pf/poisson_uniform_word_model.h b/gi/pf/poisson_uniform_word_model.h new file mode 100644 index 00000000..76204a0e --- /dev/null +++ b/gi/pf/poisson_uniform_word_model.h @@ -0,0 +1,50 @@ +#ifndef _POISSON_UNIFORM_WORD_MODEL_H_ +#define _POISSON_UNIFORM_WORD_MODEL_H_ + +#include +#include +#include "prob.h" +#include "m.h" + +// len ~ Poisson(lambda) +// for (1..len) +// e_i ~ Uniform({Vocabulary}) +struct PoissonUniformWordModel { + explicit PoissonUniformWordModel(const unsigned vocab_size, + const unsigned alphabet_size, + const double mean_len = 5) : + lh(prob_t::One()), + v0(-std::log(vocab_size)), + u0(-std::log(alphabet_size)), + mean_length(mean_len) {} + + void ResampleHyperparameters(MT19937*) {} + + inline prob_t operator()(const std::vector& s) const { + prob_t p; + p.logeq(Md::log_poisson(s.size(), mean_length) + s.size() * u0); + //p.logeq(v0); + return p; + } + + inline void Increment(const std::vector& w, MT19937*) { + lh *= (*this)(w); + } + + inline void Decrement(const std::vector& w, MT19937 *) { + lh /= (*this)(w); + } + + inline prob_t Likelihood() const { return lh; } + + void Summary() const {} + + private: + + prob_t lh; // keeps track of the draws from the base distribution + const double v0; // uniform log prob of generating a word + const double u0; // uniform log prob of generating a letter + const double mean_length; // mean length of a word in the base distribution +}; + +#endif -- cgit v1.2.3