diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-04-07 16:58:55 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-04-07 16:58:55 +0200 |
commit | 715245dc7042ac0dca4fea94031d7c6de8058033 (patch) | |
tree | 3a7ff0b88f2e113a08aef663d2487edec0b5f67f /gi/pf/poisson_uniform_word_model.h | |
parent | 89211ab30937672d84a54fac8fa435805499e38d (diff) | |
parent | 6001b81eba37985d2e7dea6e6ebb488b787789a6 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'gi/pf/poisson_uniform_word_model.h')
-rw-r--r-- | gi/pf/poisson_uniform_word_model.h | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/gi/pf/poisson_uniform_word_model.h b/gi/pf/poisson_uniform_word_model.h new file mode 100644 index 00000000..76204a0e --- /dev/null +++ b/gi/pf/poisson_uniform_word_model.h @@ -0,0 +1,50 @@ +#ifndef _POISSON_UNIFORM_WORD_MODEL_H_ +#define _POISSON_UNIFORM_WORD_MODEL_H_ + +#include <cmath> +#include <vector> +#include "prob.h" +#include "m.h" + +// len ~ Poisson(lambda) +// for (1..len) +// e_i ~ Uniform({Vocabulary}) +struct PoissonUniformWordModel { + explicit PoissonUniformWordModel(const unsigned vocab_size, + const unsigned alphabet_size, + const double mean_len = 5) : + lh(prob_t::One()), + v0(-std::log(vocab_size)), + u0(-std::log(alphabet_size)), + mean_length(mean_len) {} + + void ResampleHyperparameters(MT19937*) {} + + inline prob_t operator()(const std::vector<WordID>& s) const { + prob_t p; + p.logeq(Md::log_poisson(s.size(), mean_length) + s.size() * u0); + //p.logeq(v0); + return p; + } + + inline void Increment(const std::vector<WordID>& w, MT19937*) { + lh *= (*this)(w); + } + + inline void Decrement(const std::vector<WordID>& w, MT19937 *) { + lh /= (*this)(w); + } + + inline prob_t Likelihood() const { return lh; } + + void Summary() const {} + + private: + + prob_t lh; // keeps track of the draws from the base distribution + const double v0; // uniform log prob of generating a word + const double u0; // uniform log prob of generating a letter + const double mean_length; // mean length of a word in the base distribution +}; + +#endif |