diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-04-12 18:57:44 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-04-12 18:57:44 -0400 |
commit | c294227a928672bf108eed81106063a194c872ca (patch) | |
tree | 3cc364d092567a3a8670f8f0fb195f31891891f6 /utils/alias_sampler.h | |
parent | 6001b81eba37985d2e7dea6e6ebb488b787789a6 (diff) |
unigram pyp lm added
Diffstat (limited to 'utils/alias_sampler.h')
-rw-r--r-- | utils/alias_sampler.h | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/utils/alias_sampler.h b/utils/alias_sampler.h new file mode 100644 index 00000000..85da9944 --- /dev/null +++ b/utils/alias_sampler.h @@ -0,0 +1,47 @@ +#ifndef _ALIAS_SAMPLER_H_ +#define _ALIAS_SAMPLER_H_ + +#include <vector> +#include <limits> + +// R. A. Kronmal and A. V. Peterson, Jr. (1977) On the alias method for +// generating random variables from a discrete distribution. In The American +// Statistician, Vol. 33, No. 4. Pages 214--218. +// +// Intuition: a multinomial with N outcomes can be rewritten as a uniform +// mixture of N Bernoulli distributions. The ith Bernoulli returns i with +// probability F[i], otherwise it returns an "alias" value L[i]. The +// constructor computes the F's and L's given an arbitrary multionimial p in +// O(n) time and Draw returns samples in O(1) time. +struct AliasSampler { + explicit AliasSampler(const std::vector<double>& p) : + cutoffs_(p.size()), + aliases_(p.size(), std::numeric_limits<unsigned>::max()) { + const unsigned N = p.size(); + std::vector<unsigned> s,g; + for (unsigned i = 0; i < N; ++i) { + const double cutoff = cutoffs_[i] = N * p[i]; + if (cutoff >= 1.0) g.push_back(i); else s.push_back(i); + } + while(!s.empty() && !g.empty()) { + const unsigned k = g.back(); + const unsigned j = s.back(); + aliases_[j] = k; + cutoffs_[k] -= 1.0 - cutoffs_[j]; + s.pop_back(); + if (cutoffs_[k] < 1.0) { + g.pop_back(); + s.push_back(k); + } + } + } + template <typename Uniform01Generator> + unsigned Draw(Uniform01Generator& u01) const { + const unsigned n = u01() * cutoffs_.size(); + if (u01() > cutoffs_[n]) return aliases_[n]; else return n; + } + std::vector<double> cutoffs_; // F + std::vector<unsigned> aliases_; // L +}; + +#endif |