summaryrefslogtreecommitdiff
path: root/utils/alias_sampler.h
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-04-12 18:57:44 -0400
committerChris Dyer <cdyer@cs.cmu.edu>2012-04-12 18:57:44 -0400
commitc294227a928672bf108eed81106063a194c872ca (patch)
tree3cc364d092567a3a8670f8f0fb195f31891891f6 /utils/alias_sampler.h
parent6001b81eba37985d2e7dea6e6ebb488b787789a6 (diff)
unigram pyp lm added
Diffstat (limited to 'utils/alias_sampler.h')
-rw-r--r--utils/alias_sampler.h47
1 files changed, 47 insertions, 0 deletions
diff --git a/utils/alias_sampler.h b/utils/alias_sampler.h
new file mode 100644
index 00000000..85da9944
--- /dev/null
+++ b/utils/alias_sampler.h
@@ -0,0 +1,47 @@
+#ifndef _ALIAS_SAMPLER_H_
+#define _ALIAS_SAMPLER_H_
+
+#include <vector>
+#include <limits>
+
+// R. A. Kronmal and A. V. Peterson, Jr. (1977) On the alias method for
+// generating random variables from a discrete distribution. In The American
+// Statistician, Vol. 33, No. 4. Pages 214--218.
+//
+// Intuition: a multinomial with N outcomes can be rewritten as a uniform
+// mixture of N Bernoulli distributions. The ith Bernoulli returns i with
+// probability F[i], otherwise it returns an "alias" value L[i]. The
+// constructor computes the F's and L's given an arbitrary multionimial p in
+// O(n) time and Draw returns samples in O(1) time.
+struct AliasSampler {
+ explicit AliasSampler(const std::vector<double>& p) :
+ cutoffs_(p.size()),
+ aliases_(p.size(), std::numeric_limits<unsigned>::max()) {
+ const unsigned N = p.size();
+ std::vector<unsigned> s,g;
+ for (unsigned i = 0; i < N; ++i) {
+ const double cutoff = cutoffs_[i] = N * p[i];
+ if (cutoff >= 1.0) g.push_back(i); else s.push_back(i);
+ }
+ while(!s.empty() && !g.empty()) {
+ const unsigned k = g.back();
+ const unsigned j = s.back();
+ aliases_[j] = k;
+ cutoffs_[k] -= 1.0 - cutoffs_[j];
+ s.pop_back();
+ if (cutoffs_[k] < 1.0) {
+ g.pop_back();
+ s.push_back(k);
+ }
+ }
+ }
+ template <typename Uniform01Generator>
+ unsigned Draw(Uniform01Generator& u01) const {
+ const unsigned n = u01() * cutoffs_.size();
+ if (u01() > cutoffs_[n]) return aliases_[n]; else return n;
+ }
+ std::vector<double> cutoffs_; // F
+ std::vector<unsigned> aliases_; // L
+};
+
+#endif