From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- gi/pf/mh_test.cc | 148 ------------------------------------------------------- 1 file changed, 148 deletions(-) delete mode 100644 gi/pf/mh_test.cc (limited to 'gi/pf/mh_test.cc') diff --git a/gi/pf/mh_test.cc b/gi/pf/mh_test.cc deleted file mode 100644 index 296e7285..00000000 --- a/gi/pf/mh_test.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include "ccrp.h" - -#include -#include - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -static bool verbose = false; - -struct Model { - - Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP(0.8, 0.5)) {} - - double p0(int x) const { - assert(x > 0); - assert(x < 5); - return 1.0/4.0; - } - - double llh() const { - double lh = bp + base.log_crp_prob(); - for (int ctx = 1; ctx < 5; ++ctx) - lh += ccrps[ctx].log_crp_prob(); - return lh; - } - - double prob(int ctx, int x) const { - assert(ctx > 0 && ctx < 5); - return ccrps[ctx].prob(x, base.prob(x, p0(x))); - } - - void increment(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { - if (base.increment(x, p0(x), &rng)) { - bp += log(1.0 / 4.0); - } - } - } - - // this is just a biased estimate - double est_base_prob(int x) { - return (x + 1) * x / 40.0; - } - - void increment_is(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - SampleSet ss; - const int PARTICLES = 25; - vector > s1s(PARTICLES, CCRP(0.5,0.5)); - vector > sbs(PARTICLES, CCRP(0.5,0.5)); - vector sp0s(PARTICLES); - - CCRP s1 = ccrps[ctx]; - CCRP sb = base; - double sp0 = bp; - for (int pp = 0; pp < PARTICLES; ++pp) { - if (pp > 0) { - ccrps[ctx] = s1; - base = sb; - bp = sp0; - } - - double q = 1; - double gamma = 1; - double est_p = est_base_prob(x); - //base.prob(x, p0(x)) + rng.next() * 0.1; - if (ccrps[ctx].increment(x, est_p, &rng, &q)) { - gamma = q * base.prob(x, p0(x)); - q *= est_p; - if (verbose) cerr << "(DP-base draw) "; - double qq = -1; - if (base.increment(x, p0(x), &rng, &qq)) { - if (verbose) cerr << "(G0 draw) "; - bp += log(p0(x)); - qq *= p0(x); - } - } else { gamma = q; } - double w = gamma / q; - if (verbose) - cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; - ss.add(w); - s1s[pp] = ccrps[ctx]; - sbs[pp] = base; - sp0s[pp] = bp; - } - int ps = rng.SelectSample(ss); - ccrps[ctx] = s1s[ps]; - base = sbs[ps]; - bp = sp0s[ps]; - if (verbose) { - cerr << "SELECTED: " << ps << endl; - static int cc = 0; cc++; if (cc ==10) exit(1); - } - } - - void decrement(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].decrement(x, &rng)) { - if (base.decrement(x, &rng)) { - bp -= log(p0(x)); - } - } - } - - double bp; - CCRP base; - vector > ccrps; - -}; - -int main(int argc, char** argv) { - if (argc > 1) { verbose = true; } - vector counts(15, 0); - vector tcounts(15, 0); - int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; - double tlh = 0; - double tt = 0; - for (int n = 0; n < 1000; ++n) { - if (n % 10 == 0) cerr << '.'; - if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; - Model m; - for (int *x = points; *x; x += 2) - m.increment(x[0], x[1]); - - for (int j = 0; j < 24; ++j) { - for (int *x = points; *x; x += 2) { - if (rng.next() < 0.8) { - m.decrement(x[0], x[1]); - m.increment_is(x[0], x[1]); - } - } - } - counts[m.base.num_customers()]++; - tcounts[m.base.num_tables()]++; - tlh += m.llh(); - tt += 1.0; - } - cerr << "mean LLH = " << (tlh / tt) << endl; - for (int i = 0; i < 15; ++i) - cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; -} - -- cgit v1.2.3