diff options
Diffstat (limited to 'gi')
-rw-r--r-- | gi/pf/mh_test.cc | 148 | ||||
-rw-r--r-- | gi/pf/pf_test.cc | 148 |
2 files changed, 296 insertions, 0 deletions
diff --git a/gi/pf/mh_test.cc b/gi/pf/mh_test.cc new file mode 100644 index 00000000..296e7285 --- /dev/null +++ b/gi/pf/mh_test.cc @@ -0,0 +1,148 @@ +#include "ccrp.h" + +#include <vector> +#include <iostream> + +#include "tdict.h" +#include "transliterations.h" + +using namespace std; + +MT19937 rng; + +static bool verbose = false; + +struct Model { + + Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP<int>(0.8, 0.5)) {} + + double p0(int x) const { + assert(x > 0); + assert(x < 5); + return 1.0/4.0; + } + + double llh() const { + double lh = bp + base.log_crp_prob(); + for (int ctx = 1; ctx < 5; ++ctx) + lh += ccrps[ctx].log_crp_prob(); + return lh; + } + + double prob(int ctx, int x) const { + assert(ctx > 0 && ctx < 5); + return ccrps[ctx].prob(x, base.prob(x, p0(x))); + } + + void increment(int ctx, int x) { + assert(ctx > 0 && ctx < 5); + if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { + if (base.increment(x, p0(x), &rng)) { + bp += log(1.0 / 4.0); + } + } + } + + // this is just a biased estimate + double est_base_prob(int x) { + return (x + 1) * x / 40.0; + } + + void increment_is(int ctx, int x) { + assert(ctx > 0 && ctx < 5); + SampleSet<double> ss; + const int PARTICLES = 25; + vector<CCRP<int> > s1s(PARTICLES, CCRP<int>(0.5,0.5)); + vector<CCRP<int> > sbs(PARTICLES, CCRP<int>(0.5,0.5)); + vector<double> sp0s(PARTICLES); + + CCRP<int> s1 = ccrps[ctx]; + CCRP<int> sb = base; + double sp0 = bp; + for (int pp = 0; pp < PARTICLES; ++pp) { + if (pp > 0) { + ccrps[ctx] = s1; + base = sb; + bp = sp0; + } + + double q = 1; + double gamma = 1; + double est_p = est_base_prob(x); + //base.prob(x, p0(x)) + rng.next() * 0.1; + if (ccrps[ctx].increment(x, est_p, &rng, &q)) { + gamma = q * base.prob(x, p0(x)); + q *= est_p; + if (verbose) cerr << "(DP-base draw) "; + double qq = -1; + if (base.increment(x, p0(x), &rng, &qq)) { + if (verbose) cerr << "(G0 draw) "; + bp += log(p0(x)); + qq *= p0(x); + } + } else { gamma = q; } + double w = gamma / q; + if (verbose) + cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; + ss.add(w); + s1s[pp] = ccrps[ctx]; + sbs[pp] = base; + sp0s[pp] = bp; + } + int ps = rng.SelectSample(ss); + ccrps[ctx] = s1s[ps]; + base = sbs[ps]; + bp = sp0s[ps]; + if (verbose) { + cerr << "SELECTED: " << ps << endl; + static int cc = 0; cc++; if (cc ==10) exit(1); + } + } + + void decrement(int ctx, int x) { + assert(ctx > 0 && ctx < 5); + if (ccrps[ctx].decrement(x, &rng)) { + if (base.decrement(x, &rng)) { + bp -= log(p0(x)); + } + } + } + + double bp; + CCRP<int> base; + vector<CCRP<int> > ccrps; + +}; + +int main(int argc, char** argv) { + if (argc > 1) { verbose = true; } + vector<int> counts(15, 0); + vector<int> tcounts(15, 0); + int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; + double tlh = 0; + double tt = 0; + for (int n = 0; n < 1000; ++n) { + if (n % 10 == 0) cerr << '.'; + if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; + Model m; + for (int *x = points; *x; x += 2) + m.increment(x[0], x[1]); + + for (int j = 0; j < 24; ++j) { + for (int *x = points; *x; x += 2) { + if (rng.next() < 0.8) { + m.decrement(x[0], x[1]); + m.increment_is(x[0], x[1]); + } + } + } + counts[m.base.num_customers()]++; + tcounts[m.base.num_tables()]++; + tlh += m.llh(); + tt += 1.0; + } + cerr << "mean LLH = " << (tlh / tt) << endl; + for (int i = 0; i < 15; ++i) + cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; +} + diff --git a/gi/pf/pf_test.cc b/gi/pf/pf_test.cc new file mode 100644 index 00000000..296e7285 --- /dev/null +++ b/gi/pf/pf_test.cc @@ -0,0 +1,148 @@ +#include "ccrp.h" + +#include <vector> +#include <iostream> + +#include "tdict.h" +#include "transliterations.h" + +using namespace std; + +MT19937 rng; + +static bool verbose = false; + +struct Model { + + Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP<int>(0.8, 0.5)) {} + + double p0(int x) const { + assert(x > 0); + assert(x < 5); + return 1.0/4.0; + } + + double llh() const { + double lh = bp + base.log_crp_prob(); + for (int ctx = 1; ctx < 5; ++ctx) + lh += ccrps[ctx].log_crp_prob(); + return lh; + } + + double prob(int ctx, int x) const { + assert(ctx > 0 && ctx < 5); + return ccrps[ctx].prob(x, base.prob(x, p0(x))); + } + + void increment(int ctx, int x) { + assert(ctx > 0 && ctx < 5); + if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { + if (base.increment(x, p0(x), &rng)) { + bp += log(1.0 / 4.0); + } + } + } + + // this is just a biased estimate + double est_base_prob(int x) { + return (x + 1) * x / 40.0; + } + + void increment_is(int ctx, int x) { + assert(ctx > 0 && ctx < 5); + SampleSet<double> ss; + const int PARTICLES = 25; + vector<CCRP<int> > s1s(PARTICLES, CCRP<int>(0.5,0.5)); + vector<CCRP<int> > sbs(PARTICLES, CCRP<int>(0.5,0.5)); + vector<double> sp0s(PARTICLES); + + CCRP<int> s1 = ccrps[ctx]; + CCRP<int> sb = base; + double sp0 = bp; + for (int pp = 0; pp < PARTICLES; ++pp) { + if (pp > 0) { + ccrps[ctx] = s1; + base = sb; + bp = sp0; + } + + double q = 1; + double gamma = 1; + double est_p = est_base_prob(x); + //base.prob(x, p0(x)) + rng.next() * 0.1; + if (ccrps[ctx].increment(x, est_p, &rng, &q)) { + gamma = q * base.prob(x, p0(x)); + q *= est_p; + if (verbose) cerr << "(DP-base draw) "; + double qq = -1; + if (base.increment(x, p0(x), &rng, &qq)) { + if (verbose) cerr << "(G0 draw) "; + bp += log(p0(x)); + qq *= p0(x); + } + } else { gamma = q; } + double w = gamma / q; + if (verbose) + cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; + ss.add(w); + s1s[pp] = ccrps[ctx]; + sbs[pp] = base; + sp0s[pp] = bp; + } + int ps = rng.SelectSample(ss); + ccrps[ctx] = s1s[ps]; + base = sbs[ps]; + bp = sp0s[ps]; + if (verbose) { + cerr << "SELECTED: " << ps << endl; + static int cc = 0; cc++; if (cc ==10) exit(1); + } + } + + void decrement(int ctx, int x) { + assert(ctx > 0 && ctx < 5); + if (ccrps[ctx].decrement(x, &rng)) { + if (base.decrement(x, &rng)) { + bp -= log(p0(x)); + } + } + } + + double bp; + CCRP<int> base; + vector<CCRP<int> > ccrps; + +}; + +int main(int argc, char** argv) { + if (argc > 1) { verbose = true; } + vector<int> counts(15, 0); + vector<int> tcounts(15, 0); + int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; + double tlh = 0; + double tt = 0; + for (int n = 0; n < 1000; ++n) { + if (n % 10 == 0) cerr << '.'; + if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; + Model m; + for (int *x = points; *x; x += 2) + m.increment(x[0], x[1]); + + for (int j = 0; j < 24; ++j) { + for (int *x = points; *x; x += 2) { + if (rng.next() < 0.8) { + m.decrement(x[0], x[1]); + m.increment_is(x[0], x[1]); + } + } + } + counts[m.base.num_customers()]++; + tcounts[m.base.num_tables()]++; + tlh += m.llh(); + tt += 1.0; + } + cerr << "mean LLH = " << (tlh / tt) << endl; + for (int i = 0; i < 15; ++i) + cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; +} + |