summaryrefslogtreecommitdiff
path: root/report/pyp_clustering/acl09-short/code/pygibbs3.c
diff options
context:
space:
mode:
Diffstat (limited to 'report/pyp_clustering/acl09-short/code/pygibbs3.c')
-rw-r--r--report/pyp_clustering/acl09-short/code/pygibbs3.c198
1 files changed, 0 insertions, 198 deletions
diff --git a/report/pyp_clustering/acl09-short/code/pygibbs3.c b/report/pyp_clustering/acl09-short/code/pygibbs3.c
deleted file mode 100644
index 3c2240a1..00000000
--- a/report/pyp_clustering/acl09-short/code/pygibbs3.c
+++ /dev/null
@@ -1,198 +0,0 @@
-#include <stdio.h>
-#include <math.h>
-
-#define myrand() (double) (((unsigned long) randomMT()) / 4294967296.)
-
-#define W 30114
-#define N 831190
-#define KWMAX 1000
-
-#define NLOOPS 1000
-#define BURNIN 0
-#define SAMPLEFREQ 1
-
-#define ALPHA 0.0 // PYB a
-//#define GAMMA 1000000000.0
-#define GAMMA .01 // Dirichlet over multinomial P0
-
-double BETA; // CRP alpha (PYB b)
-int w[N], z[N]; // words, table assignments
-int typecount[W], typetot; //# of tables of each type, total # tables
-int usedcount[W];
-double ztot[W][KWMAX];
-double k; // total # tables
-int nactive;
-
-void initialise(void);
-void anderson(void);
-void fileread(void);
-
-void initialise(void)
-{
- int i,j;
-
- for (i = 1; i < W; i++) {
- typecount[i] = 0;
- usedcount[i] = 0;
- for (j = 0; j < KWMAX; j++) {
- ztot[i][j] = 0;
- }
- }
-
-}
-
-void anderson(void) //stochastic Anderson-style initialisation
-{
- int i,j, tag;
- double max, totprob, r, runtot;
- double probs[KWMAX];
- int ind, temp;
-
- ztot[w[0]][0] = 1;
- z[0] = 0;
- typecount[w[0]] = 1;
- usedcount[w[0]] = 1;
- k = 1;
- typetot = 1;
-
- for (i = 1; i < N; i++) {
- // printf("%5d\n", w[i]);
- max = 0; tag = 0; totprob = 0;
- for (j = 0; j < usedcount[w[i]]; j++) {
- probs[j] = ztot[w[i]][j] - ALPHA;
- totprob += probs[j];
- }
- probs[usedcount[w[i]]] = (ALPHA*k+BETA)*((double) typecount[w[i]]+GAMMA)/((double) typetot+W*GAMMA);
- totprob += probs[usedcount[w[i]]];
- // printf("%10.6lf\n",totprob);
- r = myrand()*totprob;
- max = probs[0];
- j = 0;
- while (r>max) {
- j++;
- max += probs[j];
- }
- // printf("%5d\n",j);
- z[i] = j;
- ztot[w[i]][j]++;
- if (ztot[w[i]][j]==1) {
- typecount[w[i]]++;
- usedcount[w[i]]++;
- if (usedcount[w[i]]==KWMAX) {
- printf("Maximum number of tables exceeded!!!\n");
- }
- typetot++;
- k++;
- }
- }
-}
-
-void fileread(void)
-{
- int i,j, wt;
- FILE *fileptr;
-
- fileptr = fopen("wsj.dat", "r");
-
- for (i = 1; i < N; i++) {
- fscanf(fileptr, "%d", &wt);
- w[i] = wt-1;
- z[i] = 0;
- }
- printf("Total cases: %10d\n", N);
- fclose(fileptr);
-}
-
-main(int argc, char* argv[])
-{
- int i,j,loop,run;
- int temp,ind, tag;
- double newprob, WBETA;
- double probs[KWMAX];
- double max, totprob, r;
- int sampcount;
- FILE *fileptr;
- char filename[30];
- double score;
-
- if (argc < 2) {
- printf("Please provide a value of b\n");
- exit(0);
- }
- BETA = strtol(argv[1]);
- printf("Basic initialising...\n");
-
- // you can seed with any uint32, but the best are odds in 0..(2^32 - 1)
- seedMT(4157U);
-
- sprintf(filename,"typecountrecordwsjpeak%0.1f.%0.1f.dat",ALPHA,BETA);
- fileptr = fopen(filename, "w");
-
- printf("Reading from file...\n");
- fileread();
-
- printf("Initialising...\n");
- initialise();
- printf("k = %1.0f, typetot = %d\n",k,typetot);
-
- printf("Finding start state...\n");
- anderson();
- printf("Beginning burnin...\n");
- for (loop = 0; loop < NLOOPS; loop++) {
- for (i = 0; i < N; i++) {
- j = z[i];
- ztot[w[i]][j]--;
- if (ztot[w[i]][j] == 0) {
- if (j==usedcount[w[i]]) {
- usedcount[w[i]]--;
- }
- typecount[w[i]]--;
- typetot--;
- k--;
- }
- max = 0; tag = 0; totprob = 0;
- for (j = 0; j <= usedcount[w[i]]; j++) {
- if (ztot[w[i]][j] > 0) {
- probs[j] = ztot[w[i]][j] - ALPHA;
- } else {
- probs[j] = 0;
- if (tag == 0) {
- probs[j] = (ALPHA*k+BETA)*(((double) typecount[w[i]])+GAMMA)/(((double) typetot)+((double) W)*GAMMA);
- tag = 1;
- }
- }
- totprob += probs[j];
- }
- r = myrand()*totprob;
- max = probs[0];
- j = 0;
- while (r>max) {
- j++;
- max += probs[j];
- }
- z[i] = j;
- ztot[w[i]][j]++;
- if (ztot[w[i]][j]==1) {
- if (j == usedcount[w[i]]) {
- usedcount[w[i]]++;
- if (usedcount[w[i]]==KWMAX) {
- printf("Maximum number of tables exceeded!!!\n");
- }
- }
- typecount[w[i]]++;
- typetot++;
- k++;
- }
- }
- printf("Completed sample # %5d\n", loop);
- if (k != typetot) printf("k = %1.0f, typetot = %d\n",k,typetot);
- if (loop >= BURNIN && loop % SAMPLEFREQ == 0) {
- for (i = 0; i < W; i++) {
- fprintf(fileptr," %d", typecount[i]); //print (table?) count for each word type
- }
- fprintf(fileptr,"\n");
- }
- }
- fclose(fileptr);
-}
-