From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- .../prjava/src/phrase/VB.java | 419 --------------------- 1 file changed, 419 deletions(-) delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/VB.java (limited to 'gi/posterior-regularisation/prjava/src/phrase/VB.java') diff --git a/gi/posterior-regularisation/prjava/src/phrase/VB.java b/gi/posterior-regularisation/prjava/src/phrase/VB.java deleted file mode 100644 index cd3f4966..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/VB.java +++ /dev/null @@ -1,419 +0,0 @@ -package phrase; - -import gnu.trove.TIntArrayList; - -import io.FileUtil; - -import java.io.File; -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; - -import org.apache.commons.math.special.Gamma; - -import phrase.Corpus.Edge; - -public class VB { - - public static int MAX_ITER=400; - - /**@brief - * hyper param for beta - * where beta is multinomial - * for generating words from a topic - */ - public double lambda=0.1; - /**@brief - * hyper param for theta - * where theta is dirichlet for z - */ - public double alpha=0.0001; - /**@brief - * variational param for beta - */ - private double rho[][][]; - private double digamma_rho[][][]; - private double rho_sum[][]; - /**@brief - * variational param for z - */ - //private double phi[][]; - /**@brief - * variational param for theta - */ - private double gamma[]; - private static double VAL_DIFF_RATIO=0.005; - - private int n_positions; - private int n_words; - private int K; - private ExecutorService pool; - - private Corpus c; - public static void main(String[] args) { - // String in="../pdata/canned.con"; - String in="../pdata/btec.con"; - String out="../pdata/vb.out"; - int numCluster=25; - Corpus corpus = null; - File infile = new File(in); - try { - System.out.println("Reading concordance from " + infile); - corpus = Corpus.readFromFile(FileUtil.reader(infile)); - corpus.printStats(System.out); - } catch (IOException e) { - System.err.println("Failed to open input file: " + infile); - e.printStackTrace(); - System.exit(1); - } - - VB vb=new VB(numCluster, corpus); - int iter=20; - for(int i=0;idoc=c.getEdgesForPhrase(d); - for(int n=0;n doc=c.getEdgesForPhrase(phraseID); - for(int i=0;i 0){ - phisum = log_sum(phisum, phi[n][i]); - } - else{ - phisum = phi[n][i]; - } - - }//end of a word - - for(int i=0;i1e-10){ - obj+=phi[n][i]*Math.log(phi[n][i]); - } - - double beta_sum=0; - for(int pos=0;pos0 && (obj-prev_val)/Math.abs(obj) doc=c.getEdgesForPhrase(d); - double[][] phi = new double[doc.size()][K]; - double[] gamma = new double[K]; - - emObj += inference(d, phi, gamma); - - for(int n=0;n - { - double[][] phi; - double[] gamma; - double obj; - int d; - PartialEStep(int d) { this.d = d; } - - public PartialEStep call() - { - phi = new double[c.getEdgesForPhrase(d).size()][K]; - gamma = new double[K]; - obj = inference(d, phi, gamma); - return this; - } - } - - List> jobs = new ArrayList>(); - for (int d=0;d job: jobs) - { - try { - PartialEStep e = job.get(); - - emObj += e.obj; - List doc = c.getEdgesForPhrase(e.d); - for(int n=0;n doc=c.getEdgesForPhrase(d); - double[][] phi = new double[doc.size()][K]; - for(int i=0;i