package phrase; import gnu.trove.TIntArrayList; import io.FileUtil; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import org.apache.commons.math.special.Gamma; import phrase.Corpus.Edge; public class VB { public static int MAX_ITER=400; /**@brief * hyper param for beta * where beta is multinomial * for generating words from a topic */ public double lambda=0.1; /**@brief * hyper param for theta * where theta is dirichlet for z */ public double alpha=0.0001; /**@brief * variational param for beta */ private double rho[][][]; private double digamma_rho[][][]; private double rho_sum[][]; /**@brief * variational param for z */ //private double phi[][]; /**@brief * variational param for theta */ private double gamma[]; private static double VAL_DIFF_RATIO=0.005; private int n_positions; private int n_words; private int K; private ExecutorService pool; private Corpus c; public static void main(String[] args) { // String in="../pdata/canned.con"; String in="../pdata/btec.con"; String out="../pdata/vb.out"; int numCluster=25; Corpus corpus = null; File infile = new File(in); try { System.out.println("Reading concordance from " + infile); corpus = Corpus.readFromFile(FileUtil.reader(infile)); corpus.printStats(System.out); } catch (IOException e) { System.err.println("Failed to open input file: " + infile); e.printStackTrace(); System.exit(1); } VB vb=new VB(numCluster, corpus); int iter=20; for(int i=0;idoc=c.getEdgesForPhrase(d); for(int n=0;n doc=c.getEdgesForPhrase(phraseID); for(int i=0;i 0){ phisum = log_sum(phisum, phi[n][i]); } else{ phisum = phi[n][i]; } }//end of a word for(int i=0;i1e-10){ obj+=phi[n][i]*Math.log(phi[n][i]); } double beta_sum=0; for(int pos=0;pos0 && (obj-prev_val)/Math.abs(obj) doc=c.getEdgesForPhrase(d); double[][] phi = new double[doc.size()][K]; double[] gamma = new double[K]; emObj += inference(d, phi, gamma); for(int n=0;n { double[][] phi; double[] gamma; double obj; int d; PartialEStep(int d) { this.d = d; } public PartialEStep call() { phi = new double[c.getEdgesForPhrase(d).size()][K]; gamma = new double[K]; obj = inference(d, phi, gamma); return this; } } List> jobs = new ArrayList>(); for (int d=0;d job: jobs) { try { PartialEStep e = job.get(); emObj += e.obj; List doc = c.getEdgesForPhrase(e.d); for(int n=0;n doc=c.getEdgesForPhrase(d); double[][] phi = new double[doc.size()][K]; for(int i=0;i