package phrase; import gnu.trove.TIntArrayList; import io.FileUtil; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.util.Arrays; import java.util.List; import org.apache.commons.math.special.Gamma; import phrase.Corpus.Edge; /**@brief Holds the hyperparameters (lambda, alpha) and the variational parameters (rho, phi, gamma) used over a Corpus; the class name presumably stands for variational Bayes -- confirm. NOTE(review): this file is collapsed onto one physical line, so the line comment inside main() comments out everything that follows it, and several loop headers and type arguments further along appear truncated; restore the text from the original source before relying on it. */ public class VB { public static int MAX_ITER=400; /**@brief hyperparameter for beta, where beta is the multinomial that generates words from a topic */ public double lambda=0.1; /**@brief hyperparameter for theta, where theta is the Dirichlet for z */ public double alpha=0.0001; /**@brief variational parameter for beta */ private double rho[][][]; private double digamma_rho[][][]; private double rho_sum[][]; /**@brief variational parameter for z */ private double phi[][]; /**@brief variational parameter for theta */ private double gamma[]; private static double VAL_DIFF_RATIO=0.005; /**@brief objective value for a single document */ private double obj; private int n_positions; private int n_words; /**@brief size of the second dimension of phi; presumably the number of clusters passed to the constructor -- TODO confirm */ private int K; private Corpus c; public static void main(String[] args) { // String in="../pdata/canned.con"; String in="../pdata/btec.con"; String out="../pdata/vb.out"; int numCluster=25; Corpus corpus = null; File infile = new File(in); try { System.out.println("Reading concordance from " + infile); corpus = Corpus.readFromFile(FileUtil.reader(infile)); corpus.printStats(System.out); } catch (IOException e) { System.err.println("Failed to open input file: " + infile); e.printStackTrace(); System.exit(1); } VB vb=new VB(numCluster, corpus); int iter=20; for(int i=0;idoc=c.getEdgesForPhrase(d); for(int n=0;n doc=c.getEdgesForPhrase(phraseID); phi=new double[doc.size()][K]; for(int i=0;i 0){ phisum = log_sum(phisum, phi[n][i]); } else{ phisum = phi[n][i]; } }//end of a word for(int i=0;i1e-10){ obj+=phi[n][i]*Math.log(phi[n][i]); } double beta_sum=0; for(int pos=0;pos0 && (obj-prev_val)/Math.abs(obj)doc=c.getEdgesForPhrase(d); for(int n=0;n doc=c.getEdgesForPhrase(d); for(int n=0;n