From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- .../prjava/src/phrase/C2F.java | 216 --------------------- 1 file changed, 216 deletions(-) delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/C2F.java (limited to 'gi/posterior-regularisation/prjava/src/phrase/C2F.java') diff --git a/gi/posterior-regularisation/prjava/src/phrase/C2F.java b/gi/posterior-regularisation/prjava/src/phrase/C2F.java deleted file mode 100644 index e8783950..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/C2F.java +++ /dev/null @@ -1,216 +0,0 @@ -package phrase; - -import gnu.trove.TIntArrayList; - -import io.FileUtil; - -import java.io.File; -import java.io.IOException; -import java.io.PrintStream; -import java.util.Arrays; -import java.util.List; - -import phrase.Corpus.Edge; - -/** - * @brief context generates phrase - * @author desaic - * - */ -public class C2F { - public int K; - private int n_words, n_contexts, n_positions; - public Corpus c; - - /**@brief - * emit[tag][position][word] = p(word | tag, position in phrase) - */ - public double emit[][][]; - /**@brief - * pi[context][tag] = p(tag | context) - */ - public double pi[][]; - - public C2F(int numCluster, Corpus corpus){ - K=numCluster; - c=corpus; - n_words=c.getNumWords(); - n_contexts=c.getNumContexts(); - - //number of words in a phrase to be considered - //currently the first and last word in source and target - //if the phrase has length 1 in either dimension then - //we use the same word for two positions - n_positions=c.phraseEdges(c.getEdges().get(0).getPhrase()).size(); - - emit=new double [K][n_positions][n_words]; - pi=new double[n_contexts][K]; - - for(double [][]i:emit){ - for(double []j:i){ - arr.F.randomise(j); - } - } - - for(double []j:pi){ - arr.F.randomise(j); - } - } - - /**@brief test - * - */ - public static void main(String args[]){ - String in="../pdata/canned.con"; - String out="../pdata/posterior.out"; - int numCluster=25; - Corpus corpus = null; - File infile = new File(in); - try { - System.out.println("Reading concordance from " + infile); - corpus = Corpus.readFromFile(FileUtil.reader(infile)); - corpus.printStats(System.out); - } catch (IOException e) { - System.err.println("Failed to open input file: " + infile); - e.printStackTrace(); - System.exit(1); - } - - C2F c2f=new C2F(numCluster,corpus); - int iter=20; - double llh=0; - for(int i=0;i contexts = c.getEdgesForContext(context); - - for (int ctx=0; ctx 0; - loglikelihood += edge.getCount() * Math.log(z); - arr.F.l1normalize(p); - - double count = edge.getCount(); - //increment expected count - TIntArrayList phrase= edge.getPhrase(); - for(int tag=0;tag EPS) - ps.print("\t" + j + ": " + pi[i][j]); - } - ps.println(); - } - - ps.println("P(word|tag,position)"); - for (int i = 0; i < K; ++i) - { - for(int position=0;position EPS) - ps.print(c.getWord(word)+"="+emit[i][position][word]+"\t"); - } - ps.println(); - } - ps.println(); - } - - } - -} -- cgit v1.2.3