/*
 * C2F -- "context generates phrase" clustering model (description taken from
 * the class comment below).
 *
 * What the surviving code shows:
 *   - K is the number of clusters/tags; the constructor sets it from numCluster.
 *   - emit is allocated as [K][n_positions][n_words] and is documented as
 *     emit[tag][position][word] = p(word | tag, position in phrase).
 *   - pi is allocated as [n_contexts][K] and is documented as
 *     pi[context][tag] = p(tag | context).
 *   - The constructor reads n_words and n_contexts from the Corpus, takes
 *     n_positions from the size of c.phraseEdges(...) applied to the phrase of
 *     the first edge (so it calls c.getEdges().get(0) unconditionally), and
 *     passes every row of emit and pi to arr.F.randomise.
 *   - main is a test driver: it reads a concordance from "../pdata/canned.con",
 *     exits with status 1 on IOException, builds a C2F with 25 clusters and
 *     sets iter=20 before the text breaks off.
 *   - A later fragment accumulates edge.getCount() * Math.log(z) into
 *     loglikelihood and calls arr.F.l1normalize(p); the declarations of z and
 *     p are not present in this text.
 *
 * NOTE(review): this copy of the file is damaged and is not compilable as-is.
 *   1. All line breaks inside the class were lost. Each "//" comment on the
 *      long line below therefore comments out the remainder of that physical
 *      line, including code that was originally on following lines.
 *   2. Four for-loop headers stop right after the loop variable and continue
 *      in unrelated code: "for(int i=0;i contexts = ...",
 *      "for (int ctx=0; ctx 0; ...", "for(int tag=0;tag EPS) ..." and
 *      "for(int position=0;position EPS) ...". Presumably everything between
 *      a less-than sign and the next greater-than sign was stripped by a
 *      markup filter -- TODO confirm against version control.
 *   3. As a consequence the rest of main, the header of the EM-style routine,
 *      the header of the method that prints the parameter tables (it uses a
 *      variable named ps and a threshold named EPS, neither declared here),
 *      and any methods that sat between them are missing.
 * Restore the file from its original source before building; do not try to
 * repair the gaps by hand from this text alone.
 */
package phrase; import gnu.trove.TIntArrayList; import io.FileUtil; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.util.Arrays; import java.util.List; import phrase.Corpus.Edge; /** * @brief context generates phrase * @author desaic * */ public class C2F { public int K; private int n_words, n_contexts, n_positions; public Corpus c; /**@brief * emit[tag][position][word] = p(word | tag, position in phrase) */ public double emit[][][]; /**@brief * pi[context][tag] = p(tag | context) */ public double pi[][]; public C2F(int numCluster, Corpus corpus){ K=numCluster; c=corpus; n_words=c.getNumWords(); n_contexts=c.getNumContexts(); //number of words in a phrase to be considered //currently the first and last word in source and target //if the phrase has length 1 in either dimension then //we use the same word for two positions n_positions=c.phraseEdges(c.getEdges().get(0).getPhrase()).size(); emit=new double [K][n_positions][n_words]; pi=new double[n_contexts][K]; for(double [][]i:emit){ for(double []j:i){ arr.F.randomise(j); } } for(double []j:pi){ arr.F.randomise(j); } } /**@brief test * */ public static void main(String args[]){ String in="../pdata/canned.con"; String out="../pdata/posterior.out"; int numCluster=25; Corpus corpus = null; File infile = new File(in); try { System.out.println("Reading concordance from " + infile); corpus = Corpus.readFromFile(FileUtil.reader(infile)); corpus.printStats(System.out); } catch (IOException e) { System.err.println("Failed to open input file: " + infile); e.printStackTrace(); System.exit(1); } C2F c2f=new C2F(numCluster,corpus); int iter=20; double llh=0; for(int i=0;i contexts = c.getEdgesForContext(context); for (int ctx=0; ctx 0; loglikelihood += edge.getCount() * Math.log(z); arr.F.l1normalize(p); double count = edge.getCount(); //increment expected count TIntArrayList phrase= edge.getPhrase(); for(int tag=0;tag EPS) ps.print("\t" + j + ": " + pi[i][j]); } ps.println(); } 
/*
 * Tail of the parameter-printing routine: writes the "P(word|tag,position)"
 * heading, then for each tag i in [0, K) prints word=probability pairs from
 * emit[i][position][word], separated by tabs. The inner loop bounds and the
 * comparison against EPS are among the truncated text (see the note at the
 * top of the file); presumably only entries above EPS are printed -- TODO
 * confirm once the original source is restored.
 */
ps.println("P(word|tag,position)"); for (int i = 0; i < K; ++i) { for(int position=0;position EPS) ps.print(c.getWord(word)+"="+emit[i][position][word]+"\t"); } ps.println(); } ps.println(); } } }