From 1e4724dd169fbb20fc7448cc2cb1ae1bc539560c Mon Sep 17 00:00:00 2001 From: desaicwtf Date: Mon, 12 Jul 2010 14:17:09 +0000 Subject: agreement model git-svn-id: https://ws10smt.googlecode.com/svn/trunk@221 ec762483-ff6d-05da-a07a-a48fb63a330f --- .../prjava/src/phrase/Agree.java | 174 +++++++++++++++++++++ .../prjava/src/phrase/C2F.java | 4 +- 2 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 gi/posterior-regularisation/prjava/src/phrase/Agree.java diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree.java b/gi/posterior-regularisation/prjava/src/phrase/Agree.java new file mode 100644 index 00000000..091875ce --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/phrase/Agree.java @@ -0,0 +1,174 @@ +package phrase; + +import gnu.trove.TIntArrayList; + +import io.FileUtil; + +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.util.List; + +import phrase.Corpus.Edge; + +public class Agree { + private PhraseCluster model1; + private C2F model2; + Corpus c; + private int K,n_phrases, n_words, n_contexts, n_positions1,n_positions2; + + /** + * Builds a PhraseCluster (model1) and a C2F (model2), each with numCluster clusters over the given corpus, and caches the corpus' word, phrase, context and context-position counts. NOTE(review): the three trailing 0 arguments passed to PhraseCluster and the fixed n_positions2=2 are not explained in this file - confirm their meaning. + * @param numCluster number of clusters handed to both models; also stored as K + * @param corpus corpus handed to both models and kept as field c + */ + public Agree(int numCluster, Corpus corpus){ + + model1=new PhraseCluster(numCluster, corpus, 0, 0, 0); + model2=new C2F(numCluster,corpus); + c=corpus; + n_words=c.getNumWords(); + n_phrases=c.getNumPhrases(); + n_contexts=c.getNumContexts(); + n_positions1=c.getNumContextPositions(); + n_positions2=2; + K=numCluster; + + } + + /**@brief test + * Test driver: reads the corpus from the hard-coded concordance path, prints its statistics to standard output and builds an Agree model with 25 clusters; if reading fails with an IOException it reports the error and exits with status 1. + */ + public static void main(String args[]){ + String in="../pdata/canned.con"; + String out="../pdata/posterior.out"; + int numCluster=25; + Corpus corpus = null; + File infile = new File(in); + try { + System.out.println("Reading concordance from " + infile); + corpus = Corpus.readFromFile(FileUtil.reader(infile)); + corpus.printStats(System.out); + } catch (IOException e) { + System.err.println("Failed to open input file: " + infile); + e.printStackTrace(); + 
System.exit(1); + } + + Agree agree=new Agree(numCluster, corpus); + int iter=20; + double llh=0; + for(int i=0;i contexts = c.getEdgesForContext(context); + + for (int ctx=0; ctx 0; + loglikelihood += edge.getCount() * Math.log(z); + arr.F.l1normalize(p); + + int count = edge.getCount(); + //increment expected count + TIntArrayList phraseToks = edge.getPhrase(); + TIntArrayList contextToks = edge.getContext(); + for(int tag=0;tag