From efd4b6bb374a66eb4db07d9b2b6cef1dc4b6f5b0 Mon Sep 17 00:00:00 2001 From: desaicwtf Date: Fri, 16 Jul 2010 22:07:11 +0000 Subject: agreement between source and target side git-svn-id: https://ws10smt.googlecode.com/svn/trunk@300 ec762483-ff6d-05da-a07a-a48fb63a330f --- .../prjava/src/phrase/Agree2Sides.java | 196 +++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java (limited to 'gi/posterior-regularisation/prjava/src') diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java b/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java new file mode 100644 index 00000000..7bc28a40 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java @@ -0,0 +1,196 @@ +package phrase; + +import gnu.trove.TIntArrayList; + +import io.FileUtil; + +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.util.List; + +import phrase.Corpus.Edge; + +public class Agree2Sides { + PhraseCluster model1,model2; + Corpus c1,c2; + private int K; + + /**@brief sum of loglikelihood of two + * individual models + */ + public double llh; + /**@brief Bhattacharyya distance + * + */ + public double bdist; + /** + * + * @param numCluster + * @param corpus + */ + public Agree2Sides(int numCluster, Corpus corpus1 , Corpus corpus2 ){ + + model1=new PhraseCluster(numCluster, corpus1); + model2=new PhraseCluster(numCluster,corpus2); + c1=corpus1; + c2=corpus2; + K=numCluster; + + } + + /**@brief test + * + */ + public static void main(String args[]){ + //String in="../pdata/canned.con"; + // String in="../pdata/btec.con"; + String in1="../pdata/source.txt"; + String in2="../pdata/target.txt"; + String out="../pdata/posterior.out"; + int numCluster=25; + Corpus corpus1 = null,corpus2=null; + File infile1 = new File(in1),infile2=new File(in2); + try { + System.out.println("Reading concordance from " + infile1); + corpus1 = Corpus.readFromFile(FileUtil.reader(infile1)); + System.out.println("Reading concordance from " + infile2); + corpus2 = Corpus.readFromFile(FileUtil.reader(infile2)); + corpus1.printStats(System.out); + } catch (IOException e) { + System.err.println("Failed to open input file: " + infile1); + e.printStackTrace(); + System.exit(1); + } + + Agree2Sides agree=new Agree2Sides(numCluster, corpus1,corpus2); + int iter=20; + for(int i=0;i 0; + bdist += edge1.getCount() * Math.log(z); + arr.F.l1normalize(p); + int count = edge1.getCount(); + //increment expected count + TIntArrayList contextToks1 = edge1.getContext(); + TIntArrayList contextToks2 = edge2.getContext(); + int phrase1=edge1.getPhraseId(); + int phrase2=edge2.getPhraseId(); + for(int tag=0;tag