Merge remote branch 'upstream/master'

Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
author: Kenneth Heafield <github@kheafield.com> 2012-10-22 12:07:20 +0100
committer: Kenneth Heafield <github@kheafield.com> 2012-10-22 12:07:20 +0100
commit: 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree: 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/posterior-regularisation/prjava/src/phrase
parent: cf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent: 21825a09d97c2e0afd20512f306fb25fed55e529 (diff)
11 files changed, 0 insertions, 3008 deletions
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree.java b/gi/posterior-regularisation/prjava/src/phrase/Agree.java
deleted file mode 100644
index 8f7b499e..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Agree.java
+++ /dev/null
@@ -1,204 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree {
-	PhraseCluster model1;
-	C2F model2;
-	Corpus c;
-	private int K,n_phrases, n_words, n_contexts, n_positions1,n_positions2;
-	
-	/**@brief sum of loglikelihood of two
-	 * individual models
-	 */
-	public double llh;
-	/**@brief Bhattacharyya distance
-	 * 
-	 */
-	public double bdist; 
-	/**
-	 * 
-	 * @param numCluster
-	 * @param corpus
-	 */
-	public Agree(int numCluster, Corpus corpus){
-		
-		model1=new PhraseCluster(numCluster, corpus);
-		model2=new C2F(numCluster,corpus);
-		c=corpus;
-		n_words=c.getNumWords();
-		n_phrases=c.getNumPhrases();
-		n_contexts=c.getNumContexts();
-		n_positions1=c.getNumContextPositions();
-		n_positions2=2;
-		K=numCluster;
-		
-	}
-	
-	/**@brief test
-	 * 
-	 */
-	public static void main(String args[]){
-		//String in="../pdata/canned.con";
-		String in="../pdata/btec.con";
-		String out="../pdata/posterior.out";
-		int numCluster=25;
-		Corpus corpus = null;
-		File infile = new File(in);
-		try {
-			System.out.println("Reading concordance from " + infile);
-			corpus = Corpus.readFromFile(FileUtil.reader(infile));
-			corpus.printStats(System.out);
-		} catch (IOException e) {
-			System.err.println("Failed to open input file: " + infile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-		
-		Agree agree=new Agree(numCluster, corpus);
-		int iter=20;
-		for(int i=0;i<iter;i++){
-			agree.EM();
-			System.out.println("Iter"+i+", llh: "+agree.llh+
-					", divergence:"+agree.bdist+
-							" sum: "+(agree.llh+agree.bdist));
-		}
-		
-		File outfile = new File (out);
-		try {
-			PrintStream ps = FileUtil.printstream(outfile);
-			agree.displayPosterior(ps);
-		//	ps.println();
-		//	c2f.displayModelParam(ps);
-			ps.close();
-		} catch (IOException e) {
-			System.err.println("Failed to open output file: " + outfile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-		
-	}
-	
-	public double EM(){
-		
-		double [][][]exp_emit1=new double [K][n_positions1][n_words];
-		double [][]exp_pi1=new double[n_phrases][K];
-		
-		double [][][]exp_emit2=new double [K][n_positions2][n_words];
-		double [][]exp_pi2=new double[n_contexts][K];
-		
-		llh=0;
-		bdist=0;
-		//E
-		for(int context=0; context< n_contexts; context++){
-			
-			List<Edge> contexts = c.getEdgesForContext(context);
-
-			for (int ctx=0; ctx<contexts.size(); ctx++){
-				Edge edge = contexts.get(ctx);
-				int phrase=edge.getPhraseId();
-				double p[]=posterior(edge);
-				double z = arr.F.l1norm(p);
-				assert z > 0;
-				bdist += edge.getCount() * Math.log(z);
-				arr.F.l1normalize(p);
-				
-				double count = edge.getCount();
-				//increment expected count
-				TIntArrayList phraseToks = edge.getPhrase();
-				TIntArrayList contextToks = edge.getContext();
-				for(int tag=0;tag<K;tag++){
-
-					for(int position=0;position<n_positions1;position++){
-						exp_emit1[tag][position][contextToks.get(position)]+=p[tag]*count;
-					}
-					
-					exp_emit2[tag][0][phraseToks.get(0)]+=p[tag]*count;
-					exp_emit2[tag][1][phraseToks.get(phraseToks.size()-1)]+=p[tag]*count;
-					
-					exp_pi1[phrase][tag]+=p[tag]*count;
-					exp_pi2[context][tag]+=p[tag]*count;
-				}
-			}
-		}
-		
-		//System.out.println("Log likelihood: "+loglikelihood);
-		
-		//M
-		for(double [][]i:exp_emit1){
-			for(double []j:i){
-				arr.F.l1normalize(j);
-			}
-		}
-		
-		for(double []j:exp_pi1){
-			arr.F.l1normalize(j);
-		}
-		
-		for(double [][]i:exp_emit2){
-			for(double []j:i){
-				arr.F.l1normalize(j);
-			}
-		}
-		
-		for(double []j:exp_pi2){
-			arr.F.l1normalize(j);
-		}
-		
-		model1.emit=exp_emit1;
-		model1.pi=exp_pi1;
-		model2.emit=exp_emit2;
-		model2.pi=exp_pi2;
-		
-		return llh;
-	}
-
-	public double[] posterior(Corpus.Edge edge) 
-	{
-		double[] prob1=model1.posterior(edge);
-		double[] prob2=model2.posterior(edge);
-		
-		llh+=edge.getCount()*Math.log(arr.F.l1norm(prob1));
-		llh+=edge.getCount()*Math.log(arr.F.l1norm(prob2));
-		arr.F.l1normalize(prob1);
-		arr.F.l1normalize(prob2);
-		
-		for(int i=0;i<prob1.length;i++){
-			prob1[i]*=prob2[i];
-			prob1[i]=Math.sqrt(prob1[i]);
-		}
-		
-		return prob1;
-	}
-	
-	public void displayPosterior(PrintStream ps)
-	{	
-		displayPosterior(ps, c.getEdges());
-	}
-	
-	public void displayPosterior(PrintStream ps, List<Edge> test)
-	{	
-		for (Edge edge : test)
-		{
-			double probs[] = posterior(edge);
-			arr.F.l1normalize(probs);
-
-			// emit phrase
-			ps.print(edge.getPhraseString());
-			ps.print("\t");
-			ps.print(edge.getContextString(true));
-			int t=arr.F.argmax(probs);
-			ps.println(" ||| C=" + t);
-		}
-	}
-	
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java b/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java
deleted file mode 100644
index 031f887f..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java
+++ /dev/null
@@ -1,197 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree2Sides {
-	PhraseCluster model1,model2;
-	Corpus c1,c2;
-	private int K;
-	
-	/**@brief sum of loglikelihood of two
-	 * individual models
-	 */
-	public double llh;
-	/**@brief Bhattacharyya distance
-	 * 
-	 */
-	public double bdist; 
-	/**
-	 * 
-	 * @param numCluster
-	 * @param corpus
-	 */
-	public Agree2Sides(int numCluster, Corpus corpus1 , Corpus corpus2 ){
-		
-		model1=new PhraseCluster(numCluster, corpus1);
-		model2=new PhraseCluster(numCluster,corpus2);
-		c1=corpus1;
-		c2=corpus2;
-		K=numCluster;
-		
-	}
-	
-	/**@brief test
-	 * 
-	 */
-	public static void main(String args[]){
-		//String in="../pdata/canned.con";
-	//	String in="../pdata/btec.con";
-		String in1="../pdata/source.txt";
-		String in2="../pdata/target.txt";
-		String out="../pdata/posterior.out";
-		int numCluster=25;
-		Corpus corpus1 = null,corpus2=null;
-		File infile1 = new File(in1),infile2=new File(in2);
-		try {
-			System.out.println("Reading concordance from " + infile1);
-			corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
-			System.out.println("Reading concordance from " + infile2);
-			corpus2 = Corpus.readFromFile(FileUtil.reader(infile2));
-			corpus1.printStats(System.out);
-		} catch (IOException e) {
-			System.err.println("Failed to open input file: " + infile1);
-			e.printStackTrace();
-			System.exit(1);
-		}
-		
-		Agree2Sides agree=new Agree2Sides(numCluster, corpus1,corpus2);
-		int iter=20;
-		for(int i=0;i<iter;i++){
-			agree.EM();
-			System.out.println("Iter"+i+", llh: "+agree.llh+
-					", divergence:"+agree.bdist+
-							" sum: "+(agree.llh+agree.bdist));
-		}
-		
-		File outfile = new File (out);
-		try {
-			PrintStream ps = FileUtil.printstream(outfile);
-			agree.displayPosterior(ps);
-		//	ps.println();
-		//	c2f.displayModelParam(ps);
-			ps.close();
-		} catch (IOException e) {
-			System.err.println("Failed to open output file: " + outfile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-		
-	}
-	
-	public double EM(){
-		
-		double [][][]exp_emit1=new double [K][c1.getNumContextPositions()][c1.getNumWords()];
-		double [][]exp_pi1=new double[c1.getNumPhrases()][K];
-		
-		double [][][]exp_emit2=new double [K][c2.getNumContextPositions()][c2.getNumWords()];
-		double [][]exp_pi2=new double[c2.getNumPhrases()][K];
-		
-		llh=0;
-		bdist=0;
-		//E
-		for(int i=0;i<c1.getEdges().size();i++){
-			Edge edge1=c1.getEdges().get(i);
-			Edge edge2=c2.getEdges().get(i);
-			double p[]=posterior(i);
-			double z = arr.F.l1norm(p);
-			assert z > 0;
-			bdist += edge1.getCount() * Math.log(z);
-			arr.F.l1normalize(p);
-			double count = edge1.getCount();
-				//increment expected count
-			TIntArrayList contextToks1 = edge1.getContext();
-			TIntArrayList contextToks2 = edge2.getContext();
-			int phrase1=edge1.getPhraseId();
-			int phrase2=edge2.getPhraseId();
-			for(int tag=0;tag<K;tag++){
-				for(int position=0;position<c1.getNumContextPositions();position++){
-					exp_emit1[tag][position][contextToks1.get(position)]+=p[tag]*count;
-				}
-				for(int position=0;position<c2.getNumContextPositions();position++){
-					exp_emit2[tag][position][contextToks2.get(position)]+=p[tag]*count;
-				}
-				exp_pi1[phrase1][tag]+=p[tag]*count;
-				exp_pi2[phrase2][tag]+=p[tag]*count;
-			}
-		}
-		
-		//System.out.println("Log likelihood: "+loglikelihood);
-		
-		//M
-		for(double [][]i:exp_emit1){
-			for(double []j:i){
-				arr.F.l1normalize(j);
-			}
-		}
-		
-		for(double []j:exp_pi1){
-			arr.F.l1normalize(j);
-		}
-		
-		for(double [][]i:exp_emit2){
-			for(double []j:i){
-				arr.F.l1normalize(j);
-			}
-		}
-		
-		for(double []j:exp_pi2){
-			arr.F.l1normalize(j);
-		}
-		
-		model1.emit=exp_emit1;
-		model1.pi=exp_pi1;
-		model2.emit=exp_emit2;
-		model2.pi=exp_pi2;
-		
-		return llh;
-	}
-
-	public double[] posterior(int edgeIdx) 
-	{
-		return posterior(c1.getEdges().get(edgeIdx), c2.getEdges().get(edgeIdx));
-	}
-	
-	public double[] posterior(Edge e1, Edge e2) 
-	{
-		double[] prob1=model1.posterior(e1);
-		double[] prob2=model2.posterior(e2);
-		
-		llh+=e1.getCount()*Math.log(arr.F.l1norm(prob1));
-		llh+=e2.getCount()*Math.log(arr.F.l1norm(prob2));
-		arr.F.l1normalize(prob1);
-		arr.F.l1normalize(prob2);
-		
-		for(int i=0;i<prob1.length;i++){
-			prob1[i]*=prob2[i];
-			prob1[i]=Math.sqrt(prob1[i]);
-		}
-		
-		return prob1;
-	}
-	
-	public void displayPosterior(PrintStream ps)
-	{	
-		for (int i=0;i<c1.getEdges().size();i++)
-		{
-			Edge edge=c1.getEdges().get(i);
-			double probs[] = posterior(i);
-			arr.F.l1normalize(probs);
-
-			// emit phrase
-			ps.print(edge.getPhraseString());
-			ps.print("\t");
-			ps.print(edge.getContextString(true));
-			int t=arr.F.argmax(probs);
-			ps.println(" ||| C=" + t);
-		}
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/C2F.java b/gi/posterior-regularisation/prjava/src/phrase/C2F.java
deleted file mode 100644
index e8783950..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/C2F.java
+++ /dev/null
@@ -1,216 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Arrays;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-/**
- * @brief context generates phrase
- * @author desaic
- *
- */
-public class C2F {
-	public int K;
-	private int n_words, n_contexts, n_positions;
-	public Corpus c;
-	
-	/**@brief
-	 *  emit[tag][position][word] = p(word | tag, position in phrase)
-	 */
-	public double emit[][][];
-	/**@brief
-	 *  pi[context][tag] = p(tag | context)
-	 */
-	public double pi[][];
-	
-	public C2F(int numCluster, Corpus corpus){
-		K=numCluster;
-		c=corpus;
-		n_words=c.getNumWords();
-		n_contexts=c.getNumContexts();
-		
-		//number of words in a phrase to be considered
-		//currently the first and last word in source and target
-		//if the phrase has length 1 in either dimension then
-		//we use the same word for two positions
-		n_positions=c.phraseEdges(c.getEdges().get(0).getPhrase()).size();
-		
-		emit=new double [K][n_positions][n_words];
-		pi=new double[n_contexts][K];
-		
-		for(double [][]i:emit){
-			for(double []j:i){
-				arr.F.randomise(j);
-			}
-		}
-		
-		for(double []j:pi){
-			arr.F.randomise(j);
-		}
-	}
-	
-	/**@brief test
-	 * 
-	 */
-	public static void main(String args[]){
-		String in="../pdata/canned.con";
-		String out="../pdata/posterior.out";
-		int numCluster=25;
-		Corpus corpus = null;
-		File infile = new File(in);
-		try {
-			System.out.println("Reading concordance from " + infile);
-			corpus = Corpus.readFromFile(FileUtil.reader(infile));
-			corpus.printStats(System.out);
-		} catch (IOException e) {
-			System.err.println("Failed to open input file: " + infile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-		
-		C2F c2f=new C2F(numCluster,corpus);
-		int iter=20;
-		double llh=0;
-		for(int i=0;i<iter;i++){
-			llh=c2f.EM();
-			System.out.println("Iter"+i+", llh: "+llh);
-		}
-		
-		File outfile = new File (out);
-		try {
-			PrintStream ps = FileUtil.printstream(outfile);
-			c2f.displayPosterior(ps);
-		//	ps.println();
-		//	c2f.displayModelParam(ps);
-			ps.close();
-		} catch (IOException e) {
-			System.err.println("Failed to open output file: " + outfile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-		
-	}
-	
-	public double EM(){
-		double [][][]exp_emit=new double [K][n_positions][n_words];
-		double [][]exp_pi=new double[n_contexts][K];
-		
-		double loglikelihood=0;
-		
-		//E
-		for(int context=0; context< n_contexts; context++){
-			
-			List<Edge> contexts = c.getEdgesForContext(context);
-
-			for (int ctx=0; ctx<contexts.size(); ctx++){
-				Edge edge = contexts.get(ctx);
-				double p[]=posterior(edge);
-				double z = arr.F.l1norm(p);
-				assert z > 0;
-				loglikelihood += edge.getCount() * Math.log(z);
-				arr.F.l1normalize(p);
-				
-				double count = edge.getCount();
-				//increment expected count
-				TIntArrayList phrase= edge.getPhrase();
-				for(int tag=0;tag<K;tag++){
-
-					exp_emit[tag][0][phrase.get(0)]+=p[tag]*count;
-					exp_emit[tag][1][phrase.get(phrase.size()-1)]+=p[tag]*count;
-					
-					exp_pi[context][tag]+=p[tag]*count;
-				}
-			}
-		}
-		
-		//System.out.println("Log likelihood: "+loglikelihood);
-		
-		//M
-		for(double [][]i:exp_emit){
-			for(double []j:i){
-				arr.F.l1normalize(j);
-			}
-		}
-		
-		emit=exp_emit;
-		
-		for(double []j:exp_pi){
-			arr.F.l1normalize(j);
-		}
-		
-		pi=exp_pi;
-		
-		return loglikelihood;
-	}
-
-	public double[] posterior(Corpus.Edge edge) 
-	{
-		double[] prob=Arrays.copyOf(pi[edge.getContextId()], K);
-		
-		TIntArrayList phrase = edge.getPhrase();
-		TIntArrayList offsets = c.phraseEdges(phrase);
-		for(int tag=0;tag<K;tag++)
-		{
-			for (int i=0; i < offsets.size(); ++i)
-				prob[tag]*=emit[tag][i][phrase.get(offsets.get(i))];
-		}
-			
-		return prob;
-	}
-
-	public void displayPosterior(PrintStream ps)
-	{	
-		for (Edge edge : c.getEdges())
-		{
-			double probs[] = posterior(edge);
-			arr.F.l1normalize(probs);
-
-			// emit phrase
-			ps.print(edge.getPhraseString());
-			ps.print("\t");
-			ps.print(edge.getContextString(true));
-			int t=arr.F.argmax(probs);
-			ps.println(" ||| C=" + t);
-		}
-	}
-	
-	public void displayModelParam(PrintStream ps)
-	{
-		final double EPS = 1e-6;
-		
-		ps.println("P(tag|context)");
-		for (int i = 0; i < n_contexts; ++i)
-		{
-			ps.print(c.getContext(i));
-			for(int j=0;j<pi[i].length;j++){
-				if (pi[i][j] > EPS)
-					ps.print("\t" + j + ": " + pi[i][j]);
-			}
-			ps.println();
-		}
-		
-		ps.println("P(word|tag,position)");
-		for (int i = 0; i < K; ++i)
-		{
-			for(int position=0;position<n_positions;position++){
-				ps.println("tag " + i + " position " + position);
-				for(int word=0;word<emit[i][position].length;word++){
-					if (emit[i][position][word] > EPS)
-						ps.print(c.getWord(word)+"="+emit[i][position][word]+"\t");
-				}
-				ps.println();
-			}
-			ps.println();
-		}
-		
-	}
-	
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java b/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
deleted file mode 100644
index 4b1939cd..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
+++ /dev/null
@@ -1,288 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-
-public class Corpus
-{
-	private Lexicon<String> wordLexicon = new Lexicon<String>();
-	private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>();
-	private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>();
-	private List<Edge> edges = new ArrayList<Edge>();
-	private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
-	private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
-	public int splitSentinel;
-	public int phraseSentinel;
-	public int rareSentinel;
-
-	public Corpus()
-	{
-		splitSentinel = wordLexicon.insert("<SPLIT>");
-		phraseSentinel = wordLexicon.insert("<PHRASE>");		
-		rareSentinel = wordLexicon.insert("<RARE>");
-	}
-	
-	public class Edge
-	{
-		
-		Edge(int phraseId, int contextId, double count,int tag)
-		{
-			this.phraseId = phraseId;
-			this.contextId = contextId;
-			this.count = count;
-			fixTag=tag;
-		}
-		
-		Edge(int phraseId, int contextId, double count)
-		{
-			this.phraseId = phraseId;
-			this.contextId = contextId;
-			this.count = count;
-			fixTag=-1;
-		}
-		public int getTag(){
-			return fixTag;
-		}
-		
-		public int getPhraseId()
-		{
-			return phraseId;
-		}
-		public TIntArrayList getPhrase()
-		{
-			return Corpus.this.getPhrase(phraseId);
-		}
-		public String getPhraseString()
-		{
-			return Corpus.this.getPhraseString(phraseId);
-		}		
-		public int getContextId()
-		{
-			return contextId;
-		}
-		public TIntArrayList getContext()
-		{
-			return Corpus.this.getContext(contextId);
-		}
-		public String getContextString(boolean insertPhraseSentinel)
-		{
-			return Corpus.this.getContextString(contextId, insertPhraseSentinel);
-		}
-		public double getCount()
-		{
-			return count;
-		}
-		public boolean equals(Object other)
-		{
-			if (other instanceof Edge) 
-			{
-				Edge oe = (Edge) other;
-				return oe.phraseId == phraseId && oe.contextId == contextId; 
-			}
-			else return false;
-		}
-		public int hashCode()
-		{   // this is how boost's hash_combine does it
-			int seed = phraseId;
-			seed ^= contextId + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-			return seed;
-		}
-		public String toString()
-		{
-			return getPhraseString() + "\t" + getContextString(true);
-		}
-		
-		private int phraseId;
-		private int contextId;
-		private double count;
-		private int fixTag;
-	}
-
-	List<Edge> getEdges()
-	{
-		return edges;
-	}
-	
-	int getNumEdges()
-	{
-		return edges.size();
-	}
-
-	int getNumPhrases()
-	{
-		return phraseLexicon.size();
-	}
-	
-	int getNumContextPositions()
-	{
-		return contextLexicon.lookup(0).size();
-	}
-	
-	List<Edge> getEdgesForPhrase(int phraseId)
-	{
-		return phraseToContext.get(phraseId);
-	}
-	
-	int getNumContexts()
-	{
-		return contextLexicon.size();
-	}
-	
-	List<Edge> getEdgesForContext(int contextId)
-	{
-		return contextToPhrase.get(contextId);
-	}
-	
-	int getNumWords()
-	{
-		return wordLexicon.size();
-	}
-	
-	String getWord(int wordId)
-	{
-		return wordLexicon.lookup(wordId);
-	}
-	
-	public TIntArrayList getPhrase(int phraseId)
-	{
-		return phraseLexicon.lookup(phraseId);
-	}
-	
-	public String getPhraseString(int phraseId)
-	{
-		StringBuffer b = new StringBuffer();
-		for (int tid: getPhrase(phraseId).toNativeArray())
-		{
-			if (b.length() > 0)
-				b.append(" ");
-			b.append(wordLexicon.lookup(tid));
-		}
-		return b.toString();
-	}		
-	
-	public TIntArrayList getContext(int contextId)
-	{
-		return contextLexicon.lookup(contextId);
-	}
-	
-	public String getContextString(int contextId, boolean insertPhraseSentinel)
-	{
-		StringBuffer b = new StringBuffer();
-		TIntArrayList c = getContext(contextId);
-		for (int i = 0; i < c.size(); ++i)
-		{
-			if (i > 0) b.append(" ");
-			//if (i == c.size() / 2) b.append("<PHRASE> ");
-			b.append(wordLexicon.lookup(c.get(i)));
-		}
-		return b.toString();
-	}
-	
-	public boolean isSentinel(int wordId)
-	{
-		return wordId == splitSentinel || wordId == phraseSentinel;
-	}
-	
-	List<Edge> readEdges(Reader in) throws IOException
-	{	
-		// read in line-by-line
-		BufferedReader bin = new BufferedReader(in);
-		String line;
-		Pattern separator = Pattern.compile(" \\|\\|\\| ");
-		
-		List<Edge> edges = new ArrayList<Edge>();
-		while ((line = bin.readLine()) != null)
-		{
-			// split into phrase and contexts
-			StringTokenizer st = new StringTokenizer(line, "\t");
-			assert (st.hasMoreTokens());
-			String phraseToks = st.nextToken();
-			assert (st.hasMoreTokens());
-			String rest = st.nextToken();
-			assert (!st.hasMoreTokens());
-
-			// process phrase	
-			st = new StringTokenizer(phraseToks, " ");
-			TIntArrayList ptoks = new TIntArrayList();
-			while (st.hasMoreTokens())
-				ptoks.add(wordLexicon.insert(st.nextToken()));
-			int phraseId = phraseLexicon.insert(ptoks);
-			
-			// process contexts
-			String[] parts = separator.split(rest);
-			assert (parts.length % 2 == 0);
-			for (int i = 0; i < parts.length; i += 2)
-			{
-				// process pairs of strings - context and count
-				String ctxString = parts[i];
-				String countString = parts[i + 1];
-
-				assert (countString.startsWith("C="));
-
-				String []countToks=countString.split(" ");
-				
-				double count = Double.parseDouble(countToks[0].substring(2).trim());
-				
-				TIntArrayList ctx = new TIntArrayList();
-				StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " ");
-				while (ctxStrtok.hasMoreTokens())
-				{
-					String token = ctxStrtok.nextToken();
-					ctx.add(wordLexicon.insert(token));
-				}
-				int contextId = contextLexicon.insert(ctx);
-
-
-				if(countToks.length<2){
-					edges.add(new Edge(phraseId, contextId, count));
-				}
-				else{
-					int tag=Integer.parseInt(countToks[1].substring(2));
-					edges.add(new Edge(phraseId, contextId, count,tag));
-				}
-			}
-		}
-		return edges;
-	}
-	
-	static Corpus readFromFile(Reader in) throws IOException
-	{	
-		Corpus c = new Corpus();
-		c.edges = c.readEdges(in);
-		for (Edge edge: c.edges)
-		{
-			while (edge.getPhraseId() >= c.phraseToContext.size())
-				c.phraseToContext.add(new ArrayList<Edge>());
-			while (edge.getContextId() >= c.contextToPhrase.size())
-				c.contextToPhrase.add(new ArrayList<Edge>());
-			
-			// index the edge for fast phrase, context lookup
-			c.phraseToContext.get(edge.getPhraseId()).add(edge);
-			c.contextToPhrase.get(edge.getContextId()).add(edge);
-		}
-		return c;
-	}
-		
-	TIntArrayList phraseEdges(TIntArrayList phrase)
-	{
-		TIntArrayList r = new TIntArrayList(4);
-		for (int p = 0; p < phrase.size(); ++p)
-		{
-			if (p == 0 || phrase.get(p-1) == splitSentinel) 				
-				r.add(p);
-			if (p == phrase.size() - 1 || phrase.get(p+1) == splitSentinel) 
-				r.add(p);
-		}
-		return r;
-	}
-
-	public void printStats(PrintStream out) 
-	{
-		out.println("Corpus has " + edges.size() + " edges " + phraseLexicon.size() + " phrases " 
-				+ contextLexicon.size() + " contexts and " + wordLexicon.size() + " word types");
-	}
-}
-\ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java b/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
deleted file mode 100644
index a386e4a3..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Lexicon<T>
-{
-	public int insert(T word)
-	{
-		Integer i = wordToIndex.get(word);
-		if (i == null)
-		{
-			i = indexToWord.size();
-			wordToIndex.put(word, i);
-			indexToWord.add(word);
-		}
-		return i;
-	}
-
-	public T lookup(int index)
-	{
-		return indexToWord.get(index);
-	}
-
-	public int size()
-	{
-		return indexToWord.size();
-	}
-
-	private Map<T, Integer> wordToIndex = new HashMap<T, Integer>();
-	private List<T> indexToWord = new ArrayList<T>();
-}
-\ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
deleted file mode 100644
index c032bb2b..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
+++ /dev/null
@@ -1,540 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-import org.apache.commons.math.special.Gamma;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.regex.Pattern;
-
-import phrase.Corpus.Edge;
-
-
-public class PhraseCluster {
-	
-	public int K;
-	private int n_phrases, n_words, n_contexts, n_positions;
-	public Corpus c;
-	public ExecutorService pool; 
-
-	double[] lambdaPTCT;
-	double[][] lambdaPT;
-	boolean cacheLambda = true;
-
-	// emit[tag][position][word] = p(word | tag, position in context)
-	double emit[][][];
-	// pi[phrase][tag] = p(tag | phrase)
-	double pi[][];
-	
-	public PhraseCluster(int numCluster, Corpus corpus)
-	{
-		K=numCluster;
-		c=corpus;
-		n_words=c.getNumWords();
-		n_phrases=c.getNumPhrases();
-		n_contexts=c.getNumContexts();
-		n_positions=c.getNumContextPositions();
-
-		emit=new double [K][n_positions][n_words];
-		pi=new double[n_phrases][K];
-		
-		for(double [][]i:emit)
-			for(double []j:i)
-				arr.F.randomise(j, true);
-
-		for(double []j:pi)
-			arr.F.randomise(j, true);
-	}
-	
-	void useThreadPool(ExecutorService pool)
-	{
-		this.pool = pool;
-	}
-
-	public double EM(int phraseSizeLimit)
-	{
-		double [][][]exp_emit=new double [K][n_positions][n_words];
-		double []exp_pi=new double[K];
-		
-		for(double [][]i:exp_emit)
-			for(double []j:i)
-				Arrays.fill(j, 1e-10);
-		
-		double loglikelihood=0;
-		
-		//E
-		for(int phrase=0; phrase < n_phrases; phrase++)
-		{
-			if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
-				continue;
-
-			Arrays.fill(exp_pi, 1e-10);
-			
-			List<Edge> contexts = c.getEdgesForPhrase(phrase);
-
-			for (int ctx=0; ctx<contexts.size(); ctx++)
-			{
-				Edge edge = contexts.get(ctx);
-				
-				double p[]=posterior(edge);
-				double z = arr.F.l1norm(p);
-				assert z > 0;
-				loglikelihood += edge.getCount() * Math.log(z);
-				arr.F.l1normalize(p);
-				
-				double count = edge.getCount();
-				//increment expected count
-				TIntArrayList context = edge.getContext();
-				for(int tag=0;tag<K;tag++)
-				{
-					for(int pos=0;pos<n_positions;pos++){
-						exp_emit[tag][pos][context.get(pos)]+=p[tag]*count;
-					}
-					exp_pi[tag]+=p[tag]*count;
-				}
-			}
-			arr.F.l1normalize(exp_pi);
-			System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
-		}
-
-		//M
-		for(double [][]i:exp_emit)
-			for(double []j:i)
-				arr.F.l1normalize(j);
-			
-		emit=exp_emit;
-
-		return loglikelihood;
-	}
-	
-	public double PREM(double scalePT, double scaleCT, int phraseSizeLimit)
-	{
-		if (scaleCT == 0)
-		{
-			if (pool != null)
-				return PREM_phrase_constraints_parallel(scalePT, phraseSizeLimit);
-			else
-				return PREM_phrase_constraints(scalePT, phraseSizeLimit);
-		}
-		else // FIXME: ignores phraseSizeLimit
-			return this.PREM_phrase_context_constraints(scalePT, scaleCT);
-	}
-
-	
-	public double PREM_phrase_constraints(double scalePT, int phraseSizeLimit)
-	{
-		double [][][]exp_emit=new double[K][n_positions][n_words];
-		double []exp_pi=new double[K];
-		
-		for(double [][]i:exp_emit)
-			for(double []j:i)
-				Arrays.fill(j, 1e-10);
-		
-		if (lambdaPT == null && cacheLambda)
-			lambdaPT = new double[n_phrases][];
-		
-		double loglikelihood=0, kl=0, l1lmax=0, primal=0;
-		int failures=0, iterations=0;
-		long start = System.currentTimeMillis();
-		//E
-		for(int phrase=0; phrase<n_phrases; phrase++)
-		{
-			if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
-			{
-				//System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
-				continue;
-			}
-			
-			Arrays.fill(exp_pi, 1e-10);
-			
-			// FIXME: add rare edge check to phrase objective & posterior processing
-			PhraseObjective po = new PhraseObjective(this, phrase, scalePT, (cacheLambda) ? lambdaPT[phrase] : null);
-			boolean ok = po.optimizeWithProjectedGradientDescent();
-			if (!ok) ++failures;
-			if (cacheLambda) lambdaPT[phrase] = po.getParameters();
-			iterations += po.getNumberUpdateCalls();
-			double [][] q=po.posterior();
-			loglikelihood += po.loglikelihood();
-			kl += po.KL_divergence();
-			l1lmax += po.l1lmax();
-			primal += po.primal(scalePT);
-			List<Edge> edges = c.getEdgesForPhrase(phrase);
-
-			for(int edge=0;edge<q.length;edge++){
-				Edge e = edges.get(edge);
-				TIntArrayList context = e.getContext();
-				double contextCnt = e.getCount();
-				//increment expected count
-				for(int tag=0;tag<K;tag++){
-					for(int pos=0;pos<n_positions;pos++){
-						exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
-					}
-					
-					exp_pi[tag]+=q[edge][tag]*contextCnt;
-					
-				}
-			}
-			arr.F.l1normalize(exp_pi);
-			System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
-		}
-		
-		long end = System.currentTimeMillis();
-		if (failures > 0)
-			System.out.println("WARNING: failed to converge in " + failures + "/" + n_phrases + " cases");
-		System.out.println("\tmean iters:     " + iterations/(double)n_phrases + " elapsed time " + (end - start) / 1000.0);
-		System.out.println("\tllh:            " + loglikelihood);
-		System.out.println("\tKL:             " + kl);
-		System.out.println("\tphrase l1lmax:  " + l1lmax);
-		
-		//M
-		for(double [][]i:exp_emit)
-			for(double []j:i)
-				arr.F.l1normalize(j);
-		emit=exp_emit;
-		
-		return primal;
-	}
-
-	public double PREM_phrase_constraints_parallel(final double scalePT, int phraseSizeLimit)
-	{
-		assert(pool != null);
-		
-		final LinkedBlockingQueue<PhraseObjective> expectations 
-			= new LinkedBlockingQueue<PhraseObjective>();
-		
-		double [][][]exp_emit=new double [K][n_positions][n_words];
-		double [][]exp_pi=new double[n_phrases][K];
-		
-		for(double [][]i:exp_emit)
-			for(double []j:i)
-				Arrays.fill(j, 1e-10);
-		for(double []j:exp_pi)
-			Arrays.fill(j, 1e-10);
-		
-		double loglikelihood=0, kl=0, l1lmax=0, primal=0;
-		final AtomicInteger failures = new AtomicInteger(0);
-		final AtomicLong elapsed = new AtomicLong(0l);
-		int iterations=0;
-		long start = System.currentTimeMillis();
-		List<Future<PhraseObjective>> results = new ArrayList<Future<PhraseObjective>>();
-		
-		if (lambdaPT == null && cacheLambda)
-			lambdaPT = new double[n_phrases][];
-
-		//E
-		for(int phrase=0;phrase<n_phrases;phrase++) {
-			if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit) {
-				System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
-				continue;
-			}
-
-			final int p=phrase;
-			results.add(pool.submit(new Callable<PhraseObjective>() {
-				public PhraseObjective call() {
-					//System.out.println("" + Thread.currentThread().getId() + " optimising lambda for " + p);
-					long start = System.currentTimeMillis();
-					PhraseObjective po = new PhraseObjective(PhraseCluster.this, p, scalePT, (cacheLambda) ? lambdaPT[p] : null);
-					boolean ok = po.optimizeWithProjectedGradientDescent();
-					if (!ok) failures.incrementAndGet();
-					long end = System.currentTimeMillis();
-					elapsed.addAndGet(end - start);
-					//System.out.println("" + Thread.currentThread().getId() + " done optimising lambda for " + p);
-					return po;
-				}
-			}));
-		}
-		
-		// aggregate the expectations as they become available
-		for (Future<PhraseObjective> fpo : results)
-		{
-			try {
-				//System.out.println("" + Thread.currentThread().getId() + " reading queue #" + count);
-
-				// wait (blocking) until something is ready
-				PhraseObjective po = fpo.get();
-				// process
-				int phrase = po.phrase;
-				if (cacheLambda) lambdaPT[phrase] = po.getParameters();
-				//System.out.println("" + Thread.currentThread().getId() + " taken phrase " + phrase);
-				double [][] q=po.posterior();
-				loglikelihood += po.loglikelihood();
-				kl += po.KL_divergence();
-				l1lmax += po.l1lmax();
-				primal += po.primal(scalePT);
-				iterations += po.getNumberUpdateCalls();
-
-				List<Edge> edges = c.getEdgesForPhrase(phrase);
-				for(int edge=0;edge<q.length;edge++){
-					Edge e = edges.get(edge);
-					TIntArrayList context = e.getContext();
-					double contextCnt = e.getCount();
-					//increment expected count
-					for(int tag=0;tag<K;tag++){
-						for(int pos=0;pos<n_positions;pos++){
-							exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
-						}
-						exp_pi[phrase][tag]+=q[edge][tag]*contextCnt;
-					}
-				}
-			} catch (InterruptedException e) {
-				System.err.println("M-step thread interrupted. Probably fatal!");
-				throw new RuntimeException(e);
-			} catch (ExecutionException e) {
-				System.err.println("M-step thread execution died. Probably fatal!");
-				throw new RuntimeException(e);
-			}
-		}
-		
-		long end = System.currentTimeMillis();
-		
-		if (failures.get() > 0)
-			System.out.println("WARNING: failed to converge in " + failures.get() + "/" + n_phrases + " cases");
-		System.out.println("\tmean iters:     " + iterations/(double)n_phrases + " walltime " + (end-start)/1000.0 + " threads " + elapsed.get() / 1000.0);
-		System.out.println("\tllh:            " + loglikelihood);
-		System.out.println("\tKL:             " + kl);
-		System.out.println("\tphrase l1lmax:  " + l1lmax);
-		
-		//M
-		for(double [][]i:exp_emit)
-			for(double []j:i)
-				arr.F.l1normalize(j);
-		emit=exp_emit;
-		
-		for(double []j:exp_pi)
-			arr.F.l1normalize(j);
-		pi=exp_pi;
-		
-		return primal;
-	}
-	
-	public double PREM_phrase_context_constraints(double scalePT, double scaleCT)
-	{	
-		double[][][] exp_emit = new double [K][n_positions][n_words];
-		double[][] exp_pi = new double[n_phrases][K];
-
-		//E step
-		PhraseContextObjective pco = new PhraseContextObjective(this, lambdaPTCT, pool, scalePT, scaleCT);
-		boolean ok = pco.optimizeWithProjectedGradientDescent();
-		if (cacheLambda) lambdaPTCT = pco.getParameters();
-
-		//now extract expectations
-		List<Corpus.Edge> edges = c.getEdges();
-		for(int e = 0; e < edges.size(); ++e)
-		{
-			double [] q = pco.posterior(e);
-			Corpus.Edge edge = edges.get(e);
-
-			TIntArrayList context = edge.getContext();
-			double contextCnt = edge.getCount();
-			//increment expected count
-			for(int tag=0;tag<K;tag++)
-			{
-				for(int pos=0;pos<n_positions;pos++)
-					exp_emit[tag][pos][context.get(pos)]+=q[tag]*contextCnt;
-				exp_pi[edge.getPhraseId()][tag]+=q[tag]*contextCnt;
-			}
-		}
-		
-		System.out.println("\tllh:            " + pco.loglikelihood());
-		System.out.println("\tKL:             " + pco.KL_divergence());
-		System.out.println("\tphrase l1lmax:  " + pco.phrase_l1lmax());
-		System.out.println("\tcontext l1lmax: " + pco.context_l1lmax());
-		
-		//M step
-		for(double [][]i:exp_emit)
-			for(double []j:i)
-				arr.F.l1normalize(j);
-		emit=exp_emit;
-		
-		for(double []j:exp_pi)
-			arr.F.l1normalize(j);
-		pi=exp_pi;
-		
-		return pco.primal();
-	}	
-		
-	/**
-	 * @param phrase index of phrase
-	 * @param ctx array of context
-	 * @return unnormalized posterior
-	 */
-	public double[] posterior(Corpus.Edge edge) 
-	{
-		double[] prob;
-		
-		if(edge.getTag()>=0){
-			prob=new double[K];
-			prob[edge.getTag()]=1;
-			return prob;
-		}
-		
-		if (edge.getPhraseId() < n_phrases)
-			prob = Arrays.copyOf(pi[edge.getPhraseId()], K);
-		else
-		{
-			prob = new double[K];
-			Arrays.fill(prob, 1.0);
-		}
-		
-		TIntArrayList ctx = edge.getContext();
-		for(int tag=0;tag<K;tag++)
-		{
-			for(int c=0;c<n_positions;c++)
-			{
-				int word = ctx.get(c);
-				if (!this.c.isSentinel(word) && word < n_words)
-					prob[tag]*=emit[tag][c][word];
-			}
-		}
-		
-		return prob;
-	}
-	
-	public void displayPosterior(PrintStream ps, List<Edge> testing)
-	{	
-		for (Edge edge : testing)
-		{
-			double probs[] = posterior(edge);
-			arr.F.l1normalize(probs);
-
-			// emit phrase
-			ps.print(edge.getPhraseString());
-			ps.print("\t");
-			ps.print(edge.getContextString(true));
-			int t=arr.F.argmax(probs);
-			ps.println(" ||| C=" + t + " T=" + edge.getCount() + " P=" + probs[t]);
-			//ps.println("# probs " + Arrays.toString(probs));
-		}
-	}
-	
-	public void displayModelParam(PrintStream ps)
-	{
-		final double EPS = 1e-6;
-		ps.println("phrases " + n_phrases + " tags " + K + " positions " + n_positions);
-		
-		for (int i = 0; i < n_phrases; ++i)
-			for(int j=0;j<pi[i].length;j++)
-				if (pi[i][j] > EPS)
-					ps.println(i + " " + j + " " + pi[i][j]);
-
-		ps.println();
-		for (int i = 0; i < K; ++i)
-		{
-			for(int position=0;position<n_positions;position++)
-			{
-				for(int word=0;word<emit[i][position].length;word++)
-				{
-					if (emit[i][position][word] > EPS)
-						ps.println(i + " " + position + " " + word + " " + emit[i][position][word]);
-				}
-			}
-		}
-	}
-	
-	double phrase_l1lmax()
-	{
-		double sum=0;
-		for(int phrase=0; phrase<n_phrases; phrase++)
-		{
-			double [] maxes = new double[K];
-			for (Edge edge : c.getEdgesForPhrase(phrase))
-			{
-				double p[] = posterior(edge);
-				arr.F.l1normalize(p);
-				for(int tag=0;tag<K;tag++)
-					maxes[tag] = Math.max(maxes[tag], p[tag]);
-			}
-			for(int tag=0;tag<K;tag++)
-				sum += maxes[tag];
-		}
-		return sum;
-	}
-
-	double context_l1lmax()
-	{
-		double sum=0;
-		for(int context=0; context<n_contexts; context++)
-		{
-			double [] maxes = new double[K];
-			for (Edge edge : c.getEdgesForContext(context))
-			{
-				double p[] = posterior(edge);
-				arr.F.l1normalize(p);
-				for(int tag=0;tag<K;tag++)
-					maxes[tag] = Math.max(maxes[tag], p[tag]);
-			}
-			for(int tag=0;tag<K;tag++)
-				sum += maxes[tag];
-		}
-		return sum;
-	}
-
-	public void loadParameters(BufferedReader input) throws IOException
-	{	
-		final double EPS = 1e-50;
-		
-		// overwrite pi, emit with ~zeros
-		for(double [][]i:emit)
-			for(double []j:i)
-				Arrays.fill(j, EPS);
-
-		for(double []j:pi)
-			Arrays.fill(j, EPS);
-
-		String line = input.readLine();
-		assert line != null;
-
-		Pattern space = Pattern.compile(" +");
-		String[] parts = space.split(line);
-		assert parts.length == 6;
-
-		assert parts[0].equals("phrases");
-		int phrases = Integer.parseInt(parts[1]);
-		int tags = Integer.parseInt(parts[3]);
-		int positions = Integer.parseInt(parts[5]);
-		
-		assert phrases == n_phrases;
-		assert tags == K;
-		assert positions == n_positions;
-
-		// read in pi
-		while ((line = input.readLine()) != null)
-		{
-			line = line.trim();
-			if (line.isEmpty()) break;
-			
-			String[] tokens = space.split(line);
-			assert tokens.length == 3;
-			int p = Integer.parseInt(tokens[0]);
-			int t = Integer.parseInt(tokens[1]);
-			double v = Double.parseDouble(tokens[2]);
-
-			pi[p][t] = v;
-		}
-		
-		// read in emissions
-		while ((line = input.readLine()) != null)
-		{
-			String[] tokens = space.split(line);
-			assert tokens.length == 4;
-			int t = Integer.parseInt(tokens[0]);
-			int p = Integer.parseInt(tokens[1]);
-			int w = Integer.parseInt(tokens[2]);
-			double v = Double.parseDouble(tokens[3]);
-
-			emit[t][p][w] = v;
-		}
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
deleted file mode 100644
index 646ff392..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
+++ /dev/null
@@ -1,436 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import phrase.Corpus.Edge;
-
-public class PhraseContextObjective extends ProjectedObjective
-{
-	private static final double GRAD_DIFF = 0.00002;
-	private static double INIT_STEP_SIZE = 300;
-	private static double VAL_DIFF = 1e-8;
-	private static int ITERATIONS = 20;
-	boolean debug = false;
-	
-	private PhraseCluster c;
-	
-	// un-regularized unnormalized posterior, p[edge][tag]
-	// P(tag|edge) \propto P(tag|phrase)P(context|tag)
-	private double p[][];
-
-	// regularized unnormalized posterior 
-	// q[edge][tag] propto p[edge][tag]*exp(-lambda)
-	private double q[][];
-	private List<Corpus.Edge> data;
-	
-	// log likelihood under q
-	private double loglikelihood;
-	private SimplexProjection projectionPhrase;
-	private SimplexProjection projectionContext;
-	
-	double[] newPoint;
-	private int n_param;
-	
-	// likelihood under p
-	public double llh;
-	
-	private static Map<Corpus.Edge, Integer> edgeIndex;
-	
-	private long projectionTime;
-	private long objectiveTime;
-	private long actualProjectionTime;
-	private ExecutorService pool;
-	
-	double scalePT;
-	double scaleCT;
-	
-	public PhraseContextObjective(PhraseCluster cluster, double[] startingParameters, ExecutorService pool,
-			double scalePT, double scaleCT)
-	{
-		c=cluster;
-		data=c.c.getEdges();
-		n_param=data.size()*c.K*2;
-		this.pool=pool;
-		this.scalePT = scalePT;
-		this.scaleCT = scaleCT;
-		
-		parameters = startingParameters;
-		if (parameters == null)
-			parameters = new double[n_param];
-		
-		System.out.println("Num parameters " + n_param);
-		newPoint = new double[n_param];
-		gradient = new double[n_param];
-		initP();
-		projectionPhrase = new SimplexProjection(scalePT);
-		projectionContext = new SimplexProjection(scaleCT);
-		q=new double [data.size()][c.K];
-		
-		if (edgeIndex == null) {
-			edgeIndex = new HashMap<Edge, Integer>();
-			for (int e=0; e<data.size(); e++)
-			{
-				edgeIndex.put(data.get(e), e);
-				//if (debug) System.out.println("Edge " + data.get(e) + " index " + e);
-			}
-		}
-		
-		setParameters(parameters);
-	}
-
-	private void initP(){
-		p=new double[data.size()][];
-		for(int edge=0;edge<data.size();edge++)
-		{
-			p[edge]=c.posterior(data.get(edge));
-			llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge]));
-			arr.F.l1normalize(p[edge]);
-		}
-	}
-	
-	@Override
-	public void setParameters(double[] params) {
-		//System.out.println("setParameters " + Arrays.toString(parameters));
-		// TODO: test if params have changed and skip update otherwise
-		super.setParameters(params);
-		updateFunction();
-	}
-	
-	private void updateFunction()
-	{
-		updateCalls++;
-		loglikelihood=0;
-
-		System.out.print(".");
-		System.out.flush();
-
-		long begin = System.currentTimeMillis();
-		for (int e=0; e<data.size(); e++) 
-		{
-			Edge edge = data.get(e);
-			for(int tag=0; tag<c.K; tag++)
-			{
-				int ip = index(e, tag, true);
-				int ic = index(e, tag, false);
-				q[e][tag] = p[e][tag]*
-					Math.exp((-parameters[ip]-parameters[ic]) / edge.getCount());
-				//if (debug)
-					//System.out.println("\tposterior " + edge + " with tag " + tag + " p " + p[e][tag] + " params " + parameters[ip] + " and " + parameters[ic] + " q " + q[e][tag]);
-			}
-		}
-	
-		for(int edge=0;edge<data.size();edge++) {
-			loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
-			arr.F.l1normalize(q[edge]);
-		}
-		
-		for (int e=0; e<data.size(); e++) 
-		{
-			for(int tag=0; tag<c.K; tag++)
-			{
-				int ip = index(e, tag, true);
-				int ic = index(e, tag, false);
-				gradient[ip]=-q[e][tag];
-				gradient[ic]=-q[e][tag];
-			}
-		}
-		//if (debug) {
-			//System.out.println("objective " + loglikelihood + " ||gradient||_2: " + arr.F.l2norm(gradient));		
-			//System.out.println("gradient " + Arrays.toString(gradient));
-		//}
-		objectiveTime += System.currentTimeMillis() - begin;
-	}
-	
-	@Override
-	public double[] projectPoint(double[] point) 
-	{
-		long begin = System.currentTimeMillis();
-		List<Future<?>> tasks = new ArrayList<Future<?>>();
-		
-		System.out.print(",");
-		System.out.flush();
-
-		Arrays.fill(newPoint, 0, newPoint.length, 0);
-		
-		// first project using the phrase-tag constraints,
-		// for all p,t: sum_c lambda_ptc < scaleP 
-		if (pool == null)
-		{
-			for (int p = 0; p < c.c.getNumPhrases(); ++p)
-			{
-				List<Edge> edges = c.c.getEdgesForPhrase(p);
-				double[] toProject = new double[edges.size()];
-				for(int tag=0;tag<c.K;tag++)
-				{
-					// FIXME: slow hash lookup for e (twice)
-					for(int e=0; e<edges.size(); e++) 						
-						toProject[e] = point[index(edges.get(e), tag, true)];
-					long lbegin = System.currentTimeMillis();
-					projectionPhrase.project(toProject);
-					actualProjectionTime += System.currentTimeMillis() - lbegin;
-					for(int e=0; e<edges.size(); e++)
-						newPoint[index(edges.get(e), tag, true)] = toProject[e];
-				}
-			}
-		}
-		else // do above in parallel using thread pool
-		{	
-			for (int p = 0; p < c.c.getNumPhrases(); ++p)
-			{
-				final int phrase = p;
-				final double[] inPoint = point;
-				Runnable task = new Runnable()
-				{
-					public void run()
-					{
-						List<Edge> edges = c.c.getEdgesForPhrase(phrase);
-						double toProject[] = new double[edges.size()];
-						for(int tag=0;tag<c.K;tag++)
-						{
-							// FIXME: slow hash lookup for e
-							for(int e=0; e<edges.size(); e++)
-								toProject[e] = inPoint[index(edges.get(e), tag, true)];
-							projectionPhrase.project(toProject);
-							for(int e=0; e<edges.size(); e++)
-								newPoint[index(edges.get(e), tag, true)] = toProject[e];
-						}
-					}		
-				};
-				tasks.add(pool.submit(task));
-			}
-		}
-		//System.out.println("after PT " + Arrays.toString(newPoint));
-	
-		// now project using the context-tag constraints,
-		// for all c,t: sum_p omega_pct < scaleC
-		if (pool == null)
-		{
-			for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
-			{
-				List<Edge> edges = c.c.getEdgesForContext(ctx);
-				double toProject[] = new double[edges.size()];
-				for(int tag=0;tag<c.K;tag++)
-				{
-					// FIXME: slow hash lookup for e
-					for(int e=0; e<edges.size(); e++)
-						toProject[e] = point[index(edges.get(e), tag, false)];
-					long lbegin = System.currentTimeMillis();
-					projectionContext.project(toProject);
-					actualProjectionTime += System.currentTimeMillis() - lbegin;
-					for(int e=0; e<edges.size(); e++)
-						newPoint[index(edges.get(e), tag, false)] = toProject[e];
-				}
-			}
-		}
-		else
-		{
-			// do above in parallel using thread pool
-			for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
-			{
-				final int context = ctx;
-				final double[] inPoint = point;
-				Runnable task = new Runnable()
-				{
-					public void run()
-					{
-						List<Edge> edges = c.c.getEdgesForContext(context);
-						double toProject[] = new double[edges.size()];
-						for(int tag=0;tag<c.K;tag++)
-						{
-							// FIXME: slow hash lookup for e
-							for(int e=0; e<edges.size(); e++)
-								toProject[e] = inPoint[index(edges.get(e), tag, false)];
-							projectionContext.project(toProject);
-							for(int e=0; e<edges.size(); e++)
-								newPoint[index(edges.get(e), tag, false)] = toProject[e];
-						}
-					}
-				};
-				tasks.add(pool.submit(task));
-			}
-		}
-		
-		if (pool != null)
-		{
-			// wait for all the jobs to complete
-			Exception failure = null;
-			for (Future<?> task: tasks)
-			{
-				try {
-					task.get();
-				} catch (InterruptedException e) {
-					System.err.println("ERROR: Projection thread interrupted");
-					e.printStackTrace();
-					failure = e;
-				} catch (ExecutionException e) {
-					System.err.println("ERROR: Projection thread died");
-					e.printStackTrace();
-					failure = e;
-				}
-			}
-			// rethrow the exception
-			if (failure != null)
-			{
-				pool.shutdownNow();
-				throw new RuntimeException(failure);
-			}
-		}
-		
-		double[] tmp = newPoint;
-		newPoint = point;
-		projectionTime += System.currentTimeMillis() - begin;
-		
-		//if (debug)
-			//System.out.println("\t\treturning " + Arrays.toString(tmp));
-		return tmp;
-	}
-	
-	private int index(Edge edge, int tag, boolean phrase)
-	{
-		// NB if indexing changes must also change code in updateFunction and constructor
-		if (phrase)
-			return tag * edgeIndex.size() + edgeIndex.get(edge);
-		else
-			return (c.K + tag) * edgeIndex.size() + edgeIndex.get(edge);
-	}
-
-	private int index(int e, int tag, boolean phrase)
-	{
-		// NB if indexing changes must also change code in updateFunction and constructor
-		if (phrase)
-			return tag * edgeIndex.size() + e;
-		else
-			return (c.K + tag) * edgeIndex.size() + e;
-	}
-	
-	@Override
-	public double[] getGradient() {
-		gradientCalls++;
-		return gradient;
-	}
-
-	@Override
-	public double getValue() {
-		functionCalls++;
-		return loglikelihood;
-	}
-
-	@Override
-	public String toString() {
-		return "No need for pointless toString";
-	}
-
-	public double []posterior(int edgeIndex){
-		return q[edgeIndex];
-	}
-	
-	public boolean optimizeWithProjectedGradientDescent()
-	{
-		projectionTime = 0;
-		actualProjectionTime = 0;
-		objectiveTime = 0;
-		long start = System.currentTimeMillis();
-
-		LineSearchMethod ls =
-			new ArmijoLineSearchMinimizationAlongProjectionArc
-				(new InterpolationPickFirstStep(INIT_STEP_SIZE));
-		//LineSearchMethod  ls = new WolfRuleLineSearch(
-		//		(new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
-		OptimizerStats stats = new OptimizerStats();
-		
-		
-		ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
-		StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
-		StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
-		CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
-		compositeStop.add(stopGrad);
-		compositeStop.add(stopValue);
-		optimizer.setMaxIterations(ITERATIONS);
-		updateFunction();
-		boolean success = optimizer.optimize(this,stats,compositeStop);
-
-		System.out.println();
-		System.out.println(stats.prettyPrint(1));
-		
-		if (success)
-			System.out.print("\toptimization took " + optimizer.getCurrentIteration() + " iterations");
-	 	else
-			System.out.print("\toptimization failed to converge");
-		long total = System.currentTimeMillis() - start;
-		System.out.println(" and " + total + " ms: projection " + projectionTime + 
-				" actual " + actualProjectionTime + " objective " + objectiveTime);
-
-		return success;
-	}
-	
-	double loglikelihood()
-	{
-		return llh;
-	}
-	
-	double KL_divergence()
-	{
-		return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
-	}
-	
-	double phrase_l1lmax()
-	{
-		// \sum_{tag,phrase} max_{context} P(tag|context,phrase)
-		double sum=0;
-		for (int p = 0; p < c.c.getNumPhrases(); ++p)
-		{
-			List<Edge> edges = c.c.getEdgesForPhrase(p);
-			for(int tag=0;tag<c.K;tag++)
-			{
-				double max=0;
-				for (Edge edge: edges)
-					max = Math.max(max, q[edgeIndex.get(edge)][tag]);
-				sum+=max;
-			}	
-		}
-		return sum;
-	}
-	
-	double context_l1lmax()
-	{
-		// \sum_{tag,context} max_{phrase} P(tag|context,phrase)
-		double sum=0;
-		for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
-		{
-			List<Edge> edges = c.c.getEdgesForContext(ctx);
-			for(int tag=0; tag<c.K; tag++)
-			{
-				double max=0;
-				for (Edge edge: edges)
-					max = Math.max(max, q[edgeIndex.get(edge)][tag]);
-				sum+=max;
-			}	
-		}
-		return sum;
-	}
-	
-	// L - KL(q||p) - scalePT * l1lmax_phrase - scaleCT * l1lmax_context
-	public double primal()
-	{
-		return loglikelihood() - KL_divergence() - scalePT * phrase_l1lmax() - scaleCT * context_l1lmax();
-	}
-}
-\ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
deleted file mode 100644
index 0cf31c1c..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
+++ /dev/null
@@ -1,193 +0,0 @@
-package phrase;
-
-import io.FileUtil;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class PhraseCorpus 
-{
-	public HashMap<String,Integer>wordLex;
-	public HashMap<String,Integer>phraseLex;
-	
-	public String wordList[];
-	public String phraseList[];
-	
-	//data[phrase][num context][position]
-	public int data[][][];
-	public int numContexts;	
-
-	public PhraseCorpus(String filename) throws FileNotFoundException, IOException
-	{
-		BufferedReader r = FileUtil.reader(new File(filename));
-		
-		phraseLex=new HashMap<String,Integer>();
-		wordLex=new HashMap<String,Integer>();
-		
-		ArrayList<int[][]>dataList=new ArrayList<int[][]>();
-		String line=null;
-		numContexts = 0;
-		
-		while((line=readLine(r))!=null){
-			
-			String toks[]=line.split("\t");
-			String phrase=toks[0];
-			addLex(phrase,phraseLex);
-			
-			toks=toks[1].split(" \\|\\|\\| ");
-			
-			ArrayList <int[]>ctxList=new ArrayList<int[]>();
-			
-			for(int i=0;i<toks.length;i+=2){
-				String ctx=toks[i];
-				String words[]=ctx.split(" ");
-				if (numContexts == 0)
-					numContexts = words.length - 1;
-				else
-					assert numContexts == words.length - 1;
-				
-				int []context=new int [numContexts+1];
-				int idx=0;
-				for(String word:words){
-					if(word.equals("<PHRASE>")){
-						continue;
-					}
-					addLex(word,wordLex);
-					context[idx]=wordLex.get(word);
-					idx++;
-				}
-				
-				String count=toks[i+1];
-				context[idx]=Integer.parseInt(count.trim().substring(2));
-				
-				ctxList.add(context);
-			}
-			
-			dataList.add(ctxList.toArray(new int [0][]));
-			
-		}
-		try{
-			r.close();
-		}catch(IOException ioe){
-			ioe.printStackTrace();
-		}
-		data=dataList.toArray(new int[0][][]);
-	}
-
-	private void addLex(String key, HashMap<String,Integer>lex){
-		Integer i=lex.get(key);
-		if(i==null){
-			lex.put(key, lex.size());
-		}
-	}
-	
-	//for debugging
-	public void saveLex(String lexFilename) throws FileNotFoundException, IOException
-	{
-		PrintStream ps = FileUtil.printstream(new File(lexFilename));
-		ps.println("Phrase Lexicon");
-		ps.println(phraseLex.size());
-		printDict(phraseLex,ps);
-		
-		ps.println("Word Lexicon");
-		ps.println(wordLex.size());
-		printDict(wordLex,ps);
-		ps.close();
-	}
-	
-	private static void printDict(HashMap<String,Integer>lex,PrintStream ps){
-		String []dict=buildList(lex);
-		for(int i=0;i<dict.length;i++){
-			ps.println(dict[i]);
-		}
-	}
-	
-	public void loadLex(String lexFilename){
-		Scanner sc=io.FileUtil.openInFile(lexFilename);
-		
-		sc.nextLine();
-		int size=sc.nextInt();
-		sc.nextLine();
-		String[]dict=new String[size];
-		for(int i=0;i<size;i++){
-			dict[i]=sc.nextLine();
-		}
-		phraseLex=buildMap(dict);
-
-		sc.nextLine();
-		size=sc.nextInt();
-		sc.nextLine();
-		dict=new String[size];
-		for(int i=0;i<size;i++){
-			dict[i]=sc.nextLine();
-		}
-		wordLex=buildMap(dict);
-		sc.close();
-	}
-	
-	private HashMap<String, Integer> buildMap(String[]dict){
-		HashMap<String,Integer> map=new HashMap<String,Integer>();
-		for(int i=0;i<dict.length;i++){
-			map.put(dict[i], i);
-		}
-		return map;
-	}
-	
-	public void buildList(){
-		if(wordList==null){
-			wordList=buildList(wordLex);
-			phraseList=buildList(phraseLex);
-		}
-	}
-	
-	private static String[]buildList(HashMap<String,Integer>lex){
-		String dict[]=new String [lex.size()];
-		for(String key:lex.keySet()){
-			dict[lex.get(key)]=key;
-		}
-		return dict;
-	}
-	
-	public String getContextString(int context[], boolean addPhraseMarker)
-	{
-		StringBuffer b = new StringBuffer();
-		for (int i=0;i<context.length-1;i++)
-		{
-			if (b.length() > 0)
-				b.append(" ");
-
-			if (i == context.length/2)
-				b.append("<PHRASE> ");
-			
-			b.append(wordList[context[i]]);
-		}
-		return b.toString();
-	}
-	
-	public static String readLine(BufferedReader r){
-		try{
-			return r.readLine();
-		}
-		catch(IOException ioe){
-			ioe.printStackTrace();
-		}
-		return null;
-	}
-
-	public static void main(String[] args) throws Exception 
-	{
-		String LEX_FILENAME="../pdata/lex.out";
-		String DATA_FILENAME="../pdata/btec.con";
-		PhraseCorpus c=new PhraseCorpus(DATA_FILENAME);
-		c.saveLex(LEX_FILENAME);
-		c.loadLex(LEX_FILENAME);
-		c.saveLex(LEX_FILENAME);
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
deleted file mode 100644
index ac73a075..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
+++ /dev/null
@@ -1,224 +0,0 @@
-package phrase;
-
-import java.util.Arrays;
-import java.util.List;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-
-public class PhraseObjective extends ProjectedObjective
-{
-	static final double GRAD_DIFF = 0.00002;
-	static double INIT_STEP_SIZE = 300;
-	static double VAL_DIFF = 1e-8; // tuned to BTEC subsample
-	static int ITERATIONS = 100;
-	private PhraseCluster c;
-	
-	/**@brief
-	 *  for debugging purposes
-	 */
-	//public static PrintStream ps;
-	
-	/**@brief current phrase being optimzed*/
-	public int phrase;
-
-	/**@brief un-regularized posterior
-	 * unnormalized
-	 * p[edge][tag]
-	*  P(tag|edge) \propto P(tag|phrase)P(context|tag)
-	 */
-	private double[][]p;
-
-	/**@brief regularized posterior
-	 * q[edge][tag] propto p[edge][tag]*exp(-lambda)
-	 */
-	private double q[][];
-	private List<Corpus.Edge> data;
-	
-	/**@brief log likelihood of the associated phrase
-	 * 
-	 */
-	private double loglikelihood;
-	private SimplexProjection projection;
-	
-	double[] newPoint  ;
-	
-	private int n_param;
-	
-	/**@brief likelihood under p
-	 * 
-	 */
-	public double llh;
-	
-	public PhraseObjective(PhraseCluster cluster, int phraseIdx, double scale, double[] lambda){
-		phrase=phraseIdx;
-		c=cluster;
-		data=c.c.getEdgesForPhrase(phrase);
-		n_param=data.size()*c.K;
-		//System.out.println("Num parameters " + n_param + " for phrase #" + phraseIdx);
-		
-		if (lambda==null) 
-			lambda=new double[n_param];
-		
-		parameters = lambda;
-		newPoint = new double[n_param];
-		gradient = new double[n_param];
-		initP();
-		projection=new SimplexProjection(scale);
-		q=new double [data.size()][c.K];
-
-		setParameters(parameters);
-	}
-
-	private void initP(){
-		p=new double[data.size()][];
-		for(int edge=0;edge<data.size();edge++){
-			p[edge]=c.posterior(data.get(edge));
-			llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge])); // Was bug here - count inside log!
-			arr.F.l1normalize(p[edge]);
-		}
-	}
-	
-	@Override
-	public void setParameters(double[] params) {
-		super.setParameters(params);
-		updateFunction();
-	}
-	
-	private void updateFunction(){
-		updateCalls++;
-		loglikelihood=0;
-
-		for(int tag=0;tag<c.K;tag++){
-			for(int edge=0;edge<data.size();edge++){
-				q[edge][tag]=p[edge][tag]*
-					Math.exp(-parameters[tag*data.size()+edge]/data.get(edge).getCount());
-			}
-		}
-	
-		for(int edge=0;edge<data.size();edge++){
-			loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
-			arr.F.l1normalize(q[edge]);
-		}
-		
-		for(int tag=0;tag<c.K;tag++){
-			for(int edge=0;edge<data.size();edge++){
-				gradient[tag*data.size()+edge]=-q[edge][tag];
-			}
-		}
-	}
-	
-	@Override
-	public double[] projectPoint(double[] point) 
-	{
-		double toProject[]=new double[data.size()];
-		for(int tag=0;tag<c.K;tag++){
-			for(int edge=0;edge<data.size();edge++){
-				toProject[edge]=point[tag*data.size()+edge];
-			}
-			projection.project(toProject);
-			for(int edge=0;edge<data.size();edge++){
-				newPoint[tag*data.size()+edge]=toProject[edge];
-			}
-		}
-		return newPoint;
-	}
-
-	@Override
-	public double[] getGradient() {
-		gradientCalls++;
-		return gradient;
-	}
-
-	@Override
-	public double getValue() {
-		functionCalls++;
-		return loglikelihood;
-	}
-
-	@Override
-	public String toString() {
-		return Arrays.toString(parameters);
-	}
-
-	public double [][]posterior(){
-		return q;
-	}
-	
-	long optimizationTime;
-	
-	public boolean optimizeWithProjectedGradientDescent(){
-		long start = System.currentTimeMillis();
-		
-		LineSearchMethod ls =
-			new ArmijoLineSearchMinimizationAlongProjectionArc
-				(new InterpolationPickFirstStep(INIT_STEP_SIZE));
-		//LineSearchMethod  ls = new WolfRuleLineSearch(
-		//		(new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
-		OptimizerStats stats = new OptimizerStats();
-		
-		
-		ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
-		StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
-		StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
-		CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
-		compositeStop.add(stopGrad);
-		compositeStop.add(stopValue);
-		optimizer.setMaxIterations(ITERATIONS);
-		updateFunction();
-		boolean success = optimizer.optimize(this,stats,compositeStop);
-		//System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
-		//if(succed){
-			//System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
-		//}else{
-//			System.out.println("Failed to optimize");
-		//}
-		//System.out.println(Arrays.toString(parameters));
-		
-		//	for(int edge=0;edge<data.getSize();edge++){
-		//	ps.println(Arrays.toString(q[edge]));
-		//	}
-
-		return success;
-	}
-	
-	public double KL_divergence()
-	{
-		return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
-	}
-	
-	public double loglikelihood()
-	{
-		return llh;
-	}
-	
-	public double l1lmax()
-	{
-		double sum=0;
-		for(int tag=0;tag<c.K;tag++){
-			double max=0;
-			for(int edge=0;edge<data.size();edge++){
-				if(q[edge][tag]>max)
-					max=q[edge][tag];
-			}
-			sum+=max;
-		}
-		return sum;
-	}
-
-	public double primal(double scale)
-	{
-		return loglikelihood() - KL_divergence() - scale * l1lmax();	
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
deleted file mode 100644
index 6f302b20..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
+++ /dev/null
@@ -1,257 +0,0 @@
-package phrase;
-
-import io.FileUtil;
-import joptsimple.OptionParser;
-import joptsimple.OptionSet;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import phrase.Corpus.Edge;
-
-import arr.F;
-
-public class Trainer 
-{
-	public static void main(String[] args) 
-	{
-        OptionParser parser = new OptionParser();
-        parser.accepts("help");
-        parser.accepts("in").withRequiredArg().ofType(File.class);
-        parser.accepts("in1").withRequiredArg().ofType(File.class);
-        parser.accepts("test").withRequiredArg().ofType(File.class);
-        parser.accepts("out").withRequiredArg().ofType(File.class);
-        parser.accepts("start").withRequiredArg().ofType(File.class);
-        parser.accepts("parameters").withRequiredArg().ofType(File.class);
-        parser.accepts("topics").withRequiredArg().ofType(Integer.class).defaultsTo(5);
-        parser.accepts("iterations").withRequiredArg().ofType(Integer.class).defaultsTo(10);
-        parser.accepts("threads").withRequiredArg().ofType(Integer.class).defaultsTo(0);
-        parser.accepts("scale-phrase").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
-        parser.accepts("scale-context").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
-        parser.accepts("seed").withRequiredArg().ofType(Long.class).defaultsTo(0l);
-        parser.accepts("convergence-threshold").withRequiredArg().ofType(Double.class).defaultsTo(1e-6);
-        parser.accepts("variational-bayes");
-        parser.accepts("alpha-emit").withRequiredArg().ofType(Double.class).defaultsTo(0.1);
-        parser.accepts("alpha-pi").withRequiredArg().ofType(Double.class).defaultsTo(0.0001);
-        parser.accepts("agree-direction");
-        parser.accepts("agree-language");
-        parser.accepts("no-parameter-cache");
-        parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5);
-        OptionSet options = parser.parse(args);
-
-        if (options.has("help") || !options.has("in"))
-        {
-        	try {
-				parser.printHelpOn(System.err);
-			} catch (IOException e) {
-				System.err.println("This should never happen.");
-				e.printStackTrace();
-			}
-        	System.exit(1);     
-        }
-		
-		int tags = (Integer) options.valueOf("topics");
-		int iterations = (Integer) options.valueOf("iterations");
-		double scale_phrase = (Double) options.valueOf("scale-phrase");
-		double scale_context = (Double) options.valueOf("scale-context");
-		int threads = (Integer) options.valueOf("threads");
-		double threshold = (Double) options.valueOf("convergence-threshold");
-		boolean vb = options.has("variational-bayes");
-		double alphaEmit = (vb) ? (Double) options.valueOf("alpha-emit") : 0;
-		double alphaPi = (vb) ? (Double) options.valueOf("alpha-pi") : 0;
-		int skip = (Integer) options.valueOf("skip-large-phrases");
-		
-		if (options.has("seed"))
-			F.rng = new Random((Long) options.valueOf("seed"));
-		
-		ExecutorService threadPool = null;
-		if (threads > 0)
-			threadPool = Executors.newFixedThreadPool(threads);			
-		
-		if (tags <= 1 || scale_phrase < 0 || scale_context < 0 || threshold < 0)
-		{
-			System.err.println("Invalid arguments. Try again!");
-			System.exit(1);
-		}
-		
-		Corpus corpus = null;
-		File infile = (File) options.valueOf("in");
-		Corpus corpus1 = null;
-		File infile1 = (File) options.valueOf("in1");
-		try {
-			System.out.println("Reading concordance from " + infile);
-			corpus = Corpus.readFromFile(FileUtil.reader(infile));
-			corpus.printStats(System.out);
-			if(options.has("in1")){
-				corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
-				corpus1.printStats(System.out);
-			}
-		} catch (IOException e) {
-			System.err.println("Failed to open input file: " + infile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-				
-		if (!(options.has("agree-direction")||options.has("agree-language")))
-			System.out.println("Running with " + tags + " tags " +
-					"for " + iterations + " iterations " +
-					((skip > 0) ? "skipping large phrases for first " + skip + " iterations " : "") +
-					"with scale " + scale_phrase + " phrase and " + scale_context + " context " +
-					"and " + threads + " threads");
-		else
-			System.out.println("Running agreement model with " + tags + " tags " +
-	 				"for " + iterations);
-
-	 	System.out.println();
-		
- 		PhraseCluster cluster = null;
- 		Agree2Sides agree2sides = null;
- 		Agree agree= null;
- 		VB vbModel=null;
- 		if (options.has("agree-language"))
- 			agree2sides = new Agree2Sides(tags, corpus,corpus1);
- 		else if (options.has("agree-direction"))
- 			agree = new Agree(tags, corpus);
- 		else
- 		{
- 			if (vb)	
- 			{
- 				vbModel=new VB(tags,corpus);
- 				vbModel.alpha=alphaPi;
- 				vbModel.lambda=alphaEmit;
- 	 			if (threadPool != null) vbModel.useThreadPool(threadPool);
- 			} 
- 			else 
- 			{
- 				cluster = new PhraseCluster(tags, corpus);
- 	 			if (threadPool != null) cluster.useThreadPool(threadPool);
- 				
-	 			if (options.has("no-parameter-cache")) 
-	 				cluster.cacheLambda = false;
-	 			if (options.has("start"))
-	 			{
-	 				try {
-						System.err.println("Reading starting parameters from " + options.valueOf("start"));
-						cluster.loadParameters(FileUtil.reader((File)options.valueOf("start")));
-					} catch (IOException e) {
-						System.err.println("Failed to open input file: " + options.valueOf("start"));
-						e.printStackTrace();
-					}
-	 			}
- 			}
- 		}
-				
-		double last = 0;
-		for (int i=0; i < iterations; i++)
-		{
-			double o;
-			if (agree != null)
-				o = agree.EM();
-			else if(agree2sides!=null)
-				o = agree2sides.EM();
-			else
-			{
-				if (i < skip)
-					System.out.println("Skipping phrases of length > " + (i+1));
-				
-				if (scale_phrase <= 0 && scale_context <= 0)
-				{
-					if (!vb)
-						o = cluster.EM((i < skip) ? i+1 : 0);
-					else
-						o = vbModel.EM();	
-				}
-				else
-					o = cluster.PREM(scale_phrase, scale_context, (i < skip) ? i+1 : 0);
-			}
-			
-			System.out.println("ITER: "+i+" objective: " + o);
-			
-			// sometimes takes a few iterations to break the ties
-			if (i > 5 && Math.abs((o - last) / o) < threshold)
-			{
-				last = o;
-				break;
-			}
-			last = o;
-		}
-		
-		double pl1lmax = 0, cl1lmax = 0;
-		if (cluster != null)
-		{
-			pl1lmax = cluster.phrase_l1lmax();
-			cl1lmax = cluster.context_l1lmax();
-		}
-		else if (agree != null)
-		{
-			// fairly arbitrary choice of model1 cf model2
-			pl1lmax = agree.model1.phrase_l1lmax();
-			cl1lmax = agree.model1.context_l1lmax();
-		}
-		else if (agree2sides != null)
-		{
-			// fairly arbitrary choice of model1 cf model2
-			pl1lmax = agree2sides.model1.phrase_l1lmax();
-			cl1lmax = agree2sides.model1.context_l1lmax();
-		}
-
-		System.out.println("\nFinal posterior phrase l1lmax " + pl1lmax + " context l1lmax " + cl1lmax);
-		
-		if (options.has("out"))
-		{
-			File outfile = (File) options.valueOf("out");
-			try {
-				PrintStream ps = FileUtil.printstream(outfile);
-				List<Edge> test;
-				if (!options.has("test")) // just use the training
-					test = corpus.getEdges();
-				else
-				{	// if --test supplied, load up the file
-					infile = (File) options.valueOf("test");
-					System.out.println("Reading testing concordance from " + infile);
-					test = corpus.readEdges(FileUtil.reader(infile));
-				}
-				if(vb) {
-					assert !options.has("test");
-					vbModel.displayPosterior(ps);
-				} else if (cluster != null) 
-					cluster.displayPosterior(ps, test);
-				else if (agree != null) 
-					agree.displayPosterior(ps, test);
-				else if (agree2sides != null) {
-					assert !options.has("test");
-					agree2sides.displayPosterior(ps);
-				}
-				
-				ps.close();
-			} catch (IOException e) {
-				System.err.println("Failed to open either testing file or output file");
-				e.printStackTrace();
-				System.exit(1);
-			}
-		}
-
-		if (options.has("parameters"))
-		{
-			assert !vb;
-			File outfile = (File) options.valueOf("parameters");
-			PrintStream ps;
-			try {
-				ps = FileUtil.printstream(outfile);
-				cluster.displayModelParam(ps);
-				ps.close();
-			} catch (IOException e) {
-				System.err.println("Failed to open output parameters file: " + outfile);
-				e.printStackTrace();
-				System.exit(1);
-			}
-		}
-		
-		if (cluster != null && cluster.pool != null)
-			cluster.pool.shutdown();
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/VB.java b/gi/posterior-regularisation/prjava/src/phrase/VB.java
deleted file mode 100644
index cd3f4966..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/VB.java
+++ /dev/null
@@ -1,419 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import org.apache.commons.math.special.Gamma;
-
-import phrase.Corpus.Edge;
-
-public class VB {
-
-	public static int MAX_ITER=400;
-	
-	/**@brief
-	 * hyper param for beta
-	 * where beta is multinomial
-	 * for generating words from a topic
-	 */
-	public double lambda=0.1;
-	/**@brief
-	 * hyper param for theta
-	 * where theta is dirichlet for z
-	 */
-	public double alpha=0.0001;
-	/**@brief
-	 * variational param for beta
-	 */
-	private double rho[][][];
-	private double digamma_rho[][][];
-	private double rho_sum[][];
-	/**@brief
-	 * variational param for z
-	 */
-	//private double phi[][];
-	/**@brief
-	 * variational param for theta
-	 */
-	private double gamma[];
-	private static double VAL_DIFF_RATIO=0.005;
-	
-	private int n_positions;
-	private int n_words;
-	private int K;
-	private ExecutorService pool;
-	
-	private Corpus c;
-	public static void main(String[] args) {
-	//	String in="../pdata/canned.con";
-		String in="../pdata/btec.con";
-		String out="../pdata/vb.out";
-		int numCluster=25;
-		Corpus corpus = null;
-		File infile = new File(in);
-		try {
-			System.out.println("Reading concordance from " + infile);
-			corpus = Corpus.readFromFile(FileUtil.reader(infile));
-			corpus.printStats(System.out);
-		} catch (IOException e) {
-			System.err.println("Failed to open input file: " + infile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-		
-		VB vb=new VB(numCluster, corpus);
-		int iter=20;
-		for(int i=0;i<iter;i++){
-			double obj=vb.EM();
-			System.out.println("Iter "+i+": "+obj);
-		}
-		
-		File outfile = new File (out);
-		try {
-			PrintStream ps = FileUtil.printstream(outfile);
-			vb.displayPosterior(ps);
-		//	ps.println();
-		//	c2f.displayModelParam(ps);
-			ps.close();
-		} catch (IOException e) {
-			System.err.println("Failed to open output file: " + outfile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-	}
-
-	public VB(int numCluster, Corpus corpus){
-		c=corpus;
-		K=numCluster;
-		n_positions=c.getNumContextPositions();
-		n_words=c.getNumWords();
-		rho=new double[K][n_positions][n_words];
-		//to init rho
-		//loop through data and count up words
-		double[] phi_tmp=new double[K];
-		for(int i=0;i<K;i++){
-			for(int pos=0;pos<n_positions;pos++){
-				Arrays.fill(rho[i][pos], lambda);
-			}
-		}
-		for(int d=0;d<c.getNumPhrases();d++){
-			List<Edge>doc=c.getEdgesForPhrase(d);
-			for(int n=0;n<doc.size();n++){
-				TIntArrayList context=doc.get(n).getContext();
-				arr.F.randomise(phi_tmp);
-				for(int i=0;i<K;i++){
-					for(int pos=0;pos<n_positions;pos++){
-						rho[i][pos][context.get(pos)]+=phi_tmp[i];
-					}
-				}
-			}
-		}
-		
-	}
-	
-	private double inference(int phraseID, double[][] phi, double[] gamma)
-	{
-		List<Edge > doc=c.getEdgesForPhrase(phraseID);
-		for(int i=0;i<phi.length;i++){
-			for(int j=0;j<phi[i].length;j++){
-				phi[i][j]=1.0/K;
-			}
-		}
-		Arrays.fill(gamma,alpha+1.0/K);
-		
-		double digamma_gamma[]=new double[K];
-		
-		double gamma_sum=digamma(arr.F.l1norm(gamma));
-		for(int i=0;i<K;i++){
-			digamma_gamma[i]=digamma(gamma[i]);
-		}
-		double gammaSum[]=new double [K];
-		double prev_val=0;
-		double obj=0;
-		
-		for(int iter=0;iter<MAX_ITER;iter++){
-			prev_val=obj;
-			obj=0;
-			Arrays.fill(gammaSum,0.0);
-			for(int n=0;n<doc.size();n++){
-				TIntArrayList context=doc.get(n).getContext();
-				double phisum=0;
-				for(int i=0;i<K;i++){
-					double sum=0;
-					for(int pos=0;pos<n_positions;pos++){
-						int word=context.get(pos);
-						sum+=digamma_rho[i][pos][word]-rho_sum[i][pos];
-					}
-					sum+= digamma_gamma[i]-gamma_sum;
-					phi[n][i]=sum;
-					
-					if (i > 0){
-	                    phisum = log_sum(phisum, phi[n][i]);
-					}
-	                else{
-	                    phisum = phi[n][i];
-	                }
-					
-				}//end of  a word
-				
-				for(int i=0;i<K;i++){
-					phi[n][i]=Math.exp(phi[n][i]-phisum);
-					gammaSum[i]+=phi[n][i];
-				}
-				
-			}//end of doc
-			
-			for(int i=0;i<K;i++){
-				gamma[i]=alpha+gammaSum[i];
-			}
-			gamma_sum=digamma(arr.F.l1norm(gamma));
-			for(int i=0;i<K;i++){
-				digamma_gamma[i]=digamma(gamma[i]);
-			}
-			//compute objective for reporting
-
-			obj=0;
-			
-			for(int i=0;i<K;i++){
-				obj+=(alpha-1)*(digamma_gamma[i]-gamma_sum);
-			}
-			
-			
-			for(int n=0;n<doc.size();n++){
-				TIntArrayList context=doc.get(n).getContext();
-				
-				for(int i=0;i<K;i++){
-					//entropy of phi + expected log likelihood of z
-					obj+=phi[n][i]*(digamma_gamma[i]-gamma_sum);
-					
-					if(phi[n][i]>1e-10){
-						obj+=phi[n][i]*Math.log(phi[n][i]);
-					}
-					
-					double beta_sum=0;
-					for(int pos=0;pos<n_positions;pos++){
-						int word=context.get(pos);
-						beta_sum+=(digamma(rho[i][pos][word])-rho_sum[i][pos]);
-					}
-					obj+=phi[n][i]*beta_sum;
-				}
-			}
-			
-			obj-=log_gamma(arr.F.l1norm(gamma));
-			for(int i=0;i<K;i++){
-				obj+=Gamma.logGamma(gamma[i]);
-				obj-=(gamma[i]-1)*(digamma_gamma[i]-gamma_sum);
-			}
-			
-//			System.out.println(phraseID+": "+obj);
-			if(iter>0 && (obj-prev_val)/Math.abs(obj)<VAL_DIFF_RATIO){
-				break;
-			}
-		}//end of inference loop
-		
-		return obj;
-	}//end of inference
-	
-	/**
-	 * @return objective of this iteration
-	 */
-	public double EM(){
-		double emObj=0;
-		if(digamma_rho==null){
-			digamma_rho=new double[K][n_positions][n_words];
-		}
-		for(int i=0;i<K;i++){
-			for (int pos=0;pos<n_positions;pos++){
-				for(int j=0;j<n_words;j++){
-					digamma_rho[i][pos][j]= digamma(rho[i][pos][j]);
-				}
-			}
-		}
-		
-		if(rho_sum==null){
-			rho_sum=new double [K][n_positions];
-		}
-		for(int i=0;i<K;i++){
-			for(int pos=0;pos<n_positions;pos++){
-				rho_sum[i][pos]=digamma(arr.F.l1norm(rho[i][pos]));
-			}
-		}
-
-		//E
-		double exp_rho[][][]=new double[K][n_positions][n_words];
-		if (pool == null)
-		{
-			for (int d=0;d<c.getNumPhrases();d++)
-			{		
-				List<Edge > doc=c.getEdgesForPhrase(d);
-				double[][] phi = new double[doc.size()][K];
-				double[] gamma = new double[K];
-				
-				emObj += inference(d, phi, gamma);
-				
-				for(int n=0;n<doc.size();n++){
-					TIntArrayList context=doc.get(n).getContext();
-					for(int pos=0;pos<n_positions;pos++){
-						int word=context.get(pos);
-						for(int i=0;i<K;i++){	
-							exp_rho[i][pos][word]+=phi[n][i];
-						}
-					}
-				}
-				//if(d!=0 && d%100==0)  System.out.print(".");
-				//if(d!=0 && d%1000==0) System.out.println(d);
-			}
-		}
-		else // multi-threaded version of above loop
-		{
-			class PartialEStep implements Callable<PartialEStep>
-			{
-				double[][] phi;
-				double[] gamma;
-				double obj;
-				int d;
-				PartialEStep(int d) { this.d = d; }
-
-				public PartialEStep call()
-				{
-					phi = new double[c.getEdgesForPhrase(d).size()][K];
-					gamma = new double[K];
-					obj = inference(d, phi, gamma);
-					return this;
-				}			
-			}
-
-			List<Future<PartialEStep>> jobs = new ArrayList<Future<PartialEStep>>();
-			for (int d=0;d<c.getNumPhrases();d++)
-				jobs.add(pool.submit(new PartialEStep(d)));
-		
-			for (Future<PartialEStep> job: jobs)
-			{
-				try {
-					PartialEStep e = job.get();
-					
-					emObj += e.obj;				
-					List<Edge> doc = c.getEdgesForPhrase(e.d);
-					for(int n=0;n<doc.size();n++){
-						TIntArrayList context=doc.get(n).getContext();
-						for(int pos=0;pos<n_positions;pos++){
-							int word=context.get(pos);
-							for(int i=0;i<K;i++){	
-								exp_rho[i][pos][word]+=e.phi[n][i];
-							}
-						}
-					}
-				} catch (ExecutionException e) {
-					System.err.println("ERROR: E-step thread execution failed.");
-					throw new RuntimeException(e);
-				} catch (InterruptedException e) {
-					System.err.println("ERROR: Failed to join E-step thread.");
-					throw new RuntimeException(e);
-				}
-			}
-		}	
-	//	System.out.println("EM Objective:"+emObj);
-		
-		//M
-		for(int i=0;i<K;i++){
-			for(int pos=0;pos<n_positions;pos++){
-				for(int j=0;j<n_words;j++){
-					rho[i][pos][j]=lambda+exp_rho[i][pos][j];
-				}
-			}
-		}
-		
-		//E[\log p(\beta|\lambda)] - E[\log q(\beta)]
-		for(int i=0;i<K;i++){
-			double rhoSum=0;
-			for(int pos=0;pos<n_positions;pos++){
-				for(int j=0;j<n_words;j++){
-					rhoSum+=rho[i][pos][j];
-				}
-				double digamma_rhoSum=Gamma.digamma(rhoSum);
-				emObj-=Gamma.logGamma(rhoSum);
-				for(int j=0;j<n_words;j++){
-					emObj+=(lambda-rho[i][pos][j])*(Gamma.digamma(rho[i][pos][j])-digamma_rhoSum);
-					emObj+=Gamma.logGamma(rho[i][pos][j]);
-				}
-			}
-		}
-		
-		return emObj;
-	}//end of EM
-	
-	public void displayPosterior(PrintStream ps)
-	{	
-		for(int d=0;d<c.getNumPhrases();d++){
-			List<Edge > doc=c.getEdgesForPhrase(d);
-			double[][] phi = new double[doc.size()][K];
-			for(int i=0;i<phi.length;i++)
-				for(int j=0;j<phi[i].length;j++)
-					phi[i][j]=1.0/K;
-			double[] gamma = new double[K];
-
-			inference(d, phi, gamma);
-
-			for(int n=0;n<doc.size();n++){
-				Edge edge=doc.get(n);
-				int tag=arr.F.argmax(phi[n]);
-				ps.print(edge.getPhraseString());
-				ps.print("\t");
-				ps.print(edge.getContextString(true));
-
-				ps.println(" ||| C=" + tag);
-			}
-		}
-	}
-
-	double log_sum(double log_a, double log_b)
-	{
-	  double v;
-
-	  if (log_a < log_b)
-	      v = log_b+Math.log(1 + Math.exp(log_a-log_b));
-	  else
-	      v = log_a+Math.log(1 + Math.exp(log_b-log_a));
-	  return(v);
-	}
-		
-	double digamma(double x)
-	{
-	    double p;
-	    x=x+6;
-	    p=1/(x*x);
-	    p=(((0.004166666666667*p-0.003968253986254)*p+
-		0.008333333333333)*p-0.083333333333333)*p;
-	    p=p+Math.log(x)-0.5/x-1/(x-1)-1/(x-2)-1/(x-3)-1/(x-4)-1/(x-5)-1/(x-6);
-	    return p;
-	}
-	
-	double log_gamma(double x)
-	{
-	     double z=1/(x*x);
-
-	    x=x+6;
-	    z=(((-0.000595238095238*z+0.000793650793651)
-		*z-0.002777777777778)*z+0.083333333333333)/x;
-	    z=(x-0.5)*Math.log(x)-x+0.918938533204673+z-Math.log(x-1)-
-	    Math.log(x-2)-Math.log(x-3)-Math.log(x-4)-Math.log(x-5)-Math.log(x-6);
-	    return z;
-	}
-
-	public void useThreadPool(ExecutorService threadPool) 
-	{
-		pool = threadPool;
-	}
-}//End of  class
author	Kenneth Heafield <github@kheafield.com>	2012-10-22 12:07:20 +0100
committer	Kenneth Heafield <github@kheafield.com>	2012-10-22 12:07:20 +0100
commit	5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree	9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/posterior-regularisation/prjava/src/phrase
parent	cf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent	21825a09d97c2e0afd20512f306fb25fed55e529 (diff)