author     Kenneth Heafield <github@kheafield.com>  2012-10-22 12:07:20 +0100
committer  Kenneth Heafield <github@kheafield.com>  2012-10-22 12:07:20 +0100
commit     5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree       9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/posterior-regularisation
parent     cf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent     21825a09d97c2e0afd20512f306fb25fed55e529 (diff)
Merge remote branch 'upstream/master'
Conflicts:
	Jamroot
	bjam
	decoder/Jamfile
	decoder/cdec.cc
	dpmert/Jamfile
	jam-files/sanity.jam
	klm/lm/Jamfile
	klm/util/Jamfile
	mira/Jamfile
Diffstat (limited to 'gi/posterior-regularisation')
-rw-r--r--  gi/posterior-regularisation/Corpus.java  167
-rw-r--r--  gi/posterior-regularisation/Lexicon.java  32
-rw-r--r--  gi/posterior-regularisation/PhraseContextModel.java  466
-rw-r--r--  gi/posterior-regularisation/README  3
-rw-r--r--  gi/posterior-regularisation/alphabet.hh  61
-rw-r--r--  gi/posterior-regularisation/canned.concordance  4
-rw-r--r--  gi/posterior-regularisation/em.cc  830
-rw-r--r--  gi/posterior-regularisation/invert.hh  45
-rw-r--r--  gi/posterior-regularisation/linesearch.py  58
-rw-r--r--  gi/posterior-regularisation/log_add.hh  30
l---------  gi/posterior-regularisation/prjava.jar  1
-rwxr-xr-x  gi/posterior-regularisation/prjava/Makefile  8
-rw-r--r--  gi/posterior-regularisation/prjava/build.xml  38
-rw-r--r--  gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar  bin 832410 -> 0 bytes
-rw-r--r--  gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar  bin 53244 -> 0 bytes
-rw-r--r--  gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar  bin 737844 -> 0 bytes
-rw-r--r--  gi/posterior-regularisation/prjava/src/arr/F.java  99
-rw-r--r--  gi/posterior-regularisation/prjava/src/data/Corpus.java  233
-rw-r--r--  gi/posterior-regularisation/prjava/src/hmm/HMM.java  579
-rw-r--r--  gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java  351
-rw-r--r--  gi/posterior-regularisation/prjava/src/hmm/POS.java  120
-rw-r--r--  gi/posterior-regularisation/prjava/src/io/FileUtil.java  48
-rw-r--r--  gi/posterior-regularisation/prjava/src/io/SerializedObjects.java  83
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java  110
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java  128
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java  127
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java  120
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java  92
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java  65
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java  19
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java  234
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java  87
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java  19
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java  11
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java  154
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java  29
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java  10
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java  86
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java  70
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java  102
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java  141
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java  185
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java  20
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java  25
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java  14
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java  33
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java  137
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java  300
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java  45
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java  104
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java  72
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java  127
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java  33
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java  30
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java  48
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java  60
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java  54
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java  51
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java  8
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java  41
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java  37
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/Logger.java  7
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java  339
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java  28
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java  180
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Agree.java  204
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java  197
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/C2F.java  216
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Corpus.java  288
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Lexicon.java  34
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java  540
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java  436
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java  193
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java  224
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Trainer.java  257
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/VB.java  419
-rw-r--r--  gi/posterior-regularisation/prjava/src/test/CorpusTest.java  60
-rw-r--r--  gi/posterior-regularisation/prjava/src/test/HMMModelStats.java  105
-rw-r--r--  gi/posterior-regularisation/prjava/src/test/IntDoublePair.java  23
-rw-r--r--  gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java  131
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Array.java  41
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/ArrayMath.java  186
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java  14
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/DigammaFunction.java  21
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/FileSystem.java  21
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/InputOutput.java  67
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/LogSummer.java  86
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/MathUtil.java  148
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Matrix.java  16
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/MemoryTracker.java  47
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Pair.java  31
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Printing.java  158
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Sorters.java  39
-rwxr-xr-x  gi/posterior-regularisation/prjava/train-PR-cluster.sh  4
-rw-r--r--  gi/posterior-regularisation/projected_gradient.cc  87
-rw-r--r--  gi/posterior-regularisation/simplex_pg.py  55
-rwxr-xr-x  gi/posterior-regularisation/split-languages.py  23
-rw-r--r--  gi/posterior-regularisation/train_pr_agree.py  400
-rw-r--r--  gi/posterior-regularisation/train_pr_global.py  296
-rw-r--r--  gi/posterior-regularisation/train_pr_parallel.py  333
100 files changed, 0 insertions, 12238 deletions
diff --git a/gi/posterior-regularisation/Corpus.java b/gi/posterior-regularisation/Corpus.java
deleted file mode 100644
index 07b27387..00000000
--- a/gi/posterior-regularisation/Corpus.java
+++ /dev/null
@@ -1,167 +0,0 @@
-import gnu.trove.TIntArrayList;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-public class Corpus
-{
- private Lexicon<String> tokenLexicon = new Lexicon<String>();
- private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>();
- private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>();
- private List<Edge> edges = new ArrayList<Edge>();
- private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
- private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
-
- public class Edge
- {
- Edge(int phraseId, int contextId, int count)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- }
- public int getPhraseId()
- {
- return phraseId;
- }
- public TIntArrayList getPhrase()
- {
- return phraseLexicon.lookup(phraseId);
- }
- public String getPhraseString()
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getPhrase().toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(tokenLexicon.lookup(tid));
- }
- return b.toString();
- }
- public int getContextId()
- {
- return contextId;
- }
- public TIntArrayList getContext()
- {
- return contextLexicon.lookup(contextId);
- }
- public String getContextString()
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getContext().toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(tokenLexicon.lookup(tid));
- }
- return b.toString();
- }
- public int getCount()
- {
- return count;
- }
- private int phraseId;
- private int contextId;
- private int count;
- }
-
- List<Edge> getEdges()
- {
- return edges;
- }
-
- int getNumEdges()
- {
- return edges.size();
- }
-
- int getNumPhrases()
- {
- return phraseLexicon.size();
- }
-
- List<Edge> getEdgesForPhrase(int phraseId)
- {
- return phraseToContext.get(phraseId);
- }
-
- int getNumContexts()
- {
- return contextLexicon.size();
- }
-
- List<Edge> getEdgesForContext(int contextId)
- {
- return contextToPhrase.get(contextId);
- }
-
- int getNumTokens()
- {
- return tokenLexicon.size();
- }
-
- static Corpus readFromFile(Reader in) throws IOException
- {
- Corpus c = new Corpus();
-
- // read in line-by-line
- BufferedReader bin = new BufferedReader(in);
- String line;
- Pattern separator = Pattern.compile(" \\|\\|\\| ");
-
- while ((line = bin.readLine()) != null)
- {
- // split into phrase and contexts
- StringTokenizer st = new StringTokenizer(line, "\t");
- assert (st.hasMoreTokens());
- String phraseToks = st.nextToken();
- assert (st.hasMoreTokens());
- String rest = st.nextToken();
- assert (!st.hasMoreTokens());
-
- // process phrase
- st = new StringTokenizer(phraseToks, " ");
- TIntArrayList ptoks = new TIntArrayList();
- while (st.hasMoreTokens())
- ptoks.add(c.tokenLexicon.insert(st.nextToken()));
- int phraseId = c.phraseLexicon.insert(ptoks);
- if (phraseId == c.phraseToContext.size())
- c.phraseToContext.add(new ArrayList<Edge>());
-
- // process contexts
- String[] parts = separator.split(rest);
- assert (parts.length % 2 == 0);
- for (int i = 0; i < parts.length; i += 2)
- {
- // process pairs of strings - context and count
- TIntArrayList ctx = new TIntArrayList();
- String ctxString = parts[i];
- String countString = parts[i + 1];
- StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " ");
- while (ctxStrtok.hasMoreTokens())
- {
- String token = ctxStrtok.nextToken();
- if (!token.equals("<PHRASE>"))
- ctx.add(c.tokenLexicon.insert(token));
- }
- int contextId = c.contextLexicon.insert(ctx);
- if (contextId == c.contextToPhrase.size())
- c.contextToPhrase.add(new ArrayList<Edge>());
-
- assert (countString.startsWith("C="));
- Edge e = c.new Edge(phraseId, contextId,
- Integer.parseInt(countString.substring(2).trim()));
- c.edges.add(e);
-
- // index the edge for fast phrase, context lookup
- c.phraseToContext.get(phraseId).add(e);
- c.contextToPhrase.get(contextId).add(e);
- }
- }
-
- return c;
- }
-}
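
For reference, a minimal Python sketch (not part of the repository) of the line format that Corpus.readFromFile above consumes: a phrase, a tab, then alternating context strings and C=count fields separated by " ||| ", with the <PHRASE> placeholder dropped from each context.

    # Illustrative parser for one concordance line; names are hypothetical.
    def parse_line(line):
        phrase, rest = line.rstrip("\n").split("\t")
        parts = rest.split(" ||| ")
        assert len(parts) % 2 == 0, "contexts and counts must alternate"
        edges = []
        for ctx, count in zip(parts[0::2], parts[1::2]):
            assert count.startswith("C=")
            tokens = [t for t in ctx.split() if t != "<PHRASE>"]
            edges.append((phrase.split(), tokens, int(count[2:])))
        return edges

    # parse_line("the opera\tseats for <PHRASE> ? ||| C=1")
    # -> [(['the', 'opera'], ['seats', 'for', '?'], 1)]
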
diff --git a/gi/posterior-regularisation/Lexicon.java b/gi/posterior-regularisation/Lexicon.java
deleted file mode 100644
index 9f0245ee..00000000
--- a/gi/posterior-regularisation/Lexicon.java
+++ /dev/null
@@ -1,32 +0,0 @@
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Lexicon<T>
-{
- public int insert(T word)
- {
- Integer i = wordToIndex.get(word);
- if (i == null)
- {
- i = indexToWord.size();
- wordToIndex.put(word, i);
- indexToWord.add(word);
- }
- return i;
- }
-
- public T lookup(int index)
- {
- return indexToWord.get(index);
- }
-
- public int size()
- {
- return indexToWord.size();
- }
-
- private Map<T, Integer> wordToIndex = new HashMap<T, Integer>();
- private List<T> indexToWord = new ArrayList<T>();
-} \ No newline at end of file
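
A minimal Python equivalent of the Lexicon interner above (illustrative only): each distinct item receives a dense integer id, with reverse lookup by index.

    class Lexicon:
        def __init__(self):
            self.word_to_index = {}
            self.index_to_word = []

        def insert(self, word):
            # assign the next free id on first sight, else return the old one
            if word not in self.word_to_index:
                self.word_to_index[word] = len(self.index_to_word)
                self.index_to_word.append(word)
            return self.word_to_index[word]

        def lookup(self, index):
            return self.index_to_word[index]

        def size(self):
            return len(self.index_to_word)
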
diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java
deleted file mode 100644
index 85bcfb89..00000000
--- a/gi/posterior-regularisation/PhraseContextModel.java
+++ /dev/null
@@ -1,466 +0,0 @@
-// Input of the form:
-// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1
-// phrase TAB [context]+
-// where context = phrase ||| C=... which are separated by |||
-
-// Model parameterised as follows:
-// - each phrase, p, is allocated a latent state, t
-// - this is used to generate the contexts, c
-// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR
-
-// Training with EM:
-// - e-step is estimating q(t) = P(t|p,c) for all p,c
-// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t)
-// - PR uses alternate e-step, which first optimizes lambda
-// min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc]
-// where
-// q(t|p,c) propto p(t,c|p) exp( -phi_ptc )
-// Then q is used to obtain expectations for vanilla M-step.
-
-// Sexing it up:
-// - learn p-specific conditionals P(t|p)
-// - or generate phrase internals, e.g., generate edge words from
-// different distribution to central words
-// - agreement between phrase->context model and context->phrase model
-
-import java.io.*;
-import optimization.gradientBasedMethods.*;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.GenericPickFirstStep;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.NormalizedProjectedGradientL2Norm;
-import optimization.stopCriteria.NormalizedValueDifference;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import java.util.*;
-import java.util.regex.*;
-import gnu.trove.TDoubleArrayList;
-import gnu.trove.TIntArrayList;
-import static java.lang.Math.*;
-
-class PhraseContextModel
-{
- // model/optimisation configuration parameters
- int numTags;
- boolean posteriorRegularisation = true;
- double constraintScale = 3; // FIXME: make configurable
-
- // copied from L1LMax in depparsing code
- final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10;
- final int maxZoomEvals = 10, maxExtrapolationIters = 200;
- int maxProjectionIterations = 200;
- int minOccurrencesForProjection = 0;
-
- // book keeping
- int numPositions;
- Random rng = new Random();
-
- // training set
- Corpus training;
-
- // model parameters (learnt)
- double emissions[][][]; // position in 0 .. 3 x tag x word Pr(word | tag, position)
- double prior[][]; // phrase x tag Pr(tag | phrase)
- double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers
-
- PhraseContextModel(Corpus training, int tags)
- {
- this.training = training;
- this.numTags = tags;
- assert (!training.getEdges().isEmpty());
- assert (numTags > 1);
-
- // now initialise emissions
- numPositions = training.getEdges().get(0).getContext().size();
- assert (numPositions > 0);
-
- emissions = new double[numPositions][numTags][training.getNumTokens()];
- prior = new double[training.getNumPhrases()][numTags];
- if (posteriorRegularisation)
- lambda = new double[training.getNumEdges() * numTags];
-
- for (double[][] emissionTW : emissions)
- {
- for (double[] emissionW : emissionTW)
- {
- randomise(emissionW);
-// for (int i = 0; i < emissionW.length; ++i)
-// emissionW[i] = i+1;
-// normalise(emissionW);
- }
- }
-
- for (double[] priorTag : prior)
- {
- randomise(priorTag);
-// for (int i = 0; i < priorTag.length; ++i)
-// priorTag[i] = i+1;
-// normalise(priorTag);
- }
- }
-
- void expectationMaximisation(int numIterations)
- {
- double lastLlh = Double.NEGATIVE_INFINITY;
-
- for (int iteration = 0; iteration < numIterations; ++iteration)
- {
- double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()];
- double priorCounts[][] = new double[training.getNumPhrases()][numTags];
-
- // E-step
- double llh = 0;
- if (posteriorRegularisation)
- {
- EStepDualObjective objective = new EStepDualObjective();
-
- // copied from x2y2withconstraints
-// LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-// OptimizerStats stats = new OptimizerStats();
-// ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
-// CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
-// compositeStop.add(new ProjectedGradientL2Norm(0.001));
-// compositeStop.add(new ValueDifference(0.001));
-// optimizer.setMaxIterations(50);
-// boolean succeed = optimizer.optimize(objective,stats,compositeStop);
-
- // copied from depparser l1lmaxobjective
- ProjectedOptimizerStats stats = new ProjectedOptimizerStats();
- GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1);
- LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2);
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch);
- optimizer.setMaxIterations(maxProjectionIterations);
- CompositeStopingCriteria stop = new CompositeStopingCriteria();
- stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision));
- stop.add(new NormalizedValueDifference(stoppingPrecision));
- boolean succeed = optimizer.optimize(objective, stats, stop);
-
- System.out.println("Ended optimization Projected Gradient Descent\n" + stats.prettyPrint(1));
- //System.out.println("Solution: " + objective.parameters);
- if (!succeed)
- System.out.println("Failed to optimize");
- //System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
-
- //lambda = objective.getParameters();
- llh = objective.primal();
-
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- for (int t = 0; t < numTags; t++)
- {
- double p = objective.q.get(i).get(j).get(t);
- priorCounts[i][t] += e.getCount() * p;
- TIntArrayList tokens = e.getContext();
- for (int k = 0; k < tokens.size(); ++k)
- emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p;
- }
- }
- }
- }
- else
- {
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- double probs[] = posterior(i, e);
- double z = normalise(probs);
- llh += log(z) * e.getCount();
-
- TIntArrayList tokens = e.getContext();
- for (int t = 0; t < numTags; ++t)
- {
- priorCounts[i][t] += e.getCount() * probs[t];
- for (int k = 0; k < tokens.size(); ++k)
- emissionsCounts[k][t][tokens.get(k)] += e.getCount() * probs[t];
- }
- }
- }
- }
-
- // M-step: normalise
- for (double[][] emissionTW : emissionsCounts)
- for (double[] emissionW : emissionTW)
- normalise(emissionW);
-
- for (double[] priorTag : priorCounts)
- normalise(priorTag);
-
- emissions = emissionsCounts;
- prior = priorCounts;
-
- System.out.println("Iteration " + iteration + " llh " + llh);
-
-// if (llh - lastLlh < 1e-4)
-// break;
-// else
-// lastLlh = llh;
- }
- }
-
- static double normalise(double probs[])
- {
- double z = 0;
- for (double p : probs)
- z += p;
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- return z;
- }
-
- void randomise(double probs[])
- {
- double z = 0;
- for (int i = 0; i < probs.length; ++i)
- {
- probs[i] = 10 + rng.nextDouble();
- z += probs[i];
- }
-
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- }
-
- static int argmax(double probs[])
- {
- double m = Double.NEGATIVE_INFINITY;
- int mi = -1;
- for (int i = 0; i < probs.length; ++i)
- {
- if (probs[i] > m)
- {
- m = probs[i];
- mi = i;
- }
- }
- return mi;
- }
-
- double[] posterior(int phraseId, Corpus.Edge e) // unnormalised
- {
- double probs[] = new double[numTags];
- TIntArrayList tokens = e.getContext();
- for (int t = 0; t < numTags; ++t)
- {
- probs[t] = prior[phraseId][t];
- for (int k = 0; k < tokens.size(); ++k)
- probs[t] *= emissions[k][t][tokens.get(k)];
- }
- return probs;
- }
-
- void displayPosterior()
- {
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (Corpus.Edge e: edges)
- {
- double probs[] = posterior(i, e);
- normalise(probs);
-
- // emit phrase
- System.out.print(e.getPhraseString());
- System.out.print("\t");
- System.out.print(e.getContextString());
- System.out.print("||| C=" + e.getCount() + " |||");
-
- int t = argmax(probs);
- System.out.print(" " + t + " ||| " + probs[t]);
- // for (int t = 0; t < numTags; ++t)
- // System.out.print(" " + probs[t]);
- System.out.println();
- }
- }
- }
-
- public static void main(String[] args)
- {
- assert (args.length >= 2);
- try
- {
- Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0])));
- PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1]));
- model.expectationMaximisation(Integer.parseInt(args[2]));
- model.displayPosterior();
- }
- catch (IOException e)
- {
- System.out.println("Failed to read input file: " + args[0]);
- e.printStackTrace();
- }
- }
-
- class EStepDualObjective extends ProjectedObjective
- {
- List<List<TDoubleArrayList>> conditionals; // phrase id x context # x tag - precomputed
- List<List<TDoubleArrayList>> q; // ditto, but including exp(-lambda) terms
- double objective = 0; // log(z)
- // Objective.gradient = d log(z) / d lambda = E_q[phi]
- double llh = 0;
-
- public EStepDualObjective()
- {
- super();
- // compute conditionals p(context, tag | phrase) for all training instances
- conditionals = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
- q = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- conditionals.add(new ArrayList<TDoubleArrayList>(edges.size()));
- q.add(new ArrayList<TDoubleArrayList>(edges.size()));
-
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- double probs[] = posterior(i, e);
- double z = normalise(probs);
- llh += log(z) * e.getCount();
- conditionals.get(i).add(new TDoubleArrayList(probs));
- q.get(i).add(new TDoubleArrayList(probs));
- }
- }
-
- gradient = new double[training.getNumEdges()*numTags];
- setInitialParameters(lambda);
- computeObjectiveAndGradient();
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- SimplexProjection p = new SimplexProjection(constraintScale);
-
- double[] newPoint = point.clone();
- int edgeIndex = 0;
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- for (int t = 0; t < numTags; t++)
- {
- double[] subPoint = new double[edges.size()];
- for (int j = 0; j < edges.size(); ++j)
- subPoint[j] = point[edgeIndex+j*numTags+t];
-
- p.project(subPoint);
- for (int j = 0; j < edges.size(); ++j)
- newPoint[edgeIndex+j*numTags+t] = subPoint[j];
- }
-
- edgeIndex += edges.size() * numTags;
- }
-// System.out.println("Proj from: " + Arrays.toString(point));
-// System.out.println("Proj to: " + Arrays.toString(newPoint));
- return newPoint;
- }
-
- @Override
- public void setParameters(double[] params)
- {
- super.setParameters(params);
- computeObjectiveAndGradient();
- }
-
- @Override
- public double[] getGradient()
- {
- gradientCalls += 1;
- return gradient;
- }
-
- @Override
- public double getValue()
- {
- functionCalls += 1;
- return objective;
- }
-
- public void computeObjectiveAndGradient()
- {
- int edgeIndex = 0;
- objective = 0;
- Arrays.fill(gradient, 0);
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
-
- double z = 0;
- for (int t = 0; t < numTags; t++)
- {
- double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]);
- q.get(i).get(j).set(t, v);
- z += v;
- }
- objective += log(z) * e.getCount();
-
- for (int t = 0; t < numTags; t++)
- {
- double v = q.get(i).get(j).get(t) / z;
- q.get(i).get(j).set(t, v);
- gradient[edgeIndex+t] -= e.getCount() * v;
- }
-
- edgeIndex += numTags;
- }
- }
-// System.out.println("computeObjectiveAndGradient logz=" + objective);
-// System.out.println("lambda= " + Arrays.toString(parameters));
-// System.out.println("gradient=" + Arrays.toString(gradient));
- }
-
- public String toString()
- {
- StringBuilder sb = new StringBuilder();
- sb.append(getClass().getCanonicalName()).append(" with ");
- sb.append(parameters.length).append(" parameters and ");
- sb.append(training.getNumPhrases() * numTags).append(" constraints");
- return sb.toString();
- }
-
- double primal()
- {
- // primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct]
- // kl = sum_Y q(Y) log q(Y) / p(Y|X)
- // = sum_Y q(Y) { -lambda . phi(Y) - log Z }
- // = -log Z - lambda . E_q[phi]
- // = -objective + lambda . gradient
-
- double kl = -objective + MathUtils.dotProduct(parameters, gradient);
- double l1lmax = 0;
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int t = 0; t < numTags; t++)
- {
- double lmax = Double.NEGATIVE_INFINITY;
- for (int j = 0; j < edges.size(); ++j)
- lmax = max(lmax, q.get(i).get(j).get(t));
- l1lmax += lmax;
- }
- }
-
- return llh + kl + constraintScale * l1lmax;
- }
- }
-}
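
The heart of EStepDualObjective above fits in a few lines of NumPy. A hedged sketch for a single phrase (array names are assumptions of this note, not repository code): given conditionals p[j, t] = p(t, c_j | phrase) and multipliers lam[j, t], it forms q proportional to p * exp(-lambda), the dual objective log Z, and its gradient -E_q[phi], matching computeObjectiveAndGradient.

    import numpy as np

    # p:      conditionals p(t, c_j | phrase), shape (num_edges, num_tags)
    # lam:    lagrange multipliers for the same (edge, tag) pairs
    # counts: occurrence count of each edge
    def dual_objective(p, lam, counts):
        unnorm = p * np.exp(-lam)        # q(t|p,c) propto p(t,c|p) exp(-lambda)
        z = unnorm.sum(axis=1)           # per-edge normaliser
        q = unnorm / z[:, None]
        objective = float(np.dot(counts, np.log(z)))   # sum_j count_j log z_j
        gradient = -counts[:, None] * q                # d objective / d lambda
        return objective, gradient, q

The projection step then clamps each per-tag column of lam onto the scaled simplex, exactly as projectPoint does with SimplexProjection.
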
diff --git a/gi/posterior-regularisation/README b/gi/posterior-regularisation/README
deleted file mode 100644
index a3d54ffc..00000000
--- a/gi/posterior-regularisation/README
+++ /dev/null
@@ -1,3 +0,0 @@
- 557 ./cdec_extools/extractor -i btec/split.zh-en.al -c 500000 -L 12 -C | sort -t $'\t' -k 1 | ./cdec_extools/mr_stripe_rule_reduce > btec.concordance
- 559 wc -l btec.concordance
- 588 cat btec.concordance | sed 's/.* //' | awk '{ for (i=1; i < NF; i++) { x=substr($i, 1, 2); if (x == "C=") printf "\n"; else if (x != "||") printf "%s ", $i; }; printf "\n"; }' | sort | uniq | wc -l
diff --git a/gi/posterior-regularisation/alphabet.hh b/gi/posterior-regularisation/alphabet.hh
deleted file mode 100644
index 1db928da..00000000
--- a/gi/posterior-regularisation/alphabet.hh
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef _alphabet_hh
-#define _alphabet_hh
-
-#include <cassert>
-#include <iosfwd>
-#include <map>
-#include <string>
-#include <vector>
-
-// Alphabet: indexes a set of types
-template <typename T>
-class Alphabet: protected std::map<T, int>
-{
-public:
- Alphabet() {};
-
- bool empty() const { return std::map<T,int>::empty(); }
- int size() const { return std::map<T,int>::size(); }
-
- int operator[](const T &k) const
- {
- typename std::map<T,int>::const_iterator cit = find(k);
- if (cit != std::map<T,int>::end())
- return cit->second;
- else
- return -1;
- }
-
- int lookup(const T &k) const { return (*this)[k]; }
-
- int insert(const T &k)
- {
- int sz = size();
- assert((unsigned) sz == _items.size());
-
- std::pair<typename std::map<T,int>::iterator, bool>
- ins = std::map<T,int>::insert(make_pair(k, sz));
-
- if (ins.second)
- _items.push_back(k);
-
- return ins.first->second;
- }
-
- const T &type(int i) const
- {
- assert(i >= 0);
- assert(i < size());
- return _items[i];
- }
-
- std::ostream &display(std::ostream &out, int i) const
- {
- return out << type(i);
- }
-
-private:
- std::vector<T> _items;
-};
-
-#endif
diff --git a/gi/posterior-regularisation/canned.concordance b/gi/posterior-regularisation/canned.concordance
deleted file mode 100644
index 710973ff..00000000
--- a/gi/posterior-regularisation/canned.concordance
+++ /dev/null
@@ -1,4 +0,0 @@
-a 0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1 ||| 2 2 <PHRASE> 2 2 ||| C=1
-b 0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1
-c 2 2 <PHRASE> 2 2 ||| C=1 ||| 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1
-d 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1
diff --git a/gi/posterior-regularisation/em.cc b/gi/posterior-regularisation/em.cc
deleted file mode 100644
index f6c9fd68..00000000
--- a/gi/posterior-regularisation/em.cc
+++ /dev/null
@@ -1,830 +0,0 @@
-// Input of the form:
-// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1
-// phrase TAB [context]+
-// where context = phrase ||| C=... which are separated by |||
-
-// Model parameterised as follows:
-// - each phrase, p, is allocated a latent state, t
-// - this is used to generate the contexts, c
-// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR
-
-// Training with EM:
-// - e-step is estimating P(t|p,c) for all p,c
-// - m-step is estimating model parameters P(p,c,t) = P(t) P(p|t) P(c|t)
-
-// Sexing it up:
-// - constrain the posteriors P(t|c) and P(t|p) to have few high-magnitude entries
-// - improve the generation of phrase internals, e.g., generate edge words from
-// different distribution to central words
-
-#include "alphabet.hh"
-#include "log_add.hh"
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <sstream>
-#include <stdexcept>
-#include <vector>
-#include <tr1/random>
-#include <tr1/tuple>
-#include <nlopt.h>
-
-using namespace std;
-using namespace std::tr1;
-
-const int numTags = 5;
-const int numIterations = 100;
-const bool posterior_regularisation = true;
-const double PHRASE_VIOLATION_WEIGHT = 10;
-const double CONTEXT_VIOLATION_WEIGHT = 0;
-const bool includePhraseProb = false;
-
-// Data structures:
-Alphabet<string> lexicon;
-typedef vector<int> Phrase;
-typedef tuple<int, int, int, int> Context;
-Alphabet<Phrase> phrases;
-Alphabet<Context> contexts;
-
-typedef map<int, int> ContextCounts;
-typedef map<int, int> PhraseCounts;
-typedef map<int, ContextCounts> PhraseToContextCounts;
-typedef map<int, PhraseCounts> ContextToPhraseCounts;
-
-PhraseToContextCounts concordancePhraseToContexts;
-ContextToPhraseCounts concordanceContextToPhrases;
-
-typedef vector<double> Dist;
-typedef vector<Dist> ConditionalDist;
-Dist prior; // class -> P(class)
-vector<ConditionalDist> probCtx; // word -> class -> P(word | class), for each position of context word
-ConditionalDist probPhrase; // class -> P(word | class)
-Dist probPhraseLength; // class -> P(length | class) expressed as geometric distribution parameter
-
-mt19937 randomGenerator((size_t) time(NULL));
-uniform_real<double> uniDist(0.0, 1e-1);
-variate_generator< mt19937, uniform_real<double> > rng(randomGenerator, uniDist);
-
-void addRandomNoise(Dist &d);
-void normalise(Dist &d);
-void addTo(Dist &d, const Dist &e);
-int argmax(const Dist &d);
-
-map<Phrase, map<Context, int> > lambda_indices;
-
-Dist conditional_probs(const Phrase &phrase, const Context &context, double *normalisation = 0);
-template <typename T>
-Dist
-penalised_conditionals(const Phrase &phrase, const Context &context,
- const T &lambda, double *normalisation);
-//Dist penalised_conditionals(const Phrase &phrase, const Context &context, const double *lambda, double *normalisation = 0);
-double penalised_log_likelihood(int n, const double *lambda, double *gradient, void *data);
-void optimise_lambda(double delta, double gamma, vector<double> &lambda);
-double expected_violation_phrases(const double *lambda);
-double expected_violation_contexts(const double *lambda);
-double primal_kl_divergence(const double *lambda);
-double dual(const double *lambda);
-void print_primal_dual(const double *lambda, double delta, double gamma);
-
-ostream &operator<<(ostream &, const Phrase &);
-ostream &operator<<(ostream &, const Context &);
-ostream &operator<<(ostream &, const Dist &);
-ostream &operator<<(ostream &, const ConditionalDist &);
-
-int
-main(int argc, char *argv[])
-{
- randomGenerator.seed(time(NULL));
-
- int edges = 0;
- istream &input = cin;
- while (input.good())
- {
- // read the phrase
- string phraseString;
- Phrase phrase;
- getline(input, phraseString, '\t');
- istringstream pinput(phraseString);
- string token;
- while (pinput >> token)
- phrase.push_back(lexicon.insert(token));
- int phraseId = phrases.insert(phrase);
-
- // read the rest, storing each context
- string remainder;
- getline(input, remainder, '\n');
- istringstream rinput(remainder);
- Context context(-1, -1, -1, -1);
- int index = 0;
- while (rinput >> token)
- {
- if (token != "|||" && token != "<PHRASE>")
- {
- if (index < 4)
- {
- // eugh! damn templates
- switch (index)
- {
- case 0: get<0>(context) = lexicon.insert(token); break;
- case 1: get<1>(context) = lexicon.insert(token); break;
- case 2: get<2>(context) = lexicon.insert(token); break;
- case 3: get<3>(context) = lexicon.insert(token); break;
- default: assert(false);
- }
- index += 1;
- }
- else if (token.find("C=") == 0)
- {
- int contextId = contexts.insert(context);
- int count = atoi(token.substr(strlen("C=")).c_str());
- concordancePhraseToContexts[phraseId][contextId] += count;
- concordanceContextToPhrases[contextId][phraseId] += count;
- index = 0;
- context = Context(-1, -1, -1, -1);
- edges += 1;
- }
- }
- }
-
- // trigger EOF
- input >> ws;
- }
-
- cout << "Read in " << phrases.size() << " phrases"
- << " and " << contexts.size() << " contexts"
- << " and " << edges << " edges"
- << " and " << lexicon.size() << " word types\n";
-
- // FIXME: filter out low count phrases and low count contexts (based on individual words?)
- // now populate model parameters with uniform + random noise
- prior.resize(numTags, 1.0);
- addRandomNoise(prior);
- normalise(prior);
-
- probCtx.resize(4, ConditionalDist(numTags, Dist(lexicon.size(), 1.0)));
- if (includePhraseProb)
- probPhrase.resize(numTags, Dist(lexicon.size(), 1.0));
- for (int t = 0; t < numTags; ++t)
- {
- for (int j = 0; j < 4; ++j)
- {
- addRandomNoise(probCtx[j][t]);
- normalise(probCtx[j][t]);
- }
- if (includePhraseProb)
- {
- addRandomNoise(probPhrase[t]);
- normalise(probPhrase[t]);
- }
- }
- if (includePhraseProb)
- {
- probPhraseLength.resize(numTags, 0.5); // geometric distribution p=0.5
- addRandomNoise(probPhraseLength);
- }
-
- cout << "\tprior: " << prior << "\n";
- //cout << "\tcontext: " << probCtx << "\n";
- //cout << "\tphrase: " << probPhrase << "\n";
- //cout << "\tphraseLen: " << probPhraseLength << endl;
-
- vector<double> lambda;
-
- // now do EM training
- for (int iteration = 0; iteration < numIterations; ++iteration)
- {
- cout << "EM iteration " << iteration << endl;
-
- if (posterior_regularisation)
- optimise_lambda(PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT, lambda);
- //cout << "\tlambda " << lambda << endl;
-
- Dist countsPrior(numTags, 0.0);
- vector<ConditionalDist> countsCtx(4, ConditionalDist(numTags, Dist(lexicon.size(), 1e-10)));
- ConditionalDist countsPhrase(numTags, Dist(lexicon.size(), 1e-10));
- Dist countsPhraseLength(numTags, 0.0);
- Dist nPhrases(numTags, 0.0);
-
- double llh = 0;
- for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin();
- pcit != concordancePhraseToContexts.end(); ++pcit)
- {
- const Phrase &phrase = phrases.type(pcit->first);
-
- // e-step: estimate latent class probs; compile (class,word) stats for m-step
- for (ContextCounts::iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
-
- double z = 0;
- Dist tagCounts;
- if (!posterior_regularisation)
- tagCounts = conditional_probs(phrase, context, &z);
- else
- tagCounts = penalised_conditionals(phrase, context, lambda, &z);
-
- llh += log(z) * ccit->second;
- addTo(countsPrior, tagCounts); // FIXME: times ccit->second
-
- for (int t = 0; t < numTags; ++t)
- {
- for (int j = 0; j < 4; ++j)
- countsCtx[j][t][get<0>(context)] += tagCounts[t] * ccit->second;
-
- if (includePhraseProb)
- {
- for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit)
- countsPhrase[t][*pit] += tagCounts[t] * ccit->second;
- countsPhraseLength[t] += phrase.size() * tagCounts[t] * ccit->second;
- nPhrases[t] += tagCounts[t] * ccit->second;
- }
- }
- }
- }
-
- cout << "M-step\n";
-
- // m-step: normalise prior and (class,word) stats and assign to model parameters
- normalise(countsPrior);
- prior = countsPrior;
- for (int t = 0; t < numTags; ++t)
- {
- //cout << "\t\tt " << t << " prior " << countsPrior[t] << "\n";
- for (int j = 0; j < 4; ++j)
- normalise(countsCtx[j][t]);
- if (includePhraseProb)
- {
- normalise(countsPhrase[t]);
- countsPhraseLength[t] = nPhrases[t] / countsPhraseLength[t];
- }
- }
- probCtx = countsCtx;
- if (includePhraseProb)
- {
- probPhrase = countsPhrase;
- probPhraseLength = countsPhraseLength;
- }
-
- double *larray = new double[lambda.size()];
- copy(lambda.begin(), lambda.end(), larray);
- print_primal_dual(larray, PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT);
- delete [] larray;
-
- //cout << "\tllh " << llh << endl;
- //cout << "\tprior: " << prior << "\n";
- //cout << "\tcontext: " << probCtx << "\n";
- //cout << "\tphrase: " << probPhrase << "\n";
- //cout << "\tphraseLen: " << probPhraseLength << "\n";
- }
-
- // output class membership
- for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin();
- pcit != concordancePhraseToContexts.end(); ++pcit)
- {
- const Phrase &phrase = phrases.type(pcit->first);
- for (ContextCounts::iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- Dist tagCounts = conditional_probs(phrase, context, 0);
- cout << phrase << " ||| " << context << " ||| " << argmax(tagCounts) << "\n";
- }
- }
-
- return 0;
-}
-
-void addRandomNoise(Dist &d)
-{
- for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit)
- *dit += rng();
-}
-
-void normalise(Dist &d)
-{
- double z = 0;
- for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit)
- z += *dit;
- for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit)
- *dit /= z;
-}
-
-void addTo(Dist &d, const Dist &e)
-{
- assert(d.size() == e.size());
- for (int i = 0; i < (int) d.size(); ++i)
- d[i] += e[i];
-}
-
-int argmax(const Dist &d)
-{
- double best = d[0];
- int index = 0;
- for (int i = 1; i < (int) d.size(); ++i)
- {
- if (d[i] > best)
- {
- best = d[i];
- index = i;
- }
- }
- return index;
-}
-
-ostream &operator<<(ostream &out, const Phrase &phrase)
-{
- for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit)
- lexicon.display(((pit == phrase.begin()) ? out : out << " "), *pit);
- return out;
-}
-
-ostream &operator<<(ostream &out, const Context &context)
-{
- lexicon.display(out, get<0>(context));
- lexicon.display(out << " ", get<1>(context));
- lexicon.display(out << " <PHRASE> ", get<2>(context));
- lexicon.display(out << " ", get<3>(context));
- return out;
-}
-
-ostream &operator<<(ostream &out, const Dist &dist)
-{
- for (Dist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit)
- out << ((dit == dist.begin()) ? "" : " ") << *dit;
- return out;
-}
-
-ostream &operator<<(ostream &out, const ConditionalDist &dist)
-{
- for (ConditionalDist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit)
- out << ((dit == dist.begin()) ? "" : "; ") << *dit;
- return out;
-}
-
-// FIXME: slow - just use the phrase index, context index to do the mapping
-// (n.b. it's a sparse setup, not just equal to 3d array index)
-int
-lambda_index(const Phrase &phrase, const Context &context, int tag)
-{
- return lambda_indices[phrase][context] + tag;
-}
-
-template <typename T>
-Dist
-penalised_conditionals(const Phrase &phrase, const Context &context,
- const T &lambda, double *normalisation)
-{
- Dist d = conditional_probs(phrase, context, 0);
-
- double z = 0;
- for (int t = 0; t < numTags; ++t)
- {
- d[t] *= exp(-lambda[lambda_index(phrase, context, t)]);
- z += d[t];
- }
-
- if (normalisation)
- *normalisation = z;
-
- for (int t = 0; t < numTags; ++t)
- d[t] /= z;
-
- return d;
-}
-
-Dist
-conditional_probs(const Phrase &phrase, const Context &context, double *normalisation)
-{
- Dist tagCounts(numTags, 0.0);
- double z = 0;
- for (int t = 0; t < numTags; ++t)
- {
- double prob = prior[t];
- prob *= (probCtx[0][t][get<0>(context)] * probCtx[1][t][get<1>(context)] *
- probCtx[2][t][get<2>(context)] * probCtx[3][t][get<3>(context)]);
-
- if (includePhraseProb)
- {
- prob *= pow(1 - probPhraseLength[t], phrase.size() - 1) * probPhraseLength[t];
- for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit)
- prob *= probPhrase[t][*pit];
- }
-
- tagCounts[t] = prob;
- z += prob;
- }
- if (normalisation)
- *normalisation = z;
-
- for (int t = 0; t < numTags; ++t)
- tagCounts[t] /= z;
-
- return tagCounts;
-}
-
-double
-penalised_log_likelihood(int n, const double *lambda, double *grad, void *)
-{
- // return log Z(lambda, theta) over the corpus
- // where theta are the global parameters (prior, probCtx*, probPhrase*)
- // and lambda are lagrange multipliers for the posterior sparsity constraints
- //
- // this is formulated as:
- // f = log Z(lambda) = sum_i log ( sum_i p_theta(t_i|p_i,c_i) exp [-lambda_{t_i,p_i,c_i}] )
- // where i indexes the training examples - specifying the (p, c) pair (which may occur with count > 1)
- //
- // with derivative:
- // f'_{tpc} = frac { - count(t,p,c) p_theta(t|p,c) exp (-lambda_{t,p,c}) }
- // { sum_t' p_theta(t'|p,c) exp (-lambda_{t',p,c}) }
-
- //cout << "penalised_log_likelihood with lambda ";
- //copy(lambda, lambda+n, ostream_iterator<double>(cout, " "));
- //cout << "\n";
-
- double f = 0;
- if (grad)
- {
- for (int i = 0; i < n; ++i)
- grad[i] = 0.0;
- }
-
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- double z = 0;
- Dist scores = penalised_conditionals(phrase, context, lambda, &z);
-
- f += ccit->second * log(z);
- //cout << "\tphrase: " << phrase << " context: " << context << " count: " << ccit->second << " z " << z << endl;
- //cout << "\t\tscores: " << scores << "\n";
-
- if (grad)
- {
- for (int t = 0; t < numTags; ++t)
- {
- int i = lambda_index(phrase, context, t); // FIXME: redundant lookups
- assert(grad[i] == 0.0);
- grad[i] = - ccit->second * scores[t];
- }
- }
- }
- }
-
- //cout << "penalised_log_likelihood returning " << f;
- //if (grad)
- //{
- //cout << "\ngradient: ";
- //copy(grad, grad+n, ostream_iterator<double>(cout, " "));
- //}
- //cout << "\n";
-
- return f;
-}
-
-typedef struct
-{
- // one of p or c should be set to -1, in which case it will be marginalised out
- // i.e. sum_p' lambda_{p'ct} <= threshold
- // or sum_c' lambda_{pc't} <= threshold
- int p, c, t; double threshold;
-} constraint_data;
-
-double
-constraint_and_gradient(int n, const double *lambda, double *grad, void *data)
-{
- constraint_data *d = (constraint_data *) data;
- assert(d->t >= 0);
- assert(d->threshold >= 0);
-
- //cout << "constraint_and_gradient: t " << d->t << " p " << d->p << " c " << d->c << " tau " << d->threshold << endl;
- //cout << "\tlambda ";
- //copy(lambda, lambda+n, ostream_iterator<double>(cout, " "));
- //cout << "\n";
-
- // FIXME: it's crazy to use a dense gradient here => will only have a handful of non-zero entries
- if (grad)
- {
- for (int i = 0; i < n; ++i)
- grad[i] = 0.0;
- }
-
- //cout << "constraint_and_gradient: " << d->p << "; " << d->c << "; " << d->t << "; " << d->threshold << endl;
-
- if (d->p >= 0)
- {
- assert(d->c < 0);
- // sum_c lambda_pct <= delta [a.k.a. threshold]
- // => sum_c lambda_pct - delta <= 0
- // derivative_pct = { 1, if p and t match; 0, otherwise }
-
- double val = -d->threshold;
-
- const Phrase &phrase = phrases.type(d->p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(d->p);
- assert(pcit != concordancePhraseToContexts.end());
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- int i = lambda_index(phrase, context, d->t);
- val += lambda[i];
- if (grad) grad[i] = 1;
- }
- //cout << "\treturning " << val << endl;
-
- return val;
- }
- else
- {
- assert(d->c >= 0);
- assert(d->p < 0);
- // sum_p lambda_pct <= gamma [a.k.a. threshold]
- // => sum_p lambda_pct - gamma <= 0
- // derivative_pct = { 1, if c and t match; 0, otherwise }
-
- double val = -d->threshold;
-
- const Context &context = contexts.type(d->c);
- ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(d->c);
- assert(cpit != concordanceContextToPhrases.end());
- for (PhraseCounts::iterator pcit = cpit->second.begin();
- pcit != cpit->second.end(); ++pcit)
- {
- const Phrase &phrase = phrases.type(pcit->first);
- int i = lambda_index(phrase, context, d->t);
- val += lambda[i];
- if (grad) grad[i] = 1;
- }
- //cout << "\treturning " << val << endl;
-
- return val;
- }
-}
-
-void
-optimise_lambda(double delta, double gamma, vector<double> &lambdav)
-{
- int num_lambdas = lambdav.size();
- if (lambda_indices.empty() || lambdav.empty())
- {
- lambda_indices.clear();
- lambdav.clear();
-
- int i = 0;
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- lambda_indices[phrase][context] = i;
- i += numTags;
- }
- }
- num_lambdas = i;
- lambdav.resize(num_lambdas);
- }
- //cout << "optimise_lambda: #lagrange multipliers " << num_lambdas << endl;
-
- // FIXME: better to work with an implicit representation to save memory usage
- int num_constraints = (((delta > 0) ? phrases.size() : 0) + ((gamma > 0) ? contexts.size() : 0)) * numTags;
- //cout << "optimise_lambda: #constraints " << num_constraints << endl;
- constraint_data *data = new constraint_data[num_constraints];
- int i = 0;
- if (delta > 0)
- {
- for (int p = 0; p < phrases.size(); ++p)
- {
- for (int t = 0; t < numTags; ++t, ++i)
- {
- constraint_data &d = data[i];
- d.p = p;
- d.c = -1;
- d.t = t;
- d.threshold = delta;
- }
- }
- }
-
- if (gamma > 0)
- {
- for (int c = 0; c < contexts.size(); ++c)
- {
- for (int t = 0; t < numTags; ++t, ++i)
- {
- constraint_data &d = data[i];
- d.p = -1;
- d.c = c;
- d.t = t;
- d.threshold = gamma;
- }
- }
- }
- assert(i == num_constraints);
-
- double lambda[num_lambdas];
- double lb[num_lambdas], ub[num_lambdas];
- for (i = 0; i < num_lambdas; ++i)
- {
- lambda[i] = lambdav[i]; // starting value
- lb[i] = 0; // lower bound
- if (delta <= 0) // upper bound
- ub[i] = gamma;
- else if (gamma <= 0)
- ub[i] = delta;
- else
- assert(false);
- }
-
- //print_primal_dual(lambda, delta, gamma);
-
- double minf;
- int error_code = nlopt_minimize_constrained(NLOPT_LN_COBYLA, num_lambdas, penalised_log_likelihood, NULL,
- num_constraints, constraint_and_gradient, data, sizeof(constraint_data),
- lb, ub, lambda, &minf, -HUGE_VAL, 0.0, 0.0, 1e-4, NULL, 0, 0.0);
- //cout << "optimise error code " << error_code << endl;
-
- //print_primal_dual(lambda, delta, gamma);
-
- delete [] data;
-
- if (error_code < 0)
- cout << "WARNING: optimisation failed with error code: " << error_code << endl;
- //else
- //{
- //cout << "success; minf " << minf << endl;
- //print_primal_dual(lambda, delta, gamma);
- //}
-
- lambdav = vector<double>(&lambda[0], &lambda[0] + num_lambdas);
-}
-
-// FIXME: inefficient - cache the scores
-double
-expected_violation_phrases(const double *lambda)
-{
- // sum_pt max_c E_q[phi_pct]
- double violation = 0;
-
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
-
- for (int t = 0; t < numTags; ++t)
- {
- double best = 0;
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- Dist scores = penalised_conditionals(phrase, context, lambda, 0);
- best = max(best, scores[t]);
- }
- violation += best;
- }
- }
-
- return violation;
-}
-
-// FIXME: inefficient - cache the scores
-double
-expected_violation_contexts(const double *lambda)
-{
- // sum_ct max_p E_q[phi_pct]
- double violation = 0;
-
- for (int c = 0; c < contexts.size(); ++c)
- {
- const Context &context = contexts.type(c);
- ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(c);
-
- for (int t = 0; t < numTags; ++t)
- {
- double best = 0;
- for (PhraseCounts::iterator pit = cpit->second.begin();
- pit != cpit->second.end(); ++pit)
- {
- const Phrase &phrase = phrases.type(pit->first);
- Dist scores = penalised_conditionals(phrase, context, lambda, 0);
- best = max(best, scores[t]);
- }
- violation += best;
- }
- }
-
- return violation;
-}
-
-// FIXME: possibly inefficient
-double
-primal_likelihood() // FIXME: primal evaluation needs to use lambda and calculate l1linf terms
-{
- double llh = 0;
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- double z = 0;
- Dist scores = conditional_probs(phrase, context, &z);
- llh += ccit->second * log(z);
- }
- }
- return llh;
-}
-
-// FIXME: inefficient - cache the scores
-double
-primal_kl_divergence(const double *lambda)
-{
- // return KL(q || p) = sum_y q(y) { log q(y) - log p(y | x) }
- // = sum_y q(y) { log p(y | x) - lambda . phi(x, y) - log Z - log p(y | x) }
- // = sum_y q(y) { - lambda . phi(x, y) } - log Z
- // and q(y) factors with each edge, ditto for Z
-
- double feature_sum = 0, log_z = 0;
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
-
- double local_z = 0;
- double local_f = 0;
- Dist d = conditional_probs(phrase, context, 0);
- for (int t = 0; t < numTags; ++t)
- {
- int i = lambda_index(phrase, context, t);
- double s = d[t] * exp(-lambda[i]);
- local_f += lambda[i] * s;
- local_z += s;
- }
-
- log_z += ccit->second * log(local_z);
- feature_sum += ccit->second * (local_f / local_z);
- }
- }
-
- return -feature_sum - log_z;
-}
-
-// FIXME: inefficient - cache the scores
-double
-dual(const double *lambda)
-{
- // return log(Z) = - log { sum_y p(y | x) exp( - lambda . phi(x, y) ) }
- // n.b. have flipped the sign as we're minimising
-
- double logz = 0;
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- double lz = 0;
- Dist scores = penalised_conditionals(phrase, context, lambda, &lz);
- logz += log(lz) * ccit->second;
- }
- }
- return logz;
-}
-
-void
-print_primal_dual(const double *lambda, double delta, double gamma)
-{
- double likelihood = primal_likelihood();
- double kl = primal_kl_divergence(lambda);
- double sum_pt = expected_violation_phrases(lambda);
- double sum_ct = expected_violation_contexts(lambda);
- //double d = dual(lambda);
-
- cout << "\tllh=" << likelihood
- << " kl=" << kl
- << " violations phrases=" << sum_pt
- << " contexts=" << sum_ct
- //<< " primal=" << (kl + delta * sum_pt + gamma * sum_ct)
- //<< " dual=" << d
- << " objective=" << (likelihood - kl + delta * sum_pt + gamma * sum_ct)
- << endl;
-}
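
Stripped of the regularisation machinery, the model scored by conditional_probs above is a tag prior times four position-specific multinomials. A small NumPy sketch under the same assumptions (includePhraseProb = false; prob_ctx[j] is a tags-by-vocabulary array, one per LL/L/R/RR position; names are illustrative):

    import numpy as np

    # prior: shape (T,); prob_ctx: four (T, V) arrays; context: four word ids
    def conditional_probs(prior, prob_ctx, context):
        scores = prior.copy()
        for j, w in enumerate(context):   # positions LL, L, R, RR
            scores *= prob_ctx[j][:, w]
        z = scores.sum()
        return scores / z, z              # tag posterior and normaliser
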
diff --git a/gi/posterior-regularisation/invert.hh b/gi/posterior-regularisation/invert.hh
deleted file mode 100644
index d06356e9..00000000
--- a/gi/posterior-regularisation/invert.hh
+++ /dev/null
@@ -1,45 +0,0 @@
-// The following code inverts the matrix input using LU-decomposition with
-// backsubstitution of unit vectors. Reference: Numerical Recipies in C, 2nd
-// ed., by Press, Teukolsky, Vetterling & Flannery.
-// Code written by Fredrik Orderud.
-// http://www.crystalclearsoftware.com/cgi-bin/boost_wiki/wiki.pl?LU_Matrix_Inversion
-
-#ifndef INVERT_MATRIX_HPP
-#define INVERT_MATRIX_HPP
-
-// REMEMBER to update "lu.hpp" header includes from boost-CVS
-#include <boost/numeric/ublas/vector.hpp>
-#include <boost/numeric/ublas/vector_proxy.hpp>
-#include <boost/numeric/ublas/matrix.hpp>
-#include <boost/numeric/ublas/triangular.hpp>
-#include <boost/numeric/ublas/lu.hpp>
-#include <boost/numeric/ublas/io.hpp>
-
-namespace ublas = boost::numeric::ublas;
-
-/* Matrix inversion routine.
- Uses lu_factorize and lu_substitute in uBLAS to invert a matrix */
-template<class T>
-bool invert_matrix(const ublas::matrix<T>& input, ublas::matrix<T>& inverse)
-{
- using namespace boost::numeric::ublas;
- typedef permutation_matrix<std::size_t> pmatrix;
- // create a working copy of the input
- matrix<T> A(input);
- // create a permutation matrix for the LU-factorization
- pmatrix pm(A.size1());
-
- // perform LU-factorization
- int res = lu_factorize(A,pm);
- if( res != 0 ) return false;
-
- // create identity matrix of "inverse"
- inverse.assign(ublas::identity_matrix<T>(A.size1()));
-
- // backsubstitute to get the inverse
- lu_substitute(A, pm, inverse);
-
- return true;
-}
-
-#endif //INVERT_MATRIX_HPP
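
The same LU-factorise-then-backsubstitute-unit-vectors approach, sketched with SciPy (an aside of this note, not code from the repository): factorise once, then solve against the columns of the identity.

    import numpy as np
    from scipy.linalg import lu_factor, lu_solve

    def invert_matrix(a):
        # LU factorisation with partial pivoting, then backsubstitution of
        # the columns of I, mirroring invert_matrix above; note that unlike
        # the C++ version, a singular input is not reported via a bool flag
        lu, piv = lu_factor(a)
        return lu_solve((lu, piv), np.eye(a.shape[0]))
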
diff --git a/gi/posterior-regularisation/linesearch.py b/gi/posterior-regularisation/linesearch.py
deleted file mode 100644
index 5a3f2e9c..00000000
--- a/gi/posterior-regularisation/linesearch.py
+++ /dev/null
@@ -1,58 +0,0 @@
-## Automatically adapted for scipy Oct 07, 2005 by convertcode.py
-
-from scipy.optimize import minpack2
-import numpy
-
-import __builtin__
-pymin = __builtin__.min
-
-def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
- args=(), c1=1e-4, c2=0.9, amax=50):
-
- fc = 0
- gc = 0
- phi0 = old_fval
- derphi0 = numpy.dot(gfk,pk)
- alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0)
- # trevor: added this test
- alpha1 = pymin(alpha1,amax)
-
- if isinstance(myfprime,type(())):
- eps = myfprime[1]
- fprime = myfprime[0]
- newargs = (f,eps) + args
- gradient = False
- else:
- fprime = myfprime
- newargs = args
- gradient = True
-
- xtol = 1e-14
- amin = 1e-8
- isave = numpy.zeros((2,), numpy.intc)
- dsave = numpy.zeros((13,), float)
- task = 'START'
- fval = old_fval
- gval = gfk
-
- while 1:
- stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2,
- xtol, task, amin, amax,isave,dsave)
- #print 'minpack2.dcsrch', alpha1, phi0, derphi0, c1, c2, xtol, task, amin, amax,isave,dsave
- #print 'returns', stp,fval,derphi,task
-
- if task[:2] == 'FG':
- alpha1 = stp
- fval = f(xk+stp*pk,*args)
- fc += 1
- gval = fprime(xk+stp*pk,*newargs)
- if gradient: gc += 1
- else: fc += len(xk) + 1
- phi0 = fval
- derphi0 = numpy.dot(gval,pk)
- else:
- break
-
- if task[:5] == 'ERROR' or task[1:4] == 'WARN':
- stp = None # failed
- return stp, fc, gc, fval, old_fval, gval
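
dcsrch above hunts for a step length satisfying the strong Wolfe conditions:
sufficient decrease, phi(t) <= phi(0) + c1*t*phi'(0), plus the curvature condition
|phi'(t)| <= c2*|phi'(0)|. For orientation, a hypothetical Java sketch of the simpler
Armijo backtracking variant, which enforces only the sufficient-decrease half (all
names here are illustrative):

    import java.util.function.DoubleUnaryOperator;

    public class Backtracking {
        // Returns t with phi(t) <= phi(0) + c1 * t * phi'(0), or -1 on failure.
        static double armijo(DoubleUnaryOperator phi, double phi0, double dphi0,
                             double c1, double shrink, int maxTries) {
            double t = 1.0;
            for (int i = 0; i < maxTries; i++) {
                if (phi.applyAsDouble(t) <= phi0 + c1 * t * dphi0) return t;
                t *= shrink; // failed the sufficient-decrease test: shrink the step
            }
            return -1;
        }

        public static void main(String[] args) {
            // 1-D example: f(x) = x^2 at x = 1 along direction d = -1.
            DoubleUnaryOperator phi = t -> (1 - t) * (1 - t);
            System.out.println(armijo(phi, 1.0, -2.0, 1e-4, 0.5, 50)); // prints 1.0
        }
    }
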
diff --git a/gi/posterior-regularisation/log_add.hh b/gi/posterior-regularisation/log_add.hh
deleted file mode 100644
index e0620c5a..00000000
--- a/gi/posterior-regularisation/log_add.hh
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef log_add_hh
-#define log_add_hh
-
-#include <limits>
-#include <iostream>
-#include <cassert>
-#include <cmath>
-
-template <typename T>
-struct Log
-{
- static T zero() { return -std::numeric_limits<T>::infinity(); }
-
- static T add(T l1, T l2)
- {
- if (l1 == zero()) return l2;
- if (l1 > l2)
- return l1 + std::log(1 + exp(l2 - l1));
- else
- return l2 + std::log(1 + exp(l1 - l2));
- }
-
- static T subtract(T l1, T l2)
- {
- assert(l1 >= l2); // otherwise the log argument below is negative
- return l1 + std::log(1 - std::exp(l2 - l1));
- }
-};
-
-#endif
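
Log<T>::add above is the usual overflow-safe log-sum-exp trick:
log(e^a + e^b) = m + log(1 + e^(min(a,b) - m)) with m = max(a,b), so the exponent
passed to exp is never positive. A hypothetical Java mirror (adding a guard on the
second argument and using log1p for accuracy when the exponential is tiny):

    public class LogAdd {
        // log(exp(l1) + exp(l2)) without overflow, as in Log<T>::add.
        static double logAdd(double l1, double l2) {
            if (l1 == Double.NEGATIVE_INFINITY) return l2;
            if (l2 == Double.NEGATIVE_INFINITY) return l1;
            double hi = Math.max(l1, l2), lo = Math.min(l1, l2);
            return hi + Math.log1p(Math.exp(lo - hi));
        }

        public static void main(String[] args) {
            // log(e^-1000 + e^-1000) = -1000 + log 2; the naive form underflows to -inf.
            System.out.println(logAdd(-1000, -1000)); // ~ -999.3069
        }
    }
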
diff --git a/gi/posterior-regularisation/prjava.jar b/gi/posterior-regularisation/prjava.jar
deleted file mode 120000
index da8bf761..00000000
--- a/gi/posterior-regularisation/prjava.jar
+++ /dev/null
@@ -1 +0,0 @@
-prjava/prjava-20100708.jar \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/Makefile b/gi/posterior-regularisation/prjava/Makefile
deleted file mode 100755
index bd3bfca0..00000000
--- a/gi/posterior-regularisation/prjava/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-all:
- ant dist
-
-check:
- echo no tests
-
-clean:
- ant clean
diff --git a/gi/posterior-regularisation/prjava/build.xml b/gi/posterior-regularisation/prjava/build.xml
deleted file mode 100644
index 7222b3c8..00000000
--- a/gi/posterior-regularisation/prjava/build.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-<project name="prjava" default="dist" basedir=".">
- <!-- set global properties for this build -->
- <property name="src" location="src"/>
- <property name="build" location="build"/>
- <property name="dist" location="lib"/>
- <path id="classpath">
- <pathelement location="lib/trove-2.0.2.jar"/>
- <pathelement location="lib/optimization.jar"/>
- <pathelement location="lib/jopt-simple-3.2.jar"/>
- <pathelement location="lib/commons-math-2.1.jar"/>
- </path>
-
- <target name="init">
- <!-- Create the time stamp -->
- <tstamp/>
- <!-- Create the build directory structure used by compile -->
- <mkdir dir="${build}"/>
- </target>
-
- <target name="compile" depends="init"
- description="compile the source " >
- <!-- Compile the java code from ${src} into ${build} -->
- <javac srcdir="${src}" destdir="${build}" includeantruntime="false">
- <classpath refid="classpath"/>
- </javac>
- </target>
-
- <target name="dist" depends="compile"
- description="generate the distribution" >
- <jar jarfile="${dist}/prjava-${DSTAMP}.jar" basedir="${build}"/>
- <symlink link="./prjava.jar" resource="${dist}/prjava-${DSTAMP}.jar" overwrite="true"/>
- </target>
-
- <target name="clean"
- description="clean up" >
- <delete dir="${build}"/>
- </target>
-</project>
diff --git a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar b/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar
deleted file mode 100644
index 43b4b369..00000000
--- a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar b/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar
deleted file mode 100644
index 56373621..00000000
--- a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar b/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar
deleted file mode 100644
index 3e59fbf3..00000000
--- a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java
deleted file mode 100644
index be0a6ed6..00000000
--- a/gi/posterior-regularisation/prjava/src/arr/F.java
+++ /dev/null
@@ -1,99 +0,0 @@
-package arr;
-
-import java.util.Arrays;
-import java.util.Random;
-
-public class F {
- public static Random rng = new Random();
-
- public static void randomise(double probs[])
- {
- randomise(probs, true);
- }
-
- public static void randomise(double probs[], boolean normalise)
- {
- double z = 0;
- for (int i = 0; i < probs.length; ++i)
- {
- probs[i] = 10 + rng.nextDouble();
- if (normalise)
- z += probs[i];
- }
-
- if (normalise)
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- }
-
- public static void uniform(double probs[])
- {
- for (int i = 0; i < probs.length; ++i)
- probs[i] = 1.0 / probs.length;
- }
-
- public static void l1normalize(double [] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- sum+=a[i];
- }
- if(sum==0)
- Arrays.fill(a, 1.0/a.length);
- else
- {
- for(int i=0;i<a.length;i++){
- a[i]/=sum;
- }
- }
- }
-
- public static void l1normalize(double [][] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- sum+=a[i][j];
- }
- }
- if(sum==0){
- return;
- }
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]/=sum;
- }
- }
- }
-
- public static double l1norm(double a[]){
- // FIXME: this isn't the l1 norm when entries are negative
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i];
- }
- return norm;
- }
-
- public static double l2norm(double a[]){
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i]*a[i];
- }
- return Math.sqrt(norm);
- }
-
- public static int argmax(double probs[])
- {
- double m = Double.NEGATIVE_INFINITY;
- int mi = -1;
- for (int i = 0; i < probs.length; ++i)
- {
- if (probs[i] > m)
- {
- m = probs[i];
- mi = i;
- }
- }
- return mi;
- }
-
-}
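
As the FIXME in l1norm notes, summing the raw entries is only the l1 norm for
non-negative input, which is all the callers here pass (probability vectors). A
sign-safe drop-in for F, shown only for reference:

    // Hypothetical variant: true l1 norm even when entries are negative.
    public static double l1normAbs(double a[]) {
        double norm = 0;
        for (int i = 0; i < a.length; i++) {
            norm += Math.abs(a[i]);
        }
        return norm;
    }
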
diff --git a/gi/posterior-regularisation/prjava/src/data/Corpus.java b/gi/posterior-regularisation/prjava/src/data/Corpus.java
deleted file mode 100644
index 425ede11..00000000
--- a/gi/posterior-regularisation/prjava/src/data/Corpus.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package data;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class Corpus {
-
- public static final String alphaFilename="../posdata/corpus.alphabet";
- public static final String tagalphaFilename="../posdata/corpus.tag.alphabet";
-
-// public static final String START_SYM="<s>";
- public static final String END_SYM="<e>";
- public static final String NUM_TOK="<NUM>";
-
- public static final String UNK_TOK="<unk>";
-
- private ArrayList<String[]>sent;
- private ArrayList<int[]>data;
-
- public ArrayList<String[]>tag;
- public ArrayList<int[]>tagData;
-
- public static boolean convertNumTok=true;
-
- private HashMap<String,Integer>freq;
- public HashMap<String,Integer>vocab;
-
- public HashMap<String,Integer>tagVocab;
- private int tagV;
-
- private int V;
-
- public static void main(String[] args) {
- Corpus c=new Corpus("../posdata/en_test.conll");
- System.out.println(
- Arrays.toString(c.get(0))
- );
- System.out.println(
- Arrays.toString(c.getInt(0))
- );
-
- System.out.println(
- Arrays.toString(c.get(1))
- );
- System.out.println(
- Arrays.toString(c.getInt(1))
- );
- }
-
- public Corpus(String filename,HashMap<String,Integer>dict){
- V=0;
- tagV=0;
- freq=new HashMap<String,Integer>();
- tagVocab=new HashMap<String,Integer>();
- vocab=dict;
-
- sent=new ArrayList<String[]>();
- tag=new ArrayList<String[]>();
-
- Scanner sc=io.FileUtil.openInFile(filename);
- ArrayList<String>s=new ArrayList<String>();
- // s.add(START_SYM);
- while(sc.hasNextLine()){
- String line=sc.nextLine();
- String toks[]=line.split("\t");
- if(toks.length<2){
- s.add(END_SYM);
- sent.add(s.toArray(new String[0]));
- s=new ArrayList<String>();
- // s.add(START_SYM);
- continue;
- }
- String tok=toks[1].toLowerCase();
- s.add(tok);
- }
- sc.close();
-
- buildData();
- }
-
- public Corpus(String filename){
- V=0;
- freq=new HashMap<String,Integer>();
- vocab=new HashMap<String,Integer>();
- tagVocab=new HashMap<String,Integer>();
-
- sent=new ArrayList<String[]>();
- tag=new ArrayList<String[]>();
-
- System.out.println("Reading:"+filename);
-
- Scanner sc=io.FileUtil.openInFile(filename);
- ArrayList<String>s=new ArrayList<String>();
- ArrayList<String>tags=new ArrayList<String>();
- //s.add(START_SYM);
- while(sc.hasNextLine()){
- String line=sc.nextLine();
- String toks[]=line.split("\t");
- if(toks.length<2){
- s.add(END_SYM);
- tags.add(END_SYM);
- if(s.size()>2){
- sent.add(s.toArray(new String[0]));
- tag.add(tags.toArray(new String [0]));
- }
- s=new ArrayList<String>();
- tags=new ArrayList<String>();
- // s.add(START_SYM);
- continue;
- }
-
- String tok=toks[1].toLowerCase();
- if(convertNumTok && tok.matches(".*\\d.*")){
- tok=NUM_TOK;
- }
- s.add(tok);
-
- if(toks.length>3){
- tok=toks[3].toLowerCase();
- }else{
- tok="_";
- }
- tags.add(tok);
-
- }
- sc.close();
-
- for(int i=0;i<sent.size();i++){
- String[]toks=sent.get(i);
- for(int j=0;j<toks.length;j++){
- addVocab(toks[j]);
- addTag(tag.get(i)[j]);
- }
- }
-
- buildVocab();
- buildData();
- System.out.println(data.size()+" sentences, "+vocab.keySet().size()+" word types");
- }
-
- public String[] get(int idx){
- return sent.get(idx);
- }
-
- private void addVocab(String s){
- Integer integer=freq.get(s);
- if(integer==null){
- integer=0;
- }
- freq.put(s, integer+1);
- }
-
- public int tokIdx(String tok){
- Integer integer=vocab.get(tok);
- if(integer==null){
- return V;
- }
- return integer;
- }
-
- public int tagIdx(String tok){
- Integer integer=tagVocab.get(tok);
- if(integer==null){
- return tagV;
- }
- return integer;
- }
-
- private void buildData(){
- data=new ArrayList<int[]>();
- for(int i=0;i<sent.size();i++){
- String s[]=sent.get(i);
- data.add(new int [s.length]);
- for(int j=0;j<s.length;j++){
- data.get(i)[j]=tokIdx(s[j]);
- }
- }
-
- tagData=new ArrayList<int[]>();
- for(int i=0;i<tag.size();i++){
- String s[]=tag.get(i);
- tagData.add(new int [s.length]);
- for(int j=0;j<s.length;j++){
- tagData.get(i)[j]=tagIdx(s[j]);
- }
- }
- sent=null;
- tag=null;
- System.gc();
- }
-
- public int [] getInt(int idx){
- return data.get(idx);
- }
-
- /**
- *
- * @return size of vocabulary
- */
- public int getVocabSize(){
- return V;
- }
-
- public int [][]getAllData(){
- return data.toArray(new int [0][]);
- }
-
- public int [][]getTagData(){
- return tagData.toArray(new int [0][]);
- }
-
- private void buildVocab(){
- for (String key:freq.keySet()){
- if(freq.get(key)>2){
- vocab.put(key, V);
- V++;
- }
- }
- io.SerializedObjects.writeSerializedObject(vocab, alphaFilename);
- io.SerializedObjects.writeSerializedObject(tagVocab,tagalphaFilename);
- }
-
- private void addTag(String tag){
- Integer i=tagVocab.get(tag);
- if(i==null){
- tagVocab.put(tag, tagV);
- tagV++;
- }
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMM.java b/gi/posterior-regularisation/prjava/src/hmm/HMM.java
deleted file mode 100644
index 17a4679f..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/HMM.java
+++ /dev/null
@@ -1,579 +0,0 @@
-package hmm;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Scanner;
-
-public class HMM {
-
-
- //trans[i][j]=prob of going FROM i to j
- double [][]trans;
- double [][]emit;
- double []pi;
- int [][]data;
- int [][]tagdata;
-
- double logtrans[][];
-
- public HMMObjective o;
-
- public static void main(String[] args) {
-
- }
-
- public HMM(int n_state,int n_emit,int [][]data){
- trans=new double [n_state][n_state];
- emit=new double[n_state][n_emit];
- pi=new double [n_state];
- System.out.println(" random initial parameters");
- fillRand(trans);
- fillRand(emit);
- fillRand(pi);
-
- this.data=data;
-
- }
-
- private void fillRand(double [][] a){
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]=Math.random();
- }
- l1normalize(a[i]);
- }
- }
- private void fillRand(double []a){
- for(int i=0;i<a.length;i++){
- a[i]=Math.random();
- }
- l1normalize(a);
- }
-
- private double loglikely=0;
-
- public void EM(){
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
- loglikely=0;
-
- //E
- for(int i=0;i<data.length;i++){
-
- double [][][] post=forwardBackward(data[i]);
- incrementExpCnt(post, data[i],
- trans_exp_cnt,
- emit_exp_cnt,
- start_exp_cnt);
-
-
- if(i%100==0){
- System.out.print(".");
- }
- if(i%1000==0){
- System.out.println(i);
- }
-
- }
- System.out.println("Log likelihood: "+loglikely);
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
- }
-
- private double [][][]forwardBackward(int [] seq){
- double a[][]=new double [seq.length][trans.length];
- double b[][]=new double [seq.length][trans.length];
-
- int len=seq.length;
- //initialize the first step
- for(int i=0;i<trans.length;i++){
- a[0][i]=emit[i][seq[0]]*pi[i];
- b[len-1][i]=1;
- }
-
- // log of the normaliser; these terms sum to the log-likelihood
- double c=Math.log(l1norm(a[0]));
-
- l1normalize(a[0]);
- l1normalize(b[len-1]);
-
-
-
- //forward
- for(int n=1;n<len;n++){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- a[n][i]+=trans[j][i]*a[n-1][j];
- }
- a[n][i]*=emit[i][seq[n]];
- }
- c+=Math.log(l1norm(a[n]));
- l1normalize(a[n]);
- }
-
- loglikely+=c;
-
- //backward
- for(int n=len-2;n>=0;n--){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- b[n][i]+=trans[i][j]*b[n+1][j]*emit[j][seq[n+1]];
- }
- }
- l1normalize(b[n]);
- }
-
-
- //expected transition
- double p[][][]=new double [seq.length][trans.length][trans.length];
- for(int n=0;n<len-1;n++){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- p[n][i][j]=a[n][i]*trans[i][j]*emit[j][seq[n+1]]*b[n+1][j];
-
- }
- }
-
- l1normalize(p[n]);
- }
- return p;
- }
-
- private void incrementExpCnt(
- double post[][][],int [] seq,
- double trans_exp_cnt[][],
- double emit_exp_cnt[][],
- double start_exp_cnt[])
- {
-
- for(int n=0;n<post.length;n++){
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[n][i][j];
- trans_exp_cnt[i][j]+=post[n][i][j];
- }
-
- emit_exp_cnt[i][seq[n]]+=py;
-
- }
- }
-
- //the first state
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[0][i][j];
- }
- start_exp_cnt[i]+=py;
- }
-
-
- //the last state
- int len=post.length;
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[len-2][j][i];
- }
- emit_exp_cnt[i][seq[len-1]]+=py;
- }
- }
-
- public void l1normalize(double [] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- sum+=a[i];
- }
- if(sum==0){
- return ;
- }
- for(int i=0;i<a.length;i++){
- a[i]/=sum;
- }
- }
-
- public void l1normalize(double [][] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- sum+=a[i][j];
- }
- }
- if(sum==0){
- return;
- }
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]/=sum;
- }
- }
- }
-
- public void writeModel(String modelFilename) throws FileNotFoundException, IOException{
- PrintStream ps=io.FileUtil.printstream(new File(modelFilename));
- ps.println(trans.length);
- ps.println("Initial Probabilities:");
- for(int i=0;i<pi.length;i++){
- ps.print(pi[i]+"\t");
- }
- ps.println();
- ps.println("Transition Probabilities:");
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans[i].length;j++){
- ps.print(trans[i][j]+"\t");
- }
- ps.println();
- }
- ps.println("Emission Probabilities:");
- ps.println(emit[0].length);
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<emit[i].length;j++){
- ps.println(emit[i][j]);
- }
- ps.println();
- }
- ps.close();
- }
-
- public HMM(){
-
- }
-
- public void readModel(String modelFilename){
- Scanner sc=io.FileUtil.openInFile(modelFilename);
-
- int n_state=sc.nextInt();
- sc.nextLine();
- sc.nextLine();
- pi=new double [n_state];
- for(int i=0;i<n_state;i++){
- pi[i]=sc.nextDouble();
- }
- sc.nextLine();
- sc.nextLine();
- trans=new double[n_state][n_state];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=sc.nextDouble();
- }
- }
- sc.nextLine();
- sc.nextLine();
-
- int n_obs=sc.nextInt();
- emit=new double[n_state][n_obs];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=sc.nextDouble();
- }
- }
- sc.close();
- }
-
- public int []viterbi(int [] seq){
- double [][]p=new double [seq.length][trans.length];
- int backp[][]=new int [seq.length][trans.length];
-
- for(int i=0;i<trans.length;i++){
- p[0][i]=Math.log(emit[i][seq[0]]*pi[i]);
- }
-
- double a[][]=logtrans;
- if(logtrans==null){
- a=new double [trans.length][trans.length];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- a[i][j]=Math.log(trans[i][j]);
- }
- }
- logtrans=a;
- }
-
- double maxprob=0;
- for(int n=1;n<seq.length;n++){
- for(int i=0;i<trans.length;i++){
- maxprob=p[n-1][0]+a[0][i];
- backp[n][i]=0;
- for(int j=1;j<trans.length;j++){
- double prob=p[n-1][j]+a[j][i];
- if(maxprob<prob){
- backp[n][i]=j;
- maxprob=prob;
- }
- }
- p[n][i]=maxprob+Math.log(emit[i][seq[n]]);
- }
- }
-
- maxprob=p[seq.length-1][0];
- int maxIdx=0;
- for(int i=1;i<trans.length;i++){
- if(p[seq.length-1][i]>maxprob){
- maxprob=p[seq.length-1][i];
- maxIdx=i;
- }
- }
- int ans[]=new int [seq.length];
- ans[seq.length-1]=maxIdx;
- for(int i=seq.length-2;i>=0;i--){
- ans[i]=backp[i+1][ans[i+1]];
- }
- return ans;
- }
-
- public double l1norm(double a[]){
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i];
- }
- return norm;
- }
-
- public double [][]getEmitProb(){
- return emit;
- }
-
- public int [] sample(int terminalSym){
- ArrayList<Integer > s=new ArrayList<Integer>();
- int state=sample(pi);
- int sym=sample(emit[state]);
- while(sym!=terminalSym){
- s.add(sym);
- state=sample(trans[state]);
- sym=sample(emit[state]);
- }
-
- int ans[]=new int [s.size()];
- for(int i=0;i<ans.length;i++){
- ans[i]=s.get(i);
- }
- return ans;
- }
-
- public int sample(double p[]){
- double r=Math.random();
- double sum=0;
- for(int i=0;i<p.length;i++){
- sum+=p[i];
- if(sum>=r){
- return i;
- }
- }
- return p.length-1;
- }
-
- public void train(int tagdata[][]){
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
-
- for(int i=0;i<tagdata.length;i++){
- start_exp_cnt[tagdata[i][0]]++;
-
- for(int j=0;j<tagdata[i].length;j++){
- if(j+1<tagdata[i].length){
- trans_exp_cnt[ tagdata[i][j] ] [ tagdata[i][j+1] ]++;
- }
- emit_exp_cnt[tagdata[i][j]][data[i][j]]++;
- }
-
- }
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
- }
-
- private void addOneSmooth(double a[][]){
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]+=0.01;
- }
- //l1normalize(a[i]);
- }
- }
-
- public void PREM(){
-
- o.optimizeWithProjectedGradientDescent();
-
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
-
- o.loglikelihood=0;
- //E
- for(int sentNum=0;sentNum<data.length;sentNum++){
-
- double [][][] post=o.forwardBackward(sentNum);
- incrementExpCnt(post, data[sentNum],
- trans_exp_cnt,
- emit_exp_cnt,
- start_exp_cnt);
-
-
- if(sentNum%100==0){
- System.out.print(".");
- }
- if(sentNum%1000==0){
- System.out.println(sentNum);
- }
-
- }
-
- System.out.println("Log likelihood: "+o.getValue());
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
-
- }
-
- public void computeMaxwt(double[][]maxwt, int[][] d){
-
- for(int sentNum=0;sentNum<d.length;sentNum++){
- double post[][][]=forwardBackward(d[sentNum]);
-
- for(int n=0;n<post.length;n++){
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[n][i][j];
- }
-
- if(py>maxwt[i][d[sentNum][n]]){
- maxwt[i][d[sentNum][n]]=py;
- }
-
- }
- }
-
- //the last state
- int len=post.length;
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[len-2][j][i];
- }
-
- if(py>maxwt[i][d[sentNum][len-1]]){
- maxwt[i][d[sentNum][len-1]]=py;
- }
-
- }
-
- }
-
- }
-
-}//end of class
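
forwardBackward above sidesteps floating-point underflow with the classic scaling
trick: alpha is l1-normalised at every position and the log of each normaliser c_n
is accumulated, since log P(x) = sum_n log c_n. A stripped-down, hypothetical sketch
of just that recursion, following the same row-stochastic conventions as HMM:

    public class ScaledForward {
        // Scaled forward pass: returns log P(seq) for transition matrix trans,
        // emission matrix emit and initial distribution pi.
        static double logLikelihood(double[][] trans, double[][] emit, double[] pi, int[] seq) {
            int S = trans.length;
            double[] a = new double[S];
            for (int i = 0; i < S; i++) a[i] = pi[i] * emit[i][seq[0]];
            double logZ = 0;
            for (int n = 0; ; n++) {
                double c = 0;
                for (int i = 0; i < S; i++) c += a[i];
                logZ += Math.log(c);                   // log P(x) = sum_n log c_n
                for (int i = 0; i < S; i++) a[i] /= c; // rescale to dodge underflow
                if (n == seq.length - 1) break;
                double[] next = new double[S];
                for (int i = 0; i < S; i++)
                    for (int j = 0; j < S; j++)
                        next[i] += trans[j][i] * a[j];
                for (int i = 0; i < S; i++) next[i] *= emit[i][seq[n + 1]];
                a = next;
            }
            return logZ;
        }

        public static void main(String[] args) {
            double[][] trans = {{0.7, 0.3}, {0.4, 0.6}};
            double[][] emit = {{0.9, 0.1}, {0.2, 0.8}};
            double[] pi = {0.5, 0.5};
            System.out.println(logLikelihood(trans, emit, pi, new int[] {0, 1, 0}));
        }
    }
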
diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java
deleted file mode 100644
index 70b6c966..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java
+++ /dev/null
@@ -1,351 +0,0 @@
-package hmm;
-
-import gnu.trove.TIntArrayList;
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-public class HMMObjective extends ProjectedObjective{
-
-
- private static final double GRAD_DIFF = 3;
- public static double INIT_STEP_SIZE=10;
- public static double VAL_DIFF=1000;
-
- private HMM hmm;
- double[] newPoint ;
-
- //posterior[sent num][tok num][tag]=index into lambda
- private int posteriorMap[][][];
- //projection[word][tag].get(occurrence)=index into lambda
- private TIntArrayList projectionMap[][];
-
- //Size of the simplex
- public double scale=10;
- private SimplexProjection projection;
-
- private int wordFreq[];
- private static int MIN_FREQ=10;
- private int numWordsToProject=0;
-
- private int n_param;
-
- public double loglikelihood;
-
- public HMMObjective(HMM h){
- hmm=h;
-
- countWords();
- buildMap();
-
- gradient=new double [n_param];
- projection = new SimplexProjection(scale);
- newPoint = new double[n_param];
- setInitialParameters(new double[n_param]);
-
- }
-
- /**@brief counts word frequency in the corpus
- *
- */
- private void countWords(){
- wordFreq=new int [hmm.emit[0].length];
- for(int i=0;i<hmm.data.length;i++){
- for(int j=0;j<hmm.data[i].length;j++){
- wordFreq[hmm.data[i][j]]++;
- }
- }
- }
-
- /**@brief build posterior and projection indices
- *
- */
- private void buildMap(){
- //numbers of hidden states, word types and sentences
- int n_states=hmm.trans.length;
- int n_words=hmm.emit[0].length;
- int n_sents=hmm.data.length;
-
- n_param=0;
- posteriorMap=new int[n_sents][][];
- projectionMap=new TIntArrayList[n_words][];
- for(int sentNum=0;sentNum<n_sents;sentNum++){
- int [] data=hmm.data[sentNum];
- posteriorMap[sentNum]=new int[data.length][n_states];
- numWordsToProject=0;
- for(int i=0;i<data.length;i++){
- int word=data[i];
- for(int state=0;state<n_states;state++){
- if(wordFreq[word]>MIN_FREQ){
- if(projectionMap[word]==null){
- projectionMap[word]=new TIntArrayList[n_states];
- }
- // if(posteriorMap[sentNum][i]==null){
- // posteriorMap[sentNum][i]=new int[n_states];
- // }
-
- posteriorMap[sentNum][i][state]=n_param;
- if(projectionMap[word][state]==null){
- projectionMap[word][state]=new TIntArrayList();
- numWordsToProject++;
- }
- projectionMap[word][state].add(n_param);
- n_param++;
- }
- else{
- posteriorMap[sentNum][i][state]=-1;
- }
- }
- }
- }
- }
-
- @Override
- public double[] projectPoint(double[] point) {
- // project each constrained (word, state) block of lambdas onto the scaled simplex
- for(int i=0;i<projectionMap.length;i++){
-
- if(projectionMap[i]==null){
- //this word is not constrained
- continue;
- }
-
- for(int j=0;j<projectionMap[i].length;j++){
- TIntArrayList instances=projectionMap[i][j];
- double[] toProject = new double[instances.size()];
-
- for (int k = 0; k < toProject.length; k++) {
- // System.out.print(instances.get(k) + " ");
- toProject[k] = point[instances.get(k)];
- }
-
- projection.project(toProject);
- for (int k = 0; k < toProject.length; k++) {
- newPoint[instances.get(k)]=toProject[k];
- }
- }
- }
- return newPoint;
- }
-
- @Override
- public double[] getGradient() {
- // the gradient is maintained by updateFunction()
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- // the log-likelihood is maintained by updateFunction()
- functionCalls++;
- return loglikelihood;
- }
-
-
- @Override
- public String toString() {
- // dump the current parameters, wrapped every 100 entries
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < parameters.length; i++) {
- sb.append(parameters[i]+" ");
- if(i%100==0){
- sb.append("\n");
- }
- }
- sb.append("\n");
- /*
- for (int i = 0; i < gradient.length; i++) {
- sb.append(gradient[i]+" ");
- if(i%100==0){
- sb.append("\n");
- }
- }
- sb.append("\n");
- */
- return sb.toString();
- }
-
-
- /**
- * @param seq
- * @return posterior probability of each transition
- */
- public double [][][]forwardBackward(int sentNum){
- int [] seq=hmm.data[sentNum];
- int n_states=hmm.trans.length;
- double a[][]=new double [seq.length][n_states];
- double b[][]=new double [seq.length][n_states];
-
- int len=seq.length;
-
- boolean constrained=
- (projectionMap[seq[0]]!=null);
-
- //initialize the first step
- for(int i=0;i<n_states;i++){
- a[0][i]=hmm.emit[i][seq[0]]*hmm.pi[i];
- if(constrained){
- a[0][i]*=
- Math.exp(- parameters[ posteriorMap[sentNum][0][i] ] );
- }
- b[len-1][i]=1;
- }
-
- loglikelihood+=Math.log(hmm.l1norm(a[0]));
- hmm.l1normalize(a[0]);
- hmm.l1normalize(b[len-1]);
-
- //forward
- for(int n=1;n<len;n++){
-
- constrained=
- (projectionMap[seq[n]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
- a[n][i]+=hmm.trans[j][i]*a[n-1][j];
- }
- a[n][i]*=hmm.emit[i][seq[n]];
-
- if(constrained){
- a[n][i]*=
- Math.exp(- parameters[ posteriorMap[sentNum][n][i] ] );
- }
-
- }
- loglikelihood+=Math.log(hmm.l1norm(a[n]));
- hmm.l1normalize(a[n]);
- }
-
- //temp variable for e^{-\lambda}
- double factor=1;
- //backward
- for(int n=len-2;n>=0;n--){
-
- constrained=
- (projectionMap[seq[n+1]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
-
- if(constrained){
- factor=
- Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
- }else{
- factor=1;
- }
-
- b[n][i]+=hmm.trans[i][j]*b[n+1][j]*hmm.emit[j][seq[n+1]]*factor;
-
- }
- }
- hmm.l1normalize(b[n]);
- }
-
- //expected transition
- double p[][][]=new double [seq.length][n_states][n_states];
- for(int n=0;n<len-1;n++){
-
- constrained=
- (projectionMap[seq[n+1]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
-
- if(constrained){
- factor=
- Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
- }else{
- factor=1;
- }
-
- p[n][i][j]=a[n][i]*hmm.trans[i][j]*
- hmm.emit[j][seq[n+1]]*b[n+1][j]*factor;
-
- }
- }
-
- hmm.l1normalize(p[n]);
- }
- return p;
- }
-
- public void optimizeWithProjectedGradientDescent(){
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
-
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(10);
- updateFunction();
- boolean succeeded = optimizer.optimize(this,stats,compositeStop);
- System.out.println("Ended optimization: Projected Gradient Descent\n" + stats.prettyPrint(1));
- if(succeeded){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction(){
-
- updateCalls++;
- loglikelihood=0;
-
- for(int sentNum=0;sentNum<hmm.data.length;sentNum++){
- double [][][]p=forwardBackward(sentNum);
-
- for(int n=0;n<p.length-1;n++){
- for(int i=0;i<p[n].length;i++){
- if(projectionMap[hmm.data[sentNum][n]]!=null){
- double posterior=0;
- for(int j=0;j<p[n][i].length;j++){
- posterior+=p[n][i][j];
- }
- gradient[posteriorMap[sentNum][n][i]]=-posterior;
- }
- }
- }
-
- //the last state
- int n=p.length-2;
- for(int i=0;i<p[n].length;i++){
- if(projectionMap[hmm.data[sentNum][n+1]]!=null){
-
- double posterior=0;
- for(int j=0;j<p[n].length;j++){
- posterior+=p[n][j][i];
- }
- gradient[posteriorMap[sentNum][n+1][i]]=-posterior;
-
- }
- }
- }
-
- }
-
-}
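
The exp(-lambda) factors woven through forwardBackward above are the
posterior-regularisation E-step: the projected posterior is

    q(y | x) ∝ p_theta(y | x) * exp( - sum_n lambda[x_n][y_n] )

so the ordinary scaled forward-backward recursions apply unchanged once each
constrained token's emission is replaced by emit[i][w] * exp(-lambda_{w,i}); the code
does exactly this, and only for word types above the MIN_FREQ cutoff.
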
diff --git a/gi/posterior-regularisation/prjava/src/hmm/POS.java b/gi/posterior-regularisation/prjava/src/hmm/POS.java
deleted file mode 100644
index bdcbc683..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/POS.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package hmm;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.HashMap;
-
-import data.Corpus;
-
-public class POS {
-
- //public String trainFilename="../posdata/en_train.conll";
- public static String trainFilename="../posdata/small_train.txt";
-// public static String trainFilename="../posdata/en_test.conll";
-// public static String trainFilename="../posdata/trial1.txt";
-
- public static String testFilename="../posdata/en_test.conll";
- //public static String testFilename="../posdata/trial1.txt";
-
- public static String predFilename="../posdata/en_test.predict.conll";
- public static String modelFilename="../posdata/posModel.out";
- public static final int ITER=20;
- public static final int N_STATE=30;
-
- public static void main(String[] args) {
- //POS p=new POS();
- //POS p=new POS(true);
- try {
- PRPOS();
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
-
- public POS() throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
- for(int i=0;i<ITER;i++){
- System.out.println("Iter"+i);
- hmm.EM();
- if((i+1)%10==0){
- hmm.writeModel(modelFilename+i);
- }
- }
-
- hmm.writeModel(modelFilename);
-
- Corpus test=new Corpus(testFilename,c.vocab);
-
- PrintStream ps= io.FileUtil.printstream(new File(predFilename));
-
- int [][]data=test.getAllData();
- for(int i=0;i<data.length;i++){
- int []tag=hmm.viterbi(data[i]);
- String sent[]=test.get(i);
- for(int j=0;j<data[i].length;j++){
- ps.println(sent[j]+"\t"+tag[j]);
- }
- ps.println();
- }
- ps.close();
- }
-
- //POS induction with L1/Linf constraints
- public static void PRPOS() throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
- hmm.o=new HMMObjective(hmm);
- for(int i=0;i<ITER;i++){
- System.out.println("Iter: "+i);
- hmm.PREM();
- if((i+1)%10==0){
- hmm.writeModel(modelFilename+i);
- }
- }
-
- hmm.writeModel(modelFilename);
- }
-
-
- public POS(boolean supervised) throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(c.tagVocab.size() , c.getVocabSize()+1,c.getAllData());
- hmm.train(c.getTagData());
-
- hmm.writeModel(modelFilename);
-
- Corpus test=new Corpus(testFilename,c.vocab);
-
- HashMap<String, Integer>tagVocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
- String [] tagdict=new String [tagVocab.size()+1];
- for(String key:tagVocab.keySet()){
- tagdict[tagVocab.get(key)]=key;
- }
- tagdict[tagdict.length-1]=Corpus.UNK_TOK;
-
- System.out.println(c.vocab.get("<e>"));
-
- PrintStream ps= io.FileUtil.printstream(new File(predFilename));
-
- int [][]data=test.getAllData();
- for(int i=0;i<data.length;i++){
- int []tag=hmm.viterbi(data[i]);
- String sent[]=test.get(i);
- for(int j=0;j<data[i].length;j++){
- ps.println(sent[j]+"\t"+tagdict[tag[j]]);
- }
- ps.println();
- }
- ps.close();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/io/FileUtil.java b/gi/posterior-regularisation/prjava/src/io/FileUtil.java
deleted file mode 100644
index 6720d087..00000000
--- a/gi/posterior-regularisation/prjava/src/io/FileUtil.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package io;
-import java.util.*;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-import java.io.*;
-public class FileUtil
-{
- public static BufferedReader reader(File file) throws FileNotFoundException, IOException
- {
- if (file.getName().endsWith(".gz"))
- return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF8"));
- else
- return new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF8"));
- }
-
- public static PrintStream printstream(File file) throws FileNotFoundException, IOException
- {
- if (file.getName().endsWith(".gz"))
- return new PrintStream(new GZIPOutputStream(new FileOutputStream(file)), true, "UTF8");
- else
- return new PrintStream(new FileOutputStream(file), true, "UTF8");
- }
-
- public static Scanner openInFile(String filename)
- {
- Scanner localsc=null;
- try
- {
- localsc=new Scanner(new FileInputStream(filename), "UTF8");
-
- }catch(IOException ioe){
- System.out.println(ioe.getMessage());
- }
- return localsc;
- }
-
- public static FileInputStream openInputStream(String infilename)
- {
- FileInputStream fis=null;
- try {
- fis = new FileInputStream(infilename);
-
- } catch (IOException ioe) {
- System.out.println(ioe.getMessage());
- }
- return fis;
- }
-}
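
A small round-trip sketch using the helpers above; the file name is hypothetical, and
its .gz suffix is what selects the gzip code paths:

    import java.io.*;

    public class FileUtilExample {
        public static void main(String[] args) throws IOException {
            File f = new File("example.txt.gz");
            PrintStream ps = io.FileUtil.printstream(f); // gzip-compressed UTF-8
            ps.println("hello");
            ps.close();
            BufferedReader in = io.FileUtil.reader(f);
            System.out.println(in.readLine()); // prints "hello"
            in.close();
        }
    }
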
diff --git a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java b/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java
deleted file mode 100644
index d1631b51..00000000
--- a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package io;
-
-
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectInput;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutput;
-import java.io.ObjectOutputStream;
-import java.io.OutputStream;
-
-public class SerializedObjects
-{
- public static void writeSerializedObject(Object object, String outFile)
- {
- ObjectOutput output = null;
- try{
- //use buffering
- OutputStream file = new FileOutputStream(outFile);
- OutputStream buffer = new BufferedOutputStream( file );
- output = new ObjectOutputStream( buffer );
- output.writeObject(object);
- buffer.close();
- file.close();
- }
- catch(IOException ex){
- ex.printStackTrace();
- }
- finally{
- try {
- if (output != null) {
- //flush and close "output" and its underlying streams
- output.close();
- }
- }
- catch (IOException ex ){
- ex.printStackTrace();
- }
- }
- }
-
- public static Object readSerializedObject(String inputFile)
- {
- ObjectInput input = null;
- Object recoveredObject=null;
- try{
- //use buffering
- InputStream file = new FileInputStream(inputFile);
- InputStream buffer = new BufferedInputStream(file);
- input = new ObjectInputStream(buffer);
- //deserialize the List
- recoveredObject = input.readObject();
- }
- catch(IOException ex){
- ex.printStackTrace();
- }
- catch (ClassNotFoundException ex){
- ex.printStackTrace();
- }
- catch(Exception ex)
- {
- ex.printStackTrace();
- }
- finally{
- try {
- if ( input != null ) {
- //close "input" and its underlying streams
- input.close();
- }
- }
- catch (IOException ex){
- ex.printStackTrace();
- }
- }
- return recoveredObject;
- }
-
-} \ No newline at end of file
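
Corpus and POS use this pair to persist vocabularies; a round-trip sketch (the output
path is hypothetical):

    import java.util.HashMap;

    public class SerializedObjectsExample {
        public static void main(String[] args) {
            HashMap<String, Integer> vocab = new HashMap<String, Integer>();
            vocab.put("the", 0);
            io.SerializedObjects.writeSerializedObject(vocab, "vocab.ser");
            @SuppressWarnings("unchecked")
            HashMap<String, Integer> back = (HashMap<String, Integer>)
                    io.SerializedObjects.readSerializedObject("vocab.ser");
            System.out.println(back.get("the")); // prints 0
        }
    }
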
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java
deleted file mode 100644
index 25fa7f09..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java
+++ /dev/null
@@ -1,110 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ConjugateGradient;
-import optimization.gradientBasedMethods.GradientDescent;
-import optimization.gradientBasedMethods.LBFGS;
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimization;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-/**
- *
- * @author javg
- * f(x) = \sum_{i=1}^{N-1} \left[ (1-x_i)^2+ 100 (x_{i+1} - x_i^2 )^2 \right] \quad \forall x\in\mathbb{R}^N.
- */
-public class GeneralizedRosenbrock extends Objective{
-
-
-
- public GeneralizedRosenbrock(int dimensions){
- parameters = new double[dimensions];
- java.util.Arrays.fill(parameters, 0);
- gradient = new double[dimensions];
-
- }
-
- public GeneralizedRosenbrock(int dimensions, double[] params){
- parameters = params;
- gradient = new double[dimensions];
- }
-
-
- public double getValue() {
- functionCalls++;
- double value = 0;
- for(int i = 0; i < parameters.length-1; i++){
- value += MathUtils.square(1-parameters[i]) + 100*MathUtils.square(parameters[i+1] - MathUtils.square(parameters[i]));
- }
-
- return value;
- }
-
- /**
- * g_i = -2(1-x_i) - 400 x_i (x_{i+1} - x_i^2)
- * g_{i+1} += 200 (x_{i+1} - x_i^2)
- */
- public double[] getGradient() {
- gradientCalls++;
- java.util.Arrays.fill(gradient,0);
- for(int i = 0; i < parameters.length-1; i++){
- gradient[i]+=-2*(1-parameters[i]) - 400*parameters[i]*(parameters[i+1] - MathUtils.square(parameters[i]));
- gradient[i+1]+=200*(parameters[i+1] - MathUtils.square(parameters[i]));
- }
- return gradient;
- }
-
-
-
-
-
-
-
- public String toString(){
- String res ="";
- for(int i = 0; i < parameters.length; i++){
- res += "P" + i+ " " + parameters[i];
- }
- res += " Value " + getValue();
- return res;
- }
-
- public static void main(String[] args) {
-
- GeneralizedRosenbrock o = new GeneralizedRosenbrock(2);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-
- System.out.println("Doing Gradient descent");
- //LineSearchMethod wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1);
- StopingCriteria stop = new GradientL2Norm(0.001);
- LineSearchMethod ls = new ArmijoLineSearchMinimization();
- Optimizer optimizer = new GradientDescent(ls);
- OptimizerStats stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- boolean succeeded = optimizer.optimize(o,stats, stop);
- System.out.println("Success " + succeeded + "\n"+stats.prettyPrint(1));
- System.out.println("Doing Conjugate Gradient descent");
- o = new GeneralizedRosenbrock(2);
- // wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1);
- optimizer = new ConjugateGradient(ls);
- stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- succeeded = optimizer.optimize(o,stats,stop);
- System.out.println("Success " + succeeded + "\n"+stats.prettyPrint(1));
- System.out.println("Doing Quasi newton descent");
- o = new GeneralizedRosenbrock(2);
- optimizer = new LBFGS(ls,10);
- stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- succeeded = optimizer.optimize(o,stats,stop);
- System.out.println("Success " + succeeded + "\n"+stats.prettyPrint(1));
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java
deleted file mode 100644
index f087681e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java
+++ /dev/null
@@ -1,128 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ConjugateGradient;
-
-import optimization.gradientBasedMethods.GradientDescent;
-import optimization.gradientBasedMethods.LBFGS;
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.GenericPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-
-
-/**
- * @author javg
- *
- */
-public class x2y2 extends Objective{
-
-
- //Implements f(x,y) = a*x^2 + b*y^2
- double a, b;
- public x2y2(double a, double b){
- this.a = a;
- this.b = b;
- parameters = new double[2];
- parameters[0] = 4;
- parameters[1] = 4;
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*parameters[0]*parameters[0]+b*parameters[1]*parameters[1];
- }
-
- public double[] getGradient() {
- gradientCalls++;
- gradient[0]=2*a*parameters[0];
- gradient[1]=2*b*parameters[1];
- return gradient;
-// if(debugLevel >=2){
-// double[] numericalGradient = DebugHelpers.getNumericalGradient(this, parameters, 0.000001);
-// for(int i = 0; i < parameters.length; i++){
-// double diff = Math.abs(gradient[i]-numericalGradient[i]);
-// if(diff > 0.00001){
-// System.out.println("Numerical Gradient does not match");
-// System.exit(1);
-// }
-// }
-// }
- }
-
-
-
- public void optimizeWithGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- GradientDescent optimizer = new GradientDescent(ls);
- StopingCriteria stop = new GradientL2Norm(0.001);
-// optimizer.setGradientConvergenceValue(0.001);
- optimizer.setMaxIterations(100);
- boolean succeeded = optimizer.optimize(o,stats,stop);
- System.out.println("Ended optimization: Gradient Descent\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succeeded){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public void optimizeWithConjugateGradient(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- ConjugateGradient optimizer = new ConjugateGradient(ls);
- StopingCriteria stop = new GradientL2Norm(0.001);
-
- optimizer.setMaxIterations(10);
- boolean succeeded = optimizer.optimize(o,stats,stop);
- System.out.println("Ended optimization: Conjugate Gradient\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succeeded){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public void optimizeWithLBFGS(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- LBFGS optimizer = new LBFGS(ls,10);
- StopingCriteria stop = new GradientL2Norm(0.001);
- optimizer.setMaxIterations(10);
- boolean succeeded = optimizer.optimize(o,stats,stop);
- System.out.println("Ended optimization: LBFGS\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succeeded){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public static void main(String[] args) {
- x2y2 o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- o.setDebugLevel(4);
- LineSearchMethod wolfe = new WolfRuleLineSearch(new GenericPickFirstStep(1),0.001,0.9);
-// LineSearchMethod ls = new ArmijoLineSearchMinimization();
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithGradientDescent(wolfe, stats, o);
- o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-// ls = new ArmijoLineSearchMinimization();
- stats = new OptimizerStats();
- o.optimizeWithConjugateGradient(wolfe, stats, o);
- o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-// ls = new ArmijoLineSearchMinimization();
- stats = new OptimizerStats();
- o.optimizeWithLBFGS(wolfe, stats, o);
- }
-
- public String toString(){
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue();
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java
deleted file mode 100644
index 391775b7..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.BoundsProjection;
-import optimization.projections.Projection;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-
-/**
- * @author javg
- *
- *
- * Implements f(x,y) = a (x - dx)^2 + b (y - dy)^2
- */
-public class x2y2WithConstraints extends ProjectedObjective{
-
-
- double a, b;
- double dx;
- double dy;
- Projection projection;
-
-
- public x2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){
- //projection = new BoundsProjection(0.2,Double.MAX_VALUE);
- super();
- projection = proj;
- this.a = a;
- this.b = b;
- this.dx = dx;
- this.dy = dy;
- setInitialParameters(params);
- System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2");
- System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")");
- printParameters();
- projection.project(parameters);
- printParameters();
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy));
- }
-
- public double[] getGradient() {
- if(gradient == null){
- gradient = new double[2];
- }
- gradientCalls++;
- gradient[0]=2*a*(parameters[0]-dx);
- gradient[1]=2*b*(parameters[1]-dy);
- return gradient;
- }
-
-
- public double[] projectPoint(double[] point) {
- double[] newPoint = point.clone();
- projection.project(newPoint);
- return newPoint;
- }
-
- public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2WithConstraints o){
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001);
- StopingCriteria stopValue = new ValueDifference(0.001);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(5);
- boolean succeeded = optimizer.optimize(o,stats,compositeStop);
- System.out.println("Ended optimization: Projected Gradient Descent\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succeeded){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
-
-
- public String toString(){
-
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")";
- }
-
- public static void main(String[] args) {
- double a = 1;
- double b=1;
- double x0 = 0;
- double y0 =1;
- double dx = 0.5;
- double dy = 0.5;
- double [] parameters = new double[2];
- parameters[0] = x0;
- parameters[1] = y0;
- x2y2WithConstraints o = new x2y2WithConstraints(a,b,parameters,dx,dy, new SimplexProjection(0.5));
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b );
- o.setDebugLevel(4);
-
- LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithProjectedGradientDescent(ls, stats, o);
-
-// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy);
-// stats = new OptimizerStats();
-// o.optimizeWithSpectralProjectedGradientDescent(stats, o);
- }
-
-
-
-
-}
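
Both this example and HMMObjective lean on SimplexProjection. One standard way to
compute the Euclidean projection onto {x : x_i >= 0, sum_i x_i = scale} is the
sort-and-threshold method of Duchi et al. (2008); a hypothetical standalone version,
not taken from the bundled optimization package:

    import java.util.Arrays;

    public class SimplexProjectExample {
        static double[] projectToSimplex(double[] v, double scale) {
            double[] u = v.clone();
            Arrays.sort(u); // ascending; walk it from the largest entry down
            double cumsum = 0, theta = 0;
            for (int i = u.length - 1; i >= 0; i--) {
                cumsum += u[i];
                theta = (cumsum - scale) / (u.length - i);
                if (i > 0 && u[i - 1] <= theta) break; // smaller entries clip to 0
            }
            double[] out = new double[v.length];
            for (int i = 0; i < v.length; i++) out[i] = Math.max(v[i] - theta, 0);
            return out;
        }

        public static void main(String[] args) {
            // projects (0.5, 1.5) onto the unit simplex: prints [0.0, 1.0]
            System.out.println(Arrays.toString(projectToSimplex(new double[] {0.5, 1.5}, 1.0)));
        }
    }
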
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java
deleted file mode 100644
index 2fcb7990..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-/**
- *
- * @author javg
- *
- */
-public abstract class AbstractGradientBaseMethod implements Optimizer{
-
- protected int maxNumberOfIterations=10000;
-
-
-
- protected int currentProjectionIteration;
- protected double currValue;
- protected double previousValue = Double.MAX_VALUE;
- protected double step;
- protected double[] gradient;
- public double[] direction;
-
- //Original values
- protected double originalGradientL2Norm;
-
- protected LineSearchMethod lineSearch;
- DifferentiableLineSearchObjective lso;
-
-
- public void reset(){
- direction = null;
- gradient = null;
- previousValue = Double.MAX_VALUE;
- currentProjectionIteration = 0;
- originalGradientL2Norm = 0;
- step = 0;
- currValue = 0;
- }
-
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- lso = new DifferentiableLineSearchObjective(o);
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- }
-
- public void updateStructuresAfterStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- }
-
- public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
- //Initialize structures
-
- stats.collectInitStats(this, o);
- direction = new double[o.getNumParameters()];
- initializeStructures(o, stats, stop);
- for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
- //System.out.println("\tgradient descent iteration " + currentProjectionIteration);
- //System.out.print("\tparameters:" );
- //o.printParameters();
- previousValue = currValue;
- currValue = o.getValue();
- gradient = o.getGradient();
- if(stop.stopOptimization(o)){
- stats.collectFinalStats(this, o);
- return true;
- }
-
- getDirection();
- if(MathUtils.dotProduct(gradient, direction) > 0){
- System.out.println("Not a descent direction");
- System.out.println(" current stats " + stats.prettyPrint(1));
- System.exit(-1);
- }
- updateStructuresBeforeStep(o, stats, stop);
- lso.reset(direction);
- step = lineSearch.getStepSize(lso);
- //System.out.println("\t\tLeave with step: " + step);
- if(step==-1){
- System.out.println("Failed to find step");
- stats.collectFinalStats(this, o);
- return false;
- }
- updateStructuresAfterStep( o, stats, stop);
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient = o.getGradient();
- stats.collectIterationStats(this, o);
- }
- stats.collectFinalStats(this, o);
- return false;
- }
-
-
- public int getCurrentIteration() {
- return currentProjectionIteration;
- }
-
-
- /**
- * Method specific
- */
- public abstract double[] getDirection();
-
- public double getCurrentStep() {
- return step;
- }
-
-
-
- public void setMaxIterations(int max) {
- maxNumberOfIterations = max;
- }
-
- public double getCurrentValue() {
- return currValue;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java
deleted file mode 100644
index 28295729..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java
+++ /dev/null
@@ -1,92 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-
-
-public class ConjugateGradient extends AbstractGradientBaseMethod{
-
-
- double[] previousGradient;
- double[] previousDirection;
-
- public ConjugateGradient(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
- public void reset(){
- super.reset();
- java.util.Arrays.fill(previousDirection, 0);
- java.util.Arrays.fill(previousGradient, 0);
- }
-
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- super.initializeStructures(o, stats, stop);
- previousGradient = new double[o.getNumParameters()];
- previousDirection = new double[o.getNumParameters()];
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
- System.arraycopy(direction, 0, previousDirection, 0, direction.length);
- }
-
-// public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
-// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o);
-// stats.collectInitStats(this, o);
-// direction = new double[o.getNumParameters()];
-// initializeStructures(o, stats, stop);
-// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient =o.getGradient();
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// getDirection();
-// updateStructures(o, stats, stop);
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find a step size");
-// System.out.println("Failed to find step");
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-//
-// stats.collectIterationStats(this, o);
-// }
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
- public double[] getDirection(){
- direction = MathUtils.negation(gradient);
- if(currentProjectionIteration != 1){
- //Using Polak-Ribiere method (book equation 5.45)
- double b = MathUtils.dotProduct(gradient, MathUtils.arrayMinus(gradient, previousGradient))
- /MathUtils.dotProduct(previousGradient, previousGradient);
- if(b<0){
- System.out.println("Defaulting to gradient descent");
- b = Math.max(b, 0);
- }
- MathUtils.plusEquals(direction, previousDirection, b);
- //Debug code
- if(MathUtils.dotProduct(direction, gradient) > 0){
-				System.out.println("Not a descent direction; resetting to gradient");
- direction = MathUtils.negation(gradient);
- }
- }
- return direction;
- }
-
-
-
-
-
-
-}
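
getDirection() above is the Polak-Ribiere update with the usual PR+ restart (clip beta at zero and fall back to steepest descent). In isolation, as a self-contained sketch:

    // d_k = -g_k + max(beta, 0) * d_{k-1},  beta = g_k.(g_k - g_{k-1}) / g_{k-1}.g_{k-1}
    static double[] polakRibiereDirection(double[] g, double[] gPrev, double[] dPrev) {
        double num = 0, den = 0;
        for (int i = 0; i < g.length; i++) {
            num += g[i] * (g[i] - gPrev[i]);
            den += gPrev[i] * gPrev[i];
        }
        double beta = Math.max(num / den, 0);   // beta < 0: restart with steepest descent
        double[] d = new double[g.length];
        for (int i = 0; i < g.length; i++)
            d[i] = -g[i] + beta * dPrev[i];
        return d;
    }
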
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java
deleted file mode 100644
index 6dc4ef6c..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import java.util.ArrayList;
-
-import optimization.util.MathUtils;
-
-
-
-public class DebugHelpers {
- public static void getLineSearchGraph(Objective o, double[] direction,
- double[] parameters, double originalObj,
- double originalDot, double c1, double c2){
- ArrayList<Double> stepS = new ArrayList<Double>();
- ArrayList<Double> obj = new ArrayList<Double>();
- ArrayList<Double> norm = new ArrayList<Double>();
- double[] gradient = new double[o.getNumParameters()];
- double[] newParameters = parameters.clone();
- MathUtils.plusEquals(newParameters,direction,0);
- o.setParameters(newParameters);
- double minValue = o.getValue();
- int valuesBiggerThanMax = 0;
- for(double step = 0; step < 2; step +=0.01 ){
- newParameters = parameters.clone();
- MathUtils.plusEquals(newParameters,direction,step);
- o.setParameters(newParameters);
- double newValue = o.getValue();
- gradient = o.getGradient();
- double newgradDirectionDot = MathUtils.dotProduct(gradient,direction);
- stepS.add(step);
- obj.add(newValue);
- norm.add(newgradDirectionDot);
- if(newValue <= minValue){
- minValue = newValue;
- }else{
- valuesBiggerThanMax++;
- }
-
- if(valuesBiggerThanMax > 10){
- break;
- }
-
- }
- System.out.println("step\torigObj\tobj\tsuffdec\tnorm\tcurvature1");
- for(int i = 0; i < stepS.size(); i++){
- double cnorm= norm.get(i);
- System.out.println(stepS.get(i)+"\t"+originalObj +"\t"+obj.get(i) + "\t" +
- (originalObj + originalDot*((Double)stepS.get(i))*c1) +"\t"+Math.abs(cnorm) +"\t"+c2*Math.abs(originalDot));
- }
- }
-
- public static double[] getNumericalGradient(Objective o, double[] parameters, double epsilon){
- int nrParameters = o.getNumParameters();
- double[] gradient = new double[nrParameters];
- double[] newParameters;
- double originalValue = o.getValue();
- for(int parameter = 0; parameter < nrParameters; parameter++){
- newParameters = parameters.clone();
- newParameters[parameter]+=epsilon;
- o.setParameters(newParameters);
- double newValue = o.getValue();
- gradient[parameter]=(newValue-originalValue)/epsilon;
- }
- return gradient;
- }
-}
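
getNumericalGradient uses a forward difference, (f(x + eps*e_i) - f(x)) / eps, whose truncation error is O(eps). When checking an analytic gradient, a central difference is usually preferred since its error is O(eps^2); a sketch against the same Objective API:

    static double[] centralDifferenceGradient(Objective o, double[] x, double eps) {
        int n = o.getNumParameters();
        double[] grad = new double[n];
        for (int i = 0; i < n; i++) {
            double[] plus = x.clone(), minus = x.clone();
            plus[i] += eps;
            minus[i] -= eps;
            o.setParameters(plus);
            double fPlus = o.getValue();
            o.setParameters(minus);
            double fMinus = o.getValue();
            grad[i] = (fPlus - fMinus) / (2 * eps);   // O(eps^2) truncation error
        }
        o.setParameters(x);   // restore the original point
        return grad;
    }
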
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java
deleted file mode 100644
index 9a53cef4..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.linesearch.LineSearchMethod;
-
-
-
-public class GradientDescent extends AbstractGradientBaseMethod{
-
- public GradientDescent(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
- public double[] getDirection(){
- for(int i = 0; i< gradient.length; i++){
- direction[i] = -gradient[i];
- }
- return direction;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java
deleted file mode 100644
index dedbc942..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java
+++ /dev/null
@@ -1,234 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-public class LBFGS extends AbstractGradientBaseMethod{
-
- //How many previous values are being saved
- int history;
- double[][] skList;
- double[][] ykList;
- double initialHessianParameters;
- double[] previousGradient;
- double[] previousParameters;
-
-	//auxiliary structures
- double q[];
- double[] roi;
- double[] alphai;
-
- public LBFGS(LineSearchMethod ls, int history) {
- lineSearch = ls;
- this.history = history;
- skList = new double[history][];
- ykList = new double[history][];
-
- }
-
- public void reset(){
- super.reset();
- initialHessianParameters = 0;
- previousParameters = null;
- previousGradient = null;
- skList = new double[history][];
- ykList = new double[history][];
- q = null;
- roi = null;
- alphai = null;
- }
-
- public double[] LBFGSTwoLoopRecursion(double hessianConst){
- //Only create array once
- if(q == null){
- q = new double[gradient.length];
- }
- System.arraycopy(gradient, 0, q, 0, gradient.length);
- //Only create array once
- if(roi == null){
- roi = new double[history];
- }
- //Only create array once
- if(alphai == null){
- alphai = new double[history];
- }
-
- for(int i = history-1; i >=0 && skList[i]!= null && ykList[i]!=null; i-- ){
- // System.out.println("New to Old proj " + currentProjectionIteration + " history "+history + " index " + i);
- double[] si = skList[i];
- double[] yi = ykList[i];
- roi[i]= 1.0/MathUtils.dotProduct(yi,si);
- alphai[i] = MathUtils.dotProduct(si, q)*roi[i];
- MathUtils.plusEquals(q, yi, -alphai[i]);
- }
- //Initial Hessian is just a constant
- MathUtils.scalarMultiplication(q, hessianConst);
- for(int i = 0; i <history && skList[i]!= null && ykList[i]!=null; i++ ){
- // System.out.println("Old to New proj " + currentProjectionIteration + " history "+history + " index " + i);
- double beta = MathUtils.dotProduct(ykList[i], q)*roi[i];
- MathUtils.plusEquals(q, skList[i], (alphai[i]-beta));
- }
- return q;
- }
-
-
-
-
- @Override
- public double[] getDirection() {
-
- calculateInitialHessianParameter();
-// System.out.println("Initial hessian " + initialHessianParameters);
- return direction = MathUtils.negation(LBFGSTwoLoopRecursion(initialHessianParameters));
- }
-
- public void calculateInitialHessianParameter(){
- if(currentProjectionIteration == 1){
- //Use gradient
- initialHessianParameters = 1;
- }else if(currentProjectionIteration <= history){
- double[] sk = skList[currentProjectionIteration-2];
- double[] yk = ykList[currentProjectionIteration-2];
- initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
- }else{
- //get the last one
- double[] sk = skList[history-1];
- double[] yk = ykList[history-1];
- initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
- }
- }
-
-	//TODO if structures exist just reset them to zero
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- super.initializeStructures(o, stats, stop);
- previousParameters = new double[o.getNumParameters()];
- previousGradient = new double[o.getNumParameters()];
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
-		super.updateStructuresBeforeStep(o, stats, stop);
- System.arraycopy(o.getParameters(), 0, previousParameters, 0, previousParameters.length);
- System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
- }
-
- public void updateStructuresAfterStep( Objective o,OptimizerStats stats, StopingCriteria stop){
- double[] diffX = MathUtils.arrayMinus(o.getParameters(), previousParameters);
- double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient);
-		//Save the new values and discard the oldest ones
- if(currentProjectionIteration > history){
- for(int i = 0; i < history-1;i++){
- skList[i]=skList[i+1];
- ykList[i]=ykList[i+1];
- }
- skList[history-1]=diffX;
- ykList[history-1]=diffGrad;
- }else{
- skList[currentProjectionIteration-1]=diffX;
- ykList[currentProjectionIteration-1]=diffGrad;
- }
- }
-
-// public boolean optimize(Objective o, OptimizerStats stats, StopingCriteria stop) {
-// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o);
-// gradient = o.getGradient();
-// direction = new double[o.getNumParameters()];
-// previousGradient = new double[o.getNumParameters()];
-//
-// previousParameters = new double[o.getNumParameters()];
-//
-// stats.collectInitStats(this, o);
-// previousValue = Double.MAX_VALUE;
-// currValue= o.getValue();
-// //Used for stopping criteria
-// double[] originalGradient = o.getGradient();
-//
-// originalGradientL2Norm = MathUtils.L2Norm(originalGradient);
-// if(stop.stopOptimization(originalGradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-//
-//
-// currValue = o.getValue();
-// gradient = o.getGradient();
-// currParameters = o.getParameters();
-//
-//
-// if(currentProjectionIteration == 1){
-// //Use gradient
-// initialHessianParameters = 1;
-// }else if(currentProjectionIteration <= history){
-// double[] sk = skList[currentProjectionIteration-2];
-// double[] yk = ykList[currentProjectionIteration-2];
-// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
-// }else{
-// //get the last one
-// double[] sk = skList[history-1];
-// double[] yk = ykList[history-1];
-// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
-// }
-//
-// getDirection();
-//
-// //MatrixOutput.printDoubleArray(direction, "direction");
-// double dot = MathUtils.dotProduct(direction, gradient);
-// if(dot > 0){
-// throw new RuntimeException("Not a descent direction");
-// } if (Double.isNaN(dot)){
-// throw new RuntimeException("dot is not a number!!");
-// }
-// System.arraycopy(currParameters, 0, previousParameters, 0, currParameters.length);
-// System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find a step size");
-//// lso.printLineSearchSteps();
-//// System.out.println(stats.prettyPrint(1));
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-// stats.collectIterationStats(this, o);
-//
-// //We are not updating the alpha since it is done in line search already
-// currParameters = o.getParameters();
-// gradient = o.getGradient();
-//
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// double[] diffX = MathUtils.arrayMinus(currParameters, previousParameters);
-// double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient);
-// //Save new values and discard new ones
-// if(currentProjectionIteration > history){
-// for(int i = 0; i < history-1;i++){
-// skList[i]=skList[i+1];
-// ykList[i]=ykList[i+1];
-// }
-// skList[history-1]=diffX;
-// ykList[history-1]=diffGrad;
-// }else{
-// skList[currentProjectionIteration-1]=diffX;
-// ykList[currentProjectionIteration-1]=diffGrad;
-// }
-// previousValue = currValue;
-// }
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
-
-
-
-
-
-
-
-
-
-}
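
The two-loop recursion above reconstructs H_k * gradient implicitly from the last `history` pairs s_i = x_{i+1} - x_i and y_i = grad_{i+1} - grad_i, with the initial Hessian approximated by the scalar (s.y)/(y.y) times the identity. A hypothetical usage sketch (the memory size 5 and iteration cap are arbitrary):

    LineSearchMethod ls = new WolfRuleLineSearch(new GenericPickFirstStep(1.0));
    LBFGS lbfgs = new LBFGS(ls, 5);    // keep only the 5 most recent (s, y) pairs
    lbfgs.setMaxIterations(200);
    // lbfgs.optimize(objective, stats, stop) then runs the template loop above;
    // each iteration costs O(history * n) extra time and memory, versus
    // O(n^2) for a dense quasi-Newton Hessian approximation.
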
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java
deleted file mode 100644
index 6be01bf9..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java
+++ /dev/null
@@ -1,87 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-/**
- * Defines an optimization objective: a differentiable function together with
- * its parameters, cached gradient, and evaluation counters.
- *
- * @author javg
- *
- */
-public abstract class Objective {
-
- protected int functionCalls = 0;
- protected int gradientCalls = 0;
- protected int updateCalls = 0;
-
- protected double[] parameters;
-
- //Contains a cache with the gradient
- public double[] gradient;
- int debugLevel = 0;
-
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- public int getNumParameters() {
- return parameters.length;
- }
-
- public double getParameter(int index) {
- return parameters[index];
- }
-
- public double[] getParameters() {
- return parameters;
- }
-
- public abstract double[] getGradient( );
-
- public void setParameter(int index, double value) {
- parameters[index]=value;
- }
-
- public void setParameters(double[] params) {
- if(parameters == null){
- parameters = new double[params.length];
- }
- updateCalls++;
- System.arraycopy(params, 0, parameters, 0, params.length);
- }
-
-
- public int getNumberFunctionCalls() {
- return functionCalls;
- }
-
- public int getNumberGradientCalls() {
- return gradientCalls;
- }
-
- public int getNumberUpdateCalls() {
- return updateCalls;
- }
-
- public String finalInfoString() {
-		return "FE: " + functionCalls + " GE: " + gradientCalls + " Param updates: " +
-		updateCalls;
- }
- public void printParameters() {
- System.out.println(toString());
- }
-
- public abstract String toString();
- public abstract double getValue ();
-
- /**
-	 * Sets the initial objective parameters.
-	 * For unconstrained models this just sets the objective parameters to the
-	 * argument (no copying); for a constrained objective, the parameters are
-	 * projected first and then set.
- * @param params
- */
- public void setInitialParameters(double[] params){
- parameters = params;
- }
-
-}
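
A concrete subclass only needs getValue(), getGradient(), and toString(). A minimal sketch for f(x) = ||x - c||^2, where the target point c is made up for illustration:

    public class SquaredDistanceObjective extends Objective {
        private final double[] c;   // hypothetical target point

        public SquaredDistanceObjective(double[] start, double[] c) {
            this.c = c;
            setInitialParameters(start.clone());
        }

        public double getValue() {
            functionCalls++;
            double v = 0;
            for (int i = 0; i < parameters.length; i++)
                v += (parameters[i] - c[i]) * (parameters[i] - c[i]);
            return v;
        }

        public double[] getGradient() {
            gradientCalls++;
            if (gradient == null) gradient = new double[parameters.length];
            for (int i = 0; i < parameters.length; i++)
                gradient[i] = 2 * (parameters[i] - c[i]);   // d/dx_i of (x_i - c_i)^2
            return gradient;
        }

        public String toString() {
            return java.util.Arrays.toString(parameters);
        }
    }
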
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java
deleted file mode 100644
index 96fce5b0..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.stopCriteria.StopingCriteria;
-
-public interface Optimizer {
- public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stoping);
-
-
- public double[] getDirection();
- public double getCurrentStep();
- public double getCurrentValue();
- public int getCurrentIteration();
- public void reset();
-
- public void setMaxIterations(int max);
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java
deleted file mode 100644
index afb29d04..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java
+++ /dev/null
@@ -1,11 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-/**
- *
- * @author javg
- *
- */
-public abstract class ProjectedAbstractGradientBaseMethod extends AbstractGradientBaseMethod implements ProjectedOptimizer{
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java
deleted file mode 100644
index 0186e945..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java
+++ /dev/null
@@ -1,154 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import java.io.IOException;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.ProjectedDifferentiableLineSearchObjective;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-
-/**
- * This class implements projected gradient descent
- * as described in Bertsekas, "Nonlinear Programming",
- * section 2.3.
- *
- * The update is given by:
- * x_k+1 = x_k + alpha^k(xbar_k - x_k)
- * where xbar is:
- * xbar = [x_k - s_k grad(f(x_k))]+
- * and []+ is the projection onto the feasibility set.
- *
- * alpha is the step size.
- * s_k is a positive scalar which can be viewed as a step size as well: by
- * setting alpha to 1, x_k+1 = [x_k - s_k grad(f(x_k))]+.
- * This is called taking a step along the projection arc (Bertsekas), which
- * we use by default.
- *
- * Note that the only place where we actually pick a step size is in the line
- * search, so this is just like normal gradient descent but with a different
- * Armijo line search, where we project after taking each step.
- *
- *
- * @author javg
- *
- */
-public class ProjectedGradientDescent extends ProjectedAbstractGradientBaseMethod{
-
-
-
-
- public ProjectedGradientDescent(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
-	//Use a projected differentiable line search objective instead
-	public void initializeStructures(Objective o, OptimizerStats stats, StopingCriteria stop) {
-		lso = new ProjectedDifferentiableLineSearchObjective(o);
-	}
-
-
- ProjectedObjective obj;
- public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){
- obj = o;
- return super.optimize(o, stats, stop);
- }
-
- public double[] getDirection(){
- for(int i = 0; i< gradient.length; i++){
- direction[i] = -gradient[i];
- }
- return direction;
- }
-
-
-
-
-}
-
-
-
-
-
-
-
-///OLD CODE
-
-//Use projected gradient norm
-//public boolean stopCriteria(double[] gradient){
-// if(originalDirenctionL2Norm == 0){
-// System.out.println("Leaving original direction norm is zero");
-// return true;
-// }
-// if(MathUtils.L2Norm(direction)/originalDirenctionL2Norm < gradientConvergenceValue){
-// System.out.println("Leaving projected gradient Norm smaller than epsilon");
-// return true;
-// }
-// if((previousValue - currValue)/Math.abs(previousValue) < valueConvergenceValue) {
-// System.out.println("Leaving value change below treshold " + previousValue + " - " + currValue);
-// System.out.println(previousValue/currValue + " - " + currValue/currValue
-// + " = " + (previousValue - currValue)/Math.abs(previousValue));
-// return true;
-// }
-// return false;
-//}
-//
-
-//public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){
-// stats.collectInitStats(this, o);
-// obj = o;
-// step = 0;
-// currValue = o.getValue();
-// previousValue = Double.MAX_VALUE;
-// gradient = o.getGradient();
-// originalGradientL2Norm = MathUtils.L2Norm(gradient);
-// parameterChange = new double[gradient.length];
-// getDirection();
-// ProjectedDifferentiableLineSearchObjective lso = new ProjectedDifferentiableLineSearchObjective(o,direction);
-//
-// originalDirenctionL2Norm = MathUtils.L2Norm(direction);
-// //MatrixOutput.printDoubleArray(currParameters, "parameters");
-// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-// // System.out.println("Iter " + currentProjectionIteration);
-// //o.printParameters();
-//
-//
-//
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// lastStepUsed = step;
-// return true;
-// }
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find step");
-// stats.collectFinalStats(this, o);
-// return false;
-//
-// }
-//
-// //Update the direction for stopping criteria
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient = o.getGradient();
-// direction = getDirection();
-// if(MathUtils.dotProduct(gradient, direction) > 0){
-// System.out.println("Not a descent direction");
-// System.out.println(" current stats " + stats.prettyPrint(1));
-// System.exit(-1);
-// }
-// stats.collectIterationStats(this, o);
-// }
-// lastStepUsed = step;
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
-//public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
-// System.out.println("Objective is not a projected objective");
-// throw new RuntimeException();
-//}
-
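
Concretely, the projection-arc step from the class comment above is a plain gradient step followed by a projection. For simple box constraints lo <= x <= hi the projection []+ is a per-coordinate clamp; a sketch (the box feasibility set is an illustration, not this project's constraint set):

    // One step of x_{k+1} = [x_k - step * grad f(x_k)]+ for box constraints.
    static double[] projectedBoxStep(double[] x, double[] grad, double step,
                                     double[] lo, double[] hi) {
        double[] next = new double[x.length];
        for (int i = 0; i < x.length; i++) {
            double xi = x[i] - step * grad[i];                 // gradient step
            next[i] = Math.min(hi[i], Math.max(lo[i], xi));    // []+ : clamp into [lo, hi]
        }
        return next;
    }
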
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java
deleted file mode 100644
index c3d21393..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.util.MathUtils;
-
-
-/**
- * Computes a projected objective.
- * When we tell it to set some parameters, it automatically projects them back
- * into the feasible set (e.g. the simplex).
- *
- * When we ask it for the gradient, it automatically returns the projected gradient.
- * @author javg
- *
- */
-public abstract class ProjectedObjective extends Objective{
-
- public abstract double[] projectPoint (double[] point);
-
- public double[] auxParameters;
-
-
- public void setInitialParameters(double[] params){
- setParameters(projectPoint(params));
- }
-
-
-
-
-}
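
In the posterior-regularisation setting the feasibility set is typically the probability simplex, so projectPoint is a Euclidean simplex projection. A standard O(n log n) sort-based sketch, offered as an illustration rather than this project's own implementation:

    // Euclidean projection of v onto { x : x_i >= 0, sum_i x_i = 1 }.
    static double[] projectOntoSimplex(double[] v) {
        int n = v.length;
        double[] u = v.clone();
        java.util.Arrays.sort(u);                 // ascending
        double cumsum = 0, theta = 0;
        for (int i = n - 1; i >= 0; i--) {        // scan from the largest entry down
            cumsum += u[i];
            int k = n - i;                        // number of entries in play
            double t = (cumsum - 1.0) / k;
            if (i == 0 || u[i - 1] <= t) { theta = t; break; }
        }
        double[] x = new double[n];
        for (int i = 0; i < n; i++)
            x[i] = Math.max(v[i] - theta, 0);     // shift and clip
        return x;
    }
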
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java
deleted file mode 100644
index 81d8403e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-
-public interface ProjectedOptimizer extends Optimizer{
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java
deleted file mode 100644
index 6340ef73..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package optimization.gradientBasedMethods.stats;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-public class OptimizerStats {
-
- double start = 0;
- double totalTime = 0;
-
- String objectiveFinalStats;
-
- ArrayList<Double> gradientNorms = new ArrayList<Double>();
- ArrayList<Double> steps = new ArrayList<Double>();
- ArrayList<Double> value = new ArrayList<Double>();
- ArrayList<Integer> iterations = new ArrayList<Integer>();
- double prevValue =0;
-
- public void reset(){
- start = 0;
- totalTime = 0;
-
- objectiveFinalStats="";
-
- gradientNorms.clear();
- steps.clear();
- value.clear();
- iterations.clear();
- prevValue =0;
- }
-
- public void startTime() {
- start = System.currentTimeMillis();
- }
- public void stopTime() {
- totalTime += System.currentTimeMillis() - start;
- }
-
- public String prettyPrint(int level){
- StringBuffer res = new StringBuffer();
- res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n");
- res.append(objectiveFinalStats+"\n");
- if(level > 0){
- if(iterations.size() > 0){
- res.append("\tIteration"+iterations.get(0)+"\tstep: "+StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)+ "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n");
- }
- for(int i = 1; i < iterations.size(); i++){
- res.append("\tIteration:\t"+iterations.get(i)+"\tstep:"+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+
- "\tvalue:\t"+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+
- "\tvalueDiff:\t"+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+
- "\n");
- }
- }
- return res.toString();
- }
-
-
- public void collectInitStats(Optimizer optimizer, Objective objective){
- startTime();
- iterations.add(-1);
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- steps.add(0.0);
- value.add(objective.getValue());
- }
-
- public void collectIterationStats(Optimizer optimizer, Objective objective){
- iterations.add(optimizer.getCurrentIteration());
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- steps.add(optimizer.getCurrentStep());
- value.add(optimizer.getCurrentValue());
- }
-
-
- public void collectFinalStats(Optimizer optimizer, Objective objective){
- stopTime();
- objectiveFinalStats = objective.finalInfoString();
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java
deleted file mode 100644
index d65a1267..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package optimization.gradientBasedMethods.stats;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.ProjectedOptimizer;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-public class ProjectedOptimizerStats extends OptimizerStats{
-
-
-
- public void reset(){
- super.reset();
- projectedGradientNorms.clear();
- }
-
- ArrayList<Double> projectedGradientNorms = new ArrayList<Double>();
-
- public String prettyPrint(int level){
- StringBuffer res = new StringBuffer();
- res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n");
- res.append(objectiveFinalStats+"\n");
- if(level > 0){
- if(iterations.size() > 0){
- res.append("\tIteration"+iterations.get(0)+"\tstep: "+
- StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)
- + "\tdirection"+
- StaticTools.prettyPrint(projectedGradientNorms.get(0), "0.00000E00", 10)+
- "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n");
- }
- for(int i = 1; i < iterations.size(); i++){
- res.append("\tIteration"+iterations.get(i)+"\tstep: "+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+
- "\t direction "+
- StaticTools.prettyPrint(projectedGradientNorms.get(i), "0.00000E00", 10)+
- "\tvalue "+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+
- "\tvalueDiff "+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+
- "\n");
- }
- }
- return res.toString();
- }
-
-
- public void collectInitStats(Optimizer optimizer, Objective objective){
- startTime();
- }
-
- public void collectIterationStats(Optimizer optimizer, Objective objective){
- iterations.add(optimizer.getCurrentIteration());
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- projectedGradientNorms.add(MathUtils.L2Norm(optimizer.getDirection()));
- steps.add(optimizer.getCurrentStep());
- value.add(optimizer.getCurrentValue());
- }
-
-
-
- public void collectFinalStats(Optimizer optimizer, Objective objective){
- stopTime();
- objectiveFinalStats = objective.finalInfoString();
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java
deleted file mode 100644
index c9f9b8df..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java
+++ /dev/null
@@ -1,102 +0,0 @@
-package optimization.linesearch;
-
-import optimization.util.Interpolation;
-
-
-/**
- * Implements Back Tracking Line Search as described on page 37 of Numerical Optimization.
- * Implements backtracking line search as described on page 37 of Numerical Optimization.
- * Also known as the Armijo rule.
- *
- */
-public class ArmijoLineSearchMinimization implements LineSearchMethod{
-
- /**
-	 * How much the step size decreases at each iteration.
- */
- double contractionFactor = 0.5;
- double c1 = 0.0001;
-
- double sigma1 = 0.1;
- double sigma2 = 0.9;
-
-
-
- double initialStep;
- int maxIterations = 10;
-
-
- public ArmijoLineSearchMinimization(){
- this.initialStep = 1;
- }
-
- //Experiment
-	double previousStepPicked = -1;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
-
- public void reset(){
-		previousStepPicked = -1;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- }
-
- public void setInitialStep(double initial){
- initialStep = initial;
- }
-
-	/**
-	 * Backtracking loop: shrink alpha until sufficient decrease holds.
-	 */
-
- public double getStepSize(DifferentiableLineSearchObjective o) {
- currentInitGradientDot = o.getInitialGradient();
- //Should update all in the objective
- o.updateAlpha(initialStep);
- int nrIterations = 0;
- //System.out.println("tried alpha" + initialStep + " value " + o.getCurrentValue());
- while(!WolfeConditions.suficientDecrease(o,c1)){
- if(nrIterations >= maxIterations){
- o.printLineSearchSteps();
- return -1;
- }
- double alpha=o.getAlpha();
- double alphaTemp =
- Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue());
- if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){
-// System.out.println("using alpha temp " + alphaTemp);
- alpha = alphaTemp;
- }else{
-// System.out.println("Discarding alpha temp " + alphaTemp);
- alpha = alpha*contractionFactor;
- }
-// double alpha =o.getAlpha()*contractionFactor;
-
- o.updateAlpha(alpha);
- //System.out.println("tried alpha" + alpha+ " value " + o.getCurrentValue());
- nrIterations++;
- }
-
- //System.out.println("Leavning line search used:");
- //o.printLineSearchSteps();
-
- previousInitGradientDot = currentInitGradientDot;
- previousStepPicked = o.getAlpha();
- return o.getAlpha();
- }
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-}
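
Stripped of the quadratic-interpolation safeguard, the loop above is the classic backtracking rule: shrink alpha until f(x + alpha*d) <= f(x) + c1*alpha*g0, where g0 = grad f(x).d < 0. A minimal standalone sketch, with value(alpha) standing in for evaluating f along the ray:

    // Plain backtracking Armijo search (sketch).
    static double backtrackingArmijo(java.util.function.DoubleUnaryOperator value,
                                     double f0, double g0, double c1,
                                     double shrink, double alpha, int maxIters) {
        for (int it = 0; it < maxIters; it++) {
            if (value.applyAsDouble(alpha) <= f0 + c1 * alpha * g0)
                return alpha;                  // sufficient decrease holds
            alpha *= shrink;                   // e.g. shrink = 0.5
        }
        return -1;                             // mirror the convention above: -1 = failure
    }
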
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java
deleted file mode 100644
index e153f2da..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java
+++ /dev/null
@@ -1,141 +0,0 @@
-package optimization.linesearch;
-
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.Interpolation;
-import optimization.util.MathUtils;
-
-
-
-
-
-/**
- * Implements the Armijo rule line search along the projection arc (Nonlinear Programming, page 230).
- * To be used with projected gradient methods.
- *
- * Recall that Armijo tries successive step sizes alpha until sufficient decrease is satisfied:
- * f(x+alpha*direction) < f(x) + alpha*c1*grad(f)*direction
- *
- * In this case we are optimizing over a convex set X, so we must guarantee that the new point stays inside the
- * constraints.
- * First, the direction has to be feasible (inside the constraints) and is defined as:
- * d = (x_k_f - x_k), where x_k_f is a feasible point,
- * so the Armijo condition can be rewritten as:
- * f(x+alpha(x_k_f - x_k)) < f(x) + c1*grad(f)*(x_k_f - x_k)
- * and x_k_f is defined as:
- * [x_k-alpha*grad(f)]+
- * where []+ means a projection onto the feasibility set.
- * This means that we take a step along the negative gradient (gradient descent) and then project
- * that point onto the feasibility set.
- * Note that if the point is already feasible then we are back to the normal Armijo rule.
- *
- * @author javg
- *
- */
-public class ArmijoLineSearchMinimizationAlongProjectionArc implements LineSearchMethod{
-
- /**
-	 * How much the step size decreases at each iteration.
- */
- double contractionFactor = 0.5;
- double c1 = 0.0001;
-
-
- double initialStep;
- int maxIterations = 100;
-
-
- double sigma1 = 0.1;
- double sigma2 = 0.9;
-
- //Experiment
-	double previousStepPicked = -1;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
- GenericPickFirstStep strategy;
-
-
- public void reset(){
-		previousStepPicked = -1;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- }
-
-
-	public ArmijoLineSearchMinimizationAlongProjectionArc(){
-		//Default strategy: always try a unit first step (getStepSize requires a strategy)
-		this(new GenericPickFirstStep(1));
-	}
-
- public ArmijoLineSearchMinimizationAlongProjectionArc(GenericPickFirstStep strategy){
- this.strategy = strategy;
- this.initialStep = strategy.getFirstStep(this);
- }
-
-
- public void setInitialStep(double initial){
- this.initialStep = initial;
- }
-
-	/**
-	 * Armijo search along the projection arc: shrink alpha until sufficient decrease holds.
-	 */
-
- public double getStepSize(DifferentiableLineSearchObjective o) {
-
-
- //Should update all in the objective
- initialStep = strategy.getFirstStep(this);
- o.updateAlpha(initialStep);
- previousInitGradientDot=currentInitGradientDot;
- currentInitGradientDot=o.getCurrentGradient();
- int nrIterations = 0;
-
- //Armijo rule, the current value has to be smaller than the original value plus a small step of the gradient
- while(o.getCurrentValue() >
- o.getOriginalValue() + c1*(o.getCurrentGradient())){
-// System.out.println("curr value "+o.getCurrentValue());
-// System.out.println("original value "+o.getOriginalValue());
-// System.out.println("GRADIENT decrease" +(MathUtils.dotProduct(o.o.gradient,
-// MathUtils.arrayMinus(o.originalParameters,((ProjectedObjective)o.o).auxParameters))));
-// System.out.println("GRADIENT SAVED" + o.getCurrentGradient());
- if(nrIterations >= maxIterations){
-				System.out.println("Could not find a step; leaving line search with -1");
- o.printLineSearchSteps();
- return -1;
- }
- double alpha=o.getAlpha();
- double alphaTemp =
- Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue());
- if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){
- alpha = alphaTemp;
- }else{
- alpha = alpha*contractionFactor;
- }
-// double alpha =obj.getAlpha()*contractionFactor;
- o.updateAlpha(alpha);
- nrIterations++;
- }
-// System.out.println("curr value "+o.getCurrentValue());
-// System.out.println("original value "+o.getOriginalValue());
-// System.out.println("sufficient decrease" +c1*o.getCurrentGradient());
-// System.out.println("Leavning line search used:");
-// o.printSmallLineSearchSteps();
-
- previousStepPicked = o.getAlpha();
- return o.getAlpha();
- }
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java
deleted file mode 100644
index a5bc958e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java
+++ /dev/null
@@ -1,185 +0,0 @@
-package optimization.linesearch;
-
-import gnu.trove.TDoubleArrayList;
-import gnu.trove.TIntArrayList;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-
-import util.MathUtil;
-import util.Printing;
-
-
-/**
- * A wrapper class for the actual objective in order to perform
- * line search. The optimization code assumes that this does a lot
- * of caching in order to simplify legibility. For the applications
- * we use it for, caching the entire history of evaluations should be
- * a win.
- *
- * Note: the lastEvaluatedAt value is very important, since we will use
- * it to avoid doing an evaluation of the gradient after the line search.
- *
- * The differentiable line search objective defines a search along the ray
- * given by a direction of the main objective.
- * It defines the following function,
- * where f is the original objective function:
- * g(alpha) = f(x_0 + alpha*direction)
- * g'(alpha) = f'(x_0 + alpha*direction)*direction
- *
- * @author joao
- *
- */
-public class DifferentiableLineSearchObjective {
-
-
-
- Objective o;
- int nrIterations;
- TDoubleArrayList steps;
- TDoubleArrayList values;
- TDoubleArrayList gradients;
-
-	//These variables cannot change
- public double[] originalParameters;
- public double[] searchDirection;
-
-
- /**
-	 * Defines a line search objective.
-	 * Receives the objective over which we are performing the line search,
-	 * used to calculate values and gradients. The search direction does not
-	 * depend on the objective but on the optimization method, and is supplied
-	 * later via reset(double[]).
-	 * @param o
- */
- public DifferentiableLineSearchObjective(Objective o) {
- this.o = o;
- originalParameters = new double[o.getNumParameters()];
- searchDirection = new double[o.getNumParameters()];
- steps = new TDoubleArrayList();
- values = new TDoubleArrayList();
- gradients = new TDoubleArrayList();
- }
- /**
- * Called whenever we start a new iteration.
-	 * Receives the ray along which we are searching and resets all values.
- *
- */
- public void reset(double[] direction){
- //Copy initial values
- System.arraycopy(o.getParameters(), 0, originalParameters, 0, o.getNumParameters());
- System.arraycopy(direction, 0, searchDirection, 0, o.getNumParameters());
-
- //Initialize variables
- nrIterations = 0;
- steps.clear();
- values.clear();
- gradients.clear();
-
- values.add(o.getValue());
- gradients.add(MathUtils.dotProduct(o.getGradient(),direction));
- steps.add(0);
- }
-
-
- /**
-	 * Updates the current value of alpha:
-	 * takes a step of size alpha in the search direction, then
-	 * gets the real objective value and gradient and calculates all required information.
- */
- public void updateAlpha(double alpha){
- if(alpha < 0){
-			System.out.println("alpha may not be smaller than zero");
- throw new RuntimeException();
- }
- nrIterations++;
- steps.add(alpha);
- //x_t+1 = x_t + alpha*direction
- System.arraycopy(originalParameters,0, o.getParameters(), 0, originalParameters.length);
- MathUtils.plusEquals(o.getParameters(), searchDirection, alpha);
- o.setParameters(o.getParameters());
-// System.out.println("Took a step of " + alpha + " new value " + o.getValue());
- values.add(o.getValue());
- gradients.add(MathUtils.dotProduct(o.getGradient(),searchDirection));
- }
-
-
-
- public int getNrIterations(){
- return nrIterations;
- }
-
- /**
-	 * return g(alpha) at line search iteration iter
- * @param iter
- * @return
- */
- public double getValue(int iter){
- return values.get(iter);
- }
-
- public double getCurrentValue(){
- return values.get(nrIterations);
- }
-
- public double getOriginalValue(){
- return values.get(0);
- }
-
- /**
-	 * return g'(alpha) at line search iteration iter
- * @param iter
- * @return
- */
- public double getGradient(int iter){
- return gradients.get(iter);
- }
-
- public double getCurrentGradient(){
- return gradients.get(nrIterations);
- }
-
- public double getInitialGradient(){
- return gradients.get(0);
- }
-
-
-
-
- public double getAlpha(){
- return steps.get(nrIterations);
- }
-
- public void printLineSearchSteps(){
- System.out.println(
-				" Steps size " + steps.size() +
-				" Values size " + values.size() +
-				" Gradients size " + gradients.size());
- for(int i =0; i < steps.size();i++){
- System.out.println("Iter " + i + " step " + steps.get(i) +
- " value " + values.get(i) + " grad " + gradients.get(i));
- }
- }
-
- public void printSmallLineSearchSteps(){
- for(int i =0; i < steps.size();i++){
- System.out.print(StaticTools.prettyPrint(steps.get(i), "0.0000E00",8) + " ");
- }
- System.out.println();
- }
-
- public static void main(String[] args) {
-
- }
-
-}
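
The reduction in the class comment, g(alpha) = f(x_0 + alpha*direction) with g'(alpha) = grad f(x_0 + alpha*direction).direction, is all a line search ever sees. The same computation without the caching machinery, as a sketch:

    // Evaluate the 1-D restriction of an Objective along direction d.
    static double[] restriction(Objective o, double[] x0, double[] d, double alpha) {
        double[] x = x0.clone();
        for (int i = 0; i < x.length; i++)
            x[i] += alpha * d[i];                   // x = x_0 + alpha * d
        o.setParameters(x);
        double value = o.getValue();                // g(alpha)
        double slope = 0;
        double[] grad = o.getGradient();
        for (int i = 0; i < d.length; i++)
            slope += grad[i] * d[i];                // g'(alpha) = grad(x) . d
        return new double[] { value, slope };
    }
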
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java
deleted file mode 100644
index a33eb311..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package optimization.linesearch;
-
-
-public class GenericPickFirstStep{
- double _initValue;
- public GenericPickFirstStep(double initValue) {
- _initValue = initValue;
- }
-
- public double getFirstStep(LineSearchMethod ls){
- return _initValue;
- }
- public void collectInitValues(LineSearchMethod ls){
-
- }
-
- public void collectFinalValues(LineSearchMethod ls){
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java
deleted file mode 100644
index 0deebcdb..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package optimization.linesearch;
-
-
-public class InterpolationPickFirstStep extends GenericPickFirstStep{
- public InterpolationPickFirstStep(double initValue) {
- super(initValue);
- }
-
- public double getFirstStep(LineSearchMethod ls){
- if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){
- double newStep = Math.min(300, 1.02*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient());
- // System.out.println("proposing " + newStep);
- return newStep;
-
- }
- return _initValue;
- }
- public void collectInitValues(WolfRuleLineSearch ls){
-
- }
-
- public void collectFinalValues(WolfRuleLineSearch ls){
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java
deleted file mode 100644
index 80cd7f39..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package optimization.linesearch;
-
-
-public interface LineSearchMethod {
-
- double getStepSize(DifferentiableLineSearchObjective o);
-
- public double getInitialGradient();
- public double getPreviousInitialGradient();
- public double getPreviousStepUsed();
-
- public void setInitialStep(double initial);
- public void reset();
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java
deleted file mode 100644
index 4b354fd9..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package optimization.linesearch;
-
-/**
- * Non-Newton since we don't always try 1 first...
- * Not sure if that is even useful for Newton methods.
- * @author javg
- *
- */
-public class NonNewtonInterpolationPickFirstStep extends GenericPickFirstStep{
- public NonNewtonInterpolationPickFirstStep(double initValue) {
- super(initValue);
- }
-
- public double getFirstStep(LineSearchMethod ls){
-// System.out.println("Previous step used " + ls.getPreviousStepUsed());
-// System.out.println("PreviousGradinebt " + ls.getPreviousInitialGradient());
-// System.out.println("CurrentGradinebt " + ls.getInitialGradient());
- if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){
- double newStep = 1.01*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient();
- //System.out.println("Suggesting " + newStep);
- return newStep;
-
- }
- return _initValue;
- }
- public void collectInitValues(WolfRuleLineSearch ls){
-
- }
-
- public void collectFinalValues(WolfRuleLineSearch ls){
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java
deleted file mode 100644
index 29ccbc32..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java
+++ /dev/null
@@ -1,137 +0,0 @@
-package optimization.linesearch;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-
-/**
- * See ArmijoLineSearchMinimizationAlongProjectionArc for description
- * @author javg
- *
- */
-public class ProjectedDifferentiableLineSearchObjective extends DifferentiableLineSearchObjective{
-
-
-
- ProjectedObjective obj;
- public ProjectedDifferentiableLineSearchObjective(Objective o) {
- super(o);
- if(!(o instanceof ProjectedObjective)){
- System.out.println("Must receive a projected objective");
- throw new RuntimeException();
- }
- obj = (ProjectedObjective) o;
- }
-
-
-
- public double[] projectPoint (double[] point){
- return ((ProjectedObjective)o).projectPoint(point);
- }
- public void updateAlpha(double alpha){
- if(alpha < 0){
-			System.out.println("alpha may not be smaller than zero");
- throw new RuntimeException();
- }
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getParameters().length];
- }
-
- nrIterations++;
-
- steps.add(alpha);
- System.arraycopy(originalParameters, 0, obj.auxParameters, 0, obj.auxParameters.length);
-
- //Take a step into the search direction
-
-// MatrixOutput.printDoubleArray(obj.getGradient(), "gradient");
-
-// alpha=gradients.get(0)*alpha/(gradients.get(gradients.size()-1));
-
- //x_t+1 = x_t - alpha*gradient = x_t + alpha*direction
- MathUtils.plusEquals(obj.auxParameters, searchDirection, alpha);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "before projection");
- obj.auxParameters = projectPoint(obj.auxParameters);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "after projection");
- o.setParameters(obj.auxParameters);
-// System.out.println("new parameters");
-// o.printParameters();
- values.add(o.getValue());
- //Computes the new gradient x_k-[x_k-alpha*Gradient(x_k)]+
- MathUtils.minusEqualsInverse(originalParameters,obj.auxParameters,1);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "new gradient");
- //Dot product between the new direction and the new gradient
- double gradient = MathUtils.dotProduct(obj.auxParameters,searchDirection);
- gradients.add(gradient);
- if(gradient > 0){
- System.out.println("Gradient on line search has to be smaller than zero");
- System.out.println("Iter: " + nrIterations);
- MatrixOutput.printDoubleArray(obj.auxParameters, "new direction");
- MatrixOutput.printDoubleArray(searchDirection, "search direction");
- throw new RuntimeException();
-
- }
-
- }
-
- /**
- *
- */
-// public void updateAlpha(double alpha){
-//
-// if(alpha < 0){
-// System.out.println("alpha may not be smaller that zero");
-// throw new RuntimeException();
-// }
-//
-// nrIterations++;
-// steps.add(alpha);
-// //x_t+1 = x_t - alpha*direction
-// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length);
-//// MatrixOutput.printDoubleArray(parametersChange, "parameters before step");
-//// System.out.println("Step" + alpha);
-// MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha);
-//
-// MathUtils.minusEquals(parametersChange, originalGradient, alpha);
-//
-// //Project the points into the feasibility set
-//// MatrixOutput.printDoubleArray(parametersChange, "before projection");
-// //x_k(alpha) = [x_k - alpha*grad f(x_k)]+
-// parametersChange = projectPoint(parametersChange);
-//// MatrixOutput.printDoubleArray(parametersChange, "after projection");
-// o.setParameters(parametersChange);
-// values.add(o.getValue());
-// //Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+
-//
-// direction=MathUtils.arrayMinus(parametersChange,originalParameters);
-//// MatrixOutput.printDoubleArray(direction, "new direction");
-//
-// double gradient = MathUtils.dotProduct(originalGradient,direction);
-// gradients.add(gradient);
-// if(gradient > 1E-10){
-// System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction)));
-//
-//
-// System.out.println("not a descent direction for alpha " + alpha);
-// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length);
-// MathUtils.minusEquals(parametersChange, originalGradient, 1E-20);
-//
-// parametersChange = projectPoint(parametersChange);
-// direction=MathUtils.arrayMinus(parametersChange,originalParameters);
-// gradient = MathUtils.dotProduct(originalGradient,direction);
-// if(gradient > 0){
-// System.out.println("Direction is really non-descent evern for small alphas:" + gradient);
-// }
-// System.out.println("ProjecteLineSearchObjective: Should be a descent direction at " + nrIterations + ": "+ gradient);
-//// System.out.println(Printing.doubleArrayToString(originalGradient, null,"Original gradient"));
-//// System.out.println(Printing.doubleArrayToString(originalParameters, null,"Original parameters"));
-//// System.out.println(Printing.doubleArrayToString(parametersChange, null,"Projected parameters"));
-//// System.out.println(Printing.doubleArrayToString(direction, null,"Direction"));
-// throw new RuntimeException();
-// }
-// }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java
deleted file mode 100644
index 5489f2d0..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java
+++ /dev/null
@@ -1,300 +0,0 @@
-package optimization.linesearch;
-
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import optimization.util.Interpolation;
-
-
-
-
-/**
- *
- * @author javg
- *
- */
-public class WolfRuleLineSearch implements LineSearchMethod{
-
- GenericPickFirstStep pickFirstStep;
-
- double c1 = 1.0E-4;
- double c2 = 0.9;
-
- //Application dependent
- double maxStep=100;
-
- int extrapolationIteration;
- int maxExtrapolationIteration = 1000;
-
-
- double minZoomDiffTresh = 10E-10;
-
-
- ArrayList<Double> steps;
- ArrayList<Double> gradientDots;
- ArrayList<Double> functionVals;
-
- int debugLevel = 0;
- boolean foudStep = false;
-
- public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){
- this.pickFirstStep = pickFirstStep;
-
- }
-
-
-
-
- public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep, double c1, double c2){
- this.pickFirstStep = pickFirstStep;
- initialStep = pickFirstStep.getFirstStep(this);
- this.c1 = c1;
- this.c2 = c2;
- }
-
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- //Experiment
-	double previousStepPicked = -1;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
- double initialStep;
-
-
- public void reset(){
-		previousStepPicked = -1;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- if(steps != null)
- steps.clear();
- if(gradientDots != null)
- gradientDots.clear();
- if(functionVals != null)
- functionVals.clear();
- }
-
- public void setInitialStep(double initial){
- initialStep = pickFirstStep.getFirstStep(this);
- }
-
-
-
- /**
-	 * Implements the Wolfe line search as described in Nocedal and Wright.
-	 * The process consists of two stages. In the first stage we look for the
-	 * biggest step size that still satisfies the curvature condition: we keep
-	 * increasing the initial step size until either we find a step satisfying
-	 * the curvature condition (success), we fail the sufficient decrease condition so we
-	 * cannot increase further and call zoom with that maximum step, or we pass the minimum, in which case we call zoom the same way.
- *
- */
- public double getStepSize(DifferentiableLineSearchObjective objective){
- //System.out.println("entering line search");
-
- foudStep = false;
- if(debugLevel >= 1){
- steps = new ArrayList<Double>();
- gradientDots = new ArrayList<Double>();
- functionVals =new ArrayList<Double>();
- }
-
- //test
- currentInitGradientDot = objective.getInitialGradient();
-
-
- double previousValue = objective.getCurrentValue();
- double previousStep = 0;
- double currentStep =pickFirstStep.getFirstStep(this);
- for(extrapolationIteration = 0;
- extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){
-
- objective.updateAlpha(currentStep);
- double currentValue = objective.getCurrentValue();
- if(debugLevel >= 1){
- steps.add(currentStep);
- functionVals.add(currentValue);
- gradientDots.add(objective.getCurrentGradient());
- }
-
-
-			//The current step no longer satisfies the sufficient decrease condition,
-			// so we cannot get any bigger than this; call zoom.
- if(!WolfeConditions.suficientDecrease(objective,c1)||
- (extrapolationIteration > 0 && currentValue >= previousValue)){
- currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
- break;
- }
-
- //Satisfying both conditions ready to leave
- if(WolfeConditions.sufficientCurvature(objective,c1,c2)){
- //Found step
- foudStep = true;
- break;
- }
-
- /**
-			 * This means that we already passed the minimum, since the dot product that should be
-			 * negative (descent direction) is now positive, so we cannot increase any more. On the other hand,
-			 * since we know the direction is a descent direction, the objective value at the current step
-			 * is certainly smaller than at the previous step, so we swap the order.
- */
- if(objective.getCurrentGradient() >= 0){
- currentStep = zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1);
- break;
- }
-
-
- //Ok, so we can still get a bigger step,
- double aux = currentStep;
- //currentStep = currentStep*2;
- if(Math.abs(currentStep-maxStep)>1.1e-2){
- currentStep = (currentStep+maxStep)/2;
- }else{
- currentStep = currentStep*2;
- }
- previousStep = aux;
- previousValue = currentValue;
- //Could be done better
- if(currentStep >= maxStep){
-				System.out.println("Exceeded max step... calling zoom with maxStepSize");
- currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
- }
- }
- if(!foudStep){
-			System.out.println("Wolfe rule exceeded the maximum number of iterations");
- if(debugLevel >= 1){
- printSmallWolfeStats(System.out);
-// System.out.println("Line search values");
-// DebugHelpers.getLineSearchGraph(o, direction, originalParameters,origValue, origGradDirectionDot,c1,c2);
- }
- return -1;
- }
- if(debugLevel >= 1){
- printSmallWolfeStats(System.out);
- }
-
- previousStepPicked = currentStep;
- previousInitGradientDot = currentInitGradientDot;
-// objective.printLineSearchSteps();
- return currentStep;
- }
-
-
-
-
-
- public void printWolfeStats(PrintStream out){
- for(int i = 0; i < steps.size(); i++){
- out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i));
- }
- }
-
- public void printSmallWolfeStats(PrintStream out){
- for(int i = 0; i < steps.size(); i++){
- out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" ");
- }
- System.out.println();
- }
-
-
-
- /**
- * Picks a step satisfying the strong Wolfe conditions from the interval between lowerStep
- * and higherStep found by the routine above.
- *
- * Both lowerStep and higherStep have already been evaluated, so we only need to pass the
- * iterations at which they were evaluated and thereby save extra function evaluations.
- *
- * We know that lowerStepValue has to be smaller than higherStepValue, and that a point
- * satisfying both conditions exists in this interval.
- *
- * lowerStep always satisfies at least the sufficient decrease condition.
- * @return
- */
- public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep,
- int lowerStepIter, int higherStepIter){
-
- if(debugLevel >=2){
- System.out.println("Entering zoom with " + lowerStep+"-"+higherStep);
- }
-
- double currentStep=-1;
-
- int zoomIter = 0;
- while(zoomIter < 1000){
- if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){
- o.updateAlpha(lowerStep);
- if(debugLevel >= 1){
- steps.add(lowerStep);
- functionVals.add(o.getCurrentValue());
- gradientDots.add(o.getCurrentGradient());
- }
- foudStep = true;
- return lowerStep;
- }
-
- //Cubic interpolation
- currentStep =
- Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter),
- higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter));
-
-			//Safeguard: fall back to bisection. This should not normally be required; check under what conditions it triggers.
- if(currentStep < 0 ){
- currentStep = (lowerStep+higherStep)/2;
- }
- if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){
- currentStep = (lowerStep+higherStep)/2;
- }
-// currentStep = (lowerStep+higherStep)/2;
-// System.out.println("Trying "+currentStep);
- o.updateAlpha(currentStep);
- if(debugLevel >=1){
- steps.add(currentStep);
- functionVals.add(o.getCurrentValue());
- gradientDots.add(o.getCurrentGradient());
- }
- if(!WolfeConditions.suficientDecrease(o,c1)
- || o.getCurrentValue() >= o.getValue(lowerStepIter)){
- higherStepIter = o.nrIterations;
- higherStep = currentStep;
- }
-			//Note: when entering here the new step satisfies the sufficient decrease condition
-			//and has a function value better than the previous best (the lower step),
-			//so we either leave or make it the new lower end of the bracket.
- else{
- if(WolfeConditions.sufficientCurvature(o,c1,c2)){
-					//Satisfies both Wolfe conditions
- foudStep = true;
- break;
- }
-				//Curvature condition not satisfied: shrink the bracket
- if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){
- higherStep = lowerStep;
- higherStepIter = lowerStepIter;
- }
- lowerStep = currentStep;
- lowerStepIter = o.nrIterations;
- }
- zoomIter++;
- }
- return currentStep;
- }
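-	/*
-	 * Bracket invariant maintained above (comment added for clarity): lowerStep always
-	 * satisfies sufficient decrease and has the smaller function value; every iteration
-	 * either shrinks [lowerStep, higherStep] or returns a step satisfying both conditions.
-	 */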
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java
deleted file mode 100644
index dcc704eb..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package optimization.linesearch;
-
-
-public class WolfeConditions {
-
- /**
-	 * Sufficient decrease constant c1 (default constant).
- */
-
-
- /**
-	 * Value for the sufficient curvature constant c2:
-	 * 0.9 - for Newton and quasi-Newton methods
-	 * 0.1 - for nonlinear conjugate gradient
- */
-
- int debugLevel = 0;
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- public static boolean suficientDecrease(DifferentiableLineSearchObjective o, double c1){
- double value = o.getOriginalValue()+c1*o.getAlpha()*o.getInitialGradient();
-// System.out.println("Sufficient Decrease original "+value+" new "+ o.getCurrentValue());
- return o.getCurrentValue() <= value;
- }
-
-
-
-
- public static boolean sufficientCurvature(DifferentiableLineSearchObjective o, double c1, double c2){
-// if(debugLevel >= 2){
-// double current = Math.abs(o.getCurrentGradient());
-// double orig = -c2*o.getInitialGradient();
-// if(current <= orig){
-// return true;
-// }else{
-//				System.out.println("Not satisfying curvature condition: curvature " + current + " wants " + orig);
-// return false;
-// }
-// }
- return Math.abs(o.getCurrentGradient()) <= -c2*o.getInitialGradient();
- }
-
-}
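-
-/*
- * Minimal self-contained sketch of the two strong Wolfe tests above, added for
- * illustration only; the class name, signature, and constants are hypothetical.
- */
-class WolfeConditionsSketch {
-	static boolean strongWolfe(double f0, double g0, double fAlpha, double gAlpha,
-			double alpha, double c1, double c2) {
-		boolean decrease = fAlpha <= f0 + c1 * alpha * g0; // sufficient decrease (Armijo)
-		boolean curvature = Math.abs(gAlpha) <= -c2 * g0;  // strong curvature condition
-		return decrease && curvature;
-	}
-	public static void main(String[] args) {
-		// f(x) = (x-1)^2 from x = 0 along the descent direction: f0 = 1, g0 = -2
-		System.out.println(strongWolfe(1.0, -2.0, 0.25, -1.0, 0.5, 1e-4, 0.9)); // true
-	}
-}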
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
deleted file mode 100644
index 0429d531..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
+++ /dev/null
@@ -1,104 +0,0 @@
-package optimization.projections;
-
-
-import java.util.Random;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-/**
- * Implements a projection onto a box set defined by a and b.
- * If either a or b is infinite then that bound is ignored.
- * @author javg
- *
- */
-public class BoundsProjection extends Projection{
-
- double a,b;
- boolean ignoreA = false;
- boolean ignoreB = false;
- public BoundsProjection(double lowerBound, double upperBound) {
- if(Double.isInfinite(lowerBound)){
- this.ignoreA = true;
- }else{
- this.a =lowerBound;
- }
- if(Double.isInfinite(upperBound)){
- this.ignoreB = true;
- }else{
- this.b =upperBound;
- }
- }
-
-
-
- /**
-	 * Projects onto the bounds
-	 * a <= x_i <= b
- */
- public void project(double[] original){
- for (int i = 0; i < original.length; i++) {
- if(!ignoreA && original[i] < a){
- original[i] = a;
- }else if(!ignoreB && original[i]>b){
- original[i]=b;
- }
- }
- }
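-	/*
-	 * Worked example (comment added for clarity): with bounds [0, +inf), projecting
-	 * {-1.1, 1.2, 1.4} clamps only the first coordinate, giving {0, 1.2, 1.4},
-	 * exactly as exercised by main() below.
-	 */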
-
- /**
- * Generates a random number between a and b.
- */
-
- Random r = new Random();
-
- public double[] samplePoint(int numParams) {
- double[] point = new double[numParams];
- for (int i = 0; i < point.length; i++) {
- double rand = r.nextDouble();
- if(ignoreA && ignoreB){
-				//Use a large constant instead of infinity to avoid overflow
-				point[i] = rand*(1.E100+1.E100)-1.E100;
-			}else if(ignoreA){
-				point[i] = rand*(b+1.E100)-1.E100;
-			}else if(ignoreB){
-				point[i] = rand*(1.E100-a)+a;
-			}else{
-				point[i] = rand*(b-a)+a;
- }
- }
- return point;
- }
-
- public static void main(String[] args) {
- BoundsProjection sp = new BoundsProjection(0,Double.POSITIVE_INFINITY);
-
-
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 1");
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 2");
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 3");
-
- double[] d = {-1.1,1.2,1.4};
- double[] original = d.clone();
- MatrixOutput.printDoubleArray(d, "before");
-
- sp.project(d);
- MatrixOutput.printDoubleArray(d, "after");
- System.out.println("Test projection: " + sp.testProjection(original, d));
- }
-
- double epsilon = 1.E-10;
- public double[] perturbePoint(double[] point, int parameter){
- double[] newPoint = point.clone();
- if(!ignoreA && MathUtils.almost(point[parameter], a)){
- newPoint[parameter]+=epsilon;
- }else if(!ignoreB && MathUtils.almost(point[parameter], b)){
- newPoint[parameter]-=epsilon;
- }else{
- newPoint[parameter]-=epsilon;
- }
- return newPoint;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
deleted file mode 100644
index b5a9f92f..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
+++ /dev/null
@@ -1,72 +0,0 @@
-package optimization.projections;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-import util.ArrayMath;
-import util.Printing;
-
-
-
-public abstract class Projection {
-
-
- public abstract void project(double[] original);
-
-
- /**
-	 * From the projection theorem, "Nonlinear Programming", page
-	 * 201, fact 2:
-	 *
-	 * Given some z in R^n and a vector x* in X,
-	 * x* = [z]+ (the projection of z onto X) iff for all x in X:
-	 * (z-x*)'(x-x*) <= 0, with equality when x = x*.
-	 * See figure 2.16 in the book.
- *
- * @param original
- * @param projected
- * @return
- */
- public boolean testProjection(double[] original, double[] projected){
- double[] original1 = original.clone();
- //System.out.println(Printing.doubleArrayToString(original1, null, "original"));
- //System.out.println(Printing.doubleArrayToString(projected, null, "projected"));
- MathUtils.minusEquals(original1, projected, 1);
- //System.out.println(Printing.doubleArrayToString(original1, null, "minus1"));
- for(int i = 0; i < 10; i++){
- double[] x = samplePoint(original.length);
- // System.out.println(Printing.doubleArrayToString(x, null, "sample"));
-			//If x equals the projected point the dot product below is zero, which satisfies the test.
- MathUtils.minusEquals(x, projected, 1);
- // System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
- double dotProd = MathUtils.dotProduct(original1, x);
-
- // System.out.println("dot " + dotProd);
- if(dotProd > 0) return false;
- }
-
- //Perturbs the point a bit in all possible directions
- for(int i = 0; i < original.length; i++){
- double[] x = perturbePoint(projected,i);
- // System.out.println(Printing.doubleArrayToString(x, null, "perturbed"));
-			//If x equals the projected point the dot product below is zero, which satisfies the test.
- MathUtils.minusEquals(x, projected, 1);
- // System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
- double dotProd = MathUtils.dotProduct(original1, x);
-
- // System.out.println("dot " + dotProd);
- if(dotProd > 0) return false;
- }
-
-
-
- return true;
- }
-
- //Samples a point from the constrained set
- public abstract double[] samplePoint(int dimensions);
-
- //Perturbs a point a bit still leaving it at the constraints set
- public abstract double[] perturbePoint(double[] point, int parameter);
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
deleted file mode 100644
index f22afcaf..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package optimization.projections;
-
-
-
-import java.util.Random;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-public class SimplexProjection extends Projection{
-
- double scale;
- public SimplexProjection(double scale) {
- this.scale = scale;
- }
-
- /**
-	 * Projects the entries of the array
-	 * onto a simplex of the given scale.
-	 * We follow the description in the paper
-	 * "Efficient Projections onto the l1-Ball
-	 * for Learning in High Dimensions".
- */
- public void project(double[] original){
- double[] ds = new double[original.length];
- System.arraycopy(original, 0, ds, 0, ds.length);
-		//If the sum of the positive parts is already within the scale, no projection is needed
- for (int i = 0; i < ds.length; i++) ds[i] = ds[i]>0? ds[i]:0;
- double sum = MathUtils.sum(ds);
- if (scale - sum >= -1.E-10 ){
- System.arraycopy(ds, 0, original, 0, ds.length);
- //System.out.println("Not projecting");
- return;
- }
- //System.out.println("projecting " + sum + " scontraints " + scale);
- util.Array.sortDescending(ds);
- double currentSum = 0;
- double previousTheta = 0;
- double theta = 0;
- for (int i = 0; i < ds.length; i++) {
- currentSum+=ds[i];
- theta = (currentSum-scale)/(i+1);
- if(ds[i]-theta < -1e-10){
- break;
- }
- previousTheta = theta;
- }
- //DEBUG
- if(previousTheta < 0){
-			System.out.println("Simplex projection: theta is smaller than zero: " + previousTheta);
- System.exit(-1);
- }
- for (int i = 0; i < original.length; i++) {
- original[i] = Math.max(original[i]-previousTheta, 0);
- }
- }
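-	/*
-	 * Worked example of the thresholding above (comment added for clarity): projecting
-	 * {0.8, 0.6} onto the scale-1 simplex keeps the descending order {0.8, 0.6},
-	 * computes theta = (0.8-1)/1 = -0.2 and then theta = (1.4-1)/2 = 0.2, and returns
-	 * {max(0.8-0.2, 0), max(0.6-0.2, 0)} = {0.6, 0.4}, which sums to the scale 1.
-	 */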
-
-
-
-
-
-
- /**
-	 * Samples a point from the simplex of the given scale. Just sample
-	 * random numbers from 0 to scale and then, if
-	 * their sum exceeds the scale, normalize them.
-	 * This probably does not sample uniformly from the simplex, but it is
-	 * enough for our goals here.
- */
- Random r = new Random();
- public double[] samplePoint(int dimensions) {
- double[] newPoint = new double[dimensions];
- double sum =0;
- for (int i = 0; i < newPoint.length; i++) {
- double rand = r.nextDouble()*scale;
- sum+=rand;
- newPoint[i]=rand;
- }
- //Normalize
- if(sum > scale){
- for (int i = 0; i < newPoint.length; i++) {
- newPoint[i]=scale*newPoint[i]/sum;
- }
- }
- return newPoint;
- }
-
- public static void main(String[] args) {
- SimplexProjection sp = new SimplexProjection(1);
-
-
- double[] point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 1 sum:" + MathUtils.sum(point));
- point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 2 sum:" + MathUtils.sum(point));
- point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 3 sum:" + MathUtils.sum(point));
-
- double[] d = {0,1.1,-10};
- double[] original = d.clone();
- MatrixOutput.printDoubleArray(d, "before");
-
- sp.project(d);
- MatrixOutput.printDoubleArray(d, "after");
- System.out.println("Test projection: " + sp.testProjection(original, d));
-
- }
-
-
- double epsilon = 1.E-10;
- public double[] perturbePoint(double[] point, int parameter){
- double[] newPoint = point.clone();
- if(MathUtils.almost(MathUtils.sum(point), scale)){
- newPoint[parameter]-=epsilon;
- }
- else if(point[parameter]==0){
- newPoint[parameter]+=epsilon;
- }else if(MathUtils.almost(point[parameter], scale)){
- newPoint[parameter]-=epsilon;
- }
- else{
- newPoint[parameter]-=epsilon;
- }
- return newPoint;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java
deleted file mode 100644
index 15760f18..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package optimization.stopCriteria;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-
-public class CompositeStopingCriteria implements StopingCriteria {
-
- ArrayList<StopingCriteria> criterias;
-
- public CompositeStopingCriteria() {
- criterias = new ArrayList<StopingCriteria>();
- }
-
- public void add(StopingCriteria criteria){
- criterias.add(criteria);
- }
-
- public boolean stopOptimization(Objective obj){
- for(StopingCriteria criteria: criterias){
- if(criteria.stopOptimization(obj)){
- return true;
- }
- }
- return false;
- }
-
- public void reset(){
- for(StopingCriteria criteria: criterias){
- criteria.reset();
- }
- }
-}
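-
-/*
- * Illustrative usage sketch (added for clarity; assumes the optimizer consults the
- * criterion once per iteration):
- *
- *   CompositeStopingCriteria stop = new CompositeStopingCriteria();
- *   stop.add(new GradientL2Norm(1e-3));
- *   stop.add(new ValueDifference(1e-6));
- *   // the optimization loop halts as soon as either criterion fires
- */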
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java
deleted file mode 100644
index 534ff833..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class GradientL2Norm implements StopingCriteria{
-
- /**
-	 * Stop if the gradient L2 norm is smaller
-	 * than gradientConvergenceValue
- */
- protected double gradientConvergenceValue;
-
-
- public GradientL2Norm(double gradientConvergenceValue){
- this.gradientConvergenceValue = gradientConvergenceValue;
- }
-
- public void reset(){}
-
- public boolean stopOptimization(Objective obj){
- double norm = MathUtils.L2Norm(obj.gradient);
- if(norm < gradientConvergenceValue){
-			System.out.println("Gradient norm below threshold");
- return true;
- }
- return false;
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java
deleted file mode 100644
index 4a489641..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-/**
- * Divides the norm by the norm at the beginning of the optimization
- * @author javg
- *
- */
-public class NormalizedGradientL2Norm extends GradientL2Norm{
-
- /**
- * Stop if gradientNorm/(originalGradientNorm) smaller
- * than gradientConvergenceValue
- */
- protected double originalGradientNorm = -1;
-
- public void reset(){
- originalGradientNorm = -1;
- }
- public NormalizedGradientL2Norm(double gradientConvergenceValue){
- super(gradientConvergenceValue);
- }
-
-
-
-
- public boolean stopOptimization(Objective obj){
- double norm = MathUtils.L2Norm(obj.gradient);
- if(originalGradientNorm == -1){
- originalGradientNorm = norm;
- }
- if(originalGradientNorm < 1E-10){
- System.out.println("Gradient norm is zero " + originalGradientNorm);
- return true;
- }
- double normalizedNorm = 1.0*norm/originalGradientNorm;
- if( normalizedNorm < gradientConvergenceValue){
-			System.out.println("Gradient norm below normalized norm threshold: " + norm + " original: " + originalGradientNorm + " normalized norm: " + normalizedNorm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java
deleted file mode 100644
index 5ae554c2..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-/**
- * Divides the norm by the norm at the beginning of the optimization
- * @author javg
- *
- */
-public class NormalizedProjectedGradientL2Norm extends ProjectedGradientL2Norm{
-
- /**
- * Stop if gradientNorm/(originalGradientNorm) smaller
- * than gradientConvergenceValue
- */
- double originalProjectedNorm = -1;
-
- public NormalizedProjectedGradientL2Norm(double gradientConvergenceValue){
- super(gradientConvergenceValue);
- }
-
- public void reset(){
- originalProjectedNorm = -1;
- }
-
-
- double[] projectGradient(ProjectedObjective obj){
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getNumParameters()];
- }
- System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters());
- MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1);
- obj.auxParameters = obj.projectPoint(obj.auxParameters);
- MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1);
- return obj.auxParameters;
- }
-
- public boolean stopOptimization(Objective obj){
- if(obj instanceof ProjectedObjective) {
- ProjectedObjective o = (ProjectedObjective) obj;
- double norm = MathUtils.L2Norm(projectGradient(o));
- if(originalProjectedNorm == -1){
- originalProjectedNorm = norm;
- }
- double normalizedNorm = 1.0*norm/originalProjectedNorm;
- if( normalizedNorm < gradientConvergenceValue){
-			System.out.println("Gradient norm below normalized norm threshold: " + norm + " original: " + originalProjectedNorm + " normalized norm: " + normalizedNorm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
- System.out.println("Not a projected objective");
- throw new RuntimeException();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java
deleted file mode 100644
index 6dbbc50d..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class NormalizedValueDifference implements StopingCriteria{
-
- /**
-	 * Stop if the difference between values is smaller than a threshold
- */
- protected double valueConvergenceValue=0.01;
- protected double previousValue = Double.NaN;
- protected double currentValue = Double.NaN;
-
- public NormalizedValueDifference(double valueConvergenceValue){
- this.valueConvergenceValue = valueConvergenceValue;
- }
-
- public void reset(){
- previousValue = Double.NaN;
- currentValue = Double.NaN;
- }
-
-
- public boolean stopOptimization(Objective obj){
- if(Double.isNaN(currentValue)){
- currentValue = obj.getValue();
- return false;
- }else {
- previousValue = currentValue;
- currentValue = obj.getValue();
- if(previousValue != 0){
- double valueDiff = Math.abs(previousValue - currentValue)/Math.abs(previousValue);
- if( valueDiff < valueConvergenceValue){
-				System.out.println("Leaving: difference in values is too small: Prev "
- + (previousValue/previousValue) + " Curr: " + (currentValue/previousValue)
- + " diff: " + valueDiff);
- return true;
- }
- }else{
- double valueDiff = Math.abs(previousValue - currentValue);
- if( valueDiff < valueConvergenceValue){
-				System.out.println("Leaving: difference in values is too small: Prev "
- + (previousValue) + " Curr: " + (currentValue)
- + " diff: " + valueDiff);
- return true;
- }
- }
-
- return false;
- }
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java
deleted file mode 100644
index aadf1fd5..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java
+++ /dev/null
@@ -1,51 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-public class ProjectedGradientL2Norm implements StopingCriteria{
-
- /**
-	 * Stop if the projected gradient L2 norm is smaller
-	 * than gradientConvergenceValue
- */
- protected double gradientConvergenceValue;
-
-
- public ProjectedGradientL2Norm(double gradientConvergenceValue){
- this.gradientConvergenceValue = gradientConvergenceValue;
- }
-
- public void reset(){
-
- }
-
- double[] projectGradient(ProjectedObjective obj){
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getNumParameters()];
- }
- System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters());
- MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1);
- obj.auxParameters = obj.projectPoint(obj.auxParameters);
- MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1);
- return obj.auxParameters;
- }
-
- public boolean stopOptimization(Objective obj){
- if(obj instanceof ProjectedObjective) {
- ProjectedObjective o = (ProjectedObjective) obj;
- double norm = MathUtils.L2Norm(projectGradient(o));
- if(norm < gradientConvergenceValue){
- // System.out.println("Gradient norm below treshold: " + norm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
- System.out.println("Not a projected objective");
- throw new RuntimeException();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java
deleted file mode 100644
index 10cf0522..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-
-public interface StopingCriteria {
- public boolean stopOptimization(Objective obj);
- public void reset();
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java
deleted file mode 100644
index e5d07229..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class ValueDifference implements StopingCriteria{
-
- /**
-	 * Stop if the difference between values is smaller than a threshold
- */
- protected double valueConvergenceValue=0.01;
- protected double previousValue = Double.NaN;
- protected double currentValue = Double.NaN;
-
- public ValueDifference(double valueConvergenceValue){
- this.valueConvergenceValue = valueConvergenceValue;
- }
-
- public void reset(){
- previousValue = Double.NaN;
- currentValue = Double.NaN;
- }
-
- public boolean stopOptimization(Objective obj){
- if(Double.isNaN(currentValue)){
- currentValue = obj.getValue();
- return false;
- }else {
- previousValue = currentValue;
- currentValue = obj.getValue();
- if(previousValue - currentValue < valueConvergenceValue){
-// System.out.println("Leaving different in values is to small: Prev "
-// + previousValue + " Curr: " + currentValue
-// + " diff: " + (previousValue - currentValue));
- return true;
- }
- return false;
- }
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
deleted file mode 100644
index cdbdefc6..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package optimization.util;
-
-public class Interpolation {
-
- /**
-	 * Fits a cubic polynomial to a function given two points,
-	 * requiring that either gradB is greater than zero or funcB >= funcA.
- *
- * NonLinear Programming appendix C
- * @param funcA
- * @param gradA
- * @param funcB
- * @param gradB
- */
- public final static double cubicInterpolation(double a,
- double funcA, double gradA, double b,double funcB, double gradB ){
- if(gradB < 0 && funcA > funcB){
- System.out.println("Cannot call cubic interpolation");
- return -1;
- }
-
- double z = 3*(funcA-funcB)/(b-a) + gradA + gradB;
- double w = Math.sqrt(z*z - gradA*gradB);
- double min = b -(gradB+w-z)*(b-a)/(gradB-gradA+2*w);
- return min;
- }
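-	/*
-	 * Sanity check (comment added for clarity): for f(x) = x^2 with a = -1
-	 * (funcA = 1, gradA = -2) and b = 1 (funcB = 1, gradB = 2):
-	 * z = 3*(1-1)/2 + (-2) + 2 = 0, w = sqrt(0 - (-2)*2) = 2, and the minimizer is
-	 * 1 - (2+2-0)*2/(2-(-2)+4) = 0, the true minimum of f.
-	 */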
-
- public final static double quadraticInterpolation(double initFValue,
- double initGrad, double point,double pointFValue){
- double min = -1*initGrad*point*point/(2*(pointFValue-initGrad*point-initFValue));
- return min;
- }
-
- public static void main(String[] args) {
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
deleted file mode 100644
index 5343a39b..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package optimization.util;
-
-public class Logger {
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
deleted file mode 100644
index af66f82c..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
+++ /dev/null
@@ -1,339 +0,0 @@
-package optimization.util;
-
-import java.util.Arrays;
-
-
-
-public class MathUtils {
-
- /**
- *
- * @param vector
-	 * @return the L2 (Euclidean) norm of the vector
- */
- public static double L2Norm(double[] vector){
- double value = 0;
- for(int i = 0; i < vector.length; i++){
- double v = vector[i];
- value+=v*v;
- }
- return Math.sqrt(value);
- }
-
- public static double sum(double[] v){
- double sum = 0;
- for (int i = 0; i < v.length; i++) {
- sum+=v[i];
- }
- return sum;
- }
-
-
-
-
- /**
- * w = w + v
- * @param w
- * @param v
- */
- public static void plusEquals(double[] w, double[] v) {
- for(int i=0; i<w.length;i++){
-			w[i] += v[i];
- }
- }
-
- /**
- * w[i] = w[i] + v
- * @param w
- * @param v
- */
- public static void plusEquals(double[] w, double v) {
- for(int i=0; i<w.length;i++){
-			w[i] += v;
- }
- }
-
- /**
- * w[i] = w[i] - v
- * @param w
- * @param v
- */
- public static void minusEquals(double[] w, double v) {
- for(int i=0; i<w.length;i++){
-			w[i] -= v;
- }
- }
-
- /**
- * w = w + a*v
- * @param w
- * @param v
- * @param a
- */
- public static void plusEquals(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- w[i] += a*v[i];
- }
- }
-
- /**
- * w = w - a*v
- * @param w
- * @param v
- * @param a
- */
- public static void minusEquals(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- w[i] -= a*v[i];
- }
- }
- /**
- * v = w - a*v
- * @param w
- * @param v
- * @param a
- */
- public static void minusEqualsInverse(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- v[i] = w[i] - a*v[i];
- }
- }
-
- public static double dotProduct(double[] w, double[] v){
- double accum = 0;
- for(int i=0; i<w.length;i++){
- accum += w[i]*v[i];
- }
- return accum;
- }
-
- public static double[] arrayMinus(double[]w, double[]v){
- double result[] = w.clone();
- for(int i=0; i<w.length;i++){
- result[i] -= v[i];
- }
- return result;
- }
-
- public static double[] arrayMinus(double[] result , double[]w, double[]v){
- for(int i=0; i<w.length;i++){
- result[i] = w[i]-v[i];
- }
- return result;
- }
-
- public static double[] negation(double[]w){
- double result[] = new double[w.length];
- for(int i=0; i<w.length;i++){
- result[i] = -w[i];
- }
- return result;
- }
-
- public static double square(double value){
- return value*value;
- }
- public static double[][] outerProduct(double[] w, double[] v){
- double[][] result = new double[w.length][v.length];
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < v.length; j++){
- result[i][j] = w[i]*v[j];
- }
- }
- return result;
- }
- /**
-	 * result[i][j] = a*w[i]*v[j] (weighted outer product)
- * @param w
- * @param v
- * @param a
- * @return
- */
- public static double[][] weightedouterProduct(double[] w, double[] v, double a){
- double[][] result = new double[w.length][v.length];
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < v.length; j++){
- result[i][j] = a*w[i]*v[j];
- }
- }
- return result;
- }
-
- public static double[][] identity(int size){
- double[][] result = new double[size][size];
- for(int i = 0; i < size; i++){
- result[i][i] = 1;
- }
- return result;
- }
-
- /**
-	 * w -= v
- * @param v
- * @param w
- */
- public static void minusEquals(double[][] w, double[][] v){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] -= v[i][j];
- }
- }
- }
-
- /**
-	 * w[i][j] -= a*v[i][j]
- * @param v
- * @param w
- */
- public static void minusEquals(double[][] w, double[][] v, double a){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] -= a*v[i][j];
- }
- }
- }
-
- /**
-	 * w += v
- * @param v
- * @param w
- */
- public static void plusEquals(double[][] w, double[][] v){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] += v[i][j];
- }
- }
- }
-
- /**
-	 * w[i][j] += a*v[i][j]
- * @param v
- * @param w
- */
- public static void plusEquals(double[][] w, double[][] v, double a){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] += a*v[i][j];
- }
- }
- }
-
-
- /**
-	 * result = w*v (matrix product)
- * @param w
- * @param v
- * @return
- */
- public static double[][] matrixMultiplication(double[][] w,double[][] v){
- int w1 = w.length;
- int w2 = w[0].length;
- int v1 = v.length;
- int v2 = v[0].length;
-
- if(w2 != v1){
- System.out.println("Matrix dimensions do not agree...");
- System.exit(-1);
- }
-
- double[][] result = new double[w1][v2];
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- for(int v_i2 = 0; v_i2 < v2; v_i2++){
- double sum = 0;
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- sum += w[w_i1 ][w_i2]*v[w_i2][v_i2];
- }
- result[w_i1][v_i2] = sum;
- }
- }
- return result;
- }
-
- /**
-	 * w[i][j] *= v (scale every entry by the scalar v)
- * @param w
- * @param v
- */
- public static void matrixScalarMultiplication(double[][] w,double v){
- int w1 = w.length;
- int w2 = w[0].length;
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- w[w_i1 ][w_i2] *= v;
- }
- }
- }
-
- public static void scalarMultiplication(double[] w,double v){
- int w1 = w.length;
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- w[w_i1 ] *= v;
- }
-
- }
-
- public static double[] matrixVector(double[][] w,double[] v){
- int w1 = w.length;
- int w2 = w[0].length;
- int v1 = v.length;
-
- if(w2 != v1){
- System.out.println("Matrix dimensions do not agree...");
- System.exit(-1);
- }
-
- double[] result = new double[w1];
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- double sum = 0;
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- sum += w[w_i1 ][w_i2]*v[w_i2];
- }
- result[w_i1] = sum;
- }
- return result;
- }
-
- public static boolean allPositive(double[] array){
- for (int i = 0; i < array.length; i++) {
- if(array[i] < 0) return false;
- }
- return true;
- }
-
-
-
-
-
- public static void main(String[] args) {
- double[][] m1 = new double[2][2];
- m1[0][0]=2;
- m1[1][0]=2;
- m1[0][1]=2;
- m1[1][1]=2;
- MatrixOutput.printDoubleArray(m1, "m1");
- double[][] m2 = new double[2][2];
- m2[0][0]=3;
- m2[1][0]=3;
- m2[0][1]=3;
- m2[1][1]=3;
- MatrixOutput.printDoubleArray(m2, "m2");
- double[][] result = matrixMultiplication(m1, m2);
- MatrixOutput.printDoubleArray(result, "result");
- matrixScalarMultiplication(result, 3);
- MatrixOutput.printDoubleArray(result, "result after multiply by 3");
- }
-
- public static boolean almost(double a, double b, double prec){
- return Math.abs(a-b)/Math.abs(a+b) <= prec || (almostZero(a) && almostZero(b));
- }
-
- public static boolean almost(double a, double b){
- return Math.abs(a-b)/Math.abs(a+b) <= 1e-10 || (almostZero(a) && almostZero(b));
- }
-
- public static boolean almostZero(double a) {
- return Math.abs(a) <= 1e-30;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java
deleted file mode 100644
index 9fbdf955..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package optimization.util;
-
-
-public class MatrixOutput {
- public static void printDoubleArray(double[][] array, String arrayName) {
- int size1 = array.length;
- int size2 = array[0].length;
- System.out.println(arrayName);
- for (int i = 0; i < size1; i++) {
- for (int j = 0; j < size2; j++) {
- System.out.print(" " + StaticTools.prettyPrint(array[i][j],
- "00.00E00", 4) + " ");
-
- }
- System.out.println();
- }
- System.out.println();
- }
-
- public static void printDoubleArray(double[] array, String arrayName) {
- System.out.println(arrayName);
- for (int i = 0; i < array.length; i++) {
- System.out.print(" " + StaticTools.prettyPrint(array[i],
- "00.00E00", 4) + " ");
- }
- System.out.println();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java
deleted file mode 100644
index bcabee06..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java
+++ /dev/null
@@ -1,180 +0,0 @@
-package optimization.util;
-
-
-import java.io.File;
-import java.io.PrintStream;
-
-public class StaticTools {
-
- static java.text.DecimalFormat fmt = new java.text.DecimalFormat();
-
- public static void createDir(String directory) {
-
- File dir = new File(directory);
- if (!dir.isDirectory()) {
- boolean success = dir.mkdirs();
- if (!success) {
- System.out.println("Unable to create directory " + directory);
- System.exit(0);
- }
- System.out.println("Created directory " + directory);
- } else {
- System.out.println("Reusing directory " + directory);
- }
- }
-
- /*
-	 * q and p are indexed by [source][foreign]; for each source, Sum_foreign(q) = 1,
-	 * and the same for p. KL(q,p) = E_q[ log(q/p) ]
- */
- public static double KLDistance(double[][] p, double[][] q, int sourceSize,
- int foreignSize) {
- double totalKL = 0;
- // common.StaticTools.printMatrix(q, sourceSize, foreignSize, "q",
- // System.out);
- // common.StaticTools.printMatrix(p, sourceSize, foreignSize, "p",
- // System.out);
- for (int i = 0; i < sourceSize; i++) {
- double kl = 0;
- for (int j = 0; j < foreignSize; j++) {
- assert !Double.isNaN(q[i][j]) : "KLDistance q: prob is NaN";
- assert !Double.isNaN(p[i][j]) : "KLDistance p: prob is NaN";
- if (p[i][j] == 0 || q[i][j] == 0) {
- continue;
- } else {
- kl += q[i][j] * Math.log(q[i][j] / p[i][j]);
- }
-
- }
- totalKL += kl;
- }
- assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN";
- if (totalKL < -1.0E-10) {
-			System.out.println("KL smaller than zero " + totalKL);
-			System.out.println("Source size: " + sourceSize);
-			System.out.println("Foreign size: " + foreignSize);
- StaticTools.printMatrix(q, sourceSize, foreignSize, "q",
- System.out);
- StaticTools.printMatrix(p, sourceSize, foreignSize, "p",
- System.out);
- System.exit(-1);
- }
- return totalKL / sourceSize;
- }
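-	/*
-	 * Worked example (comment added for clarity): for a single source row with
-	 * q = {0.5, 0.5} and p = {0.25, 0.75},
-	 * KL(q,p) = 0.5*ln(0.5/0.25) + 0.5*ln(0.5/0.75) ~= 0.347 - 0.203 = 0.144.
-	 */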
-
- /*
-	 * indexed by [foreign][source] (i.e. [fi][si])
- */
- public static double KLDistancePrime(double[][] p, double[][] q,
- int sourceSize, int foreignSize) {
- double totalKL = 0;
- for (int i = 0; i < sourceSize; i++) {
- double kl = 0;
- for (int j = 0; j < foreignSize; j++) {
- assert !Double.isNaN(q[j][i]) : "KLDistance q: prob is NaN";
- assert !Double.isNaN(p[j][i]) : "KLDistance p: prob is NaN";
- if (p[j][i] == 0 || q[j][i] == 0) {
- continue;
- } else {
- kl += q[j][i] * Math.log(q[j][i] / p[j][i]);
- }
-
- }
- totalKL += kl;
- }
- assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN";
- return totalKL / sourceSize;
- }
-
- public static double Entropy(double[][] p, int sourceSize, int foreignSize) {
- double totalE = 0;
- for (int i = 0; i < foreignSize; i++) {
- double e = 0;
- for (int j = 0; j < sourceSize; j++) {
- e += p[i][j] * Math.log(p[i][j]);
- }
- totalE += e;
- }
- return totalE / sourceSize;
- }
-
- public static double[][] copyMatrix(double[][] original, int sourceSize,
- int foreignSize) {
- double[][] result = new double[sourceSize][foreignSize];
- for (int i = 0; i < sourceSize; i++) {
- for (int j = 0; j < foreignSize; j++) {
- result[i][j] = original[i][j];
- }
- }
- return result;
- }
-
- public static void printMatrix(double[][] matrix, int sourceSize,
- int foreignSize, String info, PrintStream out) {
-
- java.text.DecimalFormat fmt = new java.text.DecimalFormat();
- fmt.setMaximumFractionDigits(3);
- fmt.setMaximumIntegerDigits(3);
- fmt.setMinimumFractionDigits(3);
- fmt.setMinimumIntegerDigits(3);
-
- out.println(info);
-
- for (int i = 0; i < foreignSize; i++) {
- for (int j = 0; j < sourceSize; j++) {
- out.print(prettyPrint(matrix[j][i], ".00E00", 6) + " ");
- }
- out.println();
- }
- out.println();
- out.println();
- }
-
- public static void printMatrix(int[][] matrix, int sourceSize,
- int foreignSize, String info, PrintStream out) {
-
- out.println(info);
- for (int i = 0; i < foreignSize; i++) {
- for (int j = 0; j < sourceSize; j++) {
- out.print(matrix[j][i] + " ");
- }
- out.println();
- }
- out.println();
- out.println();
- }
-
- public static String formatTime(long duration) {
- StringBuilder sb = new StringBuilder();
-		double d = duration / 1000.0;
- fmt.applyPattern("00");
- sb.append(fmt.format((int) (d / (60 * 60))) + ":");
- d -= ((int) d / (60 * 60)) * 60 * 60;
- sb.append(fmt.format((int) (d / 60)) + ":");
- d -= ((int) d / 60) * 60;
- fmt.applyPattern("00.0");
- sb.append(fmt.format(d));
- return sb.toString();
- }
-
- public static String prettyPrint(double d, String patt, int len) {
- fmt.applyPattern(patt);
- String s = fmt.format(d);
- while (s.length() < len) {
- s = " " + s;
- }
- return s;
- }
-
-
- public static long getUsedMemory(){
- System.gc();
- return (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/ (1024 * 1024);
- }
-
- public final static boolean compareDoubles(double d1, double d2){
- return Math.abs(d1-d2) <= 1.E-10;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree.java b/gi/posterior-regularisation/prjava/src/phrase/Agree.java
deleted file mode 100644
index 8f7b499e..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Agree.java
+++ /dev/null
@@ -1,204 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree {
- PhraseCluster model1;
- C2F model2;
- Corpus c;
- private int K,n_phrases, n_words, n_contexts, n_positions1,n_positions2;
-
- /**@brief sum of loglikelihood of two
- * individual models
- */
- public double llh;
- /**@brief Bhattacharyya distance
- *
- */
- public double bdist;
- /**
- *
- * @param numCluster
- * @param corpus
- */
- public Agree(int numCluster, Corpus corpus){
-
- model1=new PhraseCluster(numCluster, corpus);
- model2=new C2F(numCluster,corpus);
- c=corpus;
- n_words=c.getNumWords();
- n_phrases=c.getNumPhrases();
- n_contexts=c.getNumContexts();
- n_positions1=c.getNumContextPositions();
- n_positions2=2;
- K=numCluster;
-
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- //String in="../pdata/canned.con";
- String in="../pdata/btec.con";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- Agree agree=new Agree(numCluster, corpus);
- int iter=20;
- for(int i=0;i<iter;i++){
- agree.EM();
-			System.out.println("Iter "+i+", llh: "+agree.llh+
-					", divergence: "+agree.bdist+
-					", sum: "+(agree.llh+agree.bdist));
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- agree.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
-
- double [][][]exp_emit1=new double [K][n_positions1][n_words];
- double [][]exp_pi1=new double[n_phrases][K];
-
- double [][][]exp_emit2=new double [K][n_positions2][n_words];
- double [][]exp_pi2=new double[n_contexts][K];
-
- llh=0;
- bdist=0;
- //E
- for(int context=0; context< n_contexts; context++){
-
- List<Edge> contexts = c.getEdgesForContext(context);
-
- for (int ctx=0; ctx<contexts.size(); ctx++){
- Edge edge = contexts.get(ctx);
- int phrase=edge.getPhraseId();
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- bdist += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList phraseToks = edge.getPhrase();
- TIntArrayList contextToks = edge.getContext();
- for(int tag=0;tag<K;tag++){
-
- for(int position=0;position<n_positions1;position++){
- exp_emit1[tag][position][contextToks.get(position)]+=p[tag]*count;
- }
-
- exp_emit2[tag][0][phraseToks.get(0)]+=p[tag]*count;
- exp_emit2[tag][1][phraseToks.get(phraseToks.size()-1)]+=p[tag]*count;
-
- exp_pi1[phrase][tag]+=p[tag]*count;
- exp_pi2[context][tag]+=p[tag]*count;
- }
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit1){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi1){
- arr.F.l1normalize(j);
- }
-
- for(double [][]i:exp_emit2){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi2){
- arr.F.l1normalize(j);
- }
-
- model1.emit=exp_emit1;
- model1.pi=exp_pi1;
- model2.emit=exp_emit2;
- model2.pi=exp_pi2;
-
- return llh;
- }
-
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob1=model1.posterior(edge);
- double[] prob2=model2.posterior(edge);
-
- llh+=edge.getCount()*Math.log(arr.F.l1norm(prob1));
- llh+=edge.getCount()*Math.log(arr.F.l1norm(prob2));
- arr.F.l1normalize(prob1);
- arr.F.l1normalize(prob2);
-
- for(int i=0;i<prob1.length;i++){
- prob1[i]*=prob2[i];
- prob1[i]=Math.sqrt(prob1[i]);
- }
-
- return prob1;
- }
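-	/*
-	 * Numeric illustration (comment added for clarity): if prob1 = {0.8, 0.2} and
-	 * prob2 = {0.5, 0.5} after normalization, the element-wise product is {0.4, 0.1}
-	 * and its square root {0.632, 0.316}; the l1 norm of that vector (~0.949) is the
-	 * Bhattacharyya coefficient whose log is accumulated into bdist during EM.
-	 */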
-
- public void displayPosterior(PrintStream ps)
- {
- displayPosterior(ps, c.getEdges());
- }
-
- public void displayPosterior(PrintStream ps, List<Edge> test)
- {
- for (Edge edge : test)
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java b/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java
deleted file mode 100644
index 031f887f..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java
+++ /dev/null
@@ -1,197 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree2Sides {
- PhraseCluster model1,model2;
- Corpus c1,c2;
- private int K;
-
- /**@brief sum of loglikelihood of two
- * individual models
- */
- public double llh;
- /**@brief Bhattacharyya distance
- *
- */
- public double bdist;
- /**
- *
- * @param numCluster
- * @param corpus
- */
- public Agree2Sides(int numCluster, Corpus corpus1 , Corpus corpus2 ){
-
- model1=new PhraseCluster(numCluster, corpus1);
- model2=new PhraseCluster(numCluster,corpus2);
- c1=corpus1;
- c2=corpus2;
- K=numCluster;
-
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- //String in="../pdata/canned.con";
- // String in="../pdata/btec.con";
- String in1="../pdata/source.txt";
- String in2="../pdata/target.txt";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus1 = null,corpus2=null;
- File infile1 = new File(in1),infile2=new File(in2);
- try {
- System.out.println("Reading concordance from " + infile1);
- corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
- System.out.println("Reading concordance from " + infile2);
- corpus2 = Corpus.readFromFile(FileUtil.reader(infile2));
- corpus1.printStats(System.out);
- } catch (IOException e) {
-			System.err.println("Failed to open input file: " + infile1 + " or " + infile2);
- e.printStackTrace();
- System.exit(1);
- }
-
- Agree2Sides agree=new Agree2Sides(numCluster, corpus1,corpus2);
- int iter=20;
- for(int i=0;i<iter;i++){
- agree.EM();
-			System.out.println("Iter "+i+", llh: "+agree.llh+
-					", divergence: "+agree.bdist+
-					", sum: "+(agree.llh+agree.bdist));
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- agree.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
-
- double [][][]exp_emit1=new double [K][c1.getNumContextPositions()][c1.getNumWords()];
- double [][]exp_pi1=new double[c1.getNumPhrases()][K];
-
- double [][][]exp_emit2=new double [K][c2.getNumContextPositions()][c2.getNumWords()];
- double [][]exp_pi2=new double[c2.getNumPhrases()][K];
-
- llh=0;
- bdist=0;
- //E
- for(int i=0;i<c1.getEdges().size();i++){
- Edge edge1=c1.getEdges().get(i);
- Edge edge2=c2.getEdges().get(i);
- double p[]=posterior(i);
- double z = arr.F.l1norm(p);
- assert z > 0;
- bdist += edge1.getCount() * Math.log(z);
- arr.F.l1normalize(p);
- double count = edge1.getCount();
- //increment expected count
- TIntArrayList contextToks1 = edge1.getContext();
- TIntArrayList contextToks2 = edge2.getContext();
- int phrase1=edge1.getPhraseId();
- int phrase2=edge2.getPhraseId();
- for(int tag=0;tag<K;tag++){
- for(int position=0;position<c1.getNumContextPositions();position++){
- exp_emit1[tag][position][contextToks1.get(position)]+=p[tag]*count;
- }
- for(int position=0;position<c2.getNumContextPositions();position++){
- exp_emit2[tag][position][contextToks2.get(position)]+=p[tag]*count;
- }
- exp_pi1[phrase1][tag]+=p[tag]*count;
- exp_pi2[phrase2][tag]+=p[tag]*count;
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit1){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi1){
- arr.F.l1normalize(j);
- }
-
- for(double [][]i:exp_emit2){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi2){
- arr.F.l1normalize(j);
- }
-
- model1.emit=exp_emit1;
- model1.pi=exp_pi1;
- model2.emit=exp_emit2;
- model2.pi=exp_pi2;
-
- return llh;
- }
-
- public double[] posterior(int edgeIdx)
- {
- return posterior(c1.getEdges().get(edgeIdx), c2.getEdges().get(edgeIdx));
- }
-
- public double[] posterior(Edge e1, Edge e2)
- {
- double[] prob1=model1.posterior(e1);
- double[] prob2=model2.posterior(e2);
-
- llh+=e1.getCount()*Math.log(arr.F.l1norm(prob1));
- llh+=e2.getCount()*Math.log(arr.F.l1norm(prob2));
- arr.F.l1normalize(prob1);
- arr.F.l1normalize(prob2);
-
- for(int i=0;i<prob1.length;i++){
- prob1[i]*=prob2[i];
- prob1[i]=Math.sqrt(prob1[i]);
- }
-
- return prob1;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- for (int i=0;i<c1.getEdges().size();i++)
- {
- Edge edge=c1.getEdges().get(i);
- double probs[] = posterior(i);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/C2F.java b/gi/posterior-regularisation/prjava/src/phrase/C2F.java
deleted file mode 100644
index e8783950..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/C2F.java
+++ /dev/null
@@ -1,216 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Arrays;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-/**
- * @brief context generates phrase
- * @author desaic
- *
- */
-public class C2F {
- public int K;
- private int n_words, n_contexts, n_positions;
- public Corpus c;
-
- /**@brief
- * emit[tag][position][word] = p(word | tag, position in phrase)
- */
- public double emit[][][];
- /**@brief
- * pi[context][tag] = p(tag | context)
- */
- public double pi[][];
-
- public C2F(int numCluster, Corpus corpus){
- K=numCluster;
- c=corpus;
- n_words=c.getNumWords();
- n_contexts=c.getNumContexts();
-
-		//Number of words in a phrase to be considered:
-		//currently the first and last word in source and target.
-		//If the phrase has length 1 in either dimension then
-		//we use the same word for both positions.
- n_positions=c.phraseEdges(c.getEdges().get(0).getPhrase()).size();
-
- emit=new double [K][n_positions][n_words];
- pi=new double[n_contexts][K];
-
- for(double [][]i:emit){
- for(double []j:i){
- arr.F.randomise(j);
- }
- }
-
- for(double []j:pi){
- arr.F.randomise(j);
- }
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- String in="../pdata/canned.con";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- C2F c2f=new C2F(numCluster,corpus);
- int iter=20;
- double llh=0;
- for(int i=0;i<iter;i++){
- llh=c2f.EM();
-			System.out.println("Iter "+i+", llh: "+llh);
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- c2f.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double [][]exp_pi=new double[n_contexts][K];
-
- double loglikelihood=0;
-
- //E
- for(int context=0; context< n_contexts; context++){
-
- List<Edge> contexts = c.getEdgesForContext(context);
-
- for (int ctx=0; ctx<contexts.size(); ctx++){
- Edge edge = contexts.get(ctx);
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- loglikelihood += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList phrase= edge.getPhrase();
- for(int tag=0;tag<K;tag++){
-
- exp_emit[tag][0][phrase.get(0)]+=p[tag]*count;
- exp_emit[tag][1][phrase.get(phrase.size()-1)]+=p[tag]*count;
-
- exp_pi[context][tag]+=p[tag]*count;
- }
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- emit=exp_emit;
-
- for(double []j:exp_pi){
- arr.F.l1normalize(j);
- }
-
- pi=exp_pi;
-
- return loglikelihood;
- }
-
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob=Arrays.copyOf(pi[edge.getContextId()], K);
-
- TIntArrayList phrase = edge.getPhrase();
- TIntArrayList offsets = c.phraseEdges(phrase);
- for(int tag=0;tag<K;tag++)
- {
- for (int i=0; i < offsets.size(); ++i)
- prob[tag]*=emit[tag][i][phrase.get(offsets.get(i))];
- }
-
- return prob;
- }
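-	/*
-	 * Comment added for clarity: the unnormalized posterior computed above is
-	 * p(tag, phrase | context) = pi[context][tag] * prod_i emit[tag][i][word_i],
-	 * where the positions i range over the phrase-edge words from phraseEdges().
-	 */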
-
- public void displayPosterior(PrintStream ps)
- {
- for (Edge edge : c.getEdges())
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-
- public void displayModelParam(PrintStream ps)
- {
- final double EPS = 1e-6;
-
- ps.println("P(tag|context)");
- for (int i = 0; i < n_contexts; ++i)
- {
- ps.print(c.getContext(i));
- for(int j=0;j<pi[i].length;j++){
- if (pi[i][j] > EPS)
- ps.print("\t" + j + ": " + pi[i][j]);
- }
- ps.println();
- }
-
- ps.println("P(word|tag,position)");
- for (int i = 0; i < K; ++i)
- {
- for(int position=0;position<n_positions;position++){
- ps.println("tag " + i + " position " + position);
- for(int word=0;word<emit[i][position].length;word++){
- if (emit[i][position][word] > EPS)
- ps.print(c.getWord(word)+"="+emit[i][position][word]+"\t");
- }
- ps.println();
- }
- ps.println();
- }
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java b/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
deleted file mode 100644
index 4b1939cd..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
+++ /dev/null
@@ -1,288 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-
-public class Corpus
-{
- private Lexicon<String> wordLexicon = new Lexicon<String>();
- private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>();
- private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>();
- private List<Edge> edges = new ArrayList<Edge>();
- private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
- private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
- public int splitSentinel;
- public int phraseSentinel;
- public int rareSentinel;
-
- public Corpus()
- {
- splitSentinel = wordLexicon.insert("<SPLIT>");
- phraseSentinel = wordLexicon.insert("<PHRASE>");
- rareSentinel = wordLexicon.insert("<RARE>");
- }
-
- public class Edge
- {
-
- Edge(int phraseId, int contextId, double count,int tag)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- fixTag=tag;
- }
-
- Edge(int phraseId, int contextId, double count)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- fixTag=-1;
- }
- public int getTag(){
- return fixTag;
- }
-
- public int getPhraseId()
- {
- return phraseId;
- }
- public TIntArrayList getPhrase()
- {
- return Corpus.this.getPhrase(phraseId);
- }
- public String getPhraseString()
- {
- return Corpus.this.getPhraseString(phraseId);
- }
- public int getContextId()
- {
- return contextId;
- }
- public TIntArrayList getContext()
- {
- return Corpus.this.getContext(contextId);
- }
- public String getContextString(boolean insertPhraseSentinel)
- {
- return Corpus.this.getContextString(contextId, insertPhraseSentinel);
- }
- public double getCount()
- {
- return count;
- }
- public boolean equals(Object other)
- {
- if (other instanceof Edge)
- {
- Edge oe = (Edge) other;
- return oe.phraseId == phraseId && oe.contextId == contextId;
- }
- else return false;
- }
- public int hashCode()
- { // this is how boost's hash_combine does it
- int seed = phraseId;
- seed ^= contextId + 0x9e3779b9 + (seed << 6) + (seed >> 2);
- return seed;
- }
- public String toString()
- {
- return getPhraseString() + "\t" + getContextString(true);
- }
-
- private int phraseId;
- private int contextId;
- private double count;
- private int fixTag;
- }
-
- List<Edge> getEdges()
- {
- return edges;
- }
-
- int getNumEdges()
- {
- return edges.size();
- }
-
- int getNumPhrases()
- {
- return phraseLexicon.size();
- }
-
- int getNumContextPositions()
- {
- return contextLexicon.lookup(0).size();
- }
-
- List<Edge> getEdgesForPhrase(int phraseId)
- {
- return phraseToContext.get(phraseId);
- }
-
- int getNumContexts()
- {
- return contextLexicon.size();
- }
-
- List<Edge> getEdgesForContext(int contextId)
- {
- return contextToPhrase.get(contextId);
- }
-
- int getNumWords()
- {
- return wordLexicon.size();
- }
-
- String getWord(int wordId)
- {
- return wordLexicon.lookup(wordId);
- }
-
- public TIntArrayList getPhrase(int phraseId)
- {
- return phraseLexicon.lookup(phraseId);
- }
-
- public String getPhraseString(int phraseId)
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getPhrase(phraseId).toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(wordLexicon.lookup(tid));
- }
- return b.toString();
- }
-
- public TIntArrayList getContext(int contextId)
- {
- return contextLexicon.lookup(contextId);
- }
-
- public String getContextString(int contextId, boolean insertPhraseSentinel)
- {
- StringBuffer b = new StringBuffer();
- TIntArrayList c = getContext(contextId);
- for (int i = 0; i < c.size(); ++i)
- {
- if (i > 0) b.append(" ");
- //if (i == c.size() / 2) b.append("<PHRASE> ");
- b.append(wordLexicon.lookup(c.get(i)));
- }
- return b.toString();
- }
-
- public boolean isSentinel(int wordId)
- {
- return wordId == splitSentinel || wordId == phraseSentinel;
- }
-
- List<Edge> readEdges(Reader in) throws IOException
- {
- // read in line-by-line
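- // each line: phrase TAB context ||| C=<count>[ T=<tag>] ||| context ||| ...
- // (format inferred from the parsing below; the T=<tag> field is optional)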
- BufferedReader bin = new BufferedReader(in);
- String line;
- Pattern separator = Pattern.compile(" \\|\\|\\| ");
-
- List<Edge> edges = new ArrayList<Edge>();
- while ((line = bin.readLine()) != null)
- {
- // split into phrase and contexts
- StringTokenizer st = new StringTokenizer(line, "\t");
- assert (st.hasMoreTokens());
- String phraseToks = st.nextToken();
- assert (st.hasMoreTokens());
- String rest = st.nextToken();
- assert (!st.hasMoreTokens());
-
- // process phrase
- st = new StringTokenizer(phraseToks, " ");
- TIntArrayList ptoks = new TIntArrayList();
- while (st.hasMoreTokens())
- ptoks.add(wordLexicon.insert(st.nextToken()));
- int phraseId = phraseLexicon.insert(ptoks);
-
- // process contexts
- String[] parts = separator.split(rest);
- assert (parts.length % 2 == 0);
- for (int i = 0; i < parts.length; i += 2)
- {
- // process pairs of strings - context and count
- String ctxString = parts[i];
- String countString = parts[i + 1];
-
- assert (countString.startsWith("C="));
-
- String []countToks=countString.split(" ");
-
- double count = Double.parseDouble(countToks[0].substring(2).trim());
-
- TIntArrayList ctx = new TIntArrayList();
- StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " ");
- while (ctxStrtok.hasMoreTokens())
- {
- String token = ctxStrtok.nextToken();
- ctx.add(wordLexicon.insert(token));
- }
- int contextId = contextLexicon.insert(ctx);
-
-
- if(countToks.length<2){
- edges.add(new Edge(phraseId, contextId, count));
- }
- else{
- int tag=Integer.parseInt(countToks[1].substring(2));
- edges.add(new Edge(phraseId, contextId, count,tag));
- }
- }
- }
- return edges;
- }
-
- static Corpus readFromFile(Reader in) throws IOException
- {
- Corpus c = new Corpus();
- c.edges = c.readEdges(in);
- for (Edge edge: c.edges)
- {
- while (edge.getPhraseId() >= c.phraseToContext.size())
- c.phraseToContext.add(new ArrayList<Edge>());
- while (edge.getContextId() >= c.contextToPhrase.size())
- c.contextToPhrase.add(new ArrayList<Edge>());
-
- // index the edge for fast phrase, context lookup
- c.phraseToContext.get(edge.getPhraseId()).add(edge);
- c.contextToPhrase.get(edge.getContextId()).add(edge);
- }
- return c;
- }
-
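- /**
-  * Positions of the segment boundaries in a phrase: the first and last
-  * token of every <SPLIT>-separated segment. A single-token segment
-  * contributes its position twice, once as start and once as end.
-  */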
- TIntArrayList phraseEdges(TIntArrayList phrase)
- {
- TIntArrayList r = new TIntArrayList(4);
- for (int p = 0; p < phrase.size(); ++p)
- {
- if (p == 0 || phrase.get(p-1) == splitSentinel)
- r.add(p);
- if (p == phrase.size() - 1 || phrase.get(p+1) == splitSentinel)
- r.add(p);
- }
- return r;
- }
-
- public void printStats(PrintStream out)
- {
- out.println("Corpus has " + edges.size() + " edges " + phraseLexicon.size() + " phrases "
- + contextLexicon.size() + " contexts and " + wordLexicon.size() + " word types");
- }
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java b/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
deleted file mode 100644
index a386e4a3..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Lexicon<T>
-{
- public int insert(T word)
- {
- Integer i = wordToIndex.get(word);
- if (i == null)
- {
- i = indexToWord.size();
- wordToIndex.put(word, i);
- indexToWord.add(word);
- }
- return i;
- }
-
- public T lookup(int index)
- {
- return indexToWord.get(index);
- }
-
- public int size()
- {
- return indexToWord.size();
- }
-
- private Map<T, Integer> wordToIndex = new HashMap<T, Integer>();
- private List<T> indexToWord = new ArrayList<T>();
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
deleted file mode 100644
index c032bb2b..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
+++ /dev/null
@@ -1,540 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-import org.apache.commons.math.special.Gamma;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.regex.Pattern;
-
-import phrase.Corpus.Edge;
-
-
-public class PhraseCluster {
-
- public int K;
- private int n_phrases, n_words, n_contexts, n_positions;
- public Corpus c;
- public ExecutorService pool;
-
- double[] lambdaPTCT;
- double[][] lambdaPT;
- boolean cacheLambda = true;
-
- // emit[tag][position][word] = p(word | tag, position in context)
- double emit[][][];
- // pi[phrase][tag] = p(tag | phrase)
- double pi[][];
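- // Model sketch, as used in posterior() below: an edge pairs a phrase
- // with a context, and
- //   P(tag, context | phrase) = pi[phrase][tag] * prod_pos emit[tag][pos][context[pos]]
- // so the unnormalised tag posterior for an edge is exactly this product.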
-
- public PhraseCluster(int numCluster, Corpus corpus)
- {
- K=numCluster;
- c=corpus;
- n_words=c.getNumWords();
- n_phrases=c.getNumPhrases();
- n_contexts=c.getNumContexts();
- n_positions=c.getNumContextPositions();
-
- emit=new double [K][n_positions][n_words];
- pi=new double[n_phrases][K];
-
- for(double [][]i:emit)
- for(double []j:i)
- arr.F.randomise(j, true);
-
- for(double []j:pi)
- arr.F.randomise(j, true);
- }
-
- void useThreadPool(ExecutorService pool)
- {
- this.pool = pool;
- }
-
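- /**
-  * One iteration of vanilla EM: the E-step accumulates expected counts
-  * for emit and pi from the normalised per-edge posteriors, and the
-  * M-step renormalises them. Returns the log-likelihood computed under
-  * the parameters from before the update.
-  * @param phraseSizeLimit skip phrases longer than this (0 disables)
-  */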
- public double EM(int phraseSizeLimit)
- {
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double []exp_pi=new double[K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
-
- double loglikelihood=0;
-
- //E
- for(int phrase=0; phrase < n_phrases; phrase++)
- {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- continue;
-
- Arrays.fill(exp_pi, 1e-10);
-
- List<Edge> contexts = c.getEdgesForPhrase(phrase);
-
- for (int ctx=0; ctx<contexts.size(); ctx++)
- {
- Edge edge = contexts.get(ctx);
-
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- loglikelihood += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList context = edge.getContext();
- for(int tag=0;tag<K;tag++)
- {
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=p[tag]*count;
- }
- exp_pi[tag]+=p[tag]*count;
- }
- }
- arr.F.l1normalize(exp_pi);
- System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
- }
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
-
- emit=exp_emit;
-
- return loglikelihood;
- }
-
- public double PREM(double scalePT, double scaleCT, int phraseSizeLimit)
- {
- if (scaleCT == 0)
- {
- if (pool != null)
- return PREM_phrase_constraints_parallel(scalePT, phraseSizeLimit);
- else
- return PREM_phrase_constraints(scalePT, phraseSizeLimit);
- }
- else // FIXME: ignores phraseSizeLimit
- return this.PREM_phrase_context_constraints(scalePT, scaleCT);
- }
-
-
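- /**
-  * PR E-step with phrase-tag sparsity constraints: each phrase gets its
-  * own PhraseObjective, minimised by projected gradient descent over the
-  * dual variables lambda, yielding a regularised posterior q that is
-  * discouraged from spreading a phrase over many tags. Expected counts
-  * under q then drive the usual M-step. Returns the primal objective.
-  */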
- public double PREM_phrase_constraints(double scalePT, int phraseSizeLimit)
- {
- double [][][]exp_emit=new double[K][n_positions][n_words];
- double []exp_pi=new double[K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
-
- if (lambdaPT == null && cacheLambda)
- lambdaPT = new double[n_phrases][];
-
- double loglikelihood=0, kl=0, l1lmax=0, primal=0;
- int failures=0, iterations=0;
- long start = System.currentTimeMillis();
- //E
- for(int phrase=0; phrase<n_phrases; phrase++)
- {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- {
- //System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
- continue;
- }
-
- Arrays.fill(exp_pi, 1e-10);
-
- // FIXME: add rare edge check to phrase objective & posterior processing
- PhraseObjective po = new PhraseObjective(this, phrase, scalePT, (cacheLambda) ? lambdaPT[phrase] : null);
- boolean ok = po.optimizeWithProjectedGradientDescent();
- if (!ok) ++failures;
- if (cacheLambda) lambdaPT[phrase] = po.getParameters();
- iterations += po.getNumberUpdateCalls();
- double [][] q=po.posterior();
- loglikelihood += po.loglikelihood();
- kl += po.KL_divergence();
- l1lmax += po.l1lmax();
- primal += po.primal(scalePT);
- List<Edge> edges = c.getEdgesForPhrase(phrase);
-
- for(int edge=0;edge<q.length;edge++){
- Edge e = edges.get(edge);
- TIntArrayList context = e.getContext();
- double contextCnt = e.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++){
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
- }
-
- exp_pi[tag]+=q[edge][tag]*contextCnt;
-
- }
- }
- arr.F.l1normalize(exp_pi);
- System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
- }
-
- long end = System.currentTimeMillis();
- if (failures > 0)
- System.out.println("WARNING: failed to converge in " + failures + "/" + n_phrases + " cases");
- System.out.println("\tmean iters: " + iterations/(double)n_phrases + " elapsed time " + (end - start) / 1000.0);
- System.out.println("\tllh: " + loglikelihood);
- System.out.println("\tKL: " + kl);
- System.out.println("\tphrase l1lmax: " + l1lmax);
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- return primal;
- }
-
- public double PREM_phrase_constraints_parallel(final double scalePT, int phraseSizeLimit)
- {
- assert(pool != null);
-
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double [][]exp_pi=new double[n_phrases][K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
- for(double []j:exp_pi)
- Arrays.fill(j, 1e-10);
-
- double loglikelihood=0, kl=0, l1lmax=0, primal=0;
- final AtomicInteger failures = new AtomicInteger(0);
- final AtomicLong elapsed = new AtomicLong(0l);
- int iterations=0;
- long start = System.currentTimeMillis();
- List<Future<PhraseObjective>> results = new ArrayList<Future<PhraseObjective>>();
-
- if (lambdaPT == null && cacheLambda)
- lambdaPT = new double[n_phrases][];
-
- //E
- for(int phrase=0;phrase<n_phrases;phrase++) {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit) {
- System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
- continue;
- }
-
- final int p=phrase;
- results.add(pool.submit(new Callable<PhraseObjective>() {
- public PhraseObjective call() {
- //System.out.println("" + Thread.currentThread().getId() + " optimising lambda for " + p);
- long start = System.currentTimeMillis();
- PhraseObjective po = new PhraseObjective(PhraseCluster.this, p, scalePT, (cacheLambda) ? lambdaPT[p] : null);
- boolean ok = po.optimizeWithProjectedGradientDescent();
- if (!ok) failures.incrementAndGet();
- long end = System.currentTimeMillis();
- elapsed.addAndGet(end - start);
- //System.out.println("" + Thread.currentThread().getId() + " done optimising lambda for " + p);
- return po;
- }
- }));
- }
-
- // aggregate the expectations as they become available
- for (Future<PhraseObjective> fpo : results)
- {
- try {
- //System.out.println("" + Thread.currentThread().getId() + " reading queue #" + count);
-
- // wait (blocking) until something is ready
- PhraseObjective po = fpo.get();
- // process
- int phrase = po.phrase;
- if (cacheLambda) lambdaPT[phrase] = po.getParameters();
- //System.out.println("" + Thread.currentThread().getId() + " taken phrase " + phrase);
- double [][] q=po.posterior();
- loglikelihood += po.loglikelihood();
- kl += po.KL_divergence();
- l1lmax += po.l1lmax();
- primal += po.primal(scalePT);
- iterations += po.getNumberUpdateCalls();
-
- List<Edge> edges = c.getEdgesForPhrase(phrase);
- for(int edge=0;edge<q.length;edge++){
- Edge e = edges.get(edge);
- TIntArrayList context = e.getContext();
- double contextCnt = e.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++){
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
- }
- exp_pi[phrase][tag]+=q[edge][tag]*contextCnt;
- }
- }
- } catch (InterruptedException e) {
- System.err.println("M-step thread interrupted. Probably fatal!");
- throw new RuntimeException(e);
- } catch (ExecutionException e) {
- System.err.println("M-step thread execution died. Probably fatal!");
- throw new RuntimeException(e);
- }
- }
-
- long end = System.currentTimeMillis();
-
- if (failures.get() > 0)
- System.out.println("WARNING: failed to converge in " + failures.get() + "/" + n_phrases + " cases");
- System.out.println("\tmean iters: " + iterations/(double)n_phrases + " walltime " + (end-start)/1000.0 + " threads " + elapsed.get() / 1000.0);
- System.out.println("\tllh: " + loglikelihood);
- System.out.println("\tKL: " + kl);
- System.out.println("\tphrase l1lmax: " + l1lmax);
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- for(double []j:exp_pi)
- arr.F.l1normalize(j);
- pi=exp_pi;
-
- return primal;
- }
-
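- /**
-  * PR E-step with both phrase-tag and context-tag constraints: unlike the
-  * phrase-only case this does not decompose by phrase, so one global
-  * PhraseContextObjective is optimised over all edges before the expected
-  * counts under q drive the M-step. Returns the primal objective.
-  */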
- public double PREM_phrase_context_constraints(double scalePT, double scaleCT)
- {
- double[][][] exp_emit = new double [K][n_positions][n_words];
- double[][] exp_pi = new double[n_phrases][K];
-
- //E step
- PhraseContextObjective pco = new PhraseContextObjective(this, lambdaPTCT, pool, scalePT, scaleCT);
- boolean ok = pco.optimizeWithProjectedGradientDescent();
- if (cacheLambda) lambdaPTCT = pco.getParameters();
-
- //now extract expectations
- List<Corpus.Edge> edges = c.getEdges();
- for(int e = 0; e < edges.size(); ++e)
- {
- double [] q = pco.posterior(e);
- Corpus.Edge edge = edges.get(e);
-
- TIntArrayList context = edge.getContext();
- double contextCnt = edge.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++)
- {
- for(int pos=0;pos<n_positions;pos++)
- exp_emit[tag][pos][context.get(pos)]+=q[tag]*contextCnt;
- exp_pi[edge.getPhraseId()][tag]+=q[tag]*contextCnt;
- }
- }
-
- System.out.println("\tllh: " + pco.loglikelihood());
- System.out.println("\tKL: " + pco.KL_divergence());
- System.out.println("\tphrase l1lmax: " + pco.phrase_l1lmax());
- System.out.println("\tcontext l1lmax: " + pco.context_l1lmax());
-
- //M step
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- for(double []j:exp_pi)
- arr.F.l1normalize(j);
- pi=exp_pi;
-
- return pco.primal();
- }
-
- /**
-  * Compute the unnormalised posterior over tags for an edge:
-  * pi[phrase][tag] times the emission probability of each context word,
-  * or a point mass on the edge's tag when one is fixed. Unseen phrases
-  * fall back to a uniform prior; sentinel and out-of-vocabulary context
-  * words are skipped.
-  * @param edge the phrase-context pair to score
-  * @return unnormalized posterior
-  */
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob;
-
- if(edge.getTag()>=0){
- prob=new double[K];
- prob[edge.getTag()]=1;
- return prob;
- }
-
- if (edge.getPhraseId() < n_phrases)
- prob = Arrays.copyOf(pi[edge.getPhraseId()], K);
- else
- {
- prob = new double[K];
- Arrays.fill(prob, 1.0);
- }
-
- TIntArrayList ctx = edge.getContext();
- for(int tag=0;tag<K;tag++)
- {
- for(int c=0;c<n_positions;c++)
- {
- int word = ctx.get(c);
- if (!this.c.isSentinel(word) && word < n_words)
- prob[tag]*=emit[tag][c][word];
- }
- }
-
- return prob;
- }
-
- public void displayPosterior(PrintStream ps, List<Edge> testing)
- {
- for (Edge edge : testing)
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t + " T=" + edge.getCount() + " P=" + probs[t]);
- //ps.println("# probs " + Arrays.toString(probs));
- }
- }
-
- public void displayModelParam(PrintStream ps)
- {
- final double EPS = 1e-6;
- ps.println("phrases " + n_phrases + " tags " + K + " positions " + n_positions);
-
- for (int i = 0; i < n_phrases; ++i)
- for(int j=0;j<pi[i].length;j++)
- if (pi[i][j] > EPS)
- ps.println(i + " " + j + " " + pi[i][j]);
-
- ps.println();
- for (int i = 0; i < K; ++i)
- {
- for(int position=0;position<n_positions;position++)
- {
- for(int word=0;word<emit[i][position].length;word++)
- {
- if (emit[i][position][word] > EPS)
- ps.println(i + " " + position + " " + word + " " + emit[i][position][word]);
- }
- }
- }
- }
-
- double phrase_l1lmax()
- {
- double sum=0;
- for(int phrase=0; phrase<n_phrases; phrase++)
- {
- double [] maxes = new double[K];
- for (Edge edge : c.getEdgesForPhrase(phrase))
- {
- double p[] = posterior(edge);
- arr.F.l1normalize(p);
- for(int tag=0;tag<K;tag++)
- maxes[tag] = Math.max(maxes[tag], p[tag]);
- }
- for(int tag=0;tag<K;tag++)
- sum += maxes[tag];
- }
- return sum;
- }
-
- double context_l1lmax()
- {
- double sum=0;
- for(int context=0; context<n_contexts; context++)
- {
- double [] maxes = new double[K];
- for (Edge edge : c.getEdgesForContext(context))
- {
- double p[] = posterior(edge);
- arr.F.l1normalize(p);
- for(int tag=0;tag<K;tag++)
- maxes[tag] = Math.max(maxes[tag], p[tag]);
- }
- for(int tag=0;tag<K;tag++)
- sum += maxes[tag];
- }
- return sum;
- }
-
- public void loadParameters(BufferedReader input) throws IOException
- {
- final double EPS = 1e-50;
-
- // overwrite pi, emit with ~zeros
- for(double [][]i:emit)
- for(double []j:i)
- Arrays.fill(j, EPS);
-
- for(double []j:pi)
- Arrays.fill(j, EPS);
-
- String line = input.readLine();
- assert line != null;
-
- Pattern space = Pattern.compile(" +");
- String[] parts = space.split(line);
- assert parts.length == 6;
-
- assert parts[0].equals("phrases");
- int phrases = Integer.parseInt(parts[1]);
- int tags = Integer.parseInt(parts[3]);
- int positions = Integer.parseInt(parts[5]);
-
- assert phrases == n_phrases;
- assert tags == K;
- assert positions == n_positions;
-
- // read in pi
- while ((line = input.readLine()) != null)
- {
- line = line.trim();
- if (line.isEmpty()) break;
-
- String[] tokens = space.split(line);
- assert tokens.length == 3;
- int p = Integer.parseInt(tokens[0]);
- int t = Integer.parseInt(tokens[1]);
- double v = Double.parseDouble(tokens[2]);
-
- pi[p][t] = v;
- }
-
- // read in emissions
- while ((line = input.readLine()) != null)
- {
- String[] tokens = space.split(line);
- assert tokens.length == 4;
- int t = Integer.parseInt(tokens[0]);
- int p = Integer.parseInt(tokens[1]);
- int w = Integer.parseInt(tokens[2]);
- double v = Double.parseDouble(tokens[3]);
-
- emit[t][p][w] = v;
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
deleted file mode 100644
index 646ff392..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
+++ /dev/null
@@ -1,436 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import phrase.Corpus.Edge;
-
-public class PhraseContextObjective extends ProjectedObjective
-{
- private static final double GRAD_DIFF = 0.00002;
- private static double INIT_STEP_SIZE = 300;
- private static double VAL_DIFF = 1e-8;
- private static int ITERATIONS = 20;
- boolean debug = false;
-
- private PhraseCluster c;
-
- // un-regularized unnormalized posterior, p[edge][tag]
- // P(tag|edge) \propto P(tag|phrase)P(context|tag)
- private double p[][];
-
- // regularized unnormalized posterior
- // q[edge][tag] \propto p[edge][tag]*exp(-lambda)
- private double q[][];
- private List<Corpus.Edge> data;
-
- // log likelihood under q
- private double loglikelihood;
- private SimplexProjection projectionPhrase;
- private SimplexProjection projectionContext;
-
- double[] newPoint;
- private int n_param;
-
- // likelihood under p
- public double llh;
-
- private static Map<Corpus.Edge, Integer> edgeIndex;
-
- private long projectionTime;
- private long objectiveTime;
- private long actualProjectionTime;
- private ExecutorService pool;
-
- double scalePT;
- double scaleCT;
-
- public PhraseContextObjective(PhraseCluster cluster, double[] startingParameters, ExecutorService pool,
- double scalePT, double scaleCT)
- {
- c=cluster;
- data=c.c.getEdges();
- n_param=data.size()*c.K*2;
- this.pool=pool;
- this.scalePT = scalePT;
- this.scaleCT = scaleCT;
-
- parameters = startingParameters;
- if (parameters == null)
- parameters = new double[n_param];
-
- System.out.println("Num parameters " + n_param);
- newPoint = new double[n_param];
- gradient = new double[n_param];
- initP();
- projectionPhrase = new SimplexProjection(scalePT);
- projectionContext = new SimplexProjection(scaleCT);
- q=new double [data.size()][c.K];
-
- if (edgeIndex == null) {
- edgeIndex = new HashMap<Edge, Integer>();
- for (int e=0; e<data.size(); e++)
- {
- edgeIndex.put(data.get(e), e);
- //if (debug) System.out.println("Edge " + data.get(e) + " index " + e);
- }
- }
-
- setParameters(parameters);
- }
-
- private void initP(){
- p=new double[data.size()][];
- for(int edge=0;edge<data.size();edge++)
- {
- p[edge]=c.posterior(data.get(edge));
- llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge]));
- arr.F.l1normalize(p[edge]);
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- //System.out.println("setParameters " + Arrays.toString(parameters));
- // TODO: test if params have changed and skip update otherwise
- super.setParameters(params);
- updateFunction();
- }
-
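- // Recompute q, the log-likelihood and the gradient after a parameter
- // change (a sketch of the dual view): q[e][tag] is proportional to
- // p[e][tag] * exp(-(lambda_pt + omega_ct)/count), and the gradient of
- // the dual objective w.r.t. both dual variables is simply -q[e][tag].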
- private void updateFunction()
- {
- updateCalls++;
- loglikelihood=0;
-
- System.out.print(".");
- System.out.flush();
-
- long begin = System.currentTimeMillis();
- for (int e=0; e<data.size(); e++)
- {
- Edge edge = data.get(e);
- for(int tag=0; tag<c.K; tag++)
- {
- int ip = index(e, tag, true);
- int ic = index(e, tag, false);
- q[e][tag] = p[e][tag]*
- Math.exp((-parameters[ip]-parameters[ic]) / edge.getCount());
- //if (debug)
- //System.out.println("\tposterior " + edge + " with tag " + tag + " p " + p[e][tag] + " params " + parameters[ip] + " and " + parameters[ic] + " q " + q[e][tag]);
- }
- }
-
- for(int edge=0;edge<data.size();edge++) {
- loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
- arr.F.l1normalize(q[edge]);
- }
-
- for (int e=0; e<data.size(); e++)
- {
- for(int tag=0; tag<c.K; tag++)
- {
- int ip = index(e, tag, true);
- int ic = index(e, tag, false);
- gradient[ip]=-q[e][tag];
- gradient[ic]=-q[e][tag];
- }
- }
- //if (debug) {
- //System.out.println("objective " + loglikelihood + " ||gradient||_2: " + arr.F.l2norm(gradient));
- //System.out.println("gradient " + Arrays.toString(gradient));
- //}
- objectiveTime += System.currentTimeMillis() - begin;
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- long begin = System.currentTimeMillis();
- List<Future<?>> tasks = new ArrayList<Future<?>>();
-
- System.out.print(",");
- System.out.flush();
-
- Arrays.fill(newPoint, 0, newPoint.length, 0);
-
- // first project using the phrase-tag constraints,
- // for all p,t: sum_c lambda_ptc < scaleP
- if (pool == null)
- {
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- List<Edge> edges = c.c.getEdgesForPhrase(p);
- double[] toProject = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e (twice)
- for(int e=0; e<edges.size(); e++)
- toProject[e] = point[index(edges.get(e), tag, true)];
- long lbegin = System.currentTimeMillis();
- projectionPhrase.project(toProject);
- actualProjectionTime += System.currentTimeMillis() - lbegin;
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, true)] = toProject[e];
- }
- }
- }
- else // do above in parallel using thread pool
- {
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- final int phrase = p;
- final double[] inPoint = point;
- Runnable task = new Runnable()
- {
- public void run()
- {
- List<Edge> edges = c.c.getEdgesForPhrase(phrase);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = inPoint[index(edges.get(e), tag, true)];
- projectionPhrase.project(toProject);
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, true)] = toProject[e];
- }
- }
- };
- tasks.add(pool.submit(task));
- }
- }
- //System.out.println("after PT " + Arrays.toString(newPoint));
-
- // now project using the context-tag constraints,
- // for all c,t: sum_p omega_pct < scaleC
- if (pool == null)
- {
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- List<Edge> edges = c.c.getEdgesForContext(ctx);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = point[index(edges.get(e), tag, false)];
- long lbegin = System.currentTimeMillis();
- projectionContext.project(toProject);
- actualProjectionTime += System.currentTimeMillis() - lbegin;
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, false)] = toProject[e];
- }
- }
- }
- else
- {
- // do above in parallel using thread pool
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- final int context = ctx;
- final double[] inPoint = point;
- Runnable task = new Runnable()
- {
- public void run()
- {
- List<Edge> edges = c.c.getEdgesForContext(context);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = inPoint[index(edges.get(e), tag, false)];
- projectionContext.project(toProject);
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, false)] = toProject[e];
- }
- }
- };
- tasks.add(pool.submit(task));
- }
- }
-
- if (pool != null)
- {
- // wait for all the jobs to complete
- Exception failure = null;
- for (Future<?> task: tasks)
- {
- try {
- task.get();
- } catch (InterruptedException e) {
- System.err.println("ERROR: Projection thread interrupted");
- e.printStackTrace();
- failure = e;
- } catch (ExecutionException e) {
- System.err.println("ERROR: Projection thread died");
- e.printStackTrace();
- failure = e;
- }
- }
- // rethrow the exception
- if (failure != null)
- {
- pool.shutdownNow();
- throw new RuntimeException(failure);
- }
- }
-
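- // buffer swap: return the array holding the projected point and keep
- // the caller's array as scratch space for the next call, avoiding a
- // fresh allocation per projection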
- double[] tmp = newPoint;
- newPoint = point;
- projectionTime += System.currentTimeMillis() - begin;
-
- //if (debug)
- //System.out.println("\t\treturning " + Arrays.toString(tmp));
- return tmp;
- }
-
- private int index(Edge edge, int tag, boolean phrase)
- {
- // NB if indexing changes must also change code in updateFunction and constructor
- if (phrase)
- return tag * edgeIndex.size() + edgeIndex.get(edge);
- else
- return (c.K + tag) * edgeIndex.size() + edgeIndex.get(edge);
- }
-
- private int index(int e, int tag, boolean phrase)
- {
- // NB if indexing changes must also change code in updateFunction and constructor
- if (phrase)
- return tag * edgeIndex.size() + e;
- else
- return (c.K + tag) * edgeIndex.size() + e;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
- @Override
- public String toString() {
- return "No need for pointless toString";
- }
-
- public double []posterior(int edgeIndex){
- return q[edgeIndex];
- }
-
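- // Projected gradient descent with an Armijo line search along the
- // projection arc; stops when the projected gradient norm or the value
- // difference (scaled by the model log-likelihood) is small enough, or
- // after ITERATIONS outer steps.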
- public boolean optimizeWithProjectedGradientDescent()
- {
- projectionTime = 0;
- actualProjectionTime = 0;
- objectiveTime = 0;
- long start = System.currentTimeMillis();
-
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
- //LineSearchMethod ls = new WolfRuleLineSearch(
- // (new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
- optimizer.setMaxIterations(ITERATIONS);
- updateFunction();
- boolean success = optimizer.optimize(this,stats,compositeStop);
-
- System.out.println();
- System.out.println(stats.prettyPrint(1));
-
- if (success)
- System.out.print("\toptimization took " + optimizer.getCurrentIteration() + " iterations");
- else
- System.out.print("\toptimization failed to converge");
- long total = System.currentTimeMillis() - start;
- System.out.println(" and " + total + " ms: projection " + projectionTime +
- " actual " + actualProjectionTime + " objective " + objectiveTime);
-
- return success;
- }
-
- double loglikelihood()
- {
- return llh;
- }
-
- double KL_divergence()
- {
- return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
- }
-
- double phrase_l1lmax()
- {
- // \sum_{tag,phrase} max_{context} P(tag|context,phrase)
- double sum=0;
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- List<Edge> edges = c.c.getEdgesForPhrase(p);
- for(int tag=0;tag<c.K;tag++)
- {
- double max=0;
- for (Edge edge: edges)
- max = Math.max(max, q[edgeIndex.get(edge)][tag]);
- sum+=max;
- }
- }
- return sum;
- }
-
- double context_l1lmax()
- {
- // \sum_{tag,context} max_{phrase} P(tag|context,phrase)
- double sum=0;
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- List<Edge> edges = c.c.getEdgesForContext(ctx);
- for(int tag=0; tag<c.K; tag++)
- {
- double max=0;
- for (Edge edge: edges)
- max = Math.max(max, q[edgeIndex.get(edge)][tag]);
- sum+=max;
- }
- }
- return sum;
- }
-
- // L - KL(q||p) - scalePT * l1lmax_phrase - scaleCT * l1lmax_context
- public double primal()
- {
- return loglikelihood() - KL_divergence() - scalePT * phrase_l1lmax() - scaleCT * context_l1lmax();
- }
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
deleted file mode 100644
index 0cf31c1c..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
+++ /dev/null
@@ -1,193 +0,0 @@
-package phrase;
-
-import io.FileUtil;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class PhraseCorpus
-{
- public HashMap<String,Integer>wordLex;
- public HashMap<String,Integer>phraseLex;
-
- public String wordList[];
- public String phraseList[];
-
- //data[phrase][num context][position]
- public int data[][][];
- public int numContexts;
-
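- // Each input line (inferred from the parser below):
- //   phrase TAB context ||| C=<count> ||| context ||| C=<count> ...
- // where every context contains the <PHRASE> placeholder token.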
- public PhraseCorpus(String filename) throws FileNotFoundException, IOException
- {
- BufferedReader r = FileUtil.reader(new File(filename));
-
- phraseLex=new HashMap<String,Integer>();
- wordLex=new HashMap<String,Integer>();
-
- ArrayList<int[][]>dataList=new ArrayList<int[][]>();
- String line=null;
- numContexts = 0;
-
- while((line=readLine(r))!=null){
-
- String toks[]=line.split("\t");
- String phrase=toks[0];
- addLex(phrase,phraseLex);
-
- toks=toks[1].split(" \\|\\|\\| ");
-
- ArrayList <int[]>ctxList=new ArrayList<int[]>();
-
- for(int i=0;i<toks.length;i+=2){
- String ctx=toks[i];
- String words[]=ctx.split(" ");
- if (numContexts == 0)
- numContexts = words.length - 1;
- else
- assert numContexts == words.length - 1;
-
- int []context=new int [numContexts+1];
- int idx=0;
- for(String word:words){
- if(word.equals("<PHRASE>")){
- continue;
- }
- addLex(word,wordLex);
- context[idx]=wordLex.get(word);
- idx++;
- }
-
- String count=toks[i+1];
- context[idx]=Integer.parseInt(count.trim().substring(2));
-
- ctxList.add(context);
- }
-
- dataList.add(ctxList.toArray(new int [0][]));
-
- }
- try{
- r.close();
- }catch(IOException ioe){
- ioe.printStackTrace();
- }
- data=dataList.toArray(new int[0][][]);
- }
-
- private void addLex(String key, HashMap<String,Integer>lex){
- Integer i=lex.get(key);
- if(i==null){
- lex.put(key, lex.size());
- }
- }
-
- //for debugging
- public void saveLex(String lexFilename) throws FileNotFoundException, IOException
- {
- PrintStream ps = FileUtil.printstream(new File(lexFilename));
- ps.println("Phrase Lexicon");
- ps.println(phraseLex.size());
- printDict(phraseLex,ps);
-
- ps.println("Word Lexicon");
- ps.println(wordLex.size());
- printDict(wordLex,ps);
- ps.close();
- }
-
- private static void printDict(HashMap<String,Integer>lex,PrintStream ps){
- String []dict=buildList(lex);
- for(int i=0;i<dict.length;i++){
- ps.println(dict[i]);
- }
- }
-
- public void loadLex(String lexFilename){
- Scanner sc=io.FileUtil.openInFile(lexFilename);
-
- sc.nextLine();
- int size=sc.nextInt();
- sc.nextLine();
- String[]dict=new String[size];
- for(int i=0;i<size;i++){
- dict[i]=sc.nextLine();
- }
- phraseLex=buildMap(dict);
-
- sc.nextLine();
- size=sc.nextInt();
- sc.nextLine();
- dict=new String[size];
- for(int i=0;i<size;i++){
- dict[i]=sc.nextLine();
- }
- wordLex=buildMap(dict);
- sc.close();
- }
-
- private HashMap<String, Integer> buildMap(String[]dict){
- HashMap<String,Integer> map=new HashMap<String,Integer>();
- for(int i=0;i<dict.length;i++){
- map.put(dict[i], i);
- }
- return map;
- }
-
- public void buildList(){
- if(wordList==null){
- wordList=buildList(wordLex);
- phraseList=buildList(phraseLex);
- }
- }
-
- private static String[]buildList(HashMap<String,Integer>lex){
- String dict[]=new String [lex.size()];
- for(String key:lex.keySet()){
- dict[lex.get(key)]=key;
- }
- return dict;
- }
-
- public String getContextString(int context[], boolean addPhraseMarker)
- {
- StringBuffer b = new StringBuffer();
- for (int i=0;i<context.length-1;i++)
- {
- if (b.length() > 0)
- b.append(" ");
-
- if (i == context.length/2)
- b.append("<PHRASE> ");
-
- b.append(wordList[context[i]]);
- }
- return b.toString();
- }
-
- public static String readLine(BufferedReader r){
- try{
- return r.readLine();
- }
- catch(IOException ioe){
- ioe.printStackTrace();
- }
- return null;
- }
-
- public static void main(String[] args) throws Exception
- {
- String LEX_FILENAME="../pdata/lex.out";
- String DATA_FILENAME="../pdata/btec.con";
- PhraseCorpus c=new PhraseCorpus(DATA_FILENAME);
- c.saveLex(LEX_FILENAME);
- c.loadLex(LEX_FILENAME);
- c.saveLex(LEX_FILENAME);
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
deleted file mode 100644
index ac73a075..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
+++ /dev/null
@@ -1,224 +0,0 @@
-package phrase;
-
-import java.util.Arrays;
-import java.util.List;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-
-public class PhraseObjective extends ProjectedObjective
-{
- static final double GRAD_DIFF = 0.00002;
- static double INIT_STEP_SIZE = 300;
- static double VAL_DIFF = 1e-8; // tuned to BTEC subsample
- static int ITERATIONS = 100;
- private PhraseCluster c;
-
- /**@brief
- * for debugging purposes
- */
- //public static PrintStream ps;
-
- /**@brief current phrase being optimized*/
- public int phrase;
-
- /**@brief un-regularized posterior
- * unnormalized
- * p[edge][tag]
- * P(tag|edge) \propto P(tag|phrase)P(context|tag)
- */
- private double[][]p;
-
- /**@brief regularized posterior
- * q[edge][tag] propto p[edge][tag]*exp(-lambda)
- */
- private double q[][];
- private List<Corpus.Edge> data;
-
- /**@brief log likelihood of the associated phrase
- *
- */
- private double loglikelihood;
- private SimplexProjection projection;
-
- double[] newPoint;
-
- private int n_param;
-
- /**@brief likelihood under p
- *
- */
- public double llh;
-
- public PhraseObjective(PhraseCluster cluster, int phraseIdx, double scale, double[] lambda){
- phrase=phraseIdx;
- c=cluster;
- data=c.c.getEdgesForPhrase(phrase);
- n_param=data.size()*c.K;
- //System.out.println("Num parameters " + n_param + " for phrase #" + phraseIdx);
-
- if (lambda==null)
- lambda=new double[n_param];
-
- parameters = lambda;
- newPoint = new double[n_param];
- gradient = new double[n_param];
- initP();
- projection=new SimplexProjection(scale);
- q=new double [data.size()][c.K];
-
- setParameters(parameters);
- }
-
- private void initP(){
- p=new double[data.size()][];
- for(int edge=0;edge<data.size();edge++){
- p[edge]=c.posterior(data.get(edge));
- llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge])); // Was bug here - count inside log!
- arr.F.l1normalize(p[edge]);
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- super.setParameters(params);
- updateFunction();
- }
-
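- // As in PhraseContextObjective.updateFunction, but with phrase-tag duals
- // only: q[edge][tag] is proportional to p[edge][tag]*exp(-lambda/count),
- // and the gradient w.r.t. each dual variable is -q[edge][tag].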
- private void updateFunction(){
- updateCalls++;
- loglikelihood=0;
-
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- q[edge][tag]=p[edge][tag]*
- Math.exp(-parameters[tag*data.size()+edge]/data.get(edge).getCount());
- }
- }
-
- for(int edge=0;edge<data.size();edge++){
- loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
- arr.F.l1normalize(q[edge]);
- }
-
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- gradient[tag*data.size()+edge]=-q[edge][tag];
- }
- }
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- double toProject[]=new double[data.size()];
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- toProject[edge]=point[tag*data.size()+edge];
- }
- projection.project(toProject);
- for(int edge=0;edge<data.size();edge++){
- newPoint[tag*data.size()+edge]=toProject[edge];
- }
- }
- return newPoint;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
- @Override
- public String toString() {
- return Arrays.toString(parameters);
- }
-
- public double [][]posterior(){
- return q;
- }
-
- long optimizationTime;
-
- public boolean optimizeWithProjectedGradientDescent(){
- long start = System.currentTimeMillis();
-
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
- //LineSearchMethod ls = new WolfRuleLineSearch(
- // (new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
- optimizer.setMaxIterations(ITERATIONS);
- updateFunction();
- boolean success = optimizer.optimize(this,stats,compositeStop);
- //System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- //if(succed){
- //System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- //}else{
-// System.out.println("Failed to optimize");
- //}
- //System.out.println(Arrays.toString(parameters));
-
- // for(int edge=0;edge<data.getSize();edge++){
- // ps.println(Arrays.toString(q[edge]));
- // }
-
- return success;
- }
-
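- // Recovers the count-weighted KL(q||p) from the duals: gradient = -q,
- // so dotProduct(parameters, gradient) = -sum lambda*q, and algebraically
- // KL(q||p) = -loglikelihood(q) - sum lambda*q (a sketch; see updateFunction).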
- public double KL_divergence()
- {
- return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
- }
-
- public double loglikelihood()
- {
- return llh;
- }
-
- public double l1lmax()
- {
- double sum=0;
- for(int tag=0;tag<c.K;tag++){
- double max=0;
- for(int edge=0;edge<data.size();edge++){
- if(q[edge][tag]>max)
- max=q[edge][tag];
- }
- sum+=max;
- }
- return sum;
- }
-
- public double primal(double scale)
- {
- return loglikelihood() - KL_divergence() - scale * l1lmax();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
deleted file mode 100644
index 6f302b20..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
+++ /dev/null
@@ -1,257 +0,0 @@
-package phrase;
-
-import io.FileUtil;
-import joptsimple.OptionParser;
-import joptsimple.OptionSet;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import phrase.Corpus.Edge;
-
-import arr.F;
-
-public class Trainer
-{
- public static void main(String[] args)
- {
- OptionParser parser = new OptionParser();
- parser.accepts("help");
- parser.accepts("in").withRequiredArg().ofType(File.class);
- parser.accepts("in1").withRequiredArg().ofType(File.class);
- parser.accepts("test").withRequiredArg().ofType(File.class);
- parser.accepts("out").withRequiredArg().ofType(File.class);
- parser.accepts("start").withRequiredArg().ofType(File.class);
- parser.accepts("parameters").withRequiredArg().ofType(File.class);
- parser.accepts("topics").withRequiredArg().ofType(Integer.class).defaultsTo(5);
- parser.accepts("iterations").withRequiredArg().ofType(Integer.class).defaultsTo(10);
- parser.accepts("threads").withRequiredArg().ofType(Integer.class).defaultsTo(0);
- parser.accepts("scale-phrase").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
- parser.accepts("scale-context").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
- parser.accepts("seed").withRequiredArg().ofType(Long.class).defaultsTo(0l);
- parser.accepts("convergence-threshold").withRequiredArg().ofType(Double.class).defaultsTo(1e-6);
- parser.accepts("variational-bayes");
- parser.accepts("alpha-emit").withRequiredArg().ofType(Double.class).defaultsTo(0.1);
- parser.accepts("alpha-pi").withRequiredArg().ofType(Double.class).defaultsTo(0.0001);
- parser.accepts("agree-direction");
- parser.accepts("agree-language");
- parser.accepts("no-parameter-cache");
- parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5);
- OptionSet options = parser.parse(args);
-
- if (options.has("help") || !options.has("in"))
- {
- try {
- parser.printHelpOn(System.err);
- } catch (IOException e) {
- System.err.println("This should never happen.");
- e.printStackTrace();
- }
- System.exit(1);
- }
-
- int tags = (Integer) options.valueOf("topics");
- int iterations = (Integer) options.valueOf("iterations");
- double scale_phrase = (Double) options.valueOf("scale-phrase");
- double scale_context = (Double) options.valueOf("scale-context");
- int threads = (Integer) options.valueOf("threads");
- double threshold = (Double) options.valueOf("convergence-threshold");
- boolean vb = options.has("variational-bayes");
- double alphaEmit = (vb) ? (Double) options.valueOf("alpha-emit") : 0;
- double alphaPi = (vb) ? (Double) options.valueOf("alpha-pi") : 0;
- int skip = (Integer) options.valueOf("skip-large-phrases");
-
- if (options.has("seed"))
- F.rng = new Random((Long) options.valueOf("seed"));
-
- ExecutorService threadPool = null;
- if (threads > 0)
- threadPool = Executors.newFixedThreadPool(threads);
-
- if (tags <= 1 || scale_phrase < 0 || scale_context < 0 || threshold < 0)
- {
- System.err.println("Invalid arguments. Try again!");
- System.exit(1);
- }
-
- Corpus corpus = null;
- File infile = (File) options.valueOf("in");
- Corpus corpus1 = null;
- File infile1 = (File) options.valueOf("in1");
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- if(options.has("in1")){
- corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
- corpus1.printStats(System.out);
- }
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- if (!(options.has("agree-direction")||options.has("agree-language")))
- System.out.println("Running with " + tags + " tags " +
- "for " + iterations + " iterations " +
- ((skip > 0) ? "skipping large phrases for first " + skip + " iterations " : "") +
- "with scale " + scale_phrase + " phrase and " + scale_context + " context " +
- "and " + threads + " threads");
- else
- System.out.println("Running agreement model with " + tags + " tags " +
- "for " + iterations);
-
- System.out.println();
-
- PhraseCluster cluster = null;
- Agree2Sides agree2sides = null;
- Agree agree= null;
- VB vbModel=null;
- if (options.has("agree-language"))
- agree2sides = new Agree2Sides(tags, corpus,corpus1);
- else if (options.has("agree-direction"))
- agree = new Agree(tags, corpus);
- else
- {
- if (vb)
- {
- vbModel=new VB(tags,corpus);
- vbModel.alpha=alphaPi;
- vbModel.lambda=alphaEmit;
- if (threadPool != null) vbModel.useThreadPool(threadPool);
- }
- else
- {
- cluster = new PhraseCluster(tags, corpus);
- if (threadPool != null) cluster.useThreadPool(threadPool);
-
- if (options.has("no-parameter-cache"))
- cluster.cacheLambda = false;
- if (options.has("start"))
- {
- try {
- System.err.println("Reading starting parameters from " + options.valueOf("start"));
- cluster.loadParameters(FileUtil.reader((File)options.valueOf("start")));
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + options.valueOf("start"));
- e.printStackTrace();
- }
- }
- }
- }
-
- double last = 0;
- for (int i=0; i < iterations; i++)
- {
- double o;
- if (agree != null)
- o = agree.EM();
- else if(agree2sides!=null)
- o = agree2sides.EM();
- else
- {
- if (i < skip)
- System.out.println("Skipping phrases of length > " + (i+1));
-
- if (scale_phrase <= 0 && scale_context <= 0)
- {
- if (!vb)
- o = cluster.EM((i < skip) ? i+1 : 0);
- else
- o = vbModel.EM();
- }
- else
- o = cluster.PREM(scale_phrase, scale_context, (i < skip) ? i+1 : 0);
- }
-
- System.out.println("ITER: "+i+" objective: " + o);
-
- // sometimes takes a few iterations to break the ties
- if (i > 5 && Math.abs((o - last) / o) < threshold)
- {
- last = o;
- break;
- }
- last = o;
- }
-
- double pl1lmax = 0, cl1lmax = 0;
- if (cluster != null)
- {
- pl1lmax = cluster.phrase_l1lmax();
- cl1lmax = cluster.context_l1lmax();
- }
- else if (agree != null)
- {
- // fairly arbitrary choice of model1 cf model2
- pl1lmax = agree.model1.phrase_l1lmax();
- cl1lmax = agree.model1.context_l1lmax();
- }
- else if (agree2sides != null)
- {
- // fairly arbitrary choice of model1 cf model2
- pl1lmax = agree2sides.model1.phrase_l1lmax();
- cl1lmax = agree2sides.model1.context_l1lmax();
- }
-
- System.out.println("\nFinal posterior phrase l1lmax " + pl1lmax + " context l1lmax " + cl1lmax);
-
- if (options.has("out"))
- {
- File outfile = (File) options.valueOf("out");
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- List<Edge> test;
- if (!options.has("test")) // just use the training
- test = corpus.getEdges();
- else
- { // if --test supplied, load up the file
- infile = (File) options.valueOf("test");
- System.out.println("Reading testing concordance from " + infile);
- test = corpus.readEdges(FileUtil.reader(infile));
- }
- if(vb) {
- assert !options.has("test");
- vbModel.displayPosterior(ps);
- } else if (cluster != null)
- cluster.displayPosterior(ps, test);
- else if (agree != null)
- agree.displayPosterior(ps, test);
- else if (agree2sides != null) {
- assert !options.has("test");
- agree2sides.displayPosterior(ps);
- }
-
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open either testing file or output file");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- if (options.has("parameters"))
- {
- assert !vb;
- File outfile = (File) options.valueOf("parameters");
- PrintStream ps;
- try {
- ps = FileUtil.printstream(outfile);
- cluster.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output parameters file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- if (cluster != null && cluster.pool != null)
- cluster.pool.shutdown();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/VB.java b/gi/posterior-regularisation/prjava/src/phrase/VB.java
deleted file mode 100644
index cd3f4966..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/VB.java
+++ /dev/null
@@ -1,419 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import org.apache.commons.math.special.Gamma;
-
-import phrase.Corpus.Edge;
-
-public class VB {
-
- public static int MAX_ITER=400;
-
- /**@brief
- * hyper param for beta
- * where beta is multinomial
- * for generating words from a topic
- */
- public double lambda=0.1;
- /**@brief
- * hyper param for theta
- * where theta is dirichlet for z
- */
- public double alpha=0.0001;
- /**@brief
- * variational param for beta
- */
- private double rho[][][];
- private double digamma_rho[][][];
- private double rho_sum[][];
- /**@brief
- * variational param for z
- */
- //private double phi[][];
- /**@brief
- * variational param for theta
- */
- private double gamma[];
- private static double VAL_DIFF_RATIO=0.005;
-
- private int n_positions;
- private int n_words;
- private int K;
- private ExecutorService pool;
-
- private Corpus c;
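-
- // The model mirrors variational LDA run over the concordance: each
- // phrase acts as a document and each of its contexts as a word; rho,
- // gamma and phi are the variational parameters for the topic-word,
- // document-topic and per-context assignment distributions respectively.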
- public static void main(String[] args) {
- // String in="../pdata/canned.con";
- String in="../pdata/btec.con";
- String out="../pdata/vb.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- VB vb=new VB(numCluster, corpus);
- int iter=20;
- for(int i=0;i<iter;i++){
- double obj=vb.EM();
- System.out.println("Iter "+i+": "+obj);
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- vb.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public VB(int numCluster, Corpus corpus){
- c=corpus;
- K=numCluster;
- n_positions=c.getNumContextPositions();
- n_words=c.getNumWords();
- rho=new double[K][n_positions][n_words];
- //to init rho
- //loop through data and count up words
- double[] phi_tmp=new double[K];
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- Arrays.fill(rho[i][pos], lambda);
- }
- }
- for(int d=0;d<c.getNumPhrases();d++){
- List<Edge>doc=c.getEdgesForPhrase(d);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- arr.F.randomise(phi_tmp);
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- rho[i][pos][context.get(pos)]+=phi_tmp[i];
- }
- }
- }
- }
-
- }
-
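- /**
-  * Mean-field updates for one phrase (its "document"): alternate the phi
-  * and gamma updates of standard variational LDA until the per-phrase
-  * bound improves by less than VAL_DIFF_RATIO, then return that bound.
-  */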
- private double inference(int phraseID, double[][] phi, double[] gamma)
- {
- List<Edge > doc=c.getEdgesForPhrase(phraseID);
- for(int i=0;i<phi.length;i++){
- for(int j=0;j<phi[i].length;j++){
- phi[i][j]=1.0/K;
- }
- }
- Arrays.fill(gamma,alpha+1.0/K);
-
- double digamma_gamma[]=new double[K];
-
- double gamma_sum=digamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- digamma_gamma[i]=digamma(gamma[i]);
- }
- double gammaSum[]=new double [K];
- double prev_val=0;
- double obj=0;
-
- for(int iter=0;iter<MAX_ITER;iter++){
- prev_val=obj;
- obj=0;
- Arrays.fill(gammaSum,0.0);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- double phisum=0;
- for(int i=0;i<K;i++){
- double sum=0;
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- sum+=digamma_rho[i][pos][word]-rho_sum[i][pos];
- }
- sum+= digamma_gamma[i]-gamma_sum;
- phi[n][i]=sum;
-
- if (i > 0){
- phisum = log_sum(phisum, phi[n][i]);
- }
- else{
- phisum = phi[n][i];
- }
-
- }//end of a word
-
- for(int i=0;i<K;i++){
- phi[n][i]=Math.exp(phi[n][i]-phisum);
- gammaSum[i]+=phi[n][i];
- }
-
- }//end of doc
-
- for(int i=0;i<K;i++){
- gamma[i]=alpha+gammaSum[i];
- }
- gamma_sum=digamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- digamma_gamma[i]=digamma(gamma[i]);
- }
- //compute objective for reporting
-
- obj=0;
-
- for(int i=0;i<K;i++){
- obj+=(alpha-1)*(digamma_gamma[i]-gamma_sum);
- }
-
-
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
-
- for(int i=0;i<K;i++){
- //entropy of phi + expected log likelihood of z
- obj+=phi[n][i]*(digamma_gamma[i]-gamma_sum);
-
- if(phi[n][i]>1e-10){
- obj+=phi[n][i]*Math.log(phi[n][i]);
- }
-
- double beta_sum=0;
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- beta_sum+=(digamma(rho[i][pos][word])-rho_sum[i][pos]);
- }
- obj+=phi[n][i]*beta_sum;
- }
- }
-
- obj-=log_gamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- obj+=Gamma.logGamma(gamma[i]);
- obj-=(gamma[i]-1)*(digamma_gamma[i]-gamma_sum);
- }
-
-// System.out.println(phraseID+": "+obj);
- if(iter>0 && (obj-prev_val)/Math.abs(obj)<VAL_DIFF_RATIO){
- break;
- }
- }//end of inference loop
-
- return obj;
- }//end of inference
-
- /**
- * @return objective of this iteration
- */
- public double EM(){
- double emObj=0;
- if(digamma_rho==null){
- digamma_rho=new double[K][n_positions][n_words];
- }
- for(int i=0;i<K;i++){
- for (int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- digamma_rho[i][pos][j]= digamma(rho[i][pos][j]);
- }
- }
- }
-
- if(rho_sum==null){
- rho_sum=new double [K][n_positions];
- }
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- rho_sum[i][pos]=digamma(arr.F.l1norm(rho[i][pos]));
- }
- }
-
- //E
- double exp_rho[][][]=new double[K][n_positions][n_words];
- if (pool == null)
- {
- for (int d=0;d<c.getNumPhrases();d++)
- {
- List<Edge > doc=c.getEdgesForPhrase(d);
- double[][] phi = new double[doc.size()][K];
- double[] gamma = new double[K];
-
- emObj += inference(d, phi, gamma);
-
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- for(int i=0;i<K;i++){
- exp_rho[i][pos][word]+=phi[n][i];
- }
- }
- }
- //if(d!=0 && d%100==0) System.out.print(".");
- //if(d!=0 && d%1000==0) System.out.println(d);
- }
- }
- else // multi-threaded version of above loop
- {
- class PartialEStep implements Callable<PartialEStep>
- {
- double[][] phi;
- double[] gamma;
- double obj;
- int d;
- PartialEStep(int d) { this.d = d; }
-
- public PartialEStep call()
- {
- phi = new double[c.getEdgesForPhrase(d).size()][K];
- gamma = new double[K];
- obj = inference(d, phi, gamma);
- return this;
- }
- }
-
- List<Future<PartialEStep>> jobs = new ArrayList<Future<PartialEStep>>();
- for (int d=0;d<c.getNumPhrases();d++)
- jobs.add(pool.submit(new PartialEStep(d)));
-
- for (Future<PartialEStep> job: jobs)
- {
- try {
- PartialEStep e = job.get();
-
- emObj += e.obj;
- List<Edge> doc = c.getEdgesForPhrase(e.d);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- for(int i=0;i<K;i++){
- exp_rho[i][pos][word]+=e.phi[n][i];
- }
- }
- }
- } catch (ExecutionException e) {
- System.err.println("ERROR: E-step thread execution failed.");
- throw new RuntimeException(e);
- } catch (InterruptedException e) {
- System.err.println("ERROR: Failed to join E-step thread.");
- throw new RuntimeException(e);
- }
- }
- }
- // System.out.println("EM Objective:"+emObj);
-
- //M
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- rho[i][pos][j]=lambda+exp_rho[i][pos][j];
- }
- }
- }
-
- //E[\log p(\beta|\lambda)] - E[\log q(\beta)]
-		for(int i=0;i<K;i++){
-			for(int pos=0;pos<n_positions;pos++){
-				double rhoSum=0;	// reset per position: each (cluster, position) pair has its own Dirichlet
-				for(int j=0;j<n_words;j++){
-					rhoSum+=rho[i][pos][j];
-				}
-				double digamma_rhoSum=Gamma.digamma(rhoSum);
- emObj-=Gamma.logGamma(rhoSum);
- for(int j=0;j<n_words;j++){
- emObj+=(lambda-rho[i][pos][j])*(Gamma.digamma(rho[i][pos][j])-digamma_rhoSum);
- emObj+=Gamma.logGamma(rho[i][pos][j]);
- }
- }
- }
-
- return emObj;
- }//end of EM
-
- public void displayPosterior(PrintStream ps)
- {
- for(int d=0;d<c.getNumPhrases();d++){
- List<Edge > doc=c.getEdgesForPhrase(d);
- double[][] phi = new double[doc.size()][K];
- for(int i=0;i<phi.length;i++)
- for(int j=0;j<phi[i].length;j++)
- phi[i][j]=1.0/K;
- double[] gamma = new double[K];
-
- inference(d, phi, gamma);
-
- for(int n=0;n<doc.size();n++){
- Edge edge=doc.get(n);
- int tag=arr.F.argmax(phi[n]);
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
-
- ps.println(" ||| C=" + tag);
- }
- }
- }
-
- double log_sum(double log_a, double log_b)
- {
- double v;
-
- if (log_a < log_b)
- v = log_b+Math.log(1 + Math.exp(log_a-log_b));
- else
- v = log_a+Math.log(1 + Math.exp(log_b-log_a));
- return(v);
- }
-
- double digamma(double x)
- {
- double p;
- x=x+6;
- p=1/(x*x);
- p=(((0.004166666666667*p-0.003968253986254)*p+
- 0.008333333333333)*p-0.083333333333333)*p;
- p=p+Math.log(x)-0.5/x-1/(x-1)-1/(x-2)-1/(x-3)-1/(x-4)-1/(x-5)-1/(x-6);
- return p;
- }
-
- double log_gamma(double x)
- {
- double z=1/(x*x);
-
- x=x+6;
- z=(((-0.000595238095238*z+0.000793650793651)
- *z-0.002777777777778)*z+0.083333333333333)/x;
- z=(x-0.5)*Math.log(x)-x+0.918938533204673+z-Math.log(x-1)-
- Math.log(x-2)-Math.log(x-3)-Math.log(x-4)-Math.log(x-5)-Math.log(x-6);
- return z;
- }
-
- public void useThreadPool(ExecutorService threadPool)
- {
- pool = threadPool;
- }
-}//End of class
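The digamma and log_gamma helpers above are the classic shifted asymptotic expansions: the argument is pushed up by 6 before the series is applied, and the shift is undone with recurrence terms. A minimal, self-contained sketch for sanity-checking the digamma approximation against the recurrence psi(x+1) = psi(x) + 1/x follows; the class name is illustrative and not part of prjava:

    public class DigammaCheck {
        // same shifted asymptotic series as VB.digamma above
        static double digamma(double x) {
            x = x + 6;
            double p = 1 / (x * x);
            p = (((0.004166666666667 * p - 0.003968253986254) * p
                    + 0.008333333333333) * p - 0.083333333333333) * p;
            return p + Math.log(x) - 0.5 / x
                    - 1 / (x - 1) - 1 / (x - 2) - 1 / (x - 3)
                    - 1 / (x - 4) - 1 / (x - 5) - 1 / (x - 6);
        }

        public static void main(String[] args) {
            // the recurrence residual should be near zero if the series is accurate
            for (double x = 0.5; x <= 5.0; x += 0.5) {
                double residual = digamma(x + 1) - digamma(x) - 1.0 / x;
                System.out.printf("x=%.1f  residual=%.2e%n", x, residual);
            }
        }
    }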
diff --git a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java b/gi/posterior-regularisation/prjava/src/test/CorpusTest.java
deleted file mode 100644
index b4c3041f..00000000
--- a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package test;
-
-import java.util.Arrays;
-import java.util.HashMap;
-
-import data.Corpus;
-import hmm.POS;
-
-public class CorpusTest {
-
- public static void main(String[] args) {
- Corpus c=new Corpus(POS.trainFilename);
-
-
- int idx=30;
-
-
- HashMap<String, Integer>vocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.alphaFilename);
-
- HashMap<String, Integer>tagVocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
-
-
- String [] dict=new String [vocab.size()+1];
- for(String key:vocab.keySet()){
- dict[vocab.get(key)]=key;
- }
- dict[dict.length-1]=Corpus.UNK_TOK;
-
- String [] tagdict=new String [tagVocab.size()+1];
- for(String key:tagVocab.keySet()){
- tagdict[tagVocab.get(key)]=key;
- }
- tagdict[tagdict.length-1]=Corpus.UNK_TOK;
-
- String[] sent=c.get(idx);
- int []data=c.getInt(idx);
-
-
- String []roundtrip=new String [sent.length];
- for(int i=0;i<sent.length;i++){
- roundtrip[i]=dict[data[i]];
- }
- System.out.println(Arrays.toString(sent));
- System.out.println(Arrays.toString(roundtrip));
-
- sent=c.tag.get(idx);
- data=c.tagData.get(idx);
-
-
- roundtrip=new String [sent.length];
- for(int i=0;i<sent.length;i++){
- roundtrip[i]=tagdict[data[i]];
- }
- System.out.println(Arrays.toString(sent));
- System.out.println(Arrays.toString(roundtrip));
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java b/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java
deleted file mode 100644
index d54525c8..00000000
--- a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java
+++ /dev/null
@@ -1,105 +0,0 @@
-package test;
-
-import hmm.HMM;
-import hmm.POS;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-
-import data.Corpus;
-
-public class HMMModelStats {
-
- public static String modelFilename="../posdata/posModel.out";
- public static String alphaFilename="../posdata/corpus.alphabet";
- public static String statsFilename="../posdata/model.stats";
-
- public static final int NUM_WORD=50;
-
- public static String testFilename="../posdata/en_test.conll";
-
- public static double [][]maxwt;
-
- public static void main(String[] args) {
- HashMap<String, Integer>vocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(alphaFilename);
-
- Corpus test=new Corpus(testFilename,vocab);
-
- String [] dict=new String [vocab.size()+1];
- for(String key:vocab.keySet()){
- dict[vocab.get(key)]=key;
- }
- dict[dict.length-1]=Corpus.UNK_TOK;
-
- HMM hmm=new HMM();
- hmm.readModel(modelFilename);
-
-
-
- PrintStream ps = null;
- try {
- ps = io.FileUtil.printstream(new File(statsFilename));
- } catch (IOException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- double [][] emit=hmm.getEmitProb();
- for(int i=0;i<emit.length;i++){
- ArrayList<IntDoublePair>l=new ArrayList<IntDoublePair>();
- for(int j=0;j<emit[i].length;j++){
- l.add(new IntDoublePair(j,emit[i][j]));
- }
- Collections.sort(l);
- ps.println(i);
- for(int j=0;j<NUM_WORD;j++){
- if(j>=dict.length){
- break;
- }
- ps.print(dict[l.get(j).idx]+"\t");
- if((1+j)%10==0){
- ps.println();
- }
- }
- ps.println("\n");
- }
-
- checkMaxwt(hmm,ps,test.getAllData());
-
-		int terminalSym=vocab.get(Corpus.END_SYM);
- //sample 10 sentences
- for(int i=0;i<10;i++){
- int []sent=hmm.sample(terminalSym);
- for(int j=0;j<sent.length;j++){
- ps.print(dict[sent[j]]+"\t");
- }
- ps.println();
- }
-
- ps.close();
-
- }
-
- public static void checkMaxwt(HMM hmm,PrintStream ps,int [][]data){
- double [][]emit=hmm.getEmitProb();
- maxwt=new double[emit.length][emit[0].length];
-
- hmm.computeMaxwt(maxwt,data);
- double sum=0;
- for(int i=0;i<maxwt.length;i++){
-			for(int j=0;j<maxwt[i].length;j++){	// iterate over the full row, not just the first maxwt.length columns
- sum+=maxwt[i][j];
- }
- }
-
- ps.println("max w t P(w_i|t): "+sum);
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java b/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java
deleted file mode 100644
index 3f9f0ad7..00000000
--- a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package test;
-
-public class IntDoublePair implements Comparable<IntDoublePair>{
-	double val;
-	int idx;
-	/** Sorts in descending order of val (largest value first). */
-	public int compareTo(IntDoublePair pair){
-		return Double.compare(pair.val, val);
-	}
- public IntDoublePair(int i,double v){
- val=v;
- idx=i;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java
deleted file mode 100644
index 9059a59e..00000000
--- a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package test;
-
-
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.BoundsProjection;
-import optimization.projections.Projection;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-
-/**
- * @author javg
- *
- *
- * a(x-dx)^2 + b(y-dy)^2
- */
-public class X2y2WithConstraints extends ProjectedObjective{
-
-
- double a, b;
- double dx;
- double dy;
- Projection projection;
-
-
- public X2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){
- //projection = new BoundsProjection(0.2,Double.MAX_VALUE);
- super();
- projection = proj;
- this.a = a;
- this.b = b;
- this.dx = dx;
- this.dy = dy;
- setInitialParameters(params);
- System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2");
- System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")");
- printParameters();
- projection.project(parameters);
- printParameters();
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy));
- }
-
- public double[] getGradient() {
- if(gradient == null){
- gradient = new double[2];
- }
- gradientCalls++;
- gradient[0]=2*a*(parameters[0]-dx);
- gradient[1]=2*b*(parameters[1]-dy);
- return gradient;
- }
-
-
- public double[] projectPoint(double[] point) {
- double[] newPoint = point.clone();
- projection.project(newPoint);
- return newPoint;
- }
-
- public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, X2y2WithConstraints o){
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001);
- StopingCriteria stopValue = new ValueDifference(0.001);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(5);
-		boolean succeeded = optimizer.optimize(o,stats,compositeStop);
-		System.out.println("Ended optimization with Projected Gradient Descent\n" + stats.prettyPrint(1));
-		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-		if(succeeded){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
-
-
- public String toString(){
-
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")";
- }
-
- public static void main(String[] args) {
- double a = 1;
- double b=1;
- double x0 = 0;
- double y0 =1;
- double dx = 0.5;
- double dy = 0.2 ;
- double [] parameters = new double[2];
- parameters[0] = x0;
- parameters[1] = y0;
- X2y2WithConstraints o = new X2y2WithConstraints(a,b,parameters,dx,dy,
- new SimplexProjection(0.5)
- //new BoundsProjection(0.0,0.4)
- );
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b );
- o.setDebugLevel(4);
-
- LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithProjectedGradientDescent(ls, stats, o);
-
-// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy);
-// stats = new OptimizerStats();
-// o.optimizeWithSpectralProjectedGradientDescent(stats, o);
- }
-
-
-
-
-}
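The SimplexProjection passed to the constructor in main clips the iterate back onto a scaled probability simplex. For readers without the optimization jar, the standard Euclidean projection onto {x : x_i >= 0, sum_i x_i = s} (the sort-and-threshold scheme of Duchi et al., 2008) can be sketched as below; this is an independent illustration, not the repository's optimization.projections.SimplexProjection:

    import java.util.Arrays;

    public class SimplexProjectionSketch {
        // Euclidean projection of v onto {x : x_i >= 0, sum_i x_i = scale}
        static double[] project(double[] v, double scale) {
            int n = v.length;
            double[] u = v.clone();
            Arrays.sort(u);                        // ascending
            double cumsum = 0, theta = 0;
            for (int i = n - 1; i >= 0; i--) {     // scan from the largest entry down
                cumsum += u[i];
                double t = (cumsum - scale) / (n - i);
                if (u[i] - t > 0) theta = t;       // this entry stays positive
                else break;                        // all smaller entries will be clipped
            }
            double[] x = new double[n];
            for (int i = 0; i < n; i++) x[i] = Math.max(v[i] - theta, 0);
            return x;
        }

        public static void main(String[] args) {
            // projects to (0.3, 0.2, 0.0), which sums to 0.5
            System.out.println(Arrays.toString(
                    project(new double[]{0.4, 0.3, -0.1}, 0.5)));
        }
    }

The same threshold theta shifts every coordinate, so mass is removed uniformly from the entries that remain positive; entries pushed below zero are clipped.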
diff --git a/gi/posterior-regularisation/prjava/src/util/Array.java b/gi/posterior-regularisation/prjava/src/util/Array.java
deleted file mode 100644
index cc4725af..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Array.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package util;
-
-import java.util.Arrays;
-
-public class Array {
-
-
-
- public static void sortDescending(double[] ds){
- for (int i = 0; i < ds.length; i++) ds[i] = -ds[i];
- Arrays.sort(ds);
- for (int i = 0; i < ds.length; i++) ds[i] = -ds[i];
- }
-
- /**
- * Return a new reversed array
- * @param array
- * @return
- */
- public static int[] reverseIntArray(int[] array){
- int[] reversed = new int[array.length];
- for (int i = 0; i < reversed.length; i++) {
- reversed[i] = array[reversed.length-1-i];
- }
- return reversed;
- }
-
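-	/**
-	 * N.b. despite the name, this returns the suffix in[from..in.length-1],
-	 * not any kind of sum.
-	 */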
- public static String[] sumArray(String[] in, int from){
- String[] res = new String[in.length-from];
- for (int i = from; i < in.length; i++) {
- res[i-from] = in[i];
- }
- return res;
- }
-
- public static void main(String[] args) {
- int[] i = {1,2,3,4};
- util.Printing.printIntArray(i, null, "original");
- util.Printing.printIntArray(reverseIntArray(i), null, "reversed");
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java b/gi/posterior-regularisation/prjava/src/util/ArrayMath.java
deleted file mode 100644
index 398a13a2..00000000
--- a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java
+++ /dev/null
@@ -1,186 +0,0 @@
-package util;
-
-import java.util.Arrays;
-
-public class ArrayMath {
-
- public static double dotProduct(double[] v1, double[] v2) {
- assert(v1.length == v2.length);
- double result = 0;
- for(int i = 0; i < v1.length; i++)
- result += v1[i]*v2[i];
- return result;
- }
-
- public static double twoNormSquared(double[] v) {
- double result = 0;
- for(double d : v)
- result += d*d;
- return result;
- }
-
- public static boolean containsInvalid(double[] v) {
- for(int i = 0; i < v.length; i++)
- if(Double.isNaN(v[i]) || Double.isInfinite(v[i]))
- return true;
- return false;
- }
-
-
-
- public static double safeAdd(double[] toAdd) {
- // Make sure there are no positive infinities
- double sum = 0;
- for(int i = 0; i < toAdd.length; i++) {
- assert(!(Double.isInfinite(toAdd[i]) && toAdd[i] > 0));
- assert(!Double.isNaN(toAdd[i]));
- sum += toAdd[i];
- }
-
- return sum;
- }
-
- /* Methods for filling integer and double arrays (of up to four dimensions) with the given value. */
-
- public static void set(int[][][][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[][][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[] array, int value) {
- Arrays.fill(array, value);
- }
-
-
- public static void set(double[][][][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[][][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[] array, double value) {
- Arrays.fill(array, value);
- }
-
- public static void setEqual(double[][][][] dest, double[][][][] source){
- for (int i = 0; i < source.length; i++) {
- setEqual(dest[i],source[i]);
- }
- }
-
-
-	public static void setEqual(double[][][] dest, double[][][] source){
-		for (int i = 0; i < source.length; i++) {
-			setEqual(dest[i],source[i]);
-		}
-	}
-
-
-	public static void setEqual(double[][] dest, double[][] source){
-		for (int i = 0; i < source.length; i++) {
-			setEqual(dest[i],source[i]);
-		}
-	}
-
- public static void setEqual(double[] dest, double[] source){
- System.arraycopy(source, 0, dest, 0, source.length);
- }
-
- public static void plusEquals(double[][][][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[][][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[] array, double val){
- for (int i = 0; i < array.length; i++) {
- array[i] += val;
- }
- }
-
-
- public static double sum(double[] array) {
- double res = 0;
- for (int i = 0; i < array.length; i++) res += array[i];
- return res;
- }
-
-
-
- public static double[][] deepclone(double[][] in){
- double[][] res = new double[in.length][];
- for (int i = 0; i < res.length; i++) {
- res[i] = in[i].clone();
- }
- return res;
- }
-
-
- public static double[][][] deepclone(double[][][] in){
- double[][][] res = new double[in.length][][];
- for (int i = 0; i < res.length; i++) {
- res[i] = deepclone(in[i]);
- }
- return res;
- }
-
- public static double cosine(double[] a,
- double[] b) {
- return (dotProduct(a, b)+1e-5)/(Math.sqrt(dotProduct(a, a)+1e-5)*Math.sqrt(dotProduct(b, b)+1e-5));
- }
-
- public static double max(double[] ds) {
- double max = Double.NEGATIVE_INFINITY;
- for(double d:ds) max = Math.max(d,max);
- return max;
- }
-
- public static void exponentiate(double[] a) {
- for (int i = 0; i < a.length; i++) {
- a[i] = Math.exp(a[i]);
- }
- }
-
- public static int sum(int[] array) {
- int res = 0;
- for (int i = 0; i < array.length; i++) res += array[i];
- return res;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java b/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java
deleted file mode 100644
index 1ff1ae4a..00000000
--- a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package util;
-
-public interface DifferentiableObjective {
-
- public double getValue();
-
- public void getGradient(double[] gradient);
-
- public void getParameters(double[] params);
-
- public void setParameters(double[] newParameters);
-
- public int getNumParameters();
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java b/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java
deleted file mode 100644
index ff1478ad..00000000
--- a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package util;
-
-public class DigammaFunction {
- public static double expDigamma(double number){
- if(number==0)return number;
- return Math.exp(digamma(number));
- }
-
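-	// Shift the argument above 7 via the recurrence digamma(x) = digamma(x+1) - 1/x,
-	// then apply an asymptotic series evaluated at x - 0.5.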
- public static double digamma(double number){
- if(number > 7){
- return digammApprox(number-0.5);
- }else{
- return digamma(number+1) - 1.0/number;
- }
- }
-
- private static double digammApprox(double value){
- return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4)
- + 0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8);
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/FileSystem.java b/gi/posterior-regularisation/prjava/src/util/FileSystem.java
deleted file mode 100644
index d7812e40..00000000
--- a/gi/posterior-regularisation/prjava/src/util/FileSystem.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package util;
-
-import java.io.File;
-
-public class FileSystem {
- public static boolean createDir(String directory) {
-
- File dir = new File(directory);
- if (!dir.isDirectory()) {
- boolean success = dir.mkdirs();
- if (!success) {
- System.out.println("Unable to create directory " + directory);
- return false;
- }
- System.out.println("Created directory " + directory);
- } else {
- System.out.println("Reusing directory " + directory);
- }
- return true;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/InputOutput.java b/gi/posterior-regularisation/prjava/src/util/InputOutput.java
deleted file mode 100644
index da7f71bf..00000000
--- a/gi/posterior-regularisation/prjava/src/util/InputOutput.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package util;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-import java.util.Properties;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-public class InputOutput {
-
- /**
-	 * Opens a file, either gzip-compressed or uncompressed.
- */
- public static BufferedReader openReader(String fileName) throws UnsupportedEncodingException, FileNotFoundException, IOException{
- System.out.println("Reading: " + fileName);
- BufferedReader reader;
- fileName = fileName.trim();
- if(fileName.endsWith("gz")){
- reader = new BufferedReader(
- new InputStreamReader(new GZIPInputStream(new FileInputStream(fileName)),"UTF8"));
- }else{
- reader = new BufferedReader(new InputStreamReader(
- new FileInputStream(fileName), "UTF8"));
- }
-
- return reader;
- }
-
-
- public static PrintStream openWriter(String fileName)
- throws UnsupportedEncodingException, FileNotFoundException, IOException{
-		System.out.println("Writing to file: " + fileName);
-		PrintStream writer;
-		fileName = fileName.trim();
-		if(fileName.endsWith("gz")){
-			writer = new PrintStream(new GZIPOutputStream(new FileOutputStream(fileName)),
-					true, "UTF-8");
-		}else{
-			writer = new PrintStream(new FileOutputStream(fileName),
-					true, "UTF-8");
-		}
-
-		return writer;
- }
-
- public static Properties readPropertiesFile(String fileName) {
- Properties properties = new Properties();
- try {
- properties.load(new FileInputStream(fileName));
- } catch (IOException e) {
- e.printStackTrace();
- throw new AssertionError("Wrong properties file " + fileName);
- }
- System.out.println(properties.toString());
-
- return properties;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/LogSummer.java b/gi/posterior-regularisation/prjava/src/util/LogSummer.java
deleted file mode 100644
index 117393b9..00000000
--- a/gi/posterior-regularisation/prjava/src/util/LogSummer.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package util;
-
-import java.lang.Math;
-
-/*
- * Math tool for computing logs of sums, when the terms of the sum are already in log form.
- * (Useful if the terms of the sum are very small numbers.)
- */
-public class LogSummer {
-
- private LogSummer() {
- }
-
- /**
- * Given log(a) and log(b), computes log(a + b).
- *
- * @param loga log of first sum term
- * @param logb log of second sum term
- * @return log(sum), where sum = a + b
- */
- public static double sum(double loga, double logb) {
- assert(!Double.isNaN(loga));
- assert(!Double.isNaN(logb));
-
- if(Double.isInfinite(loga))
- return logb;
- if(Double.isInfinite(logb))
- return loga;
-
- double maxLog;
- double difference;
- if(loga > logb) {
- difference = logb - loga;
- maxLog = loga;
- }
- else {
- difference = loga - logb;
- maxLog = logb;
- }
-
- return Math.log1p(Math.exp(difference)) + maxLog;
- }
-
- /**
- * Computes log(exp(array[index]) + b), and
- * modifies array[index] to contain this new value.
- *
- * @param array array to modify
- * @param index index at which to modify
- * @param logb log of the second sum term
- */
- public static void sum(double[] array, int index, double logb) {
- array[index] = sum(array[index], logb);
- }
-
- /**
- * Computes log(a + b + c + ...) from log(a), log(b), log(c), ...
- * by recursively splitting the input and delegating to the sum method.
- *
- * @param terms an array containing the log of all the terms for the sum
- * @return log(sum), where sum = exp(terms[0]) + exp(terms[1]) + ...
- */
- public static double sumAll(double... terms) {
- return sumAllHelper(terms, 0, terms.length);
- }
-
- /**
- * Computes log(a_0 + a_1 + ...) from a_0 = exp(terms[begin]),
- * a_1 = exp(terms[begin + 1]), ..., a_{end - 1 - begin} = exp(terms[end - 1]).
- *
- * @param terms an array containing the log of all the terms for the sum,
- * and possibly some other terms that will not go into the sum
- * @return log of the sum of the elements in the [begin, end) region of the terms array
- */
- private static double sumAllHelper(final double[] terms, final int begin, final int end) {
- int length = end - begin;
- switch(length) {
- case 0: return Double.NEGATIVE_INFINITY;
- case 1: return terms[begin];
- default:
- int midIndex = begin + length/2;
- return sum(sumAllHelper(terms, begin, midIndex), sumAllHelper(terms, midIndex, end));
- }
- }
-
-} \ No newline at end of file
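A quick usage check for the summer above; the demo class name is illustrative, but the call matches the API as declared: sumAll takes the logs of the terms and returns the log of their sum.

    public class LogSummerDemo {
        public static void main(String[] args) {
            double logSum = util.LogSummer.sumAll(
                    Math.log(1), Math.log(2), Math.log(3), Math.log(4));
            System.out.println(Math.exp(logSum)); // prints ~10.0, since 1+2+3+4 = 10
        }
    }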
diff --git a/gi/posterior-regularisation/prjava/src/util/MathUtil.java b/gi/posterior-regularisation/prjava/src/util/MathUtil.java
deleted file mode 100644
index 799b1faf..00000000
--- a/gi/posterior-regularisation/prjava/src/util/MathUtil.java
+++ /dev/null
@@ -1,148 +0,0 @@
-package util;
-
-import java.util.Random;
-
-public class MathUtil {
- public static final boolean closeToOne(double number){
- return Math.abs(number-1) < 1.E-10;
- }
-
- public static final boolean closeToZero(double number){
- return Math.abs(number) < 1.E-5;
- }
-
-	/**
-	 * Return a random multinomial distribution.
-	 *
-	 * @param size dimension of the distribution
-	 * @param r source of randomness
-	 * @return a vector of non-negative entries summing to one
-	 */
- public static final double[] randomVector(int size, Random r){
- double[] random = new double[size];
- double sum=0;
- for(int i = 0; i < size; i++){
- double number = r.nextDouble();
- random[i] = number;
- sum+=number;
- }
- for(int i = 0; i < size; i++){
- random[i] = random[i]/sum;
- }
- return random;
- }
-
-
-
- public static double sum(double[] ds) {
- double res = 0;
- for (int i = 0; i < ds.length; i++) {
- res+=ds[i];
- }
- return res;
- }
-
- public static double max(double[] ds) {
- double res = Double.NEGATIVE_INFINITY;
- for (int i = 0; i < ds.length; i++) {
- res = Math.max(res, ds[i]);
- }
- return res;
- }
-
- public static double min(double[] ds) {
- double res = Double.POSITIVE_INFINITY;
- for (int i = 0; i < ds.length; i++) {
- res = Math.min(res, ds[i]);
- }
- return res;
- }
-
-
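-	/** N.b. with these argument names this computes KL(q||p) = sum_j q_j log(q_j/p_j);
-	 *  coordinates where either distribution is zero are skipped. */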
- public static double KLDistance(double[] p, double[] q) {
- int len = p.length;
- double kl = 0;
- for (int j = 0; j < len; j++) {
- if (p[j] == 0 || q[j] == 0) {
- continue;
- } else {
- kl += q[j] * Math.log(q[j] / p[j]);
- }
-
- }
- return kl;
- }
-
- public static double L2Distance(double[] p, double[] q) {
- int len = p.length;
- double l2 = 0;
- for (int j = 0; j < len; j++) {
- if (p[j] == 0 || q[j] == 0) {
- continue;
- } else {
- l2 += (q[j] - p[j])*(q[j] - p[j]);
- }
-
- }
- return Math.sqrt(l2);
- }
-
- public static double L1Distance(double[] p, double[] q) {
- int len = p.length;
- double l1 = 0;
- for (int j = 0; j < len; j++) {
- if (p[j] == 0 || q[j] == 0) {
- continue;
- } else {
- l1 += Math.abs(q[j] - p[j]);
- }
-
- }
- return l1;
- }
-
- public static double dot(double[] ds, double[] ds2) {
- double res = 0;
- for (int i = 0; i < ds2.length; i++) {
- res+= ds[i]*ds2[i];
- }
- return res;
- }
-
- public static double expDigamma(double number){
- return Math.exp(digamma(number));
- }
-
- public static double digamma(double number){
- if(number > 7){
- return digammApprox(number-0.5);
- }else{
- return digamma(number+1) - 1.0/number;
- }
- }
-
- private static double digammApprox(double value){
- return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4)
- + 0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8);
- }
-
- public static double eulerGamma = 0.57721566490152386060651209008240243;
- // FIXME -- so far just the initialization from Minka's paper "Estimating a Dirichlet distribution".
- public static double invDigamma(double y) {
- if (y>= -2.22) return Math.exp(y)+0.5;
- return -1.0/(y+eulerGamma);
- }
-
-
-
- public static void main(String[] args) {
- for(double i = 0; i < 10 ; i+=0.1){
- System.out.println(i+"\t"+expDigamma(i)+"\t"+(i-0.5));
- }
-// double gammaValue = (expDigamma(3)/expDigamma(10) + expDigamma(3)/expDigamma(10) + expDigamma(4)/expDigamma(10));
-// double normalValue = 3/10+3/4+10/10;
-// System.out.println("Gamma " + gammaValue + " normal " + normalValue);
- }
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Matrix.java b/gi/posterior-regularisation/prjava/src/util/Matrix.java
deleted file mode 100644
index 8fb6d911..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Matrix.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package util;
-
-public class Matrix {
- int x;
- int y;
- double[][] values;
-
- public Matrix(int x, int y){
- this.x = x;
- this.y=y;
- values = new double[x][y];
- }
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java b/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java
deleted file mode 100644
index 83a65611..00000000
--- a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package util;
-
-
-public class MemoryTracker {
-
- double initM,finalM;
- boolean start = false,finish = false;
-
- public MemoryTracker(){
-
- }
-
- public void start(){
- System.gc();
- System.gc();
- System.gc();
- initM = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/(1024*1024);
- start = true;
- }
-
- public void finish(){
- if(!start){
-			throw new RuntimeException("Cannot stop before starting");
- }
- System.gc();
- System.gc();
- System.gc();
- finalM = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/(1024*1024);
- finish = true;
- }
-
- public String print(){
- if(!finish){
-			throw new RuntimeException("Cannot print before stopping");
- }
- return "Used: " + (finalM - initM) + "MB";
- }
-
- public void clear(){
- initM = 0;
- finalM = 0;
- finish = false;
- start = false;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Pair.java b/gi/posterior-regularisation/prjava/src/util/Pair.java
deleted file mode 100644
index 7b1f108d..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Pair.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package util;
-
-public class Pair<O1, O2> {
- public O1 _first;
- public O2 _second;
-
- public final O1 first() {
- return _first;
- }
-
- public final O2 second() {
- return _second;
- }
-
- public final void setFirst(O1 value){
- _first = value;
- }
-
- public final void setSecond(O2 value){
- _second = value;
- }
-
- public Pair(O1 first, O2 second) {
- _first = first;
- _second = second;
- }
-
- public String toString(){
- return _first + " " + _second;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Printing.java b/gi/posterior-regularisation/prjava/src/util/Printing.java
deleted file mode 100644
index 14fcbe91..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Printing.java
+++ /dev/null
@@ -1,158 +0,0 @@
-package util;
-
-public class Printing {
- static java.text.DecimalFormat fmt = new java.text.DecimalFormat();
-
- public static String padWithSpace(String s, int len){
- StringBuffer sb = new StringBuffer();
- while(sb.length() +s.length() < len){
- sb.append(" ");
- }
- sb.append(s);
- return sb.toString();
- }
-
- public static String prettyPrint(double d, String patt, int len) {
- fmt.applyPattern(patt);
- String s = fmt.format(d);
- while (s.length() < len) {
- s = " " + s;
- }
- return s;
- }
-
- public static String formatTime(long duration) {
- StringBuilder sb = new StringBuilder();
-		double d = duration / 1000.0; // divide as doubles so fractions of a second survive
- fmt.applyPattern("00");
- sb.append(fmt.format((int) (d / (60 * 60))) + ":");
- d -= ((int) d / (60 * 60)) * 60 * 60;
- sb.append(fmt.format((int) (d / 60)) + ":");
- d -= ((int) d / 60) * 60;
- fmt.applyPattern("00.0");
- sb.append(fmt.format(d));
- return sb.toString();
- }
-
-
- public static String doubleArrayToString(double[] array, String[] labels, String arrayName) {
- StringBuffer res = new StringBuffer();
- res.append(arrayName);
- res.append("\n");
- for (int i = 0; i < array.length; i++) {
- if (labels == null){
- res.append(i+" \t");
- }else{
- res.append(labels[i]+ "\t");
- }
- }
- res.append("sum\n");
- double sum = 0;
- for (int i = 0; i < array.length; i++) {
- res.append(prettyPrint(array[i],
- "0.00000E00", 8) + "\t");
- sum+=array[i];
- }
- res.append(prettyPrint(sum,
- "0.00000E00", 8)+"\n");
- return res.toString();
- }
-
-
-
- public static void printDoubleArray(double[] array, String labels[], String arrayName) {
- System.out.println(doubleArrayToString(array, labels,arrayName));
- }
-
-
- public static String doubleArrayToString(double[][] array, String[] labels1, String[] labels2,
- String arrayName){
- StringBuffer res = new StringBuffer();
- res.append(arrayName);
- res.append("\n\t");
-		//Keeps running column sums; the extra last slot holds the grand total
-		double[] sums = new double[array[0].length+1];
-		//Print column headings
- for (int i = 0; i < array[0].length; i++) {
- if (labels1 == null){
- res.append(i+" \t");
- }else{
- res.append(labels1[i]+" \t");
- }
- }
- res.append("sum\n");
- double sum = 0;
- //For each row print heading
- for (int i = 0; i < array.length; i++) {
- if (labels2 == null){
- res.append(i+"\t");
- }else{
- res.append(labels2[i]+"\t");
- }
- //Print values for that row
- for (int j = 0; j < array[0].length; j++) {
- res.append(" " + prettyPrint(array[i][j],
- "0.00000E00", 8) + "\t");
- sums[j] += array[i][j];
- sum+=array[i][j]; //Sum all values of that row
- }
- //Print row sum
- res.append(prettyPrint(sum,"0.00000E00", 8)+"\n");
- sums[array[0].length]+=sum;
- sum=0;
- }
- res.append("sum\t");
-		//Print the column sums (last entry is the grand total)
- for (int i = 0; i < array[0].length+1; i++) {
- res.append(prettyPrint(sums[i],"0.00000E00", 8)+"\t");
- }
- res.append("\n");
- return res.toString();
- }
-
- public static void printDoubleArray(double[][] array, String[] labels1, String[] labels2
- , String arrayName) {
- System.out.println(doubleArrayToString(array, labels1,labels2,arrayName));
- }
-
-
- public static void printIntArray(int[][] array, String[] labels1, String[] labels2, String arrayName,
- int size1, int size2) {
- System.out.println(arrayName);
- for (int i = 0; i < size1; i++) {
- for (int j = 0; j < size2; j++) {
- System.out.print(" " + array[i][j] + " ");
-
- }
- System.out.println();
- }
- System.out.println();
- }
-
- public static String intArrayToString(int[] array, String[] labels, String arrayName) {
- StringBuffer res = new StringBuffer();
- res.append(arrayName);
- for (int i = 0; i < array.length; i++) {
- res.append(" " + array[i] + " ");
-
- }
- res.append("\n");
- return res.toString();
- }
-
- public static void printIntArray(int[] array, String[] labels, String arrayName) {
- System.out.println(intArrayToString(array, labels,arrayName));
- }
-
- public static String toString(double[][] d){
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < d.length; i++) {
- for (int j = 0; j < d[0].length; j++) {
- sb.append(prettyPrint(d[i][j], "0.00E0", 10));
- }
- sb.append("\n");
- }
- return sb.toString();
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Sorters.java b/gi/posterior-regularisation/prjava/src/util/Sorters.java
deleted file mode 100644
index 836444e5..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Sorters.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package util;
-
-import java.util.Comparator;
-
-public class Sorters {
- public static class sortWordsCounts implements Comparator{
-
- /**
- * Sorter for a pair of word id, counts. Sort ascending by counts
- */
- public int compare(Object arg0, Object arg1) {
- Pair<Integer,Integer> p1 = (Pair<Integer,Integer>)arg0;
- Pair<Integer,Integer> p2 = (Pair<Integer,Integer>)arg1;
- if(p1.second() > p2.second()){
- return 1;
- }else{
- return -1;
- }
- }
-
- }
-
-public static class sortWordsDouble implements Comparator{
-
-	/**
-	 * Sorter for a pair of word id, value. Sorts descending by value.
-	 */
- public int compare(Object arg0, Object arg1) {
- Pair<Integer,Double> p1 = (Pair<Integer,Double>)arg0;
- Pair<Integer,Double> p2 = (Pair<Integer,Double>)arg1;
- if(p1.second() < p2.second()){
- return 1;
- }else{
- return -1;
- }
- }
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/train-PR-cluster.sh b/gi/posterior-regularisation/prjava/train-PR-cluster.sh
deleted file mode 100755
index 67552c00..00000000
--- a/gi/posterior-regularisation/prjava/train-PR-cluster.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-
-d=`dirname $0`
-java -ea -Xmx30g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/commons-math-2.1.jar phrase.Trainer $*
diff --git a/gi/posterior-regularisation/projected_gradient.cc b/gi/posterior-regularisation/projected_gradient.cc
deleted file mode 100644
index f7c39817..00000000
--- a/gi/posterior-regularisation/projected_gradient.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-//
-// Minimises given functional using the projected gradient method. Based on
-// algorithm and demonstration example in Linear and Nonlinear Programming,
-// Luenberger and Ye, 3rd ed., p 370.
-//
-
-#include "invert.hh"
-#include <iostream>
-
-using namespace std;
-
-double
-f(double x1, double x2, double x3, double x4)
-{
- return x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4 - 2 * x1 - 3 * x4;
-}
-
-ublas::vector<double>
-g(double x1, double x2, double x3, double x4)
-{
- ublas::vector<double> v(4);
- v(0) = 2 * x1 - 2;
- v(1) = 2 * x2;
- v(2) = 2 * x3;
- v(3) = 2 * x4 - 3;
- return v;
-}
-
-ublas::matrix<double>
-activeConstraints(double x1, double x2, double x3, double x4)
-{
- int n = 2;
- if (x1 == 0) ++n;
- if (x2 == 0) ++n;
- if (x3 == 0) ++n;
- if (x4 == 0) ++n;
-
- ublas::matrix<double> a(n,4);
- a(0, 0) = 2; a(0, 1) = 1; a(0, 2) = 1; a(0, 3) = 4;
- a(1, 0) = 1; a(1, 1) = 1; a(1, 2) = 2; a(1, 3) = 1;
-
- int c = 2;
- if (x1 == 0) a(c++, 0) = 1;
- if (x2 == 0) a(c++, 1) = 1;
- if (x3 == 0) a(c++, 2) = 1;
- if (x4 == 0) a(c++, 3) = 1;
-
- return a;
-}
-
-ublas::matrix<double>
-projection(const ublas::matrix<double> &a)
-{
- ublas::matrix<double> aT = ublas::trans(a);
- ublas::matrix<double> inv(a.size1(), a.size1());
- bool ok = invert_matrix(ublas::matrix<double>(ublas::prod(a, aT)), inv);
- assert(ok && "Failed to invert matrix");
- return ublas::identity_matrix<double>(4) -
- ublas::prod(aT, ublas::matrix<double>(ublas::prod(inv, a)));
-}
-
-int main(int argc, char *argv[])
-{
- double x1 = 2, x2 = 2, x3 = 1, x4 = 0;
-
- double fval = f(x1, x2, x3, x4);
- cout << "f = " << fval << endl;
- ublas::vector<double> grad = g(x1, x2, x3, x4);
- cout << "g = " << grad << endl;
- ublas::matrix<double> A = activeConstraints(x1, x2, x3, x4);
- cout << "A = " << A << endl;
- ublas::matrix<double> P = projection(A);
- cout << "P = " << P << endl;
- // the direction of movement
- ublas::vector<double> d = prod(P, grad);
- cout << "d = " << (d / d(0)) << endl;
-
- // special case for d = 0
-
- // next solve for limits on the line search
-
- // then use golden rule technique between these values (if bounded)
-
- // or simple Armijo's rule technique
-
- return 0;
-}
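For reference, the direction computed in main follows the working-set method from the cited chapter: with A the matrix of active constraint rows, the projector onto the constraints' tangent space is

    P = I - A^T (A A^T)^{-1} A,    d = -P \nabla f(x),

i.e. the steepest-descent direction that keeps the active constraints satisfied to first order. (The code prints d normalised by its first component, matching the textbook's presentation.)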
diff --git a/gi/posterior-regularisation/simplex_pg.py b/gi/posterior-regularisation/simplex_pg.py
deleted file mode 100644
index 5da796d3..00000000
--- a/gi/posterior-regularisation/simplex_pg.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#
-# Following Luenberger and Ye, Linear and Nonlinear Programming, 3rd ed., p. 367
-# "The gradient projection method"
-# applied to an equality constraint for a simplex.
-#
-# min f(x)
-# s.t. x >= 0, sum_i x = d
-#
-# FIXME: enforce the positivity constraint - a limit on the line search?
-#
-
-from numpy import *
-from scipy import *
-from linesearch import line_search
-# local copy of scipy's Armijo line_search - wasn't enforcing alpha max correctly
-import sys
-
-dims = 4
-
-def f(x):
- fv = x[0]*x[0] + x[1]*x[1] + x[2]*x[2] + x[3]*x[3] - 2*x[0] - 3*x[3]
- # print 'evaluating f at', x, 'value', fv
- return fv
-
-def g(x):
- return array([2*x[0] - 2, 2*x[1], 2*x[2], 2*x[3]-3])
-
-def pg(x):
- gv = g(x)
- return gv - sum(gv) / dims
-
-x = ones(dims) / dims
-old_fval = None
-
-while True:
- fv = f(x)
- gv = g(x)
- dv = pg(x)
-
- print 'x', x, 'f', fv, 'g', gv, 'd', dv
-
- if old_fval == None:
- old_fval = fv + 0.1
-
- # solve for maximum step size i.e. when positivity constraints kick in
- # x - alpha d = 0 => alpha = x/d
-	amax = max(x/dv)   # n.b. a strict positivity guarantee would take min over coordinates with dv > 0
- if amax < 1e-8: break
-
- stuff = line_search(f, pg, x, -dv, dv, fv, old_fval, amax=amax)
- alpha = stuff[0] # Nb. can avoid next evaluation of f,g,d using 'stuff'
- if alpha < 1e-8: break
- x -= alpha * dv
-
- old_fval = fv
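The pg(x) function above is the specialisation of that projection to the single equality constraint \sum_i x_i = d: for A = \mathbf{1}^T the projector reduces to P = I - \frac{1}{n}\mathbf{1}\mathbf{1}^T, so the projected gradient is just the gradient with its mean removed,

    Pg = g - \bar{g}\,\mathbf{1},

which is exactly what `gv - sum(gv) / dims` computes.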
diff --git a/gi/posterior-regularisation/split-languages.py b/gi/posterior-regularisation/split-languages.py
deleted file mode 100755
index 206da661..00000000
--- a/gi/posterior-regularisation/split-languages.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-sout = open(sys.argv[1], 'w')
-tout = open(sys.argv[2], 'w')
-for line in sys.stdin:
- phrase, contexts = line.rstrip().split('\t')
- sp, tp = phrase.split(' <SPLIT> ')
- sout.write('%s\t' % sp)
- tout.write('%s\t' % tp)
- parts = contexts.split(' ||| ')
- for i in range(0, len(parts), 2):
- sc, tc = parts[i].split(' <SPLIT> ')
- if i != 0:
- sout.write(' ||| ')
- tout.write(' ||| ')
- sout.write('%s ||| %s' % (sc, parts[i+1]))
- tout.write('%s ||| %s' % (tc, parts[i+1]))
- sout.write('\n')
- tout.write('\n')
-sout.close()
-tout.close()
diff --git a/gi/posterior-regularisation/train_pr_agree.py b/gi/posterior-regularisation/train_pr_agree.py
deleted file mode 100644
index 9d41362d..00000000
--- a/gi/posterior-regularisation/train_pr_agree.py
+++ /dev/null
@@ -1,400 +0,0 @@
-import sys
-import scipy.optimize
-from scipy.stats import geom
-from numpy import *
-from numpy.random import random, seed
-
-style = sys.argv[1]
-if len(sys.argv) >= 3:
- seed(int(sys.argv[2]))
-
-#
-# Step 1: load the concordance counts
-#
-
-edges = []
-word_types = {}
-phrase_types = {}
-context_types = {}
-
-for line in sys.stdin:
- phrase, rest = line.strip().split('\t')
- ptoks = tuple(map(lambda t: word_types.setdefault(t, len(word_types)), phrase.split()))
- pid = phrase_types.setdefault(ptoks, len(phrase_types))
-
- parts = rest.split('|||')
- for i in range(0, len(parts), 2):
- context, count = parts[i:i+2]
-
- ctx = filter(lambda x: x != '<PHRASE>', context.split())
- ctoks = tuple(map(lambda t: word_types.setdefault(t, len(word_types)), ctx))
- cid = context_types.setdefault(ctoks, len(context_types))
-
- cnt = int(count.strip()[2:])
- edges.append((pid, cid, cnt))
-
-word_type_list = [None] * len(word_types)
-for typ, index in word_types.items():
- word_type_list[index] = typ
-
-phrase_type_list = [None] * len(phrase_types)
-for typ, index in phrase_types.items():
- phrase_type_list[index] = typ
-
-context_type_list = [None] * len(context_types)
-for typ, index in context_types.items():
- context_type_list[index] = typ
-
-num_tags = 5
-num_types = len(word_types)
-num_phrases = len(phrase_types)
-num_contexts = len(context_types)
-num_edges = len(edges)
-
-print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', num_types, 'word types'
-
-#
-# Step 2: expectation maximisation
-#
-
-def normalise(a):
- return a / float(sum(a))
-
-class PhraseToContextModel:
- def __init__(self):
- # Pr(tag | phrase)
- self.tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
- # Pr(context at pos i = w | tag) indexed by i, tag, word
- self.contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
-
- def prob(self, pid, cid):
- # return distribution p(tag, context | phrase) as vector of length |tags|
- context = context_type_list[cid]
- dist = zeros(num_tags)
- for t in range(num_tags):
- prob = self.tagDist[pid][t]
- for k, tokid in enumerate(context):
- prob *= self.contextWordDist[k][t][tokid]
- dist[t] = prob
- return dist
-
- def expectation_maximisation_step(self, lamba=None):
- tagCounts = zeros((num_phrases, num_tags))
- contextWordCounts = zeros((4, num_tags, num_types))
-
- # E-step
- llh = 0
- for pid, cid, cnt in edges:
- q = self.prob(pid, cid)
- z = sum(q)
- q /= z
- llh += log(z)
- context = context_type_list[cid]
- if lamba != None:
- q *= exp(lamba)
- q /= sum(q)
- for t in range(num_tags):
- tagCounts[pid][t] += cnt * q[t]
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][context[i]] += cnt * q[t]
-
- # M-step
- for p in range(num_phrases):
- self.tagDist[p] = normalise(tagCounts[p])
- for i in range(4):
- for t in range(num_tags):
- self.contextWordDist[i][t] = normalise(contextWordCounts[i,t])
-
- return llh
-
-class ContextToPhraseModel:
- def __init__(self):
- # Pr(tag | context) = Multinomial
- self.tagDist = [normalise(random(num_tags)+1) for p in range(num_contexts)]
- # Pr(phrase = w | tag) = Multinomial
- self.phraseSingleDist = [normalise(random(num_types)+1) for t in range(num_tags)]
- # Pr(phrase_1 = w | tag) = Multinomial
- self.phraseLeftDist = [normalise(random(num_types)+1) for t in range(num_tags)]
- # Pr(phrase_-1 = w | tag) = Multinomial
- self.phraseRightDist = [normalise(random(num_types)+1) for t in range(num_tags)]
- # Pr(|phrase| = l | tag) = Geometric
- self.phraseLengthDist = [0.5] * num_tags
- # n.b. internal words for phrases of length >= 3 are drawn from uniform distribution
-
- def prob(self, pid, cid):
- # return distribution p(tag, phrase | context) as vector of length |tags|
- phrase = phrase_type_list[pid]
- dist = zeros(num_tags)
- for t in range(num_tags):
- prob = self.tagDist[cid][t]
- f = self.phraseLengthDist[t]
- prob *= geom.pmf(len(phrase), f)
- if len(phrase) == 1:
- prob *= self.phraseSingleDist[t][phrase[0]]
- else:
- prob *= self.phraseLeftDist[t][phrase[0]]
- prob *= self.phraseRightDist[t][phrase[-1]]
- dist[t] = prob
- return dist
-
- def expectation_maximisation_step(self, lamba=None):
- tagCounts = zeros((num_contexts, num_tags))
- phraseSingleCounts = zeros((num_tags, num_types))
- phraseLeftCounts = zeros((num_tags, num_types))
- phraseRightCounts = zeros((num_tags, num_types))
-        phraseLength = zeros(num_tags)   # indexed by tag
-
- # E-step
- llh = 0
- for pid, cid, cnt in edges:
- q = self.prob(pid, cid)
- z = sum(q)
- q /= z
- llh += log(z)
- if lamba != None:
- q *= exp(lamba)
- q /= sum(q)
- #print 'p', phrase_type_list[pid], 'c', context_type_list[cid], 'q', q
- phrase = phrase_type_list[pid]
- for t in range(num_tags):
- tagCounts[cid][t] += cnt * q[t]
- phraseLength[t] += cnt * len(phrase) * q[t]
- if len(phrase) == 1:
- phraseSingleCounts[t][phrase[0]] += cnt * q[t]
- else:
- phraseLeftCounts[t][phrase[0]] += cnt * q[t]
- phraseRightCounts[t][phrase[-1]] += cnt * q[t]
-
- # M-step
- for t in range(num_tags):
- self.phraseLengthDist[t] = min(max(sum(tagCounts[:,t]) / phraseLength[t], 1e-6), 1-1e-6)
- self.phraseSingleDist[t] = normalise(phraseSingleCounts[t])
- self.phraseLeftDist[t] = normalise(phraseLeftCounts[t])
- self.phraseRightDist[t] = normalise(phraseRightCounts[t])
- for c in range(num_contexts):
- self.tagDist[c] = normalise(tagCounts[c])
-
- #print 't', self.tagDist
- #print 'l', self.phraseLengthDist
- #print 's', self.phraseSingleDist
- #print 'L', self.phraseLeftDist
- #print 'R', self.phraseRightDist
-
- return llh
-
-class ProductModel:
- """
- WARNING: I haven't verified the maths behind this model. It's quite likely to be incorrect.
- """
-
- def __init__(self):
- self.pcm = PhraseToContextModel()
- self.cpm = ContextToPhraseModel()
-
- def prob(self, pid, cid):
- p1 = self.pcm.prob(pid, cid)
- p2 = self.cpm.prob(pid, cid)
- return (p1 / sum(p1)) * (p2 / sum(p2))
-
- def expectation_maximisation_step(self):
- tagCountsGivenPhrase = zeros((num_phrases, num_tags))
- contextWordCounts = zeros((4, num_tags, num_types))
-
- tagCountsGivenContext = zeros((num_contexts, num_tags))
- phraseSingleCounts = zeros((num_tags, num_types))
- phraseLeftCounts = zeros((num_tags, num_types))
- phraseRightCounts = zeros((num_tags, num_types))
-        phraseLength = zeros(num_tags)   # indexed by tag
-
- kl = llh1 = llh2 = 0
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- llh1 += log(sum(p1)) * cnt
- p2 = self.cpm.prob(pid, cid)
- llh2 += log(sum(p2)) * cnt
-
- q = (p1 / sum(p1)) * (p2 / sum(p2))
- kl += log(sum(q)) * cnt
- qi = sqrt(q)
- qi /= sum(qi)
-
- phrase = phrase_type_list[pid]
- context = context_type_list[cid]
- for t in range(num_tags):
- tagCountsGivenPhrase[pid][t] += cnt * qi[t]
- tagCountsGivenContext[cid][t] += cnt * qi[t]
- phraseLength[t] += cnt * len(phrase) * qi[t]
- if len(phrase) == 1:
- phraseSingleCounts[t][phrase[0]] += cnt * qi[t]
- else:
- phraseLeftCounts[t][phrase[0]] += cnt * qi[t]
- phraseRightCounts[t][phrase[-1]] += cnt * qi[t]
- for i in range(4):
- contextWordCounts[i][t][context[i]] += cnt * qi[t]
-
- kl *= -2
-
- for t in range(num_tags):
- for i in range(4):
- self.pcm.contextWordDist[i][t] = normalise(contextWordCounts[i,t])
- self.cpm.phraseLengthDist[t] = min(max(sum(tagCountsGivenContext[:,t]) / phraseLength[t], 1e-6), 1-1e-6)
- self.cpm.phraseSingleDist[t] = normalise(phraseSingleCounts[t])
- self.cpm.phraseLeftDist[t] = normalise(phraseLeftCounts[t])
- self.cpm.phraseRightDist[t] = normalise(phraseRightCounts[t])
- for p in range(num_phrases):
- self.pcm.tagDist[p] = normalise(tagCountsGivenPhrase[p])
- for c in range(num_contexts):
- self.cpm.tagDist[c] = normalise(tagCountsGivenContext[c])
-
- # return the overall objective
- return llh1 + llh2 + kl
-
-class RegularisedProductModel:
- # as above, but with a slack regularisation term which kills the
- # closed-form solution for the E-step
-
- def __init__(self, epsilon):
- self.pcm = PhraseToContextModel()
- self.cpm = ContextToPhraseModel()
- self.epsilon = epsilon
- self.lamba = zeros(num_tags)
-
- def prob(self, pid, cid):
- p1 = self.pcm.prob(pid, cid)
- p2 = self.cpm.prob(pid, cid)
- return (p1 / sum(p1)) * (p2 / sum(p2))
-
- def dual(self, lamba):
- return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5
-
- def dual_gradient(self, lamba):
- return self.expected_features(lamba) + self.epsilon * 2 * lamba
-
- def expectation_maximisation_step(self):
- # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2
- self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba,
- bounds=[(0, 1e100)] * num_tags,
- fprime=self.dual_gradient, iprint=1)
-
- # E,M-steps: collect expected counts under q_lambda and normalise
- llh1 = self.pcm.expectation_maximisation_step(self.lamba)
- llh2 = self.cpm.expectation_maximisation_step(-self.lamba)
-
- # return the overall objective: llh - KL(q||p1.p2)
- # llh = llh1 + llh2
- # kl = sum q log q / p1 p2 = sum q { lambda . phi } - log Z
- return llh1 + llh2 + self.logz(self.lamba) \
- - dot(self.lamba, self.expected_features(self.lamba))
-
- def logz(self, lamba):
- lz = 0
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- z1 = dot(p1 / sum(p1), exp(lamba))
- lz += log(z1) * cnt
-
- p2 = self.cpm.prob(pid, cid)
- z2 = dot(p2 / sum(p2), exp(-lamba))
- lz += log(z2) * cnt
- return lz
-
- def expected_features(self, lamba):
- fs = zeros(num_tags)
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- q1 = (p1 / sum(p1)) * exp(lamba)
- fs += cnt * q1 / sum(q1)
-
- p2 = self.cpm.prob(pid, cid)
- q2 = (p2 / sum(p2)) * exp(-lamba)
- fs -= cnt * q2 / sum(q2)
- return fs
-
-
-class InterpolatedModel:
- def __init__(self, epsilon):
- self.pcm = PhraseToContextModel()
- self.cpm = ContextToPhraseModel()
- self.epsilon = epsilon
- self.lamba = zeros(num_tags)
-
- def prob(self, pid, cid):
- p1 = self.pcm.prob(pid, cid)
- p2 = self.cpm.prob(pid, cid)
- return (p1 + p2) / 2
-
- def dual(self, lamba):
- return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5
-
- def dual_gradient(self, lamba):
- return self.expected_features(lamba) + self.epsilon * 2 * lamba
-
- def expectation_maximisation_step(self):
- # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2
- self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba,
- bounds=[(0, 1e100)] * num_tags,
- fprime=self.dual_gradient, iprint=2)
-
- # E,M-steps: collect expected counts under q_lambda and normalise
- llh1 = self.pcm.expectation_maximisation_step(self.lamba)
- llh2 = self.cpm.expectation_maximisation_step(self.lamba)
-
- # return the overall objective: llh1 + llh2 - KL(q||p1.p2)
- # kl = sum_y q log q / 0.5 * (p1 + p2) = sum_y q(y) { -lambda . phi(y) } - log Z
- # = -log Z + lambda . (E_q1[-phi] + E_q2[-phi]) / 2
- kl = -self.logz(self.lamba) + dot(self.lamba, self.expected_features(self.lamba))
- return llh1 + llh2 - kl, llh1, llh2, kl
- # FIXME: KL comes out negative...
-
- def logz(self, lamba):
- lz = 0
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- q1 = p1 / sum(p1) * exp(-lamba)
- q1z = sum(q1)
-
- p2 = self.cpm.prob(pid, cid)
- q2 = p2 / sum(p2) * exp(-lamba)
- q2z = sum(q2)
-
- lz += log(0.5 * (q1z + q2z)) * cnt
- return lz
-
- # z = 1/2 * (sum_y p1(y|x) exp (-lambda . phi(y)) + sum_y p2(y|x) exp (-lambda . phi(y)))
- # = 1/2 (z1 + z2)
- # d (log z) / dlambda = 1/2 (E_q1 [ -phi ] + E_q2 [ -phi ] )
- def expected_features(self, lamba):
- fs = zeros(num_tags)
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- q1 = (p1 / sum(p1)) * exp(-lamba)
- fs -= 0.5 * cnt * q1 / sum(q1)
-
- p2 = self.cpm.prob(pid, cid)
- q2 = (p2 / sum(p2)) * exp(-lamba)
- fs -= 0.5 * cnt * q2 / sum(q2)
- return fs
-
-if style == 'p2c':
- m = PhraseToContextModel()
-elif style == 'c2p':
- m = ContextToPhraseModel()
-elif style == 'prod':
- m = ProductModel()
-elif style == 'prodslack':
- m = RegularisedProductModel(0.5)
-elif style == 'sum':
- m = InterpolatedModel(0.5)
-
-for iteration in range(30):
- obj = m.expectation_maximisation_step()
- print 'iteration', iteration, 'objective', obj
-
-for pid, cid, cnt in edges:
- p = m.prob(pid, cid)
- phrase = phrase_type_list[pid]
- phrase_str = ' '.join(map(word_type_list.__getitem__, phrase))
- context = context_type_list[cid]
- context_str = ' '.join(map(word_type_list.__getitem__, context))
- print '%s\t%s ||| C=%d' % (phrase_str, context_str, argmax(p))
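Both slack-regularised models above solve the same penalised dual in their PR-step: the projected posteriors are exponential-family tilts of the base posteriors, q_i(y) \propto p_i(y|x)\, e^{\pm\lambda\cdot\phi(y)} (with opposing signs in the agreement-style product model), and \lambda is chosen by

    \min_{\lambda \ge 0} \; \log Z(\lambda) + \epsilon \lVert \lambda \rVert_2 .

Note the gradient of the \epsilon\lVert\lambda\rVert_2 penalty is \epsilon\lambda/\lVert\lambda\rVert_2, whereas dual_gradient adds 2\epsilon\lambda, the gradient of \epsilon\lVert\lambda\rVert_2^2; the dual and its reported gradient therefore penalise slightly different norms.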
diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py
deleted file mode 100644
index 8521bccb..00000000
--- a/gi/posterior-regularisation/train_pr_global.py
+++ /dev/null
@@ -1,296 +0,0 @@
-import sys
-import scipy.optimize
-from numpy import *
-from numpy.random import random
-
-#
-# Step 1: load the concordance counts
-#
-
-edges_phrase_to_context = []
-edges_context_to_phrase = []
-types = {}
-context_types = {}
-num_edges = 0
-
-for line in sys.stdin:
- phrase, rest = line.strip().split('\t')
- parts = rest.split('|||')
- edges_phrase_to_context.append((phrase, []))
- for i in range(0, len(parts), 2):
- context, count = parts[i:i+2]
-
- ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split()))
- cnt = int(count.strip()[2:])
- edges_phrase_to_context[-1][1].append((ctx, cnt))
-
- cid = context_types.get(ctx, len(context_types))
- if cid == len(context_types):
- context_types[ctx] = cid
- edges_context_to_phrase.append((ctx, []))
- edges_context_to_phrase[cid][1].append((phrase, cnt))
-
- for token in ctx:
- types.setdefault(token, len(types))
- for token in phrase.split():
- types.setdefault(token, len(types))
-
- num_edges += 1
-
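-# Each stdin line is assumed to look like
-#   <phrase>\t<context with a <PHRASE> hole> ||| C=<count> ||| ...
-# e.g. "le chat\tle <PHRASE> noir ||| C=2 ||| un <PHRASE> blanc ||| C=1"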
-print 'Read in', num_edges, 'edges and', len(types), 'word types'
-
-#print 'edges_phrase_to_context', edges_phrase_to_context
-
-#
-# Step 2: initialise the model parameters
-#
-
-num_tags = 10
-num_types = len(types)
-num_phrases = len(edges_phrase_to_context)
-num_contexts = len(edges_context_to_phrase)
-delta = int(sys.argv[1])
-gamma = int(sys.argv[2])
-
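-# Assumed invocation, given the two positional arguments read above:
-#   python train_pr_global.py <delta> <gamma> < concordance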
-def normalise(a):
- return a / float(sum(a))
-
-# Pr(tag | phrase)
-tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
-#tagDist = [normalise(array(range(1,num_tags+1))) for p in range(num_phrases)]
-# Pr(context at pos i = w | tag) indexed by i, tag, word
-#contextWordDist = [[normalise(array(range(1,num_types+1))) for t in range(num_tags)] for i in range(4)]
-contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
-# PR Lagrange multipliers
-lamba = zeros(2 * num_edges * num_tags)
-omega_offset = num_edges * num_tags
-lamba_index = {}
-next = 0
-for phrase, ccs in edges_phrase_to_context:
- for context, count in ccs:
- lamba_index[phrase,context] = next
- next += num_tags
-#print lamba_index
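-# Multiplier vector layout: entries [0, num_edges * num_tags) hold lambda for
-# the phrase-to-context constraints; the omega block for the context-to-phrase
-# constraints starts at omega_offset and reuses lamba_index shifted by it.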
-
-#
-# Step 3: expectation maximisation
-#
-
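-# Model: Pr(tag, context | phrase) = Pr(tag | phrase) * prod_{i=0..3}
-# Pr(word at context position i | tag); the E-step and the dual both
-# renormalise this product over tags to get per-edge posteriors.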
-for iteration in range(20):
- tagCounts = [zeros(num_tags) for p in range(num_phrases)]
- contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)]
-
- #print 'tagDist', tagDist
- #print 'contextWordCounts[0][0]', contextWordCounts[0][0]
-
- # Tune lambda
- # dual: min log Z(lamba) s.t. lamba >= 0;
- # sum_c lamba_pct <= delta; sum_p lamba_pct <= gamma
- def dual(ls):
- logz = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- #print 'dual', phrase, context, count, 'p =', conditionals
-
- local_z = 0
- for t in range(num_tags):
- li = lamba_index[phrase,context] + t
- local_z += conditionals[t] * exp(-ls[li] - ls[omega_offset+li])
- logz += log(local_z) * count
-
- #print 'ls', ls
- #print 'lambda', list(ls)
- #print 'dual', logz
- return logz
-
- def loglikelihood():
- llh = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- llh += log(cz) * count
- return llh
-
- def primal(ls):
- # FIXME: returns negative values for KL (impossible)
- logz = dual(ls)
- expectations = -dual_deriv(ls)
- kl = -logz - dot(ls, expectations)
- llh = loglikelihood()
-
- pt_l1linf = 0
- for phrase, ccs in edges_phrase_to_context:
- for t in range(num_tags):
-        best = -inf  # numpy's inf, via "from numpy import *"
- for context, count in ccs:
- li = lamba_index[phrase,context] + t
- s = expectations[li]
- if s > best: best = s
- pt_l1linf += best
-
- ct_l1linf = 0
- for context, pcs in edges_context_to_phrase:
- for t in range(num_tags):
-        best = -inf  # numpy's inf, via "from numpy import *"
- for phrase, count in pcs:
- li = omega_offset + lamba_index[phrase,context] + t
- s = expectations[li]
- if s > best: best = s
- ct_l1linf += best
-
- return llh, kl, pt_l1linf, ct_l1linf, llh - kl - delta * pt_l1linf - gamma * ct_l1linf
-
- def dual_deriv(ls):
-    # d/dl log(z) = -E_q[phi]  (phi enters through exp(-lambda . phi))
- deriv = zeros(2 * num_edges * num_tags)
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- scores = zeros(num_tags)
- for t in range(num_tags):
- li = lamba_index[phrase,context] + t
- scores[t] = conditionals[t] * exp(-ls[li] - ls[omega_offset + li])
- local_z = sum(scores)
-
- #print 'ddual', phrase, context, count, 'q =', scores / local_z
-
- for t in range(num_tags):
- deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z
- deriv[omega_offset + lamba_index[phrase,context] + t] -= count * scores[t] / local_z
-
- #print 'ddual', list(deriv)
- return deriv
-
- def constraints(ls):
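-    # one >= 0 inequality per phrase-tag pair and per context-tag pair; the
-    # second block is sized with num_edges as an upper bound on num_contexts,
-    # so unused trailing entries stay zero (trivially feasible)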
- cons = zeros(num_phrases * num_tags + num_edges * num_tags)
-
- index = 0
- for phrase, ccs in edges_phrase_to_context:
- for t in range(num_tags):
- if delta > 0:
- total = delta
- for cprime, count in ccs:
- total -= ls[lamba_index[phrase, cprime] + t]
- cons[index] = total
- index += 1
-
- for context, pcs in edges_context_to_phrase:
- for t in range(num_tags):
- if gamma > 0:
- total = gamma
- for pprime, count in pcs:
- total -= ls[omega_offset + lamba_index[pprime, context] + t]
- cons[index] = total
- index += 1
-
- #print 'cons', cons
- return cons
-
-  def constraints_deriv(ls):
-    # Jacobian of constraints(): each row carries a -1 for every multiplier
-    # appearing in that row's inequality
-    cons = zeros((num_phrases * num_tags + num_edges * num_tags, 2 * num_edges * num_tags))
-
-    index = 0
-    for phrase, ccs in edges_phrase_to_context:
-      for t in range(num_tags):
-        if delta > 0:
-          d = cons[index,:]  # a view: assignments fill the row in place
-          for cprime, count in ccs:
-            d[lamba_index[phrase, cprime] + t] = -1
-          index += 1
-
-    for context, pcs in edges_context_to_phrase:
-      for t in range(num_tags):
-        if gamma > 0:
-          d = cons[index,:]  # a view: assignments fill the row in place
-          for pprime, count in pcs:
-            d[omega_offset + lamba_index[pprime, context] + t] = -1
-          index += 1
-    return cons
-
- print 'Pre lambda optimisation dual', dual(lamba), 'primal', primal(lamba)
- #print 'lambda', lamba, lamba.shape
- #print 'bounds', [(0, max(delta, gamma))] * (2 * num_edges * num_tags)
-
- lamba = scipy.optimize.fmin_slsqp(dual, lamba,
- bounds=[(0, max(delta, gamma))] * (2 * num_edges * num_tags),
- f_ieqcons=constraints,
- fprime=dual_deriv,
- fprime_ieqcons=constraints_deriv,
- iprint=0)
- print 'Post lambda optimisation dual', dual(lamba), 'primal', primal(lamba)
-
- # E-step
- llh = log_z = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
- llh += log(cz) * count
-
- q = zeros(num_tags)
- li = lamba_index[phrase, context]
- for t in range(num_tags):
- q[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t])
- qz = sum(q)
- log_z += count * log(qz)
-
- for t in range(num_tags):
- tagCounts[p][t] += count * q[t] / qz
-
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * q[t] / qz
-
- print 'iteration', iteration, 'llh', llh, 'logz', log_z
-
- # M-step
- for p in range(num_phrases):
- tagDist[p] = normalise(tagCounts[p])
- for i in range(4):
- for t in range(num_tags):
- contextWordDist[i][t] = normalise(contextWordCounts[i][t])
-
-for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals
diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py
deleted file mode 100644
index 3b9cefed..00000000
--- a/gi/posterior-regularisation/train_pr_parallel.py
+++ /dev/null
@@ -1,333 +0,0 @@
-import sys
-import scipy.optimize
-from numpy import *
-from numpy.random import random, seed
-
-#
-# Step 1: load the concordance counts
-#
-
-edges_phrase_to_context = []
-edges_context_to_phrase = []
-types = {}
-context_types = {}
-num_edges = 0
-
-for line in sys.stdin:
- phrase, rest = line.strip().split('\t')
- parts = rest.split('|||')
- edges_phrase_to_context.append((phrase, []))
- for i in range(0, len(parts), 2):
- context, count = parts[i:i+2]
-
- ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split()))
- cnt = int(count.strip()[2:])
- edges_phrase_to_context[-1][1].append((ctx, cnt))
-
- cid = context_types.get(ctx, len(context_types))
- if cid == len(context_types):
- context_types[ctx] = cid
- edges_context_to_phrase.append((ctx, []))
- edges_context_to_phrase[cid][1].append((phrase, cnt))
-
- for token in ctx:
- types.setdefault(token, len(types))
- for token in phrase.split():
- types.setdefault(token, len(types))
-
- num_edges += 1
-
-#
-# Step 2: initialise the model parameters
-#
-
-num_tags = 25
-num_types = len(types)
-num_phrases = len(edges_phrase_to_context)
-num_contexts = len(edges_context_to_phrase)
-delta = float(sys.argv[1])
-assert sys.argv[2] in ('local', 'global')
-local = sys.argv[2] == 'local'
-if len(sys.argv) > 3:
-  seed(int(sys.argv[3]))
-
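-# Assumed invocation (concordance on stdin):
-#   python train_pr_parallel.py <delta> local|global [seed] < concordance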
-print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', len(types), 'word types'
-
-def normalise(a):
- return a / float(sum(a))
-
-# Pr(tag | phrase)
-tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
-# Pr(context at pos i = w | tag) indexed by i, tag, word
-contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
-
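-# Illustrative sketch of the scipy.optimize.fmin_slsqp interface the dual
-# objectives below rely on (a hypothetical toy problem, not part of the
-# original training flow): minimise (x-1)^2 + (y-2)^2 subject to
-# x + y <= 2 and 0 <= x, y <= 2.
-def _slsqp_demo():
-  f = lambda v: (v[0] - 1) ** 2 + (v[1] - 2) ** 2
-  fprime = lambda v: array([2 * (v[0] - 1), 2 * (v[1] - 2)])
-  cons = lambda v: array([2.0 - v[0] - v[1]])  # feasible iff every entry >= 0
-  cons_deriv = lambda v: array([[-1.0, -1.0]])
-  return scipy.optimize.fmin_slsqp(f, zeros(2), f_ieqcons=cons, fprime=fprime,
-                                   fprime_ieqcons=cons_deriv,
-                                   bounds=[(0, 2)] * 2, iprint=0)
-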
-#
-# Step 3: expectation maximisation
-#
-
-class GlobalDualObjective:
- """
- Objective, log(z), for all phrases s.t. lambda >= 0, sum_c lambda_pct <= scale
- """
-
- def __init__(self, scale):
- self.scale = scale
- self.posterior = zeros((num_edges, num_tags))
- self.q = zeros((num_edges, num_tags))
- self.llh = 0
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- prob = tagDist[j][t]
- for k, token in enumerate(context):
- prob *= contextWordDist[k][t][types[token]]
- self.posterior[index,t] = prob
- z = sum(self.posterior[index,:])
- self.posterior[index,:] /= z
- self.llh += log(z) * count
- index += 1
-
- def objective(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- logz = 0
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t])
- local_z = sum(self.q[index,:])
- self.q[index,:] /= local_z
- logz += log(local_z) * count
- index += 1
-
- return logz
-
- # FIXME: recomputes q many more times than necessary
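-  # (caching the last ls seen by objective()/gradient() and reusing self.q
-  # when it is unchanged would avoid these redundant passes)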
-
- def gradient(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- gradient = zeros((num_edges, num_tags))
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t])
- local_z = sum(self.q[index,:])
- self.q[index,:] /= local_z
- for t in range(num_tags):
- gradient[index,t] -= self.q[index,t] * count
- index += 1
-
- return gradient.ravel()
-
- def constraints(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- cons = ones((num_phrases, num_tags)) * self.scale
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- cons[j,t] -= ls[index,t] * count
- index += 1
- return cons.ravel()
-
- def constraints_gradient(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- gradient = zeros((num_phrases, num_tags, num_edges, num_tags))
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- gradient[j,t,index,t] -= count
- index += 1
- return gradient.reshape((num_phrases*num_tags, num_edges*num_tags))
-
- def optimize(self):
- ls = zeros(num_edges * num_tags)
- #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
- ls = scipy.optimize.fmin_slsqp(self.objective, ls,
- bounds=[(0, self.scale)] * num_edges * num_tags,
- f_ieqcons=self.constraints,
- fprime=self.gradient,
- fprime_ieqcons=self.constraints_gradient,
- iprint=0) # =2 for verbose
- #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
-
- # returns llh, kl and l1lmax contribution
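-    # KL: since q ~ posterior * exp(-lambda . phi), KL(q || p) reduces to
-    #   -log Z - lambda . E_q[phi] = -objective(ls) + ls . gradient(ls)
-    # l1lmax: sum_t max_c q(t | phrase, c), the L1/Linf sparsity term scaled
-    #   by delta in the reported objective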
- l1lmax = 0
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for t in range(num_tags):
- lmax = None
- for i, (context, count) in enumerate(edges):
- lmax = max(lmax, self.q[index+i,t])
- l1lmax += lmax
- index += len(edges)
-
- return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax
-
-class LocalDualObjective:
- """
- Local part of objective, log(z) relevant to lambda_p**.
- Optimised subject to lambda >= 0, sum_c lambda_pct <= scale forall t
- """
-
- def __init__(self, phraseId, scale):
- self.phraseId = phraseId
- self.scale = scale
- edges = edges_phrase_to_context[self.phraseId][1]
- self.posterior = zeros((len(edges), num_tags))
- self.q = zeros((len(edges), num_tags))
- self.llh = 0
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- prob = tagDist[phraseId][t]
- for j, token in enumerate(context):
- prob *= contextWordDist[j][t][types[token]]
- self.posterior[i,t] = prob
- z = sum(self.posterior[i,:])
- self.posterior[i,:] /= z
- self.llh += log(z) * count
-
- def objective(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- logz = 0
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t])
- local_z = sum(self.q[i,:])
- self.q[i,:] /= local_z
- logz += log(local_z) * count
-
- return logz
-
- # FIXME: recomputes q many more times than necessary
-
- def gradient(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- gradient = zeros((len(edges), num_tags))
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t])
- local_z = sum(self.q[i,:])
- self.q[i,:] /= local_z
- for t in range(num_tags):
- gradient[i,t] -= self.q[i,t] * count
-
- return gradient.ravel()
-
- def constraints(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- cons = ones(num_tags) * self.scale
- for t in range(num_tags):
- for i, (context, count) in enumerate(edges):
- cons[t] -= ls[i,t] * count
- return cons
-
- def constraints_gradient(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- gradient = zeros((num_tags, len(edges), num_tags))
- for t in range(num_tags):
- for i, (context, count) in enumerate(edges):
- gradient[t,i,t] -= count
- return gradient.reshape((num_tags, len(edges)*num_tags))
-
- def optimize(self, ls=None):
- edges = edges_phrase_to_context[self.phraseId][1]
-    if ls is None:
- ls = zeros(len(edges) * num_tags)
- #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
- ls = scipy.optimize.fmin_slsqp(self.objective, ls,
- bounds=[(0, self.scale)] * len(edges) * num_tags,
- f_ieqcons=self.constraints,
- fprime=self.gradient,
- fprime_ieqcons=self.constraints_gradient,
- iprint=0) # =2 for verbose
- #print '\tlambda', list(ls)
- #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
-
- # returns llh, kl and l1lmax contribution
- l1lmax = 0
- for t in range(num_tags):
- lmax = None
- for i, (context, count) in enumerate(edges):
- lmax = max(lmax, self.q[i,t])
- l1lmax += lmax
-
- return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax, ls
-
-ls = [None] * num_phrases
-for iteration in range(20):
- tagCounts = [zeros(num_tags) for p in range(num_phrases)]
- contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)]
-
- # E-step
- llh = kl = l1lmax = 0
- if local:
- for p in range(num_phrases):
- o = LocalDualObjective(p, delta)
- #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0]
- #print '\toptimising lambda for phrase', p, 'ls', ls[p]
- obj = o.optimize(ls[p])
- #print '\tphrase', p, 'deltas', obj
- llh += obj[0]
- kl += obj[1]
- l1lmax += obj[2]
- ls[p] = obj[3]
-
- edges = edges_phrase_to_context[p][1]
- for j, (context, count) in enumerate(edges):
- for t in range(num_tags):
- tagCounts[p][t] += count * o.q[j,t]
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
-
- #print 'iteration', iteration, 'LOCAL objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
- else:
-    o = GlobalDualObjective(delta)
-    llh, kl, l1lmax = o.optimize()
-
- index = 0
- for p, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- tagCounts[p][t] += count * o.q[index,t]
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * o.q[index,t]
- index += 1
-
- print 'iteration', iteration, 'objective', (llh - kl - delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
-
- # M-step
- for p in range(num_phrases):
- tagDist[p] = normalise(tagCounts[p])
- for i in range(4):
- for t in range(num_tags):
- contextWordDist[i][t] = normalise(contextWordCounts[i][t])
-
-for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals