Merge remote branch 'upstream/master'

Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
author: Kenneth Heafield <github@kheafield.com> 2012-10-22 12:07:20 +0100
committer: Kenneth Heafield <github@kheafield.com> 2012-10-22 12:07:20 +0100
commit: 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree: 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/posterior-regularisation/PhraseContextModel.java
parent: cf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent: 21825a09d97c2e0afd20512f306fb25fed55e529 (diff)
1 files changed, 0 insertions, 466 deletions
diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java
deleted file mode 100644
index 85bcfb89..00000000
--- a/gi/posterior-regularisation/PhraseContextModel.java
+++ /dev/null
@@ -1,466 +0,0 @@
-// Input of the form:
-// " the phantom of the opera "    tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1
-//                      phrase TAB [context]+
-// where    context =   phrase ||| C=...        which are separated by |||
-
-// Model parameterised as follows:
-// - each phrase, p, is allocated a latent state, t
-// - this is used to generate the contexts, c
-// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR
-
-// Training with EM:
-// - e-step is estimating q(t) = P(t|p,c) for all x,c
-// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t)
-// - PR uses alternate e-step, which first optimizes lambda 
-//      min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc]
-//   where
-//      q(t|p,c) propto p(t,c|p) exp( -phi_ptc )
-//   Then q is used to obtain expectations for vanilla M-step.
-
-// Sexing it up:
-// - learn p-specific conditionals P(t|p)
-// - or generate phrase internals, e.g., generate edge words from
-//   different distribution to central words
-// - agreement between phrase->context model and context->phrase model
-
-import java.io.*;
-import optimization.gradientBasedMethods.*;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.GenericPickFirstStep;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.NormalizedProjectedGradientL2Norm;
-import optimization.stopCriteria.NormalizedValueDifference;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import java.util.*;
-import java.util.regex.*;
-import gnu.trove.TDoubleArrayList;
-import gnu.trove.TIntArrayList;
-import static java.lang.Math.*;
-
-class PhraseContextModel
-{
-	// model/optimisation configuration parameters
-	int numTags;
-	boolean posteriorRegularisation = true;
-	double constraintScale = 3; // FIXME: make configurable
-	
-	// copied from L1LMax in depparsing code
-	final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10;
-	final int maxZoomEvals = 10, maxExtrapolationIters = 200;
-	int maxProjectionIterations = 200;
-	int minOccurrencesForProjection = 0;
-
-	// book keeping
-	int numPositions;
-	Random rng = new Random();
-
-	// training set
-	Corpus training;
-
-	// model parameters (learnt)
-	double emissions[][][]; // position in 0 .. 3 x tag x word Pr(word | tag, position)
-	double prior[][]; // phrase x tag Pr(tag | phrase)
-	double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers
-
-	PhraseContextModel(Corpus training, int tags)
-	{
-		this.training = training;
-		this.numTags = tags;
-		assert (!training.getEdges().isEmpty());
-		assert (numTags > 1);
-
-		// now initialise emissions
-		numPositions = training.getEdges().get(0).getContext().size();
-		assert (numPositions > 0);
-
-		emissions = new double[numPositions][numTags][training.getNumTokens()];
-		prior = new double[training.getNumEdges()][numTags];
-		if (posteriorRegularisation)
-			lambda = new double[training.getNumEdges() * numTags];
-
-		for (double[][] emissionTW : emissions)
-		{
-			for (double[] emissionW : emissionTW)
-			{
-				randomise(emissionW);
-//				for (int i = 0; i < emissionW.length; ++i)
-//					emissionW[i] = i+1;
-//				normalise(emissionW);
-			}
-		}
-					
-		for (double[] priorTag : prior)
-		{
-			randomise(priorTag);
-//			for (int i = 0; i < priorTag.length; ++i)
-//				priorTag[i] = i+1;
-//			normalise(priorTag);
-		}
-	}
-
-	void expectationMaximisation(int numIterations)
-	{
-		double lastLlh = Double.NEGATIVE_INFINITY;
-
-		for (int iteration = 0; iteration < numIterations; ++iteration)
-		{
-			double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()];
-			double priorCounts[][] = new double[training.getNumPhrases()][numTags];
-			
-			// E-step
-			double llh = 0;
-			if (posteriorRegularisation)
-			{
-				EStepDualObjective objective = new EStepDualObjective();
-				
-				// copied from x2y2withconstraints
-//				LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));				
-//				OptimizerStats stats = new OptimizerStats();
-//				ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
-//				CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
-//				compositeStop.add(new ProjectedGradientL2Norm(0.001));
-//				compositeStop.add(new ValueDifference(0.001));
-//				optimizer.setMaxIterations(50);
-//				boolean succeed = optimizer.optimize(objective,stats,compositeStop);
-				
-				// copied from depparser l1lmaxobjective
-				ProjectedOptimizerStats stats = new ProjectedOptimizerStats();
-				GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1);
-				LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2);
-				ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch);
-				optimizer.setMaxIterations(maxProjectionIterations);
-		        CompositeStopingCriteria stop = new CompositeStopingCriteria();
-		        stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision));
-		        stop.add(new NormalizedValueDifference(stoppingPrecision));
-		        boolean succeed = optimizer.optimize(objective, stats, stop);
-
-				System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
-				//System.out.println("Solution: " + objective.parameters);
-				if (!succeed)
-					System.out.println("Failed to optimize");
-				//System.out.println("Ended optimization in " + optimizer.getCurrentIteration());				
-
-				//lambda = objective.getParameters();
-				llh = objective.primal();
-				
-				for (int i = 0; i < training.getNumPhrases(); ++i)
-				{
-					List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-					for (int j = 0; j < edges.size(); ++j)
-					{
-						Corpus.Edge e = edges.get(j);
-						for (int t = 0; t < numTags; t++)
-						{
-							double p = objective.q.get(i).get(j).get(t);
-							priorCounts[i][t] += e.getCount() * p;
-							TIntArrayList tokens = e.getContext();
-							for (int k = 0; k < tokens.size(); ++k)
-								emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p;
-						}
-					}
-				}
-			}
-			else
-			{
-				for (int i = 0; i < training.getNumPhrases(); ++i)
-				{
-					List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-					for (int j = 0; j < edges.size(); ++j)
-					{
-						Corpus.Edge e = edges.get(j);
-						double probs[] = posterior(i, e);			
-						double z = normalise(probs);
-						llh += log(z) * e.getCount();
-						
-						TIntArrayList tokens = e.getContext();
-						for (int t = 0; t < numTags; ++t)
-						{
-							priorCounts[i][t] += e.getCount() * probs[t];
-							for (int k = 0; k < tokens.size(); ++k)
-								emissionsCounts[j][t][tokens.get(k)] += e.getCount() * probs[t];
-						}
-					}
-				}
-			}
-
-			// M-step: normalise
-			for (double[][] emissionTW : emissionsCounts)
-				for (double[] emissionW : emissionTW)
-					normalise(emissionW);
-
-			for (double[] priorTag : priorCounts)
-				normalise(priorTag);
-
-			emissions = emissionsCounts;
-			prior = priorCounts;
-
-			System.out.println("Iteration " + iteration + " llh " + llh);
-
-//			if (llh - lastLlh < 1e-4)
-//				break;
-//			else
-//				lastLlh = llh;
-		}
-	}
-
-	static double normalise(double probs[])
-	{
-		double z = 0;
-		for (double p : probs)
-			z += p;
-		for (int i = 0; i < probs.length; ++i)
-			probs[i] /= z;
-		return z;
-	}
-
-	void randomise(double probs[])
-	{
-		double z = 0;
-		for (int i = 0; i < probs.length; ++i)
-		{
-			probs[i] = 10 + rng.nextDouble();
-			z += probs[i];
-		}
-
-		for (int i = 0; i < probs.length; ++i)
-			probs[i] /= z;
-	}
-
-	static int argmax(double probs[])
-	{
-		double m = Double.NEGATIVE_INFINITY;
-		int mi = -1;
-		for (int i = 0; i < probs.length; ++i)
-		{
-			if (probs[i] > m)
-			{
-				m = probs[i];
-				mi = i;
-			}
-		}
-		return mi;
-	}
-
-	double[] posterior(int phraseId, Corpus.Edge e) // unnormalised
-	{
-		double probs[] = new double[numTags];
-		TIntArrayList tokens = e.getContext();
-		for (int t = 0; t < numTags; ++t)
-		{
-			probs[t] = prior[phraseId][t];
-			for (int k = 0; k < tokens.size(); ++k)
-				probs[t] *= emissions[k][t][tokens.get(k)];
-		}
-		return probs;
-	}
-
-	void displayPosterior()
-	{
-		for (int i = 0; i < training.getNumPhrases(); ++i)
-		{
-			List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-			for (Corpus.Edge e: edges)
-			{
-				double probs[] = posterior(i, e);
-				normalise(probs);
-
-				// emit phrase
-				System.out.print(e.getPhraseString());
-				System.out.print("\t");
-				System.out.print(e.getContextString());
-				System.out.print("||| C=" + e.getCount() + " |||");
-
-				int t = argmax(probs);
-				System.out.print(" " + t + " ||| " + probs[t]);
-				// for (int t = 0; t < numTags; ++t)
-				// System.out.print(" " + probs[t]);
-				System.out.println();
-			}
-		}
-	}
-
-	public static void main(String[] args)
-	{
-		assert (args.length >= 2);
-		try
-		{
-			Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0])));
-			PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1]));
-			model.expectationMaximisation(Integer.parseInt(args[2]));
-			model.displayPosterior();
-		} 
-		catch (IOException e)
-		{
-			System.out.println("Failed to read input file: " + args[0]);
-			e.printStackTrace();
-		}
-	}
-
-	class EStepDualObjective extends ProjectedObjective
-	{
-		List<List<TDoubleArrayList>> conditionals; // phrase id x context # x tag - precomputed
-		List<List<TDoubleArrayList>> q; // ditto, but including exp(-lambda) terms
-		double objective = 0; // log(z)
-		// Objective.gradient = d log(z) / d lambda = E_q[phi]
-		double llh = 0;
-
-		public EStepDualObjective()
-		{
-			super();
-			// compute conditionals p(context, tag | phrase) for all training instances
-			conditionals = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
-			q = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
-			for (int i = 0; i < training.getNumPhrases(); ++i)
-			{
-				List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
-				conditionals.add(new ArrayList<TDoubleArrayList>(edges.size()));
-				q.add(new ArrayList<TDoubleArrayList>(edges.size()));
-
-				for (int j = 0; j < edges.size(); ++j)
-				{
-					Corpus.Edge e = edges.get(j);
-					double probs[] = posterior(i, e);
-					double z = normalise(probs);
-					llh += log(z) * e.getCount();
-					conditionals.get(i).add(new TDoubleArrayList(probs));
-					q.get(i).add(new TDoubleArrayList(probs));
-				}
-			}
-			
-			gradient = new double[training.getNumEdges()*numTags];
-			setInitialParameters(lambda);
-			computeObjectiveAndGradient();
-		}
-
-		@Override
-		public double[] projectPoint(double[] point)
-		{
-			SimplexProjection p = new SimplexProjection(constraintScale);
-
-			double[] newPoint = point.clone();
-			int edgeIndex = 0;
-			for (int i = 0; i < training.getNumPhrases(); ++i)
-			{
-				List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
-				for (int t = 0; t < numTags; t++)
-				{
-					double[] subPoint = new double[edges.size()];
-					for (int j = 0; j < edges.size(); ++j)
-						subPoint[j] = point[edgeIndex+j*numTags+t];
-				
-					p.project(subPoint);
-					for (int j = 0; j < edges.size(); ++j)
-						newPoint[edgeIndex+j*numTags+t] = subPoint[j];
-				}
-				
-				edgeIndex += edges.size() * numTags;
-			}
-//			System.out.println("Proj from: " + Arrays.toString(point)); 
-//			System.out.println("Proj to:   " + Arrays.toString(newPoint)); 
-			return newPoint;
-		}
-
-		@Override
-		public void setParameters(double[] params)
-		{
-			super.setParameters(params);
-			computeObjectiveAndGradient();
-		}
-
-		@Override
-		public double[] getGradient()
-		{
-			gradientCalls += 1;
-			return gradient;
-		}
-
-		@Override
-		public double getValue()
-		{
-			functionCalls += 1;
-			return objective;
-		}
-
-		public void computeObjectiveAndGradient()
-		{
-			int edgeIndex = 0;
-			objective = 0;
-			Arrays.fill(gradient, 0);
-			for (int i = 0; i < training.getNumPhrases(); ++i)
-			{
-				List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
-				for (int j = 0; j < edges.size(); ++j)
-				{
-					Corpus.Edge e = edges.get(j);
-					
-					double z = 0;
-					for (int t = 0; t < numTags; t++)
-					{
-						double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]);
-						q.get(i).get(j).set(t, v);
-						z += v;
-					}
-					objective += log(z) * e.getCount();
-
-					for (int t = 0; t < numTags; t++)
-					{
-						double v = q.get(i).get(j).get(t) / z; 
-						q.get(i).get(j).set(t, v);
-						gradient[edgeIndex+t] -= e.getCount() * v;
-					}
-					
-					edgeIndex += numTags;
-				}
-			}			
-//			System.out.println("computeObjectiveAndGradient logz=" + objective);
-//			System.out.println("lambda=  " + Arrays.toString(parameters));
-//			System.out.println("gradient=" + Arrays.toString(gradient));
-		}
-
-		public String toString()
-		{
-			StringBuilder sb = new StringBuilder();
-			sb.append(getClass().getCanonicalName()).append(" with ");
-			sb.append(parameters.length).append(" parameters and ");
-			sb.append(training.getNumPhrases() * numTags).append(" constraints");
-			return sb.toString();
-		}
-				
-		double primal()
-		{
-			// primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct]
-			// kl = sum_Y q(Y) log q(Y) / p(Y|X)
-			//    = sum_Y q(Y) { -lambda . phi(Y) - log Z }
-			//    = -log Z - lambda . E_q[phi]
-			//    = -objective + lambda . gradient
-			
-			double kl = -objective + MathUtils.dotProduct(parameters, gradient);
-			double l1lmax = 0;
-			for (int i = 0; i < training.getNumPhrases(); ++i)
-			{
-				List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-				for (int t = 0; t < numTags; t++)
-				{
-					double lmax = Double.NEGATIVE_INFINITY;
-					for (int j = 0; j < edges.size(); ++j)
-						lmax = max(lmax, q.get(i).get(j).get(t));
-					l1lmax += lmax;
-				}
-			}
-
-			return llh + kl + constraintScale * l1lmax;
-		}
-	}
-}
author	Kenneth Heafield <github@kheafield.com>	2012-10-22 12:07:20 +0100
committer	Kenneth Heafield <github@kheafield.com>	2012-10-22 12:07:20 +0100
commit	5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree	9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/posterior-regularisation/PhraseContextModel.java
parent	cf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent	21825a09d97c2e0afd20512f306fb25fed55e529 (diff)