summaryrefslogtreecommitdiff
path: root/gi/posterior-regularisation/PhraseContextModel.java
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
committerKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
commit5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/posterior-regularisation/PhraseContextModel.java
parentcf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent21825a09d97c2e0afd20512f306fb25fed55e529 (diff)
Merge remote branch 'upstream/master'
Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
Diffstat (limited to 'gi/posterior-regularisation/PhraseContextModel.java')
-rw-r--r--gi/posterior-regularisation/PhraseContextModel.java466
1 files changed, 0 insertions, 466 deletions
diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java
deleted file mode 100644
index 85bcfb89..00000000
--- a/gi/posterior-regularisation/PhraseContextModel.java
+++ /dev/null
@@ -1,466 +0,0 @@
-// Input of the form:
-// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1
-// phrase TAB [context]+
-// where context = phrase ||| C=... which are separated by |||
-
-// Model parameterised as follows:
-// - each phrase, p, is allocated a latent state, t
-// - this is used to generate the contexts, c
-// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR
-
-// Training with EM:
-// - e-step is estimating q(t) = P(t|p,c) for all x,c
-// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t)
-// - PR uses alternate e-step, which first optimizes lambda
-// min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc]
-// where
-// q(t|p,c) propto p(t,c|p) exp( -phi_ptc )
-// Then q is used to obtain expectations for vanilla M-step.
-
-// Sexing it up:
-// - learn p-specific conditionals P(t|p)
-// - or generate phrase internals, e.g., generate edge words from
-// different distribution to central words
-// - agreement between phrase->context model and context->phrase model
-
-import java.io.*;
-import optimization.gradientBasedMethods.*;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.GenericPickFirstStep;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.NormalizedProjectedGradientL2Norm;
-import optimization.stopCriteria.NormalizedValueDifference;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import java.util.*;
-import java.util.regex.*;
-import gnu.trove.TDoubleArrayList;
-import gnu.trove.TIntArrayList;
-import static java.lang.Math.*;
-
-class PhraseContextModel
-{
- // model/optimisation configuration parameters
- int numTags;
- boolean posteriorRegularisation = true;
- double constraintScale = 3; // FIXME: make configurable
-
- // copied from L1LMax in depparsing code
- final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10;
- final int maxZoomEvals = 10, maxExtrapolationIters = 200;
- int maxProjectionIterations = 200;
- int minOccurrencesForProjection = 0;
-
- // book keeping
- int numPositions;
- Random rng = new Random();
-
- // training set
- Corpus training;
-
- // model parameters (learnt)
- double emissions[][][]; // position in 0 .. 3 x tag x word Pr(word | tag, position)
- double prior[][]; // phrase x tag Pr(tag | phrase)
- double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers
-
- PhraseContextModel(Corpus training, int tags)
- {
- this.training = training;
- this.numTags = tags;
- assert (!training.getEdges().isEmpty());
- assert (numTags > 1);
-
- // now initialise emissions
- numPositions = training.getEdges().get(0).getContext().size();
- assert (numPositions > 0);
-
- emissions = new double[numPositions][numTags][training.getNumTokens()];
- prior = new double[training.getNumEdges()][numTags];
- if (posteriorRegularisation)
- lambda = new double[training.getNumEdges() * numTags];
-
- for (double[][] emissionTW : emissions)
- {
- for (double[] emissionW : emissionTW)
- {
- randomise(emissionW);
-// for (int i = 0; i < emissionW.length; ++i)
-// emissionW[i] = i+1;
-// normalise(emissionW);
- }
- }
-
- for (double[] priorTag : prior)
- {
- randomise(priorTag);
-// for (int i = 0; i < priorTag.length; ++i)
-// priorTag[i] = i+1;
-// normalise(priorTag);
- }
- }
-
- void expectationMaximisation(int numIterations)
- {
- double lastLlh = Double.NEGATIVE_INFINITY;
-
- for (int iteration = 0; iteration < numIterations; ++iteration)
- {
- double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()];
- double priorCounts[][] = new double[training.getNumPhrases()][numTags];
-
- // E-step
- double llh = 0;
- if (posteriorRegularisation)
- {
- EStepDualObjective objective = new EStepDualObjective();
-
- // copied from x2y2withconstraints
-// LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-// OptimizerStats stats = new OptimizerStats();
-// ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
-// CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
-// compositeStop.add(new ProjectedGradientL2Norm(0.001));
-// compositeStop.add(new ValueDifference(0.001));
-// optimizer.setMaxIterations(50);
-// boolean succeed = optimizer.optimize(objective,stats,compositeStop);
-
- // copied from depparser l1lmaxobjective
- ProjectedOptimizerStats stats = new ProjectedOptimizerStats();
- GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1);
- LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2);
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch);
- optimizer.setMaxIterations(maxProjectionIterations);
- CompositeStopingCriteria stop = new CompositeStopingCriteria();
- stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision));
- stop.add(new NormalizedValueDifference(stoppingPrecision));
- boolean succeed = optimizer.optimize(objective, stats, stop);
-
- System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- //System.out.println("Solution: " + objective.parameters);
- if (!succeed)
- System.out.println("Failed to optimize");
- //System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
-
- //lambda = objective.getParameters();
- llh = objective.primal();
-
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- for (int t = 0; t < numTags; t++)
- {
- double p = objective.q.get(i).get(j).get(t);
- priorCounts[i][t] += e.getCount() * p;
- TIntArrayList tokens = e.getContext();
- for (int k = 0; k < tokens.size(); ++k)
- emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p;
- }
- }
- }
- }
- else
- {
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- double probs[] = posterior(i, e);
- double z = normalise(probs);
- llh += log(z) * e.getCount();
-
- TIntArrayList tokens = e.getContext();
- for (int t = 0; t < numTags; ++t)
- {
- priorCounts[i][t] += e.getCount() * probs[t];
- for (int k = 0; k < tokens.size(); ++k)
- emissionsCounts[j][t][tokens.get(k)] += e.getCount() * probs[t];
- }
- }
- }
- }
-
- // M-step: normalise
- for (double[][] emissionTW : emissionsCounts)
- for (double[] emissionW : emissionTW)
- normalise(emissionW);
-
- for (double[] priorTag : priorCounts)
- normalise(priorTag);
-
- emissions = emissionsCounts;
- prior = priorCounts;
-
- System.out.println("Iteration " + iteration + " llh " + llh);
-
-// if (llh - lastLlh < 1e-4)
-// break;
-// else
-// lastLlh = llh;
- }
- }
-
- static double normalise(double probs[])
- {
- double z = 0;
- for (double p : probs)
- z += p;
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- return z;
- }
-
- void randomise(double probs[])
- {
- double z = 0;
- for (int i = 0; i < probs.length; ++i)
- {
- probs[i] = 10 + rng.nextDouble();
- z += probs[i];
- }
-
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- }
-
- static int argmax(double probs[])
- {
- double m = Double.NEGATIVE_INFINITY;
- int mi = -1;
- for (int i = 0; i < probs.length; ++i)
- {
- if (probs[i] > m)
- {
- m = probs[i];
- mi = i;
- }
- }
- return mi;
- }
-
- double[] posterior(int phraseId, Corpus.Edge e) // unnormalised
- {
- double probs[] = new double[numTags];
- TIntArrayList tokens = e.getContext();
- for (int t = 0; t < numTags; ++t)
- {
- probs[t] = prior[phraseId][t];
- for (int k = 0; k < tokens.size(); ++k)
- probs[t] *= emissions[k][t][tokens.get(k)];
- }
- return probs;
- }
-
- void displayPosterior()
- {
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (Corpus.Edge e: edges)
- {
- double probs[] = posterior(i, e);
- normalise(probs);
-
- // emit phrase
- System.out.print(e.getPhraseString());
- System.out.print("\t");
- System.out.print(e.getContextString());
- System.out.print("||| C=" + e.getCount() + " |||");
-
- int t = argmax(probs);
- System.out.print(" " + t + " ||| " + probs[t]);
- // for (int t = 0; t < numTags; ++t)
- // System.out.print(" " + probs[t]);
- System.out.println();
- }
- }
- }
-
- public static void main(String[] args)
- {
- assert (args.length >= 2);
- try
- {
- Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0])));
- PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1]));
- model.expectationMaximisation(Integer.parseInt(args[2]));
- model.displayPosterior();
- }
- catch (IOException e)
- {
- System.out.println("Failed to read input file: " + args[0]);
- e.printStackTrace();
- }
- }
-
- class EStepDualObjective extends ProjectedObjective
- {
- List<List<TDoubleArrayList>> conditionals; // phrase id x context # x tag - precomputed
- List<List<TDoubleArrayList>> q; // ditto, but including exp(-lambda) terms
- double objective = 0; // log(z)
- // Objective.gradient = d log(z) / d lambda = E_q[phi]
- double llh = 0;
-
- public EStepDualObjective()
- {
- super();
- // compute conditionals p(context, tag | phrase) for all training instances
- conditionals = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
- q = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- conditionals.add(new ArrayList<TDoubleArrayList>(edges.size()));
- q.add(new ArrayList<TDoubleArrayList>(edges.size()));
-
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- double probs[] = posterior(i, e);
- double z = normalise(probs);
- llh += log(z) * e.getCount();
- conditionals.get(i).add(new TDoubleArrayList(probs));
- q.get(i).add(new TDoubleArrayList(probs));
- }
- }
-
- gradient = new double[training.getNumEdges()*numTags];
- setInitialParameters(lambda);
- computeObjectiveAndGradient();
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- SimplexProjection p = new SimplexProjection(constraintScale);
-
- double[] newPoint = point.clone();
- int edgeIndex = 0;
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- for (int t = 0; t < numTags; t++)
- {
- double[] subPoint = new double[edges.size()];
- for (int j = 0; j < edges.size(); ++j)
- subPoint[j] = point[edgeIndex+j*numTags+t];
-
- p.project(subPoint);
- for (int j = 0; j < edges.size(); ++j)
- newPoint[edgeIndex+j*numTags+t] = subPoint[j];
- }
-
- edgeIndex += edges.size() * numTags;
- }
-// System.out.println("Proj from: " + Arrays.toString(point));
-// System.out.println("Proj to: " + Arrays.toString(newPoint));
- return newPoint;
- }
-
- @Override
- public void setParameters(double[] params)
- {
- super.setParameters(params);
- computeObjectiveAndGradient();
- }
-
- @Override
- public double[] getGradient()
- {
- gradientCalls += 1;
- return gradient;
- }
-
- @Override
- public double getValue()
- {
- functionCalls += 1;
- return objective;
- }
-
- public void computeObjectiveAndGradient()
- {
- int edgeIndex = 0;
- objective = 0;
- Arrays.fill(gradient, 0);
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
-
- double z = 0;
- for (int t = 0; t < numTags; t++)
- {
- double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]);
- q.get(i).get(j).set(t, v);
- z += v;
- }
- objective += log(z) * e.getCount();
-
- for (int t = 0; t < numTags; t++)
- {
- double v = q.get(i).get(j).get(t) / z;
- q.get(i).get(j).set(t, v);
- gradient[edgeIndex+t] -= e.getCount() * v;
- }
-
- edgeIndex += numTags;
- }
- }
-// System.out.println("computeObjectiveAndGradient logz=" + objective);
-// System.out.println("lambda= " + Arrays.toString(parameters));
-// System.out.println("gradient=" + Arrays.toString(gradient));
- }
-
- public String toString()
- {
- StringBuilder sb = new StringBuilder();
- sb.append(getClass().getCanonicalName()).append(" with ");
- sb.append(parameters.length).append(" parameters and ");
- sb.append(training.getNumPhrases() * numTags).append(" constraints");
- return sb.toString();
- }
-
- double primal()
- {
- // primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct]
- // kl = sum_Y q(Y) log q(Y) / p(Y|X)
- // = sum_Y q(Y) { -lambda . phi(Y) - log Z }
- // = -log Z - lambda . E_q[phi]
- // = -objective + lambda . gradient
-
- double kl = -objective + MathUtils.dotProduct(parameters, gradient);
- double l1lmax = 0;
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int t = 0; t < numTags; t++)
- {
- double lmax = Double.NEGATIVE_INFINITY;
- for (int j = 0; j < edges.size(); ++j)
- lmax = max(lmax, q.get(i).get(j).get(t));
- l1lmax += lmax;
- }
- }
-
- return llh + kl + constraintScale * l1lmax;
- }
- }
-}