summary | refs | log | tree | commit | diff
path: root/gi/posterior-regularisation
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-10-11 14:06:32 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-10-11 14:06:32 -0400
commit 9339c80d465545aec5a6dccfef7c83ca715bf11f (patch)
tree 64c56d558331edad1db3832018c80e799551c39a /gi/posterior-regularisation
parent 438dac41810b7c69fa10203ac5130d20efa2da9f (diff)
parent afd7da3b2338661657ad0c4e9eec681e014d37bf (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'gi/posterior-regularisation')
-rw-r--r-- gi/posterior-regularisation/Corpus.java 167
-rw-r--r-- gi/posterior-regularisation/Lexicon.java 32
-rw-r--r-- gi/posterior-regularisation/PhraseContextModel.java 466
-rw-r--r-- gi/posterior-regularisation/README 3
-rw-r--r-- gi/posterior-regularisation/alphabet.hh 61
-rw-r--r-- gi/posterior-regularisation/canned.concordance 4
-rw-r--r-- gi/posterior-regularisation/em.cc 830
-rw-r--r-- gi/posterior-regularisation/invert.hh 45
-rw-r--r-- gi/posterior-regularisation/linesearch.py 58
-rw-r--r-- gi/posterior-regularisation/log_add.hh 30
l--------- gi/posterior-regularisation/prjava.jar 1
-rwxr-xr-x gi/posterior-regularisation/prjava/Makefile 8
-rw-r--r-- gi/posterior-regularisation/prjava/build.xml 38
-rw-r--r-- gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar bin 832410 -> 0 bytes
-rw-r--r-- gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar bin 53244 -> 0 bytes
-rw-r--r-- gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar bin 737844 -> 0 bytes
-rw-r--r-- gi/posterior-regularisation/prjava/src/arr/F.java 99
-rw-r--r-- gi/posterior-regularisation/prjava/src/data/Corpus.java 233
-rw-r--r-- gi/posterior-regularisation/prjava/src/hmm/HMM.java 579
-rw-r--r-- gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java 351
-rw-r--r-- gi/posterior-regularisation/prjava/src/hmm/POS.java 120
-rw-r--r-- gi/posterior-regularisation/prjava/src/io/FileUtil.java 48
-rw-r--r-- gi/posterior-regularisation/prjava/src/io/SerializedObjects.java 83
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java 110
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java 128
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java 127
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java 120
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java 92
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java 65
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java 19
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java 234
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java 87
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java 19
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java 11
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java 154
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java 29
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java 10
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java 86
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java 70
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java 102
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java 141
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java 185
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java 20
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java 25
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java 14
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java 33
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java 137
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java 300
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java 45
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java 104
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java 72
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java 127
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java 33
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java 30
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java 48
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java 60
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java 54
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java 51
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java 8
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java 41
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java 37
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/util/Logger.java 7
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java 339
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java 28
-rw-r--r-- gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java 180
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/Agree.java 204
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java 197
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/C2F.java 216
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/Corpus.java 288
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/Lexicon.java 34
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java 540
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java 436
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java 193
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java 224
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/Trainer.java 257
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/VB.java 419
-rw-r--r-- gi/posterior-regularisation/prjava/src/test/CorpusTest.java 60
-rw-r--r-- gi/posterior-regularisation/prjava/src/test/HMMModelStats.java 105
-rw-r--r-- gi/posterior-regularisation/prjava/src/test/IntDoublePair.java 23
-rw-r--r-- gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java 131
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/Array.java 41
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/ArrayMath.java 186
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java 14
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/DigammaFunction.java 21
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/FileSystem.java 21
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/InputOutput.java 67
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/LogSummer.java 86
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/MathUtil.java 148
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/Matrix.java 16
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/MemoryTracker.java 47
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/Pair.java 31
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/Printing.java 158
-rw-r--r-- gi/posterior-regularisation/prjava/src/util/Sorters.java 39
-rwxr-xr-x gi/posterior-regularisation/prjava/train-PR-cluster.sh 4
-rw-r--r-- gi/posterior-regularisation/projected_gradient.cc 87
-rw-r--r-- gi/posterior-regularisation/simplex_pg.py 55
-rwxr-xr-x gi/posterior-regularisation/split-languages.py 23
-rw-r--r-- gi/posterior-regularisation/train_pr_agree.py 400
-rw-r--r-- gi/posterior-regularisation/train_pr_global.py 296
-rw-r--r-- gi/posterior-regularisation/train_pr_parallel.py 333
100 files changed, 0 insertions, 12238 deletions
diff --git a/gi/posterior-regularisation/Corpus.java b/gi/posterior-regularisation/Corpus.java
deleted file mode 100644
index 07b27387..00000000
--- a/gi/posterior-regularisation/Corpus.java
+++ /dev/null
@@ -1,167 +0,0 @@
-import gnu.trove.TIntArrayList;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-public class Corpus
-{
- private Lexicon<String> tokenLexicon = new Lexicon<String>();
- private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>();
- private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>();
- private List<Edge> edges = new ArrayList<Edge>();
- private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
- private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
-
- public class Edge
- {
- Edge(int phraseId, int contextId, int count)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- }
- public int getPhraseId()
- {
- return phraseId;
- }
- public TIntArrayList getPhrase()
- {
- return phraseLexicon.lookup(phraseId);
- }
- public String getPhraseString()
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getPhrase().toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(tokenLexicon.lookup(tid));
- }
- return b.toString();
- }
- public int getContextId()
- {
- return contextId;
- }
- public TIntArrayList getContext()
- {
- return contextLexicon.lookup(contextId);
- }
- public String getContextString()
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getContext().toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(tokenLexicon.lookup(tid));
- }
- return b.toString();
- }
- public int getCount()
- {
- return count;
- }
- private int phraseId;
- private int contextId;
- private int count;
- }
-
- List<Edge> getEdges()
- {
- return edges;
- }
-
- int getNumEdges()
- {
- return edges.size();
- }
-
- int getNumPhrases()
- {
- return phraseLexicon.size();
- }
-
- List<Edge> getEdgesForPhrase(int phraseId)
- {
- return phraseToContext.get(phraseId);
- }
-
- int getNumContexts()
- {
- return contextLexicon.size();
- }
-
- List<Edge> getEdgesForContext(int contextId)
- {
- return contextToPhrase.get(contextId);
- }
-
- int getNumTokens()
- {
- return tokenLexicon.size();
- }
-
- static Corpus readFromFile(Reader in) throws IOException
- {
- Corpus c = new Corpus();
-
- // read in line-by-line
- BufferedReader bin = new BufferedReader(in);
- String line;
- Pattern separator = Pattern.compile(" \\|\\|\\| ");
-
- while ((line = bin.readLine()) != null)
- {
- // split into phrase and contexts
- StringTokenizer st = new StringTokenizer(line, "\t");
- assert (st.hasMoreTokens());
- String phraseToks = st.nextToken();
- assert (st.hasMoreTokens());
- String rest = st.nextToken();
- assert (!st.hasMoreTokens());
-
- // process phrase
- st = new StringTokenizer(phraseToks, " ");
- TIntArrayList ptoks = new TIntArrayList();
- while (st.hasMoreTokens())
- ptoks.add(c.tokenLexicon.insert(st.nextToken()));
- int phraseId = c.phraseLexicon.insert(ptoks);
- if (phraseId == c.phraseToContext.size())
- c.phraseToContext.add(new ArrayList<Edge>());
-
- // process contexts
- String[] parts = separator.split(rest);
- assert (parts.length % 2 == 0);
- for (int i = 0; i < parts.length; i += 2)
- {
- // process pairs of strings - context and count
- TIntArrayList ctx = new TIntArrayList();
- String ctxString = parts[i];
- String countString = parts[i + 1];
- StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " ");
- while (ctxStrtok.hasMoreTokens())
- {
- String token = ctxStrtok.nextToken();
- if (!token.equals("<PHRASE>"))
- ctx.add(c.tokenLexicon.insert(token));
- }
- int contextId = c.contextLexicon.insert(ctx);
- if (contextId == c.contextToPhrase.size())
- c.contextToPhrase.add(new ArrayList<Edge>());
-
- assert (countString.startsWith("C="));
- Edge e = c.new Edge(phraseId, contextId,
- Integer.parseInt(countString.substring(2).trim()));
- c.edges.add(e);
-
- // index the edge for fast phrase, context lookup
- c.phraseToContext.get(phraseId).add(e);
- c.contextToPhrase.get(contextId).add(e);
- }
- }
-
- return c;
- }
-}
diff --git a/gi/posterior-regularisation/Lexicon.java b/gi/posterior-regularisation/Lexicon.java
deleted file mode 100644
index 9f0245ee..00000000
--- a/gi/posterior-regularisation/Lexicon.java
+++ /dev/null
@@ -1,32 +0,0 @@
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Lexicon<T>
-{
- public int insert(T word)
- {
- Integer i = wordToIndex.get(word);
- if (i == null)
- {
- i = indexToWord.size();
- wordToIndex.put(word, i);
- indexToWord.add(word);
- }
- return i;
- }
-
- public T lookup(int index)
- {
- return indexToWord.get(index);
- }
-
- public int size()
- {
- return indexToWord.size();
- }
-
- private Map<T, Integer> wordToIndex = new HashMap<T, Integer>();
- private List<T> indexToWord = new ArrayList<T>();
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java
deleted file mode 100644
index 85bcfb89..00000000
--- a/gi/posterior-regularisation/PhraseContextModel.java
+++ /dev/null
@@ -1,466 +0,0 @@
-// Input of the form:
-// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1
-// phrase TAB [context]+
-// where context = phrase ||| C=... which are separated by |||
-
-// Model parameterised as follows:
-// - each phrase, p, is allocated a latent state, t
-// - this is used to generate the contexts, c
-// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR
-
-// Training with EM:
-// - e-step is estimating q(t) = P(t|p,c) for all x,c
-// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t)
-// - PR uses alternate e-step, which first optimizes lambda
-// min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc]
-// where
-// q(t|p,c) propto p(t,c|p) exp( -phi_ptc )
-// Then q is used to obtain expectations for vanilla M-step.
-
-// Sexing it up:
-// - learn p-specific conditionals P(t|p)
-// - or generate phrase internals, e.g., generate edge words from
-// different distribution to central words
-// - agreement between phrase->context model and context->phrase model
-
-import java.io.*;
-import optimization.gradientBasedMethods.*;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.GenericPickFirstStep;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.NormalizedProjectedGradientL2Norm;
-import optimization.stopCriteria.NormalizedValueDifference;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import java.util.*;
-import java.util.regex.*;
-import gnu.trove.TDoubleArrayList;
-import gnu.trove.TIntArrayList;
-import static java.lang.Math.*;
-
-class PhraseContextModel
-{
- // model/optimisation configuration parameters
- int numTags;
- boolean posteriorRegularisation = true;
- double constraintScale = 3; // FIXME: make configurable
-
- // copied from L1LMax in depparsing code
- final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10;
- final int maxZoomEvals = 10, maxExtrapolationIters = 200;
- int maxProjectionIterations = 200;
- int minOccurrencesForProjection = 0;
-
- // book keeping
- int numPositions;
- Random rng = new Random();
-
- // training set
- Corpus training;
-
- // model parameters (learnt)
- double emissions[][][]; // position in 0 .. 3 x tag x word Pr(word | tag, position)
- double prior[][]; // phrase x tag Pr(tag | phrase)
- double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers
-
- PhraseContextModel(Corpus training, int tags)
- {
- this.training = training;
- this.numTags = tags;
- assert (!training.getEdges().isEmpty());
- assert (numTags > 1);
-
- // now initialise emissions
- numPositions = training.getEdges().get(0).getContext().size();
- assert (numPositions > 0);
-
- emissions = new double[numPositions][numTags][training.getNumTokens()];
- prior = new double[training.getNumEdges()][numTags];
- if (posteriorRegularisation)
- lambda = new double[training.getNumEdges() * numTags];
-
- for (double[][] emissionTW : emissions)
- {
- for (double[] emissionW : emissionTW)
- {
- randomise(emissionW);
-// for (int i = 0; i < emissionW.length; ++i)
-// emissionW[i] = i+1;
-// normalise(emissionW);
- }
- }
-
- for (double[] priorTag : prior)
- {
- randomise(priorTag);
-// for (int i = 0; i < priorTag.length; ++i)
-// priorTag[i] = i+1;
-// normalise(priorTag);
- }
- }
-
- void expectationMaximisation(int numIterations)
- {
- double lastLlh = Double.NEGATIVE_INFINITY;
-
- for (int iteration = 0; iteration < numIterations; ++iteration)
- {
- double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()];
- double priorCounts[][] = new double[training.getNumPhrases()][numTags];
-
- // E-step
- double llh = 0;
- if (posteriorRegularisation)
- {
- EStepDualObjective objective = new EStepDualObjective();
-
- // copied from x2y2withconstraints
-// LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-// OptimizerStats stats = new OptimizerStats();
-// ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
-// CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
-// compositeStop.add(new ProjectedGradientL2Norm(0.001));
-// compositeStop.add(new ValueDifference(0.001));
-// optimizer.setMaxIterations(50);
-// boolean succeed = optimizer.optimize(objective,stats,compositeStop);
-
- // copied from depparser l1lmaxobjective
- ProjectedOptimizerStats stats = new ProjectedOptimizerStats();
- GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1);
- LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2);
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch);
- optimizer.setMaxIterations(maxProjectionIterations);
- CompositeStopingCriteria stop = new CompositeStopingCriteria();
- stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision));
- stop.add(new NormalizedValueDifference(stoppingPrecision));
- boolean succeed = optimizer.optimize(objective, stats, stop);
-
- System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- //System.out.println("Solution: " + objective.parameters);
- if (!succeed)
- System.out.println("Failed to optimize");
- //System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
-
- //lambda = objective.getParameters();
- llh = objective.primal();
-
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- for (int t = 0; t < numTags; t++)
- {
- double p = objective.q.get(i).get(j).get(t);
- priorCounts[i][t] += e.getCount() * p;
- TIntArrayList tokens = e.getContext();
- for (int k = 0; k < tokens.size(); ++k)
- emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p;
- }
- }
- }
- }
- else
- {
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- double probs[] = posterior(i, e);
- double z = normalise(probs);
- llh += log(z) * e.getCount();
-
- TIntArrayList tokens = e.getContext();
- for (int t = 0; t < numTags; ++t)
- {
- priorCounts[i][t] += e.getCount() * probs[t];
- for (int k = 0; k < tokens.size(); ++k)
- emissionsCounts[j][t][tokens.get(k)] += e.getCount() * probs[t];
- }
- }
- }
- }
-
- // M-step: normalise
- for (double[][] emissionTW : emissionsCounts)
- for (double[] emissionW : emissionTW)
- normalise(emissionW);
-
- for (double[] priorTag : priorCounts)
- normalise(priorTag);
-
- emissions = emissionsCounts;
- prior = priorCounts;
-
- System.out.println("Iteration " + iteration + " llh " + llh);
-
-// if (llh - lastLlh < 1e-4)
-// break;
-// else
-// lastLlh = llh;
- }
- }
-
- static double normalise(double probs[])
- {
- double z = 0;
- for (double p : probs)
- z += p;
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- return z;
- }
-
- void randomise(double probs[])
- {
- double z = 0;
- for (int i = 0; i < probs.length; ++i)
- {
- probs[i] = 10 + rng.nextDouble();
- z += probs[i];
- }
-
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- }
-
- static int argmax(double probs[])
- {
- double m = Double.NEGATIVE_INFINITY;
- int mi = -1;
- for (int i = 0; i < probs.length; ++i)
- {
- if (probs[i] > m)
- {
- m = probs[i];
- mi = i;
- }
- }
- return mi;
- }
-
- double[] posterior(int phraseId, Corpus.Edge e) // unnormalised
- {
- double probs[] = new double[numTags];
- TIntArrayList tokens = e.getContext();
- for (int t = 0; t < numTags; ++t)
- {
- probs[t] = prior[phraseId][t];
- for (int k = 0; k < tokens.size(); ++k)
- probs[t] *= emissions[k][t][tokens.get(k)];
- }
- return probs;
- }
-
- void displayPosterior()
- {
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (Corpus.Edge e: edges)
- {
- double probs[] = posterior(i, e);
- normalise(probs);
-
- // emit phrase
- System.out.print(e.getPhraseString());
- System.out.print("\t");
- System.out.print(e.getContextString());
- System.out.print("||| C=" + e.getCount() + " |||");
-
- int t = argmax(probs);
- System.out.print(" " + t + " ||| " + probs[t]);
- // for (int t = 0; t < numTags; ++t)
- // System.out.print(" " + probs[t]);
- System.out.println();
- }
- }
- }
-
- public static void main(String[] args)
- {
- assert (args.length >= 2);
- try
- {
- Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0])));
- PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1]));
- model.expectationMaximisation(Integer.parseInt(args[2]));
- model.displayPosterior();
- }
- catch (IOException e)
- {
- System.out.println("Failed to read input file: " + args[0]);
- e.printStackTrace();
- }
- }
-
- class EStepDualObjective extends ProjectedObjective
- {
- List<List<TDoubleArrayList>> conditionals; // phrase id x context # x tag - precomputed
- List<List<TDoubleArrayList>> q; // ditto, but including exp(-lambda) terms
- double objective = 0; // log(z)
- // Objective.gradient = d log(z) / d lambda = E_q[phi]
- double llh = 0;
-
- public EStepDualObjective()
- {
- super();
- // compute conditionals p(context, tag | phrase) for all training instances
- conditionals = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
- q = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- conditionals.add(new ArrayList<TDoubleArrayList>(edges.size()));
- q.add(new ArrayList<TDoubleArrayList>(edges.size()));
-
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- double probs[] = posterior(i, e);
- double z = normalise(probs);
- llh += log(z) * e.getCount();
- conditionals.get(i).add(new TDoubleArrayList(probs));
- q.get(i).add(new TDoubleArrayList(probs));
- }
- }
-
- gradient = new double[training.getNumEdges()*numTags];
- setInitialParameters(lambda);
- computeObjectiveAndGradient();
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- SimplexProjection p = new SimplexProjection(constraintScale);
-
- double[] newPoint = point.clone();
- int edgeIndex = 0;
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- for (int t = 0; t < numTags; t++)
- {
- double[] subPoint = new double[edges.size()];
- for (int j = 0; j < edges.size(); ++j)
- subPoint[j] = point[edgeIndex+j*numTags+t];
-
- p.project(subPoint);
- for (int j = 0; j < edges.size(); ++j)
- newPoint[edgeIndex+j*numTags+t] = subPoint[j];
- }
-
- edgeIndex += edges.size() * numTags;
- }
-// System.out.println("Proj from: " + Arrays.toString(point));
-// System.out.println("Proj to: " + Arrays.toString(newPoint));
- return newPoint;
- }
-
- @Override
- public void setParameters(double[] params)
- {
- super.setParameters(params);
- computeObjectiveAndGradient();
- }
-
- @Override
- public double[] getGradient()
- {
- gradientCalls += 1;
- return gradient;
- }
-
- @Override
- public double getValue()
- {
- functionCalls += 1;
- return objective;
- }
-
- public void computeObjectiveAndGradient()
- {
- int edgeIndex = 0;
- objective = 0;
- Arrays.fill(gradient, 0);
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
-
- double z = 0;
- for (int t = 0; t < numTags; t++)
- {
- double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]);
- q.get(i).get(j).set(t, v);
- z += v;
- }
- objective += log(z) * e.getCount();
-
- for (int t = 0; t < numTags; t++)
- {
- double v = q.get(i).get(j).get(t) / z;
- q.get(i).get(j).set(t, v);
- gradient[edgeIndex+t] -= e.getCount() * v;
- }
-
- edgeIndex += numTags;
- }
- }
-// System.out.println("computeObjectiveAndGradient logz=" + objective);
-// System.out.println("lambda= " + Arrays.toString(parameters));
-// System.out.println("gradient=" + Arrays.toString(gradient));
- }
-
- public String toString()
- {
- StringBuilder sb = new StringBuilder();
- sb.append(getClass().getCanonicalName()).append(" with ");
- sb.append(parameters.length).append(" parameters and ");
- sb.append(training.getNumPhrases() * numTags).append(" constraints");
- return sb.toString();
- }
-
- double primal()
- {
- // primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct]
- // kl = sum_Y q(Y) log q(Y) / p(Y|X)
- // = sum_Y q(Y) { -lambda . phi(Y) - log Z }
- // = -log Z - lambda . E_q[phi]
- // = -objective + lambda . gradient
-
- double kl = -objective + MathUtils.dotProduct(parameters, gradient);
- double l1lmax = 0;
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int t = 0; t < numTags; t++)
- {
- double lmax = Double.NEGATIVE_INFINITY;
- for (int j = 0; j < edges.size(); ++j)
- lmax = max(lmax, q.get(i).get(j).get(t));
- l1lmax += lmax;
- }
- }
-
- return llh + kl + constraintScale * l1lmax;
- }
- }
-}
diff --git a/gi/posterior-regularisation/README b/gi/posterior-regularisation/README
deleted file mode 100644
index a3d54ffc..00000000
--- a/gi/posterior-regularisation/README
+++ /dev/null
@@ -1,3 +0,0 @@
- 557 ./cdec_extools/extractor -i btec/split.zh-en.al -c 500000 -L 12 -C | sort -t $'\t' -k 1 | ./cdec_extools/mr_stripe_rule_reduce > btec.concordance
- 559 wc -l btec.concordance
- 588 cat btec.concordance | sed 's/.* //' | awk '{ for (i=1; i < NF; i++) { x=substr($i, 1, 2); if (x == "C=") printf "\n"; else if (x != "||") printf "%s ", $i; }; printf "\n"; }' | sort | uniq | wc -l
diff --git a/gi/posterior-regularisation/alphabet.hh b/gi/posterior-regularisation/alphabet.hh
deleted file mode 100644
index 1db928da..00000000
--- a/gi/posterior-regularisation/alphabet.hh
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef _alphabet_hh
-#define _alphabet_hh
-
-#include <cassert>
-#include <iosfwd>
-#include <map>
-#include <string>
-#include <vector>
-
-// Alphabet: indexes a set of types
-template <typename T>
-class Alphabet: protected std::map<T, int>
-{
-public:
- Alphabet() {};
-
- bool empty() const { return std::map<T,int>::empty(); }
- int size() const { return std::map<T,int>::size(); }
-
- int operator[](const T &k) const
- {
- typename std::map<T,int>::const_iterator cit = find(k);
- if (cit != std::map<T,int>::end())
- return cit->second;
- else
- return -1;
- }
-
- int lookup(const T &k) const { return (*this)[k]; }
-
- int insert(const T &k)
- {
- int sz = size();
- assert((unsigned) sz == _items.size());
-
- std::pair<typename std::map<T,int>::iterator, bool>
- ins = std::map<T,int>::insert(make_pair(k, sz));
-
- if (ins.second)
- _items.push_back(k);
-
- return ins.first->second;
- }
-
- const T &type(int i) const
- {
- assert(i >= 0);
- assert(i < size());
- return _items[i];
- }
-
- std::ostream &display(std::ostream &out, int i) const
- {
- return out << type(i);
- }
-
-private:
- std::vector<T> _items;
-};
-
-#endif
diff --git a/gi/posterior-regularisation/canned.concordance b/gi/posterior-regularisation/canned.concordance
deleted file mode 100644
index 710973ff..00000000
--- a/gi/posterior-regularisation/canned.concordance
+++ /dev/null
@@ -1,4 +0,0 @@
-a 0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1 ||| 2 2 <PHRASE> 2 2 ||| C=1
-b 0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1
-c 2 2 <PHRASE> 2 2 ||| C=1 ||| 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1
-d 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1
diff --git a/gi/posterior-regularisation/em.cc b/gi/posterior-regularisation/em.cc
deleted file mode 100644
index f6c9fd68..00000000
--- a/gi/posterior-regularisation/em.cc
+++ /dev/null
@@ -1,830 +0,0 @@
-// Input of the form:
-// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1
-// phrase TAB [context]+
-// where context = phrase ||| C=... which are separated by |||
-
-// Model parameterised as follows:
-// - each phrase, p, is allocated a latent state, t
-// - this is used to generate the contexts, c
-// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR
-
-// Training with EM:
-// - e-step is estimating P(t|p,c) for all p,c
-// - m-step is estimating model parameters P(p,c,t) = P(t) P(p|t) P(c|t)
-
-// Sexing it up:
-// - constrain the posteriors P(t|c) and P(t|p) to have few high-magnitude entries
-// - improve the generation of phrase internals, e.g., generate edge words from
-// different distribution to central words
-
-#include "alphabet.hh"
-#include "log_add.hh"
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <sstream>
-#include <stdexcept>
-#include <vector>
-#include <tr1/random>
-#include <tr1/tuple>
-#include <nlopt.h>
-
-using namespace std;
-using namespace std::tr1;
-
-// Compile-time configuration.
-// numTags: number of latent tags; sizes the prior and every per-tag table.
-const int numTags = 5;
-// numIterations: number of EM iterations run by main().
-const int numIterations = 100;
-// posterior_regularisation: when true main() calls optimise_lambda() every
-// iteration and takes its E-step posteriors from penalised_conditionals().
-const bool posterior_regularisation = true;
-// Passed by main() to optimise_lambda() as delta (phrases) and gamma (contexts).
-const double PHRASE_VIOLATION_WEIGHT = 10;
-const double CONTEXT_VIOLATION_WEIGHT = 0;
-// includePhraseProb: when true the phrase-internal word and length terms are
-// initialised, re-estimated and multiplied into conditional_probs().
-const bool includePhraseProb = false;
-
-// Data structures:
-// lexicon maps word strings to ids; phrases and contexts map word-id
-// sequences / 4-tuples to phrase ids and context ids respectively.
-Alphabet<string> lexicon;
-typedef vector<int> Phrase;
-typedef tuple<int, int, int, int> Context;
-Alphabet<Phrase> phrases;
-Alphabet<Context> contexts;
-
-// Both concordance tables hold the same counts, keyed in opposite orders:
-// phrase id -> context id -> count, and context id -> phrase id -> count.
-typedef map<int, int> ContextCounts;
-typedef map<int, int> PhraseCounts;
-typedef map<int, ContextCounts> PhraseToContextCounts;
-typedef map<int, PhraseCounts> ContextToPhraseCounts;
-
-PhraseToContextCounts concordancePhraseToContexts;
-ContextToPhraseCounts concordanceContextToPhrases;
-
-typedef vector<double> Dist;
-typedef vector<Dist> ConditionalDist;
-Dist prior; // class -> P(class)
-vector<ConditionalDist> probCtx; // indexed [context position 0..3][class][word id] -> P(word | class)
-ConditionalDist probPhrase; // indexed [class][word id] -> P(word | class)
-Dist probPhraseLength; // class -> P(length | class) expressed as geometric distribution parameter
-
-// Noise source for addRandomNoise(): draws from [0.0, 1e-1).
-// NOTE(review): variate_generator is instantiated with mt19937 by value, so
-// rng appears to hold its own copy of the engine and the re-seed of
-// randomGenerator in main() may not affect it - confirm.
-mt19937 randomGenerator((size_t) time(NULL));
-uniform_real<double> uniDist(0.0, 1e-1);
-variate_generator< mt19937, uniform_real<double> > rng(randomGenerator, uniDist);
-
-void addRandomNoise(Dist &d);
-void normalise(Dist &d);
-void addTo(Dist &d, const Dist &e);
-int argmax(const Dist &d);
-
-// phrase -> context -> index of the first of numTags consecutive lagrange
-// multipliers for that edge (filled in by optimise_lambda, read by lambda_index).
-map<Phrase, map<Context, int> > lambda_indices;
-
-Dist conditional_probs(const Phrase &phrase, const Context &context, double *normalisation = 0);
-template <typename T>
-Dist
-penalised_conditionals(const Phrase &phrase, const Context &context,
- const T &lambda, double *normalisation);
-//Dist penalised_conditionals(const Phrase &phrase, const Context &context, const double *lambda, double *normalisation = 0);
-double penalised_log_likelihood(int n, const double *lambda, double *gradient, void *data);
-void optimise_lambda(double delta, double gamma, vector<double> &lambda);
-double expected_violation_phrases(const double *lambda);
-double expected_violation_contexts(const double *lambda);
-double primal_kl_divergence(const double *lambda);
-double dual(const double *lambda);
-void print_primal_dual(const double *lambda, double delta, double gamma);
-
-// Stream output helpers for the model's types.
-ostream &operator<<(ostream &, const Phrase &);
-ostream &operator<<(ostream &, const Context &);
-ostream &operator<<(ostream &, const Dist &);
-ostream &operator<<(ostream &, const ConditionalDist &);
-
-// Reads the concordance from stdin, initialises the model with uniform
-// parameters plus random noise, runs numIterations rounds of EM (with the
-// posterior-regularised E-step when posterior_regularisation is set) and
-// finally prints the most probable tag for every (phrase, context) edge.
-int
-main(int argc, char *argv[])
-{
-    randomGenerator.seed(time(NULL));
-
-    int edges = 0;
-    istream &input = cin;
-    while (input.good())
-    {
-        // read the phrase; if nothing at all could be read (e.g. empty input)
-        // stop rather than registering a phantom empty phrase
-        string phraseString;
-        Phrase phrase;
-        if (!getline(input, phraseString, '\t'))
-            break;
-        istringstream pinput(phraseString);
-        string token;
-        while (pinput >> token)
-            phrase.push_back(lexicon.insert(token));
-        int phraseId = phrases.insert(phrase);
-
-        // read the rest, storing each context
-        string remainder;
-        getline(input, remainder, '\n');
-        istringstream rinput(remainder);
-        Context context(-1, -1, -1, -1);
-        int index = 0;
-        while (rinput >> token)
-        {
-            if (token != "|||" && token != "<PHRASE>")
-            {
-                if (index < 4)
-                {
-                    // tuple elements need a compile-time index, hence the switch
-                    switch (index)
-                    {
-                        case 0: get<0>(context) = lexicon.insert(token); break;
-                        case 1: get<1>(context) = lexicon.insert(token); break;
-                        case 2: get<2>(context) = lexicon.insert(token); break;
-                        case 3: get<3>(context) = lexicon.insert(token); break;
-                        default: assert(false);
-                    }
-                    index += 1;
-                }
-                else if (token.find("C=") == 0)
-                {
-                    // a count token closes the current context
-                    int contextId = contexts.insert(context);
-                    int count = atoi(token.substr(strlen("C=")).c_str());
-                    concordancePhraseToContexts[phraseId][contextId] += count;
-                    concordanceContextToPhrases[contextId][phraseId] += count;
-                    index = 0;
-                    context = Context(-1, -1, -1, -1);
-                    edges += 1;
-                }
-            }
-        }
-
-        // trigger EOF
-        input >> ws;
-    }
-
-    cout << "Read in " << phrases.size() << " phrases"
-         << " and " << contexts.size() << " contexts"
-         << " and " << edges << " edges"
-         << " and " << lexicon.size() << " word types\n";
-
-    // FIXME: filter out low count phrases and low count contexts (based on individual words?)
-    // now populate model parameters with uniform + random noise
-    prior.resize(numTags, 1.0);
-    addRandomNoise(prior);
-    normalise(prior);
-
-    probCtx.resize(4, ConditionalDist(numTags, Dist(lexicon.size(), 1.0)));
-    if (includePhraseProb)
-        probPhrase.resize(numTags, Dist(lexicon.size(), 1.0));
-    for (int t = 0; t < numTags; ++t)
-    {
-        for (int j = 0; j < 4; ++j)
-        {
-            addRandomNoise(probCtx[j][t]);
-            normalise(probCtx[j][t]);
-        }
-        if (includePhraseProb)
-        {
-            addRandomNoise(probPhrase[t]);
-            normalise(probPhrase[t]);
-        }
-    }
-    if (includePhraseProb)
-    {
-        probPhraseLength.resize(numTags, 0.5); // geometric distribution p=0.5
-        addRandomNoise(probPhraseLength);
-    }
-
-    cout << "\tprior: " << prior << "\n";
-
-    vector<double> lambda;
-
-    // now do EM training
-    for (int iteration = 0; iteration < numIterations; ++iteration)
-    {
-        cout << "EM iteration " << iteration << endl;
-
-        if (posterior_regularisation)
-            optimise_lambda(PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT, lambda);
-
-        Dist countsPrior(numTags, 0.0);
-        vector<ConditionalDist> countsCtx(4, ConditionalDist(numTags, Dist(lexicon.size(), 1e-10)));
-        ConditionalDist countsPhrase(numTags, Dist(lexicon.size(), 1e-10));
-        Dist countsPhraseLength(numTags, 0.0);
-        Dist nPhrases(numTags, 0.0);
-
-        double llh = 0;
-        for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin();
-             pcit != concordancePhraseToContexts.end(); ++pcit)
-        {
-            const Phrase &phrase = phrases.type(pcit->first);
-
-            // e-step: estimate latent class probs; compile (class,word) stats for m-step
-            for (ContextCounts::iterator ccit = pcit->second.begin();
-                 ccit != pcit->second.end(); ++ccit)
-            {
-                const Context &context = contexts.type(ccit->first);
-                // unpack once so each position can be addressed with a run-time index
-                const int ctxWords[4] = { get<0>(context), get<1>(context),
-                                          get<2>(context), get<3>(context) };
-
-                double z = 0;
-                Dist tagCounts;
-                if (!posterior_regularisation)
-                    tagCounts = conditional_probs(phrase, context, &z);
-                else
-                    tagCounts = penalised_conditionals(phrase, context, lambda, &z);
-
-                llh += log(z) * ccit->second;
-
-                for (int t = 0; t < numTags; ++t)
-                {
-                    const double weighted = tagCounts[t] * ccit->second;
-
-                    // the prior is weighted by the edge count like every other statistic
-                    countsPrior[t] += weighted;
-
-                    // position j is credited with the word at position j
-                    // (previously every position was credited with word 0)
-                    for (int j = 0; j < 4; ++j)
-                        countsCtx[j][t][ctxWords[j]] += weighted;
-
-                    if (includePhraseProb)
-                    {
-                        for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit)
-                            countsPhrase[t][*pit] += weighted;
-                        countsPhraseLength[t] += phrase.size() * weighted;
-                        nPhrases[t] += weighted;
-                    }
-                }
-            }
-        }
-
-        cout << "M-step\n";
-
-        // m-step: normalise prior and (class,word) stats and assign to model parameters
-        normalise(countsPrior);
-        prior = countsPrior;
-        for (int t = 0; t < numTags; ++t)
-        {
-            for (int j = 0; j < 4; ++j)
-                normalise(countsCtx[j][t]);
-            if (includePhraseProb)
-            {
-                normalise(countsPhrase[t]);
-                countsPhraseLength[t] = nPhrases[t] / countsPhraseLength[t];
-            }
-        }
-        probCtx = countsCtx;
-        if (includePhraseProb)
-        {
-            probPhrase = countsPhrase;
-            probPhraseLength = countsPhraseLength;
-        }
-
-        // The primal/dual report indexes lambda per edge, so it can only be
-        // produced once the multipliers exist; otherwise just report llh.
-        if (!lambda.empty())
-            print_primal_dual(&lambda[0], PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT);
-        else
-            cout << "\tllh=" << llh << endl;
-    }
-
-    // output class membership
-    for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin();
-         pcit != concordancePhraseToContexts.end(); ++pcit)
-    {
-        const Phrase &phrase = phrases.type(pcit->first);
-        for (ContextCounts::iterator ccit = pcit->second.begin();
-             ccit != pcit->second.end(); ++ccit)
-        {
-            const Context &context = contexts.type(ccit->first);
-            Dist tagCounts = conditional_probs(phrase, context, 0);
-            cout << phrase << " ||| " << context << " ||| " << argmax(tagCounts) << "\n";
-        }
-    }
-
-    return 0;
-}
-
-// Adds an independent draw from the global rng to every entry of d.
-void addRandomNoise(Dist &d)
-{
-    for (size_t i = 0; i < d.size(); ++i)
-        d[i] += rng();
-}
-
-// Scales d in place so that its entries sum to one.
-void normalise(Dist &d)
-{
-    const size_t n = d.size();
-
-    double total = 0;
-    for (size_t i = 0; i < n; ++i)
-        total += d[i];
-
-    for (size_t i = 0; i < n; ++i)
-        d[i] /= total;
-}
-
-// Element-wise d += e; both distributions must have the same length.
-void addTo(Dist &d, const Dist &e)
-{
-    assert(d.size() == e.size());
-    Dist::const_iterator src = e.begin();
-    for (Dist::iterator dst = d.begin(); dst != d.end(); ++dst, ++src)
-        *dst += *src;
-}
-
-// Returns the index of the largest entry of d; the first one wins on ties.
-int argmax(const Dist &d)
-{
-    int winner = 0;
-    double top = d[0];
-    const int n = (int) d.size();
-    for (int i = 1; i < n; ++i)
-    {
-        if (!(d[i] > top))
-            continue;
-        top = d[i];
-        winner = i;
-    }
-    return winner;
-}
-
-// Prints the words of a phrase separated by single spaces.
-ostream &operator<<(ostream &out, const Phrase &phrase)
-{
-    for (size_t i = 0; i < phrase.size(); ++i)
-    {
-        if (i > 0)
-            out << " ";
-        lexicon.display(out, phrase[i]);
-    }
-    return out;
-}
-
-// Prints a context as "w0 w1 <PHRASE> w2 w3".
-ostream &operator<<(ostream &out, const Context &context)
-{
-    lexicon.display(out, get<0>(context));
-    out << " ";
-    lexicon.display(out, get<1>(context));
-    out << " <PHRASE> ";
-    lexicon.display(out, get<2>(context));
-    out << " ";
-    lexicon.display(out, get<3>(context));
-    return out;
-}
-
-// Prints the entries of a distribution separated by single spaces.
-ostream &operator<<(ostream &out, const Dist &dist)
-{
-    const char *separator = "";
-    for (size_t i = 0; i < dist.size(); ++i)
-    {
-        out << separator << dist[i];
-        separator = " ";
-    }
-    return out;
-}
-
-// Prints each row of a conditional distribution, rows separated by "; ".
-ostream &operator<<(ostream &out, const ConditionalDist &dist)
-{
-    const char *separator = "";
-    for (size_t i = 0; i < dist.size(); ++i)
-    {
-        out << separator << dist[i];
-        separator = "; ";
-    }
-    return out;
-}
-
-// Position in the flat lambda array of the multiplier for (phrase, context, tag).
-// FIXME: slow - just use the phrase index, context index to do the mapping
-// (n.b. it's a sparse setup, not just equal to 3d array index)
-int
-lambda_index(const Phrase &phrase, const Context &context, int tag)
-{
-    // lambda_indices stores the offset of tag 0 for this edge
-    const int base = lambda_indices[phrase][context];
-    return base + tag;
-}
-
-// Posterior over tags for one edge after applying the lagrange penalties:
-// q(t) is proportional to p(t | phrase, context) * exp(-lambda_t).
-// If normalisation is non-null it receives the pre-normalisation total.
-template <typename T>
-Dist
-penalised_conditionals(const Phrase &phrase, const Context &context,
-                       const T &lambda, double *normalisation)
-{
-    Dist q = conditional_probs(phrase, context, 0);
-
-    double total = 0;
-    for (int tag = 0; tag < numTags; ++tag)
-    {
-        const double penalty = exp(-lambda[lambda_index(phrase, context, tag)]);
-        q[tag] *= penalty;
-        total += q[tag];
-    }
-
-    if (normalisation)
-        *normalisation = total;
-
-    for (int tag = 0; tag < numTags; ++tag)
-        q[tag] /= total;
-
-    return q;
-}
-
-// Model posterior p(t | phrase, context) for every tag t.
-// The unnormalised score of a tag is prior * the four context-word
-// probabilities (* phrase length and phrase word terms when includePhraseProb).
-// If normalisation is non-null it receives the sum of the unnormalised scores.
-Dist
-conditional_probs(const Phrase &phrase, const Context &context, double *normalisation)
-{
-    Dist posterior(numTags, 0.0);
-    double total = 0;
-
-    for (int tag = 0; tag < numTags; ++tag)
-    {
-        double score = prior[tag];
-        score *= (probCtx[0][tag][get<0>(context)] * probCtx[1][tag][get<1>(context)] *
-                  probCtx[2][tag][get<2>(context)] * probCtx[3][tag][get<3>(context)]);
-
-        if (includePhraseProb)
-        {
-            // geometric length term followed by one factor per phrase word
-            score *= pow(1 - probPhraseLength[tag], phrase.size() - 1) * probPhraseLength[tag];
-            for (size_t w = 0; w < phrase.size(); ++w)
-                score *= probPhrase[tag][phrase[w]];
-        }
-
-        posterior[tag] = score;
-        total += score;
-    }
-
-    if (normalisation)
-        *normalisation = total;
-
-    for (int tag = 0; tag < numTags; ++tag)
-        posterior[tag] /= total;
-
-    return posterior;
-}
-
-// Objective handed to nlopt: returns log Z(lambda, theta) over the corpus
-// where theta are the global parameters (prior, probCtx*, probPhrase*)
-// and lambda are lagrange multipliers for the posterior sparsity constraints.
-//
-// this is formulated as:
-//      f = log Z(lambda) = sum_i log ( sum_t p_theta(t|p_i,c_i) exp [-lambda_{t,p_i,c_i}] )
-// where i indexes the training examples - specifying the (p, c) pair (which may occur with count > 1)
-//
-// with derivative:
-//      f'_{tpc} = frac { - count(t,p,c) p_theta(t|p,c) exp (-lambda_{t,p,c}) }
-//                      { sum_t' p_theta(t'|p,c) exp (-lambda_{t',p,c}) }
-//
-// n is the number of multipliers; grad, when non-null, receives the n partial
-// derivatives; the last argument is unused.
-double
-penalised_log_likelihood(int n, const double *lambda, double *grad, void *)
-{
-    double f = 0;
-    if (grad)
-    {
-        for (int i = 0; i < n; ++i)
-            grad[i] = 0.0;
-    }
-
-    // Walk the concordance directly (it is ordered by phrase id) instead of
-    // looking every phrase id up: a phrase that was read without any context
-    // has no entry, and dereferencing the failed lookup is undefined.
-    for (PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.begin();
-         pcit != concordancePhraseToContexts.end(); ++pcit)
-    {
-        const Phrase &phrase = phrases.type(pcit->first);
-        for (ContextCounts::const_iterator ccit = pcit->second.begin();
-             ccit != pcit->second.end(); ++ccit)
-        {
-            const Context &context = contexts.type(ccit->first);
-            double z = 0;
-            Dist scores = penalised_conditionals(phrase, context, lambda, &z);
-
-            f += ccit->second * log(z);
-
-            if (grad)
-            {
-                for (int t = 0; t < numTags; ++t)
-                {
-                    int i = lambda_index(phrase, context, t); // FIXME: redundant lookups
-                    // each multiplier belongs to exactly one edge
-                    assert(grad[i] == 0.0);
-                    grad[i] = - ccit->second * scores[t];
-                }
-            }
-        }
-    }
-
-    return f;
-}
-
-// Description of one inequality constraint handed to nlopt.
-typedef struct
-{
-    // one of p or c should be set to -1, in which case it will be marginalised out
-    // i.e. sum_p' lambda_{p'ct} <= threshold
-    //  or  sum_c' lambda_{pc't} <= threshold
-    int p, c, t;
-    // kept as a double: it is assigned from the double weights delta/gamma
-    // and an int would silently truncate fractional values
-    double threshold;
-} constraint_data;
-
-// nlopt constraint callback. data points at a constraint_data describing
-// either a phrase constraint (p >= 0, c < 0):  sum_c lambda_pct - threshold <= 0
-// or a context constraint (c >= 0, p < 0):     sum_p lambda_pct - threshold <= 0
-// Returns the left-hand side; when grad is non-null it is set to 1 for every
-// multiplier taking part in the sum and 0 elsewhere.
-double
-constraint_and_gradient(int n, const double *lambda, double *grad, void *data)
-{
-    const constraint_data *cd = (const constraint_data *) data;
-    assert(cd->t >= 0);
-    assert(cd->threshold >= 0);
-
-    // FIXME: it's crazy to use a dense gradient here => will only have a handful of non-zero entries
-    if (grad)
-        fill(grad, grad + n, 0.0);
-
-    double total = -cd->threshold;
-
-    if (cd->p < 0)
-    {
-        // context constraint: sum over the phrases seen with this context
-        assert(cd->c >= 0);
-
-        const Context &context = contexts.type(cd->c);
-        ContextToPhraseCounts::const_iterator cpit = concordanceContextToPhrases.find(cd->c);
-        assert(cpit != concordanceContextToPhrases.end());
-
-        const PhraseCounts &seen = cpit->second;
-        for (PhraseCounts::const_iterator it = seen.begin(); it != seen.end(); ++it)
-        {
-            const int i = lambda_index(phrases.type(it->first), context, cd->t);
-            total += lambda[i];
-            if (grad)
-                grad[i] = 1;
-        }
-        return total;
-    }
-
-    // phrase constraint: sum over the contexts seen with this phrase
-    assert(cd->c < 0);
-
-    const Phrase &phrase = phrases.type(cd->p);
-    PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(cd->p);
-    assert(pcit != concordancePhraseToContexts.end());
-
-    const ContextCounts &seen = pcit->second;
-    for (ContextCounts::const_iterator it = seen.begin(); it != seen.end(); ++it)
-    {
-        const int i = lambda_index(phrase, contexts.type(it->first), cd->t);
-        total += lambda[i];
-        if (grad)
-            grad[i] = 1;
-    }
-    return total;
-}
-
-// Optimises the lagrange multipliers in lambdav with nlopt (COBYLA), subject to
-//      sum_c lambda_pct <= delta   for every (phrase, tag), when delta > 0
-//      sum_p lambda_pct <= gamma   for every (context, tag), when gamma > 0
-// On the first call (or whenever lambdav is empty) the edge -> index table is
-// rebuilt and lambdav is sized to numTags entries per edge, all zero.
-// lambdav supplies the starting point and receives the result.
-void
-optimise_lambda(double delta, double gamma, vector<double> &lambdav)
-{
-    int num_lambdas = lambdav.size();
-    if (lambda_indices.empty() || lambdav.empty())
-    {
-        lambda_indices.clear();
-        lambdav.clear();
-
-        // Iterate the concordance itself (ordered by phrase id); phrases that
-        // were read without any context have no entry and are skipped.
-        int next = 0;
-        for (PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.begin();
-             pcit != concordancePhraseToContexts.end(); ++pcit)
-        {
-            const Phrase &phrase = phrases.type(pcit->first);
-            for (ContextCounts::const_iterator ccit = pcit->second.begin();
-                 ccit != pcit->second.end(); ++ccit)
-            {
-                const Context &context = contexts.type(ccit->first);
-                lambda_indices[phrase][context] = next;
-                next += numTags;
-            }
-        }
-        num_lambdas = next;
-        lambdav.resize(num_lambdas);
-    }
-
-    // nothing to optimise (no edges were read)
-    if (num_lambdas == 0)
-        return;
-
-    // FIXME: better to work with an implicit representation to save memory usage
-    int num_constraints = (((delta > 0) ? phrases.size() : 0) + ((gamma > 0) ? contexts.size() : 0)) * numTags;
-    vector<constraint_data> constraints(num_constraints);
-    int i = 0;
-    if (delta > 0)
-    {
-        for (int p = 0; p < phrases.size(); ++p)
-        {
-            for (int t = 0; t < numTags; ++t, ++i)
-            {
-                constraint_data &d = constraints[i];
-                d.p = p;
-                d.c = -1;
-                d.t = t;
-                d.threshold = delta;
-            }
-        }
-    }
-
-    if (gamma > 0)
-    {
-        for (int c = 0; c < contexts.size(); ++c)
-        {
-            for (int t = 0; t < numTags; ++t, ++i)
-            {
-                constraint_data &d = constraints[i];
-                d.p = -1;
-                d.c = c;
-                d.t = t;
-                d.threshold = gamma;
-            }
-        }
-    }
-    assert(i == num_constraints);
-
-    // Box bounds. With non-negative multipliers each sum constraint also
-    // bounds every individual multiplier, so when both constraint families
-    // are active the tighter of the two thresholds is a valid upper bound.
-    double upper;
-    if (delta > 0 && gamma > 0)
-        upper = min(delta, gamma);
-    else if (delta <= 0)
-        upper = gamma;
-    else
-        upper = delta;
-
-    // heap-backed buffers: the number of multipliers grows with the corpus,
-    // which is too large (and non-standard) for stack arrays
-    vector<double> lb(num_lambdas, 0.0);
-    vector<double> ub(num_lambdas, upper);
-
-    double minf;
-    int error_code = nlopt_minimize_constrained(NLOPT_LN_COBYLA, num_lambdas, penalised_log_likelihood, NULL,
-                                                num_constraints, constraint_and_gradient,
-                                                constraints.empty() ? NULL : &constraints[0],
-                                                sizeof(constraint_data),
-                                                &lb[0], &ub[0], &lambdav[0], &minf,
-                                                -HUGE_VAL, 0.0, 0.0, 1e-4, NULL, 0, 0.0);
-
-    if (error_code < 0)
-        cout << "WARNING: optimisation failed with error code: " << error_code << endl;
-}
-
-// Returns sum_pt max_c E_q[phi_pct]: for every phrase and tag, the largest
-// penalised posterior of that tag over the contexts seen with the phrase.
-// FIXME: inefficient - cache the scores
-double
-expected_violation_phrases(const double *lambda)
-{
-    double violation = 0;
-
-    // Iterate the concordance itself so that phrases without any context
-    // (which have no entry) are skipped instead of dereferencing a failed lookup.
-    for (PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.begin();
-         pcit != concordancePhraseToContexts.end(); ++pcit)
-    {
-        const Phrase &phrase = phrases.type(pcit->first);
-
-        for (int t = 0; t < numTags; ++t)
-        {
-            double best = 0;
-            for (ContextCounts::const_iterator ccit = pcit->second.begin();
-                 ccit != pcit->second.end(); ++ccit)
-            {
-                const Context &context = contexts.type(ccit->first);
-                Dist scores = penalised_conditionals(phrase, context, lambda, 0);
-                best = max(best, scores[t]);
-            }
-            violation += best;
-        }
-    }
-
-    return violation;
-}
-
-// Returns sum_ct max_p E_q[phi_pct]: for every context and tag, the largest
-// penalised posterior of that tag over the phrases seen with the context.
-// FIXME: inefficient - cache the scores
-double
-expected_violation_contexts(const double *lambda)
-{
-    double total = 0;
-
-    const int numContexts = contexts.size();
-    for (int c = 0; c < numContexts; ++c)
-    {
-        const Context &context = contexts.type(c);
-        const PhraseCounts &seen = concordanceContextToPhrases.find(c)->second;
-
-        for (int tag = 0; tag < numTags; ++tag)
-        {
-            double highest = 0;
-            for (PhraseCounts::const_iterator it = seen.begin(); it != seen.end(); ++it)
-            {
-                Dist q = penalised_conditionals(phrases.type(it->first), context, lambda, 0);
-                highest = max(highest, q[tag]);
-            }
-            total += highest;
-        }
-    }
-
-    return total;
-}
-
-// Returns the corpus log-likelihood under the current model parameters:
-// sum over edges of count * log(sum_t unnormalised score of t).
-// FIXME: possibly inefficient
-// FIXME: primal evaluation needs to use lambda and calculate l1linf terms
-double
-primal_likelihood()
-{
-    double llh = 0;
-    // Iterate the concordance itself so that phrases without any context
-    // (which have no entry) are skipped instead of dereferencing a failed lookup.
-    for (PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.begin();
-         pcit != concordancePhraseToContexts.end(); ++pcit)
-    {
-        const Phrase &phrase = phrases.type(pcit->first);
-        for (ContextCounts::const_iterator ccit = pcit->second.begin();
-             ccit != pcit->second.end(); ++ccit)
-        {
-            const Context &context = contexts.type(ccit->first);
-            double z = 0;
-            conditional_probs(phrase, context, &z);
-            llh += ccit->second * log(z);
-        }
-    }
-    return llh;
-}
-
-// Returns KL(q || p) = sum_y q(y) { log q(y) - log p(y | x) }
-//         = sum_y q(y) { log p(y | x) - lambda . phi(x, y) - log Z - log p(y | x) }
-//         = sum_y q(y) { - lambda . phi(x, y) } - log Z
-// where q(y) factors over the edges, as does Z; every edge is weighted by its count.
-// FIXME: inefficient - cache the scores
-double
-primal_kl_divergence(const double *lambda)
-{
-    double feature_sum = 0, log_z = 0;
-    // Iterate the concordance itself so that phrases without any context
-    // (which have no entry) are skipped instead of dereferencing a failed lookup.
-    for (PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.begin();
-         pcit != concordancePhraseToContexts.end(); ++pcit)
-    {
-        const Phrase &phrase = phrases.type(pcit->first);
-        for (ContextCounts::const_iterator ccit = pcit->second.begin();
-             ccit != pcit->second.end(); ++ccit)
-        {
-            const Context &context = contexts.type(ccit->first);
-
-            double local_z = 0;     // sum_t p(t) exp(-lambda_t)
-            double local_f = 0;     // sum_t lambda_t p(t) exp(-lambda_t)
-            Dist d = conditional_probs(phrase, context, 0);
-            for (int t = 0; t < numTags; ++t)
-            {
-                int i = lambda_index(phrase, context, t);
-                double s = d[t] * exp(-lambda[i]);
-                local_f += lambda[i] * s;
-                local_z += s;
-            }
-
-            log_z += ccit->second * log(local_z);
-            feature_sum += ccit->second * (local_f / local_z);
-        }
-    }
-
-    return -feature_sum - log_z;
-}
-
-// Returns the dual objective log Z(lambda) over the corpus: the sum over
-// edges of count * log(sum_t p(t | p, c) exp(-lambda_tpc)), i.e. the same
-// quantity penalised_log_likelihood() minimises.
-// n.b. have flipped the sign as we're minimising
-// FIXME: inefficient - cache the scores
-double
-dual(const double *lambda)
-{
-    double log_z = 0;
-    // Iterate the concordance itself so that phrases without any context
-    // (which have no entry) are skipped instead of dereferencing a failed lookup.
-    for (PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.begin();
-         pcit != concordancePhraseToContexts.end(); ++pcit)
-    {
-        const Phrase &phrase = phrases.type(pcit->first);
-        for (ContextCounts::const_iterator ccit = pcit->second.begin();
-             ccit != pcit->second.end(); ++ccit)
-        {
-            const Context &context = contexts.type(ccit->first);
-            // per-edge normaliser; this must go into lz, not into the running
-            // total (which the old code overwrote on every edge)
-            double lz = 0;
-            penalised_conditionals(phrase, context, lambda, &lz);
-            log_z += ccit->second * log(lz);
-        }
-    }
-    return log_z;
-}
-
-// Prints one summary line for the current parameters and multipliers:
-// log-likelihood, KL term, the two expected-violation sums and the combined
-// objective llh - kl + delta * phrase violations + gamma * context violations.
-void
-print_primal_dual(const double *lambda, double delta, double gamma)
-{
-    const double llh = primal_likelihood();
-    const double kl = primal_kl_divergence(lambda);
-    const double phraseViolation = expected_violation_phrases(lambda);
-    const double contextViolation = expected_violation_contexts(lambda);
-
-    const double objective = (llh - kl + delta * phraseViolation + gamma * contextViolation);
-
-    cout << "\tllh=" << llh;
-    cout << " kl=" << kl;
-    cout << " violations phrases=" << phraseViolation;
-    cout << " contexts=" << contextViolation;
-    cout << " objective=" << objective;
-    cout << endl;
-}
diff --git a/gi/posterior-regularisation/invert.hh b/gi/posterior-regularisation/invert.hh
deleted file mode 100644
index d06356e9..00000000
--- a/gi/posterior-regularisation/invert.hh
+++ /dev/null
@@ -1,45 +0,0 @@
-// The following code inverts the matrix input using LU-decomposition with
-// backsubstitution of unit vectors. Reference: Numerical Recipes in C, 2nd
-// ed., by Press, Teukolsky, Vetterling & Flannery.
-// Code written by Fredrik Orderud.
-// http://www.crystalclearsoftware.com/cgi-bin/boost_wiki/wiki.pl?LU_Matrix_Inversion
-
-#ifndef INVERT_MATRIX_HPP
-#define INVERT_MATRIX_HPP
-
-// REMEMBER to update "lu.hpp" header includes from boost-CVS
-#include <boost/numeric/ublas/vector.hpp>
-#include <boost/numeric/ublas/vector_proxy.hpp>
-#include <boost/numeric/ublas/matrix.hpp>
-#include <boost/numeric/ublas/triangular.hpp>
-#include <boost/numeric/ublas/lu.hpp>
-#include <boost/numeric/ublas/io.hpp>
-
-namespace ublas = boost::numeric::ublas;
-
-/* Matrix inversion routine.
-   Inverts `input` into `inverse` with uBLAS lu_factorize / lu_substitute.
-   Returns false (leaving `inverse` untouched) when the factorisation
-   reports a non-zero result, true otherwise. */
-template<class T>
-bool invert_matrix(const ublas::matrix<T>& input, ublas::matrix<T>& inverse)
-{
-    // the factorisation is done in place, so work on a copy of the input
-    ublas::matrix<T> lu(input);
-
-    // row permutations chosen by the factorisation
-    ublas::permutation_matrix<std::size_t> pivots(lu.size1());
-
-    int status = ublas::lu_factorize(lu, pivots);
-    if (status != 0)
-        return false;
-
-    // start from the identity and back-substitute each unit vector
-    inverse.assign(ublas::identity_matrix<T>(lu.size1()));
-    ublas::lu_substitute(lu, pivots, inverse);
-
-    return true;
-}
-
-#endif //INVERT_MATRIX_HPP
diff --git a/gi/posterior-regularisation/linesearch.py b/gi/posterior-regularisation/linesearch.py
deleted file mode 100644
index 5a3f2e9c..00000000
--- a/gi/posterior-regularisation/linesearch.py
+++ /dev/null
@@ -1,58 +0,0 @@
-## Automatically adapted for scipy Oct 07, 2005 by convertcode.py
-
-from scipy.optimize import minpack2
-import numpy
-
-import __builtin__
-pymin = __builtin__.min
-
-def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
- args=(), c1=1e-4, c2=0.9, amax=50):
-
- fc = 0
- gc = 0
- phi0 = old_fval
- derphi0 = numpy.dot(gfk,pk)
- alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0)
- # trevor: added this test
- alpha1 = pymin(alpha1,amax)
-
- if isinstance(myfprime,type(())):
- eps = myfprime[1]
- fprime = myfprime[0]
- newargs = (f,eps) + args
- gradient = False
- else:
- fprime = myfprime
- newargs = args
- gradient = True
-
- xtol = 1e-14
- amin = 1e-8
- isave = numpy.zeros((2,), numpy.intc)
- dsave = numpy.zeros((13,), float)
- task = 'START'
- fval = old_fval
- gval = gfk
-
- while 1:
- stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2,
- xtol, task, amin, amax,isave,dsave)
- #print 'minpack2.dcsrch', alpha1, phi0, derphi0, c1, c2, xtol, task, amin, amax,isave,dsave
- #print 'returns', stp,fval,derphi,task
-
- if task[:2] == 'FG':
- alpha1 = stp
- fval = f(xk+stp*pk,*args)
- fc += 1
- gval = fprime(xk+stp*pk,*newargs)
- if gradient: gc += 1
- else: fc += len(xk) + 1
- phi0 = fval
- derphi0 = numpy.dot(gval,pk)
- else:
- break
-
- if task[:5] == 'ERROR' or task[1:4] == 'WARN':
- stp = None # failed
- return stp, fc, gc, fval, old_fval, gval
diff --git a/gi/posterior-regularisation/log_add.hh b/gi/posterior-regularisation/log_add.hh
deleted file mode 100644
index e0620c5a..00000000
--- a/gi/posterior-regularisation/log_add.hh
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef log_add_hh
-#define log_add_hh
-
-#include <limits>
-#include <iostream>
-#include <cassert>
-#include <cmath>
-
-// Arithmetic on values stored as logarithms.
-template <typename T>
-struct Log
-{
-    // log(0)
-    static T zero() { return -std::numeric_limits<T>::infinity(); }
-
-    // Returns log(exp(l1) + exp(l2)), factoring out the larger argument so
-    // that the exponential cannot overflow.
-    static T add(T l1, T l2)
-    {
-        if (l1 == zero()) return l2;
-        if (l2 == zero()) return l1;
-        if (l1 > l2)
-            return l1 + std::log(1 + std::exp(l2 - l1));
-        else
-            return l2 + std::log(1 + std::exp(l1 - l2));
-    }
-
-    // Returns log(exp(l1) - exp(l2)); only meaningful for l1 >= l2.
-    static T subtract(T l1, T l2)
-    {
-        //assert(l1 >= l2);
-        // std:: qualified so the overload matching T is picked
-        return l1 + std::log(1 - std::exp(l2 - l1));
-    }
-};
-
-#endif
diff --git a/gi/posterior-regularisation/prjava.jar b/gi/posterior-regularisation/prjava.jar
deleted file mode 120000
index da8bf761..00000000
--- a/gi/posterior-regularisation/prjava.jar
+++ /dev/null
@@ -1 +0,0 @@
-prjava/prjava-20100708.jar \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/Makefile b/gi/posterior-regularisation/prjava/Makefile
deleted file mode 100755
index bd3bfca0..00000000
--- a/gi/posterior-regularisation/prjava/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# Thin wrapper around the ant build; none of the targets produce a file of
-# the same name, so they are declared phony.
-.PHONY: all check clean
-
-all:
-	ant dist
-
-check:
-	echo no tests
-
-clean:
-	ant clean
diff --git a/gi/posterior-regularisation/prjava/build.xml b/gi/posterior-regularisation/prjava/build.xml
deleted file mode 100644
index 7222b3c8..00000000
--- a/gi/posterior-regularisation/prjava/build.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-<!-- Ant build for prjava: compiles src/ into build/ and packages the classes
-     as a date-stamped jar; "dist" is the default target. -->
-<project name="prjava" default="dist" basedir=".">
- <!-- set global properties for this build -->
- <property name="src" location="src"/>
- <property name="build" location="build"/>
- <!-- the distribution jar is written into lib, next to the dependency jars -->
- <property name="dist" location="lib"/>
- <!-- third-party jars needed at compile time -->
- <path id="classpath">
- <pathelement location="lib/trove-2.0.2.jar"/>
- <pathelement location="lib/optimization.jar"/>
- <pathelement location="lib/jopt-simple-3.2.jar"/>
- <pathelement location="lib/commons-math-2.1.jar"/>
- </path>
-
- <target name="init">
- <!-- Create the time stamp -->
- <tstamp/>
- <!-- Create the build directory structure used by compile -->
- <mkdir dir="${build}"/>
- </target>
-
- <target name="compile" depends="init"
- description="compile the source " >
- <!-- Compile the java code from ${src} into ${build} -->
- <javac srcdir="${src}" destdir="${build}" includeantruntime="false">
- <classpath refid="classpath"/>
- </javac>
- </target>
-
- <target name="dist" depends="compile"
- description="generate the distribution" >
- <!-- package the compiled classes as ${dist}/prjava-${DSTAMP}.jar and point
-      the ./prjava.jar symlink at that jar -->
- <jar jarfile="${dist}/prjava-${DSTAMP}.jar" basedir="${build}"/>
- <symlink link="./prjava.jar" resource="${dist}/prjava-${DSTAMP}.jar" overwrite="true"/>
- </target>
-
- <target name="clean"
- description="clean up" >
- <!-- removes only the build directory; jars already written to ${dist} stay -->
- <delete dir="${build}"/>
- </target>
-</project>
diff --git a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar b/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar
deleted file mode 100644
index 43b4b369..00000000
--- a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar b/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar
deleted file mode 100644
index 56373621..00000000
--- a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar b/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar
deleted file mode 100644
index 3e59fbf3..00000000
--- a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java
deleted file mode 100644
index be0a6ed6..00000000
--- a/gi/posterior-regularisation/prjava/src/arr/F.java
+++ /dev/null
@@ -1,99 +0,0 @@
-package arr;
-
-import java.util.Arrays;
-import java.util.Random;
-
-public class F {
- public static Random rng = new Random();
-
- public static void randomise(double probs[])
- {
- randomise(probs, true);
- }
-
- public static void randomise(double probs[], boolean normalise)
- {
- double z = 0;
- for (int i = 0; i < probs.length; ++i)
- {
- probs[i] = 10 + rng.nextDouble();
- if (normalise)
- z += probs[i];
- }
-
- if (normalise)
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- }
-
- public static void uniform(double probs[])
- {
- for (int i = 0; i < probs.length; ++i)
- probs[i] = 1.0 / probs.length;
- }
-
- public static void l1normalize(double [] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- sum+=a[i];
- }
- if(sum==0)
- Arrays.fill(a, 1.0/a.length);
- else
- {
- for(int i=0;i<a.length;i++){
- a[i]/=sum;
- }
- }
- }
-
- public static void l1normalize(double [][] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- sum+=a[i][j];
- }
- }
- if(sum==0){
- return;
- }
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]/=sum;
- }
- }
- }
-
- public static double l1norm(double a[]){
- // FIXME: this isn't the l1 norm for a < 0
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i];
- }
- return norm;
- }
-
- public static double l2norm(double a[]){
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i]*a[i];
- }
- return Math.sqrt(norm);
- }
-
- public static int argmax(double probs[])
- {
- double m = Double.NEGATIVE_INFINITY;
- int mi = -1;
- for (int i = 0; i < probs.length; ++i)
- {
- if (probs[i] > m)
- {
- m = probs[i];
- mi = i;
- }
- }
- return mi;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/data/Corpus.java b/gi/posterior-regularisation/prjava/src/data/Corpus.java
deleted file mode 100644
index 425ede11..00000000
--- a/gi/posterior-regularisation/prjava/src/data/Corpus.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package data;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class Corpus {
-
- public static final String alphaFilename="../posdata/corpus.alphabet";
- public static final String tagalphaFilename="../posdata/corpus.tag.alphabet";
-
-// public static final String START_SYM="<s>";
- public static final String END_SYM="<e>";
- public static final String NUM_TOK="<NUM>";
-
- public static final String UNK_TOK="<unk>";
-
- private ArrayList<String[]>sent;
- private ArrayList<int[]>data;
-
- public ArrayList<String[]>tag;
- public ArrayList<int[]>tagData;
-
- public static boolean convertNumTok=true;
-
- private HashMap<String,Integer>freq;
- public HashMap<String,Integer>vocab;
-
- public HashMap<String,Integer>tagVocab;
- private int tagV;
-
- private int V;
-
- public static void main(String[] args) {
- Corpus c=new Corpus("../posdata/en_test.conll");
- System.out.println(
- Arrays.toString(c.get(0))
- );
- System.out.println(
- Arrays.toString(c.getInt(0))
- );
-
- System.out.println(
- Arrays.toString(c.get(1))
- );
- System.out.println(
- Arrays.toString(c.getInt(1))
- );
- }
-
- public Corpus(String filename,HashMap<String,Integer>dict){
- V=0;
- tagV=0;
- freq=new HashMap<String,Integer>();
- tagVocab=new HashMap<String,Integer>();
- vocab=dict;
-
- sent=new ArrayList<String[]>();
- tag=new ArrayList<String[]>();
-
- Scanner sc=io.FileUtil.openInFile(filename);
- ArrayList<String>s=new ArrayList<String>();
- // s.add(START_SYM);
- while(sc.hasNextLine()){
- String line=sc.nextLine();
- String toks[]=line.split("\t");
- if(toks.length<2){
- s.add(END_SYM);
- sent.add(s.toArray(new String[0]));
- s=new ArrayList<String>();
- // s.add(START_SYM);
- continue;
- }
- String tok=toks[1].toLowerCase();
- s.add(tok);
- }
- sc.close();
-
- buildData();
- }
-
- public Corpus(String filename){
- V=0;
- freq=new HashMap<String,Integer>();
- vocab=new HashMap<String,Integer>();
- tagVocab=new HashMap<String,Integer>();
-
- sent=new ArrayList<String[]>();
- tag=new ArrayList<String[]>();
-
- System.out.println("Reading:"+filename);
-
- Scanner sc=io.FileUtil.openInFile(filename);
- ArrayList<String>s=new ArrayList<String>();
- ArrayList<String>tags=new ArrayList<String>();
- //s.add(START_SYM);
- while(sc.hasNextLine()){
- String line=sc.nextLine();
- String toks[]=line.split("\t");
- if(toks.length<2){
- s.add(END_SYM);
- tags.add(END_SYM);
- if(s.size()>2){
- sent.add(s.toArray(new String[0]));
- tag.add(tags.toArray(new String [0]));
- }
- s=new ArrayList<String>();
- tags=new ArrayList<String>();
- // s.add(START_SYM);
- continue;
- }
-
- String tok=toks[1].toLowerCase();
- if(convertNumTok && tok.matches(".*\\d.*")){
- tok=NUM_TOK;
- }
- s.add(tok);
-
- if(toks.length>3){
- tok=toks[3].toLowerCase();
- }else{
- tok="_";
- }
- tags.add(tok);
-
- }
- sc.close();
-
- for(int i=0;i<sent.size();i++){
- String[]toks=sent.get(i);
- for(int j=0;j<toks.length;j++){
- addVocab(toks[j]);
- addTag(tag.get(i)[j]);
- }
- }
-
- buildVocab();
- buildData();
- System.out.println(data.size()+"sentences, "+vocab.keySet().size()+" word types");
- }
-
- public String[] get(int idx){
- return sent.get(idx);
- }
-
- private void addVocab(String s){
- Integer integer=freq.get(s);
- if(integer==null){
- integer=0;
- }
- freq.put(s, integer+1);
- }
-
- public int tokIdx(String tok){
- Integer integer=vocab.get(tok);
- if(integer==null){
- return V;
- }
- return integer;
- }
-
- public int tagIdx(String tok){
- Integer integer=tagVocab.get(tok);
- if(integer==null){
- return tagV;
- }
- return integer;
- }
-
- private void buildData(){
- data=new ArrayList<int[]>();
- for(int i=0;i<sent.size();i++){
- String s[]=sent.get(i);
- data.add(new int [s.length]);
- for(int j=0;j<s.length;j++){
- data.get(i)[j]=tokIdx(s[j]);
- }
- }
-
- tagData=new ArrayList<int[]>();
- for(int i=0;i<tag.size();i++){
- String s[]=tag.get(i);
- tagData.add(new int [s.length]);
- for(int j=0;j<s.length;j++){
- tagData.get(i)[j]=tagIdx(s[j]);
- }
- }
- sent=null;
- tag=null;
- System.gc();
- }
-
- public int [] getInt(int idx){
- return data.get(idx);
- }
-
- /**
- *
- * @return size of vocabulary
- */
- public int getVocabSize(){
- return V;
- }
-
- public int [][]getAllData(){
- return data.toArray(new int [0][]);
- }
-
- public int [][]getTagData(){
- return tagData.toArray(new int [0][]);
- }
-
- private void buildVocab(){
- for (String key:freq.keySet()){
- if(freq.get(key)>2){
- vocab.put(key, V);
- V++;
- }
- }
- io.SerializedObjects.writeSerializedObject(vocab, alphaFilename);
- io.SerializedObjects.writeSerializedObject(tagVocab,tagalphaFilename);
- }
-
- private void addTag(String tag){
- Integer i=tagVocab.get(tag);
- if(i==null){
- tagVocab.put(tag, tagV);
- tagV++;
- }
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMM.java b/gi/posterior-regularisation/prjava/src/hmm/HMM.java
deleted file mode 100644
index 17a4679f..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/HMM.java
+++ /dev/null
@@ -1,579 +0,0 @@
-package hmm;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Scanner;
-
-public class HMM {
-
-
- //trans[i][j]=prob of going FROM i to j
- double [][]trans;
- double [][]emit;
- double []pi;
- int [][]data;
- int [][]tagdata;
-
- double logtrans[][];
-
- public HMMObjective o;
-
- public static void main(String[] args) {
-
- }
-
- public HMM(int n_state,int n_emit,int [][]data){
- trans=new double [n_state][n_state];
- emit=new double[n_state][n_emit];
- pi=new double [n_state];
- System.out.println(" random initial parameters");
- fillRand(trans);
- fillRand(emit);
- fillRand(pi);
-
- this.data=data;
-
- }
-
- private void fillRand(double [][] a){
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]=Math.random();
- }
- l1normalize(a[i]);
- }
- }
- private void fillRand(double []a){
- for(int i=0;i<a.length;i++){
- a[i]=Math.random();
- }
- l1normalize(a);
- }
-
- private double loglikely=0;
-
- public void EM(){
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
- loglikely=0;
-
- //E
- for(int i=0;i<data.length;i++){
-
- double [][][] post=forwardBackward(data[i]);
- incrementExpCnt(post, data[i],
- trans_exp_cnt,
- emit_exp_cnt,
- start_exp_cnt);
-
-
- if(i%100==0){
- System.out.print(".");
- }
- if(i%1000==0){
- System.out.println(i);
- }
-
- }
- System.out.println("Log likelihood: "+loglikely);
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
- }
-
- private double [][][]forwardBackward(int [] seq){
- double a[][]=new double [seq.length][trans.length];
- double b[][]=new double [seq.length][trans.length];
-
- int len=seq.length;
- //initialize the first step
- for(int i=0;i<trans.length;i++){
- a[0][i]=emit[i][seq[0]]*pi[i];
- b[len-1][i]=1;
- }
-
- //log of denominator for likelyhood
- double c=Math.log(l1norm(a[0]));
-
- l1normalize(a[0]);
- l1normalize(b[len-1]);
-
-
-
- //forward
- for(int n=1;n<len;n++){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- a[n][i]+=trans[j][i]*a[n-1][j];
- }
- a[n][i]*=emit[i][seq[n]];
- }
- c+=Math.log(l1norm(a[n]));
- l1normalize(a[n]);
- }
-
- loglikely+=c;
-
- //backward
- for(int n=len-2;n>=0;n--){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- b[n][i]+=trans[i][j]*b[n+1][j]*emit[j][seq[n+1]];
- }
- }
- l1normalize(b[n]);
- }
-
-
- //expected transition
- double p[][][]=new double [seq.length][trans.length][trans.length];
- for(int n=0;n<len-1;n++){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- p[n][i][j]=a[n][i]*trans[i][j]*emit[j][seq[n+1]]*b[n+1][j];
-
- }
- }
-
- l1normalize(p[n]);
- }
- return p;
- }
-
- private void incrementExpCnt(
- double post[][][],int [] seq,
- double trans_exp_cnt[][],
- double emit_exp_cnt[][],
- double start_exp_cnt[])
- {
-
- for(int n=0;n<post.length;n++){
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[n][i][j];
- trans_exp_cnt[i][j]+=post[n][i][j];
- }
-
- emit_exp_cnt[i][seq[n]]+=py;
-
- }
- }
-
- //the first state
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[0][i][j];
- }
- start_exp_cnt[i]+=py;
- }
-
-
- //the last state
- int len=post.length;
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[len-2][j][i];
- }
- emit_exp_cnt[i][seq[len-1]]+=py;
- }
- }
-
- public void l1normalize(double [] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- sum+=a[i];
- }
- if(sum==0){
- return ;
- }
- for(int i=0;i<a.length;i++){
- a[i]/=sum;
- }
- }
-
- public void l1normalize(double [][] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- sum+=a[i][j];
- }
- }
- if(sum==0){
- return;
- }
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]/=sum;
- }
- }
- }
-
- public void writeModel(String modelFilename) throws FileNotFoundException, IOException{
- PrintStream ps=io.FileUtil.printstream(new File(modelFilename));
- ps.println(trans.length);
- ps.println("Initial Probabilities:");
- for(int i=0;i<pi.length;i++){
- ps.print(pi[i]+"\t");
- }
- ps.println();
- ps.println("Transition Probabilities:");
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans[i].length;j++){
- ps.print(trans[i][j]+"\t");
- }
- ps.println();
- }
- ps.println("Emission Probabilities:");
- ps.println(emit[0].length);
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<emit[i].length;j++){
- ps.println(emit[i][j]);
- }
- ps.println();
- }
- ps.close();
- }
-
- public HMM(){
-
- }
-
- public void readModel(String modelFilename){
- Scanner sc=io.FileUtil.openInFile(modelFilename);
-
- int n_state=sc.nextInt();
- sc.nextLine();
- sc.nextLine();
- pi=new double [n_state];
- for(int i=0;i<n_state;i++){
- pi[i]=sc.nextDouble();
- }
- sc.nextLine();
- sc.nextLine();
- trans=new double[n_state][n_state];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=sc.nextDouble();
- }
- }
- sc.nextLine();
- sc.nextLine();
-
- int n_obs=sc.nextInt();
- emit=new double[n_state][n_obs];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=sc.nextDouble();
- }
- }
- sc.close();
- }
-
- public int []viterbi(int [] seq){
- double [][]p=new double [seq.length][trans.length];
- int backp[][]=new int [seq.length][trans.length];
-
- for(int i=0;i<trans.length;i++){
- p[0][i]=Math.log(emit[i][seq[0]]*pi[i]);
- }
-
- double a[][]=logtrans;
- if(logtrans==null){
- a=new double [trans.length][trans.length];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- a[i][j]=Math.log(trans[i][j]);
- }
- }
- logtrans=a;
- }
-
- double maxprob=0;
- for(int n=1;n<seq.length;n++){
- for(int i=0;i<trans.length;i++){
- maxprob=p[n-1][0]+a[0][i];
- backp[n][i]=0;
- for(int j=1;j<trans.length;j++){
- double prob=p[n-1][j]+a[j][i];
- if(maxprob<prob){
- backp[n][i]=j;
- maxprob=prob;
- }
- }
- p[n][i]=maxprob+Math.log(emit[i][seq[n]]);
- }
- }
-
- maxprob=p[seq.length-1][0];
- int maxIdx=0;
- for(int i=1;i<trans.length;i++){
- if(p[seq.length-1][i]>maxprob){
- maxprob=p[seq.length-1][i];
- maxIdx=i;
- }
- }
- int ans[]=new int [seq.length];
- ans[seq.length-1]=maxIdx;
- for(int i=seq.length-2;i>=0;i--){
- ans[i]=backp[i+1][ans[i+1]];
- }
- return ans;
- }
-
- public double l1norm(double a[]){
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i];
- }
- return norm;
- }
-
- public double [][]getEmitProb(){
- return emit;
- }
-
- public int [] sample(int terminalSym){
- ArrayList<Integer > s=new ArrayList<Integer>();
- int state=sample(pi);
- int sym=sample(emit[state]);
- while(sym!=terminalSym){
- s.add(sym);
- state=sample(trans[state]);
- sym=sample(emit[state]);
- }
-
- int ans[]=new int [s.size()];
- for(int i=0;i<ans.length;i++){
- ans[i]=s.get(i);
- }
- return ans;
- }
-
- public int sample(double p[]){
- double r=Math.random();
- double sum=0;
- for(int i=0;i<p.length;i++){
- sum+=p[i];
- if(sum>=r){
- return i;
- }
- }
- return p.length-1;
- }
-
- public void train(int tagdata[][]){
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
-
- for(int i=0;i<tagdata.length;i++){
- start_exp_cnt[tagdata[i][0]]++;
-
- for(int j=0;j<tagdata[i].length;j++){
- if(j+1<tagdata[i].length){
- trans_exp_cnt[ tagdata[i][j] ] [ tagdata[i][j+1] ]++;
- }
- emit_exp_cnt[tagdata[i][j]][data[i][j]]++;
- }
-
- }
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
- }
-
- private void addOneSmooth(double a[][]){
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]+=0.01;
- }
- //l1normalize(a[i]);
- }
- }
-
- public void PREM(){
-
- o.optimizeWithProjectedGradientDescent();
-
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
-
- o.loglikelihood=0;
- //E
- for(int sentNum=0;sentNum<data.length;sentNum++){
-
- double [][][] post=o.forwardBackward(sentNum);
- incrementExpCnt(post, data[sentNum],
- trans_exp_cnt,
- emit_exp_cnt,
- start_exp_cnt);
-
-
- if(sentNum%100==0){
- System.out.print(".");
- }
- if(sentNum%1000==0){
- System.out.println(sentNum);
- }
-
- }
-
- System.out.println("Log likelihood: "+o.getValue());
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
-
- }
-
- public void computeMaxwt(double[][]maxwt, int[][] d){
-
- for(int sentNum=0;sentNum<d.length;sentNum++){
- double post[][][]=forwardBackward(d[sentNum]);
-
- for(int n=0;n<post.length;n++){
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[n][i][j];
- }
-
- if(py>maxwt[i][d[sentNum][n]]){
- maxwt[i][d[sentNum][n]]=py;
- }
-
- }
- }
-
- //the last state
- int len=post.length;
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[len-2][j][i];
- }
-
- if(py>maxwt[i][d[sentNum][len-1]]){
- maxwt[i][d[sentNum][len-1]]=py;
- }
-
- }
-
- }
-
- }
-
-}//end of class
diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java
deleted file mode 100644
index 70b6c966..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java
+++ /dev/null
@@ -1,351 +0,0 @@
-package hmm;
-
-import gnu.trove.TIntArrayList;
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-public class HMMObjective extends ProjectedObjective{
-
-
- private static final double GRAD_DIFF = 3;
- public static double INIT_STEP_SIZE=10;
- public static double VAL_DIFF=1000;
-
- private HMM hmm;
- double[] newPoint ;
-
- //posterior[sent num][tok num][tag]=index into lambda
- private int posteriorMap[][][];
- //projection[word][tag].get(occurence)=index into lambda
- private TIntArrayList projectionMap[][];
-
- //Size of the simplex
- public double scale=10;
- private SimplexProjection projection;
-
- private int wordFreq[];
- private static int MIN_FREQ=10;
- private int numWordsToProject=0;
-
- private int n_param;
-
- public double loglikelihood;
-
- public HMMObjective(HMM h){
- hmm=h;
-
- countWords();
- buildMap();
-
- gradient=new double [n_param];
- projection = new SimplexProjection(scale);
- newPoint = new double[n_param];
- setInitialParameters(new double[n_param]);
-
- }
-
- /**@brief counts word frequency in the corpus
- *
- */
- private void countWords(){
- wordFreq=new int [hmm.emit[0].length];
- for(int i=0;i<hmm.data.length;i++){
- for(int j=0;j<hmm.data[i].length;j++){
- wordFreq[hmm.data[i][j]]++;
- }
- }
- }
-
- /**@brief build posterior and projection indices
- *
- */
- private void buildMap(){
- //number of sentences hidden states and words
- int n_states=hmm.trans.length;
- int n_words=hmm.emit[0].length;
- int n_sents=hmm.data.length;
-
- n_param=0;
- posteriorMap=new int[n_sents][][];
- projectionMap=new TIntArrayList[n_words][];
- for(int sentNum=0;sentNum<n_sents;sentNum++){
- int [] data=hmm.data[sentNum];
- posteriorMap[sentNum]=new int[data.length][n_states];
- numWordsToProject=0;
- for(int i=0;i<data.length;i++){
- int word=data[i];
- for(int state=0;state<n_states;state++){
- if(wordFreq[word]>MIN_FREQ){
- if(projectionMap[word]==null){
- projectionMap[word]=new TIntArrayList[n_states];
- }
- // if(posteriorMap[sentNum][i]==null){
- // posteriorMap[sentNum][i]=new int[n_states];
- // }
-
- posteriorMap[sentNum][i][state]=n_param;
- if(projectionMap[word][state]==null){
- projectionMap[word][state]=new TIntArrayList();
- numWordsToProject++;
- }
- projectionMap[word][state].add(n_param);
- n_param++;
- }
- else{
- posteriorMap[sentNum][i][state]=-1;
- }
- }
- }
- }
- }
-
- @Override
- public double[] projectPoint(double[] point) {
- // TODO Auto-generated method stub
- for(int i=0;i<projectionMap.length;i++){
-
- if(projectionMap[i]==null){
- //this word is not constrained
- continue;
- }
-
- for(int j=0;j<projectionMap[i].length;j++){
- TIntArrayList instances=projectionMap[i][j];
- double[] toProject = new double[instances.size()];
-
- for (int k = 0; k < toProject.length; k++) {
- // System.out.print(instances.get(k) + " ");
- toProject[k] = point[instances.get(k)];
- }
-
- projection.project(toProject);
- for (int k = 0; k < toProject.length; k++) {
- newPoint[instances.get(k)]=toProject[k];
- }
- }
- }
- return newPoint;
- }
-
- @Override
- public double[] getGradient() {
- // TODO Auto-generated method stub
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- // TODO Auto-generated method stub
- functionCalls++;
- return loglikelihood;
- }
-
-
- @Override
- public String toString() {
- // TODO Auto-generated method stub
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < parameters.length; i++) {
- sb.append(parameters[i]+" ");
- if(i%100==0){
- sb.append("\n");
- }
- }
- sb.append("\n");
- /*
- for (int i = 0; i < gradient.length; i++) {
- sb.append(gradient[i]+" ");
- if(i%100==0){
- sb.append("\n");
- }
- }
- sb.append("\n");
- */
- return sb.toString();
- }
-
-
- /**
- * @param seq
- * @return posterior probability of each transition
- */
- public double [][][]forwardBackward(int sentNum){
- int [] seq=hmm.data[sentNum];
- int n_states=hmm.trans.length;
- double a[][]=new double [seq.length][n_states];
- double b[][]=new double [seq.length][n_states];
-
- int len=seq.length;
-
- boolean constrained=
- (projectionMap[seq[0]]!=null);
-
- //initialize the first step
- for(int i=0;i<n_states;i++){
- a[0][i]=hmm.emit[i][seq[0]]*hmm.pi[i];
- if(constrained){
- a[0][i]*=
- Math.exp(- parameters[ posteriorMap[sentNum][0][i] ] );
- }
- b[len-1][i]=1;
- }
-
- loglikelihood+=Math.log(hmm.l1norm(a[0]));
- hmm.l1normalize(a[0]);
- hmm.l1normalize(b[len-1]);
-
- //forward
- for(int n=1;n<len;n++){
-
- constrained=
- (projectionMap[seq[n]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
- a[n][i]+=hmm.trans[j][i]*a[n-1][j];
- }
- a[n][i]*=hmm.emit[i][seq[n]];
-
- if(constrained){
- a[n][i]*=
- Math.exp(- parameters[ posteriorMap[sentNum][n][i] ] );
- }
-
- }
- loglikelihood+=Math.log(hmm.l1norm(a[n]));
- hmm.l1normalize(a[n]);
- }
-
- //temp variable for e^{-\lambda}
- double factor=1;
- //backward
- for(int n=len-2;n>=0;n--){
-
- constrained=
- (projectionMap[seq[n+1]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
-
- if(constrained){
- factor=
- Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
- }else{
- factor=1;
- }
-
- b[n][i]+=hmm.trans[i][j]*b[n+1][j]*hmm.emit[j][seq[n+1]]*factor;
-
- }
- }
- hmm.l1normalize(b[n]);
- }
-
- //expected transition
- double p[][][]=new double [seq.length][n_states][n_states];
- for(int n=0;n<len-1;n++){
-
- constrained=
- (projectionMap[seq[n+1]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
-
- if(constrained){
- factor=
- Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
- }else{
- factor=1;
- }
-
- p[n][i][j]=a[n][i]*hmm.trans[i][j]*
- hmm.emit[j][seq[n+1]]*b[n+1][j]*factor;
-
- }
- }
-
- hmm.l1normalize(p[n]);
- }
- return p;
- }
-
- public void optimizeWithProjectedGradientDescent(){
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
-
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(10);
- updateFunction();
- boolean succed = optimizer.optimize(this,stats,compositeStop);
- System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction(){
-
- updateCalls++;
- loglikelihood=0;
-
- for(int sentNum=0;sentNum<hmm.data.length;sentNum++){
- double [][][]p=forwardBackward(sentNum);
-
- for(int n=0;n<p.length-1;n++){
- for(int i=0;i<p[n].length;i++){
- if(projectionMap[hmm.data[sentNum][n]]!=null){
- double posterior=0;
- for(int j=0;j<p[n][i].length;j++){
- posterior+=p[n][i][j];
- }
- gradient[posteriorMap[sentNum][n][i]]=-posterior;
- }
- }
- }
-
- //the last state
- int n=p.length-2;
- for(int i=0;i<p[n].length;i++){
- if(projectionMap[hmm.data[sentNum][n+1]]!=null){
-
- double posterior=0;
- for(int j=0;j<p[n].length;j++){
- posterior+=p[n][j][i];
- }
- gradient[posteriorMap[sentNum][n+1][i]]=-posterior;
-
- }
- }
- }
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/hmm/POS.java b/gi/posterior-regularisation/prjava/src/hmm/POS.java
deleted file mode 100644
index bdcbc683..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/POS.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package hmm;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.HashMap;
-
-import data.Corpus;
-
-public class POS {
-
- //public String trainFilename="../posdata/en_train.conll";
- public static String trainFilename="../posdata/small_train.txt";
-// public static String trainFilename="../posdata/en_test.conll";
-// public static String trainFilename="../posdata/trial1.txt";
-
- public static String testFilename="../posdata/en_test.conll";
- //public static String testFilename="../posdata/trial1.txt";
-
- public static String predFilename="../posdata/en_test.predict.conll";
- public static String modelFilename="../posdata/posModel.out";
- public static final int ITER=20;
- public static final int N_STATE=30;
-
- public static void main(String[] args) {
- //POS p=new POS();
- //POS p=new POS(true);
- try {
- PRPOS();
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
-
- public POS() throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
- for(int i=0;i<ITER;i++){
- System.out.println("Iter"+i);
- hmm.EM();
- if((i+1)%10==0){
- hmm.writeModel(modelFilename+i);
- }
- }
-
- hmm.writeModel(modelFilename);
-
- Corpus test=new Corpus(testFilename,c.vocab);
-
- PrintStream ps= io.FileUtil.printstream(new File(predFilename));
-
- int [][]data=test.getAllData();
- for(int i=0;i<data.length;i++){
- int []tag=hmm.viterbi(data[i]);
- String sent[]=test.get(i);
- for(int j=0;j<data[i].length;j++){
- ps.println(sent[j]+"\t"+tag[j]);
- }
- ps.println();
- }
- ps.close();
- }
-
- //POS induction with L1/Linf constraints
- public static void PRPOS() throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
- hmm.o=new HMMObjective(hmm);
- for(int i=0;i<ITER;i++){
- System.out.println("Iter: "+i);
- hmm.PREM();
- if((i+1)%10==0){
- hmm.writeModel(modelFilename+i);
- }
- }
-
- hmm.writeModel(modelFilename);
- }
-
-
- public POS(boolean supervised) throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(c.tagVocab.size() , c.getVocabSize()+1,c.getAllData());
- hmm.train(c.getTagData());
-
- hmm.writeModel(modelFilename);
-
- Corpus test=new Corpus(testFilename,c.vocab);
-
- HashMap<String, Integer>tagVocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
- String [] tagdict=new String [tagVocab.size()+1];
- for(String key:tagVocab.keySet()){
- tagdict[tagVocab.get(key)]=key;
- }
- tagdict[tagdict.length-1]=Corpus.UNK_TOK;
-
- System.out.println(c.vocab.get("<e>"));
-
- PrintStream ps= io.FileUtil.printstream(new File(predFilename));
-
- int [][]data=test.getAllData();
- for(int i=0;i<data.length;i++){
- int []tag=hmm.viterbi(data[i]);
- String sent[]=test.get(i);
- for(int j=0;j<data[i].length;j++){
- ps.println(sent[j]+"\t"+tagdict[tag[j]]);
- }
- ps.println();
- }
- ps.close();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/io/FileUtil.java b/gi/posterior-regularisation/prjava/src/io/FileUtil.java
deleted file mode 100644
index 6720d087..00000000
--- a/gi/posterior-regularisation/prjava/src/io/FileUtil.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package io;
-import java.util.*;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-import java.io.*;
-public class FileUtil
-{
- public static BufferedReader reader(File file) throws FileNotFoundException, IOException
- {
- if (file.getName().endsWith(".gz"))
- return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF8"));
- else
- return new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF8"));
- }
-
- public static PrintStream printstream(File file) throws FileNotFoundException, IOException
- {
- if (file.getName().endsWith(".gz"))
- return new PrintStream(new GZIPOutputStream(new FileOutputStream(file)), true, "UTF8");
- else
- return new PrintStream(new FileOutputStream(file), true, "UTF8");
- }
-
- public static Scanner openInFile(String filename)
- {
- Scanner localsc=null;
- try
- {
- localsc=new Scanner(new FileInputStream(filename), "UTF8");
-
- }catch(IOException ioe){
- System.out.println(ioe.getMessage());
- }
- return localsc;
- }
-
- public static FileInputStream openInputStream(String infilename)
- {
- FileInputStream fis=null;
- try {
- fis = new FileInputStream(infilename);
-
- } catch (IOException ioe) {
- System.out.println(ioe.getMessage());
- }
- return fis;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java b/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java
deleted file mode 100644
index d1631b51..00000000
--- a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package io;
-
-
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectInput;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutput;
-import java.io.ObjectOutputStream;
-import java.io.OutputStream;
-
-public class SerializedObjects
-{
- public static void writeSerializedObject(Object object, String outFile)
- {
- ObjectOutput output = null;
- try{
- //use buffering
- OutputStream file = new FileOutputStream(outFile);
- OutputStream buffer = new BufferedOutputStream( file );
- output = new ObjectOutputStream( buffer );
- output.writeObject(object);
- buffer.close();
- file.close();
- }
- catch(IOException ex){
- ex.printStackTrace();
- }
- finally{
- try {
- if (output != null) {
- //flush and close "output" and its underlying streams
- output.close();
- }
- }
- catch (IOException ex ){
- ex.printStackTrace();
- }
- }
- }
-
- public static Object readSerializedObject(String inputFile)
- {
- ObjectInput input = null;
- Object recoveredObject=null;
- try{
- //use buffering
- InputStream file = new FileInputStream(inputFile);
- InputStream buffer = new BufferedInputStream(file);
- input = new ObjectInputStream(buffer);
- //deserialize the List
- recoveredObject = input.readObject();
- }
- catch(IOException ex){
- ex.printStackTrace();
- }
- catch (ClassNotFoundException ex){
- ex.printStackTrace();
- }
- catch(Exception ex)
- {
- ex.printStackTrace();
- }
- finally{
- try {
- if ( input != null ) {
- //close "input" and its underlying streams
- input.close();
- }
- }
- catch (IOException ex){
- ex.printStackTrace();
- }
- }
- return recoveredObject;
- }
-
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java
deleted file mode 100644
index 25fa7f09..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java
+++ /dev/null
@@ -1,110 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ConjugateGradient;
-import optimization.gradientBasedMethods.GradientDescent;
-import optimization.gradientBasedMethods.LBFGS;
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimization;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-/**
- *
- * @author javg
- * f(x) = \sum_{i=1}^{N-1} \left[ (1-x_i)^2+ 100 (x_{i+1} - x_i^2 )^2 \right] \quad \forall x\in\mathbb{R}^N.
- */
-public class GeneralizedRosenbrock extends Objective{
-
-
-
- public GeneralizedRosenbrock(int dimensions){
- parameters = new double[dimensions];
- java.util.Arrays.fill(parameters, 0);
- gradient = new double[dimensions];
-
- }
-
- public GeneralizedRosenbrock(int dimensions, double[] params){
- parameters = params;
- gradient = new double[dimensions];
- }
-
-
- public double getValue() {
- functionCalls++;
- double value = 0;
- for(int i = 0; i < parameters.length-1; i++){
- value += MathUtils.square(1-parameters[i]) + 100*MathUtils.square(parameters[i+1] - MathUtils.square(parameters[i]));
- }
-
- return value;
- }
-
- /**
- * gx = -2(1-x) -2x200(y-x^2)
- * gy = 200(y-x^2)
- */
- public double[] getGradient() {
- gradientCalls++;
- java.util.Arrays.fill(gradient,0);
- for(int i = 0; i < parameters.length-1; i++){
- gradient[i]+=-2*(1-parameters[i]) - 400*parameters[i]*(parameters[i+1] - MathUtils.square(parameters[i]));
- gradient[i+1]+=200*(parameters[i+1] - MathUtils.square(parameters[i]));
- }
- return gradient;
- }
-
-
-
-
-
-
-
- public String toString(){
- String res ="";
- for(int i = 0; i < parameters.length; i++){
- res += "P" + i+ " " + parameters[i];
- }
- res += " Value " + getValue();
- return res;
- }
-
- public static void main(String[] args) {
-
- GeneralizedRosenbrock o = new GeneralizedRosenbrock(2);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- ;
-
- System.out.println("Doing Gradient descent");
- //LineSearchMethod wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1);
- StopingCriteria stop = new GradientL2Norm(0.001);
- LineSearchMethod ls = new ArmijoLineSearchMinimization();
- Optimizer optimizer = new GradientDescent(ls);
- OptimizerStats stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- boolean succed = optimizer.optimize(o,stats, stop);
- System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1));
- System.out.println("Doing Conjugate Gradient descent");
- o = new GeneralizedRosenbrock(2);
- // wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1);
- optimizer = new ConjugateGradient(ls);
- stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- succed = optimizer.optimize(o,stats,stop);
- System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1));
- System.out.println("Doing Quasi newton descent");
- o = new GeneralizedRosenbrock(2);
- optimizer = new LBFGS(ls,10);
- stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- succed = optimizer.optimize(o,stats,stop);
- System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1));
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java
deleted file mode 100644
index f087681e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java
+++ /dev/null
@@ -1,128 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ConjugateGradient;
-
-import optimization.gradientBasedMethods.GradientDescent;
-import optimization.gradientBasedMethods.LBFGS;
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.GenericPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-
-
-/**
- * @author javg
- *
- */
-public class x2y2 extends Objective{
-
-
- //Implements function ax2+ by2
- double a, b;
- public x2y2(double a, double b){
- this.a = a;
- this.b = b;
- parameters = new double[2];
- parameters[0] = 4;
- parameters[1] = 4;
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*parameters[0]*parameters[0]+b*parameters[1]*parameters[1];
- }
-
- public double[] getGradient() {
- gradientCalls++;
- gradient[0]=2*a*parameters[0];
- gradient[1]=2*b*parameters[1];
- return gradient;
-// if(debugLevel >=2){
-// double[] numericalGradient = DebugHelpers.getNumericalGradient(this, parameters, 0.000001);
-// for(int i = 0; i < parameters.length; i++){
-// double diff = Math.abs(gradient[i]-numericalGradient[i]);
-// if(diff > 0.00001){
-// System.out.println("Numerical Gradient does not match");
-// System.exit(1);
-// }
-// }
-// }
- }
-
-
-
- public void optimizeWithGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- GradientDescent optimizer = new GradientDescent(ls);
- StopingCriteria stop = new GradientL2Norm(0.001);
-// optimizer.setGradientConvergenceValue(0.001);
- optimizer.setMaxIterations(100);
- boolean succed = optimizer.optimize(o,stats,stop);
- System.out.println("Ended optimzation Gradient Descent\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public void optimizeWithConjugateGradient(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- ConjugateGradient optimizer = new ConjugateGradient(ls);
- StopingCriteria stop = new GradientL2Norm(0.001);
-
- optimizer.setMaxIterations(10);
- boolean succed = optimizer.optimize(o,stats,stop);
- System.out.println("Ended optimzation Conjugate Gradient\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public void optimizeWithLBFGS(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- LBFGS optimizer = new LBFGS(ls,10);
- StopingCriteria stop = new GradientL2Norm(0.001);
- optimizer.setMaxIterations(10);
- boolean succed = optimizer.optimize(o,stats,stop);
- System.out.println("Ended optimzation LBFGS\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public static void main(String[] args) {
- x2y2 o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- o.setDebugLevel(4);
- LineSearchMethod wolfe = new WolfRuleLineSearch(new GenericPickFirstStep(1),0.001,0.9);;
-// LineSearchMethod ls = new ArmijoLineSearchMinimization();
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithGradientDescent(wolfe, stats, o);
- o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-// ls = new ArmijoLineSearchMinimization();
- stats = new OptimizerStats();
- o.optimizeWithConjugateGradient(wolfe, stats, o);
- o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-// ls = new ArmijoLineSearchMinimization();
- stats = new OptimizerStats();
- o.optimizeWithLBFGS(wolfe, stats, o);
- }
-
- public String toString(){
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue();
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java
deleted file mode 100644
index 391775b7..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.BoundsProjection;
-import optimization.projections.Projection;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-
-/**
- * @author javg
- *
- *
- *ax2+ b(y2 -displacement)
- */
-public class x2y2WithConstraints extends ProjectedObjective{
-
-
- double a, b;
- double dx;
- double dy;
- Projection projection;
-
-
- public x2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){
- //projection = new BoundsProjection(0.2,Double.MAX_VALUE);
- super();
- projection = proj;
- this.a = a;
- this.b = b;
- this.dx = dx;
- this.dy = dy;
- setInitialParameters(params);
- System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2");
- System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")");
- printParameters();
- projection.project(parameters);
- printParameters();
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy));
- }
-
- public double[] getGradient() {
- if(gradient == null){
- gradient = new double[2];
- }
- gradientCalls++;
- gradient[0]=2*a*(parameters[0]-dx);
- gradient[1]=2*b*(parameters[1]-dy);
- return gradient;
- }
-
-
- public double[] projectPoint(double[] point) {
- double[] newPoint = point.clone();
- projection.project(newPoint);
- return newPoint;
- }
-
- public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2WithConstraints o){
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001);
- StopingCriteria stopValue = new ValueDifference(0.001);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(5);
- boolean succed = optimizer.optimize(o,stats,compositeStop);
- System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
-
-
- public String toString(){
-
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")";
- }
-
- public static void main(String[] args) {
- double a = 1;
- double b=1;
- double x0 = 0;
- double y0 =1;
- double dx = 0.5;
- double dy = 0.5 ;
- double [] parameters = new double[2];
- parameters[0] = x0;
- parameters[1] = y0;
- x2y2WithConstraints o = new x2y2WithConstraints(a,b,parameters,dx,dy, new SimplexProjection(0.5));
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b );
- o.setDebugLevel(4);
-
- LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithProjectedGradientDescent(ls, stats, o);
-
-// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy);
-// stats = new OptimizerStats();
-// o.optimizeWithSpectralProjectedGradientDescent(stats, o);
- }
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java
deleted file mode 100644
index 2fcb7990..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-/**
- *
- * @author javg
- *
- */
-public abstract class AbstractGradientBaseMethod implements Optimizer{
-
- protected int maxNumberOfIterations=10000;
-
-
-
- protected int currentProjectionIteration;
- protected double currValue;
- protected double previousValue = Double.MAX_VALUE;;
- protected double step;
- protected double[] gradient;
- public double[] direction;
-
- //Original values
- protected double originalGradientL2Norm;
-
- protected LineSearchMethod lineSearch;
- DifferentiableLineSearchObjective lso;
-
-
- public void reset(){
- direction = null;
- gradient = null;
- previousValue = Double.MAX_VALUE;
- currentProjectionIteration = 0;
- originalGradientL2Norm = 0;
- step = 0;
- currValue = 0;
- }
-
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- lso = new DifferentiableLineSearchObjective(o);
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- }
-
- public void updateStructuresAfterStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- }
-
- public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
- //Initialize structures
-
- stats.collectInitStats(this, o);
- direction = new double[o.getNumParameters()];
- initializeStructures(o, stats, stop);
- for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
- //System.out.println("\tgradient descent iteration " + currentProjectionIteration);
- //System.out.print("\tparameters:" );
- //o.printParameters();
- previousValue = currValue;
- currValue = o.getValue();
- gradient = o.getGradient();
- if(stop.stopOptimization(o)){
- stats.collectFinalStats(this, o);
- return true;
- }
-
- getDirection();
- if(MathUtils.dotProduct(gradient, direction) > 0){
- System.out.println("Not a descent direction");
- System.out.println(" current stats " + stats.prettyPrint(1));
- System.exit(-1);
- }
- updateStructuresBeforeStep(o, stats, stop);
- lso.reset(direction);
- step = lineSearch.getStepSize(lso);
- //System.out.println("\t\tLeave with step: " + step);
- if(step==-1){
- System.out.println("Failed to find step");
- stats.collectFinalStats(this, o);
- return false;
- }
- updateStructuresAfterStep( o, stats, stop);
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient = o.getGradient();
- stats.collectIterationStats(this, o);
- }
- stats.collectFinalStats(this, o);
- return false;
- }
-
-
- public int getCurrentIteration() {
- return currentProjectionIteration;
- }
-
-
- /**
- * Method specific
- */
- public abstract double[] getDirection();
-
- public double getCurrentStep() {
- return step;
- }
-
-
-
- public void setMaxIterations(int max) {
- maxNumberOfIterations = max;
- }
-
- public double getCurrentValue() {
- return currValue;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java
deleted file mode 100644
index 28295729..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java
+++ /dev/null
@@ -1,92 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-
-
-public class ConjugateGradient extends AbstractGradientBaseMethod{
-
-
- double[] previousGradient;
- double[] previousDirection;
-
- public ConjugateGradient(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
- public void reset(){
- super.reset();
- java.util.Arrays.fill(previousDirection, 0);
- java.util.Arrays.fill(previousGradient, 0);
- }
-
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- super.initializeStructures(o, stats, stop);
- previousGradient = new double[o.getNumParameters()];
- previousDirection = new double[o.getNumParameters()];
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
- System.arraycopy(direction, 0, previousDirection, 0, direction.length);
- }
-
-// public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
-// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o);
-// stats.collectInitStats(this, o);
-// direction = new double[o.getNumParameters()];
-// initializeStructures(o, stats, stop);
-// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient =o.getGradient();
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// getDirection();
-// updateStructures(o, stats, stop);
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find a step size");
-// System.out.println("Failed to find step");
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-//
-// stats.collectIterationStats(this, o);
-// }
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
- public double[] getDirection(){
- direction = MathUtils.negation(gradient);
- if(currentProjectionIteration != 1){
- //Using Polak-Ribiere method (book equation 5.45)
- double b = MathUtils.dotProduct(gradient, MathUtils.arrayMinus(gradient, previousGradient))
- /MathUtils.dotProduct(previousGradient, previousGradient);
- if(b<0){
- System.out.println("Defaulting to gradient descent");
- b = Math.max(b, 0);
- }
- MathUtils.plusEquals(direction, previousDirection, b);
- //Debug code
- if(MathUtils.dotProduct(direction, gradient) > 0){
- System.out.println("Not an descent direction reseting to gradien");
- direction = MathUtils.negation(gradient);
- }
- }
- return direction;
- }
-
-
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java
deleted file mode 100644
index 6dc4ef6c..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import java.util.ArrayList;
-
-import optimization.util.MathUtils;
-
-
-
-public class DebugHelpers {
- public static void getLineSearchGraph(Objective o, double[] direction,
- double[] parameters, double originalObj,
- double originalDot, double c1, double c2){
- ArrayList<Double> stepS = new ArrayList<Double>();
- ArrayList<Double> obj = new ArrayList<Double>();
- ArrayList<Double> norm = new ArrayList<Double>();
- double[] gradient = new double[o.getNumParameters()];
- double[] newParameters = parameters.clone();
- MathUtils.plusEquals(newParameters,direction,0);
- o.setParameters(newParameters);
- double minValue = o.getValue();
- int valuesBiggerThanMax = 0;
- for(double step = 0; step < 2; step +=0.01 ){
- newParameters = parameters.clone();
- MathUtils.plusEquals(newParameters,direction,step);
- o.setParameters(newParameters);
- double newValue = o.getValue();
- gradient = o.getGradient();
- double newgradDirectionDot = MathUtils.dotProduct(gradient,direction);
- stepS.add(step);
- obj.add(newValue);
- norm.add(newgradDirectionDot);
- if(newValue <= minValue){
- minValue = newValue;
- }else{
- valuesBiggerThanMax++;
- }
-
- if(valuesBiggerThanMax > 10){
- break;
- }
-
- }
- System.out.println("step\torigObj\tobj\tsuffdec\tnorm\tcurvature1");
- for(int i = 0; i < stepS.size(); i++){
- double cnorm= norm.get(i);
- System.out.println(stepS.get(i)+"\t"+originalObj +"\t"+obj.get(i) + "\t" +
- (originalObj + originalDot*((Double)stepS.get(i))*c1) +"\t"+Math.abs(cnorm) +"\t"+c2*Math.abs(originalDot));
- }
- }
-
- public static double[] getNumericalGradient(Objective o, double[] parameters, double epsilon){
- int nrParameters = o.getNumParameters();
- double[] gradient = new double[nrParameters];
- double[] newParameters;
- double originalValue = o.getValue();
- for(int parameter = 0; parameter < nrParameters; parameter++){
- newParameters = parameters.clone();
- newParameters[parameter]+=epsilon;
- o.setParameters(newParameters);
- double newValue = o.getValue();
- gradient[parameter]=(newValue-originalValue)/epsilon;
- }
- return gradient;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java
deleted file mode 100644
index 9a53cef4..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.linesearch.LineSearchMethod;
-
-
-
-public class GradientDescent extends AbstractGradientBaseMethod{
-
- public GradientDescent(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
- public double[] getDirection(){
- for(int i = 0; i< gradient.length; i++){
- direction[i] = -gradient[i];
- }
- return direction;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java
deleted file mode 100644
index dedbc942..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java
+++ /dev/null
@@ -1,234 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-public class LBFGS extends AbstractGradientBaseMethod{
-
- //How many previous values are being saved
- int history;
- double[][] skList;
- double[][] ykList;
- double initialHessianParameters;
- double[] previousGradient;
- double[] previousParameters;
-
- //auxiliar structures
- double q[];
- double[] roi;
- double[] alphai;
-
- public LBFGS(LineSearchMethod ls, int history) {
- lineSearch = ls;
- this.history = history;
- skList = new double[history][];
- ykList = new double[history][];
-
- }
-
- public void reset(){
- super.reset();
- initialHessianParameters = 0;
- previousParameters = null;
- previousGradient = null;
- skList = new double[history][];
- ykList = new double[history][];
- q = null;
- roi = null;
- alphai = null;
- }
-
- public double[] LBFGSTwoLoopRecursion(double hessianConst){
- //Only create array once
- if(q == null){
- q = new double[gradient.length];
- }
- System.arraycopy(gradient, 0, q, 0, gradient.length);
- //Only create array once
- if(roi == null){
- roi = new double[history];
- }
- //Only create array once
- if(alphai == null){
- alphai = new double[history];
- }
-
- for(int i = history-1; i >=0 && skList[i]!= null && ykList[i]!=null; i-- ){
- // System.out.println("New to Old proj " + currentProjectionIteration + " history "+history + " index " + i);
- double[] si = skList[i];
- double[] yi = ykList[i];
- roi[i]= 1.0/MathUtils.dotProduct(yi,si);
- alphai[i] = MathUtils.dotProduct(si, q)*roi[i];
- MathUtils.plusEquals(q, yi, -alphai[i]);
- }
- //Initial Hessian is just a constant
- MathUtils.scalarMultiplication(q, hessianConst);
- for(int i = 0; i <history && skList[i]!= null && ykList[i]!=null; i++ ){
- // System.out.println("Old to New proj " + currentProjectionIteration + " history "+history + " index " + i);
- double beta = MathUtils.dotProduct(ykList[i], q)*roi[i];
- MathUtils.plusEquals(q, skList[i], (alphai[i]-beta));
- }
- return q;
- }
-
-
-
-
- @Override
- public double[] getDirection() {
-
- calculateInitialHessianParameter();
-// System.out.println("Initial hessian " + initialHessianParameters);
- return direction = MathUtils.negation(LBFGSTwoLoopRecursion(initialHessianParameters));
- }
-
- public void calculateInitialHessianParameter(){
- if(currentProjectionIteration == 1){
- //Use gradient
- initialHessianParameters = 1;
- }else if(currentProjectionIteration <= history){
- double[] sk = skList[currentProjectionIteration-2];
- double[] yk = ykList[currentProjectionIteration-2];
- initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
- }else{
- //get the last one
- double[] sk = skList[history-1];
- double[] yk = ykList[history-1];
- initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
- }
- }
-
- //TODO if structures exit just reset them to zero
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- super.initializeStructures(o, stats, stop);
- previousParameters = new double[o.getNumParameters()];
- previousGradient = new double[o.getNumParameters()];
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- super.initializeStructures(o, stats, stop);
- System.arraycopy(o.getParameters(), 0, previousParameters, 0, previousParameters.length);
- System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
- }
-
- public void updateStructuresAfterStep( Objective o,OptimizerStats stats, StopingCriteria stop){
- double[] diffX = MathUtils.arrayMinus(o.getParameters(), previousParameters);
- double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient);
- //Save new values and discard new ones
- if(currentProjectionIteration > history){
- for(int i = 0; i < history-1;i++){
- skList[i]=skList[i+1];
- ykList[i]=ykList[i+1];
- }
- skList[history-1]=diffX;
- ykList[history-1]=diffGrad;
- }else{
- skList[currentProjectionIteration-1]=diffX;
- ykList[currentProjectionIteration-1]=diffGrad;
- }
- }
-
-// public boolean optimize(Objective o, OptimizerStats stats, StopingCriteria stop) {
-// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o);
-// gradient = o.getGradient();
-// direction = new double[o.getNumParameters()];
-// previousGradient = new double[o.getNumParameters()];
-//
-// previousParameters = new double[o.getNumParameters()];
-//
-// stats.collectInitStats(this, o);
-// previousValue = Double.MAX_VALUE;
-// currValue= o.getValue();
-// //Used for stopping criteria
-// double[] originalGradient = o.getGradient();
-//
-// originalGradientL2Norm = MathUtils.L2Norm(originalGradient);
-// if(stop.stopOptimization(originalGradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-//
-//
-// currValue = o.getValue();
-// gradient = o.getGradient();
-// currParameters = o.getParameters();
-//
-//
-// if(currentProjectionIteration == 1){
-// //Use gradient
-// initialHessianParameters = 1;
-// }else if(currentProjectionIteration <= history){
-// double[] sk = skList[currentProjectionIteration-2];
-// double[] yk = ykList[currentProjectionIteration-2];
-// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
-// }else{
-// //get the last one
-// double[] sk = skList[history-1];
-// double[] yk = ykList[history-1];
-// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
-// }
-//
-// getDirection();
-//
-// //MatrixOutput.printDoubleArray(direction, "direction");
-// double dot = MathUtils.dotProduct(direction, gradient);
-// if(dot > 0){
-// throw new RuntimeException("Not a descent direction");
-// } if (Double.isNaN(dot)){
-// throw new RuntimeException("dot is not a number!!");
-// }
-// System.arraycopy(currParameters, 0, previousParameters, 0, currParameters.length);
-// System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find a step size");
-//// lso.printLineSearchSteps();
-//// System.out.println(stats.prettyPrint(1));
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-// stats.collectIterationStats(this, o);
-//
-// //We are not updating the alpha since it is done in line search already
-// currParameters = o.getParameters();
-// gradient = o.getGradient();
-//
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// double[] diffX = MathUtils.arrayMinus(currParameters, previousParameters);
-// double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient);
-// //Save new values and discard new ones
-// if(currentProjectionIteration > history){
-// for(int i = 0; i < history-1;i++){
-// skList[i]=skList[i+1];
-// ykList[i]=ykList[i+1];
-// }
-// skList[history-1]=diffX;
-// ykList[history-1]=diffGrad;
-// }else{
-// skList[currentProjectionIteration-1]=diffX;
-// ykList[currentProjectionIteration-1]=diffGrad;
-// }
-// previousValue = currValue;
-// }
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
-
-
-
-
-
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java
deleted file mode 100644
index 6be01bf9..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java
+++ /dev/null
@@ -1,87 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-/**
- * Defines an optimization objective:
- *
- *
- * @author javg
- *
- */
-public abstract class Objective {
-
- protected int functionCalls = 0;
- protected int gradientCalls = 0;
- protected int updateCalls = 0;
-
- protected double[] parameters;
-
- //Contains a cache with the gradient
- public double[] gradient;
- int debugLevel = 0;
-
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- public int getNumParameters() {
- return parameters.length;
- }
-
- public double getParameter(int index) {
- return parameters[index];
- }
-
- public double[] getParameters() {
- return parameters;
- }
-
- public abstract double[] getGradient( );
-
- public void setParameter(int index, double value) {
- parameters[index]=value;
- }
-
- public void setParameters(double[] params) {
- if(parameters == null){
- parameters = new double[params.length];
- }
- updateCalls++;
- System.arraycopy(params, 0, parameters, 0, params.length);
- }
-
-
- public int getNumberFunctionCalls() {
- return functionCalls;
- }
-
- public int getNumberGradientCalls() {
- return gradientCalls;
- }
-
- public int getNumberUpdateCalls() {
- return updateCalls;
- }
-
- public String finalInfoString() {
- return "FE: " + functionCalls + " GE " + gradientCalls + " Params updates" +
- updateCalls;
- }
- public void printParameters() {
- System.out.println(toString());
- }
-
- public abstract String toString();
- public abstract double getValue ();
-
- /**
- * Sets the initial objective parameters
- * For unconstrained models this just sets the objective params = argument no copying
- * For a constrained objective project the parameters and then set
- * @param params
- */
- public void setInitialParameters(double[] params){
- parameters = params;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java
deleted file mode 100644
index 96fce5b0..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.stopCriteria.StopingCriteria;
-
-public interface Optimizer {
- public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stoping);
-
-
- public double[] getDirection();
- public double getCurrentStep();
- public double getCurrentValue();
- public int getCurrentIteration();
- public void reset();
-
- public void setMaxIterations(int max);
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java
deleted file mode 100644
index afb29d04..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java
+++ /dev/null
@@ -1,11 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-/**
- *
- * @author javg
- *
- */
-public abstract class ProjectedAbstractGradientBaseMethod extends AbstractGradientBaseMethod implements ProjectedOptimizer{
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java
deleted file mode 100644
index 0186e945..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java
+++ /dev/null
@@ -1,154 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import java.io.IOException;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.ProjectedDifferentiableLineSearchObjective;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-
-/**
- * This class implements the projected gradiend
- * as described in Bertsekas "Non Linear Programming"
- * section 2.3.
- *
- * The update is given by:
- * x_k+1 = x_k + alpha^k(xbar_k-x_k)
- * Where xbar is:
- * xbar = [x_k -s_k grad(f(x_k))]+
- * where []+ is the projection into the feasibility set
- *
- * alpha is the step size
- * s_k - is a positive scalar which can be view as a step size as well, by
- * setting alpha to 1, then x_k+1 = [x_k -s_k grad(f(x_k))]+
- * This is called taking a step size along the projection arc (Bertsekas) which
- * we will use by default.
- *
- * Note that the only place where we actually take a step size is on pick a step size
- * so this is going to be just like a normal gradient descent but use a different
- * armijo line search where we project after taking a step.
- *
- *
- * @author javg
- *
- */
-public class ProjectedGradientDescent extends ProjectedAbstractGradientBaseMethod{
-
-
-
-
- public ProjectedGradientDescent(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
- //Use projected differential objective instead
- public void initializeStructures(Objective o, OptimizerStats stats, StopingCriteria stop) {
- lso = new ProjectedDifferentiableLineSearchObjective(o);
- };
-
-
- ProjectedObjective obj;
- public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){
- obj = o;
- return super.optimize(o, stats, stop);
- }
-
- public double[] getDirection(){
- for(int i = 0; i< gradient.length; i++){
- direction[i] = -gradient[i];
- }
- return direction;
- }
-
-
-
-
-}
-
-
-
-
-
-
-
-///OLD CODE
-
-//Use projected gradient norm
-//public boolean stopCriteria(double[] gradient){
-// if(originalDirenctionL2Norm == 0){
-// System.out.println("Leaving original direction norm is zero");
-// return true;
-// }
-// if(MathUtils.L2Norm(direction)/originalDirenctionL2Norm < gradientConvergenceValue){
-// System.out.println("Leaving projected gradient Norm smaller than epsilon");
-// return true;
-// }
-// if((previousValue - currValue)/Math.abs(previousValue) < valueConvergenceValue) {
-// System.out.println("Leaving value change below treshold " + previousValue + " - " + currValue);
-// System.out.println(previousValue/currValue + " - " + currValue/currValue
-// + " = " + (previousValue - currValue)/Math.abs(previousValue));
-// return true;
-// }
-// return false;
-//}
-//
-
-//public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){
-// stats.collectInitStats(this, o);
-// obj = o;
-// step = 0;
-// currValue = o.getValue();
-// previousValue = Double.MAX_VALUE;
-// gradient = o.getGradient();
-// originalGradientL2Norm = MathUtils.L2Norm(gradient);
-// parameterChange = new double[gradient.length];
-// getDirection();
-// ProjectedDifferentiableLineSearchObjective lso = new ProjectedDifferentiableLineSearchObjective(o,direction);
-//
-// originalDirenctionL2Norm = MathUtils.L2Norm(direction);
-// //MatrixOutput.printDoubleArray(currParameters, "parameters");
-// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-// // System.out.println("Iter " + currentProjectionIteration);
-// //o.printParameters();
-//
-//
-//
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// lastStepUsed = step;
-// return true;
-// }
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find step");
-// stats.collectFinalStats(this, o);
-// return false;
-//
-// }
-//
-// //Update the direction for stopping criteria
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient = o.getGradient();
-// direction = getDirection();
-// if(MathUtils.dotProduct(gradient, direction) > 0){
-// System.out.println("Not a descent direction");
-// System.out.println(" current stats " + stats.prettyPrint(1));
-// System.exit(-1);
-// }
-// stats.collectIterationStats(this, o);
-// }
-// lastStepUsed = step;
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
-//public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
-// System.out.println("Objective is not a projected objective");
-// throw new RuntimeException();
-//}
-
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java
deleted file mode 100644
index c3d21393..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.util.MathUtils;
-
-
-/**
- * Computes a projected objective
- * When we tell it to set some parameters it automatically projects the parameters back into the simplex:
- *
- *
- * When we tell it to get the gradient in automatically returns the projected gradient:
- * @author javg
- *
- */
-public abstract class ProjectedObjective extends Objective{
-
- public abstract double[] projectPoint (double[] point);
-
- public double[] auxParameters;
-
-
- public void setInitialParameters(double[] params){
- setParameters(projectPoint(params));
- }
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java
deleted file mode 100644
index 81d8403e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-
-public interface ProjectedOptimizer extends Optimizer{
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java
deleted file mode 100644
index 6340ef73..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package optimization.gradientBasedMethods.stats;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-public class OptimizerStats {
-
- double start = 0;
- double totalTime = 0;
-
- String objectiveFinalStats;
-
- ArrayList<Double> gradientNorms = new ArrayList<Double>();
- ArrayList<Double> steps = new ArrayList<Double>();
- ArrayList<Double> value = new ArrayList<Double>();
- ArrayList<Integer> iterations = new ArrayList<Integer>();
- double prevValue =0;
-
- public void reset(){
- start = 0;
- totalTime = 0;
-
- objectiveFinalStats="";
-
- gradientNorms.clear();
- steps.clear();
- value.clear();
- iterations.clear();
- prevValue =0;
- }
-
- public void startTime() {
- start = System.currentTimeMillis();
- }
- public void stopTime() {
- totalTime += System.currentTimeMillis() - start;
- }
-
- public String prettyPrint(int level){
- StringBuffer res = new StringBuffer();
- res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n");
- res.append(objectiveFinalStats+"\n");
- if(level > 0){
- if(iterations.size() > 0){
- res.append("\tIteration"+iterations.get(0)+"\tstep: "+StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)+ "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n");
- }
- for(int i = 1; i < iterations.size(); i++){
- res.append("\tIteration:\t"+iterations.get(i)+"\tstep:"+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+
- "\tvalue:\t"+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+
- "\tvalueDiff:\t"+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+
- "\n");
- }
- }
- return res.toString();
- }
-
-
- public void collectInitStats(Optimizer optimizer, Objective objective){
- startTime();
- iterations.add(-1);
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- steps.add(0.0);
- value.add(objective.getValue());
- }
-
- public void collectIterationStats(Optimizer optimizer, Objective objective){
- iterations.add(optimizer.getCurrentIteration());
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- steps.add(optimizer.getCurrentStep());
- value.add(optimizer.getCurrentValue());
- }
-
-
- public void collectFinalStats(Optimizer optimizer, Objective objective){
- stopTime();
- objectiveFinalStats = objective.finalInfoString();
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java
deleted file mode 100644
index d65a1267..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package optimization.gradientBasedMethods.stats;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.ProjectedOptimizer;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-public class ProjectedOptimizerStats extends OptimizerStats{
-
-
-
- public void reset(){
- super.reset();
- projectedGradientNorms.clear();
- }
-
- ArrayList<Double> projectedGradientNorms = new ArrayList<Double>();
-
- public String prettyPrint(int level){
- StringBuffer res = new StringBuffer();
- res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n");
- res.append(objectiveFinalStats+"\n");
- if(level > 0){
- if(iterations.size() > 0){
- res.append("\tIteration"+iterations.get(0)+"\tstep: "+
- StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)
- + "\tdirection"+
- StaticTools.prettyPrint(projectedGradientNorms.get(0), "0.00000E00", 10)+
- "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n");
- }
- for(int i = 1; i < iterations.size(); i++){
- res.append("\tIteration"+iterations.get(i)+"\tstep: "+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+
- "\t direction "+
- StaticTools.prettyPrint(projectedGradientNorms.get(i), "0.00000E00", 10)+
- "\tvalue "+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+
- "\tvalueDiff "+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+
- "\n");
- }
- }
- return res.toString();
- }
-
-
- public void collectInitStats(Optimizer optimizer, Objective objective){
- startTime();
- }
-
- public void collectIterationStats(Optimizer optimizer, Objective objective){
- iterations.add(optimizer.getCurrentIteration());
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- projectedGradientNorms.add(MathUtils.L2Norm(optimizer.getDirection()));
- steps.add(optimizer.getCurrentStep());
- value.add(optimizer.getCurrentValue());
- }
-
-
-
- public void collectFinalStats(Optimizer optimizer, Objective objective){
- stopTime();
- objectiveFinalStats = objective.finalInfoString();
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java
deleted file mode 100644
index c9f9b8df..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java
+++ /dev/null
@@ -1,102 +0,0 @@
-package optimization.linesearch;
-
-import optimization.util.Interpolation;
-
-
-/**
- * Implements Back Tracking Line Search as described on page 37 of Numerical Optimization.
- * Also known as armijo rule
- * @author javg
- *
- */
-public class ArmijoLineSearchMinimization implements LineSearchMethod{
-
- /**
- * How much should the step size decrease at each iteration.
- */
- double contractionFactor = 0.5;
- double c1 = 0.0001;
-
- double sigma1 = 0.1;
- double sigma2 = 0.9;
-
-
-
- double initialStep;
- int maxIterations = 10;
-
-
- public ArmijoLineSearchMinimization(){
- this.initialStep = 1;
- }
-
- //Experiment
- double previousStepPicked = -1;;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
-
- public void reset(){
- previousStepPicked = -1;;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- }
-
- public void setInitialStep(double initial){
- initialStep = initial;
- }
-
- /**
- *
- */
-
- public double getStepSize(DifferentiableLineSearchObjective o) {
- currentInitGradientDot = o.getInitialGradient();
- //Should update all in the objective
- o.updateAlpha(initialStep);
- int nrIterations = 0;
- //System.out.println("tried alpha" + initialStep + " value " + o.getCurrentValue());
- while(!WolfeConditions.suficientDecrease(o,c1)){
- if(nrIterations >= maxIterations){
- o.printLineSearchSteps();
- return -1;
- }
- double alpha=o.getAlpha();
- double alphaTemp =
- Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue());
- if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){
-// System.out.println("using alpha temp " + alphaTemp);
- alpha = alphaTemp;
- }else{
-// System.out.println("Discarding alpha temp " + alphaTemp);
- alpha = alpha*contractionFactor;
- }
-// double alpha =o.getAlpha()*contractionFactor;
-
- o.updateAlpha(alpha);
- //System.out.println("tried alpha" + alpha+ " value " + o.getCurrentValue());
- nrIterations++;
- }
-
- //System.out.println("Leavning line search used:");
- //o.printLineSearchSteps();
-
- previousInitGradientDot = currentInitGradientDot;
- previousStepPicked = o.getAlpha();
- return o.getAlpha();
- }
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java
deleted file mode 100644
index e153f2da..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java
+++ /dev/null
@@ -1,141 +0,0 @@
-package optimization.linesearch;
-
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.Interpolation;
-import optimization.util.MathUtils;
-
-
-
-
-
-/**
- * Implements Armijo Rule Line search along the projection arc (Non-Linear Programming page 230)
- * To be used with Projected gradient Methods.
- *
- * Recall that armijo tries successive step sizes alpha until the sufficient decrease is satisfied:
- * f(x+alpha*direction) < f(x) + alpha*c1*grad(f)*direction
- *
- * In this case we are optimizing over a convex set X so we must guarantee that the new point stays inside the
- * constraints.
- * First the direction as to be feasible (inside constraints) and will be define as:
- * d = (x_k_f - x_k) where x_k_f is a feasible point.
- * so the armijo condition can be rewritten as:
- * f(x+alpha(x_k_f - x_k)) < f(x) + c1*grad(f)*(x_k_f - x_k)
- * and x_k_f is defined as:
- * [x_k-alpha*grad(f)]+
- * where []+ mean a projection to the feasibility set.
- * So this means that we take a step on the negative gradient (gradient descent) and then obtain then project
- * that point to the feasibility set.
- * Note that if the point is already feasible then we are back to the normal armijo rule.
- *
- * @author javg
- *
- */
-public class ArmijoLineSearchMinimizationAlongProjectionArc implements LineSearchMethod{
-
- /**
- * How much should the step size decrease at each iteration.
- */
- double contractionFactor = 0.5;
- double c1 = 0.0001;
-
-
- double initialStep;
- int maxIterations = 100;
-
-
- double sigma1 = 0.1;
- double sigma2 = 0.9;
-
- //Experiment
- double previousStepPicked = -1;;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
- GenericPickFirstStep strategy;
-
-
- public void reset(){
- previousStepPicked = -1;;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- }
-
-
- public ArmijoLineSearchMinimizationAlongProjectionArc(){
- this.initialStep = 1;
- }
-
- public ArmijoLineSearchMinimizationAlongProjectionArc(GenericPickFirstStep strategy){
- this.strategy = strategy;
- this.initialStep = strategy.getFirstStep(this);
- }
-
-
- public void setInitialStep(double initial){
- this.initialStep = initial;
- }
-
- /**
- *
- */
-
- public double getStepSize(DifferentiableLineSearchObjective o) {
-
-
- //Should update all in the objective
- initialStep = strategy.getFirstStep(this);
- o.updateAlpha(initialStep);
- previousInitGradientDot=currentInitGradientDot;
- currentInitGradientDot=o.getCurrentGradient();
- int nrIterations = 0;
-
- //Armijo rule, the current value has to be smaller than the original value plus a small step of the gradient
- while(o.getCurrentValue() >
- o.getOriginalValue() + c1*(o.getCurrentGradient())){
-// System.out.println("curr value "+o.getCurrentValue());
-// System.out.println("original value "+o.getOriginalValue());
-// System.out.println("GRADIENT decrease" +(MathUtils.dotProduct(o.o.gradient,
-// MathUtils.arrayMinus(o.originalParameters,((ProjectedObjective)o.o).auxParameters))));
-// System.out.println("GRADIENT SAVED" + o.getCurrentGradient());
- if(nrIterations >= maxIterations){
- System.out.println("Could not find a step leaving line search with -1");
- o.printLineSearchSteps();
- return -1;
- }
- double alpha=o.getAlpha();
- double alphaTemp =
- Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue());
- if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){
- alpha = alphaTemp;
- }else{
- alpha = alpha*contractionFactor;
- }
-// double alpha =obj.getAlpha()*contractionFactor;
- o.updateAlpha(alpha);
- nrIterations++;
- }
-// System.out.println("curr value "+o.getCurrentValue());
-// System.out.println("original value "+o.getOriginalValue());
-// System.out.println("sufficient decrease" +c1*o.getCurrentGradient());
-// System.out.println("Leavning line search used:");
-// o.printSmallLineSearchSteps();
-
- previousStepPicked = o.getAlpha();
- return o.getAlpha();
- }
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java
deleted file mode 100644
index a5bc958e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java
+++ /dev/null
@@ -1,185 +0,0 @@
-package optimization.linesearch;
-
-import gnu.trove.TDoubleArrayList;
-import gnu.trove.TIntArrayList;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-
-import util.MathUtil;
-import util.Printing;
-
-
-/**
- * A wrapper class for the actual objective in order to perform
- * line search. The optimization code assumes that this does a lot
- * of caching in order to simplify legibility. For the applications
- * we use it for, caching the entire history of evaluations should be
- * a win.
- *
- * Note: the lastEvaluatedAt value is very important, since we will use
- * it to avoid doing an evaluation of the gradient after the line search.
- *
- * The differentiable line search objective defines a search along the ray
- * given by a direction of the main objective.
- * It defines the following function,
- * where f is the original objective function:
- * g(alpha) = f(x_0 + alpha*direction)
- * g'(alpha) = f'(x_0 + alpha*direction)*direction
- *
- * @author joao
- *
- */
-public class DifferentiableLineSearchObjective {
-
-
-
- Objective o;
- int nrIterations;
- TDoubleArrayList steps;
- TDoubleArrayList values;
- TDoubleArrayList gradients;
-
- //This variables cannot change
- public double[] originalParameters;
- public double[] searchDirection;
-
-
- /**
- * Defines a line search objective:
- * Receives:
- * Objective to each we are performing the line search, is used to calculate values and gradients
- * Direction where to do the ray search, note that the direction does not depend of the
- * objective but depends from the method.
- * @param o
- * @param direction
- */
- public DifferentiableLineSearchObjective(Objective o) {
- this.o = o;
- originalParameters = new double[o.getNumParameters()];
- searchDirection = new double[o.getNumParameters()];
- steps = new TDoubleArrayList();
- values = new TDoubleArrayList();
- gradients = new TDoubleArrayList();
- }
- /**
- * Called whenever we start a new iteration.
- * Receives the ray where we are searching for and resets all values
- *
- */
- public void reset(double[] direction){
- //Copy initial values
- System.arraycopy(o.getParameters(), 0, originalParameters, 0, o.getNumParameters());
- System.arraycopy(direction, 0, searchDirection, 0, o.getNumParameters());
-
- //Initialize variables
- nrIterations = 0;
- steps.clear();
- values.clear();
- gradients.clear();
-
- values.add(o.getValue());
- gradients.add(MathUtils.dotProduct(o.getGradient(),direction));
- steps.add(0);
- }
-
-
- /**
- * update the current value of alpha.
- * Takes a step with that alpha in direction
- * Get the real objective value and gradient and calculate all required information.
- */
- public void updateAlpha(double alpha){
- if(alpha < 0){
- System.out.println("alpha may not be smaller that zero");
- throw new RuntimeException();
- }
- nrIterations++;
- steps.add(alpha);
- //x_t+1 = x_t + alpha*direction
- System.arraycopy(originalParameters,0, o.getParameters(), 0, originalParameters.length);
- MathUtils.plusEquals(o.getParameters(), searchDirection, alpha);
- o.setParameters(o.getParameters());
-// System.out.println("Took a step of " + alpha + " new value " + o.getValue());
- values.add(o.getValue());
- gradients.add(MathUtils.dotProduct(o.getGradient(),searchDirection));
- }
-
-
-
- public int getNrIterations(){
- return nrIterations;
- }
-
- /**
- * return g(alpha) for the current value of alpha
- * @param iter
- * @return
- */
- public double getValue(int iter){
- return values.get(iter);
- }
-
- public double getCurrentValue(){
- return values.get(nrIterations);
- }
-
- public double getOriginalValue(){
- return values.get(0);
- }
-
- /**
- * return g'(alpha) for the current value of alpha
- * @param iter
- * @return
- */
- public double getGradient(int iter){
- return gradients.get(iter);
- }
-
- public double getCurrentGradient(){
- return gradients.get(nrIterations);
- }
-
- public double getInitialGradient(){
- return gradients.get(0);
- }
-
-
-
-
- public double getAlpha(){
- return steps.get(nrIterations);
- }
-
- public void printLineSearchSteps(){
- System.out.println(
- " Steps size "+steps.size() +
- "Values size "+values.size() +
- "Gradeients size "+gradients.size());
- for(int i =0; i < steps.size();i++){
- System.out.println("Iter " + i + " step " + steps.get(i) +
- " value " + values.get(i) + " grad " + gradients.get(i));
- }
- }
-
- public void printSmallLineSearchSteps(){
- for(int i =0; i < steps.size();i++){
- System.out.print(StaticTools.prettyPrint(steps.get(i), "0.0000E00",8) + " ");
- }
- System.out.println();
- }
-
- public static void main(String[] args) {
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java
deleted file mode 100644
index a33eb311..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package optimization.linesearch;
-
-
-public class GenericPickFirstStep{
- double _initValue;
- public GenericPickFirstStep(double initValue) {
- _initValue = initValue;
- }
-
- public double getFirstStep(LineSearchMethod ls){
- return _initValue;
- }
- public void collectInitValues(LineSearchMethod ls){
-
- }
-
- public void collectFinalValues(LineSearchMethod ls){
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java
deleted file mode 100644
index 0deebcdb..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package optimization.linesearch;
-
-
-public class InterpolationPickFirstStep extends GenericPickFirstStep{
- public InterpolationPickFirstStep(double initValue) {
- super(initValue);
- }
-
- public double getFirstStep(LineSearchMethod ls){
- if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){
- double newStep = Math.min(300, 1.02*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient());
- // System.out.println("proposing " + newStep);
- return newStep;
-
- }
- return _initValue;
- }
- public void collectInitValues(WolfRuleLineSearch ls){
-
- }
-
- public void collectFinalValues(WolfRuleLineSearch ls){
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java
deleted file mode 100644
index 80cd7f39..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package optimization.linesearch;
-
-
-public interface LineSearchMethod {
-
- double getStepSize(DifferentiableLineSearchObjective o);
-
- public double getInitialGradient();
- public double getPreviousInitialGradient();
- public double getPreviousStepUsed();
-
- public void setInitialStep(double initial);
- public void reset();
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java
deleted file mode 100644
index 4b354fd9..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package optimization.linesearch;
-
-/**
- * Non newtwon since we don't always try 1...
- * Not sure if that is even usefull for newton
- * @author javg
- *
- */
-public class NonNewtonInterpolationPickFirstStep extends GenericPickFirstStep{
- public NonNewtonInterpolationPickFirstStep(double initValue) {
- super(initValue);
- }
-
- public double getFirstStep(LineSearchMethod ls){
-// System.out.println("Previous step used " + ls.getPreviousStepUsed());
-// System.out.println("PreviousGradinebt " + ls.getPreviousInitialGradient());
-// System.out.println("CurrentGradinebt " + ls.getInitialGradient());
- if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){
- double newStep = 1.01*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient();
- //System.out.println("Suggesting " + newStep);
- return newStep;
-
- }
- return _initValue;
- }
- public void collectInitValues(WolfRuleLineSearch ls){
-
- }
-
- public void collectFinalValues(WolfRuleLineSearch ls){
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java
deleted file mode 100644
index 29ccbc32..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java
+++ /dev/null
@@ -1,137 +0,0 @@
-package optimization.linesearch;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-
-/**
- * See ArmijoLineSearchMinimizationAlongProjectionArc for description
- * @author javg
- *
- */
-public class ProjectedDifferentiableLineSearchObjective extends DifferentiableLineSearchObjective{
-
-
-
- ProjectedObjective obj;
- public ProjectedDifferentiableLineSearchObjective(Objective o) {
- super(o);
- if(!(o instanceof ProjectedObjective)){
- System.out.println("Must receive a projected objective");
- throw new RuntimeException();
- }
- obj = (ProjectedObjective) o;
- }
-
-
-
- public double[] projectPoint (double[] point){
- return ((ProjectedObjective)o).projectPoint(point);
- }
- public void updateAlpha(double alpha){
- if(alpha < 0){
- System.out.println("alpha may not be smaller that zero");
- throw new RuntimeException();
- }
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getParameters().length];
- }
-
- nrIterations++;
-
- steps.add(alpha);
- System.arraycopy(originalParameters, 0, obj.auxParameters, 0, obj.auxParameters.length);
-
- //Take a step into the search direction
-
-// MatrixOutput.printDoubleArray(obj.getGradient(), "gradient");
-
-// alpha=gradients.get(0)*alpha/(gradients.get(gradients.size()-1));
-
- //x_t+1 = x_t - alpha*gradient = x_t + alpha*direction
- MathUtils.plusEquals(obj.auxParameters, searchDirection, alpha);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "before projection");
- obj.auxParameters = projectPoint(obj.auxParameters);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "after projection");
- o.setParameters(obj.auxParameters);
-// System.out.println("new parameters");
-// o.printParameters();
- values.add(o.getValue());
- //Computes the new gradient x_k-[x_k-alpha*Gradient(x_k)]+
- MathUtils.minusEqualsInverse(originalParameters,obj.auxParameters,1);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "new gradient");
- //Dot product between the new direction and the new gradient
- double gradient = MathUtils.dotProduct(obj.auxParameters,searchDirection);
- gradients.add(gradient);
- if(gradient > 0){
- System.out.println("Gradient on line search has to be smaller than zero");
- System.out.println("Iter: " + nrIterations);
- MatrixOutput.printDoubleArray(obj.auxParameters, "new direction");
- MatrixOutput.printDoubleArray(searchDirection, "search direction");
- throw new RuntimeException();
-
- }
-
- }
-
- /**
- *
- */
-// public void updateAlpha(double alpha){
-//
-// if(alpha < 0){
-// System.out.println("alpha may not be smaller that zero");
-// throw new RuntimeException();
-// }
-//
-// nrIterations++;
-// steps.add(alpha);
-// //x_t+1 = x_t - alpha*direction
-// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length);
-//// MatrixOutput.printDoubleArray(parametersChange, "parameters before step");
-//// System.out.println("Step" + alpha);
-// MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha);
-//
-// MathUtils.minusEquals(parametersChange, originalGradient, alpha);
-//
-// //Project the points into the feasibility set
-//// MatrixOutput.printDoubleArray(parametersChange, "before projection");
-// //x_k(alpha) = [x_k - alpha*grad f(x_k)]+
-// parametersChange = projectPoint(parametersChange);
-//// MatrixOutput.printDoubleArray(parametersChange, "after projection");
-// o.setParameters(parametersChange);
-// values.add(o.getValue());
-// //Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+
-//
-// direction=MathUtils.arrayMinus(parametersChange,originalParameters);
-//// MatrixOutput.printDoubleArray(direction, "new direction");
-//
-// double gradient = MathUtils.dotProduct(originalGradient,direction);
-// gradients.add(gradient);
-// if(gradient > 1E-10){
-// System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction)));
-//
-//
-// System.out.println("not a descent direction for alpha " + alpha);
-// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length);
-// MathUtils.minusEquals(parametersChange, originalGradient, 1E-20);
-//
-// parametersChange = projectPoint(parametersChange);
-// direction=MathUtils.arrayMinus(parametersChange,originalParameters);
-// gradient = MathUtils.dotProduct(originalGradient,direction);
-// if(gradient > 0){
-// System.out.println("Direction is really non-descent evern for small alphas:" + gradient);
-// }
-// System.out.println("ProjecteLineSearchObjective: Should be a descent direction at " + nrIterations + ": "+ gradient);
-//// System.out.println(Printing.doubleArrayToString(originalGradient, null,"Original gradient"));
-//// System.out.println(Printing.doubleArrayToString(originalParameters, null,"Original parameters"));
-//// System.out.println(Printing.doubleArrayToString(parametersChange, null,"Projected parameters"));
-//// System.out.println(Printing.doubleArrayToString(direction, null,"Direction"));
-// throw new RuntimeException();
-// }
-// }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java
deleted file mode 100644
index 5489f2d0..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java
+++ /dev/null
@@ -1,300 +0,0 @@
-package optimization.linesearch;
-
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import optimization.util.Interpolation;
-
-
-
-
-/**
- *
- * @author javg
- *
- */
-public class WolfRuleLineSearch implements LineSearchMethod{
-
- GenericPickFirstStep pickFirstStep;
-
- double c1 = 1.0E-4;
- double c2 = 0.9;
-
- //Application dependent
- double maxStep=100;
-
- int extrapolationIteration;
- int maxExtrapolationIteration = 1000;
-
-
- double minZoomDiffTresh = 10E-10;
-
-
- ArrayList<Double> steps;
- ArrayList<Double> gradientDots;
- ArrayList<Double> functionVals;
-
- int debugLevel = 0;
- boolean foudStep = false;
-
- public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){
- this.pickFirstStep = pickFirstStep;
-
- }
-
-
-
-
- public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep, double c1, double c2){
- this.pickFirstStep = pickFirstStep;
- initialStep = pickFirstStep.getFirstStep(this);
- this.c1 = c1;
- this.c2 = c2;
- }
-
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- //Experiment
- double previousStepPicked = -1;;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
- double initialStep;
-
-
- public void reset(){
- previousStepPicked = -1;;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- if(steps != null)
- steps.clear();
- if(gradientDots != null)
- gradientDots.clear();
- if(functionVals != null)
- functionVals.clear();
- }
-
- public void setInitialStep(double initial){
- initialStep = pickFirstStep.getFirstStep(this);
- }
-
-
-
- /**
- * Implements Wolf Line search as described in nocetal.
- * This process consists in two stages. The first stage we try to satisfy the
- * biggest step size that still satisfies the curvature condition. We keep increasing
- * the initial step size until we find a step satisfying the curvature condition, we return
- * success, we failed the sufficient increase so we cannot increase more and we can call zoom with
- * that maximum step, or we pass the minimum in which case we can call zoom the same way.
- *
- */
- public double getStepSize(DifferentiableLineSearchObjective objective){
- //System.out.println("entering line search");
-
- foudStep = false;
- if(debugLevel >= 1){
- steps = new ArrayList<Double>();
- gradientDots = new ArrayList<Double>();
- functionVals =new ArrayList<Double>();
- }
-
- //test
- currentInitGradientDot = objective.getInitialGradient();
-
-
- double previousValue = objective.getCurrentValue();
- double previousStep = 0;
- double currentStep =pickFirstStep.getFirstStep(this);
- for(extrapolationIteration = 0;
- extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){
-
- objective.updateAlpha(currentStep);
- double currentValue = objective.getCurrentValue();
- if(debugLevel >= 1){
- steps.add(currentStep);
- functionVals.add(currentValue);
- gradientDots.add(objective.getCurrentGradient());
- }
-
-
- //The current step does not satisfy the sufficient decrease condition anymore
- // so we cannot get bigger than that calling zoom.
- if(!WolfeConditions.suficientDecrease(objective,c1)||
- (extrapolationIteration > 0 && currentValue >= previousValue)){
- currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
- break;
- }
-
- //Satisfying both conditions ready to leave
- if(WolfeConditions.sufficientCurvature(objective,c1,c2)){
- //Found step
- foudStep = true;
- break;
- }
-
- /**
- * This means that we passed the minimum already since the dot product that should be
- * negative (descent direction) is now positive. So we cannot increase more. On the other hand
- * since we know the direction is a descent direction the value the objective at the current step
- * is for sure smaller than the preivous step so we change the order.
- */
- if(objective.getCurrentGradient() >= 0){
- currentStep = zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1);
- break;
- }
-
-
- //Ok, so we can still get a bigger step,
- double aux = currentStep;
- //currentStep = currentStep*2;
- if(Math.abs(currentStep-maxStep)>1.1e-2){
- currentStep = (currentStep+maxStep)/2;
- }else{
- currentStep = currentStep*2;
- }
- previousStep = aux;
- previousValue = currentValue;
- //Could be done better
- if(currentStep >= maxStep){
- System.out.println("Excedded max step...calling zoom with maxStepSize");
- currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
- }
- }
- if(!foudStep){
- System.out.println("Wolfe Rule exceed number of iterations");
- if(debugLevel >= 1){
- printSmallWolfeStats(System.out);
-// System.out.println("Line search values");
-// DebugHelpers.getLineSearchGraph(o, direction, originalParameters,origValue, origGradDirectionDot,c1,c2);
- }
- return -1;
- }
- if(debugLevel >= 1){
- printSmallWolfeStats(System.out);
- }
-
- previousStepPicked = currentStep;
- previousInitGradientDot = currentInitGradientDot;
-// objective.printLineSearchSteps();
- return currentStep;
- }
-
-
-
-
-
- public void printWolfeStats(PrintStream out){
- for(int i = 0; i < steps.size(); i++){
- out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i));
- }
- }
-
- public void printSmallWolfeStats(PrintStream out){
- for(int i = 0; i < steps.size(); i++){
- out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" ");
- }
- System.out.println();
- }
-
-
-
- /**
- * Pick a step satisfying the strong wolfe condition from an given from lowerStep and higherStep
- * picked on the routine above.
- *
- * Both lowerStep and higherStep have been evaluated, so we only need to pass the iteration where they have
- * been evaluated and save extra evaluations.
- *
- * We know that lowerStepValue as to be smaller than higherStepValue, and that a point
- * satisfying both conditions exists in such interval.
- *
- * LowerStep always satisfies at least the sufficient decrease
- * @return
- */
- public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep,
- int lowerStepIter, int higherStepIter){
-
- if(debugLevel >=2){
- System.out.println("Entering zoom with " + lowerStep+"-"+higherStep);
- }
-
- double currentStep=-1;
-
- int zoomIter = 0;
- while(zoomIter < 1000){
- if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){
- o.updateAlpha(lowerStep);
- if(debugLevel >= 1){
- steps.add(lowerStep);
- functionVals.add(o.getCurrentValue());
- gradientDots.add(o.getCurrentGradient());
- }
- foudStep = true;
- return lowerStep;
- }
-
- //Cubic interpolation
- currentStep =
- Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter),
- higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter));
-
- //Safeguard.... should not be required check in what condtions it is required
- if(currentStep < 0 ){
- currentStep = (lowerStep+higherStep)/2;
- }
- if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){
- currentStep = (lowerStep+higherStep)/2;
- }
-// currentStep = (lowerStep+higherStep)/2;
-// System.out.println("Trying "+currentStep);
- o.updateAlpha(currentStep);
- if(debugLevel >=1){
- steps.add(currentStep);
- functionVals.add(o.getCurrentValue());
- gradientDots.add(o.getCurrentGradient());
- }
- if(!WolfeConditions.suficientDecrease(o,c1)
- || o.getCurrentValue() >= o.getValue(lowerStepIter)){
- higherStepIter = o.nrIterations;
- higherStep = currentStep;
- }
- //Note when entering here the new step satisfies the sufficent decrease and
- // or as a function value that is better than the previous best (lowerStepFunctionValues)
- // so we either leave or change the value of the alpha low.
- else{
- if(WolfeConditions.sufficientCurvature(o,c1,c2)){
- //Satisfies the both wolf conditions
- foudStep = true;
- break;
- }
- //If does not satisfy curvature
- if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){
- higherStep = lowerStep;
- higherStepIter = lowerStepIter;
- }
- lowerStep = currentStep;
- lowerStepIter = o.nrIterations;
- }
- zoomIter++;
- }
- return currentStep;
- }
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java
deleted file mode 100644
index dcc704eb..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package optimization.linesearch;
-
-
-public class WolfeConditions {
-
- /**
- * Sufficient Increase number. Default constant
- */
-
-
- /**
- * Value for suficient curvature:
- * 0.9 - For newton and quase netwon methods
- * 0.1 - Non linear conhugate gradient
- */
-
- int debugLevel = 0;
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- public static boolean suficientDecrease(DifferentiableLineSearchObjective o, double c1){
- double value = o.getOriginalValue()+c1*o.getAlpha()*o.getInitialGradient();
-// System.out.println("Sufficient Decrease original "+value+" new "+ o.getCurrentValue());
- return o.getCurrentValue() <= value;
- }
-
-
-
-
- public static boolean sufficientCurvature(DifferentiableLineSearchObjective o, double c1, double c2){
-// if(debugLevel >= 2){
-// double current = Math.abs(o.getCurrentGradient());
-// double orig = -c2*o.getInitialGradient();
-// if(current <= orig){
-// return true;
-// }else{
-// System.out.println("Not satistfying curvature condition curvature " + current + " wants " + orig);
-// return false;
-// }
-// }
- return Math.abs(o.getCurrentGradient()) <= -c2*o.getInitialGradient();
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
deleted file mode 100644
index 0429d531..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
+++ /dev/null
@@ -1,104 +0,0 @@
-package optimization.projections;
-
-
-import java.util.Random;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-/**
- * Implements a projection into a box set defined by a and b.
- * If either a or b are infinity then that bound is ignored.
- * @author javg
- *
- */
-public class BoundsProjection extends Projection{
-
- double a,b;
- boolean ignoreA = false;
- boolean ignoreB = false;
- public BoundsProjection(double lowerBound, double upperBound) {
- if(Double.isInfinite(lowerBound)){
- this.ignoreA = true;
- }else{
- this.a =lowerBound;
- }
- if(Double.isInfinite(upperBound)){
- this.ignoreB = true;
- }else{
- this.b =upperBound;
- }
- }
-
-
-
- /**
- * Projects into the bounds
- * a <= x_i <=b
- */
- public void project(double[] original){
- for (int i = 0; i < original.length; i++) {
- if(!ignoreA && original[i] < a){
- original[i] = a;
- }else if(!ignoreB && original[i]>b){
- original[i]=b;
- }
- }
- }
-
- /**
- * Generates a random number between a and b.
- */
-
- Random r = new Random();
-
- public double[] samplePoint(int numParams) {
- double[] point = new double[numParams];
- for (int i = 0; i < point.length; i++) {
- double rand = r.nextDouble();
- if(ignoreA && ignoreB){
- //Use const to avoid number near overflow
- point[i] = rand*(1.E100+1.E100)-1.E100;
- }else if(ignoreA){
- point[i] = rand*(b-1.E100)-1.E100;
- }else if(ignoreB){
- point[i] = rand*(1.E100-a)-a;
- }else{
- point[i] = rand*(b-a)-a;
- }
- }
- return point;
- }
-
- public static void main(String[] args) {
- BoundsProjection sp = new BoundsProjection(0,Double.POSITIVE_INFINITY);
-
-
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 1");
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 2");
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 3");
-
- double[] d = {-1.1,1.2,1.4};
- double[] original = d.clone();
- MatrixOutput.printDoubleArray(d, "before");
-
- sp.project(d);
- MatrixOutput.printDoubleArray(d, "after");
- System.out.println("Test projection: " + sp.testProjection(original, d));
- }
-
- double epsilon = 1.E-10;
- public double[] perturbePoint(double[] point, int parameter){
- double[] newPoint = point.clone();
- if(!ignoreA && MathUtils.almost(point[parameter], a)){
- newPoint[parameter]+=epsilon;
- }else if(!ignoreB && MathUtils.almost(point[parameter], b)){
- newPoint[parameter]-=epsilon;
- }else{
- newPoint[parameter]-=epsilon;
- }
- return newPoint;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
deleted file mode 100644
index b5a9f92f..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
+++ /dev/null
@@ -1,72 +0,0 @@
-package optimization.projections;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-import util.ArrayMath;
-import util.Printing;
-
-
-
-public abstract class Projection {
-
-
- public abstract void project(double[] original);
-
-
- /**
- * From the projection theorem "Non-Linear Programming" page
- * 201 fact 2.
- *
- * Given some z in R, and a vector x* in X;
- * x* = z+ iif for all x in X
- * (z-x*)'(x-x*) <= 0 where 0 is when x*=x
- * See figure 2.16 in book
- *
- * @param original
- * @param projected
- * @return
- */
- public boolean testProjection(double[] original, double[] projected){
- double[] original1 = original.clone();
- //System.out.println(Printing.doubleArrayToString(original1, null, "original"));
- //System.out.println(Printing.doubleArrayToString(projected, null, "projected"));
- MathUtils.minusEquals(original1, projected, 1);
- //System.out.println(Printing.doubleArrayToString(original1, null, "minus1"));
- for(int i = 0; i < 10; i++){
- double[] x = samplePoint(original.length);
- // System.out.println(Printing.doubleArrayToString(x, null, "sample"));
- //If the same this returns zero so we are there.
- MathUtils.minusEquals(x, projected, 1);
- // System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
- double dotProd = MathUtils.dotProduct(original1, x);
-
- // System.out.println("dot " + dotProd);
- if(dotProd > 0) return false;
- }
-
- //Perturbs the point a bit in all possible directions
- for(int i = 0; i < original.length; i++){
- double[] x = perturbePoint(projected,i);
- // System.out.println(Printing.doubleArrayToString(x, null, "perturbed"));
- //If the same this returns zero so we are there.
- MathUtils.minusEquals(x, projected, 1);
- // System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
- double dotProd = MathUtils.dotProduct(original1, x);
-
- // System.out.println("dot " + dotProd);
- if(dotProd > 0) return false;
- }
-
-
-
- return true;
- }
-
- //Samples a point from the constrained set
- public abstract double[] samplePoint(int dimensions);
-
- //Perturbs a point a bit still leaving it at the constraints set
- public abstract double[] perturbePoint(double[] point, int parameter);
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
deleted file mode 100644
index f22afcaf..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package optimization.projections;
-
-
-
-import java.util.Random;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-public class SimplexProjection extends Projection{
-
- double scale;
- public SimplexProjection(double scale) {
- this.scale = scale;
- }
-
- /**
- * projects the numbers of the array
- * into a simplex of size.
- * We follow the description of the paper
- * "Efficient Projetions onto the l1-Ball
- * for learning in high dimensions"
- */
- public void project(double[] original){
- double[] ds = new double[original.length];
- System.arraycopy(original, 0, ds, 0, ds.length);
- //If sum is smaller then zero then its ok
- for (int i = 0; i < ds.length; i++) ds[i] = ds[i]>0? ds[i]:0;
- double sum = MathUtils.sum(ds);
- if (scale - sum >= -1.E-10 ){
- System.arraycopy(ds, 0, original, 0, ds.length);
- //System.out.println("Not projecting");
- return;
- }
- //System.out.println("projecting " + sum + " scontraints " + scale);
- util.Array.sortDescending(ds);
- double currentSum = 0;
- double previousTheta = 0;
- double theta = 0;
- for (int i = 0; i < ds.length; i++) {
- currentSum+=ds[i];
- theta = (currentSum-scale)/(i+1);
- if(ds[i]-theta < -1e-10){
- break;
- }
- previousTheta = theta;
- }
- //DEBUG
- if(previousTheta < 0){
- System.out.println("Simple Projection: Theta is smaller than zero: " + previousTheta);
- System.exit(-1);
- }
- for (int i = 0; i < original.length; i++) {
- original[i] = Math.max(original[i]-previousTheta, 0);
- }
- }
-
-
-
-
-
-
- /**
- * Samples a point from the simplex of scale. Just sample
- * random number from 0-scale and then if
- * their sum is bigger then sum make them normalize.
- * This is probably not sampling uniformly from the simplex but it is
- * enough for our goals in here.
- */
- Random r = new Random();
- public double[] samplePoint(int dimensions) {
- double[] newPoint = new double[dimensions];
- double sum =0;
- for (int i = 0; i < newPoint.length; i++) {
- double rand = r.nextDouble()*scale;
- sum+=rand;
- newPoint[i]=rand;
- }
- //Normalize
- if(sum > scale){
- for (int i = 0; i < newPoint.length; i++) {
- newPoint[i]=scale*newPoint[i]/sum;
- }
- }
- return newPoint;
- }
-
- public static void main(String[] args) {
- SimplexProjection sp = new SimplexProjection(1);
-
-
- double[] point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 1 sum:" + MathUtils.sum(point));
- point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 2 sum:" + MathUtils.sum(point));
- point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 3 sum:" + MathUtils.sum(point));
-
- double[] d = {0,1.1,-10};
- double[] original = d.clone();
- MatrixOutput.printDoubleArray(d, "before");
-
- sp.project(d);
- MatrixOutput.printDoubleArray(d, "after");
- System.out.println("Test projection: " + sp.testProjection(original, d));
-
- }
-
-
- double epsilon = 1.E-10;
- public double[] perturbePoint(double[] point, int parameter){
- double[] newPoint = point.clone();
- if(MathUtils.almost(MathUtils.sum(point), scale)){
- newPoint[parameter]-=epsilon;
- }
- else if(point[parameter]==0){
- newPoint[parameter]+=epsilon;
- }else if(MathUtils.almost(point[parameter], scale)){
- newPoint[parameter]-=epsilon;
- }
- else{
- newPoint[parameter]-=epsilon;
- }
- return newPoint;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java
deleted file mode 100644
index 15760f18..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package optimization.stopCriteria;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-
-public class CompositeStopingCriteria implements StopingCriteria {
-
- ArrayList<StopingCriteria> criterias;
-
- public CompositeStopingCriteria() {
- criterias = new ArrayList<StopingCriteria>();
- }
-
- public void add(StopingCriteria criteria){
- criterias.add(criteria);
- }
-
- public boolean stopOptimization(Objective obj){
- for(StopingCriteria criteria: criterias){
- if(criteria.stopOptimization(obj)){
- return true;
- }
- }
- return false;
- }
-
- public void reset(){
- for(StopingCriteria criteria: criterias){
- criteria.reset();
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java
deleted file mode 100644
index 534ff833..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class GradientL2Norm implements StopingCriteria{
-
- /**
- * Stop if gradientNorm/(originalGradientNorm) smaller
- * than gradientConvergenceValue
- */
- protected double gradientConvergenceValue;
-
-
- public GradientL2Norm(double gradientConvergenceValue){
- this.gradientConvergenceValue = gradientConvergenceValue;
- }
-
- public void reset(){}
-
- public boolean stopOptimization(Objective obj){
- double norm = MathUtils.L2Norm(obj.gradient);
- if(norm < gradientConvergenceValue){
- System.out.println("Gradient norm below treshold");
- return true;
- }
- return false;
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java
deleted file mode 100644
index 4a489641..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-/**
- * Divides the norm by the norm at the begining of the iteration
- * @author javg
- *
- */
-public class NormalizedGradientL2Norm extends GradientL2Norm{
-
- /**
- * Stop if gradientNorm/(originalGradientNorm) smaller
- * than gradientConvergenceValue
- */
- protected double originalGradientNorm = -1;
-
- public void reset(){
- originalGradientNorm = -1;
- }
- public NormalizedGradientL2Norm(double gradientConvergenceValue){
- super(gradientConvergenceValue);
- }
-
-
-
-
- public boolean stopOptimization(Objective obj){
- double norm = MathUtils.L2Norm(obj.gradient);
- if(originalGradientNorm == -1){
- originalGradientNorm = norm;
- }
- if(originalGradientNorm < 1E-10){
- System.out.println("Gradient norm is zero " + originalGradientNorm);
- return true;
- }
- double normalizedNorm = 1.0*norm/originalGradientNorm;
- if( normalizedNorm < gradientConvergenceValue){
- System.out.println("Gradient norm below normalized normtreshold: " + norm + " original: " + originalGradientNorm + " normalized norm: " + normalizedNorm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java
deleted file mode 100644
index 5ae554c2..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-/**
- * Divides the norm by the norm at the begining of the iteration
- * @author javg
- *
- */
-public class NormalizedProjectedGradientL2Norm extends ProjectedGradientL2Norm{
-
- /**
- * Stop if gradientNorm/(originalGradientNorm) smaller
- * than gradientConvergenceValue
- */
- double originalProjectedNorm = -1;
-
- public NormalizedProjectedGradientL2Norm(double gradientConvergenceValue){
- super(gradientConvergenceValue);
- }
-
- public void reset(){
- originalProjectedNorm = -1;
- }
-
-
- double[] projectGradient(ProjectedObjective obj){
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getNumParameters()];
- }
- System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters());
- MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1);
- obj.auxParameters = obj.projectPoint(obj.auxParameters);
- MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1);
- return obj.auxParameters;
- }
-
- public boolean stopOptimization(Objective obj){
- if(obj instanceof ProjectedObjective) {
- ProjectedObjective o = (ProjectedObjective) obj;
- double norm = MathUtils.L2Norm(projectGradient(o));
- if(originalProjectedNorm == -1){
- originalProjectedNorm = norm;
- }
- double normalizedNorm = 1.0*norm/originalProjectedNorm;
- if( normalizedNorm < gradientConvergenceValue){
- System.out.println("Gradient norm below normalized normtreshold: " + norm + " original: " + originalProjectedNorm + " normalized norm: " + normalizedNorm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
- System.out.println("Not a projected objective");
- throw new RuntimeException();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java
deleted file mode 100644
index 6dbbc50d..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class NormalizedValueDifference implements StopingCriteria{
-
- /**
- * Stop if the different between values is smaller than a treshold
- */
- protected double valueConvergenceValue=0.01;
- protected double previousValue = Double.NaN;
- protected double currentValue = Double.NaN;
-
- public NormalizedValueDifference(double valueConvergenceValue){
- this.valueConvergenceValue = valueConvergenceValue;
- }
-
- public void reset(){
- previousValue = Double.NaN;
- currentValue = Double.NaN;
- }
-
-
- public boolean stopOptimization(Objective obj){
- if(Double.isNaN(currentValue)){
- currentValue = obj.getValue();
- return false;
- }else {
- previousValue = currentValue;
- currentValue = obj.getValue();
- if(previousValue != 0){
- double valueDiff = Math.abs(previousValue - currentValue)/Math.abs(previousValue);
- if( valueDiff < valueConvergenceValue){
- System.out.println("Leaving different in values is to small: Prev "
- + (previousValue/previousValue) + " Curr: " + (currentValue/previousValue)
- + " diff: " + valueDiff);
- return true;
- }
- }else{
- double valueDiff = Math.abs(previousValue - currentValue);
- if( valueDiff < valueConvergenceValue){
- System.out.println("Leaving different in values is to small: Prev "
- + (previousValue) + " Curr: " + (currentValue)
- + " diff: " + valueDiff);
- return true;
- }
- }
-
- return false;
- }
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java
deleted file mode 100644
index aadf1fd5..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java
+++ /dev/null
@@ -1,51 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-public class ProjectedGradientL2Norm implements StopingCriteria{
-
- /**
- * Stop if gradientNorm/(originalGradientNorm) smaller
- * than gradientConvergenceValue
- */
- protected double gradientConvergenceValue;
-
-
- public ProjectedGradientL2Norm(double gradientConvergenceValue){
- this.gradientConvergenceValue = gradientConvergenceValue;
- }
-
- public void reset(){
-
- }
-
- double[] projectGradient(ProjectedObjective obj){
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getNumParameters()];
- }
- System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters());
- MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1);
- obj.auxParameters = obj.projectPoint(obj.auxParameters);
- MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1);
- return obj.auxParameters;
- }
-
- public boolean stopOptimization(Objective obj){
- if(obj instanceof ProjectedObjective) {
- ProjectedObjective o = (ProjectedObjective) obj;
- double norm = MathUtils.L2Norm(projectGradient(o));
- if(norm < gradientConvergenceValue){
- // System.out.println("Gradient norm below treshold: " + norm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
- System.out.println("Not a projected objective");
- throw new RuntimeException();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java
deleted file mode 100644
index 10cf0522..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-
-public interface StopingCriteria {
- public boolean stopOptimization(Objective obj);
- public void reset();
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java
deleted file mode 100644
index e5d07229..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class ValueDifference implements StopingCriteria{
-
- /**
- * Stop if the different between values is smaller than a treshold
- */
- protected double valueConvergenceValue=0.01;
- protected double previousValue = Double.NaN;
- protected double currentValue = Double.NaN;
-
- public ValueDifference(double valueConvergenceValue){
- this.valueConvergenceValue = valueConvergenceValue;
- }
-
- public void reset(){
- previousValue = Double.NaN;
- currentValue = Double.NaN;
- }
-
- public boolean stopOptimization(Objective obj){
- if(Double.isNaN(currentValue)){
- currentValue = obj.getValue();
- return false;
- }else {
- previousValue = currentValue;
- currentValue = obj.getValue();
- if(previousValue - currentValue < valueConvergenceValue){
-// System.out.println("Leaving different in values is to small: Prev "
-// + previousValue + " Curr: " + currentValue
-// + " diff: " + (previousValue - currentValue));
- return true;
- }
- return false;
- }
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
deleted file mode 100644
index cdbdefc6..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package optimization.util;
-
-public class Interpolation {
-
- /**
- * Fits a cubic polinomyal to a function given two points,
- * such that either gradB is bigger than zero or funcB >= funcA
- *
- * NonLinear Programming appendix C
- * @param funcA
- * @param gradA
- * @param funcB
- * @param gradB
- */
- public final static double cubicInterpolation(double a,
- double funcA, double gradA, double b,double funcB, double gradB ){
- if(gradB < 0 && funcA > funcB){
- System.out.println("Cannot call cubic interpolation");
- return -1;
- }
-
- double z = 3*(funcA-funcB)/(b-a) + gradA + gradB;
- double w = Math.sqrt(z*z - gradA*gradB);
- double min = b -(gradB+w-z)*(b-a)/(gradB-gradA+2*w);
- return min;
- }
-
- public final static double quadraticInterpolation(double initFValue,
- double initGrad, double point,double pointFValue){
- double min = -1*initGrad*point*point/(2*(pointFValue-initGrad*point-initFValue));
- return min;
- }
-
- public static void main(String[] args) {
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
deleted file mode 100644
index 5343a39b..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package optimization.util;
-
-public class Logger {
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
deleted file mode 100644
index af66f82c..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
+++ /dev/null
@@ -1,339 +0,0 @@
-package optimization.util;
-
-import java.util.Arrays;
-
-
-
-public class MathUtils {
-
- /**
- *
- * @param vector
- * @return
- */
- public static double L2Norm(double[] vector){
- double value = 0;
- for(int i = 0; i < vector.length; i++){
- double v = vector[i];
- value+=v*v;
- }
- return Math.sqrt(value);
- }
-
- public static double sum(double[] v){
- double sum = 0;
- for (int i = 0; i < v.length; i++) {
- sum+=v[i];
- }
- return sum;
- }
-
-
-
-
- /**
- * w = w + v
- * @param w
- * @param v
- */
- public static void plusEquals(double[] w, double[] v) {
- for(int i=0; i<w.length;i++){
- w[i] += w[i] + v[i];
- }
- }
-
- /**
- * w[i] = w[i] + v
- * @param w
- * @param v
- */
- public static void plusEquals(double[] w, double v) {
- for(int i=0; i<w.length;i++){
- w[i] += w[i] + v;
- }
- }
-
- /**
- * w[i] = w[i] - v
- * @param w
- * @param v
- */
- public static void minusEquals(double[] w, double v) {
- for(int i=0; i<w.length;i++){
- w[i] -= w[i] + v;
- }
- }
-
- /**
- * w = w + a*v
- * @param w
- * @param v
- * @param a
- */
- public static void plusEquals(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- w[i] += a*v[i];
- }
- }
-
- /**
- * w = w - a*v
- * @param w
- * @param v
- * @param a
- */
- public static void minusEquals(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- w[i] -= a*v[i];
- }
- }
- /**
- * v = w - a*v
- * @param w
- * @param v
- * @param a
- */
- public static void minusEqualsInverse(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- v[i] = w[i] - a*v[i];
- }
- }
-
- public static double dotProduct(double[] w, double[] v){
- double accum = 0;
- for(int i=0; i<w.length;i++){
- accum += w[i]*v[i];
- }
- return accum;
- }
-
- public static double[] arrayMinus(double[]w, double[]v){
- double result[] = w.clone();
- for(int i=0; i<w.length;i++){
- result[i] -= v[i];
- }
- return result;
- }
-
- public static double[] arrayMinus(double[] result , double[]w, double[]v){
- for(int i=0; i<w.length;i++){
- result[i] = w[i]-v[i];
- }
- return result;
- }
-
- public static double[] negation(double[]w){
- double result[] = new double[w.length];
- for(int i=0; i<w.length;i++){
- result[i] = -w[i];
- }
- return result;
- }
-
- public static double square(double value){
- return value*value;
- }
- public static double[][] outerProduct(double[] w, double[] v){
- double[][] result = new double[w.length][v.length];
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < v.length; j++){
- result[i][j] = w[i]*v[j];
- }
- }
- return result;
- }
- /**
- * results = a*W*V
- * @param w
- * @param v
- * @param a
- * @return
- */
- public static double[][] weightedouterProduct(double[] w, double[] v, double a){
- double[][] result = new double[w.length][v.length];
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < v.length; j++){
- result[i][j] = a*w[i]*v[j];
- }
- }
- return result;
- }
-
- public static double[][] identity(int size){
- double[][] result = new double[size][size];
- for(int i = 0; i < size; i++){
- result[i][i] = 1;
- }
- return result;
- }
-
- /**
- * v -= w
- * @param v
- * @param w
- */
- public static void minusEquals(double[][] w, double[][] v){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] -= v[i][j];
- }
- }
- }
-
- /**
- * v[i][j] -= a*w[i][j]
- * @param v
- * @param w
- */
- public static void minusEquals(double[][] w, double[][] v, double a){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] -= a*v[i][j];
- }
- }
- }
-
- /**
- * v += w
- * @param v
- * @param w
- */
- public static void plusEquals(double[][] w, double[][] v){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] += v[i][j];
- }
- }
- }
-
- /**
- * v[i][j] += a*w[i][j]
- * @param v
- * @param w
- */
- public static void plusEquals(double[][] w, double[][] v, double a){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] += a*v[i][j];
- }
- }
- }
-
-
- /**
- * results = w*v
- * @param w
- * @param v
- * @return
- */
- public static double[][] matrixMultiplication(double[][] w,double[][] v){
- int w1 = w.length;
- int w2 = w[0].length;
- int v1 = v.length;
- int v2 = v[0].length;
-
- if(w2 != v1){
- System.out.println("Matrix dimensions do not agree...");
- System.exit(-1);
- }
-
- double[][] result = new double[w1][v2];
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- for(int v_i2 = 0; v_i2 < v2; v_i2++){
- double sum = 0;
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- sum += w[w_i1 ][w_i2]*v[w_i2][v_i2];
- }
- result[w_i1][v_i2] = sum;
- }
- }
- return result;
- }
-
- /**
- * w = w.*v
- * @param w
- * @param v
- */
- public static void matrixScalarMultiplication(double[][] w,double v){
- int w1 = w.length;
- int w2 = w[0].length;
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- w[w_i1 ][w_i2] *= v;
- }
- }
- }
-
- public static void scalarMultiplication(double[] w,double v){
- int w1 = w.length;
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- w[w_i1 ] *= v;
- }
-
- }
-
- public static double[] matrixVector(double[][] w,double[] v){
- int w1 = w.length;
- int w2 = w[0].length;
- int v1 = v.length;
-
- if(w2 != v1){
- System.out.println("Matrix dimensions do not agree...");
- System.exit(-1);
- }
-
- double[] result = new double[w1];
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- double sum = 0;
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- sum += w[w_i1 ][w_i2]*v[w_i2];
- }
- result[w_i1] = sum;
- }
- return result;
- }
-
- public static boolean allPositive(double[] array){
- for (int i = 0; i < array.length; i++) {
- if(array[i] < 0) return false;
- }
- return true;
- }
-
-
-
-
-
- public static void main(String[] args) {
- double[][] m1 = new double[2][2];
- m1[0][0]=2;
- m1[1][0]=2;
- m1[0][1]=2;
- m1[1][1]=2;
- MatrixOutput.printDoubleArray(m1, "m1");
- double[][] m2 = new double[2][2];
- m2[0][0]=3;
- m2[1][0]=3;
- m2[0][1]=3;
- m2[1][1]=3;
- MatrixOutput.printDoubleArray(m2, "m2");
- double[][] result = matrixMultiplication(m1, m2);
- MatrixOutput.printDoubleArray(result, "result");
- matrixScalarMultiplication(result, 3);
- MatrixOutput.printDoubleArray(result, "result after multiply by 3");
- }
-
- public static boolean almost(double a, double b, double prec){
- return Math.abs(a-b)/Math.abs(a+b) <= prec || (almostZero(a) && almostZero(b));
- }
-
- public static boolean almost(double a, double b){
- return Math.abs(a-b)/Math.abs(a+b) <= 1e-10 || (almostZero(a) && almostZero(b));
- }
-
- public static boolean almostZero(double a) {
- return Math.abs(a) <= 1e-30;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java
deleted file mode 100644
index 9fbdf955..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package optimization.util;
-
-
-public class MatrixOutput {
- public static void printDoubleArray(double[][] array, String arrayName) {
- int size1 = array.length;
- int size2 = array[0].length;
- System.out.println(arrayName);
- for (int i = 0; i < size1; i++) {
- for (int j = 0; j < size2; j++) {
- System.out.print(" " + StaticTools.prettyPrint(array[i][j],
- "00.00E00", 4) + " ");
-
- }
- System.out.println();
- }
- System.out.println();
- }
-
- public static void printDoubleArray(double[] array, String arrayName) {
- System.out.println(arrayName);
- for (int i = 0; i < array.length; i++) {
- System.out.print(" " + StaticTools.prettyPrint(array[i],
- "00.00E00", 4) + " ");
- }
- System.out.println();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java
deleted file mode 100644
index bcabee06..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java
+++ /dev/null
@@ -1,180 +0,0 @@
-package optimization.util;
-
-
-import java.io.File;
-import java.io.PrintStream;
-
-public class StaticTools {
-
- static java.text.DecimalFormat fmt = new java.text.DecimalFormat();
-
- public static void createDir(String directory) {
-
- File dir = new File(directory);
- if (!dir.isDirectory()) {
- boolean success = dir.mkdirs();
- if (!success) {
- System.out.println("Unable to create directory " + directory);
- System.exit(0);
- }
- System.out.println("Created directory " + directory);
- } else {
- System.out.println("Reusing directory " + directory);
- }
- }
-
- /*
- * q and p are indexed by source/foreign Sum_S(q) = 1 the same for p KL(q,p) =
- * Eq*q/p
- */
- public static double KLDistance(double[][] p, double[][] q, int sourceSize,
- int foreignSize) {
- double totalKL = 0;
- // common.StaticTools.printMatrix(q, sourceSize, foreignSize, "q",
- // System.out);
- // common.StaticTools.printMatrix(p, sourceSize, foreignSize, "p",
- // System.out);
- for (int i = 0; i < sourceSize; i++) {
- double kl = 0;
- for (int j = 0; j < foreignSize; j++) {
- assert !Double.isNaN(q[i][j]) : "KLDistance q: prob is NaN";
- assert !Double.isNaN(p[i][j]) : "KLDistance p: prob is NaN";
- if (p[i][j] == 0 || q[i][j] == 0) {
- continue;
- } else {
- kl += q[i][j] * Math.log(q[i][j] / p[i][j]);
- }
-
- }
- totalKL += kl;
- }
- assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN";
- if (totalKL < -1.0E-10) {
- System.out.println("KL Smaller than zero " + totalKL);
- System.out.println("Source Size" + sourceSize);
- System.out.println("Foreign Size" + foreignSize);
- StaticTools.printMatrix(q, sourceSize, foreignSize, "q",
- System.out);
- StaticTools.printMatrix(p, sourceSize, foreignSize, "p",
- System.out);
- System.exit(-1);
- }
- return totalKL / sourceSize;
- }
-
- /*
- * indexed the by [fi][si]
- */
- public static double KLDistancePrime(double[][] p, double[][] q,
- int sourceSize, int foreignSize) {
- double totalKL = 0;
- for (int i = 0; i < sourceSize; i++) {
- double kl = 0;
- for (int j = 0; j < foreignSize; j++) {
- assert !Double.isNaN(q[j][i]) : "KLDistance q: prob is NaN";
- assert !Double.isNaN(p[j][i]) : "KLDistance p: prob is NaN";
- if (p[j][i] == 0 || q[j][i] == 0) {
- continue;
- } else {
- kl += q[j][i] * Math.log(q[j][i] / p[j][i]);
- }
-
- }
- totalKL += kl;
- }
- assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN";
- return totalKL / sourceSize;
- }
-
- public static double Entropy(double[][] p, int sourceSize, int foreignSize) {
- double totalE = 0;
- for (int i = 0; i < foreignSize; i++) {
- double e = 0;
- for (int j = 0; j < sourceSize; j++) {
- e += p[i][j] * Math.log(p[i][j]);
- }
- totalE += e;
- }
- return totalE / sourceSize;
- }
-
- public static double[][] copyMatrix(double[][] original, int sourceSize,
- int foreignSize) {
- double[][] result = new double[sourceSize][foreignSize];
- for (int i = 0; i < sourceSize; i++) {
- for (int j = 0; j < foreignSize; j++) {
- result[i][j] = original[i][j];
- }
- }
- return result;
- }
-
- public static void printMatrix(double[][] matrix, int sourceSize,
- int foreignSize, String info, PrintStream out) {
-
- java.text.DecimalFormat fmt = new java.text.DecimalFormat();
- fmt.setMaximumFractionDigits(3);
- fmt.setMaximumIntegerDigits(3);
- fmt.setMinimumFractionDigits(3);
- fmt.setMinimumIntegerDigits(3);
-
- out.println(info);
-
- for (int i = 0; i < foreignSize; i++) {
- for (int j = 0; j < sourceSize; j++) {
- out.print(prettyPrint(matrix[j][i], ".00E00", 6) + " ");
- }
- out.println();
- }
- out.println();
- out.println();
- }
-
- public static void printMatrix(int[][] matrix, int sourceSize,
- int foreignSize, String info, PrintStream out) {
-
- out.println(info);
- for (int i = 0; i < foreignSize; i++) {
- for (int j = 0; j < sourceSize; j++) {
- out.print(matrix[j][i] + " ");
- }
- out.println();
- }
- out.println();
- out.println();
- }
-
- public static String formatTime(long duration) {
- StringBuilder sb = new StringBuilder();
- double d = duration / 1000;
- fmt.applyPattern("00");
- sb.append(fmt.format((int) (d / (60 * 60))) + ":");
- d -= ((int) d / (60 * 60)) * 60 * 60;
- sb.append(fmt.format((int) (d / 60)) + ":");
- d -= ((int) d / 60) * 60;
- fmt.applyPattern("00.0");
- sb.append(fmt.format(d));
- return sb.toString();
- }
-
- public static String prettyPrint(double d, String patt, int len) {
- fmt.applyPattern(patt);
- String s = fmt.format(d);
- while (s.length() < len) {
- s = " " + s;
- }
- return s;
- }
-
-
- public static long getUsedMemory(){
- System.gc();
- return (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/ (1024 * 1024);
- }
-
- public final static boolean compareDoubles(double d1, double d2){
- return Math.abs(d1-d2) <= 1.E-10;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree.java b/gi/posterior-regularisation/prjava/src/phrase/Agree.java
deleted file mode 100644
index 8f7b499e..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Agree.java
+++ /dev/null
@@ -1,204 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree {
- PhraseCluster model1;
- C2F model2;
- Corpus c;
- private int K,n_phrases, n_words, n_contexts, n_positions1,n_positions2;
-
- /**@brief sum of loglikelihood of two
- * individual models
- */
- public double llh;
- /**@brief Bhattacharyya distance
- *
- */
- public double bdist;
- /**
- *
- * @param numCluster
- * @param corpus
- */
- public Agree(int numCluster, Corpus corpus){
-
- model1=new PhraseCluster(numCluster, corpus);
- model2=new C2F(numCluster,corpus);
- c=corpus;
- n_words=c.getNumWords();
- n_phrases=c.getNumPhrases();
- n_contexts=c.getNumContexts();
- n_positions1=c.getNumContextPositions();
- n_positions2=2;
- K=numCluster;
-
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- //String in="../pdata/canned.con";
- String in="../pdata/btec.con";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- Agree agree=new Agree(numCluster, corpus);
- int iter=20;
- for(int i=0;i<iter;i++){
- agree.EM();
- System.out.println("Iter"+i+", llh: "+agree.llh+
- ", divergence:"+agree.bdist+
- " sum: "+(agree.llh+agree.bdist));
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- agree.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
-
- double [][][]exp_emit1=new double [K][n_positions1][n_words];
- double [][]exp_pi1=new double[n_phrases][K];
-
- double [][][]exp_emit2=new double [K][n_positions2][n_words];
- double [][]exp_pi2=new double[n_contexts][K];
-
- llh=0;
- bdist=0;
- //E
- for(int context=0; context< n_contexts; context++){
-
- List<Edge> contexts = c.getEdgesForContext(context);
-
- for (int ctx=0; ctx<contexts.size(); ctx++){
- Edge edge = contexts.get(ctx);
- int phrase=edge.getPhraseId();
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- bdist += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList phraseToks = edge.getPhrase();
- TIntArrayList contextToks = edge.getContext();
- for(int tag=0;tag<K;tag++){
-
- for(int position=0;position<n_positions1;position++){
- exp_emit1[tag][position][contextToks.get(position)]+=p[tag]*count;
- }
-
- exp_emit2[tag][0][phraseToks.get(0)]+=p[tag]*count;
- exp_emit2[tag][1][phraseToks.get(phraseToks.size()-1)]+=p[tag]*count;
-
- exp_pi1[phrase][tag]+=p[tag]*count;
- exp_pi2[context][tag]+=p[tag]*count;
- }
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit1){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi1){
- arr.F.l1normalize(j);
- }
-
- for(double [][]i:exp_emit2){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi2){
- arr.F.l1normalize(j);
- }
-
- model1.emit=exp_emit1;
- model1.pi=exp_pi1;
- model2.emit=exp_emit2;
- model2.pi=exp_pi2;
-
- return llh;
- }
-
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob1=model1.posterior(edge);
- double[] prob2=model2.posterior(edge);
-
- llh+=edge.getCount()*Math.log(arr.F.l1norm(prob1));
- llh+=edge.getCount()*Math.log(arr.F.l1norm(prob2));
- arr.F.l1normalize(prob1);
- arr.F.l1normalize(prob2);
-
- for(int i=0;i<prob1.length;i++){
- prob1[i]*=prob2[i];
- prob1[i]=Math.sqrt(prob1[i]);
- }
-
- return prob1;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- displayPosterior(ps, c.getEdges());
- }
-
- public void displayPosterior(PrintStream ps, List<Edge> test)
- {
- for (Edge edge : test)
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java b/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java
deleted file mode 100644
index 031f887f..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java
+++ /dev/null
@@ -1,197 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree2Sides {
- PhraseCluster model1,model2;
- Corpus c1,c2;
- private int K;
-
- /**@brief sum of loglikelihood of two
- * individual models
- */
- public double llh;
- /**@brief Bhattacharyya distance
- *
- */
- public double bdist;
- /**
- *
- * @param numCluster
- * @param corpus
- */
- public Agree2Sides(int numCluster, Corpus corpus1 , Corpus corpus2 ){
-
- model1=new PhraseCluster(numCluster, corpus1);
- model2=new PhraseCluster(numCluster,corpus2);
- c1=corpus1;
- c2=corpus2;
- K=numCluster;
-
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- //String in="../pdata/canned.con";
- // String in="../pdata/btec.con";
- String in1="../pdata/source.txt";
- String in2="../pdata/target.txt";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus1 = null,corpus2=null;
- File infile1 = new File(in1),infile2=new File(in2);
- try {
- System.out.println("Reading concordance from " + infile1);
- corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
- System.out.println("Reading concordance from " + infile2);
- corpus2 = Corpus.readFromFile(FileUtil.reader(infile2));
- corpus1.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile1);
- e.printStackTrace();
- System.exit(1);
- }
-
- Agree2Sides agree=new Agree2Sides(numCluster, corpus1,corpus2);
- int iter=20;
- for(int i=0;i<iter;i++){
- agree.EM();
- System.out.println("Iter"+i+", llh: "+agree.llh+
- ", divergence:"+agree.bdist+
- " sum: "+(agree.llh+agree.bdist));
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- agree.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
-
- double [][][]exp_emit1=new double [K][c1.getNumContextPositions()][c1.getNumWords()];
- double [][]exp_pi1=new double[c1.getNumPhrases()][K];
-
- double [][][]exp_emit2=new double [K][c2.getNumContextPositions()][c2.getNumWords()];
- double [][]exp_pi2=new double[c2.getNumPhrases()][K];
-
- llh=0;
- bdist=0;
- //E
- for(int i=0;i<c1.getEdges().size();i++){
- Edge edge1=c1.getEdges().get(i);
- Edge edge2=c2.getEdges().get(i);
- double p[]=posterior(i);
- double z = arr.F.l1norm(p);
- assert z > 0;
- bdist += edge1.getCount() * Math.log(z);
- arr.F.l1normalize(p);
- double count = edge1.getCount();
- //increment expected count
- TIntArrayList contextToks1 = edge1.getContext();
- TIntArrayList contextToks2 = edge2.getContext();
- int phrase1=edge1.getPhraseId();
- int phrase2=edge2.getPhraseId();
- for(int tag=0;tag<K;tag++){
- for(int position=0;position<c1.getNumContextPositions();position++){
- exp_emit1[tag][position][contextToks1.get(position)]+=p[tag]*count;
- }
- for(int position=0;position<c2.getNumContextPositions();position++){
- exp_emit2[tag][position][contextToks2.get(position)]+=p[tag]*count;
- }
- exp_pi1[phrase1][tag]+=p[tag]*count;
- exp_pi2[phrase2][tag]+=p[tag]*count;
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit1){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi1){
- arr.F.l1normalize(j);
- }
-
- for(double [][]i:exp_emit2){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi2){
- arr.F.l1normalize(j);
- }
-
- model1.emit=exp_emit1;
- model1.pi=exp_pi1;
- model2.emit=exp_emit2;
- model2.pi=exp_pi2;
-
- return llh;
- }
-
- public double[] posterior(int edgeIdx)
- {
- return posterior(c1.getEdges().get(edgeIdx), c2.getEdges().get(edgeIdx));
- }
-
- public double[] posterior(Edge e1, Edge e2)
- {
- double[] prob1=model1.posterior(e1);
- double[] prob2=model2.posterior(e2);
-
- llh+=e1.getCount()*Math.log(arr.F.l1norm(prob1));
- llh+=e2.getCount()*Math.log(arr.F.l1norm(prob2));
- arr.F.l1normalize(prob1);
- arr.F.l1normalize(prob2);
-
- for(int i=0;i<prob1.length;i++){
- prob1[i]*=prob2[i];
- prob1[i]=Math.sqrt(prob1[i]);
- }
-
- return prob1;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- for (int i=0;i<c1.getEdges().size();i++)
- {
- Edge edge=c1.getEdges().get(i);
- double probs[] = posterior(i);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/C2F.java b/gi/posterior-regularisation/prjava/src/phrase/C2F.java
deleted file mode 100644
index e8783950..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/C2F.java
+++ /dev/null
@@ -1,216 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Arrays;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-/**
- * @brief context generates phrase
- * @author desaic
- *
- */
-public class C2F {
- public int K;
- private int n_words, n_contexts, n_positions;
- public Corpus c;
-
- /**@brief
- * emit[tag][position][word] = p(word | tag, position in phrase)
- */
- public double emit[][][];
- /**@brief
- * pi[context][tag] = p(tag | context)
- */
- public double pi[][];
-
- public C2F(int numCluster, Corpus corpus){
- K=numCluster;
- c=corpus;
- n_words=c.getNumWords();
- n_contexts=c.getNumContexts();
-
- //number of words in a phrase to be considered
- //currently the first and last word in source and target
- //if the phrase has length 1 in either dimension then
- //we use the same word for two positions
- n_positions=c.phraseEdges(c.getEdges().get(0).getPhrase()).size();
-
- emit=new double [K][n_positions][n_words];
- pi=new double[n_contexts][K];
-
- for(double [][]i:emit){
- for(double []j:i){
- arr.F.randomise(j);
- }
- }
-
- for(double []j:pi){
- arr.F.randomise(j);
- }
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- String in="../pdata/canned.con";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- C2F c2f=new C2F(numCluster,corpus);
- int iter=20;
- double llh=0;
- for(int i=0;i<iter;i++){
- llh=c2f.EM();
- System.out.println("Iter"+i+", llh: "+llh);
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- c2f.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double [][]exp_pi=new double[n_contexts][K];
-
- double loglikelihood=0;
-
- //E
- for(int context=0; context< n_contexts; context++){
-
- List<Edge> contexts = c.getEdgesForContext(context);
-
- for (int ctx=0; ctx<contexts.size(); ctx++){
- Edge edge = contexts.get(ctx);
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- loglikelihood += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList phrase= edge.getPhrase();
- for(int tag=0;tag<K;tag++){
-
- exp_emit[tag][0][phrase.get(0)]+=p[tag]*count;
- exp_emit[tag][1][phrase.get(phrase.size()-1)]+=p[tag]*count;
-
- exp_pi[context][tag]+=p[tag]*count;
- }
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- emit=exp_emit;
-
- for(double []j:exp_pi){
- arr.F.l1normalize(j);
- }
-
- pi=exp_pi;
-
- return loglikelihood;
- }
-
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob=Arrays.copyOf(pi[edge.getContextId()], K);
-
- TIntArrayList phrase = edge.getPhrase();
- TIntArrayList offsets = c.phraseEdges(phrase);
- for(int tag=0;tag<K;tag++)
- {
- for (int i=0; i < offsets.size(); ++i)
- prob[tag]*=emit[tag][i][phrase.get(offsets.get(i))];
- }
-
- return prob;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- for (Edge edge : c.getEdges())
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-
- public void displayModelParam(PrintStream ps)
- {
- final double EPS = 1e-6;
-
- ps.println("P(tag|context)");
- for (int i = 0; i < n_contexts; ++i)
- {
- ps.print(c.getContext(i));
- for(int j=0;j<pi[i].length;j++){
- if (pi[i][j] > EPS)
- ps.print("\t" + j + ": " + pi[i][j]);
- }
- ps.println();
- }
-
- ps.println("P(word|tag,position)");
- for (int i = 0; i < K; ++i)
- {
- for(int position=0;position<n_positions;position++){
- ps.println("tag " + i + " position " + position);
- for(int word=0;word<emit[i][position].length;word++){
- if (emit[i][position][word] > EPS)
- ps.print(c.getWord(word)+"="+emit[i][position][word]+"\t");
- }
- ps.println();
- }
- ps.println();
- }
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java b/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
deleted file mode 100644
index 4b1939cd..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
+++ /dev/null
@@ -1,288 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-
-public class Corpus
-{
- private Lexicon<String> wordLexicon = new Lexicon<String>();
- private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>();
- private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>();
- private List<Edge> edges = new ArrayList<Edge>();
- private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
- private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
- public int splitSentinel;
- public int phraseSentinel;
- public int rareSentinel;
-
- public Corpus()
- {
- splitSentinel = wordLexicon.insert("<SPLIT>");
- phraseSentinel = wordLexicon.insert("<PHRASE>");
- rareSentinel = wordLexicon.insert("<RARE>");
- }
-
- public class Edge
- {
-
- Edge(int phraseId, int contextId, double count,int tag)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- fixTag=tag;
- }
-
- Edge(int phraseId, int contextId, double count)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- fixTag=-1;
- }
- public int getTag(){
- return fixTag;
- }
-
- public int getPhraseId()
- {
- return phraseId;
- }
- public TIntArrayList getPhrase()
- {
- return Corpus.this.getPhrase(phraseId);
- }
- public String getPhraseString()
- {
- return Corpus.this.getPhraseString(phraseId);
- }
- public int getContextId()
- {
- return contextId;
- }
- public TIntArrayList getContext()
- {
- return Corpus.this.getContext(contextId);
- }
- public String getContextString(boolean insertPhraseSentinel)
- {
- return Corpus.this.getContextString(contextId, insertPhraseSentinel);
- }
- public double getCount()
- {
- return count;
- }
- public boolean equals(Object other)
- {
- if (other instanceof Edge)
- {
- Edge oe = (Edge) other;
- return oe.phraseId == phraseId && oe.contextId == contextId;
- }
- else return false;
- }
- public int hashCode()
- { // this is how boost's hash_combine does it
- int seed = phraseId;
- seed ^= contextId + 0x9e3779b9 + (seed << 6) + (seed >> 2);
- return seed;
- }
- public String toString()
- {
- return getPhraseString() + "\t" + getContextString(true);
- }
-
- private int phraseId;
- private int contextId;
- private double count;
- private int fixTag;
- }
-
- List<Edge> getEdges()
- {
- return edges;
- }
-
- int getNumEdges()
- {
- return edges.size();
- }
-
- int getNumPhrases()
- {
- return phraseLexicon.size();
- }
-
- int getNumContextPositions()
- {
- return contextLexicon.lookup(0).size();
- }
-
- List<Edge> getEdgesForPhrase(int phraseId)
- {
- return phraseToContext.get(phraseId);
- }
-
- int getNumContexts()
- {
- return contextLexicon.size();
- }
-
- List<Edge> getEdgesForContext(int contextId)
- {
- return contextToPhrase.get(contextId);
- }
-
- int getNumWords()
- {
- return wordLexicon.size();
- }
-
- String getWord(int wordId)
- {
- return wordLexicon.lookup(wordId);
- }
-
- public TIntArrayList getPhrase(int phraseId)
- {
- return phraseLexicon.lookup(phraseId);
- }
-
- public String getPhraseString(int phraseId)
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getPhrase(phraseId).toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(wordLexicon.lookup(tid));
- }
- return b.toString();
- }
-
- public TIntArrayList getContext(int contextId)
- {
- return contextLexicon.lookup(contextId);
- }
-
- public String getContextString(int contextId, boolean insertPhraseSentinel)
- {
- StringBuffer b = new StringBuffer();
- TIntArrayList c = getContext(contextId);
- for (int i = 0; i < c.size(); ++i)
- {
- if (i > 0) b.append(" ");
- //if (i == c.size() / 2) b.append("<PHRASE> ");
- b.append(wordLexicon.lookup(c.get(i)));
- }
- return b.toString();
- }
-
- public boolean isSentinel(int wordId)
- {
- return wordId == splitSentinel || wordId == phraseSentinel;
- }
-
- List<Edge> readEdges(Reader in) throws IOException
- {
- // read in line-by-line
- BufferedReader bin = new BufferedReader(in);
- String line;
- Pattern separator = Pattern.compile(" \\|\\|\\| ");
-
- List<Edge> edges = new ArrayList<Edge>();
- while ((line = bin.readLine()) != null)
- {
- // split into phrase and contexts
- StringTokenizer st = new StringTokenizer(line, "\t");
- assert (st.hasMoreTokens());
- String phraseToks = st.nextToken();
- assert (st.hasMoreTokens());
- String rest = st.nextToken();
- assert (!st.hasMoreTokens());
-
- // process phrase
- st = new StringTokenizer(phraseToks, " ");
- TIntArrayList ptoks = new TIntArrayList();
- while (st.hasMoreTokens())
- ptoks.add(wordLexicon.insert(st.nextToken()));
- int phraseId = phraseLexicon.insert(ptoks);
-
- // process contexts
- String[] parts = separator.split(rest);
- assert (parts.length % 2 == 0);
- for (int i = 0; i < parts.length; i += 2)
- {
- // process pairs of strings - context and count
- String ctxString = parts[i];
- String countString = parts[i + 1];
-
- assert (countString.startsWith("C="));
-
- String []countToks=countString.split(" ");
-
- double count = Double.parseDouble(countToks[0].substring(2).trim());
-
- TIntArrayList ctx = new TIntArrayList();
- StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " ");
- while (ctxStrtok.hasMoreTokens())
- {
- String token = ctxStrtok.nextToken();
- ctx.add(wordLexicon.insert(token));
- }
- int contextId = contextLexicon.insert(ctx);
-
-
- if(countToks.length<2){
- edges.add(new Edge(phraseId, contextId, count));
- }
- else{
- int tag=Integer.parseInt(countToks[1].substring(2));
- edges.add(new Edge(phraseId, contextId, count,tag));
- }
- }
- }
- return edges;
- }
-
- static Corpus readFromFile(Reader in) throws IOException
- {
- Corpus c = new Corpus();
- c.edges = c.readEdges(in);
- for (Edge edge: c.edges)
- {
- while (edge.getPhraseId() >= c.phraseToContext.size())
- c.phraseToContext.add(new ArrayList<Edge>());
- while (edge.getContextId() >= c.contextToPhrase.size())
- c.contextToPhrase.add(new ArrayList<Edge>());
-
- // index the edge for fast phrase, context lookup
- c.phraseToContext.get(edge.getPhraseId()).add(edge);
- c.contextToPhrase.get(edge.getContextId()).add(edge);
- }
- return c;
- }
-
- TIntArrayList phraseEdges(TIntArrayList phrase)
- {
- TIntArrayList r = new TIntArrayList(4);
- for (int p = 0; p < phrase.size(); ++p)
- {
- if (p == 0 || phrase.get(p-1) == splitSentinel)
- r.add(p);
- if (p == phrase.size() - 1 || phrase.get(p+1) == splitSentinel)
- r.add(p);
- }
- return r;
- }
-
- public void printStats(PrintStream out)
- {
- out.println("Corpus has " + edges.size() + " edges " + phraseLexicon.size() + " phrases "
- + contextLexicon.size() + " contexts and " + wordLexicon.size() + " word types");
- }
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java b/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
deleted file mode 100644
index a386e4a3..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Lexicon<T>
-{
- public int insert(T word)
- {
- Integer i = wordToIndex.get(word);
- if (i == null)
- {
- i = indexToWord.size();
- wordToIndex.put(word, i);
- indexToWord.add(word);
- }
- return i;
- }
-
- public T lookup(int index)
- {
- return indexToWord.get(index);
- }
-
- public int size()
- {
- return indexToWord.size();
- }
-
- private Map<T, Integer> wordToIndex = new HashMap<T, Integer>();
- private List<T> indexToWord = new ArrayList<T>();
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
deleted file mode 100644
index c032bb2b..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
+++ /dev/null
@@ -1,540 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-import org.apache.commons.math.special.Gamma;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.regex.Pattern;
-
-import phrase.Corpus.Edge;
-
-
-public class PhraseCluster {
-
- public int K;
- private int n_phrases, n_words, n_contexts, n_positions;
- public Corpus c;
- public ExecutorService pool;
-
- double[] lambdaPTCT;
- double[][] lambdaPT;
- boolean cacheLambda = true;
-
- // emit[tag][position][word] = p(word | tag, position in context)
- double emit[][][];
- // pi[phrase][tag] = p(tag | phrase)
- double pi[][];
-
- public PhraseCluster(int numCluster, Corpus corpus)
- {
- K=numCluster;
- c=corpus;
- n_words=c.getNumWords();
- n_phrases=c.getNumPhrases();
- n_contexts=c.getNumContexts();
- n_positions=c.getNumContextPositions();
-
- emit=new double [K][n_positions][n_words];
- pi=new double[n_phrases][K];
-
- for(double [][]i:emit)
- for(double []j:i)
- arr.F.randomise(j, true);
-
- for(double []j:pi)
- arr.F.randomise(j, true);
- }
-
- void useThreadPool(ExecutorService pool)
- {
- this.pool = pool;
- }
-
- public double EM(int phraseSizeLimit)
- {
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double []exp_pi=new double[K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
-
- double loglikelihood=0;
-
- //E
- for(int phrase=0; phrase < n_phrases; phrase++)
- {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- continue;
-
- Arrays.fill(exp_pi, 1e-10);
-
- List<Edge> contexts = c.getEdgesForPhrase(phrase);
-
- for (int ctx=0; ctx<contexts.size(); ctx++)
- {
- Edge edge = contexts.get(ctx);
-
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- loglikelihood += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList context = edge.getContext();
- for(int tag=0;tag<K;tag++)
- {
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=p[tag]*count;
- }
- exp_pi[tag]+=p[tag]*count;
- }
- }
- arr.F.l1normalize(exp_pi);
- System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
- }
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
-
- emit=exp_emit;
-
- return loglikelihood;
- }
-
- public double PREM(double scalePT, double scaleCT, int phraseSizeLimit)
- {
- if (scaleCT == 0)
- {
- if (pool != null)
- return PREM_phrase_constraints_parallel(scalePT, phraseSizeLimit);
- else
- return PREM_phrase_constraints(scalePT, phraseSizeLimit);
- }
- else // FIXME: ignores phraseSizeLimit
- return this.PREM_phrase_context_constraints(scalePT, scaleCT);
- }
-
-
- public double PREM_phrase_constraints(double scalePT, int phraseSizeLimit)
- {
- double [][][]exp_emit=new double[K][n_positions][n_words];
- double []exp_pi=new double[K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
-
- if (lambdaPT == null && cacheLambda)
- lambdaPT = new double[n_phrases][];
-
- double loglikelihood=0, kl=0, l1lmax=0, primal=0;
- int failures=0, iterations=0;
- long start = System.currentTimeMillis();
- //E
- for(int phrase=0; phrase<n_phrases; phrase++)
- {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- {
- //System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
- continue;
- }
-
- Arrays.fill(exp_pi, 1e-10);
-
- // FIXME: add rare edge check to phrase objective & posterior processing
- PhraseObjective po = new PhraseObjective(this, phrase, scalePT, (cacheLambda) ? lambdaPT[phrase] : null);
- boolean ok = po.optimizeWithProjectedGradientDescent();
- if (!ok) ++failures;
- if (cacheLambda) lambdaPT[phrase] = po.getParameters();
- iterations += po.getNumberUpdateCalls();
- double [][] q=po.posterior();
- loglikelihood += po.loglikelihood();
- kl += po.KL_divergence();
- l1lmax += po.l1lmax();
- primal += po.primal(scalePT);
- List<Edge> edges = c.getEdgesForPhrase(phrase);
-
- for(int edge=0;edge<q.length;edge++){
- Edge e = edges.get(edge);
- TIntArrayList context = e.getContext();
- double contextCnt = e.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++){
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
- }
-
- exp_pi[tag]+=q[edge][tag]*contextCnt;
-
- }
- }
- arr.F.l1normalize(exp_pi);
- System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
- }
-
- long end = System.currentTimeMillis();
- if (failures > 0)
- System.out.println("WARNING: failed to converge in " + failures + "/" + n_phrases + " cases");
- System.out.println("\tmean iters: " + iterations/(double)n_phrases + " elapsed time " + (end - start) / 1000.0);
- System.out.println("\tllh: " + loglikelihood);
- System.out.println("\tKL: " + kl);
- System.out.println("\tphrase l1lmax: " + l1lmax);
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- return primal;
- }
-
- public double PREM_phrase_constraints_parallel(final double scalePT, int phraseSizeLimit)
- {
- assert(pool != null);
-
- final LinkedBlockingQueue<PhraseObjective> expectations
- = new LinkedBlockingQueue<PhraseObjective>();
-
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double [][]exp_pi=new double[n_phrases][K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
- for(double []j:exp_pi)
- Arrays.fill(j, 1e-10);
-
- double loglikelihood=0, kl=0, l1lmax=0, primal=0;
- final AtomicInteger failures = new AtomicInteger(0);
- final AtomicLong elapsed = new AtomicLong(0l);
- int iterations=0;
- long start = System.currentTimeMillis();
- List<Future<PhraseObjective>> results = new ArrayList<Future<PhraseObjective>>();
-
- if (lambdaPT == null && cacheLambda)
- lambdaPT = new double[n_phrases][];
-
- //E
- for(int phrase=0;phrase<n_phrases;phrase++) {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit) {
- System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
- continue;
- }
-
- final int p=phrase;
- results.add(pool.submit(new Callable<PhraseObjective>() {
- public PhraseObjective call() {
- //System.out.println("" + Thread.currentThread().getId() + " optimising lambda for " + p);
- long start = System.currentTimeMillis();
- PhraseObjective po = new PhraseObjective(PhraseCluster.this, p, scalePT, (cacheLambda) ? lambdaPT[p] : null);
- boolean ok = po.optimizeWithProjectedGradientDescent();
- if (!ok) failures.incrementAndGet();
- long end = System.currentTimeMillis();
- elapsed.addAndGet(end - start);
- //System.out.println("" + Thread.currentThread().getId() + " done optimising lambda for " + p);
- return po;
- }
- }));
- }
-
- // aggregate the expectations as they become available
- for (Future<PhraseObjective> fpo : results)
- {
- try {
- //System.out.println("" + Thread.currentThread().getId() + " reading queue #" + count);
-
- // wait (blocking) until something is ready
- PhraseObjective po = fpo.get();
- // process
- int phrase = po.phrase;
- if (cacheLambda) lambdaPT[phrase] = po.getParameters();
- //System.out.println("" + Thread.currentThread().getId() + " taken phrase " + phrase);
- double [][] q=po.posterior();
- loglikelihood += po.loglikelihood();
- kl += po.KL_divergence();
- l1lmax += po.l1lmax();
- primal += po.primal(scalePT);
- iterations += po.getNumberUpdateCalls();
-
- List<Edge> edges = c.getEdgesForPhrase(phrase);
- for(int edge=0;edge<q.length;edge++){
- Edge e = edges.get(edge);
- TIntArrayList context = e.getContext();
- double contextCnt = e.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++){
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
- }
- exp_pi[phrase][tag]+=q[edge][tag]*contextCnt;
- }
- }
- } catch (InterruptedException e) {
- System.err.println("M-step thread interrupted. Probably fatal!");
- throw new RuntimeException(e);
- } catch (ExecutionException e) {
- System.err.println("M-step thread execution died. Probably fatal!");
- throw new RuntimeException(e);
- }
- }
-
- long end = System.currentTimeMillis();
-
- if (failures.get() > 0)
- System.out.println("WARNING: failed to converge in " + failures.get() + "/" + n_phrases + " cases");
- System.out.println("\tmean iters: " + iterations/(double)n_phrases + " walltime " + (end-start)/1000.0 + " threads " + elapsed.get() / 1000.0);
- System.out.println("\tllh: " + loglikelihood);
- System.out.println("\tKL: " + kl);
- System.out.println("\tphrase l1lmax: " + l1lmax);
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- for(double []j:exp_pi)
- arr.F.l1normalize(j);
- pi=exp_pi;
-
- return primal;
- }
-
- public double PREM_phrase_context_constraints(double scalePT, double scaleCT)
- {
- double[][][] exp_emit = new double [K][n_positions][n_words];
- double[][] exp_pi = new double[n_phrases][K];
-
- //E step
- PhraseContextObjective pco = new PhraseContextObjective(this, lambdaPTCT, pool, scalePT, scaleCT);
- boolean ok = pco.optimizeWithProjectedGradientDescent();
- if (cacheLambda) lambdaPTCT = pco.getParameters();
-
- //now extract expectations
- List<Corpus.Edge> edges = c.getEdges();
- for(int e = 0; e < edges.size(); ++e)
- {
- double [] q = pco.posterior(e);
- Corpus.Edge edge = edges.get(e);
-
- TIntArrayList context = edge.getContext();
- double contextCnt = edge.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++)
- {
- for(int pos=0;pos<n_positions;pos++)
- exp_emit[tag][pos][context.get(pos)]+=q[tag]*contextCnt;
- exp_pi[edge.getPhraseId()][tag]+=q[tag]*contextCnt;
- }
- }
-
- System.out.println("\tllh: " + pco.loglikelihood());
- System.out.println("\tKL: " + pco.KL_divergence());
- System.out.println("\tphrase l1lmax: " + pco.phrase_l1lmax());
- System.out.println("\tcontext l1lmax: " + pco.context_l1lmax());
-
- //M step
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- for(double []j:exp_pi)
- arr.F.l1normalize(j);
- pi=exp_pi;
-
- return pco.primal();
- }
-
- /**
- * @param phrase index of phrase
- * @param ctx array of context
- * @return unnormalized posterior
- */
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob;
-
- if(edge.getTag()>=0){
- prob=new double[K];
- prob[edge.getTag()]=1;
- return prob;
- }
-
- if (edge.getPhraseId() < n_phrases)
- prob = Arrays.copyOf(pi[edge.getPhraseId()], K);
- else
- {
- prob = new double[K];
- Arrays.fill(prob, 1.0);
- }
-
- TIntArrayList ctx = edge.getContext();
- for(int tag=0;tag<K;tag++)
- {
- for(int c=0;c<n_positions;c++)
- {
- int word = ctx.get(c);
- if (!this.c.isSentinel(word) && word < n_words)
- prob[tag]*=emit[tag][c][word];
- }
- }
-
- return prob;
- }
-
- public void displayPosterior(PrintStream ps, List<Edge> testing)
- {
- for (Edge edge : testing)
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t + " T=" + edge.getCount() + " P=" + probs[t]);
- //ps.println("# probs " + Arrays.toString(probs));
- }
- }
-
- public void displayModelParam(PrintStream ps)
- {
- final double EPS = 1e-6;
- ps.println("phrases " + n_phrases + " tags " + K + " positions " + n_positions);
-
- for (int i = 0; i < n_phrases; ++i)
- for(int j=0;j<pi[i].length;j++)
- if (pi[i][j] > EPS)
- ps.println(i + " " + j + " " + pi[i][j]);
-
- ps.println();
- for (int i = 0; i < K; ++i)
- {
- for(int position=0;position<n_positions;position++)
- {
- for(int word=0;word<emit[i][position].length;word++)
- {
- if (emit[i][position][word] > EPS)
- ps.println(i + " " + position + " " + word + " " + emit[i][position][word]);
- }
- }
- }
- }
-
- double phrase_l1lmax()
- {
- double sum=0;
- for(int phrase=0; phrase<n_phrases; phrase++)
- {
- double [] maxes = new double[K];
- for (Edge edge : c.getEdgesForPhrase(phrase))
- {
- double p[] = posterior(edge);
- arr.F.l1normalize(p);
- for(int tag=0;tag<K;tag++)
- maxes[tag] = Math.max(maxes[tag], p[tag]);
- }
- for(int tag=0;tag<K;tag++)
- sum += maxes[tag];
- }
- return sum;
- }
-
- double context_l1lmax()
- {
- double sum=0;
- for(int context=0; context<n_contexts; context++)
- {
- double [] maxes = new double[K];
- for (Edge edge : c.getEdgesForContext(context))
- {
- double p[] = posterior(edge);
- arr.F.l1normalize(p);
- for(int tag=0;tag<K;tag++)
- maxes[tag] = Math.max(maxes[tag], p[tag]);
- }
- for(int tag=0;tag<K;tag++)
- sum += maxes[tag];
- }
- return sum;
- }
-
- public void loadParameters(BufferedReader input) throws IOException
- {
- final double EPS = 1e-50;
-
- // overwrite pi, emit with ~zeros
- for(double [][]i:emit)
- for(double []j:i)
- Arrays.fill(j, EPS);
-
- for(double []j:pi)
- Arrays.fill(j, EPS);
-
- String line = input.readLine();
- assert line != null;
-
- Pattern space = Pattern.compile(" +");
- String[] parts = space.split(line);
- assert parts.length == 6;
-
- assert parts[0].equals("phrases");
- int phrases = Integer.parseInt(parts[1]);
- int tags = Integer.parseInt(parts[3]);
- int positions = Integer.parseInt(parts[5]);
-
- assert phrases == n_phrases;
- assert tags == K;
- assert positions == n_positions;
-
- // read in pi
- while ((line = input.readLine()) != null)
- {
- line = line.trim();
- if (line.isEmpty()) break;
-
- String[] tokens = space.split(line);
- assert tokens.length == 3;
- int p = Integer.parseInt(tokens[0]);
- int t = Integer.parseInt(tokens[1]);
- double v = Double.parseDouble(tokens[2]);
-
- pi[p][t] = v;
- }
-
- // read in emissions
- while ((line = input.readLine()) != null)
- {
- String[] tokens = space.split(line);
- assert tokens.length == 4;
- int t = Integer.parseInt(tokens[0]);
- int p = Integer.parseInt(tokens[1]);
- int w = Integer.parseInt(tokens[2]);
- double v = Double.parseDouble(tokens[3]);
-
- emit[t][p][w] = v;
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
deleted file mode 100644
index 646ff392..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
+++ /dev/null
@@ -1,436 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import phrase.Corpus.Edge;
-
-public class PhraseContextObjective extends ProjectedObjective
-{
- private static final double GRAD_DIFF = 0.00002;
- private static double INIT_STEP_SIZE = 300;
- private static double VAL_DIFF = 1e-8;
- private static int ITERATIONS = 20;
- boolean debug = false;
-
- private PhraseCluster c;
-
- // un-regularized unnormalized posterior, p[edge][tag]
- // P(tag|edge) \propto P(tag|phrase)P(context|tag)
- private double p[][];
-
- // regularized unnormalized posterior
- // q[edge][tag] propto p[edge][tag]*exp(-lambda)
- private double q[][];
- private List<Corpus.Edge> data;
-
- // log likelihood under q
- private double loglikelihood;
- private SimplexProjection projectionPhrase;
- private SimplexProjection projectionContext;
-
- double[] newPoint;
- private int n_param;
-
- // likelihood under p
- public double llh;
-
- private static Map<Corpus.Edge, Integer> edgeIndex;
-
- private long projectionTime;
- private long objectiveTime;
- private long actualProjectionTime;
- private ExecutorService pool;
-
- double scalePT;
- double scaleCT;
-
- public PhraseContextObjective(PhraseCluster cluster, double[] startingParameters, ExecutorService pool,
- double scalePT, double scaleCT)
- {
- c=cluster;
- data=c.c.getEdges();
- n_param=data.size()*c.K*2;
- this.pool=pool;
- this.scalePT = scalePT;
- this.scaleCT = scaleCT;
-
- parameters = startingParameters;
- if (parameters == null)
- parameters = new double[n_param];
-
- System.out.println("Num parameters " + n_param);
- newPoint = new double[n_param];
- gradient = new double[n_param];
- initP();
- projectionPhrase = new SimplexProjection(scalePT);
- projectionContext = new SimplexProjection(scaleCT);
- q=new double [data.size()][c.K];
-
- if (edgeIndex == null) {
- edgeIndex = new HashMap<Edge, Integer>();
- for (int e=0; e<data.size(); e++)
- {
- edgeIndex.put(data.get(e), e);
- //if (debug) System.out.println("Edge " + data.get(e) + " index " + e);
- }
- }
-
- setParameters(parameters);
- }
-
- private void initP(){
- p=new double[data.size()][];
- for(int edge=0;edge<data.size();edge++)
- {
- p[edge]=c.posterior(data.get(edge));
- llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge]));
- arr.F.l1normalize(p[edge]);
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- //System.out.println("setParameters " + Arrays.toString(parameters));
- // TODO: test if params have changed and skip update otherwise
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction()
- {
- updateCalls++;
- loglikelihood=0;
-
- System.out.print(".");
- System.out.flush();
-
- long begin = System.currentTimeMillis();
- for (int e=0; e<data.size(); e++)
- {
- Edge edge = data.get(e);
- for(int tag=0; tag<c.K; tag++)
- {
- int ip = index(e, tag, true);
- int ic = index(e, tag, false);
- q[e][tag] = p[e][tag]*
- Math.exp((-parameters[ip]-parameters[ic]) / edge.getCount());
- //if (debug)
- //System.out.println("\tposterior " + edge + " with tag " + tag + " p " + p[e][tag] + " params " + parameters[ip] + " and " + parameters[ic] + " q " + q[e][tag]);
- }
- }
-
- for(int edge=0;edge<data.size();edge++) {
- loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
- arr.F.l1normalize(q[edge]);
- }
-
- for (int e=0; e<data.size(); e++)
- {
- for(int tag=0; tag<c.K; tag++)
- {
- int ip = index(e, tag, true);
- int ic = index(e, tag, false);
- gradient[ip]=-q[e][tag];
- gradient[ic]=-q[e][tag];
- }
- }
- //if (debug) {
- //System.out.println("objective " + loglikelihood + " ||gradient||_2: " + arr.F.l2norm(gradient));
- //System.out.println("gradient " + Arrays.toString(gradient));
- //}
- objectiveTime += System.currentTimeMillis() - begin;
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- long begin = System.currentTimeMillis();
- List<Future<?>> tasks = new ArrayList<Future<?>>();
-
- System.out.print(",");
- System.out.flush();
-
- Arrays.fill(newPoint, 0, newPoint.length, 0);
-
- // first project using the phrase-tag constraints,
- // for all p,t: sum_c lambda_ptc < scaleP
- if (pool == null)
- {
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- List<Edge> edges = c.c.getEdgesForPhrase(p);
- double[] toProject = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e (twice)
- for(int e=0; e<edges.size(); e++)
- toProject[e] = point[index(edges.get(e), tag, true)];
- long lbegin = System.currentTimeMillis();
- projectionPhrase.project(toProject);
- actualProjectionTime += System.currentTimeMillis() - lbegin;
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, true)] = toProject[e];
- }
- }
- }
- else // do above in parallel using thread pool
- {
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- final int phrase = p;
- final double[] inPoint = point;
- Runnable task = new Runnable()
- {
- public void run()
- {
- List<Edge> edges = c.c.getEdgesForPhrase(phrase);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = inPoint[index(edges.get(e), tag, true)];
- projectionPhrase.project(toProject);
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, true)] = toProject[e];
- }
- }
- };
- tasks.add(pool.submit(task));
- }
- }
- //System.out.println("after PT " + Arrays.toString(newPoint));
-
- // now project using the context-tag constraints,
- // for all c,t: sum_p omega_pct < scaleC
- if (pool == null)
- {
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- List<Edge> edges = c.c.getEdgesForContext(ctx);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = point[index(edges.get(e), tag, false)];
- long lbegin = System.currentTimeMillis();
- projectionContext.project(toProject);
- actualProjectionTime += System.currentTimeMillis() - lbegin;
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, false)] = toProject[e];
- }
- }
- }
- else
- {
- // do above in parallel using thread pool
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- final int context = ctx;
- final double[] inPoint = point;
- Runnable task = new Runnable()
- {
- public void run()
- {
- List<Edge> edges = c.c.getEdgesForContext(context);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = inPoint[index(edges.get(e), tag, false)];
- projectionContext.project(toProject);
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, false)] = toProject[e];
- }
- }
- };
- tasks.add(pool.submit(task));
- }
- }
-
- if (pool != null)
- {
- // wait for all the jobs to complete
- Exception failure = null;
- for (Future<?> task: tasks)
- {
- try {
- task.get();
- } catch (InterruptedException e) {
- System.err.println("ERROR: Projection thread interrupted");
- e.printStackTrace();
- failure = e;
- } catch (ExecutionException e) {
- System.err.println("ERROR: Projection thread died");
- e.printStackTrace();
- failure = e;
- }
- }
- // rethrow the exception
- if (failure != null)
- {
- pool.shutdownNow();
- throw new RuntimeException(failure);
- }
- }
-
- double[] tmp = newPoint;
- newPoint = point;
- projectionTime += System.currentTimeMillis() - begin;
-
- //if (debug)
- //System.out.println("\t\treturning " + Arrays.toString(tmp));
- return tmp;
- }
-
- private int index(Edge edge, int tag, boolean phrase)
- {
- // NB if indexing changes must also change code in updateFunction and constructor
- if (phrase)
- return tag * edgeIndex.size() + edgeIndex.get(edge);
- else
- return (c.K + tag) * edgeIndex.size() + edgeIndex.get(edge);
- }
-
- private int index(int e, int tag, boolean phrase)
- {
- // NB if indexing changes must also change code in updateFunction and constructor
- if (phrase)
- return tag * edgeIndex.size() + e;
- else
- return (c.K + tag) * edgeIndex.size() + e;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
- @Override
- public String toString() {
- return "No need for pointless toString";
- }
-
- public double []posterior(int edgeIndex){
- return q[edgeIndex];
- }
-
- public boolean optimizeWithProjectedGradientDescent()
- {
- projectionTime = 0;
- actualProjectionTime = 0;
- objectiveTime = 0;
- long start = System.currentTimeMillis();
-
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
- //LineSearchMethod ls = new WolfRuleLineSearch(
- // (new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
- optimizer.setMaxIterations(ITERATIONS);
- updateFunction();
- boolean success = optimizer.optimize(this,stats,compositeStop);
-
- System.out.println();
- System.out.println(stats.prettyPrint(1));
-
- if (success)
- System.out.print("\toptimization took " + optimizer.getCurrentIteration() + " iterations");
- else
- System.out.print("\toptimization failed to converge");
- long total = System.currentTimeMillis() - start;
- System.out.println(" and " + total + " ms: projection " + projectionTime +
- " actual " + actualProjectionTime + " objective " + objectiveTime);
-
- return success;
- }
-
- double loglikelihood()
- {
- return llh;
- }
-
- double KL_divergence()
- {
- return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
- }
-
- double phrase_l1lmax()
- {
- // \sum_{tag,phrase} max_{context} P(tag|context,phrase)
- double sum=0;
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- List<Edge> edges = c.c.getEdgesForPhrase(p);
- for(int tag=0;tag<c.K;tag++)
- {
- double max=0;
- for (Edge edge: edges)
- max = Math.max(max, q[edgeIndex.get(edge)][tag]);
- sum+=max;
- }
- }
- return sum;
- }
-
- double context_l1lmax()
- {
- // \sum_{tag,context} max_{phrase} P(tag|context,phrase)
- double sum=0;
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- List<Edge> edges = c.c.getEdgesForContext(ctx);
- for(int tag=0; tag<c.K; tag++)
- {
- double max=0;
- for (Edge edge: edges)
- max = Math.max(max, q[edgeIndex.get(edge)][tag]);
- sum+=max;
- }
- }
- return sum;
- }
-
- // L - KL(q||p) - scalePT * l1lmax_phrase - scaleCT * l1lmax_context
- public double primal()
- {
- return loglikelihood() - KL_divergence() - scalePT * phrase_l1lmax() - scaleCT * context_l1lmax();
- }
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
deleted file mode 100644
index 0cf31c1c..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
+++ /dev/null
@@ -1,193 +0,0 @@
-package phrase;
-
-import io.FileUtil;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class PhraseCorpus
-{
- public HashMap<String,Integer>wordLex;
- public HashMap<String,Integer>phraseLex;
-
- public String wordList[];
- public String phraseList[];
-
- //data[phrase][num context][position]
- public int data[][][];
- public int numContexts;
-
- public PhraseCorpus(String filename) throws FileNotFoundException, IOException
- {
- BufferedReader r = FileUtil.reader(new File(filename));
-
- phraseLex=new HashMap<String,Integer>();
- wordLex=new HashMap<String,Integer>();
-
- ArrayList<int[][]>dataList=new ArrayList<int[][]>();
- String line=null;
- numContexts = 0;
-
- while((line=readLine(r))!=null){
-
- String toks[]=line.split("\t");
- String phrase=toks[0];
- addLex(phrase,phraseLex);
-
- toks=toks[1].split(" \\|\\|\\| ");
-
- ArrayList <int[]>ctxList=new ArrayList<int[]>();
-
- for(int i=0;i<toks.length;i+=2){
- String ctx=toks[i];
- String words[]=ctx.split(" ");
- if (numContexts == 0)
- numContexts = words.length - 1;
- else
- assert numContexts == words.length - 1;
-
- int []context=new int [numContexts+1];
- int idx=0;
- for(String word:words){
- if(word.equals("<PHRASE>")){
- continue;
- }
- addLex(word,wordLex);
- context[idx]=wordLex.get(word);
- idx++;
- }
-
- String count=toks[i+1];
- context[idx]=Integer.parseInt(count.trim().substring(2));
-
- ctxList.add(context);
- }
-
- dataList.add(ctxList.toArray(new int [0][]));
-
- }
- try{
- r.close();
- }catch(IOException ioe){
- ioe.printStackTrace();
- }
- data=dataList.toArray(new int[0][][]);
- }
-
- private void addLex(String key, HashMap<String,Integer>lex){
- Integer i=lex.get(key);
- if(i==null){
- lex.put(key, lex.size());
- }
- }
-
- //for debugging
- public void saveLex(String lexFilename) throws FileNotFoundException, IOException
- {
- PrintStream ps = FileUtil.printstream(new File(lexFilename));
- ps.println("Phrase Lexicon");
- ps.println(phraseLex.size());
- printDict(phraseLex,ps);
-
- ps.println("Word Lexicon");
- ps.println(wordLex.size());
- printDict(wordLex,ps);
- ps.close();
- }
-
- private static void printDict(HashMap<String,Integer>lex,PrintStream ps){
- String []dict=buildList(lex);
- for(int i=0;i<dict.length;i++){
- ps.println(dict[i]);
- }
- }
-
- public void loadLex(String lexFilename){
- Scanner sc=io.FileUtil.openInFile(lexFilename);
-
- sc.nextLine();
- int size=sc.nextInt();
- sc.nextLine();
- String[]dict=new String[size];
- for(int i=0;i<size;i++){
- dict[i]=sc.nextLine();
- }
- phraseLex=buildMap(dict);
-
- sc.nextLine();
- size=sc.nextInt();
- sc.nextLine();
- dict=new String[size];
- for(int i=0;i<size;i++){
- dict[i]=sc.nextLine();
- }
- wordLex=buildMap(dict);
- sc.close();
- }
-
- private HashMap<String, Integer> buildMap(String[]dict){
- HashMap<String,Integer> map=new HashMap<String,Integer>();
- for(int i=0;i<dict.length;i++){
- map.put(dict[i], i);
- }
- return map;
- }
-
- public void buildList(){
- if(wordList==null){
- wordList=buildList(wordLex);
- phraseList=buildList(phraseLex);
- }
- }
-
- private static String[]buildList(HashMap<String,Integer>lex){
- String dict[]=new String [lex.size()];
- for(String key:lex.keySet()){
- dict[lex.get(key)]=key;
- }
- return dict;
- }
-
- public String getContextString(int context[], boolean addPhraseMarker)
- {
- StringBuffer b = new StringBuffer();
- for (int i=0;i<context.length-1;i++)
- {
- if (b.length() > 0)
- b.append(" ");
-
- if (i == context.length/2)
- b.append("<PHRASE> ");
-
- b.append(wordList[context[i]]);
- }
- return b.toString();
- }
-
- public static String readLine(BufferedReader r){
- try{
- return r.readLine();
- }
- catch(IOException ioe){
- ioe.printStackTrace();
- }
- return null;
- }
-
- public static void main(String[] args) throws Exception
- {
- String LEX_FILENAME="../pdata/lex.out";
- String DATA_FILENAME="../pdata/btec.con";
- PhraseCorpus c=new PhraseCorpus(DATA_FILENAME);
- c.saveLex(LEX_FILENAME);
- c.loadLex(LEX_FILENAME);
- c.saveLex(LEX_FILENAME);
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
deleted file mode 100644
index ac73a075..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
+++ /dev/null
@@ -1,224 +0,0 @@
-package phrase;
-
-import java.util.Arrays;
-import java.util.List;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-
-public class PhraseObjective extends ProjectedObjective
-{
- static final double GRAD_DIFF = 0.00002;
- static double INIT_STEP_SIZE = 300;
- static double VAL_DIFF = 1e-8; // tuned to BTEC subsample
- static int ITERATIONS = 100;
- private PhraseCluster c;
-
- /**@brief
- * for debugging purposes
- */
- //public static PrintStream ps;
-
- /**@brief current phrase being optimzed*/
- public int phrase;
-
- /**@brief un-regularized posterior
- * unnormalized
- * p[edge][tag]
- * P(tag|edge) \propto P(tag|phrase)P(context|tag)
- */
- private double[][]p;
-
- /**@brief regularized posterior
- * q[edge][tag] propto p[edge][tag]*exp(-lambda)
- */
- private double q[][];
- private List<Corpus.Edge> data;
-
- /**@brief log likelihood of the associated phrase
- *
- */
- private double loglikelihood;
- private SimplexProjection projection;
-
- double[] newPoint ;
-
- private int n_param;
-
- /**@brief likelihood under p
- *
- */
- public double llh;
-
- public PhraseObjective(PhraseCluster cluster, int phraseIdx, double scale, double[] lambda){
- phrase=phraseIdx;
- c=cluster;
- data=c.c.getEdgesForPhrase(phrase);
- n_param=data.size()*c.K;
- //System.out.println("Num parameters " + n_param + " for phrase #" + phraseIdx);
-
- if (lambda==null)
- lambda=new double[n_param];
-
- parameters = lambda;
- newPoint = new double[n_param];
- gradient = new double[n_param];
- initP();
- projection=new SimplexProjection(scale);
- q=new double [data.size()][c.K];
-
- setParameters(parameters);
- }
-
- private void initP(){
- p=new double[data.size()][];
- for(int edge=0;edge<data.size();edge++){
- p[edge]=c.posterior(data.get(edge));
- llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge])); // Was bug here - count inside log!
- arr.F.l1normalize(p[edge]);
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction(){
- updateCalls++;
- loglikelihood=0;
-
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- q[edge][tag]=p[edge][tag]*
- Math.exp(-parameters[tag*data.size()+edge]/data.get(edge).getCount());
- }
- }
-
- for(int edge=0;edge<data.size();edge++){
- loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
- arr.F.l1normalize(q[edge]);
- }
-
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- gradient[tag*data.size()+edge]=-q[edge][tag];
- }
- }
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- double toProject[]=new double[data.size()];
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- toProject[edge]=point[tag*data.size()+edge];
- }
- projection.project(toProject);
- for(int edge=0;edge<data.size();edge++){
- newPoint[tag*data.size()+edge]=toProject[edge];
- }
- }
- return newPoint;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
- @Override
- public String toString() {
- return Arrays.toString(parameters);
- }
-
- public double [][]posterior(){
- return q;
- }
-
- long optimizationTime;
-
- public boolean optimizeWithProjectedGradientDescent(){
- long start = System.currentTimeMillis();
-
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
- //LineSearchMethod ls = new WolfRuleLineSearch(
- // (new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
- optimizer.setMaxIterations(ITERATIONS);
- updateFunction();
- boolean success = optimizer.optimize(this,stats,compositeStop);
- //System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- //if(succed){
- //System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- //}else{
-// System.out.println("Failed to optimize");
- //}
- //System.out.println(Arrays.toString(parameters));
-
- // for(int edge=0;edge<data.getSize();edge++){
- // ps.println(Arrays.toString(q[edge]));
- // }
-
- return success;
- }
-
- public double KL_divergence()
- {
- return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
- }
-
- public double loglikelihood()
- {
- return llh;
- }
-
- public double l1lmax()
- {
- double sum=0;
- for(int tag=0;tag<c.K;tag++){
- double max=0;
- for(int edge=0;edge<data.size();edge++){
- if(q[edge][tag]>max)
- max=q[edge][tag];
- }
- sum+=max;
- }
- return sum;
- }
-
- public double primal(double scale)
- {
- return loglikelihood() - KL_divergence() - scale * l1lmax();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
deleted file mode 100644
index 6f302b20..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
+++ /dev/null
@@ -1,257 +0,0 @@
-package phrase;
-
-import io.FileUtil;
-import joptsimple.OptionParser;
-import joptsimple.OptionSet;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import phrase.Corpus.Edge;
-
-import arr.F;
-
-public class Trainer
-{
- public static void main(String[] args)
- {
- OptionParser parser = new OptionParser();
- parser.accepts("help");
- parser.accepts("in").withRequiredArg().ofType(File.class);
- parser.accepts("in1").withRequiredArg().ofType(File.class);
- parser.accepts("test").withRequiredArg().ofType(File.class);
- parser.accepts("out").withRequiredArg().ofType(File.class);
- parser.accepts("start").withRequiredArg().ofType(File.class);
- parser.accepts("parameters").withRequiredArg().ofType(File.class);
- parser.accepts("topics").withRequiredArg().ofType(Integer.class).defaultsTo(5);
- parser.accepts("iterations").withRequiredArg().ofType(Integer.class).defaultsTo(10);
- parser.accepts("threads").withRequiredArg().ofType(Integer.class).defaultsTo(0);
- parser.accepts("scale-phrase").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
- parser.accepts("scale-context").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
- parser.accepts("seed").withRequiredArg().ofType(Long.class).defaultsTo(0l);
- parser.accepts("convergence-threshold").withRequiredArg().ofType(Double.class).defaultsTo(1e-6);
- parser.accepts("variational-bayes");
- parser.accepts("alpha-emit").withRequiredArg().ofType(Double.class).defaultsTo(0.1);
- parser.accepts("alpha-pi").withRequiredArg().ofType(Double.class).defaultsTo(0.0001);
- parser.accepts("agree-direction");
- parser.accepts("agree-language");
- parser.accepts("no-parameter-cache");
- parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5);
- OptionSet options = parser.parse(args);
-
- if (options.has("help") || !options.has("in"))
- {
- try {
- parser.printHelpOn(System.err);
- } catch (IOException e) {
- System.err.println("This should never happen.");
- e.printStackTrace();
- }
- System.exit(1);
- }
-
- int tags = (Integer) options.valueOf("topics");
- int iterations = (Integer) options.valueOf("iterations");
- double scale_phrase = (Double) options.valueOf("scale-phrase");
- double scale_context = (Double) options.valueOf("scale-context");
- int threads = (Integer) options.valueOf("threads");
- double threshold = (Double) options.valueOf("convergence-threshold");
- boolean vb = options.has("variational-bayes");
- double alphaEmit = (vb) ? (Double) options.valueOf("alpha-emit") : 0;
- double alphaPi = (vb) ? (Double) options.valueOf("alpha-pi") : 0;
- int skip = (Integer) options.valueOf("skip-large-phrases");
-
- if (options.has("seed"))
- F.rng = new Random((Long) options.valueOf("seed"));
-
- ExecutorService threadPool = null;
- if (threads > 0)
- threadPool = Executors.newFixedThreadPool(threads);
-
- if (tags <= 1 || scale_phrase < 0 || scale_context < 0 || threshold < 0)
- {
- System.err.println("Invalid arguments. Try again!");
- System.exit(1);
- }
-
- Corpus corpus = null;
- File infile = (File) options.valueOf("in");
- Corpus corpus1 = null;
- File infile1 = (File) options.valueOf("in1");
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- if(options.has("in1")){
- corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
- corpus1.printStats(System.out);
- }
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- if (!(options.has("agree-direction")||options.has("agree-language")))
- System.out.println("Running with " + tags + " tags " +
- "for " + iterations + " iterations " +
- ((skip > 0) ? "skipping large phrases for first " + skip + " iterations " : "") +
- "with scale " + scale_phrase + " phrase and " + scale_context + " context " +
- "and " + threads + " threads");
- else
- System.out.println("Running agreement model with " + tags + " tags " +
- "for " + iterations);
-
- System.out.println();
-
- PhraseCluster cluster = null;
- Agree2Sides agree2sides = null;
- Agree agree= null;
- VB vbModel=null;
- if (options.has("agree-language"))
- agree2sides = new Agree2Sides(tags, corpus,corpus1);
- else if (options.has("agree-direction"))
- agree = new Agree(tags, corpus);
- else
- {
- if (vb)
- {
- vbModel=new VB(tags,corpus);
- vbModel.alpha=alphaPi;
- vbModel.lambda=alphaEmit;
- if (threadPool != null) vbModel.useThreadPool(threadPool);
- }
- else
- {
- cluster = new PhraseCluster(tags, corpus);
- if (threadPool != null) cluster.useThreadPool(threadPool);
-
- if (options.has("no-parameter-cache"))
- cluster.cacheLambda = false;
- if (options.has("start"))
- {
- try {
- System.err.println("Reading starting parameters from " + options.valueOf("start"));
- cluster.loadParameters(FileUtil.reader((File)options.valueOf("start")));
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + options.valueOf("start"));
- e.printStackTrace();
- }
- }
- }
- }
-
- double last = 0;
- for (int i=0; i < iterations; i++)
- {
- double o;
- if (agree != null)
- o = agree.EM();
- else if(agree2sides!=null)
- o = agree2sides.EM();
- else
- {
- if (i < skip)
- System.out.println("Skipping phrases of length > " + (i+1));
-
- if (scale_phrase <= 0 && scale_context <= 0)
- {
- if (!vb)
- o = cluster.EM((i < skip) ? i+1 : 0);
- else
- o = vbModel.EM();
- }
- else
- o = cluster.PREM(scale_phrase, scale_context, (i < skip) ? i+1 : 0);
- }
-
- System.out.println("ITER: "+i+" objective: " + o);
-
- // sometimes takes a few iterations to break the ties
- if (i > 5 && Math.abs((o - last) / o) < threshold)
- {
- last = o;
- break;
- }
- last = o;
- }
-
- double pl1lmax = 0, cl1lmax = 0;
- if (cluster != null)
- {
- pl1lmax = cluster.phrase_l1lmax();
- cl1lmax = cluster.context_l1lmax();
- }
- else if (agree != null)
- {
- // fairly arbitrary choice of model1 cf model2
- pl1lmax = agree.model1.phrase_l1lmax();
- cl1lmax = agree.model1.context_l1lmax();
- }
- else if (agree2sides != null)
- {
- // fairly arbitrary choice of model1 cf model2
- pl1lmax = agree2sides.model1.phrase_l1lmax();
- cl1lmax = agree2sides.model1.context_l1lmax();
- }
-
- System.out.println("\nFinal posterior phrase l1lmax " + pl1lmax + " context l1lmax " + cl1lmax);
-
- if (options.has("out"))
- {
- File outfile = (File) options.valueOf("out");
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- List<Edge> test;
- if (!options.has("test")) // just use the training
- test = corpus.getEdges();
- else
- { // if --test supplied, load up the file
- infile = (File) options.valueOf("test");
- System.out.println("Reading testing concordance from " + infile);
- test = corpus.readEdges(FileUtil.reader(infile));
- }
- if(vb) {
- assert !options.has("test");
- vbModel.displayPosterior(ps);
- } else if (cluster != null)
- cluster.displayPosterior(ps, test);
- else if (agree != null)
- agree.displayPosterior(ps, test);
- else if (agree2sides != null) {
- assert !options.has("test");
- agree2sides.displayPosterior(ps);
- }
-
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open either testing file or output file");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- if (options.has("parameters"))
- {
- assert !vb;
- File outfile = (File) options.valueOf("parameters");
- PrintStream ps;
- try {
- ps = FileUtil.printstream(outfile);
- cluster.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output parameters file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- if (cluster != null && cluster.pool != null)
- cluster.pool.shutdown();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/VB.java b/gi/posterior-regularisation/prjava/src/phrase/VB.java
deleted file mode 100644
index cd3f4966..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/VB.java
+++ /dev/null
@@ -1,419 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import org.apache.commons.math.special.Gamma;
-
-import phrase.Corpus.Edge;
-
-public class VB {
-
- public static int MAX_ITER=400;
-
- /**@brief
- * hyper param for beta
- * where beta is multinomial
- * for generating words from a topic
- */
- public double lambda=0.1;
- /**@brief
- * hyper param for theta
- * where theta is dirichlet for z
- */
- public double alpha=0.0001;
- /**@brief
- * variational param for beta
- */
- private double rho[][][];
- private double digamma_rho[][][];
- private double rho_sum[][];
- /**@brief
- * variational param for z
- */
- //private double phi[][];
- /**@brief
- * variational param for theta
- */
- private double gamma[];
- private static double VAL_DIFF_RATIO=0.005;
-
- private int n_positions;
- private int n_words;
- private int K;
- private ExecutorService pool;
-
- private Corpus c;
- public static void main(String[] args) {
- // String in="../pdata/canned.con";
- String in="../pdata/btec.con";
- String out="../pdata/vb.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- VB vb=new VB(numCluster, corpus);
- int iter=20;
- for(int i=0;i<iter;i++){
- double obj=vb.EM();
- System.out.println("Iter "+i+": "+obj);
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- vb.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public VB(int numCluster, Corpus corpus){
- c=corpus;
- K=numCluster;
- n_positions=c.getNumContextPositions();
- n_words=c.getNumWords();
- rho=new double[K][n_positions][n_words];
- //to init rho
- //loop through data and count up words
- double[] phi_tmp=new double[K];
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- Arrays.fill(rho[i][pos], lambda);
- }
- }
- for(int d=0;d<c.getNumPhrases();d++){
- List<Edge>doc=c.getEdgesForPhrase(d);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- arr.F.randomise(phi_tmp);
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- rho[i][pos][context.get(pos)]+=phi_tmp[i];
- }
- }
- }
- }
-
- }
-
- private double inference(int phraseID, double[][] phi, double[] gamma)
- {
- List<Edge > doc=c.getEdgesForPhrase(phraseID);
- for(int i=0;i<phi.length;i++){
- for(int j=0;j<phi[i].length;j++){
- phi[i][j]=1.0/K;
- }
- }
- Arrays.fill(gamma,alpha+1.0/K);
-
- double digamma_gamma[]=new double[K];
-
- double gamma_sum=digamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- digamma_gamma[i]=digamma(gamma[i]);
- }
- double gammaSum[]=new double [K];
- double prev_val=0;
- double obj=0;
-
- for(int iter=0;iter<MAX_ITER;iter++){
- prev_val=obj;
- obj=0;
- Arrays.fill(gammaSum,0.0);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- double phisum=0;
- for(int i=0;i<K;i++){
- double sum=0;
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- sum+=digamma_rho[i][pos][word]-rho_sum[i][pos];
- }
- sum+= digamma_gamma[i]-gamma_sum;
- phi[n][i]=sum;
-
- if (i > 0){
- phisum = log_sum(phisum, phi[n][i]);
- }
- else{
- phisum = phi[n][i];
- }
-
- }//end of a word
-
- for(int i=0;i<K;i++){
- phi[n][i]=Math.exp(phi[n][i]-phisum);
- gammaSum[i]+=phi[n][i];
- }
-
- }//end of doc
-
- for(int i=0;i<K;i++){
- gamma[i]=alpha+gammaSum[i];
- }
- gamma_sum=digamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- digamma_gamma[i]=digamma(gamma[i]);
- }
- //compute objective for reporting
-
- obj=0;
-
- for(int i=0;i<K;i++){
- obj+=(alpha-1)*(digamma_gamma[i]-gamma_sum);
- }
-
-
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
-
- for(int i=0;i<K;i++){
- //entropy of phi + expected log likelihood of z
- obj+=phi[n][i]*(digamma_gamma[i]-gamma_sum);
-
- if(phi[n][i]>1e-10){
- obj+=phi[n][i]*Math.log(phi[n][i]);
- }
-
- double beta_sum=0;
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- beta_sum+=(digamma(rho[i][pos][word])-rho_sum[i][pos]);
- }
- obj+=phi[n][i]*beta_sum;
- }
- }
-
- obj-=log_gamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- obj+=Gamma.logGamma(gamma[i]);
- obj-=(gamma[i]-1)*(digamma_gamma[i]-gamma_sum);
- }
-
-// System.out.println(phraseID+": "+obj);
- if(iter>0 && (obj-prev_val)/Math.abs(obj)<VAL_DIFF_RATIO){
- break;
- }
- }//end of inference loop
-
- return obj;
- }//end of inference
-
- /**
- * @return objective of this iteration
- */
- public double EM(){
- double emObj=0;
- if(digamma_rho==null){
- digamma_rho=new double[K][n_positions][n_words];
- }
- for(int i=0;i<K;i++){
- for (int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- digamma_rho[i][pos][j]= digamma(rho[i][pos][j]);
- }
- }
- }
-
- if(rho_sum==null){
- rho_sum=new double [K][n_positions];
- }
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- rho_sum[i][pos]=digamma(arr.F.l1norm(rho[i][pos]));
- }
- }
-
- //E
- double exp_rho[][][]=new double[K][n_positions][n_words];
- if (pool == null)
- {
- for (int d=0;d<c.getNumPhrases();d++)
- {
- List<Edge > doc=c.getEdgesForPhrase(d);
- double[][] phi = new double[doc.size()][K];
- double[] gamma = new double[K];
-
- emObj += inference(d, phi, gamma);
-
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- for(int i=0;i<K;i++){
- exp_rho[i][pos][word]+=phi[n][i];
- }
- }
- }
- //if(d!=0 && d%100==0) System.out.print(".");
- //if(d!=0 && d%1000==0) System.out.println(d);
- }
- }
- else // multi-threaded version of above loop
- {
- class PartialEStep implements Callable<PartialEStep>
- {
- double[][] phi;
- double[] gamma;
- double obj;
- int d;
- PartialEStep(int d) { this.d = d; }
-
- public PartialEStep call()
- {
- phi = new double[c.getEdgesForPhrase(d).size()][K];
- gamma = new double[K];
- obj = inference(d, phi, gamma);
- return this;
- }
- }
-
- List<Future<PartialEStep>> jobs = new ArrayList<Future<PartialEStep>>();
- for (int d=0;d<c.getNumPhrases();d++)
- jobs.add(pool.submit(new PartialEStep(d)));
-
- for (Future<PartialEStep> job: jobs)
- {
- try {
- PartialEStep e = job.get();
-
- emObj += e.obj;
- List<Edge> doc = c.getEdgesForPhrase(e.d);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- for(int i=0;i<K;i++){
- exp_rho[i][pos][word]+=e.phi[n][i];
- }
- }
- }
- } catch (ExecutionException e) {
- System.err.println("ERROR: E-step thread execution failed.");
- throw new RuntimeException(e);
- } catch (InterruptedException e) {
- System.err.println("ERROR: Failed to join E-step thread.");
- throw new RuntimeException(e);
- }
- }
- }
- // System.out.println("EM Objective:"+emObj);
-
- //M
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- rho[i][pos][j]=lambda+exp_rho[i][pos][j];
- }
- }
- }
-
- //E[\log p(\beta|\lambda)] - E[\log q(\beta)]
- for(int i=0;i<K;i++){
- double rhoSum=0;
- for(int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- rhoSum+=rho[i][pos][j];
- }
- double digamma_rhoSum=Gamma.digamma(rhoSum);
- emObj-=Gamma.logGamma(rhoSum);
- for(int j=0;j<n_words;j++){
- emObj+=(lambda-rho[i][pos][j])*(Gamma.digamma(rho[i][pos][j])-digamma_rhoSum);
- emObj+=Gamma.logGamma(rho[i][pos][j]);
- }
- }
- }
-
- return emObj;
- }//end of EM
-
- public void displayPosterior(PrintStream ps)
- {
- for(int d=0;d<c.getNumPhrases();d++){
- List<Edge > doc=c.getEdgesForPhrase(d);
- double[][] phi = new double[doc.size()][K];
- for(int i=0;i<phi.length;i++)
- for(int j=0;j<phi[i].length;j++)
- phi[i][j]=1.0/K;
- double[] gamma = new double[K];
-
- inference(d, phi, gamma);
-
- for(int n=0;n<doc.size();n++){
- Edge edge=doc.get(n);
- int tag=arr.F.argmax(phi[n]);
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
-
- ps.println(" ||| C=" + tag);
- }
- }
- }
-
- double log_sum(double log_a, double log_b)
- {
- double v;
-
- if (log_a < log_b)
- v = log_b+Math.log(1 + Math.exp(log_a-log_b));
- else
- v = log_a+Math.log(1 + Math.exp(log_b-log_a));
- return(v);
- }
-
- double digamma(double x)
- {
- double p;
- x=x+6;
- p=1/(x*x);
- p=(((0.004166666666667*p-0.003968253986254)*p+
- 0.008333333333333)*p-0.083333333333333)*p;
- p=p+Math.log(x)-0.5/x-1/(x-1)-1/(x-2)-1/(x-3)-1/(x-4)-1/(x-5)-1/(x-6);
- return p;
- }
-
- double log_gamma(double x)
- {
- double z=1/(x*x);
-
- x=x+6;
- z=(((-0.000595238095238*z+0.000793650793651)
- *z-0.002777777777778)*z+0.083333333333333)/x;
- z=(x-0.5)*Math.log(x)-x+0.918938533204673+z-Math.log(x-1)-
- Math.log(x-2)-Math.log(x-3)-Math.log(x-4)-Math.log(x-5)-Math.log(x-6);
- return z;
- }
-
- public void useThreadPool(ExecutorService threadPool)
- {
- pool = threadPool;
- }
-}//End of class
diff --git a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java b/gi/posterior-regularisation/prjava/src/test/CorpusTest.java
deleted file mode 100644
index b4c3041f..00000000
--- a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package test;
-
-import java.util.Arrays;
-import java.util.HashMap;
-
-import data.Corpus;
-import hmm.POS;
-
-public class CorpusTest {
-
- public static void main(String[] args) {
- Corpus c=new Corpus(POS.trainFilename);
-
-
- int idx=30;
-
-
- HashMap<String, Integer>vocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.alphaFilename);
-
- HashMap<String, Integer>tagVocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
-
-
- String [] dict=new String [vocab.size()+1];
- for(String key:vocab.keySet()){
- dict[vocab.get(key)]=key;
- }
- dict[dict.length-1]=Corpus.UNK_TOK;
-
- String [] tagdict=new String [tagVocab.size()+1];
- for(String key:tagVocab.keySet()){
- tagdict[tagVocab.get(key)]=key;
- }
- tagdict[tagdict.length-1]=Corpus.UNK_TOK;
-
- String[] sent=c.get(idx);
- int []data=c.getInt(idx);
-
-
- String []roundtrip=new String [sent.length];
- for(int i=0;i<sent.length;i++){
- roundtrip[i]=dict[data[i]];
- }
- System.out.println(Arrays.toString(sent));
- System.out.println(Arrays.toString(roundtrip));
-
- sent=c.tag.get(idx);
- data=c.tagData.get(idx);
-
-
- roundtrip=new String [sent.length];
- for(int i=0;i<sent.length;i++){
- roundtrip[i]=tagdict[data[i]];
- }
- System.out.println(Arrays.toString(sent));
- System.out.println(Arrays.toString(roundtrip));
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java b/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java
deleted file mode 100644
index d54525c8..00000000
--- a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java
+++ /dev/null
@@ -1,105 +0,0 @@
-package test;
-
-import hmm.HMM;
-import hmm.POS;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-
-import data.Corpus;
-
-public class HMMModelStats {
-
- public static String modelFilename="../posdata/posModel.out";
- public static String alphaFilename="../posdata/corpus.alphabet";
- public static String statsFilename="../posdata/model.stats";
-
- public static final int NUM_WORD=50;
-
- public static String testFilename="../posdata/en_test.conll";
-
- public static double [][]maxwt;
-
- public static void main(String[] args) {
- HashMap<String, Integer>vocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(alphaFilename);
-
- Corpus test=new Corpus(testFilename,vocab);
-
- String [] dict=new String [vocab.size()+1];
- for(String key:vocab.keySet()){
- dict[vocab.get(key)]=key;
- }
- dict[dict.length-1]=Corpus.UNK_TOK;
-
- HMM hmm=new HMM();
- hmm.readModel(modelFilename);
-
-
-
- PrintStream ps = null;
- try {
- ps = io.FileUtil.printstream(new File(statsFilename));
- } catch (IOException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- double [][] emit=hmm.getEmitProb();
- for(int i=0;i<emit.length;i++){
- ArrayList<IntDoublePair>l=new ArrayList<IntDoublePair>();
- for(int j=0;j<emit[i].length;j++){
- l.add(new IntDoublePair(j,emit[i][j]));
- }
- Collections.sort(l);
- ps.println(i);
- for(int j=0;j<NUM_WORD;j++){
- if(j>=dict.length){
- break;
- }
- ps.print(dict[l.get(j).idx]+"\t");
- if((1+j)%10==0){
- ps.println();
- }
- }
- ps.println("\n");
- }
-
- checkMaxwt(hmm,ps,test.getAllData());
-
- int terminalSym=vocab.get(Corpus .END_SYM);
- //sample 10 sentences
- for(int i=0;i<10;i++){
- int []sent=hmm.sample(terminalSym);
- for(int j=0;j<sent.length;j++){
- ps.print(dict[sent[j]]+"\t");
- }
- ps.println();
- }
-
- ps.close();
-
- }
-
- public static void checkMaxwt(HMM hmm,PrintStream ps,int [][]data){
- double [][]emit=hmm.getEmitProb();
- maxwt=new double[emit.length][emit[0].length];
-
- hmm.computeMaxwt(maxwt,data);
- double sum=0;
- for(int i=0;i<maxwt.length;i++){
- for(int j=0;j<maxwt.length;j++){
- sum+=maxwt[i][j];
- }
- }
-
- ps.println("max w t P(w_i|t): "+sum);
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java b/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java
deleted file mode 100644
index 3f9f0ad7..00000000
--- a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package test;
-
-public class IntDoublePair implements Comparable{
- double val;
- int idx;
- public int compareTo(Object o){
- if(o instanceof IntDoublePair){
- IntDoublePair pair=(IntDoublePair)o;
- if(pair.val>val){
- return 1;
- }
- if(pair.val<val){
- return -1;
- }
- return 0;
- }
- return -1;
- }
- public IntDoublePair(int i,double v){
- val=v;
- idx=i;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java
deleted file mode 100644
index 9059a59e..00000000
--- a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package test;
-
-
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.BoundsProjection;
-import optimization.projections.Projection;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-
-/**
- * @author javg
- *
- *
- *ax2+ b(y2 -displacement)
- */
-public class X2y2WithConstraints extends ProjectedObjective{
-
-
- double a, b;
- double dx;
- double dy;
- Projection projection;
-
-
- public X2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){
- //projection = new BoundsProjection(0.2,Double.MAX_VALUE);
- super();
- projection = proj;
- this.a = a;
- this.b = b;
- this.dx = dx;
- this.dy = dy;
- setInitialParameters(params);
- System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2");
- System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")");
- printParameters();
- projection.project(parameters);
- printParameters();
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy));
- }
-
- public double[] getGradient() {
- if(gradient == null){
- gradient = new double[2];
- }
- gradientCalls++;
- gradient[0]=2*a*(parameters[0]-dx);
- gradient[1]=2*b*(parameters[1]-dy);
- return gradient;
- }
-
-
- public double[] projectPoint(double[] point) {
- double[] newPoint = point.clone();
- projection.project(newPoint);
- return newPoint;
- }
-
- public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, X2y2WithConstraints o){
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001);
- StopingCriteria stopValue = new ValueDifference(0.001);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(5);
- boolean succed = optimizer.optimize(o,stats,compositeStop);
- System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
-
-
- public String toString(){
-
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")";
- }
-
- public static void main(String[] args) {
- double a = 1;
- double b=1;
- double x0 = 0;
- double y0 =1;
- double dx = 0.5;
- double dy = 0.2 ;
- double [] parameters = new double[2];
- parameters[0] = x0;
- parameters[1] = y0;
- X2y2WithConstraints o = new X2y2WithConstraints(a,b,parameters,dx,dy,
- new SimplexProjection(0.5)
- //new BoundsProjection(0.0,0.4)
- );
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b );
- o.setDebugLevel(4);
-
- LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithProjectedGradientDescent(ls, stats, o);
-
-// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy);
-// stats = new OptimizerStats();
-// o.optimizeWithSpectralProjectedGradientDescent(stats, o);
- }
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Array.java b/gi/posterior-regularisation/prjava/src/util/Array.java
deleted file mode 100644
index cc4725af..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Array.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package util;
-
-import java.util.Arrays;
-
-public class Array {
-
-
-
- public static void sortDescending(double[] ds){
- for (int i = 0; i < ds.length; i++) ds[i] = -ds[i];
- Arrays.sort(ds);
- for (int i = 0; i < ds.length; i++) ds[i] = -ds[i];
- }
-
- /**
- * Return a new reversed array
- * @param array
- * @return
- */
- public static int[] reverseIntArray(int[] array){
- int[] reversed = new int[array.length];
- for (int i = 0; i < reversed.length; i++) {
- reversed[i] = array[reversed.length-1-i];
- }
- return reversed;
- }
-
- public static String[] sumArray(String[] in, int from){
- String[] res = new String[in.length-from];
- for (int i = from; i < in.length; i++) {
- res[i-from] = in[i];
- }
- return res;
- }
-
- public static void main(String[] args) {
- int[] i = {1,2,3,4};
- util.Printing.printIntArray(i, null, "original");
- util.Printing.printIntArray(reverseIntArray(i), null, "reversed");
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java b/gi/posterior-regularisation/prjava/src/util/ArrayMath.java
deleted file mode 100644
index 398a13a2..00000000
--- a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java
+++ /dev/null
@@ -1,186 +0,0 @@
-package util;
-
-import java.util.Arrays;
-
-public class ArrayMath {
-
- public static double dotProduct(double[] v1, double[] v2) {
- assert(v1.length == v2.length);
- double result = 0;
- for(int i = 0; i < v1.length; i++)
- result += v1[i]*v2[i];
- return result;
- }
-
- public static double twoNormSquared(double[] v) {
- double result = 0;
- for(double d : v)
- result += d*d;
- return result;
- }
-
- public static boolean containsInvalid(double[] v) {
- for(int i = 0; i < v.length; i++)
- if(Double.isNaN(v[i]) || Double.isInfinite(v[i]))
- return true;
- return false;
- }
-
-
-
- public static double safeAdd(double[] toAdd) {
- // Make sure there are no positive infinities
- double sum = 0;
- for(int i = 0; i < toAdd.length; i++) {
- assert(!(Double.isInfinite(toAdd[i]) && toAdd[i] > 0));
- assert(!Double.isNaN(toAdd[i]));
- sum += toAdd[i];
- }
-
- return sum;
- }
-
- /* Methods for filling integer and double arrays (of up to four dimensions) with the given value. */
-
- public static void set(int[][][][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[][][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[] array, int value) {
- Arrays.fill(array, value);
- }
-
-
- public static void set(double[][][][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[][][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[] array, double value) {
- Arrays.fill(array, value);
- }
-
- public static void setEqual(double[][][][] dest, double[][][][] source){
- for (int i = 0; i < source.length; i++) {
- setEqual(dest[i],source[i]);
- }
- }
-
-
- public static void setEqual(double[][][] dest, double[][][] source){
- for (int i = 0; i < source.length; i++) {
- set(dest[i],source[i]);
- }
- }
-
-
- public static void set(double[][] dest, double[][] source){
- for (int i = 0; i < source.length; i++) {
- setEqual(dest[i],source[i]);
- }
- }
-
- public static void setEqual(double[] dest, double[] source){
- System.arraycopy(source, 0, dest, 0, source.length);
- }
-
- public static void plusEquals(double[][][][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[][][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[] array, double val){
- for (int i = 0; i < array.length; i++) {
- array[i] += val;
- }
- }
-
-
- public static double sum(double[] array) {
- double res = 0;
- for (int i = 0; i < array.length; i++) res += array[i];
- return res;
- }
-
-
-
- public static double[][] deepclone(double[][] in){
- double[][] res = new double[in.length][];
- for (int i = 0; i < res.length; i++) {
- res[i] = in[i].clone();
- }
- return res;
- }
-
-
- public static double[][][] deepclone(double[][][] in){
- double[][][] res = new double[in.length][][];
- for (int i = 0; i < res.length; i++) {
- res[i] = deepclone(in[i]);
- }
- return res;
- }
-
- public static double cosine(double[] a,
- double[] b) {
- return (dotProduct(a, b)+1e-5)/(Math.sqrt(dotProduct(a, a)+1e-5)*Math.sqrt(dotProduct(b, b)+1e-5));
- }
-
- public static double max(double[] ds) {
- double max = Double.NEGATIVE_INFINITY;
- for(double d:ds) max = Math.max(d,max);
- return max;
- }
-
- public static void exponentiate(double[] a) {
- for (int i = 0; i < a.length; i++) {
- a[i] = Math.exp(a[i]);
- }
- }
-
- public static int sum(int[] array) {
- int res = 0;
- for (int i = 0; i < array.length; i++) res += array[i];
- return res;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java b/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java
deleted file mode 100644
index 1ff1ae4a..00000000
--- a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package util;
-
-public interface DifferentiableObjective {
-
- public double getValue();
-
- public void getGradient(double[] gradient);
-
- public void getParameters(double[] params);
-
- public void setParameters(double[] newParameters);
-
- public int getNumParameters();
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java b/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java
deleted file mode 100644
index ff1478ad..00000000
--- a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package util;
-
-public class DigammaFunction {
- public static double expDigamma(double number){
- if(number==0)return number;
- return Math.exp(digamma(number));
- }
-
- public static double digamma(double number){
- if(number > 7){
- return digammApprox(number-0.5);
- }else{
- return digamma(number+1) - 1.0/number;
- }
- }
-
- private static double digammApprox(double value){
- return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4)
- + 0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8);
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/FileSystem.java b/gi/posterior-regularisation/prjava/src/util/FileSystem.java
deleted file mode 100644
index d7812e40..00000000
--- a/gi/posterior-regularisation/prjava/src/util/FileSystem.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package util;
-
-import java.io.File;
-
-public class FileSystem {
- public static boolean createDir(String directory) {
-
- File dir = new File(directory);
- if (!dir.isDirectory()) {
- boolean success = dir.mkdirs();
- if (!success) {
- System.out.println("Unable to create directory " + directory);
- return false;
- }
- System.out.println("Created directory " + directory);
- } else {
- System.out.println("Reusing directory " + directory);
- }
- return true;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/InputOutput.java b/gi/posterior-regularisation/prjava/src/util/InputOutput.java
deleted file mode 100644
index da7f71bf..00000000
--- a/gi/posterior-regularisation/prjava/src/util/InputOutput.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package util;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-import java.util.Properties;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-public class InputOutput {
-
- /**
- * Opens a file either compress with gzip or not compressed.
- */
- public static BufferedReader openReader(String fileName) throws UnsupportedEncodingException, FileNotFoundException, IOException{
- System.out.println("Reading: " + fileName);
- BufferedReader reader;
- fileName = fileName.trim();
- if(fileName.endsWith("gz")){
- reader = new BufferedReader(
- new InputStreamReader(new GZIPInputStream(new FileInputStream(fileName)),"UTF8"));
- }else{
- reader = new BufferedReader(new InputStreamReader(
- new FileInputStream(fileName), "UTF8"));
- }
-
- return reader;
- }
-
-
- public static PrintStream openWriter(String fileName)
- throws UnsupportedEncodingException, FileNotFoundException, IOException{
- System.out.println("Writting to file: " + fileName);
- PrintStream writter;
- fileName = fileName.trim();
- if(fileName.endsWith("gz")){
- writter = new PrintStream(new GZIPOutputStream(new FileOutputStream(fileName)),
- true, "UTF-8");
-
- }else{
- writter = new PrintStream(new FileOutputStream(fileName),
- true, "UTF-8");
-
- }
-
- return writter;
- }
-
- public static Properties readPropertiesFile(String fileName) {
- Properties properties = new Properties();
- try {
- properties.load(new FileInputStream(fileName));
- } catch (IOException e) {
- e.printStackTrace();
- throw new AssertionError("Wrong properties file " + fileName);
- }
- System.out.println(properties.toString());
-
- return properties;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/LogSummer.java b/gi/posterior-regularisation/prjava/src/util/LogSummer.java
deleted file mode 100644
index 117393b9..00000000
--- a/gi/posterior-regularisation/prjava/src/util/LogSummer.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package util;
-
-import java.lang.Math;
-
-/*
- * Math tool for computing logs of sums, when the terms of the sum are already in log form.
- * (Useful if the terms of the sum are very small numbers.)
- */
-public class LogSummer {
-
- private LogSummer() {
- }
-
- /**
- * Given log(a) and log(b), computes log(a + b).
- *
- * @param loga log of first sum term
- * @param logb log of second sum term
- * @return log(sum), where sum = a + b
- */
- public static double sum(double loga, double logb) {
- assert(!Double.isNaN(loga));
- assert(!Double.isNaN(logb));
-
- if(Double.isInfinite(loga))
- return logb;
- if(Double.isInfinite(logb))
- return loga;
-
- double maxLog;
- double difference;
- if(loga > logb) {
- difference = logb - loga;
- maxLog = loga;
- }
- else {
- difference = loga - logb;
- maxLog = logb;
- }
-
- return Math.log1p(Math.exp(difference)) + maxLog;
- }
-
- /**
- * Computes log(exp(array[index]) + b), and
- * modifies array[index] to contain this new value.
- *
- * @param array array to modify
- * @param index index at which to modify
- * @param logb log of the second sum term
- */
- public static void sum(double[] array, int index, double logb) {
- array[index] = sum(array[index], logb);
- }
-
- /**
- * Computes log(a + b + c + ...) from log(a), log(b), log(c), ...
- * by recursively splitting the input and delegating to the sum method.
- *
- * @param terms an array containing the log of all the terms for the sum
- * @return log(sum), where sum = exp(terms[0]) + exp(terms[1]) + ...
- */
- public static double sumAll(double... terms) {
- return sumAllHelper(terms, 0, terms.length);
- }
-
- /**
- * Computes log(a_0 + a_1 + ...) from a_0 = exp(terms[begin]),
- * a_1 = exp(terms[begin + 1]), ..., a_{end - 1 - begin} = exp(terms[end - 1]).
- *
- * @param terms an array containing the log of all the terms for the sum,
- * and possibly some other terms that will not go into the sum
- * @return log of the sum of the elements in the [begin, end) region of the terms array
- */
- private static double sumAllHelper(final double[] terms, final int begin, final int end) {
- int length = end - begin;
- switch(length) {
- case 0: return Double.NEGATIVE_INFINITY;
- case 1: return terms[begin];
- default:
- int midIndex = begin + length/2;
- return sum(sumAllHelper(terms, begin, midIndex), sumAllHelper(terms, midIndex, end));
- }
- }
-
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/util/MathUtil.java b/gi/posterior-regularisation/prjava/src/util/MathUtil.java
deleted file mode 100644
index 799b1faf..00000000
--- a/gi/posterior-regularisation/prjava/src/util/MathUtil.java
+++ /dev/null
@@ -1,148 +0,0 @@
-package util;
-
-import java.util.Random;
-
/**
 * Small collection of numeric helpers: vector reductions, simple distances
 * between equally long vectors, and digamma-related functions.
 */
public class MathUtil {
    /** Returns true when {@code number} is within 1e-10 of one. */
    public static final boolean closeToOne(double number) {
        return Math.abs(number - 1) < 1.E-10;
    }

    /** Returns true when {@code number} is within 1e-5 of zero. */
    public static final boolean closeToZero(double number) {
        return Math.abs(number) < 1.E-5;
    }

    /**
     * Returns a random multinomial distribution: {@code size} uniform draws
     * from {@code r}, rescaled so that they sum to one.
     *
     * @param size number of entries
     * @param r source of randomness
     * @return a new array of length {@code size}
     */
    public static final double[] randomVector(int size, Random r) {
        double[] random = new double[size];
        double sum = 0;
        for (int i = 0; i < size; i++) {
            double number = r.nextDouble();
            random[i] = number;
            sum += number;
        }
        for (int i = 0; i < size; i++) {
            random[i] = random[i] / sum;
        }
        return random;
    }

    /** Returns the sum of all entries (0 for an empty array). */
    public static double sum(double[] ds) {
        double res = 0;
        for (double d : ds) {
            res += d;
        }
        return res;
    }

    /** Returns the largest entry, or negative infinity for an empty array. */
    public static double max(double[] ds) {
        double res = Double.NEGATIVE_INFINITY;
        for (double d : ds) {
            res = Math.max(res, d);
        }
        return res;
    }

    /** Returns the smallest entry, or positive infinity for an empty array. */
    public static double min(double[] ds) {
        double res = Double.POSITIVE_INFINITY;
        for (double d : ds) {
            res = Math.min(res, d);
        }
        return res;
    }

    /**
     * Computes sum_j q[j] * log(q[j] / p[j]), i.e. the divergence of q from p.
     * Coordinates where either entry is exactly zero contribute nothing.
     * Iterates over {@code p.length} coordinates.
     */
    public static double KLDistance(double[] p, double[] q) {
        double kl = 0;
        for (int j = 0; j < p.length; j++) {
            if (p[j] == 0 || q[j] == 0) {
                continue;
            }
            kl += q[j] * Math.log(q[j] / p[j]);
        }
        return kl;
    }

    /**
     * Euclidean distance between p and q over {@code p.length} coordinates.
     * Every coordinate is included; a zero entry on one side is a real
     * difference and must not be skipped.
     */
    public static double L2Distance(double[] p, double[] q) {
        double l2 = 0;
        for (int j = 0; j < p.length; j++) {
            double diff = q[j] - p[j];
            l2 += diff * diff;
        }
        return Math.sqrt(l2);
    }

    /**
     * Sum of absolute coordinate differences between p and q over
     * {@code p.length} coordinates. Every coordinate is included.
     */
    public static double L1Distance(double[] p, double[] q) {
        double l1 = 0;
        for (int j = 0; j < p.length; j++) {
            l1 += Math.abs(q[j] - p[j]);
        }
        return l1;
    }

    /** Dot product over the first {@code ds2.length} coordinates. */
    public static double dot(double[] ds, double[] ds2) {
        double res = 0;
        for (int i = 0; i < ds2.length; i++) {
            res += ds[i] * ds2[i];
        }
        return res;
    }

    /** Returns exp(digamma(number)). */
    public static double expDigamma(double number) {
        return Math.exp(digamma(number));
    }

    /**
     * Digamma function. Arguments above 7 use an asymptotic series; smaller
     * positive arguments are shifted upwards with
     * digamma(x) = digamma(x + 1) - 1/x.
     *
     * The shift is done in a loop rather than by recursion, and negative
     * non-integers go through the reflection formula, so the cost is bounded
     * and NaN or very negative inputs can no longer exhaust the stack.
     *
     * @return NaN for NaN input; negative infinity at zero and the negative
     *         integers (the value the recurrence produced at those poles)
     */
    public static double digamma(double number) {
        if (Double.isNaN(number)) {
            return Double.NaN;
        }
        if (number <= 0 && number == Math.rint(number)) {
            return Double.NEGATIVE_INFINITY;
        }
        if (number < 0) {
            // reflection: digamma(x) = digamma(1 - x) - pi / tan(pi * x)
            return digamma(1 - number) - Math.PI / Math.tan(Math.PI * number);
        }
        double correction = 0;
        double x = number;
        while (!(x > 7)) {
            correction += 1.0 / x;
            x += 1;
        }
        return digammApprox(x - 0.5) - correction;
    }

    /** Truncated series in powers of 1/value^2 around log(value). */
    private static double digammApprox(double value) {
        return Math.log(value) + 0.04167 * Math.pow(value, -2) - 0.00729 * Math.pow(value, -4)
                + 0.00384 * Math.pow(value, -6) - 0.00413 * Math.pow(value, -8);
    }

    public static double eulerGamma = 0.57721566490152386060651209008240243;

    // FIXME -- so far just the initialization from Minka's paper "Estimating a Dirichlet distribution".
    /** Rough inverse of digamma; only the starting guess, no refinement steps. */
    public static double invDigamma(double y) {
        if (y >= -2.22) {
            return Math.exp(y) + 0.5;
        }
        return -1.0 / (y + eulerGamma);
    }

    /** Prints x, expDigamma(x) and x - 0.5 for x from 0 up to 10 in steps of 0.1. */
    public static void main(String[] args) {
        for (double i = 0; i < 10; i += 0.1) {
            System.out.println(i + "\t" + expDigamma(i) + "\t" + (i - 0.5));
        }
    }
}
diff --git a/gi/posterior-regularisation/prjava/src/util/Matrix.java b/gi/posterior-regularisation/prjava/src/util/Matrix.java
deleted file mode 100644
index 8fb6d911..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Matrix.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package util;
-
/** Dense x-by-y grid of doubles; all cells start at zero. */
public class Matrix {
    int x;
    int y;
    double[][] values;

    /**
     * @param x number of rows allocated
     * @param y number of columns allocated per row
     */
    public Matrix(int x, int y) {
        final double[][] cells = new double[x][y];
        this.x = x;
        this.y = y;
        this.values = cells;
    }
}
diff --git a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java b/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java
deleted file mode 100644
index 83a65611..00000000
--- a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package util;
-
-
/**
 * Measures the change in used heap between a call to start() and a call to
 * finish(). Each reading is taken after requesting garbage collection.
 */
public class MemoryTracker {

    double initM, finalM;
    boolean start = false, finish = false;

    public MemoryTracker() {
    }

    /** Requests garbage collection three times and returns used heap in MB. */
    private static double usedMegabytes() {
        System.gc();
        System.gc();
        System.gc();
        Runtime runtime = Runtime.getRuntime();
        // divide in floating point: long division would truncate to whole megabytes
        return (runtime.totalMemory() - runtime.freeMemory()) / (1024.0 * 1024.0);
    }

    /** Records the starting reading. */
    public void start() {
        initM = usedMegabytes();
        start = true;
    }

    /**
     * Records the final reading.
     *
     * @throws IllegalStateException if start() has not been called
     */
    public void finish() {
        if (!start) {
            throw new IllegalStateException("Canot stop before starting");
        }
        finalM = usedMegabytes();
        finish = true;
    }

    /**
     * Returns the difference between the final and starting readings as text.
     *
     * @throws IllegalStateException if finish() has not been called
     */
    public String print() {
        if (!finish) {
            throw new IllegalStateException("Canot print before stopping");
        }
        return "Used: " + (finalM - initM) + "MB";
    }

    /** Resets both readings and both state flags. */
    public void clear() {
        initM = 0;
        finalM = 0;
        finish = false;
        start = false;
    }
}
diff --git a/gi/posterior-regularisation/prjava/src/util/Pair.java b/gi/posterior-regularisation/prjava/src/util/Pair.java
deleted file mode 100644
index 7b1f108d..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Pair.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package util;
-
/**
 * Mutable holder for two values.
 *
 * @param <O1> type of the first value
 * @param <O2> type of the second value
 */
public class Pair<O1, O2> {
    public O1 _first;
    public O2 _second;

    public Pair(O1 first, O2 second) {
        this._first = first;
        this._second = second;
    }

    /** Returns the first value. */
    public final O1 first() {
        return this._first;
    }

    /** Returns the second value. */
    public final O2 second() {
        return this._second;
    }

    /** Replaces the first value. */
    public final void setFirst(O1 value) {
        this._first = value;
    }

    /** Replaces the second value. */
    public final void setSecond(O2 value) {
        this._second = value;
    }

    /** Both values separated by a single space. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(_first);
        text.append(" ");
        text.append(_second);
        return text.toString();
    }
}
diff --git a/gi/posterior-regularisation/prjava/src/util/Printing.java b/gi/posterior-regularisation/prjava/src/util/Printing.java
deleted file mode 100644
index 14fcbe91..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Printing.java
+++ /dev/null
@@ -1,158 +0,0 @@
-package util;
-
/**
 * Text formatting helpers for numbers, durations and numeric arrays.
 * All methods share one DecimalFormat whose pattern is re-applied per call.
 */
public class Printing {
    static java.text.DecimalFormat fmt = new java.text.DecimalFormat();

    private static final String SCIENTIFIC = "0.00000E00";

    /** Left-pads {@code s} with spaces up to {@code len} characters. */
    public static String padWithSpace(String s, int len) {
        StringBuilder sb = new StringBuilder();
        while (sb.length() + s.length() < len) {
            sb.append(" ");
        }
        sb.append(s);
        return sb.toString();
    }

    /**
     * Formats {@code d} with the DecimalFormat pattern {@code patt} and
     * left-pads the result with spaces up to {@code len} characters.
     */
    public static String prettyPrint(double d, String patt, int len) {
        fmt.applyPattern(patt);
        return padWithSpace(fmt.format(d), len);
    }

    /**
     * Formats a duration given in milliseconds as hours:minutes:seconds with
     * one decimal place on the seconds.
     *
     * The duration is rounded to tenths of a second first and then split
     * with integer arithmetic, so the fractional part survives and the
     * seconds field can never be rendered as 60.0.
     */
    public static String formatTime(long duration) {
        long tenths = Math.round(duration / 100.0);
        long hours = tenths / 36000;
        tenths -= hours * 36000;
        long minutes = tenths / 600;
        tenths -= minutes * 600;

        StringBuilder sb = new StringBuilder();
        fmt.applyPattern("00");
        sb.append(fmt.format(hours)).append(":");
        sb.append(fmt.format(minutes)).append(":");
        fmt.applyPattern("00.0");
        sb.append(fmt.format(tenths / 10.0));
        return sb.toString();
    }

    /**
     * Renders a vector as a name line, a tab-separated header line (labels,
     * or indices when {@code labels} is null) ending in "sum", and a value
     * line ending in the total.
     */
    public static String doubleArrayToString(double[] array, String[] labels, String arrayName) {
        StringBuilder res = new StringBuilder();
        res.append(arrayName);
        res.append("\n");
        for (int i = 0; i < array.length; i++) {
            if (labels == null) {
                res.append(i + "       \t");
            } else {
                res.append(labels[i] + "\t");
            }
        }
        res.append("sum\n");
        double sum = 0;
        for (int i = 0; i < array.length; i++) {
            res.append(prettyPrint(array[i], SCIENTIFIC, 8) + "\t");
            sum += array[i];
        }
        res.append(prettyPrint(sum, SCIENTIFIC, 8) + "\n");
        return res.toString();
    }

    /** Prints the vector rendering to standard output. */
    public static void printDoubleArray(double[] array, String labels[], String arrayName) {
        System.out.println(doubleArrayToString(array, labels, arrayName));
    }

    /**
     * Renders a rectangular matrix with column headings from {@code labels1},
     * row headings from {@code labels2} (indices when null), a sum per row
     * and a final line of column sums. An empty matrix yields headings only.
     */
    public static String doubleArrayToString(double[][] array, String[] labels1, String[] labels2,
            String arrayName) {
        // the column count comes from the first row; no rows means no columns
        final int cols = array.length == 0 ? 0 : array[0].length;
        StringBuilder res = new StringBuilder();
        res.append(arrayName);
        res.append("\n\t");
        // one running total per column plus the grand total in the last slot
        double[] sums = new double[cols + 1];
        for (int i = 0; i < cols; i++) {
            if (labels1 == null) {
                res.append(i + "        \t");
            } else {
                res.append(labels1[i] + " \t");
            }
        }
        res.append("sum\n");
        for (int i = 0; i < array.length; i++) {
            if (labels2 == null) {
                res.append(i + "\t");
            } else {
                res.append(labels2[i] + "\t");
            }
            double rowSum = 0;
            for (int j = 0; j < cols; j++) {
                res.append(" " + prettyPrint(array[i][j], SCIENTIFIC, 8) + "\t");
                sums[j] += array[i][j];
                rowSum += array[i][j];
            }
            res.append(prettyPrint(rowSum, SCIENTIFIC, 8) + "\n");
            sums[cols] += rowSum;
        }
        res.append("sum\t");
        for (int i = 0; i < cols + 1; i++) {
            res.append(prettyPrint(sums[i], SCIENTIFIC, 8) + "\t");
        }
        res.append("\n");
        return res.toString();
    }

    /** Prints the matrix rendering to standard output. */
    public static void printDoubleArray(double[][] array, String[] labels1, String[] labels2,
            String arrayName) {
        System.out.println(doubleArrayToString(array, labels1, labels2, arrayName));
    }

    /**
     * Prints the top-left {@code size1} by {@code size2} corner of an int
     * matrix, one row per line. The label arguments are not used.
     */
    public static void printIntArray(int[][] array, String[] labels1, String[] labels2, String arrayName,
            int size1, int size2) {
        System.out.println(arrayName);
        for (int i = 0; i < size1; i++) {
            for (int j = 0; j < size2; j++) {
                System.out.print(" " + array[i][j] + " ");
            }
            System.out.println();
        }
        System.out.println();
    }

    /** Renders an int vector on one line after its name. {@code labels} is not used. */
    public static String intArrayToString(int[] array, String[] labels, String arrayName) {
        StringBuilder res = new StringBuilder();
        res.append(arrayName);
        for (int i = 0; i < array.length; i++) {
            res.append(" " + array[i] + " ");
        }
        res.append("\n");
        return res.toString();
    }

    /** Prints the int vector rendering to standard output. */
    public static void printIntArray(int[] array, String[] labels, String arrayName) {
        System.out.println(intArrayToString(array, labels, arrayName));
    }

    /**
     * Renders a matrix one row per line, each value padded to 10 characters.
     * Each row is walked by its own length, so ragged input is accepted.
     */
    public static String toString(double[][] d) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < d.length; i++) {
            for (int j = 0; j < d[i].length; j++) {
                sb.append(prettyPrint(d[i][j], "0.00E0", 10));
            }
            sb.append("\n");
        }
        return sb.toString();
    }
}
diff --git a/gi/posterior-regularisation/prjava/src/util/Sorters.java b/gi/posterior-regularisation/prjava/src/util/Sorters.java
deleted file mode 100644
index 836444e5..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Sorters.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package util;
-
-import java.util.Comparator;
-
-public class Sorters {
- public static class sortWordsCounts implements Comparator{
-
- /**
- * Sorter for a pair of word id, counts. Sort ascending by counts
- */
- public int compare(Object arg0, Object arg1) {
- Pair<Integer,Integer> p1 = (Pair<Integer,Integer>)arg0;
- Pair<Integer,Integer> p2 = (Pair<Integer,Integer>)arg1;
- if(p1.second() > p2.second()){
- return 1;
- }else{
- return -1;
- }
- }
-
- }
-
-public static class sortWordsDouble implements Comparator{
-
- /**
- * Sorter for a pair of word id, counts. Sort by counts
- */
- public int compare(Object arg0, Object arg1) {
- Pair<Integer,Double> p1 = (Pair<Integer,Double>)arg0;
- Pair<Integer,Double> p2 = (Pair<Integer,Double>)arg1;
- if(p1.second() < p2.second()){
- return 1;
- }else{
- return -1;
- }
- }
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/train-PR-cluster.sh b/gi/posterior-regularisation/prjava/train-PR-cluster.sh
deleted file mode 100755
index 67552c00..00000000
--- a/gi/posterior-regularisation/prjava/train-PR-cluster.sh
+++ /dev/null
@@ -1,4 +0,0 @@
#!/bin/sh

# Runs phrase.Trainer with assertions enabled and a 30g heap, using the jars
# that sit next to this script. All arguments are forwarded unchanged; the
# quoting keeps paths and arguments containing spaces intact.
d=$(dirname "$0")
java -ea -Xmx30g -cp "$d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/commons-math-2.1.jar" phrase.Trainer "$@"
diff --git a/gi/posterior-regularisation/projected_gradient.cc b/gi/posterior-regularisation/projected_gradient.cc
deleted file mode 100644
index f7c39817..00000000
--- a/gi/posterior-regularisation/projected_gradient.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-//
-// Minimises given functional using the projected gradient method. Based on
-// algorithm and demonstration example in Linear and Nonlinear Programming,
-// Luenberger and Ye, 3rd ed., p 370.
-//
-
-#include "invert.hh"
-#include <iostream>
-
-using namespace std;
-
// Objective of the demonstration problem:
// the sum of the four squares minus 2*x1 and minus 3*x4.
double
f(double x1, double x2, double x3, double x4)
{
    double value = x1 * x1;
    value += x2 * x2;
    value += x3 * x3;
    value += x4 * x4;
    value -= 2 * x1;
    value -= 3 * x4;
    return value;
}
-
-ublas::vector<double>
-g(double x1, double x2, double x3, double x4)
-{
- ublas::vector<double> v(4);
- v(0) = 2 * x1 - 2;
- v(1) = 2 * x2;
- v(2) = 2 * x3;
- v(3) = 2 * x4 - 3;
- return v;
-}
-
-ublas::matrix<double>
-activeConstraints(double x1, double x2, double x3, double x4)
-{
- int n = 2;
- if (x1 == 0) ++n;
- if (x2 == 0) ++n;
- if (x3 == 0) ++n;
- if (x4 == 0) ++n;
-
- ublas::matrix<double> a(n,4);
- a(0, 0) = 2; a(0, 1) = 1; a(0, 2) = 1; a(0, 3) = 4;
- a(1, 0) = 1; a(1, 1) = 1; a(1, 2) = 2; a(1, 3) = 1;
-
- int c = 2;
- if (x1 == 0) a(c++, 0) = 1;
- if (x2 == 0) a(c++, 1) = 1;
- if (x3 == 0) a(c++, 2) = 1;
- if (x4 == 0) a(c++, 3) = 1;
-
- return a;
-}
-
-ublas::matrix<double>
-projection(const ublas::matrix<double> &a)
-{
- ublas::matrix<double> aT = ublas::trans(a);
- ublas::matrix<double> inv(a.size1(), a.size1());
- bool ok = invert_matrix(ublas::matrix<double>(ublas::prod(a, aT)), inv);
- assert(ok && "Failed to invert matrix");
- return ublas::identity_matrix<double>(4) -
- ublas::prod(aT, ublas::matrix<double>(ublas::prod(inv, a)));
-}
-
-int main(int argc, char *argv[])
-{
- double x1 = 2, x2 = 2, x3 = 1, x4 = 0;
-
- double fval = f(x1, x2, x3, x4);
- cout << "f = " << fval << endl;
- ublas::vector<double> grad = g(x1, x2, x3, x4);
- cout << "g = " << grad << endl;
- ublas::matrix<double> A = activeConstraints(x1, x2, x3, x4);
- cout << "A = " << A << endl;
- ublas::matrix<double> P = projection(A);
- cout << "P = " << P << endl;
- // the direction of movement
- ublas::vector<double> d = prod(P, grad);
- cout << "d = " << (d / d(0)) << endl;
-
- // special case for d = 0
-
- // next solve for limits on the line search
-
- // then use golden rule technique between these values (if bounded)
-
- // or simple Armijo's rule technique
-
- return 0;
-}
diff --git a/gi/posterior-regularisation/simplex_pg.py b/gi/posterior-regularisation/simplex_pg.py
deleted file mode 100644
index 5da796d3..00000000
--- a/gi/posterior-regularisation/simplex_pg.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#
- # Following Luenberger and Ye, Linear and Nonlinear Programming, 3rd ed. p367
-# "The gradient projection method"
-# applied to an equality constraint for a simplex.
-#
-# min f(x)
-# s.t. x >= 0, sum_i x = d
-#
-# FIXME: enforce the positivity constraint - a limit on the line search?
-#
-
-from numpy import *
-from scipy import *
-from linesearch import line_search
- # local copy of scipy's Armijo line_search - wasn't enforcing alpha max correctly
-import sys
-
-dims = 4
-
def f(x):
    # objective: squares of the four coordinates, less 2*x[0] and 3*x[3]
    squares = x[0]*x[0] + x[1]*x[1] + x[2]*x[2] + x[3]*x[3]
    return squares - 2*x[0] - 3*x[3]
-
def g(x):
    # gradient of f, one partial derivative per coordinate
    d0 = 2*x[0] - 2
    d1 = 2*x[1]
    d2 = 2*x[2]
    d3 = 2*x[3] - 3
    return array([d0, d1, d2, d3])
-
def pg(x):
    # gradient with its mean component removed, so the entries sum to zero
    gradient = g(x)
    mean_component = sum(gradient) / dims
    return gradient - mean_component
-
# Start from the uniform point and repeat projected-gradient steps until no
# usable step remains.
x = ones(dims) / dims
old_fval = None

while True:
    fv = f(x)
    gv = g(x)
    dv = pg(x)

    print('x %s f %s g %s d %s' % (x, fv, gv, dv))

    if old_fval is None:
        old_fval = fv + 0.1

    # Largest step that keeps x - alpha * dv >= 0. Only coordinates with
    # dv > 0 shrink, and the tightest of their bounds x_i / dv_i is the
    # limit, hence the minimum over those coordinates.
    shrinking = dv > 0
    if not shrinking.any(): break  # dv has no positive entry: nothing to step along
    amax = min(x[shrinking] / dv[shrinking])
    if amax < 1e-8: break

    stuff = line_search(f, pg, x, -dv, dv, fv, old_fval, amax=amax)
    alpha = stuff[0] # Nb. can avoid next evaluation of f,g,d using 'stuff'
    if alpha is None or alpha < 1e-8: break
    x -= alpha * dv

    old_fval = fv
diff --git a/gi/posterior-regularisation/split-languages.py b/gi/posterior-regularisation/split-languages.py
deleted file mode 100755
index 206da661..00000000
--- a/gi/posterior-regularisation/split-languages.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
# Reads "phrase<TAB>context ||| count ||| context ||| count ..." lines from
# stdin, where every phrase and context has the form "source <SPLIT> target",
# and writes the source halves to argv[1] and the target halves to argv[2].
# The with-blocks close both files even when a line fails to parse.
with open(sys.argv[1], 'w') as sout:
    with open(sys.argv[2], 'w') as tout:
        for line in sys.stdin:
            phrase, contexts = line.rstrip().split('\t')
            sp, tp = phrase.split(' <SPLIT> ')
            sout.write('%s\t' % sp)
            tout.write('%s\t' % tp)
            parts = contexts.split(' ||| ')
            # parts alternates context, count, context, count, ...
            for i in range(0, len(parts), 2):
                sc, tc = parts[i].split(' <SPLIT> ')
                if i != 0:
                    sout.write(' ||| ')
                    tout.write(' ||| ')
                sout.write('%s ||| %s' % (sc, parts[i+1]))
                tout.write('%s ||| %s' % (tc, parts[i+1]))
            sout.write('\n')
            tout.write('\n')
diff --git a/gi/posterior-regularisation/train_pr_agree.py b/gi/posterior-regularisation/train_pr_agree.py
deleted file mode 100644
index 9d41362d..00000000
--- a/gi/posterior-regularisation/train_pr_agree.py
+++ /dev/null
@@ -1,400 +0,0 @@
-import sys
-import scipy.optimize
-from scipy.stats import geom
-from numpy import *
-from numpy.random import random, seed
-
if len(sys.argv) < 2:
    sys.exit('usage: train_pr_agree.py p2c|c2p|prod|prodslack|sum [seed] < concordance')
style = sys.argv[1]
if len(sys.argv) >= 3:
    seed(int(sys.argv[2]))

#
# Step 1: load the concordance counts
#

edges = []
word_types = {}
phrase_types = {}
context_types = {}

def _intern_tokens(tokens):
    # map each token to its integer id, assigning the next free id to new tokens
    return tuple([word_types.setdefault(t, len(word_types)) for t in tokens])

def _invert(index_of):
    # list whose entry i is the key that was assigned id i
    items = [None] * len(index_of)
    for typ, index in index_of.items():
        items[index] = typ
    return items

for line in sys.stdin:
    phrase, rest = line.strip().split('\t')
    ptoks = _intern_tokens(phrase.split())
    pid = phrase_types.setdefault(ptoks, len(phrase_types))

    # rest alternates context, count, context, count, ...
    parts = rest.split('|||')
    for i in range(0, len(parts), 2):
        context, count = parts[i:i+2]

        ctx = [tok for tok in context.split() if tok != '<PHRASE>']
        ctoks = _intern_tokens(ctx)
        cid = context_types.setdefault(ctoks, len(context_types))

        # the count field carries a two-character prefix before the number
        cnt = int(count.strip()[2:])
        edges.append((pid, cid, cnt))

word_type_list = _invert(word_types)
phrase_type_list = _invert(phrase_types)
context_type_list = _invert(context_types)

num_tags = 5
num_types = len(word_types)
num_phrases = len(phrase_types)
num_contexts = len(context_types)
num_edges = len(edges)

print('Read in %s edges %s phrases %s contexts and %s word types'
      % (num_edges, num_phrases, num_contexts, num_types))
-
-#
-# Step 2: expectation maximisation
-#
-
def normalise(a):
    # Scale a so that its entries sum to one. A vector with zero total has
    # no direction to preserve, so it becomes uniform instead of all-NaN.
    total = float(sum(a))
    if total == 0:
        return ones(len(a)) / len(a)
    return a / total
-
class PhraseToContextModel:
    # Generates the four context words of an edge given a tag drawn from a
    # per-phrase tag distribution.

    def __init__(self):
        # Pr(tag | phrase)
        self.tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
        # Pr(context at pos i = w | tag) indexed by i, tag, word
        self.contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]

    def prob(self, pid, cid):
        # return distribution p(tag, context | phrase) as vector of length |tags|
        context = context_type_list[cid]
        dist = zeros(num_tags)
        for t in range(num_tags):
            prob = self.tagDist[pid][t]
            for k, tokid in enumerate(context):
                prob *= self.contextWordDist[k][t][tokid]
            dist[t] = prob
        return dist

    def expectation_maximisation_step(self, lamba=None):
        # One EM iteration; returns the count-weighted log-likelihood under
        # the parameters held before the update. When lamba is given, the
        # tag posterior is reweighted by exp(lamba) before counting.
        tagCounts = zeros((num_phrases, num_tags))
        contextWordCounts = zeros((4, num_tags, num_types))

        # E-step
        llh = 0
        for pid, cid, cnt in edges:
            q = self.prob(pid, cid)
            z = sum(q)
            q /= z
            # the edge was observed cnt times, so it contributes cnt times
            llh += cnt * log(z)
            context = context_type_list[cid]
            # identity test: comparing an array to None with != is elementwise
            if lamba is not None:
                q *= exp(lamba)
                q /= sum(q)
            for t in range(num_tags):
                tagCounts[pid][t] += cnt * q[t]
            for i in range(4):
                for t in range(num_tags):
                    contextWordCounts[i][t][context[i]] += cnt * q[t]

        # M-step
        for p in range(num_phrases):
            self.tagDist[p] = normalise(tagCounts[p])
        for i in range(4):
            for t in range(num_tags):
                self.contextWordDist[i][t] = normalise(contextWordCounts[i,t])

        return llh
-
class ContextToPhraseModel:
    # Generates the phrase of an edge (length, first word, last word) given
    # a tag drawn from a per-context tag distribution.

    def __init__(self):
        # Pr(tag | context) = Multinomial
        self.tagDist = [normalise(random(num_tags)+1) for p in range(num_contexts)]
        # Pr(phrase = w | tag) = Multinomial
        self.phraseSingleDist = [normalise(random(num_types)+1) for t in range(num_tags)]
        # Pr(phrase_1 = w | tag) = Multinomial
        self.phraseLeftDist = [normalise(random(num_types)+1) for t in range(num_tags)]
        # Pr(phrase_-1 = w | tag) = Multinomial
        self.phraseRightDist = [normalise(random(num_types)+1) for t in range(num_tags)]
        # Pr(|phrase| = l | tag) = Geometric
        self.phraseLengthDist = [0.5] * num_tags
        # n.b. internal words for phrases of length >= 3 are drawn from uniform distribution

    def prob(self, pid, cid):
        # return distribution p(tag, phrase | context) as vector of length |tags|
        phrase = phrase_type_list[pid]
        dist = zeros(num_tags)
        for t in range(num_tags):
            prob = self.tagDist[cid][t]
            f = self.phraseLengthDist[t]
            prob *= geom.pmf(len(phrase), f)
            if len(phrase) == 1:
                prob *= self.phraseSingleDist[t][phrase[0]]
            else:
                prob *= self.phraseLeftDist[t][phrase[0]]
                prob *= self.phraseRightDist[t][phrase[-1]]
            dist[t] = prob
        return dist

    def expectation_maximisation_step(self, lamba=None):
        # One EM iteration; returns the count-weighted log-likelihood under
        # the parameters held before the update. When lamba is given, the
        # tag posterior is reweighted by exp(lamba) before counting.
        tagCounts = zeros((num_contexts, num_tags))
        phraseSingleCounts = zeros((num_tags, num_types))
        phraseLeftCounts = zeros((num_tags, num_types))
        phraseRightCounts = zeros((num_tags, num_types))
        # expected total phrase length per tag; indexed by tag below
        phraseLength = zeros(num_tags)

        # E-step
        llh = 0
        for pid, cid, cnt in edges:
            q = self.prob(pid, cid)
            z = sum(q)
            q /= z
            # the edge was observed cnt times, so it contributes cnt times
            llh += cnt * log(z)
            # identity test: comparing an array to None with != is elementwise
            if lamba is not None:
                q *= exp(lamba)
                q /= sum(q)
            phrase = phrase_type_list[pid]
            for t in range(num_tags):
                tagCounts[cid][t] += cnt * q[t]
                phraseLength[t] += cnt * len(phrase) * q[t]
                if len(phrase) == 1:
                    phraseSingleCounts[t][phrase[0]] += cnt * q[t]
                else:
                    phraseLeftCounts[t][phrase[0]] += cnt * q[t]
                    phraseRightCounts[t][phrase[-1]] += cnt * q[t]

        # M-step
        for t in range(num_tags):
            # ratio of expected tag count to expected total length, kept
            # strictly inside (0, 1)
            self.phraseLengthDist[t] = min(max(sum(tagCounts[:,t]) / phraseLength[t], 1e-6), 1-1e-6)
            self.phraseSingleDist[t] = normalise(phraseSingleCounts[t])
            self.phraseLeftDist[t] = normalise(phraseLeftCounts[t])
            self.phraseRightDist[t] = normalise(phraseRightCounts[t])
        for c in range(num_contexts):
            self.tagDist[c] = normalise(tagCounts[c])

        return llh
-
class ProductModel:
    """
    WARNING: I haven't verified the maths behind this model. It's quite likely to be incorrect.

    Trains a PhraseToContextModel and a ContextToPhraseModel from shared
    expected counts taken under the normalised square root of the product
    of their two tag posteriors.
    """

    def __init__(self):
        self.pcm = PhraseToContextModel()
        self.cpm = ContextToPhraseModel()

    def prob(self, pid, cid):
        # elementwise product of the two normalised tag posteriors (not renormalised)
        p1 = self.pcm.prob(pid, cid)
        p2 = self.cpm.prob(pid, cid)
        return (p1 / sum(p1)) * (p2 / sum(p2))

    def expectation_maximisation_step(self):
        # One joint EM iteration over both sub-models; returns llh1 + llh2 + kl.
        tagCountsGivenPhrase = zeros((num_phrases, num_tags))
        contextWordCounts = zeros((4, num_tags, num_types))

        tagCountsGivenContext = zeros((num_contexts, num_tags))
        phraseSingleCounts = zeros((num_tags, num_types))
        phraseLeftCounts = zeros((num_tags, num_types))
        phraseRightCounts = zeros((num_tags, num_types))
        # expected total phrase length per tag; indexed by tag below
        phraseLength = zeros(num_tags)

        kl = llh1 = llh2 = 0
        for pid, cid, cnt in edges:
            p1 = self.pcm.prob(pid, cid)
            llh1 += log(sum(p1)) * cnt
            p2 = self.cpm.prob(pid, cid)
            llh2 += log(sum(p2)) * cnt

            q = (p1 / sum(p1)) * (p2 / sum(p2))
            kl += log(sum(q)) * cnt
            qi = sqrt(q)
            qi /= sum(qi)

            phrase = phrase_type_list[pid]
            context = context_type_list[cid]
            for t in range(num_tags):
                tagCountsGivenPhrase[pid][t] += cnt * qi[t]
                tagCountsGivenContext[cid][t] += cnt * qi[t]
                phraseLength[t] += cnt * len(phrase) * qi[t]
                if len(phrase) == 1:
                    phraseSingleCounts[t][phrase[0]] += cnt * qi[t]
                else:
                    phraseLeftCounts[t][phrase[0]] += cnt * qi[t]
                    phraseRightCounts[t][phrase[-1]] += cnt * qi[t]
                for i in range(4):
                    contextWordCounts[i][t][context[i]] += cnt * qi[t]

        kl *= -2

        for t in range(num_tags):
            for i in range(4):
                self.pcm.contextWordDist[i][t] = normalise(contextWordCounts[i,t])
            self.cpm.phraseLengthDist[t] = min(max(sum(tagCountsGivenContext[:,t]) / phraseLength[t], 1e-6), 1-1e-6)
            self.cpm.phraseSingleDist[t] = normalise(phraseSingleCounts[t])
            self.cpm.phraseLeftDist[t] = normalise(phraseLeftCounts[t])
            self.cpm.phraseRightDist[t] = normalise(phraseRightCounts[t])
        for p in range(num_phrases):
            self.pcm.tagDist[p] = normalise(tagCountsGivenPhrase[p])
        for c in range(num_contexts):
            self.cpm.tagDist[c] = normalise(tagCountsGivenContext[c])

        # return the overall objective
        return llh1 + llh2 + kl
-
class RegularisedProductModel:
    # as ProductModel, but with a slack regularisation term which kills the
    # closed-form solution for the E-step

    def __init__(self, epsilon):
        self.pcm = PhraseToContextModel()
        self.cpm = ContextToPhraseModel()
        self.epsilon = epsilon
        self.lamba = zeros(num_tags)

    def prob(self, pid, cid):
        # elementwise product of the two normalised tag posteriors (not renormalised)
        p1 = self.pcm.prob(pid, cid)
        p2 = self.cpm.prob(pid, cid)
        return (p1 / sum(p1)) * (p2 / sum(p2))

    def dual(self, lamba):
        # log(z_lambda) + eps ||lambda||_2
        return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5

    def dual_gradient(self, lamba):
        # Gradient of dual(): the penalty eps * ||lambda||_2 differentiates
        # to eps * lambda / ||lambda||_2. At lambda = 0 the norm has no
        # gradient; zero is used there.
        norm = dot(lamba, lamba) ** 0.5
        if norm > 0:
            penalty = self.epsilon * lamba / norm
        else:
            penalty = zeros(len(lamba))
        return self.expected_features(lamba) + penalty

    def expectation_maximisation_step(self):
        # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2
        self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba,
                            bounds=[(0, 1e100)] * num_tags,
                            fprime=self.dual_gradient, iprint=1)

        # E,M-steps: collect expected counts under q_lambda and normalise
        llh1 = self.pcm.expectation_maximisation_step(self.lamba)
        llh2 = self.cpm.expectation_maximisation_step(-self.lamba)

        # return the overall objective: llh - KL(q||p1.p2)
        # llh = llh1 + llh2
        # kl = sum q log q / p1 p2 = sum q { lambda . phi } - log Z
        return llh1 + llh2 + self.logz(self.lamba) \
            - dot(self.lamba, self.expected_features(self.lamba))

    def logz(self, lamba):
        # count-weighted sum over edges of the log normalisers of the two
        # reweighted posteriors (exp(lamba) for pcm, exp(-lamba) for cpm)
        lz = 0
        for pid, cid, cnt in edges:
            p1 = self.pcm.prob(pid, cid)
            z1 = dot(p1 / sum(p1), exp(lamba))
            lz += log(z1) * cnt

            p2 = self.cpm.prob(pid, cid)
            z2 = dot(p2 / sum(p2), exp(-lamba))
            lz += log(z2) * cnt
        return lz

    def expected_features(self, lamba):
        # gradient of logz: reweighted pcm posterior minus reweighted cpm
        # posterior, summed over edges with their counts
        fs = zeros(num_tags)
        for pid, cid, cnt in edges:
            p1 = self.pcm.prob(pid, cid)
            q1 = (p1 / sum(p1)) * exp(lamba)
            fs += cnt * q1 / sum(q1)

            p2 = self.cpm.prob(pid, cid)
            q2 = (p2 / sum(p2)) * exp(-lamba)
            fs -= cnt * q2 / sum(q2)
        return fs
-
-
class InterpolatedModel:
    # Equal-weight mixture of the phrase-to-context and context-to-phrase
    # models, with a regularised dual over the tag reweighting lambda.

    def __init__(self, epsilon):
        self.pcm = PhraseToContextModel()
        self.cpm = ContextToPhraseModel()
        self.epsilon = epsilon
        self.lamba = zeros(num_tags)

    def prob(self, pid, cid):
        # average of the two models' unnormalised tag vectors
        p1 = self.pcm.prob(pid, cid)
        p2 = self.cpm.prob(pid, cid)
        return (p1 + p2) / 2

    def dual(self, lamba):
        # log(z_lambda) + eps ||lambda||_2
        return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5

    def dual_gradient(self, lamba):
        # Gradient of dual(): the penalty eps * ||lambda||_2 differentiates
        # to eps * lambda / ||lambda||_2. At lambda = 0 the norm has no
        # gradient; zero is used there.
        norm = dot(lamba, lamba) ** 0.5
        if norm > 0:
            penalty = self.epsilon * lamba / norm
        else:
            penalty = zeros(len(lamba))
        return self.expected_features(lamba) + penalty

    def expectation_maximisation_step(self):
        # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2
        self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba,
                            bounds=[(0, 1e100)] * num_tags,
                            fprime=self.dual_gradient, iprint=2)

        # E,M-steps: collect expected counts under q_lambda and normalise
        # NOTE(review): logz() and expected_features() weight by
        # exp(-lambda), while the sub-model E-steps multiply by exp(lamba)
        # as passed in; passing +lambda here looks like a sign mismatch.
        # Left unchanged pending confirmation.
        llh1 = self.pcm.expectation_maximisation_step(self.lamba)
        llh2 = self.cpm.expectation_maximisation_step(self.lamba)

        # return the overall objective: llh1 + llh2 - KL(q||p1.p2)
        # kl = sum_y q log q / 0.5 * (p1 + p2) = sum_y q(y) { -lambda . phi(y) } - log Z
        #    = lambda . grad(log Z) - log Z
        # which cannot be negative when expected_features() is the exact
        # gradient, because log Z is convex in lambda and zero at lambda = 0
        kl = -self.logz(self.lamba) + dot(self.lamba, self.expected_features(self.lamba))
        return llh1 + llh2 - kl, llh1, llh2, kl

    def logz(self, lamba):
        # count-weighted sum over edges of log(0.5 * (z1 + z2))
        lz = 0
        for pid, cid, cnt in edges:
            p1 = self.pcm.prob(pid, cid)
            q1 = p1 / sum(p1) * exp(-lamba)
            q1z = sum(q1)

            p2 = self.cpm.prob(pid, cid)
            q2 = p2 / sum(p2) * exp(-lamba)
            q2z = sum(q2)

            lz += log(0.5 * (q1z + q2z)) * cnt
        return lz

    # z = 1/2 * (sum_y p1(y|x) exp (-lambda . phi(y)) + sum_y p2(y|x) exp (-lambda . phi(y)))
    #   = 1/2 (z1 + z2)
    # d (log z) / dlambda = -(q1 + q2) / (z1 + z2), with q1, q2 unnormalised.
    # Averaging the two separately normalised posteriors matches this only
    # when z1 == z2.
    def expected_features(self, lamba):
        fs = zeros(num_tags)
        for pid, cid, cnt in edges:
            p1 = self.pcm.prob(pid, cid)
            q1 = (p1 / sum(p1)) * exp(-lamba)

            p2 = self.cpm.prob(pid, cid)
            q2 = (p2 / sum(p2)) * exp(-lamba)

            fs -= cnt * (q1 + q2) / (sum(q1) + sum(q2))
        return fs
-
# Build the model selected on the command line; an unrecognised style is
# reported straight away and not left to fail later with an undefined name.
if style == 'p2c':
    m = PhraseToContextModel()
elif style == 'c2p':
    m = ContextToPhraseModel()
elif style == 'prod':
    m = ProductModel()
elif style == 'prodslack':
    m = RegularisedProductModel(0.5)
elif style == 'sum':
    m = InterpolatedModel(0.5)
else:
    sys.exit("unknown style %r: expected one of p2c, c2p, prod, prodslack, sum" % style)

for iteration in range(30):
    obj = m.expectation_maximisation_step()
    print('iteration %s objective %s' % (iteration, obj))

# Label every edge with its highest-scoring tag.
for pid, cid, cnt in edges:
    p = m.prob(pid, cid)
    phrase = phrase_type_list[pid]
    phrase_str = ' '.join([word_type_list[w] for w in phrase])
    context = context_type_list[cid]
    context_str = ' '.join([word_type_list[w] for w in context])
    print('%s\t%s ||| C=%d' % (phrase_str, context_str, argmax(p)))
diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py
deleted file mode 100644
index 8521bccb..00000000
--- a/gi/posterior-regularisation/train_pr_global.py
+++ /dev/null
@@ -1,296 +0,0 @@
-import sys
-import scipy.optimize
-from numpy import *
-from numpy.random import random
-
-#
-# Step 1: load the concordance counts
-#
-
-edges_phrase_to_context = []
-edges_context_to_phrase = []
-types = {}
-context_types = {}
-num_edges = 0
-
-for line in sys.stdin:
- phrase, rest = line.strip().split('\t')
- parts = rest.split('|||')
- edges_phrase_to_context.append((phrase, []))
- for i in range(0, len(parts), 2):
- context, count = parts[i:i+2]
-
- ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split()))
- cnt = int(count.strip()[2:])
- edges_phrase_to_context[-1][1].append((ctx, cnt))
-
- cid = context_types.get(ctx, len(context_types))
- if cid == len(context_types):
- context_types[ctx] = cid
- edges_context_to_phrase.append((ctx, []))
- edges_context_to_phrase[cid][1].append((phrase, cnt))
-
- for token in ctx:
- types.setdefault(token, len(types))
- for token in phrase.split():
- types.setdefault(token, len(types))
-
- num_edges += 1
-
-print 'Read in', num_edges, 'edges and', len(types), 'word types'
-
-print 'edges_phrase_to_context', edges_phrase_to_context
-
-#
-# Step 2: initialise the model parameters
-#
-
-num_tags = 10
-num_types = len(types)
-num_phrases = len(edges_phrase_to_context)
-num_contexts = len(edges_context_to_phrase)
-delta = int(sys.argv[1])
-gamma = int(sys.argv[2])
-
-def normalise(a):
- return a / float(sum(a))
-
-# Pr(tag | phrase)
-tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
-#tagDist = [normalise(array(range(1,num_tags+1))) for p in range(num_phrases)]
-# Pr(context at pos i = w | tag) indexed by i, tag, word
-#contextWordDist = [[normalise(array(range(1,num_types+1))) for t in range(num_tags)] for i in range(4)]
-contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
-# PR langrange multipliers
-lamba = zeros(2 * num_edges * num_tags)
-omega_offset = num_edges * num_tags
-lamba_index = {}
-next = 0
-for phrase, ccs in edges_phrase_to_context:
- for context, count in ccs:
- lamba_index[phrase,context] = next
- next += num_tags
-#print lamba_index
-
-#
-# Step 3: expectation maximisation
-#
-
-for iteration in range(20):
- tagCounts = [zeros(num_tags) for p in range(num_phrases)]
- contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)]
-
- #print 'tagDist', tagDist
- #print 'contextWordCounts[0][0]', contextWordCounts[0][0]
-
- # Tune lambda
- # dual: min log Z(lamba) s.t. lamba >= 0;
- # sum_c lamba_pct <= delta; sum_p lamba_pct <= gamma
- def dual(ls):
- logz = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- #print 'dual', phrase, context, count, 'p =', conditionals
-
- local_z = 0
- for t in range(num_tags):
- li = lamba_index[phrase,context] + t
- local_z += conditionals[t] * exp(-ls[li] - ls[omega_offset+li])
- logz += log(local_z) * count
-
- #print 'ls', ls
- #print 'lambda', list(ls)
- #print 'dual', logz
- return logz
-
- def loglikelihood():
- llh = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- llh += log(cz) * count
- return llh
-
- def primal(ls):
- # FIXME: returns negative values for KL (impossible)
- logz = dual(ls)
- expectations = -dual_deriv(ls)
- kl = -logz - dot(ls, expectations)
- llh = loglikelihood()
-
- pt_l1linf = 0
- for phrase, ccs in edges_phrase_to_context:
- for t in range(num_tags):
- best = -1e500
- for context, count in ccs:
- li = lamba_index[phrase,context] + t
- s = expectations[li]
- if s > best: best = s
- pt_l1linf += best
-
- ct_l1linf = 0
- for context, pcs in edges_context_to_phrase:
- for t in range(num_tags):
- best = -1e500
- for phrase, count in pcs:
- li = omega_offset + lamba_index[phrase,context] + t
- s = expectations[li]
- if s > best: best = s
- ct_l1linf += best
-
- return llh, kl, pt_l1linf, ct_l1linf, llh - kl - delta * pt_l1linf - gamma * ct_l1linf
-
- def dual_deriv(ls):
- # d/dl log(z) = E_q[phi]
- deriv = zeros(2 * num_edges * num_tags)
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- scores = zeros(num_tags)
- for t in range(num_tags):
- li = lamba_index[phrase,context] + t
- scores[t] = conditionals[t] * exp(-ls[li] - ls[omega_offset + li])
- local_z = sum(scores)
-
- #print 'ddual', phrase, context, count, 'q =', scores / local_z
-
- for t in range(num_tags):
- deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z
- deriv[omega_offset + lamba_index[phrase,context] + t] -= count * scores[t] / local_z
-
- #print 'ddual', list(deriv)
- return deriv
-
- def constraints(ls):
- cons = zeros(num_phrases * num_tags + num_edges * num_tags)
-
- index = 0
- for phrase, ccs in edges_phrase_to_context:
- for t in range(num_tags):
- if delta > 0:
- total = delta
- for cprime, count in ccs:
- total -= ls[lamba_index[phrase, cprime] + t]
- cons[index] = total
- index += 1
-
- for context, pcs in edges_context_to_phrase:
- for t in range(num_tags):
- if gamma > 0:
- total = gamma
- for pprime, count in pcs:
- total -= ls[omega_offset + lamba_index[pprime, context] + t]
- cons[index] = total
- index += 1
-
- #print 'cons', cons
- return cons
-
- def constraints_deriv(ls):
- cons = zeros((num_phrases * num_tags + num_edges * num_tags, 2 * num_edges * num_tags))
-
- index = 0
- for phrase, ccs in edges_phrase_to_context:
- for t in range(num_tags):
- if delta > 0:
- d = cons[index,:]#zeros(num_edges * num_tags)
- for cprime, count in ccs:
- d[lamba_index[phrase, cprime] + t] = -1
- #cons[index] = d
- index += 1
-
- for context, pcs in edges_context_to_phrase:
- for t in range(num_tags):
- if gamma > 0:
- d = cons[index,:]#d = zeros(num_edges * num_tags)
- for pprime, count in pcs:
- d[omega_offset + lamba_index[pprime, context] + t] = -1
- #cons[index] = d
- index += 1
- #print 'dcons', cons
- return cons
-
- print 'Pre lambda optimisation dual', dual(lamba), 'primal', primal(lamba)
- #print 'lambda', lamba, lamba.shape
- #print 'bounds', [(0, max(delta, gamma))] * (2 * num_edges * num_tags)
-
- lamba = scipy.optimize.fmin_slsqp(dual, lamba,
- bounds=[(0, max(delta, gamma))] * (2 * num_edges * num_tags),
- f_ieqcons=constraints,
- fprime=dual_deriv,
- fprime_ieqcons=constraints_deriv,
- iprint=0)
- print 'Post lambda optimisation dual', dual(lamba), 'primal', primal(lamba)
-
- # E-step
- llh = log_z = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
- llh += log(cz) * count
-
- q = zeros(num_tags)
- li = lamba_index[phrase, context]
- for t in range(num_tags):
- q[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t])
- qz = sum(q)
- log_z += count * log(qz)
-
- for t in range(num_tags):
- tagCounts[p][t] += count * q[t] / qz
-
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * q[t] / qz
-
- print 'iteration', iteration, 'llh', llh, 'logz', log_z
-
- # M-step
- for p in range(num_phrases):
- tagDist[p] = normalise(tagCounts[p])
- for i in range(4):
- for t in range(num_tags):
- contextWordDist[i][t] = normalise(contextWordCounts[i][t])
-
-for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals
diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py
deleted file mode 100644
index 3b9cefed..00000000
--- a/gi/posterior-regularisation/train_pr_parallel.py
+++ /dev/null
@@ -1,333 +0,0 @@
-import sys
-import scipy.optimize
-from numpy import *
-from numpy.random import random, seed
-
-#
-# Step 1: load the concordance counts
-#
-
-edges_phrase_to_context = []
-edges_context_to_phrase = []
-types = {}
-context_types = {}
-num_edges = 0
-
-for line in sys.stdin:
- phrase, rest = line.strip().split('\t')
- parts = rest.split('|||')
- edges_phrase_to_context.append((phrase, []))
- for i in range(0, len(parts), 2):
- context, count = parts[i:i+2]
-
- ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split()))
- cnt = int(count.strip()[2:])
- edges_phrase_to_context[-1][1].append((ctx, cnt))
-
- cid = context_types.get(ctx, len(context_types))
- if cid == len(context_types):
- context_types[ctx] = cid
- edges_context_to_phrase.append((ctx, []))
- edges_context_to_phrase[cid][1].append((phrase, cnt))
-
- for token in ctx:
- types.setdefault(token, len(types))
- for token in phrase.split():
- types.setdefault(token, len(types))
-
- num_edges += 1
-
-#
-# Step 2: initialise the model parameters
-#
-
-num_tags = 25
-num_types = len(types)
-num_phrases = len(edges_phrase_to_context)
-num_contexts = len(edges_context_to_phrase)
-delta = float(sys.argv[1])
-assert sys.argv[2] in ('local', 'global')
-local = sys.argv[2] == 'local'
-if len(sys.argv) >= 2:
- seed(int(sys.argv[3]))
-
-print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', len(types), 'word types'
-
-def normalise(a):
- return a / float(sum(a))
-
-# Pr(tag | phrase)
-tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
-# Pr(context at pos i = w | tag) indexed by i, tag, word
-contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
-
-#
-# Step 3: expectation maximisation
-#
-
-class GlobalDualObjective:
- """
- Objective, log(z), for all phrases s.t. lambda >= 0, sum_c lambda_pct <= scale
- """
-
- def __init__(self, scale):
- self.scale = scale
- self.posterior = zeros((num_edges, num_tags))
- self.q = zeros((num_edges, num_tags))
- self.llh = 0
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- prob = tagDist[j][t]
- for k, token in enumerate(context):
- prob *= contextWordDist[k][t][types[token]]
- self.posterior[index,t] = prob
- z = sum(self.posterior[index,:])
- self.posterior[index,:] /= z
- self.llh += log(z) * count
- index += 1
-
- def objective(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- logz = 0
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t])
- local_z = sum(self.q[index,:])
- self.q[index,:] /= local_z
- logz += log(local_z) * count
- index += 1
-
- return logz
-
- # FIXME: recomputes q many more times than necessary
-
- def gradient(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- gradient = zeros((num_edges, num_tags))
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t])
- local_z = sum(self.q[index,:])
- self.q[index,:] /= local_z
- for t in range(num_tags):
- gradient[index,t] -= self.q[index,t] * count
- index += 1
-
- return gradient.ravel()
-
- def constraints(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- cons = ones((num_phrases, num_tags)) * self.scale
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- cons[j,t] -= ls[index,t] * count
- index += 1
- return cons.ravel()
-
- def constraints_gradient(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- gradient = zeros((num_phrases, num_tags, num_edges, num_tags))
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- gradient[j,t,index,t] -= count
- index += 1
- return gradient.reshape((num_phrases*num_tags, num_edges*num_tags))
-
- def optimize(self):
- ls = zeros(num_edges * num_tags)
- #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
- ls = scipy.optimize.fmin_slsqp(self.objective, ls,
- bounds=[(0, self.scale)] * num_edges * num_tags,
- f_ieqcons=self.constraints,
- fprime=self.gradient,
- fprime_ieqcons=self.constraints_gradient,
- iprint=0) # =2 for verbose
- #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
-
- # returns llh, kl and l1lmax contribution
- l1lmax = 0
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for t in range(num_tags):
- lmax = None
- for i, (context, count) in enumerate(edges):
- lmax = max(lmax, self.q[index+i,t])
- l1lmax += lmax
- index += len(edges)
-
- return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax
-
-class LocalDualObjective:
- """
- Local part of objective, log(z) relevant to lambda_p**.
- Optimised subject to lambda >= 0, sum_c lambda_pct <= scale forall t
- """
-
- def __init__(self, phraseId, scale):
- self.phraseId = phraseId
- self.scale = scale
- edges = edges_phrase_to_context[self.phraseId][1]
- self.posterior = zeros((len(edges), num_tags))
- self.q = zeros((len(edges), num_tags))
- self.llh = 0
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- prob = tagDist[phraseId][t]
- for j, token in enumerate(context):
- prob *= contextWordDist[j][t][types[token]]
- self.posterior[i,t] = prob
- z = sum(self.posterior[i,:])
- self.posterior[i,:] /= z
- self.llh += log(z) * count
-
- def objective(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- logz = 0
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t])
- local_z = sum(self.q[i,:])
- self.q[i,:] /= local_z
- logz += log(local_z) * count
-
- return logz
-
- # FIXME: recomputes q many more times than necessary
-
- def gradient(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- gradient = zeros((len(edges), num_tags))
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t])
- local_z = sum(self.q[i,:])
- self.q[i,:] /= local_z
- for t in range(num_tags):
- gradient[i,t] -= self.q[i,t] * count
-
- return gradient.ravel()
-
- def constraints(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- cons = ones(num_tags) * self.scale
- for t in range(num_tags):
- for i, (context, count) in enumerate(edges):
- cons[t] -= ls[i,t] * count
- return cons
-
- def constraints_gradient(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- gradient = zeros((num_tags, len(edges), num_tags))
- for t in range(num_tags):
- for i, (context, count) in enumerate(edges):
- gradient[t,i,t] -= count
- return gradient.reshape((num_tags, len(edges)*num_tags))
-
- def optimize(self, ls=None):
- edges = edges_phrase_to_context[self.phraseId][1]
- if ls == None:
- ls = zeros(len(edges) * num_tags)
- #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
- ls = scipy.optimize.fmin_slsqp(self.objective, ls,
- bounds=[(0, self.scale)] * len(edges) * num_tags,
- f_ieqcons=self.constraints,
- fprime=self.gradient,
- fprime_ieqcons=self.constraints_gradient,
- iprint=0) # =2 for verbose
- #print '\tlambda', list(ls)
- #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
-
- # returns llh, kl and l1lmax contribution
- l1lmax = 0
- for t in range(num_tags):
- lmax = None
- for i, (context, count) in enumerate(edges):
- lmax = max(lmax, self.q[i,t])
- l1lmax += lmax
-
- return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax, ls
-
-ls = [None] * num_phrases
-for iteration in range(20):
- tagCounts = [zeros(num_tags) for p in range(num_phrases)]
- contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)]
-
- # E-step
- llh = kl = l1lmax = 0
- if local:
- for p in range(num_phrases):
- o = LocalDualObjective(p, delta)
- #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0]
- #print '\toptimising lambda for phrase', p, 'ls', ls[p]
- obj = o.optimize(ls[p])
- #print '\tphrase', p, 'deltas', obj
- llh += obj[0]
- kl += obj[1]
- l1lmax += obj[2]
- ls[p] = obj[3]
-
- edges = edges_phrase_to_context[p][1]
- for j, (context, count) in enumerate(edges):
- for t in range(num_tags):
- tagCounts[p][t] += count * o.q[j,t]
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
-
- #print 'iteration', iteration, 'LOCAL objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
- else:
- o = GlobalDualObjective(delta)
- obj = o.optimize()
- llh, kl, l1lmax = o.optimize()
-
- index = 0
- for p, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- tagCounts[p][t] += count * o.q[index,t]
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * o.q[index,t]
- index += 1
-
- print 'iteration', iteration, 'objective', (llh - kl - delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
-
- # M-step
- for p in range(num_phrases):
- tagDist[p] = normalise(tagCounts[p])
- for i in range(4):
- for t in range(num_tags):
- contextWordDist[i][t] = normalise(contextWordCounts[i][t])
-
-for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals