diff options
author | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
---|---|---|
committer | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
commit | 5b8253e0e1f1393a509fb9975ba8c1347af758ed (patch) | |
tree | 1790470b1d07a0b4973ebce19192e896566ea60b /gi/posterior-regularisation | |
parent | 2389a5a8a43dda87c355579838559515b0428421 (diff) | |
parent | b203f8c5dc8cff1b9c9c2073832b248fcad0765a (diff) |
fixed conflicts
Diffstat (limited to 'gi/posterior-regularisation')
100 files changed, 0 insertions, 12238 deletions
diff --git a/gi/posterior-regularisation/Corpus.java b/gi/posterior-regularisation/Corpus.java deleted file mode 100644 index 07b27387..00000000 --- a/gi/posterior-regularisation/Corpus.java +++ /dev/null @@ -1,167 +0,0 @@ -import gnu.trove.TIntArrayList; - -import java.io.*; -import java.util.*; -import java.util.regex.Pattern; - -public class Corpus -{ - private Lexicon<String> tokenLexicon = new Lexicon<String>(); - private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>(); - private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>(); - private List<Edge> edges = new ArrayList<Edge>(); - private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>(); - private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>(); - - public class Edge - { - Edge(int phraseId, int contextId, int count) - { - this.phraseId = phraseId; - this.contextId = contextId; - this.count = count; - } - public int getPhraseId() - { - return phraseId; - } - public TIntArrayList getPhrase() - { - return phraseLexicon.lookup(phraseId); - } - public String getPhraseString() - { - StringBuffer b = new StringBuffer(); - for (int tid: getPhrase().toNativeArray()) - { - if (b.length() > 0) - b.append(" "); - b.append(tokenLexicon.lookup(tid)); - } - return b.toString(); - } - public int getContextId() - { - return contextId; - } - public TIntArrayList getContext() - { - return contextLexicon.lookup(contextId); - } - public String getContextString() - { - StringBuffer b = new StringBuffer(); - for (int tid: getContext().toNativeArray()) - { - if (b.length() > 0) - b.append(" "); - b.append(tokenLexicon.lookup(tid)); - } - return b.toString(); - } - public int getCount() - { - return count; - } - private int phraseId; - private int contextId; - private int count; - } - - List<Edge> getEdges() - { - return edges; - } - - int getNumEdges() - { - return edges.size(); - } - - int getNumPhrases() - { - return phraseLexicon.size(); - } - - List<Edge> getEdgesForPhrase(int phraseId) - { - return phraseToContext.get(phraseId); - } - - int getNumContexts() - { - return contextLexicon.size(); - } - - List<Edge> getEdgesForContext(int contextId) - { - return contextToPhrase.get(contextId); - } - - int getNumTokens() - { - return tokenLexicon.size(); - } - - static Corpus readFromFile(Reader in) throws IOException - { - Corpus c = new Corpus(); - - // read in line-by-line - BufferedReader bin = new BufferedReader(in); - String line; - Pattern separator = Pattern.compile(" \\|\\|\\| "); - - while ((line = bin.readLine()) != null) - { - // split into phrase and contexts - StringTokenizer st = new StringTokenizer(line, "\t"); - assert (st.hasMoreTokens()); - String phraseToks = st.nextToken(); - assert (st.hasMoreTokens()); - String rest = st.nextToken(); - assert (!st.hasMoreTokens()); - - // process phrase - st = new StringTokenizer(phraseToks, " "); - TIntArrayList ptoks = new TIntArrayList(); - while (st.hasMoreTokens()) - ptoks.add(c.tokenLexicon.insert(st.nextToken())); - int phraseId = c.phraseLexicon.insert(ptoks); - if (phraseId == c.phraseToContext.size()) - c.phraseToContext.add(new ArrayList<Edge>()); - - // process contexts - String[] parts = separator.split(rest); - assert (parts.length % 2 == 0); - for (int i = 0; i < parts.length; i += 2) - { - // process pairs of strings - context and count - TIntArrayList ctx = new TIntArrayList(); - String ctxString = parts[i]; - String countString = parts[i + 1]; - StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " "); - while (ctxStrtok.hasMoreTokens()) - { - String token = ctxStrtok.nextToken(); - if (!token.equals("<PHRASE>")) - ctx.add(c.tokenLexicon.insert(token)); - } - int contextId = c.contextLexicon.insert(ctx); - if (contextId == c.contextToPhrase.size()) - c.contextToPhrase.add(new ArrayList<Edge>()); - - assert (countString.startsWith("C=")); - Edge e = c.new Edge(phraseId, contextId, - Integer.parseInt(countString.substring(2).trim())); - c.edges.add(e); - - // index the edge for fast phrase, context lookup - c.phraseToContext.get(phraseId).add(e); - c.contextToPhrase.get(contextId).add(e); - } - } - - return c; - } -} diff --git a/gi/posterior-regularisation/Lexicon.java b/gi/posterior-regularisation/Lexicon.java deleted file mode 100644 index 9f0245ee..00000000 --- a/gi/posterior-regularisation/Lexicon.java +++ /dev/null @@ -1,32 +0,0 @@ -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class Lexicon<T> -{ - public int insert(T word) - { - Integer i = wordToIndex.get(word); - if (i == null) - { - i = indexToWord.size(); - wordToIndex.put(word, i); - indexToWord.add(word); - } - return i; - } - - public T lookup(int index) - { - return indexToWord.get(index); - } - - public int size() - { - return indexToWord.size(); - } - - private Map<T, Integer> wordToIndex = new HashMap<T, Integer>(); - private List<T> indexToWord = new ArrayList<T>(); -}
\ No newline at end of file diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java deleted file mode 100644 index 85bcfb89..00000000 --- a/gi/posterior-regularisation/PhraseContextModel.java +++ /dev/null @@ -1,466 +0,0 @@ -// Input of the form: -// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1 -// phrase TAB [context]+ -// where context = phrase ||| C=... which are separated by ||| - -// Model parameterised as follows: -// - each phrase, p, is allocated a latent state, t -// - this is used to generate the contexts, c -// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR - -// Training with EM: -// - e-step is estimating q(t) = P(t|p,c) for all x,c -// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t) -// - PR uses alternate e-step, which first optimizes lambda -// min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc] -// where -// q(t|p,c) propto p(t,c|p) exp( -phi_ptc ) -// Then q is used to obtain expectations for vanilla M-step. - -// Sexing it up: -// - learn p-specific conditionals P(t|p) -// - or generate phrase internals, e.g., generate edge words from -// different distribution to central words -// - agreement between phrase->context model and context->phrase model - -import java.io.*; -import optimization.gradientBasedMethods.*; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.GenericPickFirstStep; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.linesearch.WolfRuleLineSearch; -import optimization.projections.SimplexProjection; -import optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.NormalizedProjectedGradientL2Norm; -import optimization.stopCriteria.NormalizedValueDifference; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; -import optimization.util.MathUtils; -import java.util.*; -import java.util.regex.*; -import gnu.trove.TDoubleArrayList; -import gnu.trove.TIntArrayList; -import static java.lang.Math.*; - -class PhraseContextModel -{ - // model/optimisation configuration parameters - int numTags; - boolean posteriorRegularisation = true; - double constraintScale = 3; // FIXME: make configurable - - // copied from L1LMax in depparsing code - final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10; - final int maxZoomEvals = 10, maxExtrapolationIters = 200; - int maxProjectionIterations = 200; - int minOccurrencesForProjection = 0; - - // book keeping - int numPositions; - Random rng = new Random(); - - // training set - Corpus training; - - // model parameters (learnt) - double emissions[][][]; // position in 0 .. 3 x tag x word Pr(word | tag, position) - double prior[][]; // phrase x tag Pr(tag | phrase) - double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers - - PhraseContextModel(Corpus training, int tags) - { - this.training = training; - this.numTags = tags; - assert (!training.getEdges().isEmpty()); - assert (numTags > 1); - - // now initialise emissions - numPositions = training.getEdges().get(0).getContext().size(); - assert (numPositions > 0); - - emissions = new double[numPositions][numTags][training.getNumTokens()]; - prior = new double[training.getNumEdges()][numTags]; - if (posteriorRegularisation) - lambda = new double[training.getNumEdges() * numTags]; - - for (double[][] emissionTW : emissions) - { - for (double[] emissionW : emissionTW) - { - randomise(emissionW); -// for (int i = 0; i < emissionW.length; ++i) -// emissionW[i] = i+1; -// normalise(emissionW); - } - } - - for (double[] priorTag : prior) - { - randomise(priorTag); -// for (int i = 0; i < priorTag.length; ++i) -// priorTag[i] = i+1; -// normalise(priorTag); - } - } - - void expectationMaximisation(int numIterations) - { - double lastLlh = Double.NEGATIVE_INFINITY; - - for (int iteration = 0; iteration < numIterations; ++iteration) - { - double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()]; - double priorCounts[][] = new double[training.getNumPhrases()][numTags]; - - // E-step - double llh = 0; - if (posteriorRegularisation) - { - EStepDualObjective objective = new EStepDualObjective(); - - // copied from x2y2withconstraints -// LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1)); -// OptimizerStats stats = new OptimizerStats(); -// ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls); -// CompositeStopingCriteria compositeStop = new CompositeStopingCriteria(); -// compositeStop.add(new ProjectedGradientL2Norm(0.001)); -// compositeStop.add(new ValueDifference(0.001)); -// optimizer.setMaxIterations(50); -// boolean succeed = optimizer.optimize(objective,stats,compositeStop); - - // copied from depparser l1lmaxobjective - ProjectedOptimizerStats stats = new ProjectedOptimizerStats(); - GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1); - LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2); - ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch); - optimizer.setMaxIterations(maxProjectionIterations); - CompositeStopingCriteria stop = new CompositeStopingCriteria(); - stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision)); - stop.add(new NormalizedValueDifference(stoppingPrecision)); - boolean succeed = optimizer.optimize(objective, stats, stop); - - System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1)); - //System.out.println("Solution: " + objective.parameters); - if (!succeed) - System.out.println("Failed to optimize"); - //System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); - - //lambda = objective.getParameters(); - llh = objective.primal(); - - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - for (int t = 0; t < numTags; t++) - { - double p = objective.q.get(i).get(j).get(t); - priorCounts[i][t] += e.getCount() * p; - TIntArrayList tokens = e.getContext(); - for (int k = 0; k < tokens.size(); ++k) - emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p; - } - } - } - } - else - { - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - double probs[] = posterior(i, e); - double z = normalise(probs); - llh += log(z) * e.getCount(); - - TIntArrayList tokens = e.getContext(); - for (int t = 0; t < numTags; ++t) - { - priorCounts[i][t] += e.getCount() * probs[t]; - for (int k = 0; k < tokens.size(); ++k) - emissionsCounts[j][t][tokens.get(k)] += e.getCount() * probs[t]; - } - } - } - } - - // M-step: normalise - for (double[][] emissionTW : emissionsCounts) - for (double[] emissionW : emissionTW) - normalise(emissionW); - - for (double[] priorTag : priorCounts) - normalise(priorTag); - - emissions = emissionsCounts; - prior = priorCounts; - - System.out.println("Iteration " + iteration + " llh " + llh); - -// if (llh - lastLlh < 1e-4) -// break; -// else -// lastLlh = llh; - } - } - - static double normalise(double probs[]) - { - double z = 0; - for (double p : probs) - z += p; - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - return z; - } - - void randomise(double probs[]) - { - double z = 0; - for (int i = 0; i < probs.length; ++i) - { - probs[i] = 10 + rng.nextDouble(); - z += probs[i]; - } - - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - } - - static int argmax(double probs[]) - { - double m = Double.NEGATIVE_INFINITY; - int mi = -1; - for (int i = 0; i < probs.length; ++i) - { - if (probs[i] > m) - { - m = probs[i]; - mi = i; - } - } - return mi; - } - - double[] posterior(int phraseId, Corpus.Edge e) // unnormalised - { - double probs[] = new double[numTags]; - TIntArrayList tokens = e.getContext(); - for (int t = 0; t < numTags; ++t) - { - probs[t] = prior[phraseId][t]; - for (int k = 0; k < tokens.size(); ++k) - probs[t] *= emissions[k][t][tokens.get(k)]; - } - return probs; - } - - void displayPosterior() - { - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - for (Corpus.Edge e: edges) - { - double probs[] = posterior(i, e); - normalise(probs); - - // emit phrase - System.out.print(e.getPhraseString()); - System.out.print("\t"); - System.out.print(e.getContextString()); - System.out.print("||| C=" + e.getCount() + " |||"); - - int t = argmax(probs); - System.out.print(" " + t + " ||| " + probs[t]); - // for (int t = 0; t < numTags; ++t) - // System.out.print(" " + probs[t]); - System.out.println(); - } - } - } - - public static void main(String[] args) - { - assert (args.length >= 2); - try - { - Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0]))); - PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1])); - model.expectationMaximisation(Integer.parseInt(args[2])); - model.displayPosterior(); - } - catch (IOException e) - { - System.out.println("Failed to read input file: " + args[0]); - e.printStackTrace(); - } - } - - class EStepDualObjective extends ProjectedObjective - { - List<List<TDoubleArrayList>> conditionals; // phrase id x context # x tag - precomputed - List<List<TDoubleArrayList>> q; // ditto, but including exp(-lambda) terms - double objective = 0; // log(z) - // Objective.gradient = d log(z) / d lambda = E_q[phi] - double llh = 0; - - public EStepDualObjective() - { - super(); - // compute conditionals p(context, tag | phrase) for all training instances - conditionals = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases()); - q = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases()); - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - - conditionals.add(new ArrayList<TDoubleArrayList>(edges.size())); - q.add(new ArrayList<TDoubleArrayList>(edges.size())); - - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - double probs[] = posterior(i, e); - double z = normalise(probs); - llh += log(z) * e.getCount(); - conditionals.get(i).add(new TDoubleArrayList(probs)); - q.get(i).add(new TDoubleArrayList(probs)); - } - } - - gradient = new double[training.getNumEdges()*numTags]; - setInitialParameters(lambda); - computeObjectiveAndGradient(); - } - - @Override - public double[] projectPoint(double[] point) - { - SimplexProjection p = new SimplexProjection(constraintScale); - - double[] newPoint = point.clone(); - int edgeIndex = 0; - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - - for (int t = 0; t < numTags; t++) - { - double[] subPoint = new double[edges.size()]; - for (int j = 0; j < edges.size(); ++j) - subPoint[j] = point[edgeIndex+j*numTags+t]; - - p.project(subPoint); - for (int j = 0; j < edges.size(); ++j) - newPoint[edgeIndex+j*numTags+t] = subPoint[j]; - } - - edgeIndex += edges.size() * numTags; - } -// System.out.println("Proj from: " + Arrays.toString(point)); -// System.out.println("Proj to: " + Arrays.toString(newPoint)); - return newPoint; - } - - @Override - public void setParameters(double[] params) - { - super.setParameters(params); - computeObjectiveAndGradient(); - } - - @Override - public double[] getGradient() - { - gradientCalls += 1; - return gradient; - } - - @Override - public double getValue() - { - functionCalls += 1; - return objective; - } - - public void computeObjectiveAndGradient() - { - int edgeIndex = 0; - objective = 0; - Arrays.fill(gradient, 0); - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - - double z = 0; - for (int t = 0; t < numTags; t++) - { - double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]); - q.get(i).get(j).set(t, v); - z += v; - } - objective += log(z) * e.getCount(); - - for (int t = 0; t < numTags; t++) - { - double v = q.get(i).get(j).get(t) / z; - q.get(i).get(j).set(t, v); - gradient[edgeIndex+t] -= e.getCount() * v; - } - - edgeIndex += numTags; - } - } -// System.out.println("computeObjectiveAndGradient logz=" + objective); -// System.out.println("lambda= " + Arrays.toString(parameters)); -// System.out.println("gradient=" + Arrays.toString(gradient)); - } - - public String toString() - { - StringBuilder sb = new StringBuilder(); - sb.append(getClass().getCanonicalName()).append(" with "); - sb.append(parameters.length).append(" parameters and "); - sb.append(training.getNumPhrases() * numTags).append(" constraints"); - return sb.toString(); - } - - double primal() - { - // primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct] - // kl = sum_Y q(Y) log q(Y) / p(Y|X) - // = sum_Y q(Y) { -lambda . phi(Y) - log Z } - // = -log Z - lambda . E_q[phi] - // = -objective + lambda . gradient - - double kl = -objective + MathUtils.dotProduct(parameters, gradient); - double l1lmax = 0; - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - for (int t = 0; t < numTags; t++) - { - double lmax = Double.NEGATIVE_INFINITY; - for (int j = 0; j < edges.size(); ++j) - lmax = max(lmax, q.get(i).get(j).get(t)); - l1lmax += lmax; - } - } - - return llh + kl + constraintScale * l1lmax; - } - } -} diff --git a/gi/posterior-regularisation/README b/gi/posterior-regularisation/README deleted file mode 100644 index a3d54ffc..00000000 --- a/gi/posterior-regularisation/README +++ /dev/null @@ -1,3 +0,0 @@ - 557 ./cdec_extools/extractor -i btec/split.zh-en.al -c 500000 -L 12 -C | sort -t $'\t' -k 1 | ./cdec_extools/mr_stripe_rule_reduce > btec.concordance - 559 wc -l btec.concordance - 588 cat btec.concordance | sed 's/.* //' | awk '{ for (i=1; i < NF; i++) { x=substr($i, 1, 2); if (x == "C=") printf "\n"; else if (x != "||") printf "%s ", $i; }; printf "\n"; }' | sort | uniq | wc -l diff --git a/gi/posterior-regularisation/alphabet.hh b/gi/posterior-regularisation/alphabet.hh deleted file mode 100644 index 1db928da..00000000 --- a/gi/posterior-regularisation/alphabet.hh +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _alphabet_hh -#define _alphabet_hh - -#include <cassert> -#include <iosfwd> -#include <map> -#include <string> -#include <vector> - -// Alphabet: indexes a set of types -template <typename T> -class Alphabet: protected std::map<T, int> -{ -public: - Alphabet() {}; - - bool empty() const { return std::map<T,int>::empty(); } - int size() const { return std::map<T,int>::size(); } - - int operator[](const T &k) const - { - typename std::map<T,int>::const_iterator cit = find(k); - if (cit != std::map<T,int>::end()) - return cit->second; - else - return -1; - } - - int lookup(const T &k) const { return (*this)[k]; } - - int insert(const T &k) - { - int sz = size(); - assert((unsigned) sz == _items.size()); - - std::pair<typename std::map<T,int>::iterator, bool> - ins = std::map<T,int>::insert(make_pair(k, sz)); - - if (ins.second) - _items.push_back(k); - - return ins.first->second; - } - - const T &type(int i) const - { - assert(i >= 0); - assert(i < size()); - return _items[i]; - } - - std::ostream &display(std::ostream &out, int i) const - { - return out << type(i); - } - -private: - std::vector<T> _items; -}; - -#endif diff --git a/gi/posterior-regularisation/canned.concordance b/gi/posterior-regularisation/canned.concordance deleted file mode 100644 index 710973ff..00000000 --- a/gi/posterior-regularisation/canned.concordance +++ /dev/null @@ -1,4 +0,0 @@ -a 0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1 ||| 2 2 <PHRASE> 2 2 ||| C=1 -b 0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1 -c 2 2 <PHRASE> 2 2 ||| C=1 ||| 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1 -d 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1 diff --git a/gi/posterior-regularisation/em.cc b/gi/posterior-regularisation/em.cc deleted file mode 100644 index f6c9fd68..00000000 --- a/gi/posterior-regularisation/em.cc +++ /dev/null @@ -1,830 +0,0 @@ -// Input of the form: -// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1 -// phrase TAB [context]+ -// where context = phrase ||| C=... which are separated by ||| - -// Model parameterised as follows: -// - each phrase, p, is allocated a latent state, t -// - this is used to generate the contexts, c -// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR - -// Training with EM: -// - e-step is estimating P(t|p,c) for all x,c -// - m-step is estimating model parameters P(p,c,t) = P(t) P(p|t) P(c|t) - -// Sexing it up: -// - constrain the posteriors P(t|c) and P(t|p) to have few high-magnitude entries -// - improve the generation of phrase internals, e.g., generate edge words from -// different distribution to central words - -#include "alphabet.hh" -#include "log_add.hh" -#include <algorithm> -#include <fstream> -#include <iostream> -#include <iterator> -#include <map> -#include <sstream> -#include <stdexcept> -#include <vector> -#include <tr1/random> -#include <tr1/tuple> -#include <nlopt.h> - -using namespace std; -using namespace std::tr1; - -const int numTags = 5; -const int numIterations = 100; -const bool posterior_regularisation = true; -const double PHRASE_VIOLATION_WEIGHT = 10; -const double CONTEXT_VIOLATION_WEIGHT = 0; -const bool includePhraseProb = false; - -// Data structures: -Alphabet<string> lexicon; -typedef vector<int> Phrase; -typedef tuple<int, int, int, int> Context; -Alphabet<Phrase> phrases; -Alphabet<Context> contexts; - -typedef map<int, int> ContextCounts; -typedef map<int, int> PhraseCounts; -typedef map<int, ContextCounts> PhraseToContextCounts; -typedef map<int, PhraseCounts> ContextToPhraseCounts; - -PhraseToContextCounts concordancePhraseToContexts; -ContextToPhraseCounts concordanceContextToPhrases; - -typedef vector<double> Dist; -typedef vector<Dist> ConditionalDist; -Dist prior; // class -> P(class) -vector<ConditionalDist> probCtx; // word -> class -> P(word | class), for each position of context word -ConditionalDist probPhrase; // class -> P(word | class) -Dist probPhraseLength; // class -> P(length | class) expressed as geometric distribution parameter - -mt19937 randomGenerator((size_t) time(NULL)); -uniform_real<double> uniDist(0.0, 1e-1); -variate_generator< mt19937, uniform_real<double> > rng(randomGenerator, uniDist); - -void addRandomNoise(Dist &d); -void normalise(Dist &d); -void addTo(Dist &d, const Dist &e); -int argmax(const Dist &d); - -map<Phrase, map<Context, int> > lambda_indices; - -Dist conditional_probs(const Phrase &phrase, const Context &context, double *normalisation = 0); -template <typename T> -Dist -penalised_conditionals(const Phrase &phrase, const Context &context, - const T &lambda, double *normalisation); -//Dist penalised_conditionals(const Phrase &phrase, const Context &context, const double *lambda, double *normalisation = 0); -double penalised_log_likelihood(int n, const double *lambda, double *gradient, void *data); -void optimise_lambda(double delta, double gamma, vector<double> &lambda); -double expected_violation_phrases(const double *lambda); -double expected_violation_contexts(const double *lambda); -double primal_kl_divergence(const double *lambda); -double dual(const double *lambda); -void print_primal_dual(const double *lambda, double delta, double gamma); - -ostream &operator<<(ostream &, const Phrase &); -ostream &operator<<(ostream &, const Context &); -ostream &operator<<(ostream &, const Dist &); -ostream &operator<<(ostream &, const ConditionalDist &); - -int -main(int argc, char *argv[]) -{ - randomGenerator.seed(time(NULL)); - - int edges = 0; - istream &input = cin; - while (input.good()) - { - // read the phrase - string phraseString; - Phrase phrase; - getline(input, phraseString, '\t'); - istringstream pinput(phraseString); - string token; - while (pinput >> token) - phrase.push_back(lexicon.insert(token)); - int phraseId = phrases.insert(phrase); - - // read the rest, storing each context - string remainder; - getline(input, remainder, '\n'); - istringstream rinput(remainder); - Context context(-1, -1, -1, -1); - int index = 0; - while (rinput >> token) - { - if (token != "|||" && token != "<PHRASE>") - { - if (index < 4) - { - // eugh! damn templates - switch (index) - { - case 0: get<0>(context) = lexicon.insert(token); break; - case 1: get<1>(context) = lexicon.insert(token); break; - case 2: get<2>(context) = lexicon.insert(token); break; - case 3: get<3>(context) = lexicon.insert(token); break; - default: assert(false); - } - index += 1; - } - else if (token.find("C=") == 0) - { - int contextId = contexts.insert(context); - int count = atoi(token.substr(strlen("C=")).c_str()); - concordancePhraseToContexts[phraseId][contextId] += count; - concordanceContextToPhrases[contextId][phraseId] += count; - index = 0; - context = Context(-1, -1, -1, -1); - edges += 1; - } - } - } - - // trigger EOF - input >> ws; - } - - cout << "Read in " << phrases.size() << " phrases" - << " and " << contexts.size() << " contexts" - << " and " << edges << " edges" - << " and " << lexicon.size() << " word types\n"; - - // FIXME: filter out low count phrases and low count contexts (based on individual words?) - // now populate model parameters with uniform + random noise - prior.resize(numTags, 1.0); - addRandomNoise(prior); - normalise(prior); - - probCtx.resize(4, ConditionalDist(numTags, Dist(lexicon.size(), 1.0))); - if (includePhraseProb) - probPhrase.resize(numTags, Dist(lexicon.size(), 1.0)); - for (int t = 0; t < numTags; ++t) - { - for (int j = 0; j < 4; ++j) - { - addRandomNoise(probCtx[j][t]); - normalise(probCtx[j][t]); - } - if (includePhraseProb) - { - addRandomNoise(probPhrase[t]); - normalise(probPhrase[t]); - } - } - if (includePhraseProb) - { - probPhraseLength.resize(numTags, 0.5); // geometric distribution p=0.5 - addRandomNoise(probPhraseLength); - } - - cout << "\tprior: " << prior << "\n"; - //cout << "\tcontext: " << probCtx << "\n"; - //cout << "\tphrase: " << probPhrase << "\n"; - //cout << "\tphraseLen: " << probPhraseLength << endl; - - vector<double> lambda; - - // now do EM training - for (int iteration = 0; iteration < numIterations; ++iteration) - { - cout << "EM iteration " << iteration << endl; - - if (posterior_regularisation) - optimise_lambda(PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT, lambda); - //cout << "\tlambda " << lambda << endl; - - Dist countsPrior(numTags, 0.0); - vector<ConditionalDist> countsCtx(4, ConditionalDist(numTags, Dist(lexicon.size(), 1e-10))); - ConditionalDist countsPhrase(numTags, Dist(lexicon.size(), 1e-10)); - Dist countsPhraseLength(numTags, 0.0); - Dist nPhrases(numTags, 0.0); - - double llh = 0; - for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin(); - pcit != concordancePhraseToContexts.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - - // e-step: estimate latent class probs; compile (class,word) stats for m-step - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - - double z = 0; - Dist tagCounts; - if (!posterior_regularisation) - tagCounts = conditional_probs(phrase, context, &z); - else - tagCounts = penalised_conditionals(phrase, context, lambda, &z); - - llh += log(z) * ccit->second; - addTo(countsPrior, tagCounts); // FIXME: times ccit->secon - - for (int t = 0; t < numTags; ++t) - { - for (int j = 0; j < 4; ++j) - countsCtx[j][t][get<0>(context)] += tagCounts[t] * ccit->second; - - if (includePhraseProb) - { - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - countsPhrase[t][*pit] += tagCounts[t] * ccit->second; - countsPhraseLength[t] += phrase.size() * tagCounts[t] * ccit->second; - nPhrases[t] += tagCounts[t] * ccit->second; - } - } - } - } - - cout << "M-step\n"; - - // m-step: normalise prior and (class,word) stats and assign to model parameters - normalise(countsPrior); - prior = countsPrior; - for (int t = 0; t < numTags; ++t) - { - //cout << "\t\tt " << t << " prior " << countsPrior[t] << "\n"; - for (int j = 0; j < 4; ++j) - normalise(countsCtx[j][t]); - if (includePhraseProb) - { - normalise(countsPhrase[t]); - countsPhraseLength[t] = nPhrases[t] / countsPhraseLength[t]; - } - } - probCtx = countsCtx; - if (includePhraseProb) - { - probPhrase = countsPhrase; - probPhraseLength = countsPhraseLength; - } - - double *larray = new double[lambda.size()]; - copy(lambda.begin(), lambda.end(), larray); - print_primal_dual(larray, PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT); - delete [] larray; - - //cout << "\tllh " << llh << endl; - //cout << "\tprior: " << prior << "\n"; - //cout << "\tcontext: " << probCtx << "\n"; - //cout << "\tphrase: " << probPhrase << "\n"; - //cout << "\tphraseLen: " << probPhraseLength << "\n"; - } - - // output class membership - for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin(); - pcit != concordancePhraseToContexts.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - Dist tagCounts = conditional_probs(phrase, context, 0); - cout << phrase << " ||| " << context << " ||| " << argmax(tagCounts) << "\n"; - } - } - - return 0; -} - -void addRandomNoise(Dist &d) -{ - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - *dit += rng(); -} - -void normalise(Dist &d) -{ - double z = 0; - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - z += *dit; - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - *dit /= z; -} - -void addTo(Dist &d, const Dist &e) -{ - assert(d.size() == e.size()); - for (int i = 0; i < (int) d.size(); ++i) - d[i] += e[i]; -} - -int argmax(const Dist &d) -{ - double best = d[0]; - int index = 0; - for (int i = 1; i < (int) d.size(); ++i) - { - if (d[i] > best) - { - best = d[i]; - index = i; - } - } - return index; -} - -ostream &operator<<(ostream &out, const Phrase &phrase) -{ - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - lexicon.display(((pit == phrase.begin()) ? out : out << " "), *pit); - return out; -} - -ostream &operator<<(ostream &out, const Context &context) -{ - lexicon.display(out, get<0>(context)); - lexicon.display(out << " ", get<1>(context)); - lexicon.display(out << " <PHRASE> ", get<2>(context)); - lexicon.display(out << " ", get<3>(context)); - return out; -} - -ostream &operator<<(ostream &out, const Dist &dist) -{ - for (Dist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit) - out << ((dit == dist.begin()) ? "" : " ") << *dit; - return out; -} - -ostream &operator<<(ostream &out, const ConditionalDist &dist) -{ - for (ConditionalDist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit) - out << ((dit == dist.begin()) ? "" : "; ") << *dit; - return out; -} - -// FIXME: slow - just use the phrase index, context index to do the mapping -// (n.b. it's a sparse setup, not just equal to 3d array index) -int -lambda_index(const Phrase &phrase, const Context &context, int tag) -{ - return lambda_indices[phrase][context] + tag; -} - -template <typename T> -Dist -penalised_conditionals(const Phrase &phrase, const Context &context, - const T &lambda, double *normalisation) -{ - Dist d = conditional_probs(phrase, context, 0); - - double z = 0; - for (int t = 0; t < numTags; ++t) - { - d[t] *= exp(-lambda[lambda_index(phrase, context, t)]); - z += d[t]; - } - - if (normalisation) - *normalisation = z; - - for (int t = 0; t < numTags; ++t) - d[t] /= z; - - return d; -} - -Dist -conditional_probs(const Phrase &phrase, const Context &context, double *normalisation) -{ - Dist tagCounts(numTags, 0.0); - double z = 0; - for (int t = 0; t < numTags; ++t) - { - double prob = prior[t]; - prob *= (probCtx[0][t][get<0>(context)] * probCtx[1][t][get<1>(context)] * - probCtx[2][t][get<2>(context)] * probCtx[3][t][get<3>(context)]); - - if (includePhraseProb) - { - prob *= pow(1 - probPhraseLength[t], phrase.size() - 1) * probPhraseLength[t]; - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - prob *= probPhrase[t][*pit]; - } - - tagCounts[t] = prob; - z += prob; - } - if (normalisation) - *normalisation = z; - - for (int t = 0; t < numTags; ++t) - tagCounts[t] /= z; - - return tagCounts; -} - -double -penalised_log_likelihood(int n, const double *lambda, double *grad, void *) -{ - // return log Z(lambda, theta) over the corpus - // where theta are the global parameters (prior, probCtx*, probPhrase*) - // and lambda are lagrange multipliers for the posterior sparsity constraints - // - // this is formulated as: - // f = log Z(lambda) = sum_i log ( sum_i p_theta(t_i|p_i,c_i) exp [-lambda_{t_i,p_i,c_i}] ) - // where i indexes the training examples - specifying the (p, c) pair (which may occur with count > 1) - // - // with derivative: - // f'_{tpc} = frac { - count(t,p,c) p_theta(t|p,c) exp (-lambda_{t,p,c}) } - // { sum_t' p_theta(t'|p,c) exp (-lambda_{t',p,c}) } - - //cout << "penalised_log_likelihood with lambda "; - //copy(lambda, lambda+n, ostream_iterator<double>(cout, " ")); - //cout << "\n"; - - double f = 0; - if (grad) - { - for (int i = 0; i < n; ++i) - grad[i] = 0.0; - } - - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - double z = 0; - Dist scores = penalised_conditionals(phrase, context, lambda, &z); - - f += ccit->second * log(z); - //cout << "\tphrase: " << phrase << " context: " << context << " count: " << ccit->second << " z " << z << endl; - //cout << "\t\tscores: " << scores << "\n"; - - if (grad) - { - for (int t = 0; t < numTags; ++t) - { - int i = lambda_index(phrase, context, t); // FIXME: redundant lookups - assert(grad[i] == 0.0); - grad[i] = - ccit->second * scores[t]; - } - } - } - } - - //cout << "penalised_log_likelihood returning " << f; - //if (grad) - //{ - //cout << "\ngradient: "; - //copy(grad, grad+n, ostream_iterator<double>(cout, " ")); - //} - //cout << "\n"; - - return f; -} - -typedef struct -{ - // one of p or c should be set to -1, in which case it will be marginalised out - // i.e. sum_p' lambda_{p'ct} <= threshold - // or sum_c' lambda_{pc't} <= threshold - int p, c, t, threshold; -} constraint_data; - -double -constraint_and_gradient(int n, const double *lambda, double *grad, void *data) -{ - constraint_data *d = (constraint_data *) data; - assert(d->t >= 0); - assert(d->threshold >= 0); - - //cout << "constraint_and_gradient: t " << d->t << " p " << d->p << " c " << d->c << " tau " << d->threshold << endl; - //cout << "\tlambda "; - //copy(lambda, lambda+n, ostream_iterator<double>(cout, " ")); - //cout << "\n"; - - // FIXME: it's crazy to use a dense gradient here => will only have a handful of non-zero entries - if (grad) - { - for (int i = 0; i < n; ++i) - grad[i] = 0.0; - } - - //cout << "constraint_and_gradient: " << d->p << "; " << d->c << "; " << d->t << "; " << d->threshold << endl; - - if (d->p >= 0) - { - assert(d->c < 0); - // sum_c lambda_pct <= delta [a.k.a. threshold] - // => sum_c lambda_pct - delta <= 0 - // derivative_pct = { 1, if p and t match; 0, otherwise } - - double val = -d->threshold; - - const Phrase &phrase = phrases.type(d->p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(d->p); - assert(pcit != concordancePhraseToContexts.end()); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - int i = lambda_index(phrase, context, d->t); - val += lambda[i]; - if (grad) grad[i] = 1; - } - //cout << "\treturning " << val << endl; - - return val; - } - else - { - assert(d->c >= 0); - assert(d->p < 0); - // sum_p lambda_pct <= gamma [a.k.a. threshold] - // => sum_p lambda_pct - gamma <= 0 - // derivative_pct = { 1, if c and t match; 0, otherwise } - - double val = -d->threshold; - - const Context &context = contexts.type(d->c); - ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(d->c); - assert(cpit != concordanceContextToPhrases.end()); - for (PhraseCounts::iterator pcit = cpit->second.begin(); - pcit != cpit->second.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - int i = lambda_index(phrase, context, d->t); - val += lambda[i]; - if (grad) grad[i] = 1; - } - //cout << "\treturning " << val << endl; - - return val; - } -} - -void -optimise_lambda(double delta, double gamma, vector<double> &lambdav) -{ - int num_lambdas = lambdav.size(); - if (lambda_indices.empty() || lambdav.empty()) - { - lambda_indices.clear(); - lambdav.clear(); - - int i = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - lambda_indices[phrase][context] = i; - i += numTags; - } - } - num_lambdas = i; - lambdav.resize(num_lambdas); - } - //cout << "optimise_lambda: #langrange multipliers " << num_lambdas << endl; - - // FIXME: better to work with an implicit representation to save memory usage - int num_constraints = (((delta > 0) ? phrases.size() : 0) + ((gamma > 0) ? contexts.size() : 0)) * numTags; - //cout << "optimise_lambda: #constraints " << num_constraints << endl; - constraint_data *data = new constraint_data[num_constraints]; - int i = 0; - if (delta > 0) - { - for (int p = 0; p < phrases.size(); ++p) - { - for (int t = 0; t < numTags; ++t, ++i) - { - constraint_data &d = data[i]; - d.p = p; - d.c = -1; - d.t = t; - d.threshold = delta; - } - } - } - - if (gamma > 0) - { - for (int c = 0; c < contexts.size(); ++c) - { - for (int t = 0; t < numTags; ++t, ++i) - { - constraint_data &d = data[i]; - d.p = -1; - d.c = c; - d.t = t; - d.threshold = gamma; - } - } - } - assert(i == num_constraints); - - double lambda[num_lambdas]; - double lb[num_lambdas], ub[num_lambdas]; - for (i = 0; i < num_lambdas; ++i) - { - lambda[i] = lambdav[i]; // starting value - lb[i] = 0; // lower bound - if (delta <= 0) // upper bound - ub[i] = gamma; - else if (gamma <= 0) - ub[i] = delta; - else - assert(false); - } - - //print_primal_dual(lambda, delta, gamma); - - double minf; - int error_code = nlopt_minimize_constrained(NLOPT_LN_COBYLA, num_lambdas, penalised_log_likelihood, NULL, - num_constraints, constraint_and_gradient, data, sizeof(constraint_data), - lb, ub, lambda, &minf, -HUGE_VAL, 0.0, 0.0, 1e-4, NULL, 0, 0.0); - //cout << "optimise error code " << error_code << endl; - - //print_primal_dual(lambda, delta, gamma); - - delete [] data; - - if (error_code < 0) - cout << "WARNING: optimisation failed with error code: " << error_code << endl; - //else - //{ - //cout << "success; minf " << minf << endl; - //print_primal_dual(lambda, delta, gamma); - //} - - lambdav = vector<double>(&lambda[0], &lambda[0] + num_lambdas); -} - -// FIXME: inefficient - cache the scores -double -expected_violation_phrases(const double *lambda) -{ - // sum_pt max_c E_q[phi_pct] - double violation = 0; - - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - - for (int t = 0; t < numTags; ++t) - { - double best = 0; - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - Dist scores = penalised_conditionals(phrase, context, lambda, 0); - best = max(best, scores[t]); - } - violation += best; - } - } - - return violation; -} - -// FIXME: inefficient - cache the scores -double -expected_violation_contexts(const double *lambda) -{ - // sum_ct max_p E_q[phi_pct] - double violation = 0; - - for (int c = 0; c < contexts.size(); ++c) - { - const Context &context = contexts.type(c); - ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(c); - - for (int t = 0; t < numTags; ++t) - { - double best = 0; - for (PhraseCounts::iterator pit = cpit->second.begin(); - pit != cpit->second.end(); ++pit) - { - const Phrase &phrase = phrases.type(pit->first); - Dist scores = penalised_conditionals(phrase, context, lambda, 0); - best = max(best, scores[t]); - } - violation += best; - } - } - - return violation; -} - -// FIXME: possibly inefficient -double -primal_likelihood() // FIXME: primal evaluation needs to use lambda and calculate l1linf terms -{ - double llh = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - double z = 0; - Dist scores = conditional_probs(phrase, context, &z); - llh += ccit->second * log(z); - } - } - return llh; -} - -// FIXME: inefficient - cache the scores -double -primal_kl_divergence(const double *lambda) -{ - // return KL(q || p) = sum_y q(y) { log q(y) - log p(y | x) } - // = sum_y q(y) { log p(y | x) - lambda . phi(x, y) - log Z - log p(y | x) } - // = sum_y q(y) { - lambda . phi(x, y) } - log Z - // and q(y) factors with each edge, ditto for Z - - double feature_sum = 0, log_z = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - - double local_z = 0; - double local_f = 0; - Dist d = conditional_probs(phrase, context, 0); - for (int t = 0; t < numTags; ++t) - { - int i = lambda_index(phrase, context, t); - double s = d[t] * exp(-lambda[i]); - local_f += lambda[i] * s; - local_z += s; - } - - log_z += ccit->second * log(local_z); - feature_sum += ccit->second * (local_f / local_z); - } - } - - return -feature_sum - log_z; -} - -// FIXME: inefficient - cache the scores -double -dual(const double *lambda) -{ - // return log(Z) = - log { sum_y p(y | x) exp( - lambda . phi(x, y) } - // n.b. have flipped the sign as we're minimising - - double z = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - double lz = 0; - Dist scores = penalised_conditionals(phrase, context, lambda, &z); - z += lz * ccit->second; - } - } - return log(z); -} - -void -print_primal_dual(const double *lambda, double delta, double gamma) -{ - double likelihood = primal_likelihood(); - double kl = primal_kl_divergence(lambda); - double sum_pt = expected_violation_phrases(lambda); - double sum_ct = expected_violation_contexts(lambda); - //double d = dual(lambda); - - cout << "\tllh=" << likelihood - << " kl=" << kl - << " violations phrases=" << sum_pt - << " contexts=" << sum_ct - //<< " primal=" << (kl + delta * sum_pt + gamma * sum_ct) - //<< " dual=" << d - << " objective=" << (likelihood - kl + delta * sum_pt + gamma * sum_ct) - << endl; -} diff --git a/gi/posterior-regularisation/invert.hh b/gi/posterior-regularisation/invert.hh deleted file mode 100644 index d06356e9..00000000 --- a/gi/posterior-regularisation/invert.hh +++ /dev/null @@ -1,45 +0,0 @@ -// The following code inverts the matrix input using LU-decomposition with -// backsubstitution of unit vectors. Reference: Numerical Recipies in C, 2nd -// ed., by Press, Teukolsky, Vetterling & Flannery. -// Code written by Fredrik Orderud. -// http://www.crystalclearsoftware.com/cgi-bin/boost_wiki/wiki.pl?LU_Matrix_Inversion - -#ifndef INVERT_MATRIX_HPP -#define INVERT_MATRIX_HPP - -// REMEMBER to update "lu.hpp" header includes from boost-CVS -#include <boost/numeric/ublas/vector.hpp> -#include <boost/numeric/ublas/vector_proxy.hpp> -#include <boost/numeric/ublas/matrix.hpp> -#include <boost/numeric/ublas/triangular.hpp> -#include <boost/numeric/ublas/lu.hpp> -#include <boost/numeric/ublas/io.hpp> - -namespace ublas = boost::numeric::ublas; - -/* Matrix inversion routine. - Uses lu_factorize and lu_substitute in uBLAS to invert a matrix */ -template<class T> -bool invert_matrix(const ublas::matrix<T>& input, ublas::matrix<T>& inverse) -{ - using namespace boost::numeric::ublas; - typedef permutation_matrix<std::size_t> pmatrix; - // create a working copy of the input - matrix<T> A(input); - // create a permutation matrix for the LU-factorization - pmatrix pm(A.size1()); - - // perform LU-factorization - int res = lu_factorize(A,pm); - if( res != 0 ) return false; - - // create identity matrix of "inverse" - inverse.assign(ublas::identity_matrix<T>(A.size1())); - - // backsubstitute to get the inverse - lu_substitute(A, pm, inverse); - - return true; -} - -#endif //INVERT_MATRIX_HPP diff --git a/gi/posterior-regularisation/linesearch.py b/gi/posterior-regularisation/linesearch.py deleted file mode 100644 index 5a3f2e9c..00000000 --- a/gi/posterior-regularisation/linesearch.py +++ /dev/null @@ -1,58 +0,0 @@ -## Automatically adapted for scipy Oct 07, 2005 by convertcode.py - -from scipy.optimize import minpack2 -import numpy - -import __builtin__ -pymin = __builtin__.min - -def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval, - args=(), c1=1e-4, c2=0.9, amax=50): - - fc = 0 - gc = 0 - phi0 = old_fval - derphi0 = numpy.dot(gfk,pk) - alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0) - # trevor: added this test - alpha1 = pymin(alpha1,amax) - - if isinstance(myfprime,type(())): - eps = myfprime[1] - fprime = myfprime[0] - newargs = (f,eps) + args - gradient = False - else: - fprime = myfprime - newargs = args - gradient = True - - xtol = 1e-14 - amin = 1e-8 - isave = numpy.zeros((2,), numpy.intc) - dsave = numpy.zeros((13,), float) - task = 'START' - fval = old_fval - gval = gfk - - while 1: - stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2, - xtol, task, amin, amax,isave,dsave) - #print 'minpack2.dcsrch', alpha1, phi0, derphi0, c1, c2, xtol, task, amin, amax,isave,dsave - #print 'returns', stp,fval,derphi,task - - if task[:2] == 'FG': - alpha1 = stp - fval = f(xk+stp*pk,*args) - fc += 1 - gval = fprime(xk+stp*pk,*newargs) - if gradient: gc += 1 - else: fc += len(xk) + 1 - phi0 = fval - derphi0 = numpy.dot(gval,pk) - else: - break - - if task[:5] == 'ERROR' or task[1:4] == 'WARN': - stp = None # failed - return stp, fc, gc, fval, old_fval, gval diff --git a/gi/posterior-regularisation/log_add.hh b/gi/posterior-regularisation/log_add.hh deleted file mode 100644 index e0620c5a..00000000 --- a/gi/posterior-regularisation/log_add.hh +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef log_add_hh -#define log_add_hh - -#include <limits> -#include <iostream> -#include <cassert> -#include <cmath> - -template <typename T> -struct Log -{ - static T zero() { return -std::numeric_limits<T>::infinity(); } - - static T add(T l1, T l2) - { - if (l1 == zero()) return l2; - if (l1 > l2) - return l1 + std::log(1 + exp(l2 - l1)); - else - return l2 + std::log(1 + exp(l1 - l2)); - } - - static T subtract(T l1, T l2) - { - //std::assert(l1 >= l2); - return l1 + log(1 - exp(l2 - l1)); - } -}; - -#endif diff --git a/gi/posterior-regularisation/prjava.jar b/gi/posterior-regularisation/prjava.jar deleted file mode 120000 index da8bf761..00000000 --- a/gi/posterior-regularisation/prjava.jar +++ /dev/null @@ -1 +0,0 @@ -prjava/prjava-20100708.jar
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/Makefile b/gi/posterior-regularisation/prjava/Makefile deleted file mode 100755 index bd3bfca0..00000000 --- a/gi/posterior-regularisation/prjava/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -all: - ant dist - -check: - echo no tests - -clean: - ant clean diff --git a/gi/posterior-regularisation/prjava/build.xml b/gi/posterior-regularisation/prjava/build.xml deleted file mode 100644 index 7222b3c8..00000000 --- a/gi/posterior-regularisation/prjava/build.xml +++ /dev/null @@ -1,38 +0,0 @@ -<project name="prjava" default="dist" basedir="."> - <!-- set global properties for this build --> - <property name="src" location="src"/> - <property name="build" location="build"/> - <property name="dist" location="lib"/> - <path id="classpath"> - <pathelement location="lib/trove-2.0.2.jar"/> - <pathelement location="lib/optimization.jar"/> - <pathelement location="lib/jopt-simple-3.2.jar"/> - <pathelement location="lib/commons-math-2.1.jar"/> - </path> - - <target name="init"> - <!-- Create the time stamp --> - <tstamp/> - <!-- Create the build directory structure used by compile --> - <mkdir dir="${build}"/> - </target> - - <target name="compile" depends="init" - description="compile the source " > - <!-- Compile the java code from ${src} into ${build} --> - <javac srcdir="${src}" destdir="${build}" includeantruntime="false"> - <classpath refid="classpath"/> - </javac> - </target> - - <target name="dist" depends="compile" - description="generate the distribution" > - <jar jarfile="${dist}/prjava-${DSTAMP}.jar" basedir="${build}"/> - <symlink link="./prjava.jar" resource="${dist}/prjava-${DSTAMP}.jar" overwrite="true"/> - </target> - - <target name="clean" - description="clean up" > - <delete dir="${build}"/> - </target> -</project> diff --git a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar b/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar Binary files differdeleted file mode 100644 index 43b4b369..00000000 --- a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar +++ /dev/null diff --git a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar b/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar Binary files differdeleted file mode 100644 index 56373621..00000000 --- a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar +++ /dev/null diff --git a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar b/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar Binary files differdeleted file mode 100644 index 3e59fbf3..00000000 --- a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar +++ /dev/null diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java deleted file mode 100644 index be0a6ed6..00000000 --- a/gi/posterior-regularisation/prjava/src/arr/F.java +++ /dev/null @@ -1,99 +0,0 @@ -package arr;
-
-import java.util.Arrays;
-import java.util.Random;
-
-public class F {
- public static Random rng = new Random();
-
- public static void randomise(double probs[])
- {
- randomise(probs, true);
- }
-
- public static void randomise(double probs[], boolean normalise)
- {
- double z = 0;
- for (int i = 0; i < probs.length; ++i)
- {
- probs[i] = 10 + rng.nextDouble();
- if (normalise)
- z += probs[i];
- }
-
- if (normalise)
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- }
-
- public static void uniform(double probs[])
- {
- for (int i = 0; i < probs.length; ++i)
- probs[i] = 1.0 / probs.length;
- }
-
- public static void l1normalize(double [] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- sum+=a[i];
- }
- if(sum==0)
- Arrays.fill(a, 1.0/a.length);
- else
- {
- for(int i=0;i<a.length;i++){
- a[i]/=sum;
- }
- }
- }
-
- public static void l1normalize(double [][] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- sum+=a[i][j];
- }
- }
- if(sum==0){
- return;
- }
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]/=sum;
- }
- }
- }
-
- public static double l1norm(double a[]){
- // FIXME: this isn't the l1 norm for a < 0
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i];
- }
- return norm;
- }
-
- public static double l2norm(double a[]){
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i]*a[i];
- }
- return Math.sqrt(norm);
- }
-
- public static int argmax(double probs[])
- {
- double m = Double.NEGATIVE_INFINITY;
- int mi = -1;
- for (int i = 0; i < probs.length; ++i)
- {
- if (probs[i] > m)
- {
- m = probs[i];
- mi = i;
- }
- }
- return mi;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/data/Corpus.java b/gi/posterior-regularisation/prjava/src/data/Corpus.java deleted file mode 100644 index 425ede11..00000000 --- a/gi/posterior-regularisation/prjava/src/data/Corpus.java +++ /dev/null @@ -1,233 +0,0 @@ -package data;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class Corpus {
-
- public static final String alphaFilename="../posdata/corpus.alphabet";
- public static final String tagalphaFilename="../posdata/corpus.tag.alphabet";
-
-// public static final String START_SYM="<s>";
- public static final String END_SYM="<e>";
- public static final String NUM_TOK="<NUM>";
-
- public static final String UNK_TOK="<unk>";
-
- private ArrayList<String[]>sent;
- private ArrayList<int[]>data;
-
- public ArrayList<String[]>tag;
- public ArrayList<int[]>tagData;
-
- public static boolean convertNumTok=true;
-
- private HashMap<String,Integer>freq;
- public HashMap<String,Integer>vocab;
-
- public HashMap<String,Integer>tagVocab;
- private int tagV;
-
- private int V;
-
- public static void main(String[] args) {
- Corpus c=new Corpus("../posdata/en_test.conll");
- System.out.println(
- Arrays.toString(c.get(0))
- );
- System.out.println(
- Arrays.toString(c.getInt(0))
- );
-
- System.out.println(
- Arrays.toString(c.get(1))
- );
- System.out.println(
- Arrays.toString(c.getInt(1))
- );
- }
-
- public Corpus(String filename,HashMap<String,Integer>dict){
- V=0;
- tagV=0;
- freq=new HashMap<String,Integer>();
- tagVocab=new HashMap<String,Integer>();
- vocab=dict;
-
- sent=new ArrayList<String[]>();
- tag=new ArrayList<String[]>();
-
- Scanner sc=io.FileUtil.openInFile(filename);
- ArrayList<String>s=new ArrayList<String>();
- // s.add(START_SYM);
- while(sc.hasNextLine()){
- String line=sc.nextLine();
- String toks[]=line.split("\t");
- if(toks.length<2){
- s.add(END_SYM);
- sent.add(s.toArray(new String[0]));
- s=new ArrayList<String>();
- // s.add(START_SYM);
- continue;
- }
- String tok=toks[1].toLowerCase();
- s.add(tok);
- }
- sc.close();
-
- buildData();
- }
-
- public Corpus(String filename){
- V=0;
- freq=new HashMap<String,Integer>();
- vocab=new HashMap<String,Integer>();
- tagVocab=new HashMap<String,Integer>();
-
- sent=new ArrayList<String[]>();
- tag=new ArrayList<String[]>();
-
- System.out.println("Reading:"+filename);
-
- Scanner sc=io.FileUtil.openInFile(filename);
- ArrayList<String>s=new ArrayList<String>();
- ArrayList<String>tags=new ArrayList<String>();
- //s.add(START_SYM);
- while(sc.hasNextLine()){
- String line=sc.nextLine();
- String toks[]=line.split("\t");
- if(toks.length<2){
- s.add(END_SYM);
- tags.add(END_SYM);
- if(s.size()>2){
- sent.add(s.toArray(new String[0]));
- tag.add(tags.toArray(new String [0]));
- }
- s=new ArrayList<String>();
- tags=new ArrayList<String>();
- // s.add(START_SYM);
- continue;
- }
-
- String tok=toks[1].toLowerCase();
- if(convertNumTok && tok.matches(".*\\d.*")){
- tok=NUM_TOK;
- }
- s.add(tok);
-
- if(toks.length>3){
- tok=toks[3].toLowerCase();
- }else{
- tok="_";
- }
- tags.add(tok);
-
- }
- sc.close();
-
- for(int i=0;i<sent.size();i++){
- String[]toks=sent.get(i);
- for(int j=0;j<toks.length;j++){
- addVocab(toks[j]);
- addTag(tag.get(i)[j]);
- }
- }
-
- buildVocab();
- buildData();
- System.out.println(data.size()+"sentences, "+vocab.keySet().size()+" word types");
- }
-
- public String[] get(int idx){
- return sent.get(idx);
- }
-
- private void addVocab(String s){
- Integer integer=freq.get(s);
- if(integer==null){
- integer=0;
- }
- freq.put(s, integer+1);
- }
-
- public int tokIdx(String tok){
- Integer integer=vocab.get(tok);
- if(integer==null){
- return V;
- }
- return integer;
- }
-
- public int tagIdx(String tok){
- Integer integer=tagVocab.get(tok);
- if(integer==null){
- return tagV;
- }
- return integer;
- }
-
- private void buildData(){
- data=new ArrayList<int[]>();
- for(int i=0;i<sent.size();i++){
- String s[]=sent.get(i);
- data.add(new int [s.length]);
- for(int j=0;j<s.length;j++){
- data.get(i)[j]=tokIdx(s[j]);
- }
- }
-
- tagData=new ArrayList<int[]>();
- for(int i=0;i<tag.size();i++){
- String s[]=tag.get(i);
- tagData.add(new int [s.length]);
- for(int j=0;j<s.length;j++){
- tagData.get(i)[j]=tagIdx(s[j]);
- }
- }
- sent=null;
- tag=null;
- System.gc();
- }
-
- public int [] getInt(int idx){
- return data.get(idx);
- }
-
- /**
- *
- * @return size of vocabulary
- */
- public int getVocabSize(){
- return V;
- }
-
- public int [][]getAllData(){
- return data.toArray(new int [0][]);
- }
-
- public int [][]getTagData(){
- return tagData.toArray(new int [0][]);
- }
-
- private void buildVocab(){
- for (String key:freq.keySet()){
- if(freq.get(key)>2){
- vocab.put(key, V);
- V++;
- }
- }
- io.SerializedObjects.writeSerializedObject(vocab, alphaFilename);
- io.SerializedObjects.writeSerializedObject(tagVocab,tagalphaFilename);
- }
-
- private void addTag(String tag){
- Integer i=tagVocab.get(tag);
- if(i==null){
- tagVocab.put(tag, tagV);
- tagV++;
- }
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMM.java b/gi/posterior-regularisation/prjava/src/hmm/HMM.java deleted file mode 100644 index 17a4679f..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMM.java +++ /dev/null @@ -1,579 +0,0 @@ -package hmm;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Scanner;
-
-public class HMM {
-
-
- //trans[i][j]=prob of going FROM i to j
- double [][]trans;
- double [][]emit;
- double []pi;
- int [][]data;
- int [][]tagdata;
-
- double logtrans[][];
-
- public HMMObjective o;
-
- public static void main(String[] args) {
-
- }
-
- public HMM(int n_state,int n_emit,int [][]data){
- trans=new double [n_state][n_state];
- emit=new double[n_state][n_emit];
- pi=new double [n_state];
- System.out.println(" random initial parameters");
- fillRand(trans);
- fillRand(emit);
- fillRand(pi);
-
- this.data=data;
-
- }
-
- private void fillRand(double [][] a){
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]=Math.random();
- }
- l1normalize(a[i]);
- }
- }
- private void fillRand(double []a){
- for(int i=0;i<a.length;i++){
- a[i]=Math.random();
- }
- l1normalize(a);
- }
-
- private double loglikely=0;
-
- public void EM(){
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
- loglikely=0;
-
- //E
- for(int i=0;i<data.length;i++){
-
- double [][][] post=forwardBackward(data[i]);
- incrementExpCnt(post, data[i],
- trans_exp_cnt,
- emit_exp_cnt,
- start_exp_cnt);
-
-
- if(i%100==0){
- System.out.print(".");
- }
- if(i%1000==0){
- System.out.println(i);
- }
-
- }
- System.out.println("Log likelihood: "+loglikely);
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
- }
-
- private double [][][]forwardBackward(int [] seq){
- double a[][]=new double [seq.length][trans.length];
- double b[][]=new double [seq.length][trans.length];
-
- int len=seq.length;
- //initialize the first step
- for(int i=0;i<trans.length;i++){
- a[0][i]=emit[i][seq[0]]*pi[i];
- b[len-1][i]=1;
- }
-
- //log of denominator for likelyhood
- double c=Math.log(l1norm(a[0]));
-
- l1normalize(a[0]);
- l1normalize(b[len-1]);
-
-
-
- //forward
- for(int n=1;n<len;n++){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- a[n][i]+=trans[j][i]*a[n-1][j];
- }
- a[n][i]*=emit[i][seq[n]];
- }
- c+=Math.log(l1norm(a[n]));
- l1normalize(a[n]);
- }
-
- loglikely+=c;
-
- //backward
- for(int n=len-2;n>=0;n--){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- b[n][i]+=trans[i][j]*b[n+1][j]*emit[j][seq[n+1]];
- }
- }
- l1normalize(b[n]);
- }
-
-
- //expected transition
- double p[][][]=new double [seq.length][trans.length][trans.length];
- for(int n=0;n<len-1;n++){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- p[n][i][j]=a[n][i]*trans[i][j]*emit[j][seq[n+1]]*b[n+1][j];
-
- }
- }
-
- l1normalize(p[n]);
- }
- return p;
- }
-
- private void incrementExpCnt(
- double post[][][],int [] seq,
- double trans_exp_cnt[][],
- double emit_exp_cnt[][],
- double start_exp_cnt[])
- {
-
- for(int n=0;n<post.length;n++){
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[n][i][j];
- trans_exp_cnt[i][j]+=post[n][i][j];
- }
-
- emit_exp_cnt[i][seq[n]]+=py;
-
- }
- }
-
- //the first state
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[0][i][j];
- }
- start_exp_cnt[i]+=py;
- }
-
-
- //the last state
- int len=post.length;
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[len-2][j][i];
- }
- emit_exp_cnt[i][seq[len-1]]+=py;
- }
- }
-
- public void l1normalize(double [] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- sum+=a[i];
- }
- if(sum==0){
- return ;
- }
- for(int i=0;i<a.length;i++){
- a[i]/=sum;
- }
- }
-
- public void l1normalize(double [][] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- sum+=a[i][j];
- }
- }
- if(sum==0){
- return;
- }
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]/=sum;
- }
- }
- }
-
- public void writeModel(String modelFilename) throws FileNotFoundException, IOException{
- PrintStream ps=io.FileUtil.printstream(new File(modelFilename));
- ps.println(trans.length);
- ps.println("Initial Probabilities:");
- for(int i=0;i<pi.length;i++){
- ps.print(pi[i]+"\t");
- }
- ps.println();
- ps.println("Transition Probabilities:");
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans[i].length;j++){
- ps.print(trans[i][j]+"\t");
- }
- ps.println();
- }
- ps.println("Emission Probabilities:");
- ps.println(emit[0].length);
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<emit[i].length;j++){
- ps.println(emit[i][j]);
- }
- ps.println();
- }
- ps.close();
- }
-
- public HMM(){
-
- }
-
- public void readModel(String modelFilename){
- Scanner sc=io.FileUtil.openInFile(modelFilename);
-
- int n_state=sc.nextInt();
- sc.nextLine();
- sc.nextLine();
- pi=new double [n_state];
- for(int i=0;i<n_state;i++){
- pi[i]=sc.nextDouble();
- }
- sc.nextLine();
- sc.nextLine();
- trans=new double[n_state][n_state];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=sc.nextDouble();
- }
- }
- sc.nextLine();
- sc.nextLine();
-
- int n_obs=sc.nextInt();
- emit=new double[n_state][n_obs];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=sc.nextDouble();
- }
- }
- sc.close();
- }
-
- public int []viterbi(int [] seq){
- double [][]p=new double [seq.length][trans.length];
- int backp[][]=new int [seq.length][trans.length];
-
- for(int i=0;i<trans.length;i++){
- p[0][i]=Math.log(emit[i][seq[0]]*pi[i]);
- }
-
- double a[][]=logtrans;
- if(logtrans==null){
- a=new double [trans.length][trans.length];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- a[i][j]=Math.log(trans[i][j]);
- }
- }
- logtrans=a;
- }
-
- double maxprob=0;
- for(int n=1;n<seq.length;n++){
- for(int i=0;i<trans.length;i++){
- maxprob=p[n-1][0]+a[0][i];
- backp[n][i]=0;
- for(int j=1;j<trans.length;j++){
- double prob=p[n-1][j]+a[j][i];
- if(maxprob<prob){
- backp[n][i]=j;
- maxprob=prob;
- }
- }
- p[n][i]=maxprob+Math.log(emit[i][seq[n]]);
- }
- }
-
- maxprob=p[seq.length-1][0];
- int maxIdx=0;
- for(int i=1;i<trans.length;i++){
- if(p[seq.length-1][i]>maxprob){
- maxprob=p[seq.length-1][i];
- maxIdx=i;
- }
- }
- int ans[]=new int [seq.length];
- ans[seq.length-1]=maxIdx;
- for(int i=seq.length-2;i>=0;i--){
- ans[i]=backp[i+1][ans[i+1]];
- }
- return ans;
- }
-
- public double l1norm(double a[]){
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i];
- }
- return norm;
- }
-
- public double [][]getEmitProb(){
- return emit;
- }
-
- public int [] sample(int terminalSym){
- ArrayList<Integer > s=new ArrayList<Integer>();
- int state=sample(pi);
- int sym=sample(emit[state]);
- while(sym!=terminalSym){
- s.add(sym);
- state=sample(trans[state]);
- sym=sample(emit[state]);
- }
-
- int ans[]=new int [s.size()];
- for(int i=0;i<ans.length;i++){
- ans[i]=s.get(i);
- }
- return ans;
- }
-
- public int sample(double p[]){
- double r=Math.random();
- double sum=0;
- for(int i=0;i<p.length;i++){
- sum+=p[i];
- if(sum>=r){
- return i;
- }
- }
- return p.length-1;
- }
-
- public void train(int tagdata[][]){
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
-
- for(int i=0;i<tagdata.length;i++){
- start_exp_cnt[tagdata[i][0]]++;
-
- for(int j=0;j<tagdata[i].length;j++){
- if(j+1<tagdata[i].length){
- trans_exp_cnt[ tagdata[i][j] ] [ tagdata[i][j+1] ]++;
- }
- emit_exp_cnt[tagdata[i][j]][data[i][j]]++;
- }
-
- }
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
- }
-
- private void addOneSmooth(double a[][]){
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]+=0.01;
- }
- //l1normalize(a[i]);
- }
- }
-
- public void PREM(){
-
- o.optimizeWithProjectedGradientDescent();
-
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
-
- o.loglikelihood=0;
- //E
- for(int sentNum=0;sentNum<data.length;sentNum++){
-
- double [][][] post=o.forwardBackward(sentNum);
- incrementExpCnt(post, data[sentNum],
- trans_exp_cnt,
- emit_exp_cnt,
- start_exp_cnt);
-
-
- if(sentNum%100==0){
- System.out.print(".");
- }
- if(sentNum%1000==0){
- System.out.println(sentNum);
- }
-
- }
-
- System.out.println("Log likelihood: "+o.getValue());
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- //avoid NAN
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
-
- }
-
- public void computeMaxwt(double[][]maxwt, int[][] d){
-
- for(int sentNum=0;sentNum<d.length;sentNum++){
- double post[][][]=forwardBackward(d[sentNum]);
-
- for(int n=0;n<post.length;n++){
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[n][i][j];
- }
-
- if(py>maxwt[i][d[sentNum][n]]){
- maxwt[i][d[sentNum][n]]=py;
- }
-
- }
- }
-
- //the last state
- int len=post.length;
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[len-2][j][i];
- }
-
- if(py>maxwt[i][d[sentNum][len-1]]){
- maxwt[i][d[sentNum][len-1]]=py;
- }
-
- }
-
- }
-
- }
-
-}//end of class
diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java deleted file mode 100644 index 70b6c966..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java +++ /dev/null @@ -1,351 +0,0 @@ -package hmm;
-
-import gnu.trove.TIntArrayList;
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-public class HMMObjective extends ProjectedObjective{
-
-
- private static final double GRAD_DIFF = 3;
- public static double INIT_STEP_SIZE=10;
- public static double VAL_DIFF=1000;
-
- private HMM hmm;
- double[] newPoint ;
-
- //posterior[sent num][tok num][tag]=index into lambda
- private int posteriorMap[][][];
- //projection[word][tag].get(occurence)=index into lambda
- private TIntArrayList projectionMap[][];
-
- //Size of the simplex
- public double scale=10;
- private SimplexProjection projection;
-
- private int wordFreq[];
- private static int MIN_FREQ=10;
- private int numWordsToProject=0;
-
- private int n_param;
-
- public double loglikelihood;
-
- public HMMObjective(HMM h){
- hmm=h;
-
- countWords();
- buildMap();
-
- gradient=new double [n_param];
- projection = new SimplexProjection(scale);
- newPoint = new double[n_param];
- setInitialParameters(new double[n_param]);
-
- }
-
- /**@brief counts word frequency in the corpus
- *
- */
- private void countWords(){
- wordFreq=new int [hmm.emit[0].length];
- for(int i=0;i<hmm.data.length;i++){
- for(int j=0;j<hmm.data[i].length;j++){
- wordFreq[hmm.data[i][j]]++;
- }
- }
- }
-
- /**@brief build posterior and projection indices
- *
- */
- private void buildMap(){
- //number of sentences hidden states and words
- int n_states=hmm.trans.length;
- int n_words=hmm.emit[0].length;
- int n_sents=hmm.data.length;
-
- n_param=0;
- posteriorMap=new int[n_sents][][];
- projectionMap=new TIntArrayList[n_words][];
- for(int sentNum=0;sentNum<n_sents;sentNum++){
- int [] data=hmm.data[sentNum];
- posteriorMap[sentNum]=new int[data.length][n_states];
- numWordsToProject=0;
- for(int i=0;i<data.length;i++){
- int word=data[i];
- for(int state=0;state<n_states;state++){
- if(wordFreq[word]>MIN_FREQ){
- if(projectionMap[word]==null){
- projectionMap[word]=new TIntArrayList[n_states];
- }
- // if(posteriorMap[sentNum][i]==null){
- // posteriorMap[sentNum][i]=new int[n_states];
- // }
-
- posteriorMap[sentNum][i][state]=n_param;
- if(projectionMap[word][state]==null){
- projectionMap[word][state]=new TIntArrayList();
- numWordsToProject++;
- }
- projectionMap[word][state].add(n_param);
- n_param++;
- }
- else{
- posteriorMap[sentNum][i][state]=-1;
- }
- }
- }
- }
- }
-
- @Override
- public double[] projectPoint(double[] point) {
- // TODO Auto-generated method stub
- for(int i=0;i<projectionMap.length;i++){
-
- if(projectionMap[i]==null){
- //this word is not constrained
- continue;
- }
-
- for(int j=0;j<projectionMap[i].length;j++){
- TIntArrayList instances=projectionMap[i][j];
- double[] toProject = new double[instances.size()];
-
- for (int k = 0; k < toProject.length; k++) {
- // System.out.print(instances.get(k) + " ");
- toProject[k] = point[instances.get(k)];
- }
-
- projection.project(toProject);
- for (int k = 0; k < toProject.length; k++) {
- newPoint[instances.get(k)]=toProject[k];
- }
- }
- }
- return newPoint;
- }
-
- @Override
- public double[] getGradient() {
- // TODO Auto-generated method stub
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- // TODO Auto-generated method stub
- functionCalls++;
- return loglikelihood;
- }
-
-
- @Override
- public String toString() {
- // TODO Auto-generated method stub
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < parameters.length; i++) {
- sb.append(parameters[i]+" ");
- if(i%100==0){
- sb.append("\n");
- }
- }
- sb.append("\n");
- /*
- for (int i = 0; i < gradient.length; i++) {
- sb.append(gradient[i]+" ");
- if(i%100==0){
- sb.append("\n");
- }
- }
- sb.append("\n");
- */
- return sb.toString();
- }
-
-
- /**
- * @param seq
- * @return posterior probability of each transition
- */
- public double [][][]forwardBackward(int sentNum){
- int [] seq=hmm.data[sentNum];
- int n_states=hmm.trans.length;
- double a[][]=new double [seq.length][n_states];
- double b[][]=new double [seq.length][n_states];
-
- int len=seq.length;
-
- boolean constrained=
- (projectionMap[seq[0]]!=null);
-
- //initialize the first step
- for(int i=0;i<n_states;i++){
- a[0][i]=hmm.emit[i][seq[0]]*hmm.pi[i];
- if(constrained){
- a[0][i]*=
- Math.exp(- parameters[ posteriorMap[sentNum][0][i] ] );
- }
- b[len-1][i]=1;
- }
-
- loglikelihood+=Math.log(hmm.l1norm(a[0]));
- hmm.l1normalize(a[0]);
- hmm.l1normalize(b[len-1]);
-
- //forward
- for(int n=1;n<len;n++){
-
- constrained=
- (projectionMap[seq[n]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
- a[n][i]+=hmm.trans[j][i]*a[n-1][j];
- }
- a[n][i]*=hmm.emit[i][seq[n]];
-
- if(constrained){
- a[n][i]*=
- Math.exp(- parameters[ posteriorMap[sentNum][n][i] ] );
- }
-
- }
- loglikelihood+=Math.log(hmm.l1norm(a[n]));
- hmm.l1normalize(a[n]);
- }
-
- //temp variable for e^{-\lambda}
- double factor=1;
- //backward
- for(int n=len-2;n>=0;n--){
-
- constrained=
- (projectionMap[seq[n+1]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
-
- if(constrained){
- factor=
- Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
- }else{
- factor=1;
- }
-
- b[n][i]+=hmm.trans[i][j]*b[n+1][j]*hmm.emit[j][seq[n+1]]*factor;
-
- }
- }
- hmm.l1normalize(b[n]);
- }
-
- //expected transition
- double p[][][]=new double [seq.length][n_states][n_states];
- for(int n=0;n<len-1;n++){
-
- constrained=
- (projectionMap[seq[n+1]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
-
- if(constrained){
- factor=
- Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
- }else{
- factor=1;
- }
-
- p[n][i][j]=a[n][i]*hmm.trans[i][j]*
- hmm.emit[j][seq[n+1]]*b[n+1][j]*factor;
-
- }
- }
-
- hmm.l1normalize(p[n]);
- }
- return p;
- }
-
- public void optimizeWithProjectedGradientDescent(){
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
-
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(10);
- updateFunction();
- boolean succed = optimizer.optimize(this,stats,compositeStop);
- System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction(){
-
- updateCalls++;
- loglikelihood=0;
-
- for(int sentNum=0;sentNum<hmm.data.length;sentNum++){
- double [][][]p=forwardBackward(sentNum);
-
- for(int n=0;n<p.length-1;n++){
- for(int i=0;i<p[n].length;i++){
- if(projectionMap[hmm.data[sentNum][n]]!=null){
- double posterior=0;
- for(int j=0;j<p[n][i].length;j++){
- posterior+=p[n][i][j];
- }
- gradient[posteriorMap[sentNum][n][i]]=-posterior;
- }
- }
- }
-
- //the last state
- int n=p.length-2;
- for(int i=0;i<p[n].length;i++){
- if(projectionMap[hmm.data[sentNum][n+1]]!=null){
-
- double posterior=0;
- for(int j=0;j<p[n].length;j++){
- posterior+=p[n][j][i];
- }
- gradient[posteriorMap[sentNum][n+1][i]]=-posterior;
-
- }
- }
- }
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/hmm/POS.java b/gi/posterior-regularisation/prjava/src/hmm/POS.java deleted file mode 100644 index bdcbc683..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/POS.java +++ /dev/null @@ -1,120 +0,0 @@ -package hmm;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.HashMap;
-
-import data.Corpus;
-
-public class POS {
-
- //public String trainFilename="../posdata/en_train.conll";
- public static String trainFilename="../posdata/small_train.txt";
-// public static String trainFilename="../posdata/en_test.conll";
-// public static String trainFilename="../posdata/trial1.txt";
-
- public static String testFilename="../posdata/en_test.conll";
- //public static String testFilename="../posdata/trial1.txt";
-
- public static String predFilename="../posdata/en_test.predict.conll";
- public static String modelFilename="../posdata/posModel.out";
- public static final int ITER=20;
- public static final int N_STATE=30;
-
- public static void main(String[] args) {
- //POS p=new POS();
- //POS p=new POS(true);
- try {
- PRPOS();
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
-
- public POS() throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
- for(int i=0;i<ITER;i++){
- System.out.println("Iter"+i);
- hmm.EM();
- if((i+1)%10==0){
- hmm.writeModel(modelFilename+i);
- }
- }
-
- hmm.writeModel(modelFilename);
-
- Corpus test=new Corpus(testFilename,c.vocab);
-
- PrintStream ps= io.FileUtil.printstream(new File(predFilename));
-
- int [][]data=test.getAllData();
- for(int i=0;i<data.length;i++){
- int []tag=hmm.viterbi(data[i]);
- String sent[]=test.get(i);
- for(int j=0;j<data[i].length;j++){
- ps.println(sent[j]+"\t"+tag[j]);
- }
- ps.println();
- }
- ps.close();
- }
-
- //POS induction with L1/Linf constraints
- public static void PRPOS() throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
- hmm.o=new HMMObjective(hmm);
- for(int i=0;i<ITER;i++){
- System.out.println("Iter: "+i);
- hmm.PREM();
- if((i+1)%10==0){
- hmm.writeModel(modelFilename+i);
- }
- }
-
- hmm.writeModel(modelFilename);
- }
-
-
- public POS(boolean supervised) throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(c.tagVocab.size() , c.getVocabSize()+1,c.getAllData());
- hmm.train(c.getTagData());
-
- hmm.writeModel(modelFilename);
-
- Corpus test=new Corpus(testFilename,c.vocab);
-
- HashMap<String, Integer>tagVocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
- String [] tagdict=new String [tagVocab.size()+1];
- for(String key:tagVocab.keySet()){
- tagdict[tagVocab.get(key)]=key;
- }
- tagdict[tagdict.length-1]=Corpus.UNK_TOK;
-
- System.out.println(c.vocab.get("<e>"));
-
- PrintStream ps= io.FileUtil.printstream(new File(predFilename));
-
- int [][]data=test.getAllData();
- for(int i=0;i<data.length;i++){
- int []tag=hmm.viterbi(data[i]);
- String sent[]=test.get(i);
- for(int j=0;j<data[i].length;j++){
- ps.println(sent[j]+"\t"+tagdict[tag[j]]);
- }
- ps.println();
- }
- ps.close();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/io/FileUtil.java b/gi/posterior-regularisation/prjava/src/io/FileUtil.java deleted file mode 100644 index 6720d087..00000000 --- a/gi/posterior-regularisation/prjava/src/io/FileUtil.java +++ /dev/null @@ -1,48 +0,0 @@ -package io;
-import java.util.*;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-import java.io.*;
-public class FileUtil
-{
- public static BufferedReader reader(File file) throws FileNotFoundException, IOException
- {
- if (file.getName().endsWith(".gz"))
- return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF8"));
- else
- return new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF8"));
- }
-
- public static PrintStream printstream(File file) throws FileNotFoundException, IOException
- {
- if (file.getName().endsWith(".gz"))
- return new PrintStream(new GZIPOutputStream(new FileOutputStream(file)), true, "UTF8");
- else
- return new PrintStream(new FileOutputStream(file), true, "UTF8");
- }
-
- public static Scanner openInFile(String filename)
- {
- Scanner localsc=null;
- try
- {
- localsc=new Scanner(new FileInputStream(filename), "UTF8");
-
- }catch(IOException ioe){
- System.out.println(ioe.getMessage());
- }
- return localsc;
- }
-
- public static FileInputStream openInputStream(String infilename)
- {
- FileInputStream fis=null;
- try {
- fis = new FileInputStream(infilename);
-
- } catch (IOException ioe) {
- System.out.println(ioe.getMessage());
- }
- return fis;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java b/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java deleted file mode 100644 index d1631b51..00000000 --- a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java +++ /dev/null @@ -1,83 +0,0 @@ -package io; - - - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.ObjectInput; -import java.io.ObjectInputStream; -import java.io.ObjectOutput; -import java.io.ObjectOutputStream; -import java.io.OutputStream; - -public class SerializedObjects -{ - public static void writeSerializedObject(Object object, String outFile) - { - ObjectOutput output = null; - try{ - //use buffering - OutputStream file = new FileOutputStream(outFile); - OutputStream buffer = new BufferedOutputStream( file ); - output = new ObjectOutputStream( buffer ); - output.writeObject(object); - buffer.close(); - file.close(); - } - catch(IOException ex){ - ex.printStackTrace(); - } - finally{ - try { - if (output != null) { - //flush and close "output" and its underlying streams - output.close(); - } - } - catch (IOException ex ){ - ex.printStackTrace(); - } - } - } - - public static Object readSerializedObject(String inputFile) - { - ObjectInput input = null; - Object recoveredObject=null; - try{ - //use buffering - InputStream file = new FileInputStream(inputFile); - InputStream buffer = new BufferedInputStream(file); - input = new ObjectInputStream(buffer); - //deserialize the List - recoveredObject = input.readObject(); - } - catch(IOException ex){ - ex.printStackTrace(); - } - catch (ClassNotFoundException ex){ - ex.printStackTrace(); - } - catch(Exception ex) - { - ex.printStackTrace(); - } - finally{ - try { - if ( input != null ) { - //close "input" and its underlying streams - input.close(); - } - } - catch (IOException ex){ - ex.printStackTrace(); - } - } - return recoveredObject; - } - -}
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java deleted file mode 100644 index 25fa7f09..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java +++ /dev/null @@ -1,110 +0,0 @@ -package optimization.examples; - - -import optimization.gradientBasedMethods.ConjugateGradient; -import optimization.gradientBasedMethods.GradientDescent; -import optimization.gradientBasedMethods.LBFGS; -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.Optimizer; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimization; -import optimization.linesearch.LineSearchMethod; -import optimization.stopCriteria.GradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - -/** - * - * @author javg - * f(x) = \sum_{i=1}^{N-1} \left[ (1-x_i)^2+ 100 (x_{i+1} - x_i^2 )^2 \right] \quad \forall x\in\mathbb{R}^N. - */ -public class GeneralizedRosenbrock extends Objective{ - - - - public GeneralizedRosenbrock(int dimensions){ - parameters = new double[dimensions]; - java.util.Arrays.fill(parameters, 0); - gradient = new double[dimensions]; - - } - - public GeneralizedRosenbrock(int dimensions, double[] params){ - parameters = params; - gradient = new double[dimensions]; - } - - - public double getValue() { - functionCalls++; - double value = 0; - for(int i = 0; i < parameters.length-1; i++){ - value += MathUtils.square(1-parameters[i]) + 100*MathUtils.square(parameters[i+1] - MathUtils.square(parameters[i])); - } - - return value; - } - - /** - * gx = -2(1-x) -2x200(y-x^2) - * gy = 200(y-x^2) - */ - public double[] getGradient() { - gradientCalls++; - java.util.Arrays.fill(gradient,0); - for(int i = 0; i < parameters.length-1; i++){ - gradient[i]+=-2*(1-parameters[i]) - 400*parameters[i]*(parameters[i+1] - MathUtils.square(parameters[i])); - gradient[i+1]+=200*(parameters[i+1] - MathUtils.square(parameters[i])); - } - return gradient; - } - - - - - - - - public String toString(){ - String res =""; - for(int i = 0; i < parameters.length; i++){ - res += "P" + i+ " " + parameters[i]; - } - res += " Value " + getValue(); - return res; - } - - public static void main(String[] args) { - - GeneralizedRosenbrock o = new GeneralizedRosenbrock(2); - System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); - ; - - System.out.println("Doing Gradient descent"); - //LineSearchMethod wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1); - StopingCriteria stop = new GradientL2Norm(0.001); - LineSearchMethod ls = new ArmijoLineSearchMinimization(); - Optimizer optimizer = new GradientDescent(ls); - OptimizerStats stats = new OptimizerStats(); - optimizer.setMaxIterations(1000); - boolean succed = optimizer.optimize(o,stats, stop); - System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); - System.out.println("Doing Conjugate Gradient descent"); - o = new GeneralizedRosenbrock(2); - // wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1); - optimizer = new ConjugateGradient(ls); - stats = new OptimizerStats(); - optimizer.setMaxIterations(1000); - succed = optimizer.optimize(o,stats,stop); - System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); - System.out.println("Doing Quasi newton descent"); - o = new GeneralizedRosenbrock(2); - optimizer = new LBFGS(ls,10); - stats = new OptimizerStats(); - optimizer.setMaxIterations(1000); - succed = optimizer.optimize(o,stats,stop); - System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); - - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java deleted file mode 100644 index f087681e..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java +++ /dev/null @@ -1,128 +0,0 @@ -package optimization.examples; - - -import optimization.gradientBasedMethods.ConjugateGradient; - -import optimization.gradientBasedMethods.GradientDescent; -import optimization.gradientBasedMethods.LBFGS; -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.GenericPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.linesearch.WolfRuleLineSearch; -import optimization.stopCriteria.GradientL2Norm; -import optimization.stopCriteria.StopingCriteria; - - -/** - * @author javg - * - */ -public class x2y2 extends Objective{ - - - //Implements function ax2+ by2 - double a, b; - public x2y2(double a, double b){ - this.a = a; - this.b = b; - parameters = new double[2]; - parameters[0] = 4; - parameters[1] = 4; - gradient = new double[2]; - } - - public double getValue() { - functionCalls++; - return a*parameters[0]*parameters[0]+b*parameters[1]*parameters[1]; - } - - public double[] getGradient() { - gradientCalls++; - gradient[0]=2*a*parameters[0]; - gradient[1]=2*b*parameters[1]; - return gradient; -// if(debugLevel >=2){ -// double[] numericalGradient = DebugHelpers.getNumericalGradient(this, parameters, 0.000001); -// for(int i = 0; i < parameters.length; i++){ -// double diff = Math.abs(gradient[i]-numericalGradient[i]); -// if(diff > 0.00001){ -// System.out.println("Numerical Gradient does not match"); -// System.exit(1); -// } -// } -// } - } - - - - public void optimizeWithGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ - GradientDescent optimizer = new GradientDescent(ls); - StopingCriteria stop = new GradientL2Norm(0.001); -// optimizer.setGradientConvergenceValue(0.001); - optimizer.setMaxIterations(100); - boolean succed = optimizer.optimize(o,stats,stop); - System.out.println("Ended optimzation Gradient Descent\n" + stats.prettyPrint(1)); - System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); - if(succed){ - System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); - }else{ - System.out.println("Failed to optimize"); - } - } - - public void optimizeWithConjugateGradient(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ - ConjugateGradient optimizer = new ConjugateGradient(ls); - StopingCriteria stop = new GradientL2Norm(0.001); - - optimizer.setMaxIterations(10); - boolean succed = optimizer.optimize(o,stats,stop); - System.out.println("Ended optimzation Conjugate Gradient\n" + stats.prettyPrint(1)); - System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); - if(succed){ - System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); - }else{ - System.out.println("Failed to optimize"); - } - } - - public void optimizeWithLBFGS(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ - LBFGS optimizer = new LBFGS(ls,10); - StopingCriteria stop = new GradientL2Norm(0.001); - optimizer.setMaxIterations(10); - boolean succed = optimizer.optimize(o,stats,stop); - System.out.println("Ended optimzation LBFGS\n" + stats.prettyPrint(1)); - System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); - if(succed){ - System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); - }else{ - System.out.println("Failed to optimize"); - } - } - - public static void main(String[] args) { - x2y2 o = new x2y2(1,10); - System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); - o.setDebugLevel(4); - LineSearchMethod wolfe = new WolfRuleLineSearch(new GenericPickFirstStep(1),0.001,0.9);; -// LineSearchMethod ls = new ArmijoLineSearchMinimization(); - OptimizerStats stats = new OptimizerStats(); - o.optimizeWithGradientDescent(wolfe, stats, o); - o = new x2y2(1,10); - System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -// ls = new ArmijoLineSearchMinimization(); - stats = new OptimizerStats(); - o.optimizeWithConjugateGradient(wolfe, stats, o); - o = new x2y2(1,10); - System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -// ls = new ArmijoLineSearchMinimization(); - stats = new OptimizerStats(); - o.optimizeWithLBFGS(wolfe, stats, o); - } - - public String toString(){ - return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue(); - } - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java deleted file mode 100644 index 391775b7..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java +++ /dev/null @@ -1,127 +0,0 @@ -package optimization.examples; - - -import optimization.gradientBasedMethods.ProjectedGradientDescent; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.projections.BoundsProjection; -import optimization.projections.Projection; -import optimization.projections.SimplexProjection; -import optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.GradientL2Norm; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; - - -/** - * @author javg - * - * - *ax2+ b(y2 -displacement) - */ -public class x2y2WithConstraints extends ProjectedObjective{ - - - double a, b; - double dx; - double dy; - Projection projection; - - - public x2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){ - //projection = new BoundsProjection(0.2,Double.MAX_VALUE); - super(); - projection = proj; - this.a = a; - this.b = b; - this.dx = dx; - this.dy = dy; - setInitialParameters(params); - System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2"); - System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")"); - printParameters(); - projection.project(parameters); - printParameters(); - gradient = new double[2]; - } - - public double getValue() { - functionCalls++; - return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy)); - } - - public double[] getGradient() { - if(gradient == null){ - gradient = new double[2]; - } - gradientCalls++; - gradient[0]=2*a*(parameters[0]-dx); - gradient[1]=2*b*(parameters[1]-dy); - return gradient; - } - - - public double[] projectPoint(double[] point) { - double[] newPoint = point.clone(); - projection.project(newPoint); - return newPoint; - } - - public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2WithConstraints o){ - ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls); - StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001); - StopingCriteria stopValue = new ValueDifference(0.001); - CompositeStopingCriteria compositeStop = new CompositeStopingCriteria(); - compositeStop.add(stopGrad); - compositeStop.add(stopValue); - - optimizer.setMaxIterations(5); - boolean succed = optimizer.optimize(o,stats,compositeStop); - System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1)); - System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); - if(succed){ - System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); - }else{ - System.out.println("Failed to optimize"); - } - } - - - - public String toString(){ - - return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")"; - } - - public static void main(String[] args) { - double a = 1; - double b=1; - double x0 = 0; - double y0 =1; - double dx = 0.5; - double dy = 0.5 ; - double [] parameters = new double[2]; - parameters[0] = x0; - parameters[1] = y0; - x2y2WithConstraints o = new x2y2WithConstraints(a,b,parameters,dx,dy, new SimplexProjection(0.5)); - System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b ); - o.setDebugLevel(4); - - LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1)); - - OptimizerStats stats = new OptimizerStats(); - o.optimizeWithProjectedGradientDescent(ls, stats, o); - -// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy); -// stats = new OptimizerStats(); -// o.optimizeWithSpectralProjectedGradientDescent(stats, o); - } - - - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java deleted file mode 100644 index 2fcb7990..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java +++ /dev/null @@ -1,120 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.DifferentiableLineSearchObjective; -import optimization.linesearch.LineSearchMethod; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - -/** - * - * @author javg - * - */ -public abstract class AbstractGradientBaseMethod implements Optimizer{ - - protected int maxNumberOfIterations=10000; - - - - protected int currentProjectionIteration; - protected double currValue; - protected double previousValue = Double.MAX_VALUE;; - protected double step; - protected double[] gradient; - public double[] direction; - - //Original values - protected double originalGradientL2Norm; - - protected LineSearchMethod lineSearch; - DifferentiableLineSearchObjective lso; - - - public void reset(){ - direction = null; - gradient = null; - previousValue = Double.MAX_VALUE; - currentProjectionIteration = 0; - originalGradientL2Norm = 0; - step = 0; - currValue = 0; - } - - public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ - lso = new DifferentiableLineSearchObjective(o); - } - public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ - } - - public void updateStructuresAfterStep(Objective o,OptimizerStats stats, StopingCriteria stop){ - } - - public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ - //Initialize structures - - stats.collectInitStats(this, o); - direction = new double[o.getNumParameters()]; - initializeStructures(o, stats, stop); - for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ - //System.out.println("\tgradient descent iteration " + currentProjectionIteration); - //System.out.print("\tparameters:" ); - //o.printParameters(); - previousValue = currValue; - currValue = o.getValue(); - gradient = o.getGradient(); - if(stop.stopOptimization(o)){ - stats.collectFinalStats(this, o); - return true; - } - - getDirection(); - if(MathUtils.dotProduct(gradient, direction) > 0){ - System.out.println("Not a descent direction"); - System.out.println(" current stats " + stats.prettyPrint(1)); - System.exit(-1); - } - updateStructuresBeforeStep(o, stats, stop); - lso.reset(direction); - step = lineSearch.getStepSize(lso); - //System.out.println("\t\tLeave with step: " + step); - if(step==-1){ - System.out.println("Failed to find step"); - stats.collectFinalStats(this, o); - return false; - } - updateStructuresAfterStep( o, stats, stop); -// previousValue = currValue; -// currValue = o.getValue(); -// gradient = o.getGradient(); - stats.collectIterationStats(this, o); - } - stats.collectFinalStats(this, o); - return false; - } - - - public int getCurrentIteration() { - return currentProjectionIteration; - } - - - /** - * Method specific - */ - public abstract double[] getDirection(); - - public double getCurrentStep() { - return step; - } - - - - public void setMaxIterations(int max) { - maxNumberOfIterations = max; - } - - public double getCurrentValue() { - return currValue; - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java deleted file mode 100644 index 28295729..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java +++ /dev/null @@ -1,92 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.DifferentiableLineSearchObjective; -import optimization.linesearch.LineSearchMethod; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - - - -public class ConjugateGradient extends AbstractGradientBaseMethod{ - - - double[] previousGradient; - double[] previousDirection; - - public ConjugateGradient(LineSearchMethod lineSearch) { - this.lineSearch = lineSearch; - } - - public void reset(){ - super.reset(); - java.util.Arrays.fill(previousDirection, 0); - java.util.Arrays.fill(previousGradient, 0); - } - - public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ - super.initializeStructures(o, stats, stop); - previousGradient = new double[o.getNumParameters()]; - previousDirection = new double[o.getNumParameters()]; - } - public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ - System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); - System.arraycopy(direction, 0, previousDirection, 0, direction.length); - } - -// public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ -// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o); -// stats.collectInitStats(this, o); -// direction = new double[o.getNumParameters()]; -// initializeStructures(o, stats, stop); -// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ -// previousValue = currValue; -// currValue = o.getValue(); -// gradient =o.getGradient(); -// if(stop.stopOptimization(gradient)){ -// stats.collectFinalStats(this, o); -// return true; -// } -// getDirection(); -// updateStructures(o, stats, stop); -// lso.reset(direction); -// step = lineSearch.getStepSize(lso); -// if(step==-1){ -// System.out.println("Failed to find a step size"); -// System.out.println("Failed to find step"); -// stats.collectFinalStats(this, o); -// return false; -// } -// -// stats.collectIterationStats(this, o); -// } -// stats.collectFinalStats(this, o); -// return false; -// } - - public double[] getDirection(){ - direction = MathUtils.negation(gradient); - if(currentProjectionIteration != 1){ - //Using Polak-Ribiere method (book equation 5.45) - double b = MathUtils.dotProduct(gradient, MathUtils.arrayMinus(gradient, previousGradient)) - /MathUtils.dotProduct(previousGradient, previousGradient); - if(b<0){ - System.out.println("Defaulting to gradient descent"); - b = Math.max(b, 0); - } - MathUtils.plusEquals(direction, previousDirection, b); - //Debug code - if(MathUtils.dotProduct(direction, gradient) > 0){ - System.out.println("Not an descent direction reseting to gradien"); - direction = MathUtils.negation(gradient); - } - } - return direction; - } - - - - - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java deleted file mode 100644 index 6dc4ef6c..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java +++ /dev/null @@ -1,65 +0,0 @@ -package optimization.gradientBasedMethods; - -import java.util.ArrayList; - -import optimization.util.MathUtils; - - - -public class DebugHelpers { - public static void getLineSearchGraph(Objective o, double[] direction, - double[] parameters, double originalObj, - double originalDot, double c1, double c2){ - ArrayList<Double> stepS = new ArrayList<Double>(); - ArrayList<Double> obj = new ArrayList<Double>(); - ArrayList<Double> norm = new ArrayList<Double>(); - double[] gradient = new double[o.getNumParameters()]; - double[] newParameters = parameters.clone(); - MathUtils.plusEquals(newParameters,direction,0); - o.setParameters(newParameters); - double minValue = o.getValue(); - int valuesBiggerThanMax = 0; - for(double step = 0; step < 2; step +=0.01 ){ - newParameters = parameters.clone(); - MathUtils.plusEquals(newParameters,direction,step); - o.setParameters(newParameters); - double newValue = o.getValue(); - gradient = o.getGradient(); - double newgradDirectionDot = MathUtils.dotProduct(gradient,direction); - stepS.add(step); - obj.add(newValue); - norm.add(newgradDirectionDot); - if(newValue <= minValue){ - minValue = newValue; - }else{ - valuesBiggerThanMax++; - } - - if(valuesBiggerThanMax > 10){ - break; - } - - } - System.out.println("step\torigObj\tobj\tsuffdec\tnorm\tcurvature1"); - for(int i = 0; i < stepS.size(); i++){ - double cnorm= norm.get(i); - System.out.println(stepS.get(i)+"\t"+originalObj +"\t"+obj.get(i) + "\t" + - (originalObj + originalDot*((Double)stepS.get(i))*c1) +"\t"+Math.abs(cnorm) +"\t"+c2*Math.abs(originalDot)); - } - } - - public static double[] getNumericalGradient(Objective o, double[] parameters, double epsilon){ - int nrParameters = o.getNumParameters(); - double[] gradient = new double[nrParameters]; - double[] newParameters; - double originalValue = o.getValue(); - for(int parameter = 0; parameter < nrParameters; parameter++){ - newParameters = parameters.clone(); - newParameters[parameter]+=epsilon; - o.setParameters(newParameters); - double newValue = o.getValue(); - gradient[parameter]=(newValue-originalValue)/epsilon; - } - return gradient; - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java deleted file mode 100644 index 9a53cef4..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java +++ /dev/null @@ -1,19 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.linesearch.LineSearchMethod; - - - -public class GradientDescent extends AbstractGradientBaseMethod{ - - public GradientDescent(LineSearchMethod lineSearch) { - this.lineSearch = lineSearch; - } - - public double[] getDirection(){ - for(int i = 0; i< gradient.length; i++){ - direction[i] = -gradient[i]; - } - return direction; - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java deleted file mode 100644 index dedbc942..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java +++ /dev/null @@ -1,234 +0,0 @@ -package optimization.gradientBasedMethods; - - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.DifferentiableLineSearchObjective; -import optimization.linesearch.LineSearchMethod; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - -public class LBFGS extends AbstractGradientBaseMethod{ - - //How many previous values are being saved - int history; - double[][] skList; - double[][] ykList; - double initialHessianParameters; - double[] previousGradient; - double[] previousParameters; - - //auxiliar structures - double q[]; - double[] roi; - double[] alphai; - - public LBFGS(LineSearchMethod ls, int history) { - lineSearch = ls; - this.history = history; - skList = new double[history][]; - ykList = new double[history][]; - - } - - public void reset(){ - super.reset(); - initialHessianParameters = 0; - previousParameters = null; - previousGradient = null; - skList = new double[history][]; - ykList = new double[history][]; - q = null; - roi = null; - alphai = null; - } - - public double[] LBFGSTwoLoopRecursion(double hessianConst){ - //Only create array once - if(q == null){ - q = new double[gradient.length]; - } - System.arraycopy(gradient, 0, q, 0, gradient.length); - //Only create array once - if(roi == null){ - roi = new double[history]; - } - //Only create array once - if(alphai == null){ - alphai = new double[history]; - } - - for(int i = history-1; i >=0 && skList[i]!= null && ykList[i]!=null; i-- ){ - // System.out.println("New to Old proj " + currentProjectionIteration + " history "+history + " index " + i); - double[] si = skList[i]; - double[] yi = ykList[i]; - roi[i]= 1.0/MathUtils.dotProduct(yi,si); - alphai[i] = MathUtils.dotProduct(si, q)*roi[i]; - MathUtils.plusEquals(q, yi, -alphai[i]); - } - //Initial Hessian is just a constant - MathUtils.scalarMultiplication(q, hessianConst); - for(int i = 0; i <history && skList[i]!= null && ykList[i]!=null; i++ ){ - // System.out.println("Old to New proj " + currentProjectionIteration + " history "+history + " index " + i); - double beta = MathUtils.dotProduct(ykList[i], q)*roi[i]; - MathUtils.plusEquals(q, skList[i], (alphai[i]-beta)); - } - return q; - } - - - - - @Override - public double[] getDirection() { - - calculateInitialHessianParameter(); -// System.out.println("Initial hessian " + initialHessianParameters); - return direction = MathUtils.negation(LBFGSTwoLoopRecursion(initialHessianParameters)); - } - - public void calculateInitialHessianParameter(){ - if(currentProjectionIteration == 1){ - //Use gradient - initialHessianParameters = 1; - }else if(currentProjectionIteration <= history){ - double[] sk = skList[currentProjectionIteration-2]; - double[] yk = ykList[currentProjectionIteration-2]; - initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); - }else{ - //get the last one - double[] sk = skList[history-1]; - double[] yk = ykList[history-1]; - initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); - } - } - - //TODO if structures exit just reset them to zero - public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ - super.initializeStructures(o, stats, stop); - previousParameters = new double[o.getNumParameters()]; - previousGradient = new double[o.getNumParameters()]; - } - public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ - super.initializeStructures(o, stats, stop); - System.arraycopy(o.getParameters(), 0, previousParameters, 0, previousParameters.length); - System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); - } - - public void updateStructuresAfterStep( Objective o,OptimizerStats stats, StopingCriteria stop){ - double[] diffX = MathUtils.arrayMinus(o.getParameters(), previousParameters); - double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient); - //Save new values and discard new ones - if(currentProjectionIteration > history){ - for(int i = 0; i < history-1;i++){ - skList[i]=skList[i+1]; - ykList[i]=ykList[i+1]; - } - skList[history-1]=diffX; - ykList[history-1]=diffGrad; - }else{ - skList[currentProjectionIteration-1]=diffX; - ykList[currentProjectionIteration-1]=diffGrad; - } - } - -// public boolean optimize(Objective o, OptimizerStats stats, StopingCriteria stop) { -// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o); -// gradient = o.getGradient(); -// direction = new double[o.getNumParameters()]; -// previousGradient = new double[o.getNumParameters()]; -// -// previousParameters = new double[o.getNumParameters()]; -// -// stats.collectInitStats(this, o); -// previousValue = Double.MAX_VALUE; -// currValue= o.getValue(); -// //Used for stopping criteria -// double[] originalGradient = o.getGradient(); -// -// originalGradientL2Norm = MathUtils.L2Norm(originalGradient); -// if(stop.stopOptimization(originalGradient)){ -// stats.collectFinalStats(this, o); -// return true; -// } -// for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ -// -// -// currValue = o.getValue(); -// gradient = o.getGradient(); -// currParameters = o.getParameters(); -// -// -// if(currentProjectionIteration == 1){ -// //Use gradient -// initialHessianParameters = 1; -// }else if(currentProjectionIteration <= history){ -// double[] sk = skList[currentProjectionIteration-2]; -// double[] yk = ykList[currentProjectionIteration-2]; -// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); -// }else{ -// //get the last one -// double[] sk = skList[history-1]; -// double[] yk = ykList[history-1]; -// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); -// } -// -// getDirection(); -// -// //MatrixOutput.printDoubleArray(direction, "direction"); -// double dot = MathUtils.dotProduct(direction, gradient); -// if(dot > 0){ -// throw new RuntimeException("Not a descent direction"); -// } if (Double.isNaN(dot)){ -// throw new RuntimeException("dot is not a number!!"); -// } -// System.arraycopy(currParameters, 0, previousParameters, 0, currParameters.length); -// System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); -// lso.reset(direction); -// step = lineSearch.getStepSize(lso); -// if(step==-1){ -// System.out.println("Failed to find a step size"); -//// lso.printLineSearchSteps(); -//// System.out.println(stats.prettyPrint(1)); -// stats.collectFinalStats(this, o); -// return false; -// } -// stats.collectIterationStats(this, o); -// -// //We are not updating the alpha since it is done in line search already -// currParameters = o.getParameters(); -// gradient = o.getGradient(); -// -// if(stop.stopOptimization(gradient)){ -// stats.collectFinalStats(this, o); -// return true; -// } -// double[] diffX = MathUtils.arrayMinus(currParameters, previousParameters); -// double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient); -// //Save new values and discard new ones -// if(currentProjectionIteration > history){ -// for(int i = 0; i < history-1;i++){ -// skList[i]=skList[i+1]; -// ykList[i]=ykList[i+1]; -// } -// skList[history-1]=diffX; -// ykList[history-1]=diffGrad; -// }else{ -// skList[currentProjectionIteration-1]=diffX; -// ykList[currentProjectionIteration-1]=diffGrad; -// } -// previousValue = currValue; -// } -// stats.collectFinalStats(this, o); -// return false; -// } - - - - - - - - - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java deleted file mode 100644 index 6be01bf9..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java +++ /dev/null @@ -1,87 +0,0 @@ -package optimization.gradientBasedMethods; - - -/** - * Defines an optimization objective: - * - * - * @author javg - * - */ -public abstract class Objective { - - protected int functionCalls = 0; - protected int gradientCalls = 0; - protected int updateCalls = 0; - - protected double[] parameters; - - //Contains a cache with the gradient - public double[] gradient; - int debugLevel = 0; - - public void setDebugLevel(int level){ - debugLevel = level; - } - - public int getNumParameters() { - return parameters.length; - } - - public double getParameter(int index) { - return parameters[index]; - } - - public double[] getParameters() { - return parameters; - } - - public abstract double[] getGradient( ); - - public void setParameter(int index, double value) { - parameters[index]=value; - } - - public void setParameters(double[] params) { - if(parameters == null){ - parameters = new double[params.length]; - } - updateCalls++; - System.arraycopy(params, 0, parameters, 0, params.length); - } - - - public int getNumberFunctionCalls() { - return functionCalls; - } - - public int getNumberGradientCalls() { - return gradientCalls; - } - - public int getNumberUpdateCalls() { - return updateCalls; - } - - public String finalInfoString() { - return "FE: " + functionCalls + " GE " + gradientCalls + " Params updates" + - updateCalls; - } - public void printParameters() { - System.out.println(toString()); - } - - public abstract String toString(); - public abstract double getValue (); - - /** - * Sets the initial objective parameters - * For unconstrained models this just sets the objective params = argument no copying - * For a constrained objective project the parameters and then set - * @param params - */ - public void setInitialParameters(double[] params){ - parameters = params; - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java deleted file mode 100644 index 96fce5b0..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java +++ /dev/null @@ -1,19 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.stopCriteria.StopingCriteria; - -public interface Optimizer { - public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stoping); - - - public double[] getDirection(); - public double getCurrentStep(); - public double getCurrentValue(); - public int getCurrentIteration(); - public void reset(); - - public void setMaxIterations(int max); - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java deleted file mode 100644 index afb29d04..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java +++ /dev/null @@ -1,11 +0,0 @@ -package optimization.gradientBasedMethods; - - -/** - * - * @author javg - * - */ -public abstract class ProjectedAbstractGradientBaseMethod extends AbstractGradientBaseMethod implements ProjectedOptimizer{ - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java deleted file mode 100644 index 0186e945..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java +++ /dev/null @@ -1,154 +0,0 @@ -package optimization.gradientBasedMethods; - -import java.io.IOException; - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.DifferentiableLineSearchObjective; -import optimization.linesearch.LineSearchMethod; -import optimization.linesearch.ProjectedDifferentiableLineSearchObjective; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - - -/** - * This class implements the projected gradiend - * as described in Bertsekas "Non Linear Programming" - * section 2.3. - * - * The update is given by: - * x_k+1 = x_k + alpha^k(xbar_k-x_k) - * Where xbar is: - * xbar = [x_k -s_k grad(f(x_k))]+ - * where []+ is the projection into the feasibility set - * - * alpha is the step size - * s_k - is a positive scalar which can be view as a step size as well, by - * setting alpha to 1, then x_k+1 = [x_k -s_k grad(f(x_k))]+ - * This is called taking a step size along the projection arc (Bertsekas) which - * we will use by default. - * - * Note that the only place where we actually take a step size is on pick a step size - * so this is going to be just like a normal gradient descent but use a different - * armijo line search where we project after taking a step. - * - * - * @author javg - * - */ -public class ProjectedGradientDescent extends ProjectedAbstractGradientBaseMethod{ - - - - - public ProjectedGradientDescent(LineSearchMethod lineSearch) { - this.lineSearch = lineSearch; - } - - //Use projected differential objective instead - public void initializeStructures(Objective o, OptimizerStats stats, StopingCriteria stop) { - lso = new ProjectedDifferentiableLineSearchObjective(o); - }; - - - ProjectedObjective obj; - public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){ - obj = o; - return super.optimize(o, stats, stop); - } - - public double[] getDirection(){ - for(int i = 0; i< gradient.length; i++){ - direction[i] = -gradient[i]; - } - return direction; - } - - - - -} - - - - - - - -///OLD CODE - -//Use projected gradient norm -//public boolean stopCriteria(double[] gradient){ -// if(originalDirenctionL2Norm == 0){ -// System.out.println("Leaving original direction norm is zero"); -// return true; -// } -// if(MathUtils.L2Norm(direction)/originalDirenctionL2Norm < gradientConvergenceValue){ -// System.out.println("Leaving projected gradient Norm smaller than epsilon"); -// return true; -// } -// if((previousValue - currValue)/Math.abs(previousValue) < valueConvergenceValue) { -// System.out.println("Leaving value change below treshold " + previousValue + " - " + currValue); -// System.out.println(previousValue/currValue + " - " + currValue/currValue -// + " = " + (previousValue - currValue)/Math.abs(previousValue)); -// return true; -// } -// return false; -//} -// - -//public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){ -// stats.collectInitStats(this, o); -// obj = o; -// step = 0; -// currValue = o.getValue(); -// previousValue = Double.MAX_VALUE; -// gradient = o.getGradient(); -// originalGradientL2Norm = MathUtils.L2Norm(gradient); -// parameterChange = new double[gradient.length]; -// getDirection(); -// ProjectedDifferentiableLineSearchObjective lso = new ProjectedDifferentiableLineSearchObjective(o,direction); -// -// originalDirenctionL2Norm = MathUtils.L2Norm(direction); -// //MatrixOutput.printDoubleArray(currParameters, "parameters"); -// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ -// // System.out.println("Iter " + currentProjectionIteration); -// //o.printParameters(); -// -// -// -// if(stop.stopOptimization(gradient)){ -// stats.collectFinalStats(this, o); -// lastStepUsed = step; -// return true; -// } -// lso.reset(direction); -// step = lineSearch.getStepSize(lso); -// if(step==-1){ -// System.out.println("Failed to find step"); -// stats.collectFinalStats(this, o); -// return false; -// -// } -// -// //Update the direction for stopping criteria -// previousValue = currValue; -// currValue = o.getValue(); -// gradient = o.getGradient(); -// direction = getDirection(); -// if(MathUtils.dotProduct(gradient, direction) > 0){ -// System.out.println("Not a descent direction"); -// System.out.println(" current stats " + stats.prettyPrint(1)); -// System.exit(-1); -// } -// stats.collectIterationStats(this, o); -// } -// lastStepUsed = step; -// stats.collectFinalStats(this, o); -// return false; -// } - -//public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ -// System.out.println("Objective is not a projected objective"); -// throw new RuntimeException(); -//} - diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java deleted file mode 100644 index c3d21393..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java +++ /dev/null @@ -1,29 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.util.MathUtils; - - -/** - * Computes a projected objective - * When we tell it to set some parameters it automatically projects the parameters back into the simplex: - * - * - * When we tell it to get the gradient in automatically returns the projected gradient: - * @author javg - * - */ -public abstract class ProjectedObjective extends Objective{ - - public abstract double[] projectPoint (double[] point); - - public double[] auxParameters; - - - public void setInitialParameters(double[] params){ - setParameters(projectPoint(params)); - } - - - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java deleted file mode 100644 index 81d8403e..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java +++ /dev/null @@ -1,10 +0,0 @@ -package optimization.gradientBasedMethods; - - - -public interface ProjectedOptimizer extends Optimizer{ - - - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java deleted file mode 100644 index 6340ef73..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java +++ /dev/null @@ -1,86 +0,0 @@ -package optimization.gradientBasedMethods.stats; - -import java.util.ArrayList; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.Optimizer; -import optimization.util.MathUtils; -import optimization.util.StaticTools; - - -public class OptimizerStats { - - double start = 0; - double totalTime = 0; - - String objectiveFinalStats; - - ArrayList<Double> gradientNorms = new ArrayList<Double>(); - ArrayList<Double> steps = new ArrayList<Double>(); - ArrayList<Double> value = new ArrayList<Double>(); - ArrayList<Integer> iterations = new ArrayList<Integer>(); - double prevValue =0; - - public void reset(){ - start = 0; - totalTime = 0; - - objectiveFinalStats=""; - - gradientNorms.clear(); - steps.clear(); - value.clear(); - iterations.clear(); - prevValue =0; - } - - public void startTime() { - start = System.currentTimeMillis(); - } - public void stopTime() { - totalTime += System.currentTimeMillis() - start; - } - - public String prettyPrint(int level){ - StringBuffer res = new StringBuffer(); - res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n"); - res.append(objectiveFinalStats+"\n"); - if(level > 0){ - if(iterations.size() > 0){ - res.append("\tIteration"+iterations.get(0)+"\tstep: "+StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+ - StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)+ "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n"); - } - for(int i = 1; i < iterations.size(); i++){ - res.append("\tIteration:\t"+iterations.get(i)+"\tstep:"+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+ - StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+ - "\tvalue:\t"+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+ - "\tvalueDiff:\t"+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+ - "\n"); - } - } - return res.toString(); - } - - - public void collectInitStats(Optimizer optimizer, Objective objective){ - startTime(); - iterations.add(-1); - gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); - steps.add(0.0); - value.add(objective.getValue()); - } - - public void collectIterationStats(Optimizer optimizer, Objective objective){ - iterations.add(optimizer.getCurrentIteration()); - gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); - steps.add(optimizer.getCurrentStep()); - value.add(optimizer.getCurrentValue()); - } - - - public void collectFinalStats(Optimizer optimizer, Objective objective){ - stopTime(); - objectiveFinalStats = objective.finalInfoString(); - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java deleted file mode 100644 index d65a1267..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java +++ /dev/null @@ -1,70 +0,0 @@ -package optimization.gradientBasedMethods.stats; - -import java.util.ArrayList; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.Optimizer; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.gradientBasedMethods.ProjectedOptimizer; -import optimization.util.MathUtils; -import optimization.util.StaticTools; - - -public class ProjectedOptimizerStats extends OptimizerStats{ - - - - public void reset(){ - super.reset(); - projectedGradientNorms.clear(); - } - - ArrayList<Double> projectedGradientNorms = new ArrayList<Double>(); - - public String prettyPrint(int level){ - StringBuffer res = new StringBuffer(); - res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n"); - res.append(objectiveFinalStats+"\n"); - if(level > 0){ - if(iterations.size() > 0){ - res.append("\tIteration"+iterations.get(0)+"\tstep: "+ - StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+ - StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10) - + "\tdirection"+ - StaticTools.prettyPrint(projectedGradientNorms.get(0), "0.00000E00", 10)+ - "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n"); - } - for(int i = 1; i < iterations.size(); i++){ - res.append("\tIteration"+iterations.get(i)+"\tstep: "+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+ - StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+ - "\t direction "+ - StaticTools.prettyPrint(projectedGradientNorms.get(i), "0.00000E00", 10)+ - "\tvalue "+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+ - "\tvalueDiff "+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+ - "\n"); - } - } - return res.toString(); - } - - - public void collectInitStats(Optimizer optimizer, Objective objective){ - startTime(); - } - - public void collectIterationStats(Optimizer optimizer, Objective objective){ - iterations.add(optimizer.getCurrentIteration()); - gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); - projectedGradientNorms.add(MathUtils.L2Norm(optimizer.getDirection())); - steps.add(optimizer.getCurrentStep()); - value.add(optimizer.getCurrentValue()); - } - - - - public void collectFinalStats(Optimizer optimizer, Objective objective){ - stopTime(); - objectiveFinalStats = objective.finalInfoString(); - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java deleted file mode 100644 index c9f9b8df..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java +++ /dev/null @@ -1,102 +0,0 @@ -package optimization.linesearch; - -import optimization.util.Interpolation; - - -/** - * Implements Back Tracking Line Search as described on page 37 of Numerical Optimization. - * Also known as armijo rule - * @author javg - * - */ -public class ArmijoLineSearchMinimization implements LineSearchMethod{ - - /** - * How much should the step size decrease at each iteration. - */ - double contractionFactor = 0.5; - double c1 = 0.0001; - - double sigma1 = 0.1; - double sigma2 = 0.9; - - - - double initialStep; - int maxIterations = 10; - - - public ArmijoLineSearchMinimization(){ - this.initialStep = 1; - } - - //Experiment - double previousStepPicked = -1;; - double previousInitGradientDot = -1; - double currentInitGradientDot = -1; - - - public void reset(){ - previousStepPicked = -1;; - previousInitGradientDot = -1; - currentInitGradientDot = -1; - } - - public void setInitialStep(double initial){ - initialStep = initial; - } - - /** - * - */ - - public double getStepSize(DifferentiableLineSearchObjective o) { - currentInitGradientDot = o.getInitialGradient(); - //Should update all in the objective - o.updateAlpha(initialStep); - int nrIterations = 0; - //System.out.println("tried alpha" + initialStep + " value " + o.getCurrentValue()); - while(!WolfeConditions.suficientDecrease(o,c1)){ - if(nrIterations >= maxIterations){ - o.printLineSearchSteps(); - return -1; - } - double alpha=o.getAlpha(); - double alphaTemp = - Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); - if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ -// System.out.println("using alpha temp " + alphaTemp); - alpha = alphaTemp; - }else{ -// System.out.println("Discarding alpha temp " + alphaTemp); - alpha = alpha*contractionFactor; - } -// double alpha =o.getAlpha()*contractionFactor; - - o.updateAlpha(alpha); - //System.out.println("tried alpha" + alpha+ " value " + o.getCurrentValue()); - nrIterations++; - } - - //System.out.println("Leavning line search used:"); - //o.printLineSearchSteps(); - - previousInitGradientDot = currentInitGradientDot; - previousStepPicked = o.getAlpha(); - return o.getAlpha(); - } - - public double getInitialGradient() { - return currentInitGradientDot; - - } - - public double getPreviousInitialGradient() { - return previousInitGradientDot; - } - - public double getPreviousStepUsed() { - return previousStepPicked; - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java deleted file mode 100644 index e153f2da..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java +++ /dev/null @@ -1,141 +0,0 @@ -package optimization.linesearch; - -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.util.Interpolation; -import optimization.util.MathUtils; - - - - - -/** - * Implements Armijo Rule Line search along the projection arc (Non-Linear Programming page 230) - * To be used with Projected gradient Methods. - * - * Recall that armijo tries successive step sizes alpha until the sufficient decrease is satisfied: - * f(x+alpha*direction) < f(x) + alpha*c1*grad(f)*direction - * - * In this case we are optimizing over a convex set X so we must guarantee that the new point stays inside the - * constraints. - * First the direction as to be feasible (inside constraints) and will be define as: - * d = (x_k_f - x_k) where x_k_f is a feasible point. - * so the armijo condition can be rewritten as: - * f(x+alpha(x_k_f - x_k)) < f(x) + c1*grad(f)*(x_k_f - x_k) - * and x_k_f is defined as: - * [x_k-alpha*grad(f)]+ - * where []+ mean a projection to the feasibility set. - * So this means that we take a step on the negative gradient (gradient descent) and then obtain then project - * that point to the feasibility set. - * Note that if the point is already feasible then we are back to the normal armijo rule. - * - * @author javg - * - */ -public class ArmijoLineSearchMinimizationAlongProjectionArc implements LineSearchMethod{ - - /** - * How much should the step size decrease at each iteration. - */ - double contractionFactor = 0.5; - double c1 = 0.0001; - - - double initialStep; - int maxIterations = 100; - - - double sigma1 = 0.1; - double sigma2 = 0.9; - - //Experiment - double previousStepPicked = -1;; - double previousInitGradientDot = -1; - double currentInitGradientDot = -1; - - GenericPickFirstStep strategy; - - - public void reset(){ - previousStepPicked = -1;; - previousInitGradientDot = -1; - currentInitGradientDot = -1; - } - - - public ArmijoLineSearchMinimizationAlongProjectionArc(){ - this.initialStep = 1; - } - - public ArmijoLineSearchMinimizationAlongProjectionArc(GenericPickFirstStep strategy){ - this.strategy = strategy; - this.initialStep = strategy.getFirstStep(this); - } - - - public void setInitialStep(double initial){ - this.initialStep = initial; - } - - /** - * - */ - - public double getStepSize(DifferentiableLineSearchObjective o) { - - - //Should update all in the objective - initialStep = strategy.getFirstStep(this); - o.updateAlpha(initialStep); - previousInitGradientDot=currentInitGradientDot; - currentInitGradientDot=o.getCurrentGradient(); - int nrIterations = 0; - - //Armijo rule, the current value has to be smaller than the original value plus a small step of the gradient - while(o.getCurrentValue() > - o.getOriginalValue() + c1*(o.getCurrentGradient())){ -// System.out.println("curr value "+o.getCurrentValue()); -// System.out.println("original value "+o.getOriginalValue()); -// System.out.println("GRADIENT decrease" +(MathUtils.dotProduct(o.o.gradient, -// MathUtils.arrayMinus(o.originalParameters,((ProjectedObjective)o.o).auxParameters)))); -// System.out.println("GRADIENT SAVED" + o.getCurrentGradient()); - if(nrIterations >= maxIterations){ - System.out.println("Could not find a step leaving line search with -1"); - o.printLineSearchSteps(); - return -1; - } - double alpha=o.getAlpha(); - double alphaTemp = - Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); - if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ - alpha = alphaTemp; - }else{ - alpha = alpha*contractionFactor; - } -// double alpha =obj.getAlpha()*contractionFactor; - o.updateAlpha(alpha); - nrIterations++; - } -// System.out.println("curr value "+o.getCurrentValue()); -// System.out.println("original value "+o.getOriginalValue()); -// System.out.println("sufficient decrease" +c1*o.getCurrentGradient()); -// System.out.println("Leavning line search used:"); -// o.printSmallLineSearchSteps(); - - previousStepPicked = o.getAlpha(); - return o.getAlpha(); - } - - public double getInitialGradient() { - return currentInitGradientDot; - - } - - public double getPreviousInitialGradient() { - return previousInitGradientDot; - } - - public double getPreviousStepUsed() { - return previousStepPicked; - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java deleted file mode 100644 index a5bc958e..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java +++ /dev/null @@ -1,185 +0,0 @@ -package optimization.linesearch; - -import gnu.trove.TDoubleArrayList; -import gnu.trove.TIntArrayList; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; - -import optimization.gradientBasedMethods.Objective; -import optimization.util.MathUtils; -import optimization.util.StaticTools; - - - -import util.MathUtil; -import util.Printing; - - -/** - * A wrapper class for the actual objective in order to perform - * line search. The optimization code assumes that this does a lot - * of caching in order to simplify legibility. For the applications - * we use it for, caching the entire history of evaluations should be - * a win. - * - * Note: the lastEvaluatedAt value is very important, since we will use - * it to avoid doing an evaluation of the gradient after the line search. - * - * The differentiable line search objective defines a search along the ray - * given by a direction of the main objective. - * It defines the following function, - * where f is the original objective function: - * g(alpha) = f(x_0 + alpha*direction) - * g'(alpha) = f'(x_0 + alpha*direction)*direction - * - * @author joao - * - */ -public class DifferentiableLineSearchObjective { - - - - Objective o; - int nrIterations; - TDoubleArrayList steps; - TDoubleArrayList values; - TDoubleArrayList gradients; - - //This variables cannot change - public double[] originalParameters; - public double[] searchDirection; - - - /** - * Defines a line search objective: - * Receives: - * Objective to each we are performing the line search, is used to calculate values and gradients - * Direction where to do the ray search, note that the direction does not depend of the - * objective but depends from the method. - * @param o - * @param direction - */ - public DifferentiableLineSearchObjective(Objective o) { - this.o = o; - originalParameters = new double[o.getNumParameters()]; - searchDirection = new double[o.getNumParameters()]; - steps = new TDoubleArrayList(); - values = new TDoubleArrayList(); - gradients = new TDoubleArrayList(); - } - /** - * Called whenever we start a new iteration. - * Receives the ray where we are searching for and resets all values - * - */ - public void reset(double[] direction){ - //Copy initial values - System.arraycopy(o.getParameters(), 0, originalParameters, 0, o.getNumParameters()); - System.arraycopy(direction, 0, searchDirection, 0, o.getNumParameters()); - - //Initialize variables - nrIterations = 0; - steps.clear(); - values.clear(); - gradients.clear(); - - values.add(o.getValue()); - gradients.add(MathUtils.dotProduct(o.getGradient(),direction)); - steps.add(0); - } - - - /** - * update the current value of alpha. - * Takes a step with that alpha in direction - * Get the real objective value and gradient and calculate all required information. - */ - public void updateAlpha(double alpha){ - if(alpha < 0){ - System.out.println("alpha may not be smaller that zero"); - throw new RuntimeException(); - } - nrIterations++; - steps.add(alpha); - //x_t+1 = x_t + alpha*direction - System.arraycopy(originalParameters,0, o.getParameters(), 0, originalParameters.length); - MathUtils.plusEquals(o.getParameters(), searchDirection, alpha); - o.setParameters(o.getParameters()); -// System.out.println("Took a step of " + alpha + " new value " + o.getValue()); - values.add(o.getValue()); - gradients.add(MathUtils.dotProduct(o.getGradient(),searchDirection)); - } - - - - public int getNrIterations(){ - return nrIterations; - } - - /** - * return g(alpha) for the current value of alpha - * @param iter - * @return - */ - public double getValue(int iter){ - return values.get(iter); - } - - public double getCurrentValue(){ - return values.get(nrIterations); - } - - public double getOriginalValue(){ - return values.get(0); - } - - /** - * return g'(alpha) for the current value of alpha - * @param iter - * @return - */ - public double getGradient(int iter){ - return gradients.get(iter); - } - - public double getCurrentGradient(){ - return gradients.get(nrIterations); - } - - public double getInitialGradient(){ - return gradients.get(0); - } - - - - - public double getAlpha(){ - return steps.get(nrIterations); - } - - public void printLineSearchSteps(){ - System.out.println( - " Steps size "+steps.size() + - "Values size "+values.size() + - "Gradeients size "+gradients.size()); - for(int i =0; i < steps.size();i++){ - System.out.println("Iter " + i + " step " + steps.get(i) + - " value " + values.get(i) + " grad " + gradients.get(i)); - } - } - - public void printSmallLineSearchSteps(){ - for(int i =0; i < steps.size();i++){ - System.out.print(StaticTools.prettyPrint(steps.get(i), "0.0000E00",8) + " "); - } - System.out.println(); - } - - public static void main(String[] args) { - - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java deleted file mode 100644 index a33eb311..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java +++ /dev/null @@ -1,20 +0,0 @@ -package optimization.linesearch; - - -public class GenericPickFirstStep{ - double _initValue; - public GenericPickFirstStep(double initValue) { - _initValue = initValue; - } - - public double getFirstStep(LineSearchMethod ls){ - return _initValue; - } - public void collectInitValues(LineSearchMethod ls){ - - } - - public void collectFinalValues(LineSearchMethod ls){ - - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java deleted file mode 100644 index 0deebcdb..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java +++ /dev/null @@ -1,25 +0,0 @@ -package optimization.linesearch; - - -public class InterpolationPickFirstStep extends GenericPickFirstStep{ - public InterpolationPickFirstStep(double initValue) { - super(initValue); - } - - public double getFirstStep(LineSearchMethod ls){ - if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ - double newStep = Math.min(300, 1.02*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient()); - // System.out.println("proposing " + newStep); - return newStep; - - } - return _initValue; - } - public void collectInitValues(WolfRuleLineSearch ls){ - - } - - public void collectFinalValues(WolfRuleLineSearch ls){ - - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java deleted file mode 100644 index 80cd7f39..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java +++ /dev/null @@ -1,14 +0,0 @@ -package optimization.linesearch; - - -public interface LineSearchMethod { - - double getStepSize(DifferentiableLineSearchObjective o); - - public double getInitialGradient(); - public double getPreviousInitialGradient(); - public double getPreviousStepUsed(); - - public void setInitialStep(double initial); - public void reset(); -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java deleted file mode 100644 index 4b354fd9..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java +++ /dev/null @@ -1,33 +0,0 @@ -package optimization.linesearch; - -/** - * Non newtwon since we don't always try 1... - * Not sure if that is even usefull for newton - * @author javg - * - */ -public class NonNewtonInterpolationPickFirstStep extends GenericPickFirstStep{ - public NonNewtonInterpolationPickFirstStep(double initValue) { - super(initValue); - } - - public double getFirstStep(LineSearchMethod ls){ -// System.out.println("Previous step used " + ls.getPreviousStepUsed()); -// System.out.println("PreviousGradinebt " + ls.getPreviousInitialGradient()); -// System.out.println("CurrentGradinebt " + ls.getInitialGradient()); - if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ - double newStep = 1.01*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient(); - //System.out.println("Suggesting " + newStep); - return newStep; - - } - return _initValue; - } - public void collectInitValues(WolfRuleLineSearch ls){ - - } - - public void collectFinalValues(WolfRuleLineSearch ls){ - - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java deleted file mode 100644 index 29ccbc32..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java +++ /dev/null @@ -1,137 +0,0 @@ -package optimization.linesearch; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.util.MathUtils; -import optimization.util.MatrixOutput; - - -/** - * See ArmijoLineSearchMinimizationAlongProjectionArc for description - * @author javg - * - */ -public class ProjectedDifferentiableLineSearchObjective extends DifferentiableLineSearchObjective{ - - - - ProjectedObjective obj; - public ProjectedDifferentiableLineSearchObjective(Objective o) { - super(o); - if(!(o instanceof ProjectedObjective)){ - System.out.println("Must receive a projected objective"); - throw new RuntimeException(); - } - obj = (ProjectedObjective) o; - } - - - - public double[] projectPoint (double[] point){ - return ((ProjectedObjective)o).projectPoint(point); - } - public void updateAlpha(double alpha){ - if(alpha < 0){ - System.out.println("alpha may not be smaller that zero"); - throw new RuntimeException(); - } - - if(obj.auxParameters == null){ - obj.auxParameters = new double[obj.getParameters().length]; - } - - nrIterations++; - - steps.add(alpha); - System.arraycopy(originalParameters, 0, obj.auxParameters, 0, obj.auxParameters.length); - - //Take a step into the search direction - -// MatrixOutput.printDoubleArray(obj.getGradient(), "gradient"); - -// alpha=gradients.get(0)*alpha/(gradients.get(gradients.size()-1)); - - //x_t+1 = x_t - alpha*gradient = x_t + alpha*direction - MathUtils.plusEquals(obj.auxParameters, searchDirection, alpha); -// MatrixOutput.printDoubleArray(obj.auxParameters, "before projection"); - obj.auxParameters = projectPoint(obj.auxParameters); -// MatrixOutput.printDoubleArray(obj.auxParameters, "after projection"); - o.setParameters(obj.auxParameters); -// System.out.println("new parameters"); -// o.printParameters(); - values.add(o.getValue()); - //Computes the new gradient x_k-[x_k-alpha*Gradient(x_k)]+ - MathUtils.minusEqualsInverse(originalParameters,obj.auxParameters,1); -// MatrixOutput.printDoubleArray(obj.auxParameters, "new gradient"); - //Dot product between the new direction and the new gradient - double gradient = MathUtils.dotProduct(obj.auxParameters,searchDirection); - gradients.add(gradient); - if(gradient > 0){ - System.out.println("Gradient on line search has to be smaller than zero"); - System.out.println("Iter: " + nrIterations); - MatrixOutput.printDoubleArray(obj.auxParameters, "new direction"); - MatrixOutput.printDoubleArray(searchDirection, "search direction"); - throw new RuntimeException(); - - } - - } - - /** - * - */ -// public void updateAlpha(double alpha){ -// -// if(alpha < 0){ -// System.out.println("alpha may not be smaller that zero"); -// throw new RuntimeException(); -// } -// -// nrIterations++; -// steps.add(alpha); -// //x_t+1 = x_t - alpha*direction -// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); -//// MatrixOutput.printDoubleArray(parametersChange, "parameters before step"); -//// System.out.println("Step" + alpha); -// MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha); -// -// MathUtils.minusEquals(parametersChange, originalGradient, alpha); -// -// //Project the points into the feasibility set -//// MatrixOutput.printDoubleArray(parametersChange, "before projection"); -// //x_k(alpha) = [x_k - alpha*grad f(x_k)]+ -// parametersChange = projectPoint(parametersChange); -//// MatrixOutput.printDoubleArray(parametersChange, "after projection"); -// o.setParameters(parametersChange); -// values.add(o.getValue()); -// //Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+ -// -// direction=MathUtils.arrayMinus(parametersChange,originalParameters); -//// MatrixOutput.printDoubleArray(direction, "new direction"); -// -// double gradient = MathUtils.dotProduct(originalGradient,direction); -// gradients.add(gradient); -// if(gradient > 1E-10){ -// System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction))); -// -// -// System.out.println("not a descent direction for alpha " + alpha); -// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); -// MathUtils.minusEquals(parametersChange, originalGradient, 1E-20); -// -// parametersChange = projectPoint(parametersChange); -// direction=MathUtils.arrayMinus(parametersChange,originalParameters); -// gradient = MathUtils.dotProduct(originalGradient,direction); -// if(gradient > 0){ -// System.out.println("Direction is really non-descent evern for small alphas:" + gradient); -// } -// System.out.println("ProjecteLineSearchObjective: Should be a descent direction at " + nrIterations + ": "+ gradient); -//// System.out.println(Printing.doubleArrayToString(originalGradient, null,"Original gradient")); -//// System.out.println(Printing.doubleArrayToString(originalParameters, null,"Original parameters")); -//// System.out.println(Printing.doubleArrayToString(parametersChange, null,"Projected parameters")); -//// System.out.println(Printing.doubleArrayToString(direction, null,"Direction")); -// throw new RuntimeException(); -// } -// } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java deleted file mode 100644 index 5489f2d0..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java +++ /dev/null @@ -1,300 +0,0 @@ -package optimization.linesearch; - -import java.io.PrintStream; -import java.util.ArrayList; - -import optimization.util.Interpolation; - - - - -/** - * - * @author javg - * - */ -public class WolfRuleLineSearch implements LineSearchMethod{ - - GenericPickFirstStep pickFirstStep; - - double c1 = 1.0E-4; - double c2 = 0.9; - - //Application dependent - double maxStep=100; - - int extrapolationIteration; - int maxExtrapolationIteration = 1000; - - - double minZoomDiffTresh = 10E-10; - - - ArrayList<Double> steps; - ArrayList<Double> gradientDots; - ArrayList<Double> functionVals; - - int debugLevel = 0; - boolean foudStep = false; - - public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){ - this.pickFirstStep = pickFirstStep; - - } - - - - - public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep, double c1, double c2){ - this.pickFirstStep = pickFirstStep; - initialStep = pickFirstStep.getFirstStep(this); - this.c1 = c1; - this.c2 = c2; - } - - public void setDebugLevel(int level){ - debugLevel = level; - } - - //Experiment - double previousStepPicked = -1;; - double previousInitGradientDot = -1; - double currentInitGradientDot = -1; - - double initialStep; - - - public void reset(){ - previousStepPicked = -1;; - previousInitGradientDot = -1; - currentInitGradientDot = -1; - if(steps != null) - steps.clear(); - if(gradientDots != null) - gradientDots.clear(); - if(functionVals != null) - functionVals.clear(); - } - - public void setInitialStep(double initial){ - initialStep = pickFirstStep.getFirstStep(this); - } - - - - /** - * Implements Wolf Line search as described in nocetal. - * This process consists in two stages. The first stage we try to satisfy the - * biggest step size that still satisfies the curvature condition. We keep increasing - * the initial step size until we find a step satisfying the curvature condition, we return - * success, we failed the sufficient increase so we cannot increase more and we can call zoom with - * that maximum step, or we pass the minimum in which case we can call zoom the same way. - * - */ - public double getStepSize(DifferentiableLineSearchObjective objective){ - //System.out.println("entering line search"); - - foudStep = false; - if(debugLevel >= 1){ - steps = new ArrayList<Double>(); - gradientDots = new ArrayList<Double>(); - functionVals =new ArrayList<Double>(); - } - - //test - currentInitGradientDot = objective.getInitialGradient(); - - - double previousValue = objective.getCurrentValue(); - double previousStep = 0; - double currentStep =pickFirstStep.getFirstStep(this); - for(extrapolationIteration = 0; - extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){ - - objective.updateAlpha(currentStep); - double currentValue = objective.getCurrentValue(); - if(debugLevel >= 1){ - steps.add(currentStep); - functionVals.add(currentValue); - gradientDots.add(objective.getCurrentGradient()); - } - - - //The current step does not satisfy the sufficient decrease condition anymore - // so we cannot get bigger than that calling zoom. - if(!WolfeConditions.suficientDecrease(objective,c1)|| - (extrapolationIteration > 0 && currentValue >= previousValue)){ - currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations); - break; - } - - //Satisfying both conditions ready to leave - if(WolfeConditions.sufficientCurvature(objective,c1,c2)){ - //Found step - foudStep = true; - break; - } - - /** - * This means that we passed the minimum already since the dot product that should be - * negative (descent direction) is now positive. So we cannot increase more. On the other hand - * since we know the direction is a descent direction the value the objective at the current step - * is for sure smaller than the preivous step so we change the order. - */ - if(objective.getCurrentGradient() >= 0){ - currentStep = zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1); - break; - } - - - //Ok, so we can still get a bigger step, - double aux = currentStep; - //currentStep = currentStep*2; - if(Math.abs(currentStep-maxStep)>1.1e-2){ - currentStep = (currentStep+maxStep)/2; - }else{ - currentStep = currentStep*2; - } - previousStep = aux; - previousValue = currentValue; - //Could be done better - if(currentStep >= maxStep){ - System.out.println("Excedded max step...calling zoom with maxStepSize"); - currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations); - } - } - if(!foudStep){ - System.out.println("Wolfe Rule exceed number of iterations"); - if(debugLevel >= 1){ - printSmallWolfeStats(System.out); -// System.out.println("Line search values"); -// DebugHelpers.getLineSearchGraph(o, direction, originalParameters,origValue, origGradDirectionDot,c1,c2); - } - return -1; - } - if(debugLevel >= 1){ - printSmallWolfeStats(System.out); - } - - previousStepPicked = currentStep; - previousInitGradientDot = currentInitGradientDot; -// objective.printLineSearchSteps(); - return currentStep; - } - - - - - - public void printWolfeStats(PrintStream out){ - for(int i = 0; i < steps.size(); i++){ - out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i)); - } - } - - public void printSmallWolfeStats(PrintStream out){ - for(int i = 0; i < steps.size(); i++){ - out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" "); - } - System.out.println(); - } - - - - /** - * Pick a step satisfying the strong wolfe condition from an given from lowerStep and higherStep - * picked on the routine above. - * - * Both lowerStep and higherStep have been evaluated, so we only need to pass the iteration where they have - * been evaluated and save extra evaluations. - * - * We know that lowerStepValue as to be smaller than higherStepValue, and that a point - * satisfying both conditions exists in such interval. - * - * LowerStep always satisfies at least the sufficient decrease - * @return - */ - public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep, - int lowerStepIter, int higherStepIter){ - - if(debugLevel >=2){ - System.out.println("Entering zoom with " + lowerStep+"-"+higherStep); - } - - double currentStep=-1; - - int zoomIter = 0; - while(zoomIter < 1000){ - if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){ - o.updateAlpha(lowerStep); - if(debugLevel >= 1){ - steps.add(lowerStep); - functionVals.add(o.getCurrentValue()); - gradientDots.add(o.getCurrentGradient()); - } - foudStep = true; - return lowerStep; - } - - //Cubic interpolation - currentStep = - Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter), - higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter)); - - //Safeguard.... should not be required check in what condtions it is required - if(currentStep < 0 ){ - currentStep = (lowerStep+higherStep)/2; - } - if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){ - currentStep = (lowerStep+higherStep)/2; - } -// currentStep = (lowerStep+higherStep)/2; -// System.out.println("Trying "+currentStep); - o.updateAlpha(currentStep); - if(debugLevel >=1){ - steps.add(currentStep); - functionVals.add(o.getCurrentValue()); - gradientDots.add(o.getCurrentGradient()); - } - if(!WolfeConditions.suficientDecrease(o,c1) - || o.getCurrentValue() >= o.getValue(lowerStepIter)){ - higherStepIter = o.nrIterations; - higherStep = currentStep; - } - //Note when entering here the new step satisfies the sufficent decrease and - // or as a function value that is better than the previous best (lowerStepFunctionValues) - // so we either leave or change the value of the alpha low. - else{ - if(WolfeConditions.sufficientCurvature(o,c1,c2)){ - //Satisfies the both wolf conditions - foudStep = true; - break; - } - //If does not satisfy curvature - if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){ - higherStep = lowerStep; - higherStepIter = lowerStepIter; - } - lowerStep = currentStep; - lowerStepIter = o.nrIterations; - } - zoomIter++; - } - return currentStep; - } - - public double getInitialGradient() { - return currentInitGradientDot; - - } - - public double getPreviousInitialGradient() { - return previousInitGradientDot; - } - - public double getPreviousStepUsed() { - return previousStepPicked; - } - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java deleted file mode 100644 index dcc704eb..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java +++ /dev/null @@ -1,45 +0,0 @@ -package optimization.linesearch; - - -public class WolfeConditions { - - /** - * Sufficient Increase number. Default constant - */ - - - /** - * Value for suficient curvature: - * 0.9 - For newton and quase netwon methods - * 0.1 - Non linear conhugate gradient - */ - - int debugLevel = 0; - public void setDebugLevel(int level){ - debugLevel = level; - } - - public static boolean suficientDecrease(DifferentiableLineSearchObjective o, double c1){ - double value = o.getOriginalValue()+c1*o.getAlpha()*o.getInitialGradient(); -// System.out.println("Sufficient Decrease original "+value+" new "+ o.getCurrentValue()); - return o.getCurrentValue() <= value; - } - - - - - public static boolean sufficientCurvature(DifferentiableLineSearchObjective o, double c1, double c2){ -// if(debugLevel >= 2){ -// double current = Math.abs(o.getCurrentGradient()); -// double orig = -c2*o.getInitialGradient(); -// if(current <= orig){ -// return true; -// }else{ -// System.out.println("Not satistfying curvature condition curvature " + current + " wants " + orig); -// return false; -// } -// } - return Math.abs(o.getCurrentGradient()) <= -c2*o.getInitialGradient(); - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java deleted file mode 100644 index 0429d531..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java +++ /dev/null @@ -1,104 +0,0 @@ -package optimization.projections; - - -import java.util.Random; - -import optimization.util.MathUtils; -import optimization.util.MatrixOutput; - -/** - * Implements a projection into a box set defined by a and b. - * If either a or b are infinity then that bound is ignored. - * @author javg - * - */ -public class BoundsProjection extends Projection{ - - double a,b; - boolean ignoreA = false; - boolean ignoreB = false; - public BoundsProjection(double lowerBound, double upperBound) { - if(Double.isInfinite(lowerBound)){ - this.ignoreA = true; - }else{ - this.a =lowerBound; - } - if(Double.isInfinite(upperBound)){ - this.ignoreB = true; - }else{ - this.b =upperBound; - } - } - - - - /** - * Projects into the bounds - * a <= x_i <=b - */ - public void project(double[] original){ - for (int i = 0; i < original.length; i++) { - if(!ignoreA && original[i] < a){ - original[i] = a; - }else if(!ignoreB && original[i]>b){ - original[i]=b; - } - } - } - - /** - * Generates a random number between a and b. - */ - - Random r = new Random(); - - public double[] samplePoint(int numParams) { - double[] point = new double[numParams]; - for (int i = 0; i < point.length; i++) { - double rand = r.nextDouble(); - if(ignoreA && ignoreB){ - //Use const to avoid number near overflow - point[i] = rand*(1.E100+1.E100)-1.E100; - }else if(ignoreA){ - point[i] = rand*(b-1.E100)-1.E100; - }else if(ignoreB){ - point[i] = rand*(1.E100-a)-a; - }else{ - point[i] = rand*(b-a)-a; - } - } - return point; - } - - public static void main(String[] args) { - BoundsProjection sp = new BoundsProjection(0,Double.POSITIVE_INFINITY); - - - MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 1"); - MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 2"); - MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 3"); - - double[] d = {-1.1,1.2,1.4}; - double[] original = d.clone(); - MatrixOutput.printDoubleArray(d, "before"); - - sp.project(d); - MatrixOutput.printDoubleArray(d, "after"); - System.out.println("Test projection: " + sp.testProjection(original, d)); - } - - double epsilon = 1.E-10; - public double[] perturbePoint(double[] point, int parameter){ - double[] newPoint = point.clone(); - if(!ignoreA && MathUtils.almost(point[parameter], a)){ - newPoint[parameter]+=epsilon; - }else if(!ignoreB && MathUtils.almost(point[parameter], b)){ - newPoint[parameter]-=epsilon; - }else{ - newPoint[parameter]-=epsilon; - } - return newPoint; - } - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java deleted file mode 100644 index b5a9f92f..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java +++ /dev/null @@ -1,72 +0,0 @@ -package optimization.projections; - -import optimization.util.MathUtils; -import optimization.util.MatrixOutput; -import util.ArrayMath; -import util.Printing; - - - -public abstract class Projection { - - - public abstract void project(double[] original); - - - /** - * From the projection theorem "Non-Linear Programming" page - * 201 fact 2. - * - * Given some z in R, and a vector x* in X; - * x* = z+ iif for all x in X - * (z-x*)'(x-x*) <= 0 where 0 is when x*=x - * See figure 2.16 in book - * - * @param original - * @param projected - * @return - */ - public boolean testProjection(double[] original, double[] projected){ - double[] original1 = original.clone(); - //System.out.println(Printing.doubleArrayToString(original1, null, "original")); - //System.out.println(Printing.doubleArrayToString(projected, null, "projected")); - MathUtils.minusEquals(original1, projected, 1); - //System.out.println(Printing.doubleArrayToString(original1, null, "minus1")); - for(int i = 0; i < 10; i++){ - double[] x = samplePoint(original.length); - // System.out.println(Printing.doubleArrayToString(x, null, "sample")); - //If the same this returns zero so we are there. - MathUtils.minusEquals(x, projected, 1); - // System.out.println(Printing.doubleArrayToString(x, null, "minus2")); - double dotProd = MathUtils.dotProduct(original1, x); - - // System.out.println("dot " + dotProd); - if(dotProd > 0) return false; - } - - //Perturbs the point a bit in all possible directions - for(int i = 0; i < original.length; i++){ - double[] x = perturbePoint(projected,i); - // System.out.println(Printing.doubleArrayToString(x, null, "perturbed")); - //If the same this returns zero so we are there. - MathUtils.minusEquals(x, projected, 1); - // System.out.println(Printing.doubleArrayToString(x, null, "minus2")); - double dotProd = MathUtils.dotProduct(original1, x); - - // System.out.println("dot " + dotProd); - if(dotProd > 0) return false; - } - - - - return true; - } - - //Samples a point from the constrained set - public abstract double[] samplePoint(int dimensions); - - //Perturbs a point a bit still leaving it at the constraints set - public abstract double[] perturbePoint(double[] point, int parameter); - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java deleted file mode 100644 index f22afcaf..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java +++ /dev/null @@ -1,127 +0,0 @@ -package optimization.projections; - - - -import java.util.Random; - -import optimization.util.MathUtils; -import optimization.util.MatrixOutput; - -public class SimplexProjection extends Projection{ - - double scale; - public SimplexProjection(double scale) { - this.scale = scale; - } - - /** - * projects the numbers of the array - * into a simplex of size. - * We follow the description of the paper - * "Efficient Projetions onto the l1-Ball - * for learning in high dimensions" - */ - public void project(double[] original){ - double[] ds = new double[original.length]; - System.arraycopy(original, 0, ds, 0, ds.length); - //If sum is smaller then zero then its ok - for (int i = 0; i < ds.length; i++) ds[i] = ds[i]>0? ds[i]:0; - double sum = MathUtils.sum(ds); - if (scale - sum >= -1.E-10 ){ - System.arraycopy(ds, 0, original, 0, ds.length); - //System.out.println("Not projecting"); - return; - } - //System.out.println("projecting " + sum + " scontraints " + scale); - util.Array.sortDescending(ds); - double currentSum = 0; - double previousTheta = 0; - double theta = 0; - for (int i = 0; i < ds.length; i++) { - currentSum+=ds[i]; - theta = (currentSum-scale)/(i+1); - if(ds[i]-theta < -1e-10){ - break; - } - previousTheta = theta; - } - //DEBUG - if(previousTheta < 0){ - System.out.println("Simple Projection: Theta is smaller than zero: " + previousTheta); - System.exit(-1); - } - for (int i = 0; i < original.length; i++) { - original[i] = Math.max(original[i]-previousTheta, 0); - } - } - - - - - - - /** - * Samples a point from the simplex of scale. Just sample - * random number from 0-scale and then if - * their sum is bigger then sum make them normalize. - * This is probably not sampling uniformly from the simplex but it is - * enough for our goals in here. - */ - Random r = new Random(); - public double[] samplePoint(int dimensions) { - double[] newPoint = new double[dimensions]; - double sum =0; - for (int i = 0; i < newPoint.length; i++) { - double rand = r.nextDouble()*scale; - sum+=rand; - newPoint[i]=rand; - } - //Normalize - if(sum > scale){ - for (int i = 0; i < newPoint.length; i++) { - newPoint[i]=scale*newPoint[i]/sum; - } - } - return newPoint; - } - - public static void main(String[] args) { - SimplexProjection sp = new SimplexProjection(1); - - - double[] point = sp.samplePoint(3); - MatrixOutput.printDoubleArray(point , "random 1 sum:" + MathUtils.sum(point)); - point = sp.samplePoint(3); - MatrixOutput.printDoubleArray(point , "random 2 sum:" + MathUtils.sum(point)); - point = sp.samplePoint(3); - MatrixOutput.printDoubleArray(point , "random 3 sum:" + MathUtils.sum(point)); - - double[] d = {0,1.1,-10}; - double[] original = d.clone(); - MatrixOutput.printDoubleArray(d, "before"); - - sp.project(d); - MatrixOutput.printDoubleArray(d, "after"); - System.out.println("Test projection: " + sp.testProjection(original, d)); - - } - - - double epsilon = 1.E-10; - public double[] perturbePoint(double[] point, int parameter){ - double[] newPoint = point.clone(); - if(MathUtils.almost(MathUtils.sum(point), scale)){ - newPoint[parameter]-=epsilon; - } - else if(point[parameter]==0){ - newPoint[parameter]+=epsilon; - }else if(MathUtils.almost(point[parameter], scale)){ - newPoint[parameter]-=epsilon; - } - else{ - newPoint[parameter]-=epsilon; - } - return newPoint; - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java deleted file mode 100644 index 15760f18..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java +++ /dev/null @@ -1,33 +0,0 @@ -package optimization.stopCriteria; - -import java.util.ArrayList; - -import optimization.gradientBasedMethods.Objective; - -public class CompositeStopingCriteria implements StopingCriteria { - - ArrayList<StopingCriteria> criterias; - - public CompositeStopingCriteria() { - criterias = new ArrayList<StopingCriteria>(); - } - - public void add(StopingCriteria criteria){ - criterias.add(criteria); - } - - public boolean stopOptimization(Objective obj){ - for(StopingCriteria criteria: criterias){ - if(criteria.stopOptimization(obj)){ - return true; - } - } - return false; - } - - public void reset(){ - for(StopingCriteria criteria: criterias){ - criteria.reset(); - } - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java deleted file mode 100644 index 534ff833..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java +++ /dev/null @@ -1,30 +0,0 @@ -package optimization.stopCriteria; - -import optimization.gradientBasedMethods.Objective; -import optimization.util.MathUtils; - -public class GradientL2Norm implements StopingCriteria{ - - /** - * Stop if gradientNorm/(originalGradientNorm) smaller - * than gradientConvergenceValue - */ - protected double gradientConvergenceValue; - - - public GradientL2Norm(double gradientConvergenceValue){ - this.gradientConvergenceValue = gradientConvergenceValue; - } - - public void reset(){} - - public boolean stopOptimization(Objective obj){ - double norm = MathUtils.L2Norm(obj.gradient); - if(norm < gradientConvergenceValue){ - System.out.println("Gradient norm below treshold"); - return true; - } - return false; - - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java deleted file mode 100644 index 4a489641..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java +++ /dev/null @@ -1,48 +0,0 @@ -package optimization.stopCriteria; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.util.MathUtils; - -/** - * Divides the norm by the norm at the begining of the iteration - * @author javg - * - */ -public class NormalizedGradientL2Norm extends GradientL2Norm{ - - /** - * Stop if gradientNorm/(originalGradientNorm) smaller - * than gradientConvergenceValue - */ - protected double originalGradientNorm = -1; - - public void reset(){ - originalGradientNorm = -1; - } - public NormalizedGradientL2Norm(double gradientConvergenceValue){ - super(gradientConvergenceValue); - } - - - - - public boolean stopOptimization(Objective obj){ - double norm = MathUtils.L2Norm(obj.gradient); - if(originalGradientNorm == -1){ - originalGradientNorm = norm; - } - if(originalGradientNorm < 1E-10){ - System.out.println("Gradient norm is zero " + originalGradientNorm); - return true; - } - double normalizedNorm = 1.0*norm/originalGradientNorm; - if( normalizedNorm < gradientConvergenceValue){ - System.out.println("Gradient norm below normalized normtreshold: " + norm + " original: " + originalGradientNorm + " normalized norm: " + normalizedNorm); - return true; - }else{ -// System.out.println("projected gradient norm: " + norm); - return false; - } - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java deleted file mode 100644 index 5ae554c2..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java +++ /dev/null @@ -1,60 +0,0 @@ -package optimization.stopCriteria; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.util.MathUtils; - -/** - * Divides the norm by the norm at the begining of the iteration - * @author javg - * - */ -public class NormalizedProjectedGradientL2Norm extends ProjectedGradientL2Norm{ - - /** - * Stop if gradientNorm/(originalGradientNorm) smaller - * than gradientConvergenceValue - */ - double originalProjectedNorm = -1; - - public NormalizedProjectedGradientL2Norm(double gradientConvergenceValue){ - super(gradientConvergenceValue); - } - - public void reset(){ - originalProjectedNorm = -1; - } - - - double[] projectGradient(ProjectedObjective obj){ - - if(obj.auxParameters == null){ - obj.auxParameters = new double[obj.getNumParameters()]; - } - System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters()); - MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1); - obj.auxParameters = obj.projectPoint(obj.auxParameters); - MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1); - return obj.auxParameters; - } - - public boolean stopOptimization(Objective obj){ - if(obj instanceof ProjectedObjective) { - ProjectedObjective o = (ProjectedObjective) obj; - double norm = MathUtils.L2Norm(projectGradient(o)); - if(originalProjectedNorm == -1){ - originalProjectedNorm = norm; - } - double normalizedNorm = 1.0*norm/originalProjectedNorm; - if( normalizedNorm < gradientConvergenceValue){ - System.out.println("Gradient norm below normalized normtreshold: " + norm + " original: " + originalProjectedNorm + " normalized norm: " + normalizedNorm); - return true; - }else{ -// System.out.println("projected gradient norm: " + norm); - return false; - } - } - System.out.println("Not a projected objective"); - throw new RuntimeException(); - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java deleted file mode 100644 index 6dbbc50d..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java +++ /dev/null @@ -1,54 +0,0 @@ -package optimization.stopCriteria; - -import optimization.gradientBasedMethods.Objective; -import optimization.util.MathUtils; - -public class NormalizedValueDifference implements StopingCriteria{ - - /** - * Stop if the different between values is smaller than a treshold - */ - protected double valueConvergenceValue=0.01; - protected double previousValue = Double.NaN; - protected double currentValue = Double.NaN; - - public NormalizedValueDifference(double valueConvergenceValue){ - this.valueConvergenceValue = valueConvergenceValue; - } - - public void reset(){ - previousValue = Double.NaN; - currentValue = Double.NaN; - } - - - public boolean stopOptimization(Objective obj){ - if(Double.isNaN(currentValue)){ - currentValue = obj.getValue(); - return false; - }else { - previousValue = currentValue; - currentValue = obj.getValue(); - if(previousValue != 0){ - double valueDiff = Math.abs(previousValue - currentValue)/Math.abs(previousValue); - if( valueDiff < valueConvergenceValue){ - System.out.println("Leaving different in values is to small: Prev " - + (previousValue/previousValue) + " Curr: " + (currentValue/previousValue) - + " diff: " + valueDiff); - return true; - } - }else{ - double valueDiff = Math.abs(previousValue - currentValue); - if( valueDiff < valueConvergenceValue){ - System.out.println("Leaving different in values is to small: Prev " - + (previousValue) + " Curr: " + (currentValue) - + " diff: " + valueDiff); - return true; - } - } - - return false; - } - - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java deleted file mode 100644 index aadf1fd5..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java +++ /dev/null @@ -1,51 +0,0 @@ -package optimization.stopCriteria; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.util.MathUtils; - -public class ProjectedGradientL2Norm implements StopingCriteria{ - - /** - * Stop if gradientNorm/(originalGradientNorm) smaller - * than gradientConvergenceValue - */ - protected double gradientConvergenceValue; - - - public ProjectedGradientL2Norm(double gradientConvergenceValue){ - this.gradientConvergenceValue = gradientConvergenceValue; - } - - public void reset(){ - - } - - double[] projectGradient(ProjectedObjective obj){ - - if(obj.auxParameters == null){ - obj.auxParameters = new double[obj.getNumParameters()]; - } - System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters()); - MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1); - obj.auxParameters = obj.projectPoint(obj.auxParameters); - MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1); - return obj.auxParameters; - } - - public boolean stopOptimization(Objective obj){ - if(obj instanceof ProjectedObjective) { - ProjectedObjective o = (ProjectedObjective) obj; - double norm = MathUtils.L2Norm(projectGradient(o)); - if(norm < gradientConvergenceValue){ - // System.out.println("Gradient norm below treshold: " + norm); - return true; - }else{ -// System.out.println("projected gradient norm: " + norm); - return false; - } - } - System.out.println("Not a projected objective"); - throw new RuntimeException(); - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java deleted file mode 100644 index 10cf0522..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java +++ /dev/null @@ -1,8 +0,0 @@ -package optimization.stopCriteria; - -import optimization.gradientBasedMethods.Objective; - -public interface StopingCriteria { - public boolean stopOptimization(Objective obj); - public void reset(); -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java deleted file mode 100644 index e5d07229..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java +++ /dev/null @@ -1,41 +0,0 @@ -package optimization.stopCriteria; - -import optimization.gradientBasedMethods.Objective; -import optimization.util.MathUtils; - -public class ValueDifference implements StopingCriteria{ - - /** - * Stop if the different between values is smaller than a treshold - */ - protected double valueConvergenceValue=0.01; - protected double previousValue = Double.NaN; - protected double currentValue = Double.NaN; - - public ValueDifference(double valueConvergenceValue){ - this.valueConvergenceValue = valueConvergenceValue; - } - - public void reset(){ - previousValue = Double.NaN; - currentValue = Double.NaN; - } - - public boolean stopOptimization(Objective obj){ - if(Double.isNaN(currentValue)){ - currentValue = obj.getValue(); - return false; - }else { - previousValue = currentValue; - currentValue = obj.getValue(); - if(previousValue - currentValue < valueConvergenceValue){ -// System.out.println("Leaving different in values is to small: Prev " -// + previousValue + " Curr: " + currentValue -// + " diff: " + (previousValue - currentValue)); - return true; - } - return false; - } - - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java deleted file mode 100644 index cdbdefc6..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java +++ /dev/null @@ -1,37 +0,0 @@ -package optimization.util; - -public class Interpolation { - - /** - * Fits a cubic polinomyal to a function given two points, - * such that either gradB is bigger than zero or funcB >= funcA - * - * NonLinear Programming appendix C - * @param funcA - * @param gradA - * @param funcB - * @param gradB - */ - public final static double cubicInterpolation(double a, - double funcA, double gradA, double b,double funcB, double gradB ){ - if(gradB < 0 && funcA > funcB){ - System.out.println("Cannot call cubic interpolation"); - return -1; - } - - double z = 3*(funcA-funcB)/(b-a) + gradA + gradB; - double w = Math.sqrt(z*z - gradA*gradB); - double min = b -(gradB+w-z)*(b-a)/(gradB-gradA+2*w); - return min; - } - - public final static double quadraticInterpolation(double initFValue, - double initGrad, double point,double pointFValue){ - double min = -1*initGrad*point*point/(2*(pointFValue-initGrad*point-initFValue)); - return min; - } - - public static void main(String[] args) { - - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java deleted file mode 100644 index 5343a39b..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java +++ /dev/null @@ -1,7 +0,0 @@ -package optimization.util; - -public class Logger { - - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java deleted file mode 100644 index af66f82c..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java +++ /dev/null @@ -1,339 +0,0 @@ -package optimization.util; - -import java.util.Arrays; - - - -public class MathUtils { - - /** - * - * @param vector - * @return - */ - public static double L2Norm(double[] vector){ - double value = 0; - for(int i = 0; i < vector.length; i++){ - double v = vector[i]; - value+=v*v; - } - return Math.sqrt(value); - } - - public static double sum(double[] v){ - double sum = 0; - for (int i = 0; i < v.length; i++) { - sum+=v[i]; - } - return sum; - } - - - - - /** - * w = w + v - * @param w - * @param v - */ - public static void plusEquals(double[] w, double[] v) { - for(int i=0; i<w.length;i++){ - w[i] += w[i] + v[i]; - } - } - - /** - * w[i] = w[i] + v - * @param w - * @param v - */ - public static void plusEquals(double[] w, double v) { - for(int i=0; i<w.length;i++){ - w[i] += w[i] + v; - } - } - - /** - * w[i] = w[i] - v - * @param w - * @param v - */ - public static void minusEquals(double[] w, double v) { - for(int i=0; i<w.length;i++){ - w[i] -= w[i] + v; - } - } - - /** - * w = w + a*v - * @param w - * @param v - * @param a - */ - public static void plusEquals(double[] w, double[] v, double a) { - for(int i=0; i<w.length;i++){ - w[i] += a*v[i]; - } - } - - /** - * w = w - a*v - * @param w - * @param v - * @param a - */ - public static void minusEquals(double[] w, double[] v, double a) { - for(int i=0; i<w.length;i++){ - w[i] -= a*v[i]; - } - } - /** - * v = w - a*v - * @param w - * @param v - * @param a - */ - public static void minusEqualsInverse(double[] w, double[] v, double a) { - for(int i=0; i<w.length;i++){ - v[i] = w[i] - a*v[i]; - } - } - - public static double dotProduct(double[] w, double[] v){ - double accum = 0; - for(int i=0; i<w.length;i++){ - accum += w[i]*v[i]; - } - return accum; - } - - public static double[] arrayMinus(double[]w, double[]v){ - double result[] = w.clone(); - for(int i=0; i<w.length;i++){ - result[i] -= v[i]; - } - return result; - } - - public static double[] arrayMinus(double[] result , double[]w, double[]v){ - for(int i=0; i<w.length;i++){ - result[i] = w[i]-v[i]; - } - return result; - } - - public static double[] negation(double[]w){ - double result[] = new double[w.length]; - for(int i=0; i<w.length;i++){ - result[i] = -w[i]; - } - return result; - } - - public static double square(double value){ - return value*value; - } - public static double[][] outerProduct(double[] w, double[] v){ - double[][] result = new double[w.length][v.length]; - for(int i = 0; i < w.length; i++){ - for(int j = 0; j < v.length; j++){ - result[i][j] = w[i]*v[j]; - } - } - return result; - } - /** - * results = a*W*V - * @param w - * @param v - * @param a - * @return - */ - public static double[][] weightedouterProduct(double[] w, double[] v, double a){ - double[][] result = new double[w.length][v.length]; - for(int i = 0; i < w.length; i++){ - for(int j = 0; j < v.length; j++){ - result[i][j] = a*w[i]*v[j]; - } - } - return result; - } - - public static double[][] identity(int size){ - double[][] result = new double[size][size]; - for(int i = 0; i < size; i++){ - result[i][i] = 1; - } - return result; - } - - /** - * v -= w - * @param v - * @param w - */ - public static void minusEquals(double[][] w, double[][] v){ - for(int i = 0; i < w.length; i++){ - for(int j = 0; j < w[0].length; j++){ - w[i][j] -= v[i][j]; - } - } - } - - /** - * v[i][j] -= a*w[i][j] - * @param v - * @param w - */ - public static void minusEquals(double[][] w, double[][] v, double a){ - for(int i = 0; i < w.length; i++){ - for(int j = 0; j < w[0].length; j++){ - w[i][j] -= a*v[i][j]; - } - } - } - - /** - * v += w - * @param v - * @param w - */ - public static void plusEquals(double[][] w, double[][] v){ - for(int i = 0; i < w.length; i++){ - for(int j = 0; j < w[0].length; j++){ - w[i][j] += v[i][j]; - } - } - } - - /** - * v[i][j] += a*w[i][j] - * @param v - * @param w - */ - public static void plusEquals(double[][] w, double[][] v, double a){ - for(int i = 0; i < w.length; i++){ - for(int j = 0; j < w[0].length; j++){ - w[i][j] += a*v[i][j]; - } - } - } - - - /** - * results = w*v - * @param w - * @param v - * @return - */ - public static double[][] matrixMultiplication(double[][] w,double[][] v){ - int w1 = w.length; - int w2 = w[0].length; - int v1 = v.length; - int v2 = v[0].length; - - if(w2 != v1){ - System.out.println("Matrix dimensions do not agree..."); - System.exit(-1); - } - - double[][] result = new double[w1][v2]; - for(int w_i1 = 0; w_i1 < w1; w_i1++){ - for(int v_i2 = 0; v_i2 < v2; v_i2++){ - double sum = 0; - for(int w_i2 = 0; w_i2 < w2; w_i2++){ - sum += w[w_i1 ][w_i2]*v[w_i2][v_i2]; - } - result[w_i1][v_i2] = sum; - } - } - return result; - } - - /** - * w = w.*v - * @param w - * @param v - */ - public static void matrixScalarMultiplication(double[][] w,double v){ - int w1 = w.length; - int w2 = w[0].length; - for(int w_i1 = 0; w_i1 < w1; w_i1++){ - for(int w_i2 = 0; w_i2 < w2; w_i2++){ - w[w_i1 ][w_i2] *= v; - } - } - } - - public static void scalarMultiplication(double[] w,double v){ - int w1 = w.length; - for(int w_i1 = 0; w_i1 < w1; w_i1++){ - w[w_i1 ] *= v; - } - - } - - public static double[] matrixVector(double[][] w,double[] v){ - int w1 = w.length; - int w2 = w[0].length; - int v1 = v.length; - - if(w2 != v1){ - System.out.println("Matrix dimensions do not agree..."); - System.exit(-1); - } - - double[] result = new double[w1]; - for(int w_i1 = 0; w_i1 < w1; w_i1++){ - double sum = 0; - for(int w_i2 = 0; w_i2 < w2; w_i2++){ - sum += w[w_i1 ][w_i2]*v[w_i2]; - } - result[w_i1] = sum; - } - return result; - } - - public static boolean allPositive(double[] array){ - for (int i = 0; i < array.length; i++) { - if(array[i] < 0) return false; - } - return true; - } - - - - - - public static void main(String[] args) { - double[][] m1 = new double[2][2]; - m1[0][0]=2; - m1[1][0]=2; - m1[0][1]=2; - m1[1][1]=2; - MatrixOutput.printDoubleArray(m1, "m1"); - double[][] m2 = new double[2][2]; - m2[0][0]=3; - m2[1][0]=3; - m2[0][1]=3; - m2[1][1]=3; - MatrixOutput.printDoubleArray(m2, "m2"); - double[][] result = matrixMultiplication(m1, m2); - MatrixOutput.printDoubleArray(result, "result"); - matrixScalarMultiplication(result, 3); - MatrixOutput.printDoubleArray(result, "result after multiply by 3"); - } - - public static boolean almost(double a, double b, double prec){ - return Math.abs(a-b)/Math.abs(a+b) <= prec || (almostZero(a) && almostZero(b)); - } - - public static boolean almost(double a, double b){ - return Math.abs(a-b)/Math.abs(a+b) <= 1e-10 || (almostZero(a) && almostZero(b)); - } - - public static boolean almostZero(double a) { - return Math.abs(a) <= 1e-30; - } - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java deleted file mode 100644 index 9fbdf955..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java +++ /dev/null @@ -1,28 +0,0 @@ -package optimization.util; - - -public class MatrixOutput { - public static void printDoubleArray(double[][] array, String arrayName) { - int size1 = array.length; - int size2 = array[0].length; - System.out.println(arrayName); - for (int i = 0; i < size1; i++) { - for (int j = 0; j < size2; j++) { - System.out.print(" " + StaticTools.prettyPrint(array[i][j], - "00.00E00", 4) + " "); - - } - System.out.println(); - } - System.out.println(); - } - - public static void printDoubleArray(double[] array, String arrayName) { - System.out.println(arrayName); - for (int i = 0; i < array.length; i++) { - System.out.print(" " + StaticTools.prettyPrint(array[i], - "00.00E00", 4) + " "); - } - System.out.println(); - } -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java deleted file mode 100644 index bcabee06..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java +++ /dev/null @@ -1,180 +0,0 @@ -package optimization.util; - - -import java.io.File; -import java.io.PrintStream; - -public class StaticTools { - - static java.text.DecimalFormat fmt = new java.text.DecimalFormat(); - - public static void createDir(String directory) { - - File dir = new File(directory); - if (!dir.isDirectory()) { - boolean success = dir.mkdirs(); - if (!success) { - System.out.println("Unable to create directory " + directory); - System.exit(0); - } - System.out.println("Created directory " + directory); - } else { - System.out.println("Reusing directory " + directory); - } - } - - /* - * q and p are indexed by source/foreign Sum_S(q) = 1 the same for p KL(q,p) = - * Eq*q/p - */ - public static double KLDistance(double[][] p, double[][] q, int sourceSize, - int foreignSize) { - double totalKL = 0; - // common.StaticTools.printMatrix(q, sourceSize, foreignSize, "q", - // System.out); - // common.StaticTools.printMatrix(p, sourceSize, foreignSize, "p", - // System.out); - for (int i = 0; i < sourceSize; i++) { - double kl = 0; - for (int j = 0; j < foreignSize; j++) { - assert !Double.isNaN(q[i][j]) : "KLDistance q: prob is NaN"; - assert !Double.isNaN(p[i][j]) : "KLDistance p: prob is NaN"; - if (p[i][j] == 0 || q[i][j] == 0) { - continue; - } else { - kl += q[i][j] * Math.log(q[i][j] / p[i][j]); - } - - } - totalKL += kl; - } - assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN"; - if (totalKL < -1.0E-10) { - System.out.println("KL Smaller than zero " + totalKL); - System.out.println("Source Size" + sourceSize); - System.out.println("Foreign Size" + foreignSize); - StaticTools.printMatrix(q, sourceSize, foreignSize, "q", - System.out); - StaticTools.printMatrix(p, sourceSize, foreignSize, "p", - System.out); - System.exit(-1); - } - return totalKL / sourceSize; - } - - /* - * indexed the by [fi][si] - */ - public static double KLDistancePrime(double[][] p, double[][] q, - int sourceSize, int foreignSize) { - double totalKL = 0; - for (int i = 0; i < sourceSize; i++) { - double kl = 0; - for (int j = 0; j < foreignSize; j++) { - assert !Double.isNaN(q[j][i]) : "KLDistance q: prob is NaN"; - assert !Double.isNaN(p[j][i]) : "KLDistance p: prob is NaN"; - if (p[j][i] == 0 || q[j][i] == 0) { - continue; - } else { - kl += q[j][i] * Math.log(q[j][i] / p[j][i]); - } - - } - totalKL += kl; - } - assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN"; - return totalKL / sourceSize; - } - - public static double Entropy(double[][] p, int sourceSize, int foreignSize) { - double totalE = 0; - for (int i = 0; i < foreignSize; i++) { - double e = 0; - for (int j = 0; j < sourceSize; j++) { - e += p[i][j] * Math.log(p[i][j]); - } - totalE += e; - } - return totalE / sourceSize; - } - - public static double[][] copyMatrix(double[][] original, int sourceSize, - int foreignSize) { - double[][] result = new double[sourceSize][foreignSize]; - for (int i = 0; i < sourceSize; i++) { - for (int j = 0; j < foreignSize; j++) { - result[i][j] = original[i][j]; - } - } - return result; - } - - public static void printMatrix(double[][] matrix, int sourceSize, - int foreignSize, String info, PrintStream out) { - - java.text.DecimalFormat fmt = new java.text.DecimalFormat(); - fmt.setMaximumFractionDigits(3); - fmt.setMaximumIntegerDigits(3); - fmt.setMinimumFractionDigits(3); - fmt.setMinimumIntegerDigits(3); - - out.println(info); - - for (int i = 0; i < foreignSize; i++) { - for (int j = 0; j < sourceSize; j++) { - out.print(prettyPrint(matrix[j][i], ".00E00", 6) + " "); - } - out.println(); - } - out.println(); - out.println(); - } - - public static void printMatrix(int[][] matrix, int sourceSize, - int foreignSize, String info, PrintStream out) { - - out.println(info); - for (int i = 0; i < foreignSize; i++) { - for (int j = 0; j < sourceSize; j++) { - out.print(matrix[j][i] + " "); - } - out.println(); - } - out.println(); - out.println(); - } - - public static String formatTime(long duration) { - StringBuilder sb = new StringBuilder(); - double d = duration / 1000; - fmt.applyPattern("00"); - sb.append(fmt.format((int) (d / (60 * 60))) + ":"); - d -= ((int) d / (60 * 60)) * 60 * 60; - sb.append(fmt.format((int) (d / 60)) + ":"); - d -= ((int) d / 60) * 60; - fmt.applyPattern("00.0"); - sb.append(fmt.format(d)); - return sb.toString(); - } - - public static String prettyPrint(double d, String patt, int len) { - fmt.applyPattern(patt); - String s = fmt.format(d); - while (s.length() < len) { - s = " " + s; - } - return s; - } - - - public static long getUsedMemory(){ - System.gc(); - return (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/ (1024 * 1024); - } - - public final static boolean compareDoubles(double d1, double d2){ - return Math.abs(d1-d2) <= 1.E-10; - } - - -} diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree.java b/gi/posterior-regularisation/prjava/src/phrase/Agree.java deleted file mode 100644 index 8f7b499e..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/Agree.java +++ /dev/null @@ -1,204 +0,0 @@ -package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree {
- PhraseCluster model1;
- C2F model2;
- Corpus c;
- private int K,n_phrases, n_words, n_contexts, n_positions1,n_positions2;
-
- /**@brief sum of loglikelihood of two
- * individual models
- */
- public double llh;
- /**@brief Bhattacharyya distance
- *
- */
- public double bdist;
- /**
- *
- * @param numCluster
- * @param corpus
- */
- public Agree(int numCluster, Corpus corpus){
-
- model1=new PhraseCluster(numCluster, corpus);
- model2=new C2F(numCluster,corpus);
- c=corpus;
- n_words=c.getNumWords();
- n_phrases=c.getNumPhrases();
- n_contexts=c.getNumContexts();
- n_positions1=c.getNumContextPositions();
- n_positions2=2;
- K=numCluster;
-
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- //String in="../pdata/canned.con";
- String in="../pdata/btec.con";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- Agree agree=new Agree(numCluster, corpus);
- int iter=20;
- for(int i=0;i<iter;i++){
- agree.EM();
- System.out.println("Iter"+i+", llh: "+agree.llh+
- ", divergence:"+agree.bdist+
- " sum: "+(agree.llh+agree.bdist));
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- agree.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
-
- double [][][]exp_emit1=new double [K][n_positions1][n_words];
- double [][]exp_pi1=new double[n_phrases][K];
-
- double [][][]exp_emit2=new double [K][n_positions2][n_words];
- double [][]exp_pi2=new double[n_contexts][K];
-
- llh=0;
- bdist=0;
- //E
- for(int context=0; context< n_contexts; context++){
-
- List<Edge> contexts = c.getEdgesForContext(context);
-
- for (int ctx=0; ctx<contexts.size(); ctx++){
- Edge edge = contexts.get(ctx);
- int phrase=edge.getPhraseId();
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- bdist += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList phraseToks = edge.getPhrase();
- TIntArrayList contextToks = edge.getContext();
- for(int tag=0;tag<K;tag++){
-
- for(int position=0;position<n_positions1;position++){
- exp_emit1[tag][position][contextToks.get(position)]+=p[tag]*count;
- }
-
- exp_emit2[tag][0][phraseToks.get(0)]+=p[tag]*count;
- exp_emit2[tag][1][phraseToks.get(phraseToks.size()-1)]+=p[tag]*count;
-
- exp_pi1[phrase][tag]+=p[tag]*count;
- exp_pi2[context][tag]+=p[tag]*count;
- }
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit1){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi1){
- arr.F.l1normalize(j);
- }
-
- for(double [][]i:exp_emit2){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi2){
- arr.F.l1normalize(j);
- }
-
- model1.emit=exp_emit1;
- model1.pi=exp_pi1;
- model2.emit=exp_emit2;
- model2.pi=exp_pi2;
-
- return llh;
- }
-
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob1=model1.posterior(edge);
- double[] prob2=model2.posterior(edge);
-
- llh+=edge.getCount()*Math.log(arr.F.l1norm(prob1));
- llh+=edge.getCount()*Math.log(arr.F.l1norm(prob2));
- arr.F.l1normalize(prob1);
- arr.F.l1normalize(prob2);
-
- for(int i=0;i<prob1.length;i++){
- prob1[i]*=prob2[i];
- prob1[i]=Math.sqrt(prob1[i]);
- }
-
- return prob1;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- displayPosterior(ps, c.getEdges());
- }
-
- public void displayPosterior(PrintStream ps, List<Edge> test)
- {
- for (Edge edge : test)
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java b/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java deleted file mode 100644 index 031f887f..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java +++ /dev/null @@ -1,197 +0,0 @@ -package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree2Sides {
- PhraseCluster model1,model2;
- Corpus c1,c2;
- private int K;
-
- /**@brief sum of loglikelihood of two
- * individual models
- */
- public double llh;
- /**@brief Bhattacharyya distance
- *
- */
- public double bdist;
- /**
- *
- * @param numCluster
- * @param corpus
- */
- public Agree2Sides(int numCluster, Corpus corpus1 , Corpus corpus2 ){
-
- model1=new PhraseCluster(numCluster, corpus1);
- model2=new PhraseCluster(numCluster,corpus2);
- c1=corpus1;
- c2=corpus2;
- K=numCluster;
-
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- //String in="../pdata/canned.con";
- // String in="../pdata/btec.con";
- String in1="../pdata/source.txt";
- String in2="../pdata/target.txt";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus1 = null,corpus2=null;
- File infile1 = new File(in1),infile2=new File(in2);
- try {
- System.out.println("Reading concordance from " + infile1);
- corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
- System.out.println("Reading concordance from " + infile2);
- corpus2 = Corpus.readFromFile(FileUtil.reader(infile2));
- corpus1.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile1);
- e.printStackTrace();
- System.exit(1);
- }
-
- Agree2Sides agree=new Agree2Sides(numCluster, corpus1,corpus2);
- int iter=20;
- for(int i=0;i<iter;i++){
- agree.EM();
- System.out.println("Iter"+i+", llh: "+agree.llh+
- ", divergence:"+agree.bdist+
- " sum: "+(agree.llh+agree.bdist));
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- agree.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
-
- double [][][]exp_emit1=new double [K][c1.getNumContextPositions()][c1.getNumWords()];
- double [][]exp_pi1=new double[c1.getNumPhrases()][K];
-
- double [][][]exp_emit2=new double [K][c2.getNumContextPositions()][c2.getNumWords()];
- double [][]exp_pi2=new double[c2.getNumPhrases()][K];
-
- llh=0;
- bdist=0;
- //E
- for(int i=0;i<c1.getEdges().size();i++){
- Edge edge1=c1.getEdges().get(i);
- Edge edge2=c2.getEdges().get(i);
- double p[]=posterior(i);
- double z = arr.F.l1norm(p);
- assert z > 0;
- bdist += edge1.getCount() * Math.log(z);
- arr.F.l1normalize(p);
- double count = edge1.getCount();
- //increment expected count
- TIntArrayList contextToks1 = edge1.getContext();
- TIntArrayList contextToks2 = edge2.getContext();
- int phrase1=edge1.getPhraseId();
- int phrase2=edge2.getPhraseId();
- for(int tag=0;tag<K;tag++){
- for(int position=0;position<c1.getNumContextPositions();position++){
- exp_emit1[tag][position][contextToks1.get(position)]+=p[tag]*count;
- }
- for(int position=0;position<c2.getNumContextPositions();position++){
- exp_emit2[tag][position][contextToks2.get(position)]+=p[tag]*count;
- }
- exp_pi1[phrase1][tag]+=p[tag]*count;
- exp_pi2[phrase2][tag]+=p[tag]*count;
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit1){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi1){
- arr.F.l1normalize(j);
- }
-
- for(double [][]i:exp_emit2){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi2){
- arr.F.l1normalize(j);
- }
-
- model1.emit=exp_emit1;
- model1.pi=exp_pi1;
- model2.emit=exp_emit2;
- model2.pi=exp_pi2;
-
- return llh;
- }
-
- public double[] posterior(int edgeIdx)
- {
- return posterior(c1.getEdges().get(edgeIdx), c2.getEdges().get(edgeIdx));
- }
-
- public double[] posterior(Edge e1, Edge e2)
- {
- double[] prob1=model1.posterior(e1);
- double[] prob2=model2.posterior(e2);
-
- llh+=e1.getCount()*Math.log(arr.F.l1norm(prob1));
- llh+=e2.getCount()*Math.log(arr.F.l1norm(prob2));
- arr.F.l1normalize(prob1);
- arr.F.l1normalize(prob2);
-
- for(int i=0;i<prob1.length;i++){
- prob1[i]*=prob2[i];
- prob1[i]=Math.sqrt(prob1[i]);
- }
-
- return prob1;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- for (int i=0;i<c1.getEdges().size();i++)
- {
- Edge edge=c1.getEdges().get(i);
- double probs[] = posterior(i);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/C2F.java b/gi/posterior-regularisation/prjava/src/phrase/C2F.java deleted file mode 100644 index e8783950..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/C2F.java +++ /dev/null @@ -1,216 +0,0 @@ -package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Arrays;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-/**
- * @brief context generates phrase
- * @author desaic
- *
- */
-public class C2F {
- public int K;
- private int n_words, n_contexts, n_positions;
- public Corpus c;
-
- /**@brief
- * emit[tag][position][word] = p(word | tag, position in phrase)
- */
- public double emit[][][];
- /**@brief
- * pi[context][tag] = p(tag | context)
- */
- public double pi[][];
-
- public C2F(int numCluster, Corpus corpus){
- K=numCluster;
- c=corpus;
- n_words=c.getNumWords();
- n_contexts=c.getNumContexts();
-
- //number of words in a phrase to be considered
- //currently the first and last word in source and target
- //if the phrase has length 1 in either dimension then
- //we use the same word for two positions
- n_positions=c.phraseEdges(c.getEdges().get(0).getPhrase()).size();
-
- emit=new double [K][n_positions][n_words];
- pi=new double[n_contexts][K];
-
- for(double [][]i:emit){
- for(double []j:i){
- arr.F.randomise(j);
- }
- }
-
- for(double []j:pi){
- arr.F.randomise(j);
- }
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- String in="../pdata/canned.con";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- C2F c2f=new C2F(numCluster,corpus);
- int iter=20;
- double llh=0;
- for(int i=0;i<iter;i++){
- llh=c2f.EM();
- System.out.println("Iter"+i+", llh: "+llh);
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- c2f.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double [][]exp_pi=new double[n_contexts][K];
-
- double loglikelihood=0;
-
- //E
- for(int context=0; context< n_contexts; context++){
-
- List<Edge> contexts = c.getEdgesForContext(context);
-
- for (int ctx=0; ctx<contexts.size(); ctx++){
- Edge edge = contexts.get(ctx);
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- loglikelihood += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList phrase= edge.getPhrase();
- for(int tag=0;tag<K;tag++){
-
- exp_emit[tag][0][phrase.get(0)]+=p[tag]*count;
- exp_emit[tag][1][phrase.get(phrase.size()-1)]+=p[tag]*count;
-
- exp_pi[context][tag]+=p[tag]*count;
- }
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- emit=exp_emit;
-
- for(double []j:exp_pi){
- arr.F.l1normalize(j);
- }
-
- pi=exp_pi;
-
- return loglikelihood;
- }
-
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob=Arrays.copyOf(pi[edge.getContextId()], K);
-
- TIntArrayList phrase = edge.getPhrase();
- TIntArrayList offsets = c.phraseEdges(phrase);
- for(int tag=0;tag<K;tag++)
- {
- for (int i=0; i < offsets.size(); ++i)
- prob[tag]*=emit[tag][i][phrase.get(offsets.get(i))];
- }
-
- return prob;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- for (Edge edge : c.getEdges())
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-
- public void displayModelParam(PrintStream ps)
- {
- final double EPS = 1e-6;
-
- ps.println("P(tag|context)");
- for (int i = 0; i < n_contexts; ++i)
- {
- ps.print(c.getContext(i));
- for(int j=0;j<pi[i].length;j++){
- if (pi[i][j] > EPS)
- ps.print("\t" + j + ": " + pi[i][j]);
- }
- ps.println();
- }
-
- ps.println("P(word|tag,position)");
- for (int i = 0; i < K; ++i)
- {
- for(int position=0;position<n_positions;position++){
- ps.println("tag " + i + " position " + position);
- for(int word=0;word<emit[i][position].length;word++){
- if (emit[i][position][word] > EPS)
- ps.print(c.getWord(word)+"="+emit[i][position][word]+"\t");
- }
- ps.println();
- }
- ps.println();
- }
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java b/gi/posterior-regularisation/prjava/src/phrase/Corpus.java deleted file mode 100644 index 4b1939cd..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java +++ /dev/null @@ -1,288 +0,0 @@ -package phrase; - -import gnu.trove.TIntArrayList; - -import java.io.*; -import java.util.*; -import java.util.regex.Pattern; - - -public class Corpus -{ - private Lexicon<String> wordLexicon = new Lexicon<String>(); - private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>(); - private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>(); - private List<Edge> edges = new ArrayList<Edge>(); - private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>(); - private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>(); - public int splitSentinel; - public int phraseSentinel; - public int rareSentinel; - - public Corpus() - { - splitSentinel = wordLexicon.insert("<SPLIT>"); - phraseSentinel = wordLexicon.insert("<PHRASE>"); - rareSentinel = wordLexicon.insert("<RARE>"); - } - - public class Edge - { - - Edge(int phraseId, int contextId, double count,int tag) - { - this.phraseId = phraseId; - this.contextId = contextId; - this.count = count; - fixTag=tag; - } - - Edge(int phraseId, int contextId, double count) - { - this.phraseId = phraseId; - this.contextId = contextId; - this.count = count; - fixTag=-1; - } - public int getTag(){ - return fixTag; - } - - public int getPhraseId() - { - return phraseId; - } - public TIntArrayList getPhrase() - { - return Corpus.this.getPhrase(phraseId); - } - public String getPhraseString() - { - return Corpus.this.getPhraseString(phraseId); - } - public int getContextId() - { - return contextId; - } - public TIntArrayList getContext() - { - return Corpus.this.getContext(contextId); - } - public String getContextString(boolean insertPhraseSentinel) - { - return Corpus.this.getContextString(contextId, insertPhraseSentinel); - } - public double getCount() - { - return count; - } - public boolean equals(Object other) - { - if (other instanceof Edge) - { - Edge oe = (Edge) other; - return oe.phraseId == phraseId && oe.contextId == contextId; - } - else return false; - } - public int hashCode() - { // this is how boost's hash_combine does it - int seed = phraseId; - seed ^= contextId + 0x9e3779b9 + (seed << 6) + (seed >> 2); - return seed; - } - public String toString() - { - return getPhraseString() + "\t" + getContextString(true); - } - - private int phraseId; - private int contextId; - private double count; - private int fixTag; - } - - List<Edge> getEdges() - { - return edges; - } - - int getNumEdges() - { - return edges.size(); - } - - int getNumPhrases() - { - return phraseLexicon.size(); - } - - int getNumContextPositions() - { - return contextLexicon.lookup(0).size(); - } - - List<Edge> getEdgesForPhrase(int phraseId) - { - return phraseToContext.get(phraseId); - } - - int getNumContexts() - { - return contextLexicon.size(); - } - - List<Edge> getEdgesForContext(int contextId) - { - return contextToPhrase.get(contextId); - } - - int getNumWords() - { - return wordLexicon.size(); - } - - String getWord(int wordId) - { - return wordLexicon.lookup(wordId); - } - - public TIntArrayList getPhrase(int phraseId) - { - return phraseLexicon.lookup(phraseId); - } - - public String getPhraseString(int phraseId) - { - StringBuffer b = new StringBuffer(); - for (int tid: getPhrase(phraseId).toNativeArray()) - { - if (b.length() > 0) - b.append(" "); - b.append(wordLexicon.lookup(tid)); - } - return b.toString(); - } - - public TIntArrayList getContext(int contextId) - { - return contextLexicon.lookup(contextId); - } - - public String getContextString(int contextId, boolean insertPhraseSentinel) - { - StringBuffer b = new StringBuffer(); - TIntArrayList c = getContext(contextId); - for (int i = 0; i < c.size(); ++i) - { - if (i > 0) b.append(" "); - //if (i == c.size() / 2) b.append("<PHRASE> "); - b.append(wordLexicon.lookup(c.get(i))); - } - return b.toString(); - } - - public boolean isSentinel(int wordId) - { - return wordId == splitSentinel || wordId == phraseSentinel; - } - - List<Edge> readEdges(Reader in) throws IOException - { - // read in line-by-line - BufferedReader bin = new BufferedReader(in); - String line; - Pattern separator = Pattern.compile(" \\|\\|\\| "); - - List<Edge> edges = new ArrayList<Edge>(); - while ((line = bin.readLine()) != null) - { - // split into phrase and contexts - StringTokenizer st = new StringTokenizer(line, "\t"); - assert (st.hasMoreTokens()); - String phraseToks = st.nextToken(); - assert (st.hasMoreTokens()); - String rest = st.nextToken(); - assert (!st.hasMoreTokens()); - - // process phrase - st = new StringTokenizer(phraseToks, " "); - TIntArrayList ptoks = new TIntArrayList(); - while (st.hasMoreTokens()) - ptoks.add(wordLexicon.insert(st.nextToken())); - int phraseId = phraseLexicon.insert(ptoks); - - // process contexts - String[] parts = separator.split(rest); - assert (parts.length % 2 == 0); - for (int i = 0; i < parts.length; i += 2) - { - // process pairs of strings - context and count - String ctxString = parts[i]; - String countString = parts[i + 1]; - - assert (countString.startsWith("C=")); - - String []countToks=countString.split(" "); - - double count = Double.parseDouble(countToks[0].substring(2).trim()); - - TIntArrayList ctx = new TIntArrayList(); - StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " "); - while (ctxStrtok.hasMoreTokens()) - { - String token = ctxStrtok.nextToken(); - ctx.add(wordLexicon.insert(token)); - } - int contextId = contextLexicon.insert(ctx); - - - if(countToks.length<2){ - edges.add(new Edge(phraseId, contextId, count)); - } - else{ - int tag=Integer.parseInt(countToks[1].substring(2)); - edges.add(new Edge(phraseId, contextId, count,tag)); - } - } - } - return edges; - } - - static Corpus readFromFile(Reader in) throws IOException - { - Corpus c = new Corpus(); - c.edges = c.readEdges(in); - for (Edge edge: c.edges) - { - while (edge.getPhraseId() >= c.phraseToContext.size()) - c.phraseToContext.add(new ArrayList<Edge>()); - while (edge.getContextId() >= c.contextToPhrase.size()) - c.contextToPhrase.add(new ArrayList<Edge>()); - - // index the edge for fast phrase, context lookup - c.phraseToContext.get(edge.getPhraseId()).add(edge); - c.contextToPhrase.get(edge.getContextId()).add(edge); - } - return c; - } - - TIntArrayList phraseEdges(TIntArrayList phrase) - { - TIntArrayList r = new TIntArrayList(4); - for (int p = 0; p < phrase.size(); ++p) - { - if (p == 0 || phrase.get(p-1) == splitSentinel) - r.add(p); - if (p == phrase.size() - 1 || phrase.get(p+1) == splitSentinel) - r.add(p); - } - return r; - } - - public void printStats(PrintStream out) - { - out.println("Corpus has " + edges.size() + " edges " + phraseLexicon.size() + " phrases " - + contextLexicon.size() + " contexts and " + wordLexicon.size() + " word types"); - } -}
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java b/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java deleted file mode 100644 index a386e4a3..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java +++ /dev/null @@ -1,34 +0,0 @@ -package phrase; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class Lexicon<T> -{ - public int insert(T word) - { - Integer i = wordToIndex.get(word); - if (i == null) - { - i = indexToWord.size(); - wordToIndex.put(word, i); - indexToWord.add(word); - } - return i; - } - - public T lookup(int index) - { - return indexToWord.get(index); - } - - public int size() - { - return indexToWord.size(); - } - - private Map<T, Integer> wordToIndex = new HashMap<T, Integer>(); - private List<T> indexToWord = new ArrayList<T>(); -}
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java deleted file mode 100644 index c032bb2b..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java +++ /dev/null @@ -1,540 +0,0 @@ -package phrase;
-
-import gnu.trove.TIntArrayList;
-import org.apache.commons.math.special.Gamma;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.regex.Pattern;
-
-import phrase.Corpus.Edge;
-
-
-public class PhraseCluster {
-
- public int K;
- private int n_phrases, n_words, n_contexts, n_positions;
- public Corpus c;
- public ExecutorService pool;
-
- double[] lambdaPTCT;
- double[][] lambdaPT;
- boolean cacheLambda = true;
-
- // emit[tag][position][word] = p(word | tag, position in context)
- double emit[][][];
- // pi[phrase][tag] = p(tag | phrase)
- double pi[][];
-
- public PhraseCluster(int numCluster, Corpus corpus)
- {
- K=numCluster;
- c=corpus;
- n_words=c.getNumWords();
- n_phrases=c.getNumPhrases();
- n_contexts=c.getNumContexts();
- n_positions=c.getNumContextPositions();
-
- emit=new double [K][n_positions][n_words];
- pi=new double[n_phrases][K];
-
- for(double [][]i:emit)
- for(double []j:i)
- arr.F.randomise(j, true);
-
- for(double []j:pi)
- arr.F.randomise(j, true);
- }
-
- void useThreadPool(ExecutorService pool)
- {
- this.pool = pool;
- }
-
- public double EM(int phraseSizeLimit)
- {
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double []exp_pi=new double[K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
-
- double loglikelihood=0;
-
- //E
- for(int phrase=0; phrase < n_phrases; phrase++)
- {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- continue;
-
- Arrays.fill(exp_pi, 1e-10);
-
- List<Edge> contexts = c.getEdgesForPhrase(phrase);
-
- for (int ctx=0; ctx<contexts.size(); ctx++)
- {
- Edge edge = contexts.get(ctx);
-
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- loglikelihood += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList context = edge.getContext();
- for(int tag=0;tag<K;tag++)
- {
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=p[tag]*count;
- }
- exp_pi[tag]+=p[tag]*count;
- }
- }
- arr.F.l1normalize(exp_pi);
- System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
- }
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
-
- emit=exp_emit;
-
- return loglikelihood;
- }
-
- public double PREM(double scalePT, double scaleCT, int phraseSizeLimit)
- {
- if (scaleCT == 0)
- {
- if (pool != null)
- return PREM_phrase_constraints_parallel(scalePT, phraseSizeLimit);
- else
- return PREM_phrase_constraints(scalePT, phraseSizeLimit);
- }
- else // FIXME: ignores phraseSizeLimit
- return this.PREM_phrase_context_constraints(scalePT, scaleCT);
- }
-
-
- public double PREM_phrase_constraints(double scalePT, int phraseSizeLimit)
- {
- double [][][]exp_emit=new double[K][n_positions][n_words];
- double []exp_pi=new double[K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
-
- if (lambdaPT == null && cacheLambda)
- lambdaPT = new double[n_phrases][];
-
- double loglikelihood=0, kl=0, l1lmax=0, primal=0;
- int failures=0, iterations=0;
- long start = System.currentTimeMillis();
- //E
- for(int phrase=0; phrase<n_phrases; phrase++)
- {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- {
- //System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
- continue;
- }
-
- Arrays.fill(exp_pi, 1e-10);
-
- // FIXME: add rare edge check to phrase objective & posterior processing
- PhraseObjective po = new PhraseObjective(this, phrase, scalePT, (cacheLambda) ? lambdaPT[phrase] : null);
- boolean ok = po.optimizeWithProjectedGradientDescent();
- if (!ok) ++failures;
- if (cacheLambda) lambdaPT[phrase] = po.getParameters();
- iterations += po.getNumberUpdateCalls();
- double [][] q=po.posterior();
- loglikelihood += po.loglikelihood();
- kl += po.KL_divergence();
- l1lmax += po.l1lmax();
- primal += po.primal(scalePT);
- List<Edge> edges = c.getEdgesForPhrase(phrase);
-
- for(int edge=0;edge<q.length;edge++){
- Edge e = edges.get(edge);
- TIntArrayList context = e.getContext();
- double contextCnt = e.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++){
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
- }
-
- exp_pi[tag]+=q[edge][tag]*contextCnt;
-
- }
- }
- arr.F.l1normalize(exp_pi);
- System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
- }
-
- long end = System.currentTimeMillis();
- if (failures > 0)
- System.out.println("WARNING: failed to converge in " + failures + "/" + n_phrases + " cases");
- System.out.println("\tmean iters: " + iterations/(double)n_phrases + " elapsed time " + (end - start) / 1000.0);
- System.out.println("\tllh: " + loglikelihood);
- System.out.println("\tKL: " + kl);
- System.out.println("\tphrase l1lmax: " + l1lmax);
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- return primal;
- }
-
- public double PREM_phrase_constraints_parallel(final double scalePT, int phraseSizeLimit)
- {
- assert(pool != null);
-
- final LinkedBlockingQueue<PhraseObjective> expectations
- = new LinkedBlockingQueue<PhraseObjective>();
-
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double [][]exp_pi=new double[n_phrases][K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
- for(double []j:exp_pi)
- Arrays.fill(j, 1e-10);
-
- double loglikelihood=0, kl=0, l1lmax=0, primal=0;
- final AtomicInteger failures = new AtomicInteger(0);
- final AtomicLong elapsed = new AtomicLong(0l);
- int iterations=0;
- long start = System.currentTimeMillis();
- List<Future<PhraseObjective>> results = new ArrayList<Future<PhraseObjective>>();
-
- if (lambdaPT == null && cacheLambda)
- lambdaPT = new double[n_phrases][];
-
- //E
- for(int phrase=0;phrase<n_phrases;phrase++) {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit) {
- System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
- continue;
- }
-
- final int p=phrase;
- results.add(pool.submit(new Callable<PhraseObjective>() {
- public PhraseObjective call() {
- //System.out.println("" + Thread.currentThread().getId() + " optimising lambda for " + p);
- long start = System.currentTimeMillis();
- PhraseObjective po = new PhraseObjective(PhraseCluster.this, p, scalePT, (cacheLambda) ? lambdaPT[p] : null);
- boolean ok = po.optimizeWithProjectedGradientDescent();
- if (!ok) failures.incrementAndGet();
- long end = System.currentTimeMillis();
- elapsed.addAndGet(end - start);
- //System.out.println("" + Thread.currentThread().getId() + " done optimising lambda for " + p);
- return po;
- }
- }));
- }
-
- // aggregate the expectations as they become available
- for (Future<PhraseObjective> fpo : results)
- {
- try {
- //System.out.println("" + Thread.currentThread().getId() + " reading queue #" + count);
-
- // wait (blocking) until something is ready
- PhraseObjective po = fpo.get();
- // process
- int phrase = po.phrase;
- if (cacheLambda) lambdaPT[phrase] = po.getParameters();
- //System.out.println("" + Thread.currentThread().getId() + " taken phrase " + phrase);
- double [][] q=po.posterior();
- loglikelihood += po.loglikelihood();
- kl += po.KL_divergence();
- l1lmax += po.l1lmax();
- primal += po.primal(scalePT);
- iterations += po.getNumberUpdateCalls();
-
- List<Edge> edges = c.getEdgesForPhrase(phrase);
- for(int edge=0;edge<q.length;edge++){
- Edge e = edges.get(edge);
- TIntArrayList context = e.getContext();
- double contextCnt = e.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++){
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
- }
- exp_pi[phrase][tag]+=q[edge][tag]*contextCnt;
- }
- }
- } catch (InterruptedException e) {
- System.err.println("M-step thread interrupted. Probably fatal!");
- throw new RuntimeException(e);
- } catch (ExecutionException e) {
- System.err.println("M-step thread execution died. Probably fatal!");
- throw new RuntimeException(e);
- }
- }
-
- long end = System.currentTimeMillis();
-
- if (failures.get() > 0)
- System.out.println("WARNING: failed to converge in " + failures.get() + "/" + n_phrases + " cases");
- System.out.println("\tmean iters: " + iterations/(double)n_phrases + " walltime " + (end-start)/1000.0 + " threads " + elapsed.get() / 1000.0);
- System.out.println("\tllh: " + loglikelihood);
- System.out.println("\tKL: " + kl);
- System.out.println("\tphrase l1lmax: " + l1lmax);
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- for(double []j:exp_pi)
- arr.F.l1normalize(j);
- pi=exp_pi;
-
- return primal;
- }
-
- public double PREM_phrase_context_constraints(double scalePT, double scaleCT)
- {
- double[][][] exp_emit = new double [K][n_positions][n_words];
- double[][] exp_pi = new double[n_phrases][K];
-
- //E step
- PhraseContextObjective pco = new PhraseContextObjective(this, lambdaPTCT, pool, scalePT, scaleCT);
- boolean ok = pco.optimizeWithProjectedGradientDescent();
- if (cacheLambda) lambdaPTCT = pco.getParameters();
-
- //now extract expectations
- List<Corpus.Edge> edges = c.getEdges();
- for(int e = 0; e < edges.size(); ++e)
- {
- double [] q = pco.posterior(e);
- Corpus.Edge edge = edges.get(e);
-
- TIntArrayList context = edge.getContext();
- double contextCnt = edge.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++)
- {
- for(int pos=0;pos<n_positions;pos++)
- exp_emit[tag][pos][context.get(pos)]+=q[tag]*contextCnt;
- exp_pi[edge.getPhraseId()][tag]+=q[tag]*contextCnt;
- }
- }
-
- System.out.println("\tllh: " + pco.loglikelihood());
- System.out.println("\tKL: " + pco.KL_divergence());
- System.out.println("\tphrase l1lmax: " + pco.phrase_l1lmax());
- System.out.println("\tcontext l1lmax: " + pco.context_l1lmax());
-
- //M step
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- for(double []j:exp_pi)
- arr.F.l1normalize(j);
- pi=exp_pi;
-
- return pco.primal();
- }
-
- /**
- * @param phrase index of phrase
- * @param ctx array of context
- * @return unnormalized posterior
- */
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob;
-
- if(edge.getTag()>=0){
- prob=new double[K];
- prob[edge.getTag()]=1;
- return prob;
- }
-
- if (edge.getPhraseId() < n_phrases)
- prob = Arrays.copyOf(pi[edge.getPhraseId()], K);
- else
- {
- prob = new double[K];
- Arrays.fill(prob, 1.0);
- }
-
- TIntArrayList ctx = edge.getContext();
- for(int tag=0;tag<K;tag++)
- {
- for(int c=0;c<n_positions;c++)
- {
- int word = ctx.get(c);
- if (!this.c.isSentinel(word) && word < n_words)
- prob[tag]*=emit[tag][c][word];
- }
- }
-
- return prob;
- }
-
- public void displayPosterior(PrintStream ps, List<Edge> testing)
- {
- for (Edge edge : testing)
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t + " T=" + edge.getCount() + " P=" + probs[t]);
- //ps.println("# probs " + Arrays.toString(probs));
- }
- }
-
- public void displayModelParam(PrintStream ps)
- {
- final double EPS = 1e-6;
- ps.println("phrases " + n_phrases + " tags " + K + " positions " + n_positions);
-
- for (int i = 0; i < n_phrases; ++i)
- for(int j=0;j<pi[i].length;j++)
- if (pi[i][j] > EPS)
- ps.println(i + " " + j + " " + pi[i][j]);
-
- ps.println();
- for (int i = 0; i < K; ++i)
- {
- for(int position=0;position<n_positions;position++)
- {
- for(int word=0;word<emit[i][position].length;word++)
- {
- if (emit[i][position][word] > EPS)
- ps.println(i + " " + position + " " + word + " " + emit[i][position][word]);
- }
- }
- }
- }
-
- double phrase_l1lmax()
- {
- double sum=0;
- for(int phrase=0; phrase<n_phrases; phrase++)
- {
- double [] maxes = new double[K];
- for (Edge edge : c.getEdgesForPhrase(phrase))
- {
- double p[] = posterior(edge);
- arr.F.l1normalize(p);
- for(int tag=0;tag<K;tag++)
- maxes[tag] = Math.max(maxes[tag], p[tag]);
- }
- for(int tag=0;tag<K;tag++)
- sum += maxes[tag];
- }
- return sum;
- }
-
- double context_l1lmax()
- {
- double sum=0;
- for(int context=0; context<n_contexts; context++)
- {
- double [] maxes = new double[K];
- for (Edge edge : c.getEdgesForContext(context))
- {
- double p[] = posterior(edge);
- arr.F.l1normalize(p);
- for(int tag=0;tag<K;tag++)
- maxes[tag] = Math.max(maxes[tag], p[tag]);
- }
- for(int tag=0;tag<K;tag++)
- sum += maxes[tag];
- }
- return sum;
- }
-
- public void loadParameters(BufferedReader input) throws IOException
- {
- final double EPS = 1e-50;
-
- // overwrite pi, emit with ~zeros
- for(double [][]i:emit)
- for(double []j:i)
- Arrays.fill(j, EPS);
-
- for(double []j:pi)
- Arrays.fill(j, EPS);
-
- String line = input.readLine();
- assert line != null;
-
- Pattern space = Pattern.compile(" +");
- String[] parts = space.split(line);
- assert parts.length == 6;
-
- assert parts[0].equals("phrases");
- int phrases = Integer.parseInt(parts[1]);
- int tags = Integer.parseInt(parts[3]);
- int positions = Integer.parseInt(parts[5]);
-
- assert phrases == n_phrases;
- assert tags == K;
- assert positions == n_positions;
-
- // read in pi
- while ((line = input.readLine()) != null)
- {
- line = line.trim();
- if (line.isEmpty()) break;
-
- String[] tokens = space.split(line);
- assert tokens.length == 3;
- int p = Integer.parseInt(tokens[0]);
- int t = Integer.parseInt(tokens[1]);
- double v = Double.parseDouble(tokens[2]);
-
- pi[p][t] = v;
- }
-
- // read in emissions
- while ((line = input.readLine()) != null)
- {
- String[] tokens = space.split(line);
- assert tokens.length == 4;
- int t = Integer.parseInt(tokens[0]);
- int p = Integer.parseInt(tokens[1]);
- int w = Integer.parseInt(tokens[2]);
- double v = Double.parseDouble(tokens[3]);
-
- emit[t][p][w] = v;
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java deleted file mode 100644 index 646ff392..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java +++ /dev/null @@ -1,436 +0,0 @@ -package phrase;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import phrase.Corpus.Edge;
-
-public class PhraseContextObjective extends ProjectedObjective
-{
- private static final double GRAD_DIFF = 0.00002;
- private static double INIT_STEP_SIZE = 300;
- private static double VAL_DIFF = 1e-8;
- private static int ITERATIONS = 20;
- boolean debug = false;
-
- private PhraseCluster c;
-
- // un-regularized unnormalized posterior, p[edge][tag]
- // P(tag|edge) \propto P(tag|phrase)P(context|tag)
- private double p[][];
-
- // regularized unnormalized posterior
- // q[edge][tag] propto p[edge][tag]*exp(-lambda)
- private double q[][];
- private List<Corpus.Edge> data;
-
- // log likelihood under q
- private double loglikelihood;
- private SimplexProjection projectionPhrase;
- private SimplexProjection projectionContext;
-
- double[] newPoint;
- private int n_param;
-
- // likelihood under p
- public double llh;
-
- private static Map<Corpus.Edge, Integer> edgeIndex;
-
- private long projectionTime;
- private long objectiveTime;
- private long actualProjectionTime;
- private ExecutorService pool;
-
- double scalePT;
- double scaleCT;
-
- public PhraseContextObjective(PhraseCluster cluster, double[] startingParameters, ExecutorService pool,
- double scalePT, double scaleCT)
- {
- c=cluster;
- data=c.c.getEdges();
- n_param=data.size()*c.K*2;
- this.pool=pool;
- this.scalePT = scalePT;
- this.scaleCT = scaleCT;
-
- parameters = startingParameters;
- if (parameters == null)
- parameters = new double[n_param];
-
- System.out.println("Num parameters " + n_param);
- newPoint = new double[n_param];
- gradient = new double[n_param];
- initP();
- projectionPhrase = new SimplexProjection(scalePT);
- projectionContext = new SimplexProjection(scaleCT);
- q=new double [data.size()][c.K];
-
- if (edgeIndex == null) {
- edgeIndex = new HashMap<Edge, Integer>();
- for (int e=0; e<data.size(); e++)
- {
- edgeIndex.put(data.get(e), e);
- //if (debug) System.out.println("Edge " + data.get(e) + " index " + e);
- }
- }
-
- setParameters(parameters);
- }
-
- private void initP(){
- p=new double[data.size()][];
- for(int edge=0;edge<data.size();edge++)
- {
- p[edge]=c.posterior(data.get(edge));
- llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge]));
- arr.F.l1normalize(p[edge]);
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- //System.out.println("setParameters " + Arrays.toString(parameters));
- // TODO: test if params have changed and skip update otherwise
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction()
- {
- updateCalls++;
- loglikelihood=0;
-
- System.out.print(".");
- System.out.flush();
-
- long begin = System.currentTimeMillis();
- for (int e=0; e<data.size(); e++)
- {
- Edge edge = data.get(e);
- for(int tag=0; tag<c.K; tag++)
- {
- int ip = index(e, tag, true);
- int ic = index(e, tag, false);
- q[e][tag] = p[e][tag]*
- Math.exp((-parameters[ip]-parameters[ic]) / edge.getCount());
- //if (debug)
- //System.out.println("\tposterior " + edge + " with tag " + tag + " p " + p[e][tag] + " params " + parameters[ip] + " and " + parameters[ic] + " q " + q[e][tag]);
- }
- }
-
- for(int edge=0;edge<data.size();edge++) {
- loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
- arr.F.l1normalize(q[edge]);
- }
-
- for (int e=0; e<data.size(); e++)
- {
- for(int tag=0; tag<c.K; tag++)
- {
- int ip = index(e, tag, true);
- int ic = index(e, tag, false);
- gradient[ip]=-q[e][tag];
- gradient[ic]=-q[e][tag];
- }
- }
- //if (debug) {
- //System.out.println("objective " + loglikelihood + " ||gradient||_2: " + arr.F.l2norm(gradient));
- //System.out.println("gradient " + Arrays.toString(gradient));
- //}
- objectiveTime += System.currentTimeMillis() - begin;
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- long begin = System.currentTimeMillis();
- List<Future<?>> tasks = new ArrayList<Future<?>>();
-
- System.out.print(",");
- System.out.flush();
-
- Arrays.fill(newPoint, 0, newPoint.length, 0);
-
- // first project using the phrase-tag constraints,
- // for all p,t: sum_c lambda_ptc < scaleP
- if (pool == null)
- {
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- List<Edge> edges = c.c.getEdgesForPhrase(p);
- double[] toProject = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e (twice)
- for(int e=0; e<edges.size(); e++)
- toProject[e] = point[index(edges.get(e), tag, true)];
- long lbegin = System.currentTimeMillis();
- projectionPhrase.project(toProject);
- actualProjectionTime += System.currentTimeMillis() - lbegin;
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, true)] = toProject[e];
- }
- }
- }
- else // do above in parallel using thread pool
- {
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- final int phrase = p;
- final double[] inPoint = point;
- Runnable task = new Runnable()
- {
- public void run()
- {
- List<Edge> edges = c.c.getEdgesForPhrase(phrase);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = inPoint[index(edges.get(e), tag, true)];
- projectionPhrase.project(toProject);
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, true)] = toProject[e];
- }
- }
- };
- tasks.add(pool.submit(task));
- }
- }
- //System.out.println("after PT " + Arrays.toString(newPoint));
-
- // now project using the context-tag constraints,
- // for all c,t: sum_p omega_pct < scaleC
- if (pool == null)
- {
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- List<Edge> edges = c.c.getEdgesForContext(ctx);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = point[index(edges.get(e), tag, false)];
- long lbegin = System.currentTimeMillis();
- projectionContext.project(toProject);
- actualProjectionTime += System.currentTimeMillis() - lbegin;
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, false)] = toProject[e];
- }
- }
- }
- else
- {
- // do above in parallel using thread pool
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- final int context = ctx;
- final double[] inPoint = point;
- Runnable task = new Runnable()
- {
- public void run()
- {
- List<Edge> edges = c.c.getEdgesForContext(context);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = inPoint[index(edges.get(e), tag, false)];
- projectionContext.project(toProject);
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, false)] = toProject[e];
- }
- }
- };
- tasks.add(pool.submit(task));
- }
- }
-
- if (pool != null)
- {
- // wait for all the jobs to complete
- Exception failure = null;
- for (Future<?> task: tasks)
- {
- try {
- task.get();
- } catch (InterruptedException e) {
- System.err.println("ERROR: Projection thread interrupted");
- e.printStackTrace();
- failure = e;
- } catch (ExecutionException e) {
- System.err.println("ERROR: Projection thread died");
- e.printStackTrace();
- failure = e;
- }
- }
- // rethrow the exception
- if (failure != null)
- {
- pool.shutdownNow();
- throw new RuntimeException(failure);
- }
- }
-
- double[] tmp = newPoint;
- newPoint = point;
- projectionTime += System.currentTimeMillis() - begin;
-
- //if (debug)
- //System.out.println("\t\treturning " + Arrays.toString(tmp));
- return tmp;
- }
-
- private int index(Edge edge, int tag, boolean phrase)
- {
- // NB if indexing changes must also change code in updateFunction and constructor
- if (phrase)
- return tag * edgeIndex.size() + edgeIndex.get(edge);
- else
- return (c.K + tag) * edgeIndex.size() + edgeIndex.get(edge);
- }
-
- private int index(int e, int tag, boolean phrase)
- {
- // NB if indexing changes must also change code in updateFunction and constructor
- if (phrase)
- return tag * edgeIndex.size() + e;
- else
- return (c.K + tag) * edgeIndex.size() + e;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
- @Override
- public String toString() {
- return "No need for pointless toString";
- }
-
- public double []posterior(int edgeIndex){
- return q[edgeIndex];
- }
-
- public boolean optimizeWithProjectedGradientDescent()
- {
- projectionTime = 0;
- actualProjectionTime = 0;
- objectiveTime = 0;
- long start = System.currentTimeMillis();
-
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
- //LineSearchMethod ls = new WolfRuleLineSearch(
- // (new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
- optimizer.setMaxIterations(ITERATIONS);
- updateFunction();
- boolean success = optimizer.optimize(this,stats,compositeStop);
-
- System.out.println();
- System.out.println(stats.prettyPrint(1));
-
- if (success)
- System.out.print("\toptimization took " + optimizer.getCurrentIteration() + " iterations");
- else
- System.out.print("\toptimization failed to converge");
- long total = System.currentTimeMillis() - start;
- System.out.println(" and " + total + " ms: projection " + projectionTime +
- " actual " + actualProjectionTime + " objective " + objectiveTime);
-
- return success;
- }
-
- double loglikelihood()
- {
- return llh;
- }
-
- double KL_divergence()
- {
- return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
- }
-
- double phrase_l1lmax()
- {
- // \sum_{tag,phrase} max_{context} P(tag|context,phrase)
- double sum=0;
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- List<Edge> edges = c.c.getEdgesForPhrase(p);
- for(int tag=0;tag<c.K;tag++)
- {
- double max=0;
- for (Edge edge: edges)
- max = Math.max(max, q[edgeIndex.get(edge)][tag]);
- sum+=max;
- }
- }
- return sum;
- }
-
- double context_l1lmax()
- {
- // \sum_{tag,context} max_{phrase} P(tag|context,phrase)
- double sum=0;
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- List<Edge> edges = c.c.getEdgesForContext(ctx);
- for(int tag=0; tag<c.K; tag++)
- {
- double max=0;
- for (Edge edge: edges)
- max = Math.max(max, q[edgeIndex.get(edge)][tag]);
- sum+=max;
- }
- }
- return sum;
- }
-
- // L - KL(q||p) - scalePT * l1lmax_phrase - scaleCT * l1lmax_context
- public double primal()
- {
- return loglikelihood() - KL_divergence() - scalePT * phrase_l1lmax() - scaleCT * context_l1lmax();
- }
-}
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java deleted file mode 100644 index 0cf31c1c..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java +++ /dev/null @@ -1,193 +0,0 @@ -package phrase;
-
-import io.FileUtil;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class PhraseCorpus
-{
- public HashMap<String,Integer>wordLex;
- public HashMap<String,Integer>phraseLex;
-
- public String wordList[];
- public String phraseList[];
-
- //data[phrase][num context][position]
- public int data[][][];
- public int numContexts;
-
- public PhraseCorpus(String filename) throws FileNotFoundException, IOException
- {
- BufferedReader r = FileUtil.reader(new File(filename));
-
- phraseLex=new HashMap<String,Integer>();
- wordLex=new HashMap<String,Integer>();
-
- ArrayList<int[][]>dataList=new ArrayList<int[][]>();
- String line=null;
- numContexts = 0;
-
- while((line=readLine(r))!=null){
-
- String toks[]=line.split("\t");
- String phrase=toks[0];
- addLex(phrase,phraseLex);
-
- toks=toks[1].split(" \\|\\|\\| ");
-
- ArrayList <int[]>ctxList=new ArrayList<int[]>();
-
- for(int i=0;i<toks.length;i+=2){
- String ctx=toks[i];
- String words[]=ctx.split(" ");
- if (numContexts == 0)
- numContexts = words.length - 1;
- else
- assert numContexts == words.length - 1;
-
- int []context=new int [numContexts+1];
- int idx=0;
- for(String word:words){
- if(word.equals("<PHRASE>")){
- continue;
- }
- addLex(word,wordLex);
- context[idx]=wordLex.get(word);
- idx++;
- }
-
- String count=toks[i+1];
- context[idx]=Integer.parseInt(count.trim().substring(2));
-
- ctxList.add(context);
- }
-
- dataList.add(ctxList.toArray(new int [0][]));
-
- }
- try{
- r.close();
- }catch(IOException ioe){
- ioe.printStackTrace();
- }
- data=dataList.toArray(new int[0][][]);
- }
-
- private void addLex(String key, HashMap<String,Integer>lex){
- Integer i=lex.get(key);
- if(i==null){
- lex.put(key, lex.size());
- }
- }
-
- //for debugging
- public void saveLex(String lexFilename) throws FileNotFoundException, IOException
- {
- PrintStream ps = FileUtil.printstream(new File(lexFilename));
- ps.println("Phrase Lexicon");
- ps.println(phraseLex.size());
- printDict(phraseLex,ps);
-
- ps.println("Word Lexicon");
- ps.println(wordLex.size());
- printDict(wordLex,ps);
- ps.close();
- }
-
- private static void printDict(HashMap<String,Integer>lex,PrintStream ps){
- String []dict=buildList(lex);
- for(int i=0;i<dict.length;i++){
- ps.println(dict[i]);
- }
- }
-
- public void loadLex(String lexFilename){
- Scanner sc=io.FileUtil.openInFile(lexFilename);
-
- sc.nextLine();
- int size=sc.nextInt();
- sc.nextLine();
- String[]dict=new String[size];
- for(int i=0;i<size;i++){
- dict[i]=sc.nextLine();
- }
- phraseLex=buildMap(dict);
-
- sc.nextLine();
- size=sc.nextInt();
- sc.nextLine();
- dict=new String[size];
- for(int i=0;i<size;i++){
- dict[i]=sc.nextLine();
- }
- wordLex=buildMap(dict);
- sc.close();
- }
-
- private HashMap<String, Integer> buildMap(String[]dict){
- HashMap<String,Integer> map=new HashMap<String,Integer>();
- for(int i=0;i<dict.length;i++){
- map.put(dict[i], i);
- }
- return map;
- }
-
- public void buildList(){
- if(wordList==null){
- wordList=buildList(wordLex);
- phraseList=buildList(phraseLex);
- }
- }
-
- private static String[]buildList(HashMap<String,Integer>lex){
- String dict[]=new String [lex.size()];
- for(String key:lex.keySet()){
- dict[lex.get(key)]=key;
- }
- return dict;
- }
-
- public String getContextString(int context[], boolean addPhraseMarker)
- {
- StringBuffer b = new StringBuffer();
- for (int i=0;i<context.length-1;i++)
- {
- if (b.length() > 0)
- b.append(" ");
-
- if (i == context.length/2)
- b.append("<PHRASE> ");
-
- b.append(wordList[context[i]]);
- }
- return b.toString();
- }
-
- public static String readLine(BufferedReader r){
- try{
- return r.readLine();
- }
- catch(IOException ioe){
- ioe.printStackTrace();
- }
- return null;
- }
-
- public static void main(String[] args) throws Exception
- {
- String LEX_FILENAME="../pdata/lex.out";
- String DATA_FILENAME="../pdata/btec.con";
- PhraseCorpus c=new PhraseCorpus(DATA_FILENAME);
- c.saveLex(LEX_FILENAME);
- c.loadLex(LEX_FILENAME);
- c.saveLex(LEX_FILENAME);
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java deleted file mode 100644 index ac73a075..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java +++ /dev/null @@ -1,224 +0,0 @@ -package phrase;
-
-import java.util.Arrays;
-import java.util.List;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-
-public class PhraseObjective extends ProjectedObjective
-{
- static final double GRAD_DIFF = 0.00002;
- static double INIT_STEP_SIZE = 300;
- static double VAL_DIFF = 1e-8; // tuned to BTEC subsample
- static int ITERATIONS = 100;
- private PhraseCluster c;
-
- /**@brief
- * for debugging purposes
- */
- //public static PrintStream ps;
-
- /**@brief current phrase being optimzed*/
- public int phrase;
-
- /**@brief un-regularized posterior
- * unnormalized
- * p[edge][tag]
- * P(tag|edge) \propto P(tag|phrase)P(context|tag)
- */
- private double[][]p;
-
- /**@brief regularized posterior
- * q[edge][tag] propto p[edge][tag]*exp(-lambda)
- */
- private double q[][];
- private List<Corpus.Edge> data;
-
- /**@brief log likelihood of the associated phrase
- *
- */
- private double loglikelihood;
- private SimplexProjection projection;
-
- double[] newPoint ;
-
- private int n_param;
-
- /**@brief likelihood under p
- *
- */
- public double llh;
-
- public PhraseObjective(PhraseCluster cluster, int phraseIdx, double scale, double[] lambda){
- phrase=phraseIdx;
- c=cluster;
- data=c.c.getEdgesForPhrase(phrase);
- n_param=data.size()*c.K;
- //System.out.println("Num parameters " + n_param + " for phrase #" + phraseIdx);
-
- if (lambda==null)
- lambda=new double[n_param];
-
- parameters = lambda;
- newPoint = new double[n_param];
- gradient = new double[n_param];
- initP();
- projection=new SimplexProjection(scale);
- q=new double [data.size()][c.K];
-
- setParameters(parameters);
- }
-
- private void initP(){
- p=new double[data.size()][];
- for(int edge=0;edge<data.size();edge++){
- p[edge]=c.posterior(data.get(edge));
- llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge])); // Was bug here - count inside log!
- arr.F.l1normalize(p[edge]);
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction(){
- updateCalls++;
- loglikelihood=0;
-
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- q[edge][tag]=p[edge][tag]*
- Math.exp(-parameters[tag*data.size()+edge]/data.get(edge).getCount());
- }
- }
-
- for(int edge=0;edge<data.size();edge++){
- loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
- arr.F.l1normalize(q[edge]);
- }
-
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- gradient[tag*data.size()+edge]=-q[edge][tag];
- }
- }
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- double toProject[]=new double[data.size()];
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- toProject[edge]=point[tag*data.size()+edge];
- }
- projection.project(toProject);
- for(int edge=0;edge<data.size();edge++){
- newPoint[tag*data.size()+edge]=toProject[edge];
- }
- }
- return newPoint;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
- @Override
- public String toString() {
- return Arrays.toString(parameters);
- }
-
- public double [][]posterior(){
- return q;
- }
-
- long optimizationTime;
-
- public boolean optimizeWithProjectedGradientDescent(){
- long start = System.currentTimeMillis();
-
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
- //LineSearchMethod ls = new WolfRuleLineSearch(
- // (new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
- optimizer.setMaxIterations(ITERATIONS);
- updateFunction();
- boolean success = optimizer.optimize(this,stats,compositeStop);
- //System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- //if(succed){
- //System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- //}else{
-// System.out.println("Failed to optimize");
- //}
- //System.out.println(Arrays.toString(parameters));
-
- // for(int edge=0;edge<data.getSize();edge++){
- // ps.println(Arrays.toString(q[edge]));
- // }
-
- return success;
- }
-
- public double KL_divergence()
- {
- return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
- }
-
- public double loglikelihood()
- {
- return llh;
- }
-
- public double l1lmax()
- {
- double sum=0;
- for(int tag=0;tag<c.K;tag++){
- double max=0;
- for(int edge=0;edge<data.size();edge++){
- if(q[edge][tag]>max)
- max=q[edge][tag];
- }
- sum+=max;
- }
- return sum;
- }
-
- public double primal(double scale)
- {
- return loglikelihood() - KL_divergence() - scale * l1lmax();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java deleted file mode 100644 index 6f302b20..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java +++ /dev/null @@ -1,257 +0,0 @@ -package phrase; - -import io.FileUtil; -import joptsimple.OptionParser; -import joptsimple.OptionSet; -import java.io.File; -import java.io.IOException; -import java.io.PrintStream; -import java.util.List; -import java.util.Random; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - -import phrase.Corpus.Edge; - -import arr.F; - -public class Trainer -{ - public static void main(String[] args) - { - OptionParser parser = new OptionParser(); - parser.accepts("help"); - parser.accepts("in").withRequiredArg().ofType(File.class); - parser.accepts("in1").withRequiredArg().ofType(File.class); - parser.accepts("test").withRequiredArg().ofType(File.class); - parser.accepts("out").withRequiredArg().ofType(File.class); - parser.accepts("start").withRequiredArg().ofType(File.class); - parser.accepts("parameters").withRequiredArg().ofType(File.class); - parser.accepts("topics").withRequiredArg().ofType(Integer.class).defaultsTo(5); - parser.accepts("iterations").withRequiredArg().ofType(Integer.class).defaultsTo(10); - parser.accepts("threads").withRequiredArg().ofType(Integer.class).defaultsTo(0); - parser.accepts("scale-phrase").withRequiredArg().ofType(Double.class).defaultsTo(0.0); - parser.accepts("scale-context").withRequiredArg().ofType(Double.class).defaultsTo(0.0); - parser.accepts("seed").withRequiredArg().ofType(Long.class).defaultsTo(0l); - parser.accepts("convergence-threshold").withRequiredArg().ofType(Double.class).defaultsTo(1e-6); - parser.accepts("variational-bayes"); - parser.accepts("alpha-emit").withRequiredArg().ofType(Double.class).defaultsTo(0.1); - parser.accepts("alpha-pi").withRequiredArg().ofType(Double.class).defaultsTo(0.0001); - parser.accepts("agree-direction"); - parser.accepts("agree-language"); - parser.accepts("no-parameter-cache"); - parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5); - OptionSet options = parser.parse(args); - - if (options.has("help") || !options.has("in")) - { - try { - parser.printHelpOn(System.err); - } catch (IOException e) { - System.err.println("This should never happen."); - e.printStackTrace(); - } - System.exit(1); - } - - int tags = (Integer) options.valueOf("topics"); - int iterations = (Integer) options.valueOf("iterations"); - double scale_phrase = (Double) options.valueOf("scale-phrase"); - double scale_context = (Double) options.valueOf("scale-context"); - int threads = (Integer) options.valueOf("threads"); - double threshold = (Double) options.valueOf("convergence-threshold"); - boolean vb = options.has("variational-bayes"); - double alphaEmit = (vb) ? (Double) options.valueOf("alpha-emit") : 0; - double alphaPi = (vb) ? (Double) options.valueOf("alpha-pi") : 0; - int skip = (Integer) options.valueOf("skip-large-phrases"); - - if (options.has("seed")) - F.rng = new Random((Long) options.valueOf("seed")); - - ExecutorService threadPool = null; - if (threads > 0) - threadPool = Executors.newFixedThreadPool(threads); - - if (tags <= 1 || scale_phrase < 0 || scale_context < 0 || threshold < 0) - { - System.err.println("Invalid arguments. Try again!"); - System.exit(1); - } - - Corpus corpus = null; - File infile = (File) options.valueOf("in"); - Corpus corpus1 = null; - File infile1 = (File) options.valueOf("in1"); - try { - System.out.println("Reading concordance from " + infile); - corpus = Corpus.readFromFile(FileUtil.reader(infile)); - corpus.printStats(System.out); - if(options.has("in1")){ - corpus1 = Corpus.readFromFile(FileUtil.reader(infile1)); - corpus1.printStats(System.out); - } - } catch (IOException e) { - System.err.println("Failed to open input file: " + infile); - e.printStackTrace(); - System.exit(1); - } - - if (!(options.has("agree-direction")||options.has("agree-language"))) - System.out.println("Running with " + tags + " tags " + - "for " + iterations + " iterations " + - ((skip > 0) ? "skipping large phrases for first " + skip + " iterations " : "") + - "with scale " + scale_phrase + " phrase and " + scale_context + " context " + - "and " + threads + " threads"); - else - System.out.println("Running agreement model with " + tags + " tags " + - "for " + iterations); - - System.out.println(); - - PhraseCluster cluster = null; - Agree2Sides agree2sides = null; - Agree agree= null; - VB vbModel=null; - if (options.has("agree-language")) - agree2sides = new Agree2Sides(tags, corpus,corpus1); - else if (options.has("agree-direction")) - agree = new Agree(tags, corpus); - else - { - if (vb) - { - vbModel=new VB(tags,corpus); - vbModel.alpha=alphaPi; - vbModel.lambda=alphaEmit; - if (threadPool != null) vbModel.useThreadPool(threadPool); - } - else - { - cluster = new PhraseCluster(tags, corpus); - if (threadPool != null) cluster.useThreadPool(threadPool); - - if (options.has("no-parameter-cache")) - cluster.cacheLambda = false; - if (options.has("start")) - { - try { - System.err.println("Reading starting parameters from " + options.valueOf("start")); - cluster.loadParameters(FileUtil.reader((File)options.valueOf("start"))); - } catch (IOException e) { - System.err.println("Failed to open input file: " + options.valueOf("start")); - e.printStackTrace(); - } - } - } - } - - double last = 0; - for (int i=0; i < iterations; i++) - { - double o; - if (agree != null) - o = agree.EM(); - else if(agree2sides!=null) - o = agree2sides.EM(); - else - { - if (i < skip) - System.out.println("Skipping phrases of length > " + (i+1)); - - if (scale_phrase <= 0 && scale_context <= 0) - { - if (!vb) - o = cluster.EM((i < skip) ? i+1 : 0); - else - o = vbModel.EM(); - } - else - o = cluster.PREM(scale_phrase, scale_context, (i < skip) ? i+1 : 0); - } - - System.out.println("ITER: "+i+" objective: " + o); - - // sometimes takes a few iterations to break the ties - if (i > 5 && Math.abs((o - last) / o) < threshold) - { - last = o; - break; - } - last = o; - } - - double pl1lmax = 0, cl1lmax = 0; - if (cluster != null) - { - pl1lmax = cluster.phrase_l1lmax(); - cl1lmax = cluster.context_l1lmax(); - } - else if (agree != null) - { - // fairly arbitrary choice of model1 cf model2 - pl1lmax = agree.model1.phrase_l1lmax(); - cl1lmax = agree.model1.context_l1lmax(); - } - else if (agree2sides != null) - { - // fairly arbitrary choice of model1 cf model2 - pl1lmax = agree2sides.model1.phrase_l1lmax(); - cl1lmax = agree2sides.model1.context_l1lmax(); - } - - System.out.println("\nFinal posterior phrase l1lmax " + pl1lmax + " context l1lmax " + cl1lmax); - - if (options.has("out")) - { - File outfile = (File) options.valueOf("out"); - try { - PrintStream ps = FileUtil.printstream(outfile); - List<Edge> test; - if (!options.has("test")) // just use the training - test = corpus.getEdges(); - else - { // if --test supplied, load up the file - infile = (File) options.valueOf("test"); - System.out.println("Reading testing concordance from " + infile); - test = corpus.readEdges(FileUtil.reader(infile)); - } - if(vb) { - assert !options.has("test"); - vbModel.displayPosterior(ps); - } else if (cluster != null) - cluster.displayPosterior(ps, test); - else if (agree != null) - agree.displayPosterior(ps, test); - else if (agree2sides != null) { - assert !options.has("test"); - agree2sides.displayPosterior(ps); - } - - ps.close(); - } catch (IOException e) { - System.err.println("Failed to open either testing file or output file"); - e.printStackTrace(); - System.exit(1); - } - } - - if (options.has("parameters")) - { - assert !vb; - File outfile = (File) options.valueOf("parameters"); - PrintStream ps; - try { - ps = FileUtil.printstream(outfile); - cluster.displayModelParam(ps); - ps.close(); - } catch (IOException e) { - System.err.println("Failed to open output parameters file: " + outfile); - e.printStackTrace(); - System.exit(1); - } - } - - if (cluster != null && cluster.pool != null) - cluster.pool.shutdown(); - } -} diff --git a/gi/posterior-regularisation/prjava/src/phrase/VB.java b/gi/posterior-regularisation/prjava/src/phrase/VB.java deleted file mode 100644 index cd3f4966..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/VB.java +++ /dev/null @@ -1,419 +0,0 @@ -package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import org.apache.commons.math.special.Gamma;
-
-import phrase.Corpus.Edge;
-
-public class VB {
-
- public static int MAX_ITER=400;
-
- /**@brief
- * hyper param for beta
- * where beta is multinomial
- * for generating words from a topic
- */
- public double lambda=0.1;
- /**@brief
- * hyper param for theta
- * where theta is dirichlet for z
- */
- public double alpha=0.0001;
- /**@brief
- * variational param for beta
- */
- private double rho[][][];
- private double digamma_rho[][][];
- private double rho_sum[][];
- /**@brief
- * variational param for z
- */
- //private double phi[][];
- /**@brief
- * variational param for theta
- */
- private double gamma[];
- private static double VAL_DIFF_RATIO=0.005;
-
- private int n_positions;
- private int n_words;
- private int K;
- private ExecutorService pool;
-
- private Corpus c;
- public static void main(String[] args) {
- // String in="../pdata/canned.con";
- String in="../pdata/btec.con";
- String out="../pdata/vb.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- VB vb=new VB(numCluster, corpus);
- int iter=20;
- for(int i=0;i<iter;i++){
- double obj=vb.EM();
- System.out.println("Iter "+i+": "+obj);
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- vb.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public VB(int numCluster, Corpus corpus){
- c=corpus;
- K=numCluster;
- n_positions=c.getNumContextPositions();
- n_words=c.getNumWords();
- rho=new double[K][n_positions][n_words];
- //to init rho
- //loop through data and count up words
- double[] phi_tmp=new double[K];
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- Arrays.fill(rho[i][pos], lambda);
- }
- }
- for(int d=0;d<c.getNumPhrases();d++){
- List<Edge>doc=c.getEdgesForPhrase(d);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- arr.F.randomise(phi_tmp);
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- rho[i][pos][context.get(pos)]+=phi_tmp[i];
- }
- }
- }
- }
-
- }
-
- private double inference(int phraseID, double[][] phi, double[] gamma)
- {
- List<Edge > doc=c.getEdgesForPhrase(phraseID);
- for(int i=0;i<phi.length;i++){
- for(int j=0;j<phi[i].length;j++){
- phi[i][j]=1.0/K;
- }
- }
- Arrays.fill(gamma,alpha+1.0/K);
-
- double digamma_gamma[]=new double[K];
-
- double gamma_sum=digamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- digamma_gamma[i]=digamma(gamma[i]);
- }
- double gammaSum[]=new double [K];
- double prev_val=0;
- double obj=0;
-
- for(int iter=0;iter<MAX_ITER;iter++){
- prev_val=obj;
- obj=0;
- Arrays.fill(gammaSum,0.0);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- double phisum=0;
- for(int i=0;i<K;i++){
- double sum=0;
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- sum+=digamma_rho[i][pos][word]-rho_sum[i][pos];
- }
- sum+= digamma_gamma[i]-gamma_sum;
- phi[n][i]=sum;
-
- if (i > 0){
- phisum = log_sum(phisum, phi[n][i]);
- }
- else{
- phisum = phi[n][i];
- }
-
- }//end of a word
-
- for(int i=0;i<K;i++){
- phi[n][i]=Math.exp(phi[n][i]-phisum);
- gammaSum[i]+=phi[n][i];
- }
-
- }//end of doc
-
- for(int i=0;i<K;i++){
- gamma[i]=alpha+gammaSum[i];
- }
- gamma_sum=digamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- digamma_gamma[i]=digamma(gamma[i]);
- }
- //compute objective for reporting
-
- obj=0;
-
- for(int i=0;i<K;i++){
- obj+=(alpha-1)*(digamma_gamma[i]-gamma_sum);
- }
-
-
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
-
- for(int i=0;i<K;i++){
- //entropy of phi + expected log likelihood of z
- obj+=phi[n][i]*(digamma_gamma[i]-gamma_sum);
-
- if(phi[n][i]>1e-10){
- obj+=phi[n][i]*Math.log(phi[n][i]);
- }
-
- double beta_sum=0;
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- beta_sum+=(digamma(rho[i][pos][word])-rho_sum[i][pos]);
- }
- obj+=phi[n][i]*beta_sum;
- }
- }
-
- obj-=log_gamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- obj+=Gamma.logGamma(gamma[i]);
- obj-=(gamma[i]-1)*(digamma_gamma[i]-gamma_sum);
- }
-
-// System.out.println(phraseID+": "+obj);
- if(iter>0 && (obj-prev_val)/Math.abs(obj)<VAL_DIFF_RATIO){
- break;
- }
- }//end of inference loop
-
- return obj;
- }//end of inference
-
- /**
- * @return objective of this iteration
- */
- public double EM(){
- double emObj=0;
- if(digamma_rho==null){
- digamma_rho=new double[K][n_positions][n_words];
- }
- for(int i=0;i<K;i++){
- for (int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- digamma_rho[i][pos][j]= digamma(rho[i][pos][j]);
- }
- }
- }
-
- if(rho_sum==null){
- rho_sum=new double [K][n_positions];
- }
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- rho_sum[i][pos]=digamma(arr.F.l1norm(rho[i][pos]));
- }
- }
-
- //E
- double exp_rho[][][]=new double[K][n_positions][n_words];
- if (pool == null)
- {
- for (int d=0;d<c.getNumPhrases();d++)
- {
- List<Edge > doc=c.getEdgesForPhrase(d);
- double[][] phi = new double[doc.size()][K];
- double[] gamma = new double[K];
-
- emObj += inference(d, phi, gamma);
-
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- for(int i=0;i<K;i++){
- exp_rho[i][pos][word]+=phi[n][i];
- }
- }
- }
- //if(d!=0 && d%100==0) System.out.print(".");
- //if(d!=0 && d%1000==0) System.out.println(d);
- }
- }
- else // multi-threaded version of above loop
- {
- class PartialEStep implements Callable<PartialEStep>
- {
- double[][] phi;
- double[] gamma;
- double obj;
- int d;
- PartialEStep(int d) { this.d = d; }
-
- public PartialEStep call()
- {
- phi = new double[c.getEdgesForPhrase(d).size()][K];
- gamma = new double[K];
- obj = inference(d, phi, gamma);
- return this;
- }
- }
-
- List<Future<PartialEStep>> jobs = new ArrayList<Future<PartialEStep>>();
- for (int d=0;d<c.getNumPhrases();d++)
- jobs.add(pool.submit(new PartialEStep(d)));
-
- for (Future<PartialEStep> job: jobs)
- {
- try {
- PartialEStep e = job.get();
-
- emObj += e.obj;
- List<Edge> doc = c.getEdgesForPhrase(e.d);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- for(int i=0;i<K;i++){
- exp_rho[i][pos][word]+=e.phi[n][i];
- }
- }
- }
- } catch (ExecutionException e) {
- System.err.println("ERROR: E-step thread execution failed.");
- throw new RuntimeException(e);
- } catch (InterruptedException e) {
- System.err.println("ERROR: Failed to join E-step thread.");
- throw new RuntimeException(e);
- }
- }
- }
- // System.out.println("EM Objective:"+emObj);
-
- //M
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- rho[i][pos][j]=lambda+exp_rho[i][pos][j];
- }
- }
- }
-
- //E[\log p(\beta|\lambda)] - E[\log q(\beta)]
- for(int i=0;i<K;i++){
- double rhoSum=0;
- for(int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- rhoSum+=rho[i][pos][j];
- }
- double digamma_rhoSum=Gamma.digamma(rhoSum);
- emObj-=Gamma.logGamma(rhoSum);
- for(int j=0;j<n_words;j++){
- emObj+=(lambda-rho[i][pos][j])*(Gamma.digamma(rho[i][pos][j])-digamma_rhoSum);
- emObj+=Gamma.logGamma(rho[i][pos][j]);
- }
- }
- }
-
- return emObj;
- }//end of EM
-
- public void displayPosterior(PrintStream ps)
- {
- for(int d=0;d<c.getNumPhrases();d++){
- List<Edge > doc=c.getEdgesForPhrase(d);
- double[][] phi = new double[doc.size()][K];
- for(int i=0;i<phi.length;i++)
- for(int j=0;j<phi[i].length;j++)
- phi[i][j]=1.0/K;
- double[] gamma = new double[K];
-
- inference(d, phi, gamma);
-
- for(int n=0;n<doc.size();n++){
- Edge edge=doc.get(n);
- int tag=arr.F.argmax(phi[n]);
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
-
- ps.println(" ||| C=" + tag);
- }
- }
- }
-
- double log_sum(double log_a, double log_b)
- {
- double v;
-
- if (log_a < log_b)
- v = log_b+Math.log(1 + Math.exp(log_a-log_b));
- else
- v = log_a+Math.log(1 + Math.exp(log_b-log_a));
- return(v);
- }
-
- double digamma(double x)
- {
- double p;
- x=x+6;
- p=1/(x*x);
- p=(((0.004166666666667*p-0.003968253986254)*p+
- 0.008333333333333)*p-0.083333333333333)*p;
- p=p+Math.log(x)-0.5/x-1/(x-1)-1/(x-2)-1/(x-3)-1/(x-4)-1/(x-5)-1/(x-6);
- return p;
- }
-
- double log_gamma(double x)
- {
- double z=1/(x*x);
-
- x=x+6;
- z=(((-0.000595238095238*z+0.000793650793651)
- *z-0.002777777777778)*z+0.083333333333333)/x;
- z=(x-0.5)*Math.log(x)-x+0.918938533204673+z-Math.log(x-1)-
- Math.log(x-2)-Math.log(x-3)-Math.log(x-4)-Math.log(x-5)-Math.log(x-6);
- return z;
- }
-
- public void useThreadPool(ExecutorService threadPool)
- {
- pool = threadPool;
- }
-}//End of class
diff --git a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java b/gi/posterior-regularisation/prjava/src/test/CorpusTest.java deleted file mode 100644 index b4c3041f..00000000 --- a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java +++ /dev/null @@ -1,60 +0,0 @@ -package test;
-
-import java.util.Arrays;
-import java.util.HashMap;
-
-import data.Corpus;
-import hmm.POS;
-
-public class CorpusTest {
-
- public static void main(String[] args) {
- Corpus c=new Corpus(POS.trainFilename);
-
-
- int idx=30;
-
-
- HashMap<String, Integer>vocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.alphaFilename);
-
- HashMap<String, Integer>tagVocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
-
-
- String [] dict=new String [vocab.size()+1];
- for(String key:vocab.keySet()){
- dict[vocab.get(key)]=key;
- }
- dict[dict.length-1]=Corpus.UNK_TOK;
-
- String [] tagdict=new String [tagVocab.size()+1];
- for(String key:tagVocab.keySet()){
- tagdict[tagVocab.get(key)]=key;
- }
- tagdict[tagdict.length-1]=Corpus.UNK_TOK;
-
- String[] sent=c.get(idx);
- int []data=c.getInt(idx);
-
-
- String []roundtrip=new String [sent.length];
- for(int i=0;i<sent.length;i++){
- roundtrip[i]=dict[data[i]];
- }
- System.out.println(Arrays.toString(sent));
- System.out.println(Arrays.toString(roundtrip));
-
- sent=c.tag.get(idx);
- data=c.tagData.get(idx);
-
-
- roundtrip=new String [sent.length];
- for(int i=0;i<sent.length;i++){
- roundtrip[i]=tagdict[data[i]];
- }
- System.out.println(Arrays.toString(sent));
- System.out.println(Arrays.toString(roundtrip));
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java b/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java deleted file mode 100644 index d54525c8..00000000 --- a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java +++ /dev/null @@ -1,105 +0,0 @@ -package test;
-
-import hmm.HMM;
-import hmm.POS;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-
-import data.Corpus;
-
-public class HMMModelStats {
-
- public static String modelFilename="../posdata/posModel.out";
- public static String alphaFilename="../posdata/corpus.alphabet";
- public static String statsFilename="../posdata/model.stats";
-
- public static final int NUM_WORD=50;
-
- public static String testFilename="../posdata/en_test.conll";
-
- public static double [][]maxwt;
-
- public static void main(String[] args) {
- HashMap<String, Integer>vocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(alphaFilename);
-
- Corpus test=new Corpus(testFilename,vocab);
-
- String [] dict=new String [vocab.size()+1];
- for(String key:vocab.keySet()){
- dict[vocab.get(key)]=key;
- }
- dict[dict.length-1]=Corpus.UNK_TOK;
-
- HMM hmm=new HMM();
- hmm.readModel(modelFilename);
-
-
-
- PrintStream ps = null;
- try {
- ps = io.FileUtil.printstream(new File(statsFilename));
- } catch (IOException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- double [][] emit=hmm.getEmitProb();
- for(int i=0;i<emit.length;i++){
- ArrayList<IntDoublePair>l=new ArrayList<IntDoublePair>();
- for(int j=0;j<emit[i].length;j++){
- l.add(new IntDoublePair(j,emit[i][j]));
- }
- Collections.sort(l);
- ps.println(i);
- for(int j=0;j<NUM_WORD;j++){
- if(j>=dict.length){
- break;
- }
- ps.print(dict[l.get(j).idx]+"\t");
- if((1+j)%10==0){
- ps.println();
- }
- }
- ps.println("\n");
- }
-
- checkMaxwt(hmm,ps,test.getAllData());
-
- int terminalSym=vocab.get(Corpus .END_SYM);
- //sample 10 sentences
- for(int i=0;i<10;i++){
- int []sent=hmm.sample(terminalSym);
- for(int j=0;j<sent.length;j++){
- ps.print(dict[sent[j]]+"\t");
- }
- ps.println();
- }
-
- ps.close();
-
- }
-
- public static void checkMaxwt(HMM hmm,PrintStream ps,int [][]data){
- double [][]emit=hmm.getEmitProb();
- maxwt=new double[emit.length][emit[0].length];
-
- hmm.computeMaxwt(maxwt,data);
- double sum=0;
- for(int i=0;i<maxwt.length;i++){
- for(int j=0;j<maxwt.length;j++){
- sum+=maxwt[i][j];
- }
- }
-
- ps.println("max w t P(w_i|t): "+sum);
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java b/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java deleted file mode 100644 index 3f9f0ad7..00000000 --- a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java +++ /dev/null @@ -1,23 +0,0 @@ -package test;
-
-public class IntDoublePair implements Comparable{
- double val;
- int idx;
- public int compareTo(Object o){
- if(o instanceof IntDoublePair){
- IntDoublePair pair=(IntDoublePair)o;
- if(pair.val>val){
- return 1;
- }
- if(pair.val<val){
- return -1;
- }
- return 0;
- }
- return -1;
- }
- public IntDoublePair(int i,double v){
- val=v;
- idx=i;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java deleted file mode 100644 index 9059a59e..00000000 --- a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java +++ /dev/null @@ -1,131 +0,0 @@ -package test;
-
-
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.BoundsProjection;
-import optimization.projections.Projection;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-
-/**
- * @author javg
- *
- *
- *ax2+ b(y2 -displacement)
- */
-public class X2y2WithConstraints extends ProjectedObjective{
-
-
- double a, b;
- double dx;
- double dy;
- Projection projection;
-
-
- public X2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){
- //projection = new BoundsProjection(0.2,Double.MAX_VALUE);
- super();
- projection = proj;
- this.a = a;
- this.b = b;
- this.dx = dx;
- this.dy = dy;
- setInitialParameters(params);
- System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2");
- System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")");
- printParameters();
- projection.project(parameters);
- printParameters();
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy));
- }
-
- public double[] getGradient() {
- if(gradient == null){
- gradient = new double[2];
- }
- gradientCalls++;
- gradient[0]=2*a*(parameters[0]-dx);
- gradient[1]=2*b*(parameters[1]-dy);
- return gradient;
- }
-
-
- public double[] projectPoint(double[] point) {
- double[] newPoint = point.clone();
- projection.project(newPoint);
- return newPoint;
- }
-
- public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, X2y2WithConstraints o){
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001);
- StopingCriteria stopValue = new ValueDifference(0.001);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(5);
- boolean succed = optimizer.optimize(o,stats,compositeStop);
- System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
-
-
- public String toString(){
-
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")";
- }
-
- public static void main(String[] args) {
- double a = 1;
- double b=1;
- double x0 = 0;
- double y0 =1;
- double dx = 0.5;
- double dy = 0.2 ;
- double [] parameters = new double[2];
- parameters[0] = x0;
- parameters[1] = y0;
- X2y2WithConstraints o = new X2y2WithConstraints(a,b,parameters,dx,dy,
- new SimplexProjection(0.5)
- //new BoundsProjection(0.0,0.4)
- );
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b );
- o.setDebugLevel(4);
-
- LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithProjectedGradientDescent(ls, stats, o);
-
-// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy);
-// stats = new OptimizerStats();
-// o.optimizeWithSpectralProjectedGradientDescent(stats, o);
- }
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Array.java b/gi/posterior-regularisation/prjava/src/util/Array.java deleted file mode 100644 index cc4725af..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Array.java +++ /dev/null @@ -1,41 +0,0 @@ -package util; - -import java.util.Arrays; - -public class Array { - - - - public static void sortDescending(double[] ds){ - for (int i = 0; i < ds.length; i++) ds[i] = -ds[i]; - Arrays.sort(ds); - for (int i = 0; i < ds.length; i++) ds[i] = -ds[i]; - } - - /** - * Return a new reversed array - * @param array - * @return - */ - public static int[] reverseIntArray(int[] array){ - int[] reversed = new int[array.length]; - for (int i = 0; i < reversed.length; i++) { - reversed[i] = array[reversed.length-1-i]; - } - return reversed; - } - - public static String[] sumArray(String[] in, int from){ - String[] res = new String[in.length-from]; - for (int i = from; i < in.length; i++) { - res[i-from] = in[i]; - } - return res; - } - - public static void main(String[] args) { - int[] i = {1,2,3,4}; - util.Printing.printIntArray(i, null, "original"); - util.Printing.printIntArray(reverseIntArray(i), null, "reversed"); - } -} diff --git a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java b/gi/posterior-regularisation/prjava/src/util/ArrayMath.java deleted file mode 100644 index 398a13a2..00000000 --- a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java +++ /dev/null @@ -1,186 +0,0 @@ -package util; - -import java.util.Arrays; - -public class ArrayMath { - - public static double dotProduct(double[] v1, double[] v2) { - assert(v1.length == v2.length); - double result = 0; - for(int i = 0; i < v1.length; i++) - result += v1[i]*v2[i]; - return result; - } - - public static double twoNormSquared(double[] v) { - double result = 0; - for(double d : v) - result += d*d; - return result; - } - - public static boolean containsInvalid(double[] v) { - for(int i = 0; i < v.length; i++) - if(Double.isNaN(v[i]) || Double.isInfinite(v[i])) - return true; - return false; - } - - - - public static double safeAdd(double[] toAdd) { - // Make sure there are no positive infinities - double sum = 0; - for(int i = 0; i < toAdd.length; i++) { - assert(!(Double.isInfinite(toAdd[i]) && toAdd[i] > 0)); - assert(!Double.isNaN(toAdd[i])); - sum += toAdd[i]; - } - - return sum; - } - - /* Methods for filling integer and double arrays (of up to four dimensions) with the given value. */ - - public static void set(int[][][][] array, int value) { - for(int i = 0; i < array.length; i++) { - set(array[i], value); - } - } - - public static void set(int[][][] array, int value) { - for(int i = 0; i < array.length; i++) { - set(array[i], value); - } - } - - public static void set(int[][] array, int value) { - for(int i = 0; i < array.length; i++) { - set(array[i], value); - } - } - - public static void set(int[] array, int value) { - Arrays.fill(array, value); - } - - - public static void set(double[][][][] array, double value) { - for(int i = 0; i < array.length; i++) { - set(array[i], value); - } - } - - public static void set(double[][][] array, double value) { - for(int i = 0; i < array.length; i++) { - set(array[i], value); - } - } - - public static void set(double[][] array, double value) { - for(int i = 0; i < array.length; i++) { - set(array[i], value); - } - } - - public static void set(double[] array, double value) { - Arrays.fill(array, value); - } - - public static void setEqual(double[][][][] dest, double[][][][] source){ - for (int i = 0; i < source.length; i++) { - setEqual(dest[i],source[i]); - } - } - - - public static void setEqual(double[][][] dest, double[][][] source){ - for (int i = 0; i < source.length; i++) { - set(dest[i],source[i]); - } - } - - - public static void set(double[][] dest, double[][] source){ - for (int i = 0; i < source.length; i++) { - setEqual(dest[i],source[i]); - } - } - - public static void setEqual(double[] dest, double[] source){ - System.arraycopy(source, 0, dest, 0, source.length); - } - - public static void plusEquals(double[][][][] array, double val){ - for (int i = 0; i < array.length; i++) { - plusEquals(array[i], val); - } - } - - public static void plusEquals(double[][][] array, double val){ - for (int i = 0; i < array.length; i++) { - plusEquals(array[i], val); - } - } - - public static void plusEquals(double[][] array, double val){ - for (int i = 0; i < array.length; i++) { - plusEquals(array[i], val); - } - } - - public static void plusEquals(double[] array, double val){ - for (int i = 0; i < array.length; i++) { - array[i] += val; - } - } - - - public static double sum(double[] array) { - double res = 0; - for (int i = 0; i < array.length; i++) res += array[i]; - return res; - } - - - - public static double[][] deepclone(double[][] in){ - double[][] res = new double[in.length][]; - for (int i = 0; i < res.length; i++) { - res[i] = in[i].clone(); - } - return res; - } - - - public static double[][][] deepclone(double[][][] in){ - double[][][] res = new double[in.length][][]; - for (int i = 0; i < res.length; i++) { - res[i] = deepclone(in[i]); - } - return res; - } - - public static double cosine(double[] a, - double[] b) { - return (dotProduct(a, b)+1e-5)/(Math.sqrt(dotProduct(a, a)+1e-5)*Math.sqrt(dotProduct(b, b)+1e-5)); - } - - public static double max(double[] ds) { - double max = Double.NEGATIVE_INFINITY; - for(double d:ds) max = Math.max(d,max); - return max; - } - - public static void exponentiate(double[] a) { - for (int i = 0; i < a.length; i++) { - a[i] = Math.exp(a[i]); - } - } - - public static int sum(int[] array) { - int res = 0; - for (int i = 0; i < array.length; i++) res += array[i]; - return res; - } -} diff --git a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java b/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java deleted file mode 100644 index 1ff1ae4a..00000000 --- a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java +++ /dev/null @@ -1,14 +0,0 @@ -package util; - -public interface DifferentiableObjective { - - public double getValue(); - - public void getGradient(double[] gradient); - - public void getParameters(double[] params); - - public void setParameters(double[] newParameters); - - public int getNumParameters(); -} diff --git a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java b/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java deleted file mode 100644 index ff1478ad..00000000 --- a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java +++ /dev/null @@ -1,21 +0,0 @@ -package util; - -public class DigammaFunction { - public static double expDigamma(double number){ - if(number==0)return number; - return Math.exp(digamma(number)); - } - - public static double digamma(double number){ - if(number > 7){ - return digammApprox(number-0.5); - }else{ - return digamma(number+1) - 1.0/number; - } - } - - private static double digammApprox(double value){ - return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4) - + 0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8); - } -} diff --git a/gi/posterior-regularisation/prjava/src/util/FileSystem.java b/gi/posterior-regularisation/prjava/src/util/FileSystem.java deleted file mode 100644 index d7812e40..00000000 --- a/gi/posterior-regularisation/prjava/src/util/FileSystem.java +++ /dev/null @@ -1,21 +0,0 @@ -package util; - -import java.io.File; - -public class FileSystem { - public static boolean createDir(String directory) { - - File dir = new File(directory); - if (!dir.isDirectory()) { - boolean success = dir.mkdirs(); - if (!success) { - System.out.println("Unable to create directory " + directory); - return false; - } - System.out.println("Created directory " + directory); - } else { - System.out.println("Reusing directory " + directory); - } - return true; - } -} diff --git a/gi/posterior-regularisation/prjava/src/util/InputOutput.java b/gi/posterior-regularisation/prjava/src/util/InputOutput.java deleted file mode 100644 index da7f71bf..00000000 --- a/gi/posterior-regularisation/prjava/src/util/InputOutput.java +++ /dev/null @@ -1,67 +0,0 @@ -package util; - -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.PrintStream; -import java.io.UnsupportedEncodingException; -import java.util.Properties; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - -public class InputOutput { - - /** - * Opens a file either compress with gzip or not compressed. - */ - public static BufferedReader openReader(String fileName) throws UnsupportedEncodingException, FileNotFoundException, IOException{ - System.out.println("Reading: " + fileName); - BufferedReader reader; - fileName = fileName.trim(); - if(fileName.endsWith("gz")){ - reader = new BufferedReader( - new InputStreamReader(new GZIPInputStream(new FileInputStream(fileName)),"UTF8")); - }else{ - reader = new BufferedReader(new InputStreamReader( - new FileInputStream(fileName), "UTF8")); - } - - return reader; - } - - - public static PrintStream openWriter(String fileName) - throws UnsupportedEncodingException, FileNotFoundException, IOException{ - System.out.println("Writting to file: " + fileName); - PrintStream writter; - fileName = fileName.trim(); - if(fileName.endsWith("gz")){ - writter = new PrintStream(new GZIPOutputStream(new FileOutputStream(fileName)), - true, "UTF-8"); - - }else{ - writter = new PrintStream(new FileOutputStream(fileName), - true, "UTF-8"); - - } - - return writter; - } - - public static Properties readPropertiesFile(String fileName) { - Properties properties = new Properties(); - try { - properties.load(new FileInputStream(fileName)); - } catch (IOException e) { - e.printStackTrace(); - throw new AssertionError("Wrong properties file " + fileName); - } - System.out.println(properties.toString()); - - return properties; - } -} diff --git a/gi/posterior-regularisation/prjava/src/util/LogSummer.java b/gi/posterior-regularisation/prjava/src/util/LogSummer.java deleted file mode 100644 index 117393b9..00000000 --- a/gi/posterior-regularisation/prjava/src/util/LogSummer.java +++ /dev/null @@ -1,86 +0,0 @@ -package util; - -import java.lang.Math; - -/* - * Math tool for computing logs of sums, when the terms of the sum are already in log form. - * (Useful if the terms of the sum are very small numbers.) - */ -public class LogSummer { - - private LogSummer() { - } - - /** - * Given log(a) and log(b), computes log(a + b). - * - * @param loga log of first sum term - * @param logb log of second sum term - * @return log(sum), where sum = a + b - */ - public static double sum(double loga, double logb) { - assert(!Double.isNaN(loga)); - assert(!Double.isNaN(logb)); - - if(Double.isInfinite(loga)) - return logb; - if(Double.isInfinite(logb)) - return loga; - - double maxLog; - double difference; - if(loga > logb) { - difference = logb - loga; - maxLog = loga; - } - else { - difference = loga - logb; - maxLog = logb; - } - - return Math.log1p(Math.exp(difference)) + maxLog; - } - - /** - * Computes log(exp(array[index]) + b), and - * modifies array[index] to contain this new value. - * - * @param array array to modify - * @param index index at which to modify - * @param logb log of the second sum term - */ - public static void sum(double[] array, int index, double logb) { - array[index] = sum(array[index], logb); - } - - /** - * Computes log(a + b + c + ...) from log(a), log(b), log(c), ... - * by recursively splitting the input and delegating to the sum method. - * - * @param terms an array containing the log of all the terms for the sum - * @return log(sum), where sum = exp(terms[0]) + exp(terms[1]) + ... - */ - public static double sumAll(double... terms) { - return sumAllHelper(terms, 0, terms.length); - } - - /** - * Computes log(a_0 + a_1 + ...) from a_0 = exp(terms[begin]), - * a_1 = exp(terms[begin + 1]), ..., a_{end - 1 - begin} = exp(terms[end - 1]). - * - * @param terms an array containing the log of all the terms for the sum, - * and possibly some other terms that will not go into the sum - * @return log of the sum of the elements in the [begin, end) region of the terms array - */ - private static double sumAllHelper(final double[] terms, final int begin, final int end) { - int length = end - begin; - switch(length) { - case 0: return Double.NEGATIVE_INFINITY; - case 1: return terms[begin]; - default: - int midIndex = begin + length/2; - return sum(sumAllHelper(terms, begin, midIndex), sumAllHelper(terms, midIndex, end)); - } - } - -}
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/src/util/MathUtil.java b/gi/posterior-regularisation/prjava/src/util/MathUtil.java deleted file mode 100644 index 799b1faf..00000000 --- a/gi/posterior-regularisation/prjava/src/util/MathUtil.java +++ /dev/null @@ -1,148 +0,0 @@ -package util; - -import java.util.Random; - -public class MathUtil { - public static final boolean closeToOne(double number){ - return Math.abs(number-1) < 1.E-10; - } - - public static final boolean closeToZero(double number){ - return Math.abs(number) < 1.E-5; - } - - /** - * Return a ramdom multinominal distribution. - * - * @param size - * @return - */ - public static final double[] randomVector(int size, Random r){ - double[] random = new double[size]; - double sum=0; - for(int i = 0; i < size; i++){ - double number = r.nextDouble(); - random[i] = number; - sum+=number; - } - for(int i = 0; i < size; i++){ - random[i] = random[i]/sum; - } - return random; - } - - - - public static double sum(double[] ds) { - double res = 0; - for (int i = 0; i < ds.length; i++) { - res+=ds[i]; - } - return res; - } - - public static double max(double[] ds) { - double res = Double.NEGATIVE_INFINITY; - for (int i = 0; i < ds.length; i++) { - res = Math.max(res, ds[i]); - } - return res; - } - - public static double min(double[] ds) { - double res = Double.POSITIVE_INFINITY; - for (int i = 0; i < ds.length; i++) { - res = Math.min(res, ds[i]); - } - return res; - } - - - public static double KLDistance(double[] p, double[] q) { - int len = p.length; - double kl = 0; - for (int j = 0; j < len; j++) { - if (p[j] == 0 || q[j] == 0) { - continue; - } else { - kl += q[j] * Math.log(q[j] / p[j]); - } - - } - return kl; - } - - public static double L2Distance(double[] p, double[] q) { - int len = p.length; - double l2 = 0; - for (int j = 0; j < len; j++) { - if (p[j] == 0 || q[j] == 0) { - continue; - } else { - l2 += (q[j] - p[j])*(q[j] - p[j]); - } - - } - return Math.sqrt(l2); - } - - public static double L1Distance(double[] p, double[] q) { - int len = p.length; - double l1 = 0; - for (int j = 0; j < len; j++) { - if (p[j] == 0 || q[j] == 0) { - continue; - } else { - l1 += Math.abs(q[j] - p[j]); - } - - } - return l1; - } - - public static double dot(double[] ds, double[] ds2) { - double res = 0; - for (int i = 0; i < ds2.length; i++) { - res+= ds[i]*ds2[i]; - } - return res; - } - - public static double expDigamma(double number){ - return Math.exp(digamma(number)); - } - - public static double digamma(double number){ - if(number > 7){ - return digammApprox(number-0.5); - }else{ - return digamma(number+1) - 1.0/number; - } - } - - private static double digammApprox(double value){ - return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4) - + 0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8); - } - - public static double eulerGamma = 0.57721566490152386060651209008240243; - // FIXME -- so far just the initialization from Minka's paper "Estimating a Dirichlet distribution". - public static double invDigamma(double y) { - if (y>= -2.22) return Math.exp(y)+0.5; - return -1.0/(y+eulerGamma); - } - - - - public static void main(String[] args) { - for(double i = 0; i < 10 ; i+=0.1){ - System.out.println(i+"\t"+expDigamma(i)+"\t"+(i-0.5)); - } -// double gammaValue = (expDigamma(3)/expDigamma(10) + expDigamma(3)/expDigamma(10) + expDigamma(4)/expDigamma(10)); -// double normalValue = 3/10+3/4+10/10; -// System.out.println("Gamma " + gammaValue + " normal " + normalValue); - } - - - -} diff --git a/gi/posterior-regularisation/prjava/src/util/Matrix.java b/gi/posterior-regularisation/prjava/src/util/Matrix.java deleted file mode 100644 index 8fb6d911..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Matrix.java +++ /dev/null @@ -1,16 +0,0 @@ -package util; - -public class Matrix { - int x; - int y; - double[][] values; - - public Matrix(int x, int y){ - this.x = x; - this.y=y; - values = new double[x][y]; - } - - - -} diff --git a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java b/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java deleted file mode 100644 index 83a65611..00000000 --- a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java +++ /dev/null @@ -1,47 +0,0 @@ -package util; - - -public class MemoryTracker { - - double initM,finalM; - boolean start = false,finish = false; - - public MemoryTracker(){ - - } - - public void start(){ - System.gc(); - System.gc(); - System.gc(); - initM = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/(1024*1024); - start = true; - } - - public void finish(){ - if(!start){ - throw new RuntimeException("Canot stop before starting"); - } - System.gc(); - System.gc(); - System.gc(); - finalM = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/(1024*1024); - finish = true; - } - - public String print(){ - if(!finish){ - throw new RuntimeException("Canot print before stopping"); - } - return "Used: " + (finalM - initM) + "MB"; - } - - public void clear(){ - initM = 0; - finalM = 0; - finish = false; - start = false; - } - - -} diff --git a/gi/posterior-regularisation/prjava/src/util/Pair.java b/gi/posterior-regularisation/prjava/src/util/Pair.java deleted file mode 100644 index 7b1f108d..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Pair.java +++ /dev/null @@ -1,31 +0,0 @@ -package util; - -public class Pair<O1, O2> { - public O1 _first; - public O2 _second; - - public final O1 first() { - return _first; - } - - public final O2 second() { - return _second; - } - - public final void setFirst(O1 value){ - _first = value; - } - - public final void setSecond(O2 value){ - _second = value; - } - - public Pair(O1 first, O2 second) { - _first = first; - _second = second; - } - - public String toString(){ - return _first + " " + _second; - } -} diff --git a/gi/posterior-regularisation/prjava/src/util/Printing.java b/gi/posterior-regularisation/prjava/src/util/Printing.java deleted file mode 100644 index 14fcbe91..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Printing.java +++ /dev/null @@ -1,158 +0,0 @@ -package util; - -public class Printing { - static java.text.DecimalFormat fmt = new java.text.DecimalFormat(); - - public static String padWithSpace(String s, int len){ - StringBuffer sb = new StringBuffer(); - while(sb.length() +s.length() < len){ - sb.append(" "); - } - sb.append(s); - return sb.toString(); - } - - public static String prettyPrint(double d, String patt, int len) { - fmt.applyPattern(patt); - String s = fmt.format(d); - while (s.length() < len) { - s = " " + s; - } - return s; - } - - public static String formatTime(long duration) { - StringBuilder sb = new StringBuilder(); - double d = duration / 1000; - fmt.applyPattern("00"); - sb.append(fmt.format((int) (d / (60 * 60))) + ":"); - d -= ((int) d / (60 * 60)) * 60 * 60; - sb.append(fmt.format((int) (d / 60)) + ":"); - d -= ((int) d / 60) * 60; - fmt.applyPattern("00.0"); - sb.append(fmt.format(d)); - return sb.toString(); - } - - - public static String doubleArrayToString(double[] array, String[] labels, String arrayName) { - StringBuffer res = new StringBuffer(); - res.append(arrayName); - res.append("\n"); - for (int i = 0; i < array.length; i++) { - if (labels == null){ - res.append(i+" \t"); - }else{ - res.append(labels[i]+ "\t"); - } - } - res.append("sum\n"); - double sum = 0; - for (int i = 0; i < array.length; i++) { - res.append(prettyPrint(array[i], - "0.00000E00", 8) + "\t"); - sum+=array[i]; - } - res.append(prettyPrint(sum, - "0.00000E00", 8)+"\n"); - return res.toString(); - } - - - - public static void printDoubleArray(double[] array, String labels[], String arrayName) { - System.out.println(doubleArrayToString(array, labels,arrayName)); - } - - - public static String doubleArrayToString(double[][] array, String[] labels1, String[] labels2, - String arrayName){ - StringBuffer res = new StringBuffer(); - res.append(arrayName); - res.append("\n\t"); - //Calculates the column sum to keeps the sums - double[] sums = new double[array[0].length+1]; - //Prints rows headings - for (int i = 0; i < array[0].length; i++) { - if (labels1 == null){ - res.append(i+" \t"); - }else{ - res.append(labels1[i]+" \t"); - } - } - res.append("sum\n"); - double sum = 0; - //For each row print heading - for (int i = 0; i < array.length; i++) { - if (labels2 == null){ - res.append(i+"\t"); - }else{ - res.append(labels2[i]+"\t"); - } - //Print values for that row - for (int j = 0; j < array[0].length; j++) { - res.append(" " + prettyPrint(array[i][j], - "0.00000E00", 8) + "\t"); - sums[j] += array[i][j]; - sum+=array[i][j]; //Sum all values of that row - } - //Print row sum - res.append(prettyPrint(sum,"0.00000E00", 8)+"\n"); - sums[array[0].length]+=sum; - sum=0; - } - res.append("sum\t"); - //Print values for colums sum - for (int i = 0; i < array[0].length+1; i++) { - res.append(prettyPrint(sums[i],"0.00000E00", 8)+"\t"); - } - res.append("\n"); - return res.toString(); - } - - public static void printDoubleArray(double[][] array, String[] labels1, String[] labels2 - , String arrayName) { - System.out.println(doubleArrayToString(array, labels1,labels2,arrayName)); - } - - - public static void printIntArray(int[][] array, String[] labels1, String[] labels2, String arrayName, - int size1, int size2) { - System.out.println(arrayName); - for (int i = 0; i < size1; i++) { - for (int j = 0; j < size2; j++) { - System.out.print(" " + array[i][j] + " "); - - } - System.out.println(); - } - System.out.println(); - } - - public static String intArrayToString(int[] array, String[] labels, String arrayName) { - StringBuffer res = new StringBuffer(); - res.append(arrayName); - for (int i = 0; i < array.length; i++) { - res.append(" " + array[i] + " "); - - } - res.append("\n"); - return res.toString(); - } - - public static void printIntArray(int[] array, String[] labels, String arrayName) { - System.out.println(intArrayToString(array, labels,arrayName)); - } - - public static String toString(double[][] d){ - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < d.length; i++) { - for (int j = 0; j < d[0].length; j++) { - sb.append(prettyPrint(d[i][j], "0.00E0", 10)); - } - sb.append("\n"); - } - return sb.toString(); - } - -} diff --git a/gi/posterior-regularisation/prjava/src/util/Sorters.java b/gi/posterior-regularisation/prjava/src/util/Sorters.java deleted file mode 100644 index 836444e5..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Sorters.java +++ /dev/null @@ -1,39 +0,0 @@ -package util; - -import java.util.Comparator; - -public class Sorters { - public static class sortWordsCounts implements Comparator{ - - /** - * Sorter for a pair of word id, counts. Sort ascending by counts - */ - public int compare(Object arg0, Object arg1) { - Pair<Integer,Integer> p1 = (Pair<Integer,Integer>)arg0; - Pair<Integer,Integer> p2 = (Pair<Integer,Integer>)arg1; - if(p1.second() > p2.second()){ - return 1; - }else{ - return -1; - } - } - - } - -public static class sortWordsDouble implements Comparator{ - - /** - * Sorter for a pair of word id, counts. Sort by counts - */ - public int compare(Object arg0, Object arg1) { - Pair<Integer,Double> p1 = (Pair<Integer,Double>)arg0; - Pair<Integer,Double> p2 = (Pair<Integer,Double>)arg1; - if(p1.second() < p2.second()){ - return 1; - }else{ - return -1; - } - } - - } -} diff --git a/gi/posterior-regularisation/prjava/train-PR-cluster.sh b/gi/posterior-regularisation/prjava/train-PR-cluster.sh deleted file mode 100755 index 67552c00..00000000 --- a/gi/posterior-regularisation/prjava/train-PR-cluster.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -d=`dirname $0` -java -ea -Xmx30g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/commons-math-2.1.jar phrase.Trainer $* diff --git a/gi/posterior-regularisation/projected_gradient.cc b/gi/posterior-regularisation/projected_gradient.cc deleted file mode 100644 index f7c39817..00000000 --- a/gi/posterior-regularisation/projected_gradient.cc +++ /dev/null @@ -1,87 +0,0 @@ -// -// Minimises given functional using the projected gradient method. Based on -// algorithm and demonstration example in Linear and Nonlinear Programming, -// Luenberger and Ye, 3rd ed., p 370. -// - -#include "invert.hh" -#include <iostream> - -using namespace std; - -double -f(double x1, double x2, double x3, double x4) -{ - return x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4 - 2 * x1 - 3 * x4; -} - -ublas::vector<double> -g(double x1, double x2, double x3, double x4) -{ - ublas::vector<double> v(4); - v(0) = 2 * x1 - 2; - v(1) = 2 * x2; - v(2) = 2 * x3; - v(3) = 2 * x4 - 3; - return v; -} - -ublas::matrix<double> -activeConstraints(double x1, double x2, double x3, double x4) -{ - int n = 2; - if (x1 == 0) ++n; - if (x2 == 0) ++n; - if (x3 == 0) ++n; - if (x4 == 0) ++n; - - ublas::matrix<double> a(n,4); - a(0, 0) = 2; a(0, 1) = 1; a(0, 2) = 1; a(0, 3) = 4; - a(1, 0) = 1; a(1, 1) = 1; a(1, 2) = 2; a(1, 3) = 1; - - int c = 2; - if (x1 == 0) a(c++, 0) = 1; - if (x2 == 0) a(c++, 1) = 1; - if (x3 == 0) a(c++, 2) = 1; - if (x4 == 0) a(c++, 3) = 1; - - return a; -} - -ublas::matrix<double> -projection(const ublas::matrix<double> &a) -{ - ublas::matrix<double> aT = ublas::trans(a); - ublas::matrix<double> inv(a.size1(), a.size1()); - bool ok = invert_matrix(ublas::matrix<double>(ublas::prod(a, aT)), inv); - assert(ok && "Failed to invert matrix"); - return ublas::identity_matrix<double>(4) - - ublas::prod(aT, ublas::matrix<double>(ublas::prod(inv, a))); -} - -int main(int argc, char *argv[]) -{ - double x1 = 2, x2 = 2, x3 = 1, x4 = 0; - - double fval = f(x1, x2, x3, x4); - cout << "f = " << fval << endl; - ublas::vector<double> grad = g(x1, x2, x3, x4); - cout << "g = " << grad << endl; - ublas::matrix<double> A = activeConstraints(x1, x2, x3, x4); - cout << "A = " << A << endl; - ublas::matrix<double> P = projection(A); - cout << "P = " << P << endl; - // the direction of movement - ublas::vector<double> d = prod(P, grad); - cout << "d = " << (d / d(0)) << endl; - - // special case for d = 0 - - // next solve for limits on the line search - - // then use golden rule technique between these values (if bounded) - - // or simple Armijo's rule technique - - return 0; -} diff --git a/gi/posterior-regularisation/simplex_pg.py b/gi/posterior-regularisation/simplex_pg.py deleted file mode 100644 index 5da796d3..00000000 --- a/gi/posterior-regularisation/simplex_pg.py +++ /dev/null @@ -1,55 +0,0 @@ -# -# Following Leunberger and Ye, Linear and Nonlinear Progamming, 3rd ed. p367 -# "The gradient projection method" -# applied to an equality constraint for a simplex. -# -# min f(x) -# s.t. x >= 0, sum_i x = d -# -# FIXME: enforce the positivity constraint - a limit on the line search? -# - -from numpy import * -from scipy import * -from linesearch import line_search -# local copy of scipy's Amijo line_search - wasn't enforcing alpha max correctly -import sys - -dims = 4 - -def f(x): - fv = x[0]*x[0] + x[1]*x[1] + x[2]*x[2] + x[3]*x[3] - 2*x[0] - 3*x[3] - # print 'evaluating f at', x, 'value', fv - return fv - -def g(x): - return array([2*x[0] - 2, 2*x[1], 2*x[2], 2*x[3]-3]) - -def pg(x): - gv = g(x) - return gv - sum(gv) / dims - -x = ones(dims) / dims -old_fval = None - -while True: - fv = f(x) - gv = g(x) - dv = pg(x) - - print 'x', x, 'f', fv, 'g', gv, 'd', dv - - if old_fval == None: - old_fval = fv + 0.1 - - # solve for maximum step size i.e. when positivity constraints kick in - # x - alpha d = 0 => alpha = x/d - amax = max(x/dv) - if amax < 1e-8: break - - stuff = line_search(f, pg, x, -dv, dv, fv, old_fval, amax=amax) - alpha = stuff[0] # Nb. can avoid next evaluation of f,g,d using 'stuff' - if alpha < 1e-8: break - x -= alpha * dv - - old_fval = fv diff --git a/gi/posterior-regularisation/split-languages.py b/gi/posterior-regularisation/split-languages.py deleted file mode 100755 index 206da661..00000000 --- a/gi/posterior-regularisation/split-languages.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/python - -import sys - -sout = open(sys.argv[1], 'w') -tout = open(sys.argv[2], 'w') -for line in sys.stdin: - phrase, contexts = line.rstrip().split('\t') - sp, tp = phrase.split(' <SPLIT> ') - sout.write('%s\t' % sp) - tout.write('%s\t' % tp) - parts = contexts.split(' ||| ') - for i in range(0, len(parts), 2): - sc, tc = parts[i].split(' <SPLIT> ') - if i != 0: - sout.write(' ||| ') - tout.write(' ||| ') - sout.write('%s ||| %s' % (sc, parts[i+1])) - tout.write('%s ||| %s' % (tc, parts[i+1])) - sout.write('\n') - tout.write('\n') -sout.close() -tout.close() diff --git a/gi/posterior-regularisation/train_pr_agree.py b/gi/posterior-regularisation/train_pr_agree.py deleted file mode 100644 index 9d41362d..00000000 --- a/gi/posterior-regularisation/train_pr_agree.py +++ /dev/null @@ -1,400 +0,0 @@ -import sys -import scipy.optimize -from scipy.stats import geom -from numpy import * -from numpy.random import random, seed - -style = sys.argv[1] -if len(sys.argv) >= 3: - seed(int(sys.argv[2])) - -# -# Step 1: load the concordance counts -# - -edges = [] -word_types = {} -phrase_types = {} -context_types = {} - -for line in sys.stdin: - phrase, rest = line.strip().split('\t') - ptoks = tuple(map(lambda t: word_types.setdefault(t, len(word_types)), phrase.split())) - pid = phrase_types.setdefault(ptoks, len(phrase_types)) - - parts = rest.split('|||') - for i in range(0, len(parts), 2): - context, count = parts[i:i+2] - - ctx = filter(lambda x: x != '<PHRASE>', context.split()) - ctoks = tuple(map(lambda t: word_types.setdefault(t, len(word_types)), ctx)) - cid = context_types.setdefault(ctoks, len(context_types)) - - cnt = int(count.strip()[2:]) - edges.append((pid, cid, cnt)) - -word_type_list = [None] * len(word_types) -for typ, index in word_types.items(): - word_type_list[index] = typ - -phrase_type_list = [None] * len(phrase_types) -for typ, index in phrase_types.items(): - phrase_type_list[index] = typ - -context_type_list = [None] * len(context_types) -for typ, index in context_types.items(): - context_type_list[index] = typ - -num_tags = 5 -num_types = len(word_types) -num_phrases = len(phrase_types) -num_contexts = len(context_types) -num_edges = len(edges) - -print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', num_types, 'word types' - -# -# Step 2: expectation maximisation -# - -def normalise(a): - return a / float(sum(a)) - -class PhraseToContextModel: - def __init__(self): - # Pr(tag | phrase) - self.tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)] - # Pr(context at pos i = w | tag) indexed by i, tag, word - self.contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)] - - def prob(self, pid, cid): - # return distribution p(tag, context | phrase) as vector of length |tags| - context = context_type_list[cid] - dist = zeros(num_tags) - for t in range(num_tags): - prob = self.tagDist[pid][t] - for k, tokid in enumerate(context): - prob *= self.contextWordDist[k][t][tokid] - dist[t] = prob - return dist - - def expectation_maximisation_step(self, lamba=None): - tagCounts = zeros((num_phrases, num_tags)) - contextWordCounts = zeros((4, num_tags, num_types)) - - # E-step - llh = 0 - for pid, cid, cnt in edges: - q = self.prob(pid, cid) - z = sum(q) - q /= z - llh += log(z) - context = context_type_list[cid] - if lamba != None: - q *= exp(lamba) - q /= sum(q) - for t in range(num_tags): - tagCounts[pid][t] += cnt * q[t] - for i in range(4): - for t in range(num_tags): - contextWordCounts[i][t][context[i]] += cnt * q[t] - - # M-step - for p in range(num_phrases): - self.tagDist[p] = normalise(tagCounts[p]) - for i in range(4): - for t in range(num_tags): - self.contextWordDist[i][t] = normalise(contextWordCounts[i,t]) - - return llh - -class ContextToPhraseModel: - def __init__(self): - # Pr(tag | context) = Multinomial - self.tagDist = [normalise(random(num_tags)+1) for p in range(num_contexts)] - # Pr(phrase = w | tag) = Multinomial - self.phraseSingleDist = [normalise(random(num_types)+1) for t in range(num_tags)] - # Pr(phrase_1 = w | tag) = Multinomial - self.phraseLeftDist = [normalise(random(num_types)+1) for t in range(num_tags)] - # Pr(phrase_-1 = w | tag) = Multinomial - self.phraseRightDist = [normalise(random(num_types)+1) for t in range(num_tags)] - # Pr(|phrase| = l | tag) = Geometric - self.phraseLengthDist = [0.5] * num_tags - # n.b. internal words for phrases of length >= 3 are drawn from uniform distribution - - def prob(self, pid, cid): - # return distribution p(tag, phrase | context) as vector of length |tags| - phrase = phrase_type_list[pid] - dist = zeros(num_tags) - for t in range(num_tags): - prob = self.tagDist[cid][t] - f = self.phraseLengthDist[t] - prob *= geom.pmf(len(phrase), f) - if len(phrase) == 1: - prob *= self.phraseSingleDist[t][phrase[0]] - else: - prob *= self.phraseLeftDist[t][phrase[0]] - prob *= self.phraseRightDist[t][phrase[-1]] - dist[t] = prob - return dist - - def expectation_maximisation_step(self, lamba=None): - tagCounts = zeros((num_contexts, num_tags)) - phraseSingleCounts = zeros((num_tags, num_types)) - phraseLeftCounts = zeros((num_tags, num_types)) - phraseRightCounts = zeros((num_tags, num_types)) - phraseLength = zeros(num_types) - - # E-step - llh = 0 - for pid, cid, cnt in edges: - q = self.prob(pid, cid) - z = sum(q) - q /= z - llh += log(z) - if lamba != None: - q *= exp(lamba) - q /= sum(q) - #print 'p', phrase_type_list[pid], 'c', context_type_list[cid], 'q', q - phrase = phrase_type_list[pid] - for t in range(num_tags): - tagCounts[cid][t] += cnt * q[t] - phraseLength[t] += cnt * len(phrase) * q[t] - if len(phrase) == 1: - phraseSingleCounts[t][phrase[0]] += cnt * q[t] - else: - phraseLeftCounts[t][phrase[0]] += cnt * q[t] - phraseRightCounts[t][phrase[-1]] += cnt * q[t] - - # M-step - for t in range(num_tags): - self.phraseLengthDist[t] = min(max(sum(tagCounts[:,t]) / phraseLength[t], 1e-6), 1-1e-6) - self.phraseSingleDist[t] = normalise(phraseSingleCounts[t]) - self.phraseLeftDist[t] = normalise(phraseLeftCounts[t]) - self.phraseRightDist[t] = normalise(phraseRightCounts[t]) - for c in range(num_contexts): - self.tagDist[c] = normalise(tagCounts[c]) - - #print 't', self.tagDist - #print 'l', self.phraseLengthDist - #print 's', self.phraseSingleDist - #print 'L', self.phraseLeftDist - #print 'R', self.phraseRightDist - - return llh - -class ProductModel: - """ - WARNING: I haven't verified the maths behind this model. It's quite likely to be incorrect. - """ - - def __init__(self): - self.pcm = PhraseToContextModel() - self.cpm = ContextToPhraseModel() - - def prob(self, pid, cid): - p1 = self.pcm.prob(pid, cid) - p2 = self.cpm.prob(pid, cid) - return (p1 / sum(p1)) * (p2 / sum(p2)) - - def expectation_maximisation_step(self): - tagCountsGivenPhrase = zeros((num_phrases, num_tags)) - contextWordCounts = zeros((4, num_tags, num_types)) - - tagCountsGivenContext = zeros((num_contexts, num_tags)) - phraseSingleCounts = zeros((num_tags, num_types)) - phraseLeftCounts = zeros((num_tags, num_types)) - phraseRightCounts = zeros((num_tags, num_types)) - phraseLength = zeros(num_types) - - kl = llh1 = llh2 = 0 - for pid, cid, cnt in edges: - p1 = self.pcm.prob(pid, cid) - llh1 += log(sum(p1)) * cnt - p2 = self.cpm.prob(pid, cid) - llh2 += log(sum(p2)) * cnt - - q = (p1 / sum(p1)) * (p2 / sum(p2)) - kl += log(sum(q)) * cnt - qi = sqrt(q) - qi /= sum(qi) - - phrase = phrase_type_list[pid] - context = context_type_list[cid] - for t in range(num_tags): - tagCountsGivenPhrase[pid][t] += cnt * qi[t] - tagCountsGivenContext[cid][t] += cnt * qi[t] - phraseLength[t] += cnt * len(phrase) * qi[t] - if len(phrase) == 1: - phraseSingleCounts[t][phrase[0]] += cnt * qi[t] - else: - phraseLeftCounts[t][phrase[0]] += cnt * qi[t] - phraseRightCounts[t][phrase[-1]] += cnt * qi[t] - for i in range(4): - contextWordCounts[i][t][context[i]] += cnt * qi[t] - - kl *= -2 - - for t in range(num_tags): - for i in range(4): - self.pcm.contextWordDist[i][t] = normalise(contextWordCounts[i,t]) - self.cpm.phraseLengthDist[t] = min(max(sum(tagCountsGivenContext[:,t]) / phraseLength[t], 1e-6), 1-1e-6) - self.cpm.phraseSingleDist[t] = normalise(phraseSingleCounts[t]) - self.cpm.phraseLeftDist[t] = normalise(phraseLeftCounts[t]) - self.cpm.phraseRightDist[t] = normalise(phraseRightCounts[t]) - for p in range(num_phrases): - self.pcm.tagDist[p] = normalise(tagCountsGivenPhrase[p]) - for c in range(num_contexts): - self.cpm.tagDist[c] = normalise(tagCountsGivenContext[c]) - - # return the overall objective - return llh1 + llh2 + kl - -class RegularisedProductModel: - # as above, but with a slack regularisation term which kills the - # closed-form solution for the E-step - - def __init__(self, epsilon): - self.pcm = PhraseToContextModel() - self.cpm = ContextToPhraseModel() - self.epsilon = epsilon - self.lamba = zeros(num_tags) - - def prob(self, pid, cid): - p1 = self.pcm.prob(pid, cid) - p2 = self.cpm.prob(pid, cid) - return (p1 / sum(p1)) * (p2 / sum(p2)) - - def dual(self, lamba): - return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5 - - def dual_gradient(self, lamba): - return self.expected_features(lamba) + self.epsilon * 2 * lamba - - def expectation_maximisation_step(self): - # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2 - self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba, - bounds=[(0, 1e100)] * num_tags, - fprime=self.dual_gradient, iprint=1) - - # E,M-steps: collect expected counts under q_lambda and normalise - llh1 = self.pcm.expectation_maximisation_step(self.lamba) - llh2 = self.cpm.expectation_maximisation_step(-self.lamba) - - # return the overall objective: llh - KL(q||p1.p2) - # llh = llh1 + llh2 - # kl = sum q log q / p1 p2 = sum q { lambda . phi } - log Z - return llh1 + llh2 + self.logz(self.lamba) \ - - dot(self.lamba, self.expected_features(self.lamba)) - - def logz(self, lamba): - lz = 0 - for pid, cid, cnt in edges: - p1 = self.pcm.prob(pid, cid) - z1 = dot(p1 / sum(p1), exp(lamba)) - lz += log(z1) * cnt - - p2 = self.cpm.prob(pid, cid) - z2 = dot(p2 / sum(p2), exp(-lamba)) - lz += log(z2) * cnt - return lz - - def expected_features(self, lamba): - fs = zeros(num_tags) - for pid, cid, cnt in edges: - p1 = self.pcm.prob(pid, cid) - q1 = (p1 / sum(p1)) * exp(lamba) - fs += cnt * q1 / sum(q1) - - p2 = self.cpm.prob(pid, cid) - q2 = (p2 / sum(p2)) * exp(-lamba) - fs -= cnt * q2 / sum(q2) - return fs - - -class InterpolatedModel: - def __init__(self, epsilon): - self.pcm = PhraseToContextModel() - self.cpm = ContextToPhraseModel() - self.epsilon = epsilon - self.lamba = zeros(num_tags) - - def prob(self, pid, cid): - p1 = self.pcm.prob(pid, cid) - p2 = self.cpm.prob(pid, cid) - return (p1 + p2) / 2 - - def dual(self, lamba): - return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5 - - def dual_gradient(self, lamba): - return self.expected_features(lamba) + self.epsilon * 2 * lamba - - def expectation_maximisation_step(self): - # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2 - self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba, - bounds=[(0, 1e100)] * num_tags, - fprime=self.dual_gradient, iprint=2) - - # E,M-steps: collect expected counts under q_lambda and normalise - llh1 = self.pcm.expectation_maximisation_step(self.lamba) - llh2 = self.cpm.expectation_maximisation_step(self.lamba) - - # return the overall objective: llh1 + llh2 - KL(q||p1.p2) - # kl = sum_y q log q / 0.5 * (p1 + p2) = sum_y q(y) { -lambda . phi(y) } - log Z - # = -log Z + lambda . (E_q1[-phi] + E_q2[-phi]) / 2 - kl = -self.logz(self.lamba) + dot(self.lamba, self.expected_features(self.lamba)) - return llh1 + llh2 - kl, llh1, llh2, kl - # FIXME: KL comes out negative... - - def logz(self, lamba): - lz = 0 - for pid, cid, cnt in edges: - p1 = self.pcm.prob(pid, cid) - q1 = p1 / sum(p1) * exp(-lamba) - q1z = sum(q1) - - p2 = self.cpm.prob(pid, cid) - q2 = p2 / sum(p2) * exp(-lamba) - q2z = sum(q2) - - lz += log(0.5 * (q1z + q2z)) * cnt - return lz - - # z = 1/2 * (sum_y p1(y|x) exp (-lambda . phi(y)) + sum_y p2(y|x) exp (-lambda . phi(y))) - # = 1/2 (z1 + z2) - # d (log z) / dlambda = 1/2 (E_q1 [ -phi ] + E_q2 [ -phi ] ) - def expected_features(self, lamba): - fs = zeros(num_tags) - for pid, cid, cnt in edges: - p1 = self.pcm.prob(pid, cid) - q1 = (p1 / sum(p1)) * exp(-lamba) - fs -= 0.5 * cnt * q1 / sum(q1) - - p2 = self.cpm.prob(pid, cid) - q2 = (p2 / sum(p2)) * exp(-lamba) - fs -= 0.5 * cnt * q2 / sum(q2) - return fs - -if style == 'p2c': - m = PhraseToContextModel() -elif style == 'c2p': - m = ContextToPhraseModel() -elif style == 'prod': - m = ProductModel() -elif style == 'prodslack': - m = RegularisedProductModel(0.5) -elif style == 'sum': - m = InterpolatedModel(0.5) - -for iteration in range(30): - obj = m.expectation_maximisation_step() - print 'iteration', iteration, 'objective', obj - -for pid, cid, cnt in edges: - p = m.prob(pid, cid) - phrase = phrase_type_list[pid] - phrase_str = ' '.join(map(word_type_list.__getitem__, phrase)) - context = context_type_list[cid] - context_str = ' '.join(map(word_type_list.__getitem__, context)) - print '%s\t%s ||| C=%d' % (phrase_str, context_str, argmax(p)) diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py deleted file mode 100644 index 8521bccb..00000000 --- a/gi/posterior-regularisation/train_pr_global.py +++ /dev/null @@ -1,296 +0,0 @@ -import sys -import scipy.optimize -from numpy import * -from numpy.random import random - -# -# Step 1: load the concordance counts -# - -edges_phrase_to_context = [] -edges_context_to_phrase = [] -types = {} -context_types = {} -num_edges = 0 - -for line in sys.stdin: - phrase, rest = line.strip().split('\t') - parts = rest.split('|||') - edges_phrase_to_context.append((phrase, [])) - for i in range(0, len(parts), 2): - context, count = parts[i:i+2] - - ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split())) - cnt = int(count.strip()[2:]) - edges_phrase_to_context[-1][1].append((ctx, cnt)) - - cid = context_types.get(ctx, len(context_types)) - if cid == len(context_types): - context_types[ctx] = cid - edges_context_to_phrase.append((ctx, [])) - edges_context_to_phrase[cid][1].append((phrase, cnt)) - - for token in ctx: - types.setdefault(token, len(types)) - for token in phrase.split(): - types.setdefault(token, len(types)) - - num_edges += 1 - -print 'Read in', num_edges, 'edges and', len(types), 'word types' - -print 'edges_phrase_to_context', edges_phrase_to_context - -# -# Step 2: initialise the model parameters -# - -num_tags = 10 -num_types = len(types) -num_phrases = len(edges_phrase_to_context) -num_contexts = len(edges_context_to_phrase) -delta = int(sys.argv[1]) -gamma = int(sys.argv[2]) - -def normalise(a): - return a / float(sum(a)) - -# Pr(tag | phrase) -tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)] -#tagDist = [normalise(array(range(1,num_tags+1))) for p in range(num_phrases)] -# Pr(context at pos i = w | tag) indexed by i, tag, word -#contextWordDist = [[normalise(array(range(1,num_types+1))) for t in range(num_tags)] for i in range(4)] -contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)] -# PR langrange multipliers -lamba = zeros(2 * num_edges * num_tags) -omega_offset = num_edges * num_tags -lamba_index = {} -next = 0 -for phrase, ccs in edges_phrase_to_context: - for context, count in ccs: - lamba_index[phrase,context] = next - next += num_tags -#print lamba_index - -# -# Step 3: expectation maximisation -# - -for iteration in range(20): - tagCounts = [zeros(num_tags) for p in range(num_phrases)] - contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)] - - #print 'tagDist', tagDist - #print 'contextWordCounts[0][0]', contextWordCounts[0][0] - - # Tune lambda - # dual: min log Z(lamba) s.t. lamba >= 0; - # sum_c lamba_pct <= delta; sum_p lamba_pct <= gamma - def dual(ls): - logz = 0 - for p, (phrase, ccs) in enumerate(edges_phrase_to_context): - for context, count in ccs: - conditionals = zeros(num_tags) - for t in range(num_tags): - prob = tagDist[p][t] - for i in range(4): - prob *= contextWordDist[i][t][types[context[i]]] - conditionals[t] = prob - cz = sum(conditionals) - conditionals /= cz - - #print 'dual', phrase, context, count, 'p =', conditionals - - local_z = 0 - for t in range(num_tags): - li = lamba_index[phrase,context] + t - local_z += conditionals[t] * exp(-ls[li] - ls[omega_offset+li]) - logz += log(local_z) * count - - #print 'ls', ls - #print 'lambda', list(ls) - #print 'dual', logz - return logz - - def loglikelihood(): - llh = 0 - for p, (phrase, ccs) in enumerate(edges_phrase_to_context): - for context, count in ccs: - conditionals = zeros(num_tags) - for t in range(num_tags): - prob = tagDist[p][t] - for i in range(4): - prob *= contextWordDist[i][t][types[context[i]]] - conditionals[t] = prob - cz = sum(conditionals) - llh += log(cz) * count - return llh - - def primal(ls): - # FIXME: returns negative values for KL (impossible) - logz = dual(ls) - expectations = -dual_deriv(ls) - kl = -logz - dot(ls, expectations) - llh = loglikelihood() - - pt_l1linf = 0 - for phrase, ccs in edges_phrase_to_context: - for t in range(num_tags): - best = -1e500 - for context, count in ccs: - li = lamba_index[phrase,context] + t - s = expectations[li] - if s > best: best = s - pt_l1linf += best - - ct_l1linf = 0 - for context, pcs in edges_context_to_phrase: - for t in range(num_tags): - best = -1e500 - for phrase, count in pcs: - li = omega_offset + lamba_index[phrase,context] + t - s = expectations[li] - if s > best: best = s - ct_l1linf += best - - return llh, kl, pt_l1linf, ct_l1linf, llh - kl - delta * pt_l1linf - gamma * ct_l1linf - - def dual_deriv(ls): - # d/dl log(z) = E_q[phi] - deriv = zeros(2 * num_edges * num_tags) - for p, (phrase, ccs) in enumerate(edges_phrase_to_context): - for context, count in ccs: - conditionals = zeros(num_tags) - for t in range(num_tags): - prob = tagDist[p][t] - for i in range(4): - prob *= contextWordDist[i][t][types[context[i]]] - conditionals[t] = prob - cz = sum(conditionals) - conditionals /= cz - - scores = zeros(num_tags) - for t in range(num_tags): - li = lamba_index[phrase,context] + t - scores[t] = conditionals[t] * exp(-ls[li] - ls[omega_offset + li]) - local_z = sum(scores) - - #print 'ddual', phrase, context, count, 'q =', scores / local_z - - for t in range(num_tags): - deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z - deriv[omega_offset + lamba_index[phrase,context] + t] -= count * scores[t] / local_z - - #print 'ddual', list(deriv) - return deriv - - def constraints(ls): - cons = zeros(num_phrases * num_tags + num_edges * num_tags) - - index = 0 - for phrase, ccs in edges_phrase_to_context: - for t in range(num_tags): - if delta > 0: - total = delta - for cprime, count in ccs: - total -= ls[lamba_index[phrase, cprime] + t] - cons[index] = total - index += 1 - - for context, pcs in edges_context_to_phrase: - for t in range(num_tags): - if gamma > 0: - total = gamma - for pprime, count in pcs: - total -= ls[omega_offset + lamba_index[pprime, context] + t] - cons[index] = total - index += 1 - - #print 'cons', cons - return cons - - def constraints_deriv(ls): - cons = zeros((num_phrases * num_tags + num_edges * num_tags, 2 * num_edges * num_tags)) - - index = 0 - for phrase, ccs in edges_phrase_to_context: - for t in range(num_tags): - if delta > 0: - d = cons[index,:]#zeros(num_edges * num_tags) - for cprime, count in ccs: - d[lamba_index[phrase, cprime] + t] = -1 - #cons[index] = d - index += 1 - - for context, pcs in edges_context_to_phrase: - for t in range(num_tags): - if gamma > 0: - d = cons[index,:]#d = zeros(num_edges * num_tags) - for pprime, count in pcs: - d[omega_offset + lamba_index[pprime, context] + t] = -1 - #cons[index] = d - index += 1 - #print 'dcons', cons - return cons - - print 'Pre lambda optimisation dual', dual(lamba), 'primal', primal(lamba) - #print 'lambda', lamba, lamba.shape - #print 'bounds', [(0, max(delta, gamma))] * (2 * num_edges * num_tags) - - lamba = scipy.optimize.fmin_slsqp(dual, lamba, - bounds=[(0, max(delta, gamma))] * (2 * num_edges * num_tags), - f_ieqcons=constraints, - fprime=dual_deriv, - fprime_ieqcons=constraints_deriv, - iprint=0) - print 'Post lambda optimisation dual', dual(lamba), 'primal', primal(lamba) - - # E-step - llh = log_z = 0 - for p, (phrase, ccs) in enumerate(edges_phrase_to_context): - for context, count in ccs: - conditionals = zeros(num_tags) - for t in range(num_tags): - prob = tagDist[p][t] - for i in range(4): - prob *= contextWordDist[i][t][types[context[i]]] - conditionals[t] = prob - cz = sum(conditionals) - conditionals /= cz - llh += log(cz) * count - - q = zeros(num_tags) - li = lamba_index[phrase, context] - for t in range(num_tags): - q[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t]) - qz = sum(q) - log_z += count * log(qz) - - for t in range(num_tags): - tagCounts[p][t] += count * q[t] / qz - - for i in range(4): - for t in range(num_tags): - contextWordCounts[i][t][types[context[i]]] += count * q[t] / qz - - print 'iteration', iteration, 'llh', llh, 'logz', log_z - - # M-step - for p in range(num_phrases): - tagDist[p] = normalise(tagCounts[p]) - for i in range(4): - for t in range(num_tags): - contextWordDist[i][t] = normalise(contextWordCounts[i][t]) - -for p, (phrase, ccs) in enumerate(edges_phrase_to_context): - for context, count in ccs: - conditionals = zeros(num_tags) - for t in range(num_tags): - prob = tagDist[p][t] - for i in range(4): - prob *= contextWordDist[i][t][types[context[i]]] - conditionals[t] = prob - cz = sum(conditionals) - conditionals /= cz - - print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py deleted file mode 100644 index 3b9cefed..00000000 --- a/gi/posterior-regularisation/train_pr_parallel.py +++ /dev/null @@ -1,333 +0,0 @@ -import sys -import scipy.optimize -from numpy import * -from numpy.random import random, seed - -# -# Step 1: load the concordance counts -# - -edges_phrase_to_context = [] -edges_context_to_phrase = [] -types = {} -context_types = {} -num_edges = 0 - -for line in sys.stdin: - phrase, rest = line.strip().split('\t') - parts = rest.split('|||') - edges_phrase_to_context.append((phrase, [])) - for i in range(0, len(parts), 2): - context, count = parts[i:i+2] - - ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split())) - cnt = int(count.strip()[2:]) - edges_phrase_to_context[-1][1].append((ctx, cnt)) - - cid = context_types.get(ctx, len(context_types)) - if cid == len(context_types): - context_types[ctx] = cid - edges_context_to_phrase.append((ctx, [])) - edges_context_to_phrase[cid][1].append((phrase, cnt)) - - for token in ctx: - types.setdefault(token, len(types)) - for token in phrase.split(): - types.setdefault(token, len(types)) - - num_edges += 1 - -# -# Step 2: initialise the model parameters -# - -num_tags = 25 -num_types = len(types) -num_phrases = len(edges_phrase_to_context) -num_contexts = len(edges_context_to_phrase) -delta = float(sys.argv[1]) -assert sys.argv[2] in ('local', 'global') -local = sys.argv[2] == 'local' -if len(sys.argv) >= 2: - seed(int(sys.argv[3])) - -print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', len(types), 'word types' - -def normalise(a): - return a / float(sum(a)) - -# Pr(tag | phrase) -tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)] -# Pr(context at pos i = w | tag) indexed by i, tag, word -contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)] - -# -# Step 3: expectation maximisation -# - -class GlobalDualObjective: - """ - Objective, log(z), for all phrases s.t. lambda >= 0, sum_c lambda_pct <= scale - """ - - def __init__(self, scale): - self.scale = scale - self.posterior = zeros((num_edges, num_tags)) - self.q = zeros((num_edges, num_tags)) - self.llh = 0 - - index = 0 - for j, (phrase, edges) in enumerate(edges_phrase_to_context): - for context, count in edges: - for t in range(num_tags): - prob = tagDist[j][t] - for k, token in enumerate(context): - prob *= contextWordDist[k][t][types[token]] - self.posterior[index,t] = prob - z = sum(self.posterior[index,:]) - self.posterior[index,:] /= z - self.llh += log(z) * count - index += 1 - - def objective(self, ls): - ls = ls.reshape((num_edges, num_tags)) - logz = 0 - - index = 0 - for j, (phrase, edges) in enumerate(edges_phrase_to_context): - for context, count in edges: - for t in range(num_tags): - self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t]) - local_z = sum(self.q[index,:]) - self.q[index,:] /= local_z - logz += log(local_z) * count - index += 1 - - return logz - - # FIXME: recomputes q many more times than necessary - - def gradient(self, ls): - ls = ls.reshape((num_edges, num_tags)) - gradient = zeros((num_edges, num_tags)) - - index = 0 - for j, (phrase, edges) in enumerate(edges_phrase_to_context): - for context, count in edges: - for t in range(num_tags): - self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t]) - local_z = sum(self.q[index,:]) - self.q[index,:] /= local_z - for t in range(num_tags): - gradient[index,t] -= self.q[index,t] * count - index += 1 - - return gradient.ravel() - - def constraints(self, ls): - ls = ls.reshape((num_edges, num_tags)) - cons = ones((num_phrases, num_tags)) * self.scale - index = 0 - for j, (phrase, edges) in enumerate(edges_phrase_to_context): - for i, (context, count) in enumerate(edges): - for t in range(num_tags): - cons[j,t] -= ls[index,t] * count - index += 1 - return cons.ravel() - - def constraints_gradient(self, ls): - ls = ls.reshape((num_edges, num_tags)) - gradient = zeros((num_phrases, num_tags, num_edges, num_tags)) - index = 0 - for j, (phrase, edges) in enumerate(edges_phrase_to_context): - for i, (context, count) in enumerate(edges): - for t in range(num_tags): - gradient[j,t,index,t] -= count - index += 1 - return gradient.reshape((num_phrases*num_tags, num_edges*num_tags)) - - def optimize(self): - ls = zeros(num_edges * num_tags) - #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) - ls = scipy.optimize.fmin_slsqp(self.objective, ls, - bounds=[(0, self.scale)] * num_edges * num_tags, - f_ieqcons=self.constraints, - fprime=self.gradient, - fprime_ieqcons=self.constraints_gradient, - iprint=0) # =2 for verbose - #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) - - # returns llh, kl and l1lmax contribution - l1lmax = 0 - index = 0 - for j, (phrase, edges) in enumerate(edges_phrase_to_context): - for t in range(num_tags): - lmax = None - for i, (context, count) in enumerate(edges): - lmax = max(lmax, self.q[index+i,t]) - l1lmax += lmax - index += len(edges) - - return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax - -class LocalDualObjective: - """ - Local part of objective, log(z) relevant to lambda_p**. - Optimised subject to lambda >= 0, sum_c lambda_pct <= scale forall t - """ - - def __init__(self, phraseId, scale): - self.phraseId = phraseId - self.scale = scale - edges = edges_phrase_to_context[self.phraseId][1] - self.posterior = zeros((len(edges), num_tags)) - self.q = zeros((len(edges), num_tags)) - self.llh = 0 - - for i, (context, count) in enumerate(edges): - for t in range(num_tags): - prob = tagDist[phraseId][t] - for j, token in enumerate(context): - prob *= contextWordDist[j][t][types[token]] - self.posterior[i,t] = prob - z = sum(self.posterior[i,:]) - self.posterior[i,:] /= z - self.llh += log(z) * count - - def objective(self, ls): - edges = edges_phrase_to_context[self.phraseId][1] - ls = ls.reshape((len(edges), num_tags)) - logz = 0 - - for i, (context, count) in enumerate(edges): - for t in range(num_tags): - self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t]) - local_z = sum(self.q[i,:]) - self.q[i,:] /= local_z - logz += log(local_z) * count - - return logz - - # FIXME: recomputes q many more times than necessary - - def gradient(self, ls): - edges = edges_phrase_to_context[self.phraseId][1] - ls = ls.reshape((len(edges), num_tags)) - gradient = zeros((len(edges), num_tags)) - - for i, (context, count) in enumerate(edges): - for t in range(num_tags): - self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t]) - local_z = sum(self.q[i,:]) - self.q[i,:] /= local_z - for t in range(num_tags): - gradient[i,t] -= self.q[i,t] * count - - return gradient.ravel() - - def constraints(self, ls): - edges = edges_phrase_to_context[self.phraseId][1] - ls = ls.reshape((len(edges), num_tags)) - cons = ones(num_tags) * self.scale - for t in range(num_tags): - for i, (context, count) in enumerate(edges): - cons[t] -= ls[i,t] * count - return cons - - def constraints_gradient(self, ls): - edges = edges_phrase_to_context[self.phraseId][1] - ls = ls.reshape((len(edges), num_tags)) - gradient = zeros((num_tags, len(edges), num_tags)) - for t in range(num_tags): - for i, (context, count) in enumerate(edges): - gradient[t,i,t] -= count - return gradient.reshape((num_tags, len(edges)*num_tags)) - - def optimize(self, ls=None): - edges = edges_phrase_to_context[self.phraseId][1] - if ls == None: - ls = zeros(len(edges) * num_tags) - #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) - ls = scipy.optimize.fmin_slsqp(self.objective, ls, - bounds=[(0, self.scale)] * len(edges) * num_tags, - f_ieqcons=self.constraints, - fprime=self.gradient, - fprime_ieqcons=self.constraints_gradient, - iprint=0) # =2 for verbose - #print '\tlambda', list(ls) - #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) - - # returns llh, kl and l1lmax contribution - l1lmax = 0 - for t in range(num_tags): - lmax = None - for i, (context, count) in enumerate(edges): - lmax = max(lmax, self.q[i,t]) - l1lmax += lmax - - return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax, ls - -ls = [None] * num_phrases -for iteration in range(20): - tagCounts = [zeros(num_tags) for p in range(num_phrases)] - contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)] - - # E-step - llh = kl = l1lmax = 0 - if local: - for p in range(num_phrases): - o = LocalDualObjective(p, delta) - #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0] - #print '\toptimising lambda for phrase', p, 'ls', ls[p] - obj = o.optimize(ls[p]) - #print '\tphrase', p, 'deltas', obj - llh += obj[0] - kl += obj[1] - l1lmax += obj[2] - ls[p] = obj[3] - - edges = edges_phrase_to_context[p][1] - for j, (context, count) in enumerate(edges): - for t in range(num_tags): - tagCounts[p][t] += count * o.q[j,t] - for i in range(4): - for t in range(num_tags): - contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t] - - #print 'iteration', iteration, 'LOCAL objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax - else: - o = GlobalDualObjective(delta) - obj = o.optimize() - llh, kl, l1lmax = o.optimize() - - index = 0 - for p, (phrase, edges) in enumerate(edges_phrase_to_context): - for context, count in edges: - for t in range(num_tags): - tagCounts[p][t] += count * o.q[index,t] - for i in range(4): - for t in range(num_tags): - contextWordCounts[i][t][types[context[i]]] += count * o.q[index,t] - index += 1 - - print 'iteration', iteration, 'objective', (llh - kl - delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax - - # M-step - for p in range(num_phrases): - tagDist[p] = normalise(tagCounts[p]) - for i in range(4): - for t in range(num_tags): - contextWordDist[i][t] = normalise(contextWordCounts[i][t]) - -for p, (phrase, ccs) in enumerate(edges_phrase_to_context): - for context, count in ccs: - conditionals = zeros(num_tags) - for t in range(num_tags): - prob = tagDist[p][t] - for i in range(4): - prob *= contextWordDist[i][t][types[context[i]]] - conditionals[t] = prob - cz = sum(conditionals) - conditionals /= cz - - print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals |