From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- gi/posterior-regularisation/prjava/src/arr/F.java | 99 ---- .../prjava/src/data/Corpus.java | 233 --------- .../prjava/src/hmm/HMM.java | 579 --------------------- .../prjava/src/hmm/HMMObjective.java | 351 ------------- .../prjava/src/hmm/POS.java | 120 ----- .../prjava/src/io/FileUtil.java | 48 -- .../prjava/src/io/SerializedObjects.java | 83 --- .../examples/GeneralizedRosenbrock.java | 110 ---- .../prjava/src/optimization/examples/x2y2.java | 128 ----- .../optimization/examples/x2y2WithConstraints.java | 127 ----- .../AbstractGradientBaseMethod.java | 120 ----- .../gradientBasedMethods/ConjugateGradient.java | 92 ---- .../gradientBasedMethods/DebugHelpers.java | 65 --- .../gradientBasedMethods/GradientDescent.java | 19 - .../optimization/gradientBasedMethods/LBFGS.java | 234 --------- .../gradientBasedMethods/Objective.java | 87 ---- .../gradientBasedMethods/Optimizer.java | 19 - .../ProjectedAbstractGradientBaseMethod.java | 11 - .../ProjectedGradientDescent.java | 154 ------ .../gradientBasedMethods/ProjectedObjective.java | 29 -- .../gradientBasedMethods/ProjectedOptimizer.java | 10 - .../gradientBasedMethods/stats/OptimizerStats.java | 86 --- .../stats/ProjectedOptimizerStats.java | 70 --- .../linesearch/ArmijoLineSearchMinimization.java | 102 ---- ...joLineSearchMinimizationAlongProjectionArc.java | 141 ----- .../DifferentiableLineSearchObjective.java | 185 ------- .../linesearch/GenericPickFirstStep.java | 20 - .../linesearch/InterpolationPickFirstStep.java | 25 - .../optimization/linesearch/LineSearchMethod.java | 14 - .../NonNewtonInterpolationPickFirstStep.java | 33 -- ...ProjectedDifferentiableLineSearchObjective.java | 137 ----- .../linesearch/WolfRuleLineSearch.java | 300 ----------- .../optimization/linesearch/WolfeConditions.java | 45 -- .../optimization/projections/BoundsProjection.java | 104 ---- .../src/optimization/projections/Projection.java | 72 --- .../projections/SimplexProjection.java | 127 ----- .../stopCriteria/CompositeStopingCriteria.java | 33 -- .../optimization/stopCriteria/GradientL2Norm.java | 30 -- .../stopCriteria/NormalizedGradientL2Norm.java | 48 -- .../NormalizedProjectedGradientL2Norm.java | 60 --- .../stopCriteria/NormalizedValueDifference.java | 54 -- .../stopCriteria/ProjectedGradientL2Norm.java | 51 -- .../optimization/stopCriteria/StopingCriteria.java | 8 - .../optimization/stopCriteria/ValueDifference.java | 41 -- .../src/optimization/util/Interpolation.java | 37 -- .../prjava/src/optimization/util/Logger.java | 7 - .../prjava/src/optimization/util/MathUtils.java | 339 ------------ .../prjava/src/optimization/util/MatrixOutput.java | 28 - .../prjava/src/optimization/util/StaticTools.java | 180 ------- .../prjava/src/phrase/Agree.java | 204 -------- .../prjava/src/phrase/Agree2Sides.java | 197 ------- .../prjava/src/phrase/C2F.java | 216 -------- .../prjava/src/phrase/Corpus.java | 288 ---------- .../prjava/src/phrase/Lexicon.java | 34 -- .../prjava/src/phrase/PhraseCluster.java | 540 ------------------- .../prjava/src/phrase/PhraseContextObjective.java | 436 ---------------- .../prjava/src/phrase/PhraseCorpus.java | 193 ------- .../prjava/src/phrase/PhraseObjective.java | 224 -------- .../prjava/src/phrase/Trainer.java | 257 --------- .../prjava/src/phrase/VB.java | 419 --------------- .../prjava/src/test/CorpusTest.java | 60 --- .../prjava/src/test/HMMModelStats.java | 105 ---- .../prjava/src/test/IntDoublePair.java | 23 - .../prjava/src/test/X2y2WithConstraints.java | 131 ----- .../prjava/src/util/Array.java | 41 -- .../prjava/src/util/ArrayMath.java | 186 ------- .../prjava/src/util/DifferentiableObjective.java | 14 - .../prjava/src/util/DigammaFunction.java | 21 - .../prjava/src/util/FileSystem.java | 21 - .../prjava/src/util/InputOutput.java | 67 --- .../prjava/src/util/LogSummer.java | 86 --- .../prjava/src/util/MathUtil.java | 148 ------ .../prjava/src/util/Matrix.java | 16 - .../prjava/src/util/MemoryTracker.java | 47 -- .../prjava/src/util/Pair.java | 31 -- .../prjava/src/util/Printing.java | 158 ------ .../prjava/src/util/Sorters.java | 39 -- 77 files changed, 9297 deletions(-) delete mode 100644 gi/posterior-regularisation/prjava/src/arr/F.java delete mode 100644 gi/posterior-regularisation/prjava/src/data/Corpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/HMM.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/POS.java delete mode 100644 gi/posterior-regularisation/prjava/src/io/FileUtil.java delete mode 100644 gi/posterior-regularisation/prjava/src/io/SerializedObjects.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/Logger.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Agree.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/C2F.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Corpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Lexicon.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Trainer.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/VB.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/CorpusTest.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/HMMModelStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/IntDoublePair.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Array.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/ArrayMath.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/DigammaFunction.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/FileSystem.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/InputOutput.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/LogSummer.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/MathUtil.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Matrix.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/MemoryTracker.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Pair.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Printing.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Sorters.java (limited to 'gi/posterior-regularisation/prjava/src') diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java deleted file mode 100644 index be0a6ed6..00000000 --- a/gi/posterior-regularisation/prjava/src/arr/F.java +++ /dev/null @@ -1,99 +0,0 @@ -package arr; - -import java.util.Arrays; -import java.util.Random; - -public class F { - public static Random rng = new Random(); - - public static void randomise(double probs[]) - { - randomise(probs, true); - } - - public static void randomise(double probs[], boolean normalise) - { - double z = 0; - for (int i = 0; i < probs.length; ++i) - { - probs[i] = 10 + rng.nextDouble(); - if (normalise) - z += probs[i]; - } - - if (normalise) - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - } - - public static void uniform(double probs[]) - { - for (int i = 0; i < probs.length; ++i) - probs[i] = 1.0 / probs.length; - } - - public static void l1normalize(double [] a){ - double sum=0; - for(int i=0;i m) - { - m = probs[i]; - mi = i; - } - } - return mi; - } - -} diff --git a/gi/posterior-regularisation/prjava/src/data/Corpus.java b/gi/posterior-regularisation/prjava/src/data/Corpus.java deleted file mode 100644 index 425ede11..00000000 --- a/gi/posterior-regularisation/prjava/src/data/Corpus.java +++ /dev/null @@ -1,233 +0,0 @@ -package data; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Scanner; - -public class Corpus { - - public static final String alphaFilename="../posdata/corpus.alphabet"; - public static final String tagalphaFilename="../posdata/corpus.tag.alphabet"; - -// public static final String START_SYM=""; - public static final String END_SYM=""; - public static final String NUM_TOK=""; - - public static final String UNK_TOK=""; - - private ArrayListsent; - private ArrayListdata; - - public ArrayListtag; - public ArrayListtagData; - - public static boolean convertNumTok=true; - - private HashMapfreq; - public HashMapvocab; - - public HashMaptagVocab; - private int tagV; - - private int V; - - public static void main(String[] args) { - Corpus c=new Corpus("../posdata/en_test.conll"); - System.out.println( - Arrays.toString(c.get(0)) - ); - System.out.println( - Arrays.toString(c.getInt(0)) - ); - - System.out.println( - Arrays.toString(c.get(1)) - ); - System.out.println( - Arrays.toString(c.getInt(1)) - ); - } - - public Corpus(String filename,HashMapdict){ - V=0; - tagV=0; - freq=new HashMap(); - tagVocab=new HashMap(); - vocab=dict; - - sent=new ArrayList(); - tag=new ArrayList(); - - Scanner sc=io.FileUtil.openInFile(filename); - ArrayLists=new ArrayList(); - // s.add(START_SYM); - while(sc.hasNextLine()){ - String line=sc.nextLine(); - String toks[]=line.split("\t"); - if(toks.length<2){ - s.add(END_SYM); - sent.add(s.toArray(new String[0])); - s=new ArrayList(); - // s.add(START_SYM); - continue; - } - String tok=toks[1].toLowerCase(); - s.add(tok); - } - sc.close(); - - buildData(); - } - - public Corpus(String filename){ - V=0; - freq=new HashMap(); - vocab=new HashMap(); - tagVocab=new HashMap(); - - sent=new ArrayList(); - tag=new ArrayList(); - - System.out.println("Reading:"+filename); - - Scanner sc=io.FileUtil.openInFile(filename); - ArrayLists=new ArrayList(); - ArrayListtags=new ArrayList(); - //s.add(START_SYM); - while(sc.hasNextLine()){ - String line=sc.nextLine(); - String toks[]=line.split("\t"); - if(toks.length<2){ - s.add(END_SYM); - tags.add(END_SYM); - if(s.size()>2){ - sent.add(s.toArray(new String[0])); - tag.add(tags.toArray(new String [0])); - } - s=new ArrayList(); - tags=new ArrayList(); - // s.add(START_SYM); - continue; - } - - String tok=toks[1].toLowerCase(); - if(convertNumTok && tok.matches(".*\\d.*")){ - tok=NUM_TOK; - } - s.add(tok); - - if(toks.length>3){ - tok=toks[3].toLowerCase(); - }else{ - tok="_"; - } - tags.add(tok); - - } - sc.close(); - - for(int i=0;i(); - for(int i=0;i(); - for(int i=0;i2){ - vocab.put(key, V); - V++; - } - } - io.SerializedObjects.writeSerializedObject(vocab, alphaFilename); - io.SerializedObjects.writeSerializedObject(tagVocab,tagalphaFilename); - } - - private void addTag(String tag){ - Integer i=tagVocab.get(tag); - if(i==null){ - tagVocab.put(tag, tagV); - tagV++; - } - } - -} diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMM.java b/gi/posterior-regularisation/prjava/src/hmm/HMM.java deleted file mode 100644 index 17a4679f..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMM.java +++ /dev/null @@ -1,579 +0,0 @@ -package hmm; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Scanner; - -public class HMM { - - - //trans[i][j]=prob of going FROM i to j - double [][]trans; - double [][]emit; - double []pi; - int [][]data; - int [][]tagdata; - - double logtrans[][]; - - public HMMObjective o; - - public static void main(String[] args) { - - } - - public HMM(int n_state,int n_emit,int [][]data){ - trans=new double [n_state][n_state]; - emit=new double[n_state][n_emit]; - pi=new double [n_state]; - System.out.println(" random initial parameters"); - fillRand(trans); - fillRand(emit); - fillRand(pi); - - this.data=data; - - } - - private void fillRand(double [][] a){ - for(int i=0;i=0;n--){ - for(int i=0;imaxprob){ - maxprob=p[seq.length-1][i]; - maxIdx=i; - } - } - int ans[]=new int [seq.length]; - ans[seq.length-1]=maxIdx; - for(int i=seq.length-2;i>=0;i--){ - ans[i]=backp[i+1][ans[i+1]]; - } - return ans; - } - - public double l1norm(double a[]){ - double norm=0; - for(int i=0;i s=new ArrayList(); - int state=sample(pi); - int sym=sample(emit[state]); - while(sym!=terminalSym){ - s.add(sym); - state=sample(trans[state]); - sym=sample(emit[state]); - } - - int ans[]=new int [s.size()]; - for(int i=0;i=r){ - return i; - } - } - return p.length-1; - } - - public void train(int tagdata[][]){ - double trans_exp_cnt[][]=new double [trans.length][trans.length]; - double emit_exp_cnt[][]=new double[trans.length][emit[0].length]; - double start_exp_cnt[]=new double[trans.length]; - - for(int i=0;imaxwt[i][d[sentNum][n]]){ - maxwt[i][d[sentNum][n]]=py; - } - - } - } - - //the last state - int len=post.length; - for(int i=0;imaxwt[i][d[sentNum][len-1]]){ - maxwt[i][d[sentNum][len-1]]=py; - } - - } - - } - - } - -}//end of class diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java deleted file mode 100644 index 70b6c966..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java +++ /dev/null @@ -1,351 +0,0 @@ -package hmm; - -import gnu.trove.TIntArrayList; -import optimization.gradientBasedMethods.ProjectedGradientDescent; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.projections.SimplexProjection; -import optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; - -public class HMMObjective extends ProjectedObjective{ - - - private static final double GRAD_DIFF = 3; - public static double INIT_STEP_SIZE=10; - public static double VAL_DIFF=1000; - - private HMM hmm; - double[] newPoint ; - - //posterior[sent num][tok num][tag]=index into lambda - private int posteriorMap[][][]; - //projection[word][tag].get(occurence)=index into lambda - private TIntArrayList projectionMap[][]; - - //Size of the simplex - public double scale=10; - private SimplexProjection projection; - - private int wordFreq[]; - private static int MIN_FREQ=10; - private int numWordsToProject=0; - - private int n_param; - - public double loglikelihood; - - public HMMObjective(HMM h){ - hmm=h; - - countWords(); - buildMap(); - - gradient=new double [n_param]; - projection = new SimplexProjection(scale); - newPoint = new double[n_param]; - setInitialParameters(new double[n_param]); - - } - - /**@brief counts word frequency in the corpus - * - */ - private void countWords(){ - wordFreq=new int [hmm.emit[0].length]; - for(int i=0;i