From ad418214fe3b3fcd33d81225eb3d3fb08b67f88a Mon Sep 17 00:00:00 2001 From: desaicwtf Date: Mon, 28 Jun 2010 23:14:21 +0000 Subject: add draft version of POS induction with HMM and L1 Linf constraints git-svn-id: https://ws10smt.googlecode.com/svn/trunk@47 ec762483-ff6d-05da-a07a-a48fb63a330f --- .../prjava/src/hmm/HMMObjective.java | 348 +++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java (limited to 'gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java') diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java new file mode 100644 index 00000000..551210c0 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java @@ -0,0 +1,348 @@ +package hmm; + +import gnu.trove.TIntArrayList; +import optimization.gradientBasedMethods.ProjectedGradientDescent; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; +import optimization.linesearch.InterpolationPickFirstStep; +import optimization.linesearch.LineSearchMethod; +import optimization.projections.SimplexProjection; +import optimization.stopCriteria.CompositeStopingCriteria; +import optimization.stopCriteria.ProjectedGradientL2Norm; +import optimization.stopCriteria.StopingCriteria; +import optimization.stopCriteria.ValueDifference; + +public class HMMObjective extends ProjectedObjective{ + + + private static final double GRAD_DIFF = 3; + public static double INIT_STEP_SIZE=10; + public static double VAL_DIFF=2000; + + private HMM hmm; + double[] newPoint ; + + //posterior[sent num][tok num][tag]=index into lambda + private int posteriorMap[][][]; + //projection[word][tag].get(occurence)=index into lambda + private TIntArrayList projectionMap[][]; + + //Size of the simplex + public double scale=10; + private SimplexProjection projection; + + private int wordFreq[]; + private static int MIN_FREQ=3; + private int numWordsToProject=0; + + private int n_param; + + public double loglikelihood; + + public HMMObjective(HMM h){ + hmm=h; + + countWords(); + buildMap(); + + gradient=new double [n_param]; + projection = new SimplexProjection(scale); + newPoint = new double[n_param]; + setInitialParameters(new double[n_param]); + + } + + /**@brief counts word frequency in the corpus + * + */ + private void countWords(){ + wordFreq=new int [hmm.emit[0].length]; + for(int i=0;i