diff options
Diffstat (limited to 'gi/posterior-regularisation')
42 files changed, 3577 insertions, 0 deletions
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java new file mode 100644 index 00000000..25fa7f09 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java @@ -0,0 +1,110 @@ +package optimization.examples; + + +import optimization.gradientBasedMethods.ConjugateGradient; +import optimization.gradientBasedMethods.GradientDescent; +import optimization.gradientBasedMethods.LBFGS; +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.Optimizer; +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.ArmijoLineSearchMinimization; +import optimization.linesearch.LineSearchMethod; +import optimization.stopCriteria.GradientL2Norm; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + +/** + * + * @author javg + * f(x) = \sum_{i=1}^{N-1} \left[ (1-x_i)^2+ 100 (x_{i+1} - x_i^2 )^2 \right] \quad \forall x\in\mathbb{R}^N. + */ +public class GeneralizedRosenbrock extends Objective{ + + + + public GeneralizedRosenbrock(int dimensions){ + parameters = new double[dimensions]; + java.util.Arrays.fill(parameters, 0); + gradient = new double[dimensions]; + + } + + public GeneralizedRosenbrock(int dimensions, double[] params){ + parameters = params; + gradient = new double[dimensions]; + } + + + public double getValue() { + functionCalls++; + double value = 0; + for(int i = 0; i < parameters.length-1; i++){ + value += MathUtils.square(1-parameters[i]) + 100*MathUtils.square(parameters[i+1] - MathUtils.square(parameters[i])); + } + + return value; + } + + /** + * gx = -2(1-x) -2x200(y-x^2) + * gy = 200(y-x^2) + */ + public double[] getGradient() { + gradientCalls++; + java.util.Arrays.fill(gradient,0); + for(int i = 0; i < parameters.length-1; i++){ + gradient[i]+=-2*(1-parameters[i]) - 400*parameters[i]*(parameters[i+1] - MathUtils.square(parameters[i])); + gradient[i+1]+=200*(parameters[i+1] - MathUtils.square(parameters[i])); + } + return gradient; + } + + + + + + + + public String toString(){ + String res =""; + for(int i = 0; i < parameters.length; i++){ + res += "P" + i+ " " + parameters[i]; + } + res += " Value " + getValue(); + return res; + } + + public static void main(String[] args) { + + GeneralizedRosenbrock o = new GeneralizedRosenbrock(2); + System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); + ; + + System.out.println("Doing Gradient descent"); + //LineSearchMethod wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1); + StopingCriteria stop = new GradientL2Norm(0.001); + LineSearchMethod ls = new ArmijoLineSearchMinimization(); + Optimizer optimizer = new GradientDescent(ls); + OptimizerStats stats = new OptimizerStats(); + optimizer.setMaxIterations(1000); + boolean succed = optimizer.optimize(o,stats, stop); + System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); + System.out.println("Doing Conjugate Gradient descent"); + o = new GeneralizedRosenbrock(2); + // wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1); + optimizer = new ConjugateGradient(ls); + stats = new OptimizerStats(); + optimizer.setMaxIterations(1000); + succed = optimizer.optimize(o,stats,stop); + System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); + System.out.println("Doing Quasi newton descent"); + o = new GeneralizedRosenbrock(2); + optimizer = new LBFGS(ls,10); + stats = new OptimizerStats(); + optimizer.setMaxIterations(1000); + succed = optimizer.optimize(o,stats,stop); + System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); + + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java new file mode 100644 index 00000000..f087681e --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java @@ -0,0 +1,128 @@ +package optimization.examples; + + +import optimization.gradientBasedMethods.ConjugateGradient; + +import optimization.gradientBasedMethods.GradientDescent; +import optimization.gradientBasedMethods.LBFGS; +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.GenericPickFirstStep; +import optimization.linesearch.LineSearchMethod; +import optimization.linesearch.WolfRuleLineSearch; +import optimization.stopCriteria.GradientL2Norm; +import optimization.stopCriteria.StopingCriteria; + + +/** + * @author javg + * + */ +public class x2y2 extends Objective{ + + + //Implements function ax2+ by2 + double a, b; + public x2y2(double a, double b){ + this.a = a; + this.b = b; + parameters = new double[2]; + parameters[0] = 4; + parameters[1] = 4; + gradient = new double[2]; + } + + public double getValue() { + functionCalls++; + return a*parameters[0]*parameters[0]+b*parameters[1]*parameters[1]; + } + + public double[] getGradient() { + gradientCalls++; + gradient[0]=2*a*parameters[0]; + gradient[1]=2*b*parameters[1]; + return gradient; +// if(debugLevel >=2){ +// double[] numericalGradient = DebugHelpers.getNumericalGradient(this, parameters, 0.000001); +// for(int i = 0; i < parameters.length; i++){ +// double diff = Math.abs(gradient[i]-numericalGradient[i]); +// if(diff > 0.00001){ +// System.out.println("Numerical Gradient does not match"); +// System.exit(1); +// } +// } +// } + } + + + + public void optimizeWithGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ + GradientDescent optimizer = new GradientDescent(ls); + StopingCriteria stop = new GradientL2Norm(0.001); +// optimizer.setGradientConvergenceValue(0.001); + optimizer.setMaxIterations(100); + boolean succed = optimizer.optimize(o,stats,stop); + System.out.println("Ended optimzation Gradient Descent\n" + stats.prettyPrint(1)); + System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); + if(succed){ + System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); + }else{ + System.out.println("Failed to optimize"); + } + } + + public void optimizeWithConjugateGradient(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ + ConjugateGradient optimizer = new ConjugateGradient(ls); + StopingCriteria stop = new GradientL2Norm(0.001); + + optimizer.setMaxIterations(10); + boolean succed = optimizer.optimize(o,stats,stop); + System.out.println("Ended optimzation Conjugate Gradient\n" + stats.prettyPrint(1)); + System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); + if(succed){ + System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); + }else{ + System.out.println("Failed to optimize"); + } + } + + public void optimizeWithLBFGS(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ + LBFGS optimizer = new LBFGS(ls,10); + StopingCriteria stop = new GradientL2Norm(0.001); + optimizer.setMaxIterations(10); + boolean succed = optimizer.optimize(o,stats,stop); + System.out.println("Ended optimzation LBFGS\n" + stats.prettyPrint(1)); + System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); + if(succed){ + System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); + }else{ + System.out.println("Failed to optimize"); + } + } + + public static void main(String[] args) { + x2y2 o = new x2y2(1,10); + System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); + o.setDebugLevel(4); + LineSearchMethod wolfe = new WolfRuleLineSearch(new GenericPickFirstStep(1),0.001,0.9);; +// LineSearchMethod ls = new ArmijoLineSearchMinimization(); + OptimizerStats stats = new OptimizerStats(); + o.optimizeWithGradientDescent(wolfe, stats, o); + o = new x2y2(1,10); + System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +// ls = new ArmijoLineSearchMinimization(); + stats = new OptimizerStats(); + o.optimizeWithConjugateGradient(wolfe, stats, o); + o = new x2y2(1,10); + System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +// ls = new ArmijoLineSearchMinimization(); + stats = new OptimizerStats(); + o.optimizeWithLBFGS(wolfe, stats, o); + } + + public String toString(){ + return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue(); + } + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java new file mode 100644 index 00000000..391775b7 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java @@ -0,0 +1,127 @@ +package optimization.examples; + + +import optimization.gradientBasedMethods.ProjectedGradientDescent; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; +import optimization.linesearch.InterpolationPickFirstStep; +import optimization.linesearch.LineSearchMethod; +import optimization.projections.BoundsProjection; +import optimization.projections.Projection; +import optimization.projections.SimplexProjection; +import optimization.stopCriteria.CompositeStopingCriteria; +import optimization.stopCriteria.GradientL2Norm; +import optimization.stopCriteria.ProjectedGradientL2Norm; +import optimization.stopCriteria.StopingCriteria; +import optimization.stopCriteria.ValueDifference; + + +/** + * @author javg + * + * + *ax2+ b(y2 -displacement) + */ +public class x2y2WithConstraints extends ProjectedObjective{ + + + double a, b; + double dx; + double dy; + Projection projection; + + + public x2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){ + //projection = new BoundsProjection(0.2,Double.MAX_VALUE); + super(); + projection = proj; + this.a = a; + this.b = b; + this.dx = dx; + this.dy = dy; + setInitialParameters(params); + System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2"); + System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")"); + printParameters(); + projection.project(parameters); + printParameters(); + gradient = new double[2]; + } + + public double getValue() { + functionCalls++; + return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy)); + } + + public double[] getGradient() { + if(gradient == null){ + gradient = new double[2]; + } + gradientCalls++; + gradient[0]=2*a*(parameters[0]-dx); + gradient[1]=2*b*(parameters[1]-dy); + return gradient; + } + + + public double[] projectPoint(double[] point) { + double[] newPoint = point.clone(); + projection.project(newPoint); + return newPoint; + } + + public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2WithConstraints o){ + ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls); + StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001); + StopingCriteria stopValue = new ValueDifference(0.001); + CompositeStopingCriteria compositeStop = new CompositeStopingCriteria(); + compositeStop.add(stopGrad); + compositeStop.add(stopValue); + + optimizer.setMaxIterations(5); + boolean succed = optimizer.optimize(o,stats,compositeStop); + System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1)); + System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); + if(succed){ + System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); + }else{ + System.out.println("Failed to optimize"); + } + } + + + + public String toString(){ + + return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")"; + } + + public static void main(String[] args) { + double a = 1; + double b=1; + double x0 = 0; + double y0 =1; + double dx = 0.5; + double dy = 0.5 ; + double [] parameters = new double[2]; + parameters[0] = x0; + parameters[1] = y0; + x2y2WithConstraints o = new x2y2WithConstraints(a,b,parameters,dx,dy, new SimplexProjection(0.5)); + System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b ); + o.setDebugLevel(4); + + LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1)); + + OptimizerStats stats = new OptimizerStats(); + o.optimizeWithProjectedGradientDescent(ls, stats, o); + +// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy); +// stats = new OptimizerStats(); +// o.optimizeWithSpectralProjectedGradientDescent(stats, o); + } + + + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java new file mode 100644 index 00000000..0a4a5445 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java @@ -0,0 +1,119 @@ +package optimization.gradientBasedMethods; + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.DifferentiableLineSearchObjective; +import optimization.linesearch.LineSearchMethod; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + +/** + * + * @author javg + * + */ +public abstract class AbstractGradientBaseMethod implements Optimizer{ + + protected int maxNumberOfIterations=10000; + + + + protected int currentProjectionIteration; + protected double currValue; + protected double previousValue = Double.MAX_VALUE;; + protected double step; + protected double[] gradient; + public double[] direction; + + //Original values + protected double originalGradientL2Norm; + + protected LineSearchMethod lineSearch; + DifferentiableLineSearchObjective lso; + + + public void reset(){ + direction = null; + gradient = null; + previousValue = Double.MAX_VALUE; + currentProjectionIteration = 0; + originalGradientL2Norm = 0; + step = 0; + currValue = 0; + } + + public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ + lso = new DifferentiableLineSearchObjective(o); + } + public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ + } + + public void updateStructuresAfterStep(Objective o,OptimizerStats stats, StopingCriteria stop){ + } + + public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ + //Initialize structures + + stats.collectInitStats(this, o); + direction = new double[o.getNumParameters()]; + initializeStructures(o, stats, stop); + for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ +// System.out.println("starting iterations: parameters:" ); +// o.printParameters(); + previousValue = currValue; + currValue = o.getValue(); + gradient = o.getGradient(); + if(stop.stopOptimization(o)){ + stats.collectFinalStats(this, o); + return true; + } + + getDirection(); + if(MathUtils.dotProduct(gradient, direction) > 0){ + System.out.println("Not a descent direction"); + System.out.println(" current stats " + stats.prettyPrint(1)); + System.exit(-1); + } + updateStructuresBeforeStep(o, stats, stop); + lso.reset(direction); + step = lineSearch.getStepSize(lso); +// System.out.println("Leave with step: " + step); + if(step==-1){ + System.out.println("Failed to find step"); + stats.collectFinalStats(this, o); + return false; + } + updateStructuresAfterStep( o, stats, stop); +// previousValue = currValue; +// currValue = o.getValue(); +// gradient = o.getGradient(); + stats.collectIterationStats(this, o); + } + stats.collectFinalStats(this, o); + return false; + } + + + public int getCurrentIteration() { + return currentProjectionIteration; + } + + + /** + * Method specific + */ + public abstract double[] getDirection(); + + public double getCurrentStep() { + return step; + } + + + + public void setMaxIterations(int max) { + maxNumberOfIterations = max; + } + + public double getCurrentValue() { + return currValue; + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java new file mode 100644 index 00000000..28295729 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java @@ -0,0 +1,92 @@ +package optimization.gradientBasedMethods; + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.DifferentiableLineSearchObjective; +import optimization.linesearch.LineSearchMethod; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + + + +public class ConjugateGradient extends AbstractGradientBaseMethod{ + + + double[] previousGradient; + double[] previousDirection; + + public ConjugateGradient(LineSearchMethod lineSearch) { + this.lineSearch = lineSearch; + } + + public void reset(){ + super.reset(); + java.util.Arrays.fill(previousDirection, 0); + java.util.Arrays.fill(previousGradient, 0); + } + + public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ + super.initializeStructures(o, stats, stop); + previousGradient = new double[o.getNumParameters()]; + previousDirection = new double[o.getNumParameters()]; + } + public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ + System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); + System.arraycopy(direction, 0, previousDirection, 0, direction.length); + } + +// public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ +// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o); +// stats.collectInitStats(this, o); +// direction = new double[o.getNumParameters()]; +// initializeStructures(o, stats, stop); +// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ +// previousValue = currValue; +// currValue = o.getValue(); +// gradient =o.getGradient(); +// if(stop.stopOptimization(gradient)){ +// stats.collectFinalStats(this, o); +// return true; +// } +// getDirection(); +// updateStructures(o, stats, stop); +// lso.reset(direction); +// step = lineSearch.getStepSize(lso); +// if(step==-1){ +// System.out.println("Failed to find a step size"); +// System.out.println("Failed to find step"); +// stats.collectFinalStats(this, o); +// return false; +// } +// +// stats.collectIterationStats(this, o); +// } +// stats.collectFinalStats(this, o); +// return false; +// } + + public double[] getDirection(){ + direction = MathUtils.negation(gradient); + if(currentProjectionIteration != 1){ + //Using Polak-Ribiere method (book equation 5.45) + double b = MathUtils.dotProduct(gradient, MathUtils.arrayMinus(gradient, previousGradient)) + /MathUtils.dotProduct(previousGradient, previousGradient); + if(b<0){ + System.out.println("Defaulting to gradient descent"); + b = Math.max(b, 0); + } + MathUtils.plusEquals(direction, previousDirection, b); + //Debug code + if(MathUtils.dotProduct(direction, gradient) > 0){ + System.out.println("Not an descent direction reseting to gradien"); + direction = MathUtils.negation(gradient); + } + } + return direction; + } + + + + + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java new file mode 100644 index 00000000..6dc4ef6c --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java @@ -0,0 +1,65 @@ +package optimization.gradientBasedMethods; + +import java.util.ArrayList; + +import optimization.util.MathUtils; + + + +public class DebugHelpers { + public static void getLineSearchGraph(Objective o, double[] direction, + double[] parameters, double originalObj, + double originalDot, double c1, double c2){ + ArrayList<Double> stepS = new ArrayList<Double>(); + ArrayList<Double> obj = new ArrayList<Double>(); + ArrayList<Double> norm = new ArrayList<Double>(); + double[] gradient = new double[o.getNumParameters()]; + double[] newParameters = parameters.clone(); + MathUtils.plusEquals(newParameters,direction,0); + o.setParameters(newParameters); + double minValue = o.getValue(); + int valuesBiggerThanMax = 0; + for(double step = 0; step < 2; step +=0.01 ){ + newParameters = parameters.clone(); + MathUtils.plusEquals(newParameters,direction,step); + o.setParameters(newParameters); + double newValue = o.getValue(); + gradient = o.getGradient(); + double newgradDirectionDot = MathUtils.dotProduct(gradient,direction); + stepS.add(step); + obj.add(newValue); + norm.add(newgradDirectionDot); + if(newValue <= minValue){ + minValue = newValue; + }else{ + valuesBiggerThanMax++; + } + + if(valuesBiggerThanMax > 10){ + break; + } + + } + System.out.println("step\torigObj\tobj\tsuffdec\tnorm\tcurvature1"); + for(int i = 0; i < stepS.size(); i++){ + double cnorm= norm.get(i); + System.out.println(stepS.get(i)+"\t"+originalObj +"\t"+obj.get(i) + "\t" + + (originalObj + originalDot*((Double)stepS.get(i))*c1) +"\t"+Math.abs(cnorm) +"\t"+c2*Math.abs(originalDot)); + } + } + + public static double[] getNumericalGradient(Objective o, double[] parameters, double epsilon){ + int nrParameters = o.getNumParameters(); + double[] gradient = new double[nrParameters]; + double[] newParameters; + double originalValue = o.getValue(); + for(int parameter = 0; parameter < nrParameters; parameter++){ + newParameters = parameters.clone(); + newParameters[parameter]+=epsilon; + o.setParameters(newParameters); + double newValue = o.getValue(); + gradient[parameter]=(newValue-originalValue)/epsilon; + } + return gradient; + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java new file mode 100644 index 00000000..9a53cef4 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java @@ -0,0 +1,19 @@ +package optimization.gradientBasedMethods; + +import optimization.linesearch.LineSearchMethod; + + + +public class GradientDescent extends AbstractGradientBaseMethod{ + + public GradientDescent(LineSearchMethod lineSearch) { + this.lineSearch = lineSearch; + } + + public double[] getDirection(){ + for(int i = 0; i< gradient.length; i++){ + direction[i] = -gradient[i]; + } + return direction; + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java new file mode 100644 index 00000000..dedbc942 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java @@ -0,0 +1,234 @@ +package optimization.gradientBasedMethods; + + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.DifferentiableLineSearchObjective; +import optimization.linesearch.LineSearchMethod; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + +public class LBFGS extends AbstractGradientBaseMethod{ + + //How many previous values are being saved + int history; + double[][] skList; + double[][] ykList; + double initialHessianParameters; + double[] previousGradient; + double[] previousParameters; + + //auxiliar structures + double q[]; + double[] roi; + double[] alphai; + + public LBFGS(LineSearchMethod ls, int history) { + lineSearch = ls; + this.history = history; + skList = new double[history][]; + ykList = new double[history][]; + + } + + public void reset(){ + super.reset(); + initialHessianParameters = 0; + previousParameters = null; + previousGradient = null; + skList = new double[history][]; + ykList = new double[history][]; + q = null; + roi = null; + alphai = null; + } + + public double[] LBFGSTwoLoopRecursion(double hessianConst){ + //Only create array once + if(q == null){ + q = new double[gradient.length]; + } + System.arraycopy(gradient, 0, q, 0, gradient.length); + //Only create array once + if(roi == null){ + roi = new double[history]; + } + //Only create array once + if(alphai == null){ + alphai = new double[history]; + } + + for(int i = history-1; i >=0 && skList[i]!= null && ykList[i]!=null; i-- ){ + // System.out.println("New to Old proj " + currentProjectionIteration + " history "+history + " index " + i); + double[] si = skList[i]; + double[] yi = ykList[i]; + roi[i]= 1.0/MathUtils.dotProduct(yi,si); + alphai[i] = MathUtils.dotProduct(si, q)*roi[i]; + MathUtils.plusEquals(q, yi, -alphai[i]); + } + //Initial Hessian is just a constant + MathUtils.scalarMultiplication(q, hessianConst); + for(int i = 0; i <history && skList[i]!= null && ykList[i]!=null; i++ ){ + // System.out.println("Old to New proj " + currentProjectionIteration + " history "+history + " index " + i); + double beta = MathUtils.dotProduct(ykList[i], q)*roi[i]; + MathUtils.plusEquals(q, skList[i], (alphai[i]-beta)); + } + return q; + } + + + + + @Override + public double[] getDirection() { + + calculateInitialHessianParameter(); +// System.out.println("Initial hessian " + initialHessianParameters); + return direction = MathUtils.negation(LBFGSTwoLoopRecursion(initialHessianParameters)); + } + + public void calculateInitialHessianParameter(){ + if(currentProjectionIteration == 1){ + //Use gradient + initialHessianParameters = 1; + }else if(currentProjectionIteration <= history){ + double[] sk = skList[currentProjectionIteration-2]; + double[] yk = ykList[currentProjectionIteration-2]; + initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); + }else{ + //get the last one + double[] sk = skList[history-1]; + double[] yk = ykList[history-1]; + initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); + } + } + + //TODO if structures exit just reset them to zero + public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ + super.initializeStructures(o, stats, stop); + previousParameters = new double[o.getNumParameters()]; + previousGradient = new double[o.getNumParameters()]; + } + public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ + super.initializeStructures(o, stats, stop); + System.arraycopy(o.getParameters(), 0, previousParameters, 0, previousParameters.length); + System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); + } + + public void updateStructuresAfterStep( Objective o,OptimizerStats stats, StopingCriteria stop){ + double[] diffX = MathUtils.arrayMinus(o.getParameters(), previousParameters); + double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient); + //Save new values and discard new ones + if(currentProjectionIteration > history){ + for(int i = 0; i < history-1;i++){ + skList[i]=skList[i+1]; + ykList[i]=ykList[i+1]; + } + skList[history-1]=diffX; + ykList[history-1]=diffGrad; + }else{ + skList[currentProjectionIteration-1]=diffX; + ykList[currentProjectionIteration-1]=diffGrad; + } + } + +// public boolean optimize(Objective o, OptimizerStats stats, StopingCriteria stop) { +// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o); +// gradient = o.getGradient(); +// direction = new double[o.getNumParameters()]; +// previousGradient = new double[o.getNumParameters()]; +// +// previousParameters = new double[o.getNumParameters()]; +// +// stats.collectInitStats(this, o); +// previousValue = Double.MAX_VALUE; +// currValue= o.getValue(); +// //Used for stopping criteria +// double[] originalGradient = o.getGradient(); +// +// originalGradientL2Norm = MathUtils.L2Norm(originalGradient); +// if(stop.stopOptimization(originalGradient)){ +// stats.collectFinalStats(this, o); +// return true; +// } +// for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ +// +// +// currValue = o.getValue(); +// gradient = o.getGradient(); +// currParameters = o.getParameters(); +// +// +// if(currentProjectionIteration == 1){ +// //Use gradient +// initialHessianParameters = 1; +// }else if(currentProjectionIteration <= history){ +// double[] sk = skList[currentProjectionIteration-2]; +// double[] yk = ykList[currentProjectionIteration-2]; +// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); +// }else{ +// //get the last one +// double[] sk = skList[history-1]; +// double[] yk = ykList[history-1]; +// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); +// } +// +// getDirection(); +// +// //MatrixOutput.printDoubleArray(direction, "direction"); +// double dot = MathUtils.dotProduct(direction, gradient); +// if(dot > 0){ +// throw new RuntimeException("Not a descent direction"); +// } if (Double.isNaN(dot)){ +// throw new RuntimeException("dot is not a number!!"); +// } +// System.arraycopy(currParameters, 0, previousParameters, 0, currParameters.length); +// System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); +// lso.reset(direction); +// step = lineSearch.getStepSize(lso); +// if(step==-1){ +// System.out.println("Failed to find a step size"); +//// lso.printLineSearchSteps(); +//// System.out.println(stats.prettyPrint(1)); +// stats.collectFinalStats(this, o); +// return false; +// } +// stats.collectIterationStats(this, o); +// +// //We are not updating the alpha since it is done in line search already +// currParameters = o.getParameters(); +// gradient = o.getGradient(); +// +// if(stop.stopOptimization(gradient)){ +// stats.collectFinalStats(this, o); +// return true; +// } +// double[] diffX = MathUtils.arrayMinus(currParameters, previousParameters); +// double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient); +// //Save new values and discard new ones +// if(currentProjectionIteration > history){ +// for(int i = 0; i < history-1;i++){ +// skList[i]=skList[i+1]; +// ykList[i]=ykList[i+1]; +// } +// skList[history-1]=diffX; +// ykList[history-1]=diffGrad; +// }else{ +// skList[currentProjectionIteration-1]=diffX; +// ykList[currentProjectionIteration-1]=diffGrad; +// } +// previousValue = currValue; +// } +// stats.collectFinalStats(this, o); +// return false; +// } + + + + + + + + + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java new file mode 100644 index 00000000..0e2e27ac --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java @@ -0,0 +1,83 @@ +package optimization.gradientBasedMethods; + + +/** + * Defines an optimization objective: + * + * + * @author javg + * + */ +public abstract class Objective { + + protected int functionCalls = 0; + protected int gradientCalls = 0; + protected int updateCalls = 0; + + protected double[] parameters; + + //Contains a cache with the gradient + public double[] gradient; + int debugLevel = 0; + + public void setDebugLevel(int level){ + debugLevel = level; + } + + public int getNumParameters() { + return parameters.length; + } + + public double getParameter(int index) { + return parameters[index]; + } + + public double[] getParameters() { + return parameters; + } + + public abstract double[] getGradient( ); + + public void setParameter(int index, double value) { + parameters[index]=value; + } + + public void setParameters(double[] params) { + if(parameters == null){ + parameters = new double[params.length]; + } + updateCalls++; + System.arraycopy(params, 0, parameters, 0, params.length); + } + + + public int getNumberFunctionCalls() { + return functionCalls; + } + + public int getNumberGradientCalls() { + return gradientCalls; + } + + public String finalInfoString() { + return "FE: " + functionCalls + " GE " + gradientCalls + " Params updates" + + updateCalls; + } + public void printParameters() { + System.out.println(toString()); + } + + public abstract String toString(); + public abstract double getValue (); + + /** + * Sets the initial objective parameters + * For unconstrained models this just sets the objective params = argument no copying + * For a constrained objective project the parameters and then set + * @param params + */ + public void setInitialParameters(double[] params){ + parameters = params; + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java new file mode 100644 index 00000000..96fce5b0 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java @@ -0,0 +1,19 @@ +package optimization.gradientBasedMethods; + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.stopCriteria.StopingCriteria; + +public interface Optimizer { + public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stoping); + + + public double[] getDirection(); + public double getCurrentStep(); + public double getCurrentValue(); + public int getCurrentIteration(); + public void reset(); + + public void setMaxIterations(int max); + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java new file mode 100644 index 00000000..afb29d04 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java @@ -0,0 +1,11 @@ +package optimization.gradientBasedMethods; + + +/** + * + * @author javg + * + */ +public abstract class ProjectedAbstractGradientBaseMethod extends AbstractGradientBaseMethod implements ProjectedOptimizer{ + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java new file mode 100644 index 00000000..0186e945 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java @@ -0,0 +1,154 @@ +package optimization.gradientBasedMethods; + +import java.io.IOException; + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.DifferentiableLineSearchObjective; +import optimization.linesearch.LineSearchMethod; +import optimization.linesearch.ProjectedDifferentiableLineSearchObjective; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + + +/** + * This class implements the projected gradiend + * as described in Bertsekas "Non Linear Programming" + * section 2.3. + * + * The update is given by: + * x_k+1 = x_k + alpha^k(xbar_k-x_k) + * Where xbar is: + * xbar = [x_k -s_k grad(f(x_k))]+ + * where []+ is the projection into the feasibility set + * + * alpha is the step size + * s_k - is a positive scalar which can be view as a step size as well, by + * setting alpha to 1, then x_k+1 = [x_k -s_k grad(f(x_k))]+ + * This is called taking a step size along the projection arc (Bertsekas) which + * we will use by default. + * + * Note that the only place where we actually take a step size is on pick a step size + * so this is going to be just like a normal gradient descent but use a different + * armijo line search where we project after taking a step. + * + * + * @author javg + * + */ +public class ProjectedGradientDescent extends ProjectedAbstractGradientBaseMethod{ + + + + + public ProjectedGradientDescent(LineSearchMethod lineSearch) { + this.lineSearch = lineSearch; + } + + //Use projected differential objective instead + public void initializeStructures(Objective o, OptimizerStats stats, StopingCriteria stop) { + lso = new ProjectedDifferentiableLineSearchObjective(o); + }; + + + ProjectedObjective obj; + public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){ + obj = o; + return super.optimize(o, stats, stop); + } + + public double[] getDirection(){ + for(int i = 0; i< gradient.length; i++){ + direction[i] = -gradient[i]; + } + return direction; + } + + + + +} + + + + + + + +///OLD CODE + +//Use projected gradient norm +//public boolean stopCriteria(double[] gradient){ +// if(originalDirenctionL2Norm == 0){ +// System.out.println("Leaving original direction norm is zero"); +// return true; +// } +// if(MathUtils.L2Norm(direction)/originalDirenctionL2Norm < gradientConvergenceValue){ +// System.out.println("Leaving projected gradient Norm smaller than epsilon"); +// return true; +// } +// if((previousValue - currValue)/Math.abs(previousValue) < valueConvergenceValue) { +// System.out.println("Leaving value change below treshold " + previousValue + " - " + currValue); +// System.out.println(previousValue/currValue + " - " + currValue/currValue +// + " = " + (previousValue - currValue)/Math.abs(previousValue)); +// return true; +// } +// return false; +//} +// + +//public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){ +// stats.collectInitStats(this, o); +// obj = o; +// step = 0; +// currValue = o.getValue(); +// previousValue = Double.MAX_VALUE; +// gradient = o.getGradient(); +// originalGradientL2Norm = MathUtils.L2Norm(gradient); +// parameterChange = new double[gradient.length]; +// getDirection(); +// ProjectedDifferentiableLineSearchObjective lso = new ProjectedDifferentiableLineSearchObjective(o,direction); +// +// originalDirenctionL2Norm = MathUtils.L2Norm(direction); +// //MatrixOutput.printDoubleArray(currParameters, "parameters"); +// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ +// // System.out.println("Iter " + currentProjectionIteration); +// //o.printParameters(); +// +// +// +// if(stop.stopOptimization(gradient)){ +// stats.collectFinalStats(this, o); +// lastStepUsed = step; +// return true; +// } +// lso.reset(direction); +// step = lineSearch.getStepSize(lso); +// if(step==-1){ +// System.out.println("Failed to find step"); +// stats.collectFinalStats(this, o); +// return false; +// +// } +// +// //Update the direction for stopping criteria +// previousValue = currValue; +// currValue = o.getValue(); +// gradient = o.getGradient(); +// direction = getDirection(); +// if(MathUtils.dotProduct(gradient, direction) > 0){ +// System.out.println("Not a descent direction"); +// System.out.println(" current stats " + stats.prettyPrint(1)); +// System.exit(-1); +// } +// stats.collectIterationStats(this, o); +// } +// lastStepUsed = step; +// stats.collectFinalStats(this, o); +// return false; +// } + +//public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ +// System.out.println("Objective is not a projected objective"); +// throw new RuntimeException(); +//} + diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java new file mode 100644 index 00000000..c3d21393 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java @@ -0,0 +1,29 @@ +package optimization.gradientBasedMethods; + +import optimization.util.MathUtils; + + +/** + * Computes a projected objective + * When we tell it to set some parameters it automatically projects the parameters back into the simplex: + * + * + * When we tell it to get the gradient in automatically returns the projected gradient: + * @author javg + * + */ +public abstract class ProjectedObjective extends Objective{ + + public abstract double[] projectPoint (double[] point); + + public double[] auxParameters; + + + public void setInitialParameters(double[] params){ + setParameters(projectPoint(params)); + } + + + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java new file mode 100644 index 00000000..81d8403e --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java @@ -0,0 +1,10 @@ +package optimization.gradientBasedMethods; + + + +public interface ProjectedOptimizer extends Optimizer{ + + + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java new file mode 100644 index 00000000..6340ef73 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java @@ -0,0 +1,86 @@ +package optimization.gradientBasedMethods.stats; + +import java.util.ArrayList; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.Optimizer; +import optimization.util.MathUtils; +import optimization.util.StaticTools; + + +public class OptimizerStats { + + double start = 0; + double totalTime = 0; + + String objectiveFinalStats; + + ArrayList<Double> gradientNorms = new ArrayList<Double>(); + ArrayList<Double> steps = new ArrayList<Double>(); + ArrayList<Double> value = new ArrayList<Double>(); + ArrayList<Integer> iterations = new ArrayList<Integer>(); + double prevValue =0; + + public void reset(){ + start = 0; + totalTime = 0; + + objectiveFinalStats=""; + + gradientNorms.clear(); + steps.clear(); + value.clear(); + iterations.clear(); + prevValue =0; + } + + public void startTime() { + start = System.currentTimeMillis(); + } + public void stopTime() { + totalTime += System.currentTimeMillis() - start; + } + + public String prettyPrint(int level){ + StringBuffer res = new StringBuffer(); + res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n"); + res.append(objectiveFinalStats+"\n"); + if(level > 0){ + if(iterations.size() > 0){ + res.append("\tIteration"+iterations.get(0)+"\tstep: "+StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+ + StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)+ "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n"); + } + for(int i = 1; i < iterations.size(); i++){ + res.append("\tIteration:\t"+iterations.get(i)+"\tstep:"+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+ + StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+ + "\tvalue:\t"+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+ + "\tvalueDiff:\t"+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+ + "\n"); + } + } + return res.toString(); + } + + + public void collectInitStats(Optimizer optimizer, Objective objective){ + startTime(); + iterations.add(-1); + gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); + steps.add(0.0); + value.add(objective.getValue()); + } + + public void collectIterationStats(Optimizer optimizer, Objective objective){ + iterations.add(optimizer.getCurrentIteration()); + gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); + steps.add(optimizer.getCurrentStep()); + value.add(optimizer.getCurrentValue()); + } + + + public void collectFinalStats(Optimizer optimizer, Objective objective){ + stopTime(); + objectiveFinalStats = objective.finalInfoString(); + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java new file mode 100644 index 00000000..d65a1267 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java @@ -0,0 +1,70 @@ +package optimization.gradientBasedMethods.stats; + +import java.util.ArrayList; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.Optimizer; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.gradientBasedMethods.ProjectedOptimizer; +import optimization.util.MathUtils; +import optimization.util.StaticTools; + + +public class ProjectedOptimizerStats extends OptimizerStats{ + + + + public void reset(){ + super.reset(); + projectedGradientNorms.clear(); + } + + ArrayList<Double> projectedGradientNorms = new ArrayList<Double>(); + + public String prettyPrint(int level){ + StringBuffer res = new StringBuffer(); + res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n"); + res.append(objectiveFinalStats+"\n"); + if(level > 0){ + if(iterations.size() > 0){ + res.append("\tIteration"+iterations.get(0)+"\tstep: "+ + StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+ + StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10) + + "\tdirection"+ + StaticTools.prettyPrint(projectedGradientNorms.get(0), "0.00000E00", 10)+ + "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n"); + } + for(int i = 1; i < iterations.size(); i++){ + res.append("\tIteration"+iterations.get(i)+"\tstep: "+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+ + StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+ + "\t direction "+ + StaticTools.prettyPrint(projectedGradientNorms.get(i), "0.00000E00", 10)+ + "\tvalue "+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+ + "\tvalueDiff "+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+ + "\n"); + } + } + return res.toString(); + } + + + public void collectInitStats(Optimizer optimizer, Objective objective){ + startTime(); + } + + public void collectIterationStats(Optimizer optimizer, Objective objective){ + iterations.add(optimizer.getCurrentIteration()); + gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); + projectedGradientNorms.add(MathUtils.L2Norm(optimizer.getDirection())); + steps.add(optimizer.getCurrentStep()); + value.add(optimizer.getCurrentValue()); + } + + + + public void collectFinalStats(Optimizer optimizer, Objective objective){ + stopTime(); + objectiveFinalStats = objective.finalInfoString(); + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java new file mode 100644 index 00000000..c9f9b8df --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java @@ -0,0 +1,102 @@ +package optimization.linesearch; + +import optimization.util.Interpolation; + + +/** + * Implements Back Tracking Line Search as described on page 37 of Numerical Optimization. + * Also known as armijo rule + * @author javg + * + */ +public class ArmijoLineSearchMinimization implements LineSearchMethod{ + + /** + * How much should the step size decrease at each iteration. + */ + double contractionFactor = 0.5; + double c1 = 0.0001; + + double sigma1 = 0.1; + double sigma2 = 0.9; + + + + double initialStep; + int maxIterations = 10; + + + public ArmijoLineSearchMinimization(){ + this.initialStep = 1; + } + + //Experiment + double previousStepPicked = -1;; + double previousInitGradientDot = -1; + double currentInitGradientDot = -1; + + + public void reset(){ + previousStepPicked = -1;; + previousInitGradientDot = -1; + currentInitGradientDot = -1; + } + + public void setInitialStep(double initial){ + initialStep = initial; + } + + /** + * + */ + + public double getStepSize(DifferentiableLineSearchObjective o) { + currentInitGradientDot = o.getInitialGradient(); + //Should update all in the objective + o.updateAlpha(initialStep); + int nrIterations = 0; + //System.out.println("tried alpha" + initialStep + " value " + o.getCurrentValue()); + while(!WolfeConditions.suficientDecrease(o,c1)){ + if(nrIterations >= maxIterations){ + o.printLineSearchSteps(); + return -1; + } + double alpha=o.getAlpha(); + double alphaTemp = + Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); + if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ +// System.out.println("using alpha temp " + alphaTemp); + alpha = alphaTemp; + }else{ +// System.out.println("Discarding alpha temp " + alphaTemp); + alpha = alpha*contractionFactor; + } +// double alpha =o.getAlpha()*contractionFactor; + + o.updateAlpha(alpha); + //System.out.println("tried alpha" + alpha+ " value " + o.getCurrentValue()); + nrIterations++; + } + + //System.out.println("Leavning line search used:"); + //o.printLineSearchSteps(); + + previousInitGradientDot = currentInitGradientDot; + previousStepPicked = o.getAlpha(); + return o.getAlpha(); + } + + public double getInitialGradient() { + return currentInitGradientDot; + + } + + public double getPreviousInitialGradient() { + return previousInitGradientDot; + } + + public double getPreviousStepUsed() { + return previousStepPicked; + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java new file mode 100644 index 00000000..e153f2da --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java @@ -0,0 +1,141 @@ +package optimization.linesearch; + +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.Interpolation; +import optimization.util.MathUtils; + + + + + +/** + * Implements Armijo Rule Line search along the projection arc (Non-Linear Programming page 230) + * To be used with Projected gradient Methods. + * + * Recall that armijo tries successive step sizes alpha until the sufficient decrease is satisfied: + * f(x+alpha*direction) < f(x) + alpha*c1*grad(f)*direction + * + * In this case we are optimizing over a convex set X so we must guarantee that the new point stays inside the + * constraints. + * First the direction as to be feasible (inside constraints) and will be define as: + * d = (x_k_f - x_k) where x_k_f is a feasible point. + * so the armijo condition can be rewritten as: + * f(x+alpha(x_k_f - x_k)) < f(x) + c1*grad(f)*(x_k_f - x_k) + * and x_k_f is defined as: + * [x_k-alpha*grad(f)]+ + * where []+ mean a projection to the feasibility set. + * So this means that we take a step on the negative gradient (gradient descent) and then obtain then project + * that point to the feasibility set. + * Note that if the point is already feasible then we are back to the normal armijo rule. + * + * @author javg + * + */ +public class ArmijoLineSearchMinimizationAlongProjectionArc implements LineSearchMethod{ + + /** + * How much should the step size decrease at each iteration. + */ + double contractionFactor = 0.5; + double c1 = 0.0001; + + + double initialStep; + int maxIterations = 100; + + + double sigma1 = 0.1; + double sigma2 = 0.9; + + //Experiment + double previousStepPicked = -1;; + double previousInitGradientDot = -1; + double currentInitGradientDot = -1; + + GenericPickFirstStep strategy; + + + public void reset(){ + previousStepPicked = -1;; + previousInitGradientDot = -1; + currentInitGradientDot = -1; + } + + + public ArmijoLineSearchMinimizationAlongProjectionArc(){ + this.initialStep = 1; + } + + public ArmijoLineSearchMinimizationAlongProjectionArc(GenericPickFirstStep strategy){ + this.strategy = strategy; + this.initialStep = strategy.getFirstStep(this); + } + + + public void setInitialStep(double initial){ + this.initialStep = initial; + } + + /** + * + */ + + public double getStepSize(DifferentiableLineSearchObjective o) { + + + //Should update all in the objective + initialStep = strategy.getFirstStep(this); + o.updateAlpha(initialStep); + previousInitGradientDot=currentInitGradientDot; + currentInitGradientDot=o.getCurrentGradient(); + int nrIterations = 0; + + //Armijo rule, the current value has to be smaller than the original value plus a small step of the gradient + while(o.getCurrentValue() > + o.getOriginalValue() + c1*(o.getCurrentGradient())){ +// System.out.println("curr value "+o.getCurrentValue()); +// System.out.println("original value "+o.getOriginalValue()); +// System.out.println("GRADIENT decrease" +(MathUtils.dotProduct(o.o.gradient, +// MathUtils.arrayMinus(o.originalParameters,((ProjectedObjective)o.o).auxParameters)))); +// System.out.println("GRADIENT SAVED" + o.getCurrentGradient()); + if(nrIterations >= maxIterations){ + System.out.println("Could not find a step leaving line search with -1"); + o.printLineSearchSteps(); + return -1; + } + double alpha=o.getAlpha(); + double alphaTemp = + Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); + if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ + alpha = alphaTemp; + }else{ + alpha = alpha*contractionFactor; + } +// double alpha =obj.getAlpha()*contractionFactor; + o.updateAlpha(alpha); + nrIterations++; + } +// System.out.println("curr value "+o.getCurrentValue()); +// System.out.println("original value "+o.getOriginalValue()); +// System.out.println("sufficient decrease" +c1*o.getCurrentGradient()); +// System.out.println("Leavning line search used:"); +// o.printSmallLineSearchSteps(); + + previousStepPicked = o.getAlpha(); + return o.getAlpha(); + } + + public double getInitialGradient() { + return currentInitGradientDot; + + } + + public double getPreviousInitialGradient() { + return previousInitGradientDot; + } + + public double getPreviousStepUsed() { + return previousStepPicked; + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java new file mode 100644 index 00000000..a5bc958e --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java @@ -0,0 +1,185 @@ +package optimization.linesearch; + +import gnu.trove.TDoubleArrayList; +import gnu.trove.TIntArrayList; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; +import optimization.util.StaticTools; + + + +import util.MathUtil; +import util.Printing; + + +/** + * A wrapper class for the actual objective in order to perform + * line search. The optimization code assumes that this does a lot + * of caching in order to simplify legibility. For the applications + * we use it for, caching the entire history of evaluations should be + * a win. + * + * Note: the lastEvaluatedAt value is very important, since we will use + * it to avoid doing an evaluation of the gradient after the line search. + * + * The differentiable line search objective defines a search along the ray + * given by a direction of the main objective. + * It defines the following function, + * where f is the original objective function: + * g(alpha) = f(x_0 + alpha*direction) + * g'(alpha) = f'(x_0 + alpha*direction)*direction + * + * @author joao + * + */ +public class DifferentiableLineSearchObjective { + + + + Objective o; + int nrIterations; + TDoubleArrayList steps; + TDoubleArrayList values; + TDoubleArrayList gradients; + + //This variables cannot change + public double[] originalParameters; + public double[] searchDirection; + + + /** + * Defines a line search objective: + * Receives: + * Objective to each we are performing the line search, is used to calculate values and gradients + * Direction where to do the ray search, note that the direction does not depend of the + * objective but depends from the method. + * @param o + * @param direction + */ + public DifferentiableLineSearchObjective(Objective o) { + this.o = o; + originalParameters = new double[o.getNumParameters()]; + searchDirection = new double[o.getNumParameters()]; + steps = new TDoubleArrayList(); + values = new TDoubleArrayList(); + gradients = new TDoubleArrayList(); + } + /** + * Called whenever we start a new iteration. + * Receives the ray where we are searching for and resets all values + * + */ + public void reset(double[] direction){ + //Copy initial values + System.arraycopy(o.getParameters(), 0, originalParameters, 0, o.getNumParameters()); + System.arraycopy(direction, 0, searchDirection, 0, o.getNumParameters()); + + //Initialize variables + nrIterations = 0; + steps.clear(); + values.clear(); + gradients.clear(); + + values.add(o.getValue()); + gradients.add(MathUtils.dotProduct(o.getGradient(),direction)); + steps.add(0); + } + + + /** + * update the current value of alpha. + * Takes a step with that alpha in direction + * Get the real objective value and gradient and calculate all required information. + */ + public void updateAlpha(double alpha){ + if(alpha < 0){ + System.out.println("alpha may not be smaller that zero"); + throw new RuntimeException(); + } + nrIterations++; + steps.add(alpha); + //x_t+1 = x_t + alpha*direction + System.arraycopy(originalParameters,0, o.getParameters(), 0, originalParameters.length); + MathUtils.plusEquals(o.getParameters(), searchDirection, alpha); + o.setParameters(o.getParameters()); +// System.out.println("Took a step of " + alpha + " new value " + o.getValue()); + values.add(o.getValue()); + gradients.add(MathUtils.dotProduct(o.getGradient(),searchDirection)); + } + + + + public int getNrIterations(){ + return nrIterations; + } + + /** + * return g(alpha) for the current value of alpha + * @param iter + * @return + */ + public double getValue(int iter){ + return values.get(iter); + } + + public double getCurrentValue(){ + return values.get(nrIterations); + } + + public double getOriginalValue(){ + return values.get(0); + } + + /** + * return g'(alpha) for the current value of alpha + * @param iter + * @return + */ + public double getGradient(int iter){ + return gradients.get(iter); + } + + public double getCurrentGradient(){ + return gradients.get(nrIterations); + } + + public double getInitialGradient(){ + return gradients.get(0); + } + + + + + public double getAlpha(){ + return steps.get(nrIterations); + } + + public void printLineSearchSteps(){ + System.out.println( + " Steps size "+steps.size() + + "Values size "+values.size() + + "Gradeients size "+gradients.size()); + for(int i =0; i < steps.size();i++){ + System.out.println("Iter " + i + " step " + steps.get(i) + + " value " + values.get(i) + " grad " + gradients.get(i)); + } + } + + public void printSmallLineSearchSteps(){ + for(int i =0; i < steps.size();i++){ + System.out.print(StaticTools.prettyPrint(steps.get(i), "0.0000E00",8) + " "); + } + System.out.println(); + } + + public static void main(String[] args) { + + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java new file mode 100644 index 00000000..a33eb311 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java @@ -0,0 +1,20 @@ +package optimization.linesearch; + + +public class GenericPickFirstStep{ + double _initValue; + public GenericPickFirstStep(double initValue) { + _initValue = initValue; + } + + public double getFirstStep(LineSearchMethod ls){ + return _initValue; + } + public void collectInitValues(LineSearchMethod ls){ + + } + + public void collectFinalValues(LineSearchMethod ls){ + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java new file mode 100644 index 00000000..0deebcdb --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java @@ -0,0 +1,25 @@ +package optimization.linesearch; + + +public class InterpolationPickFirstStep extends GenericPickFirstStep{ + public InterpolationPickFirstStep(double initValue) { + super(initValue); + } + + public double getFirstStep(LineSearchMethod ls){ + if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ + double newStep = Math.min(300, 1.02*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient()); + // System.out.println("proposing " + newStep); + return newStep; + + } + return _initValue; + } + public void collectInitValues(WolfRuleLineSearch ls){ + + } + + public void collectFinalValues(WolfRuleLineSearch ls){ + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java new file mode 100644 index 00000000..80cd7f39 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java @@ -0,0 +1,14 @@ +package optimization.linesearch; + + +public interface LineSearchMethod { + + double getStepSize(DifferentiableLineSearchObjective o); + + public double getInitialGradient(); + public double getPreviousInitialGradient(); + public double getPreviousStepUsed(); + + public void setInitialStep(double initial); + public void reset(); +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java new file mode 100644 index 00000000..4b354fd9 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java @@ -0,0 +1,33 @@ +package optimization.linesearch; + +/** + * Non newtwon since we don't always try 1... + * Not sure if that is even usefull for newton + * @author javg + * + */ +public class NonNewtonInterpolationPickFirstStep extends GenericPickFirstStep{ + public NonNewtonInterpolationPickFirstStep(double initValue) { + super(initValue); + } + + public double getFirstStep(LineSearchMethod ls){ +// System.out.println("Previous step used " + ls.getPreviousStepUsed()); +// System.out.println("PreviousGradinebt " + ls.getPreviousInitialGradient()); +// System.out.println("CurrentGradinebt " + ls.getInitialGradient()); + if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ + double newStep = 1.01*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient(); + //System.out.println("Suggesting " + newStep); + return newStep; + + } + return _initValue; + } + public void collectInitValues(WolfRuleLineSearch ls){ + + } + + public void collectFinalValues(WolfRuleLineSearch ls){ + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java new file mode 100644 index 00000000..29ccbc32 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java @@ -0,0 +1,137 @@ +package optimization.linesearch; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; +import optimization.util.MatrixOutput; + + +/** + * See ArmijoLineSearchMinimizationAlongProjectionArc for description + * @author javg + * + */ +public class ProjectedDifferentiableLineSearchObjective extends DifferentiableLineSearchObjective{ + + + + ProjectedObjective obj; + public ProjectedDifferentiableLineSearchObjective(Objective o) { + super(o); + if(!(o instanceof ProjectedObjective)){ + System.out.println("Must receive a projected objective"); + throw new RuntimeException(); + } + obj = (ProjectedObjective) o; + } + + + + public double[] projectPoint (double[] point){ + return ((ProjectedObjective)o).projectPoint(point); + } + public void updateAlpha(double alpha){ + if(alpha < 0){ + System.out.println("alpha may not be smaller that zero"); + throw new RuntimeException(); + } + + if(obj.auxParameters == null){ + obj.auxParameters = new double[obj.getParameters().length]; + } + + nrIterations++; + + steps.add(alpha); + System.arraycopy(originalParameters, 0, obj.auxParameters, 0, obj.auxParameters.length); + + //Take a step into the search direction + +// MatrixOutput.printDoubleArray(obj.getGradient(), "gradient"); + +// alpha=gradients.get(0)*alpha/(gradients.get(gradients.size()-1)); + + //x_t+1 = x_t - alpha*gradient = x_t + alpha*direction + MathUtils.plusEquals(obj.auxParameters, searchDirection, alpha); +// MatrixOutput.printDoubleArray(obj.auxParameters, "before projection"); + obj.auxParameters = projectPoint(obj.auxParameters); +// MatrixOutput.printDoubleArray(obj.auxParameters, "after projection"); + o.setParameters(obj.auxParameters); +// System.out.println("new parameters"); +// o.printParameters(); + values.add(o.getValue()); + //Computes the new gradient x_k-[x_k-alpha*Gradient(x_k)]+ + MathUtils.minusEqualsInverse(originalParameters,obj.auxParameters,1); +// MatrixOutput.printDoubleArray(obj.auxParameters, "new gradient"); + //Dot product between the new direction and the new gradient + double gradient = MathUtils.dotProduct(obj.auxParameters,searchDirection); + gradients.add(gradient); + if(gradient > 0){ + System.out.println("Gradient on line search has to be smaller than zero"); + System.out.println("Iter: " + nrIterations); + MatrixOutput.printDoubleArray(obj.auxParameters, "new direction"); + MatrixOutput.printDoubleArray(searchDirection, "search direction"); + throw new RuntimeException(); + + } + + } + + /** + * + */ +// public void updateAlpha(double alpha){ +// +// if(alpha < 0){ +// System.out.println("alpha may not be smaller that zero"); +// throw new RuntimeException(); +// } +// +// nrIterations++; +// steps.add(alpha); +// //x_t+1 = x_t - alpha*direction +// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); +//// MatrixOutput.printDoubleArray(parametersChange, "parameters before step"); +//// System.out.println("Step" + alpha); +// MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha); +// +// MathUtils.minusEquals(parametersChange, originalGradient, alpha); +// +// //Project the points into the feasibility set +//// MatrixOutput.printDoubleArray(parametersChange, "before projection"); +// //x_k(alpha) = [x_k - alpha*grad f(x_k)]+ +// parametersChange = projectPoint(parametersChange); +//// MatrixOutput.printDoubleArray(parametersChange, "after projection"); +// o.setParameters(parametersChange); +// values.add(o.getValue()); +// //Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+ +// +// direction=MathUtils.arrayMinus(parametersChange,originalParameters); +//// MatrixOutput.printDoubleArray(direction, "new direction"); +// +// double gradient = MathUtils.dotProduct(originalGradient,direction); +// gradients.add(gradient); +// if(gradient > 1E-10){ +// System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction))); +// +// +// System.out.println("not a descent direction for alpha " + alpha); +// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); +// MathUtils.minusEquals(parametersChange, originalGradient, 1E-20); +// +// parametersChange = projectPoint(parametersChange); +// direction=MathUtils.arrayMinus(parametersChange,originalParameters); +// gradient = MathUtils.dotProduct(originalGradient,direction); +// if(gradient > 0){ +// System.out.println("Direction is really non-descent evern for small alphas:" + gradient); +// } +// System.out.println("ProjecteLineSearchObjective: Should be a descent direction at " + nrIterations + ": "+ gradient); +//// System.out.println(Printing.doubleArrayToString(originalGradient, null,"Original gradient")); +//// System.out.println(Printing.doubleArrayToString(originalParameters, null,"Original parameters")); +//// System.out.println(Printing.doubleArrayToString(parametersChange, null,"Projected parameters")); +//// System.out.println(Printing.doubleArrayToString(direction, null,"Direction")); +// throw new RuntimeException(); +// } +// } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java new file mode 100644 index 00000000..5489f2d0 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java @@ -0,0 +1,300 @@ +package optimization.linesearch; + +import java.io.PrintStream; +import java.util.ArrayList; + +import optimization.util.Interpolation; + + + + +/** + * + * @author javg + * + */ +public class WolfRuleLineSearch implements LineSearchMethod{ + + GenericPickFirstStep pickFirstStep; + + double c1 = 1.0E-4; + double c2 = 0.9; + + //Application dependent + double maxStep=100; + + int extrapolationIteration; + int maxExtrapolationIteration = 1000; + + + double minZoomDiffTresh = 10E-10; + + + ArrayList<Double> steps; + ArrayList<Double> gradientDots; + ArrayList<Double> functionVals; + + int debugLevel = 0; + boolean foudStep = false; + + public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){ + this.pickFirstStep = pickFirstStep; + + } + + + + + public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep, double c1, double c2){ + this.pickFirstStep = pickFirstStep; + initialStep = pickFirstStep.getFirstStep(this); + this.c1 = c1; + this.c2 = c2; + } + + public void setDebugLevel(int level){ + debugLevel = level; + } + + //Experiment + double previousStepPicked = -1;; + double previousInitGradientDot = -1; + double currentInitGradientDot = -1; + + double initialStep; + + + public void reset(){ + previousStepPicked = -1;; + previousInitGradientDot = -1; + currentInitGradientDot = -1; + if(steps != null) + steps.clear(); + if(gradientDots != null) + gradientDots.clear(); + if(functionVals != null) + functionVals.clear(); + } + + public void setInitialStep(double initial){ + initialStep = pickFirstStep.getFirstStep(this); + } + + + + /** + * Implements Wolf Line search as described in nocetal. + * This process consists in two stages. The first stage we try to satisfy the + * biggest step size that still satisfies the curvature condition. We keep increasing + * the initial step size until we find a step satisfying the curvature condition, we return + * success, we failed the sufficient increase so we cannot increase more and we can call zoom with + * that maximum step, or we pass the minimum in which case we can call zoom the same way. + * + */ + public double getStepSize(DifferentiableLineSearchObjective objective){ + //System.out.println("entering line search"); + + foudStep = false; + if(debugLevel >= 1){ + steps = new ArrayList<Double>(); + gradientDots = new ArrayList<Double>(); + functionVals =new ArrayList<Double>(); + } + + //test + currentInitGradientDot = objective.getInitialGradient(); + + + double previousValue = objective.getCurrentValue(); + double previousStep = 0; + double currentStep =pickFirstStep.getFirstStep(this); + for(extrapolationIteration = 0; + extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){ + + objective.updateAlpha(currentStep); + double currentValue = objective.getCurrentValue(); + if(debugLevel >= 1){ + steps.add(currentStep); + functionVals.add(currentValue); + gradientDots.add(objective.getCurrentGradient()); + } + + + //The current step does not satisfy the sufficient decrease condition anymore + // so we cannot get bigger than that calling zoom. + if(!WolfeConditions.suficientDecrease(objective,c1)|| + (extrapolationIteration > 0 && currentValue >= previousValue)){ + currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations); + break; + } + + //Satisfying both conditions ready to leave + if(WolfeConditions.sufficientCurvature(objective,c1,c2)){ + //Found step + foudStep = true; + break; + } + + /** + * This means that we passed the minimum already since the dot product that should be + * negative (descent direction) is now positive. So we cannot increase more. On the other hand + * since we know the direction is a descent direction the value the objective at the current step + * is for sure smaller than the preivous step so we change the order. + */ + if(objective.getCurrentGradient() >= 0){ + currentStep = zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1); + break; + } + + + //Ok, so we can still get a bigger step, + double aux = currentStep; + //currentStep = currentStep*2; + if(Math.abs(currentStep-maxStep)>1.1e-2){ + currentStep = (currentStep+maxStep)/2; + }else{ + currentStep = currentStep*2; + } + previousStep = aux; + previousValue = currentValue; + //Could be done better + if(currentStep >= maxStep){ + System.out.println("Excedded max step...calling zoom with maxStepSize"); + currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations); + } + } + if(!foudStep){ + System.out.println("Wolfe Rule exceed number of iterations"); + if(debugLevel >= 1){ + printSmallWolfeStats(System.out); +// System.out.println("Line search values"); +// DebugHelpers.getLineSearchGraph(o, direction, originalParameters,origValue, origGradDirectionDot,c1,c2); + } + return -1; + } + if(debugLevel >= 1){ + printSmallWolfeStats(System.out); + } + + previousStepPicked = currentStep; + previousInitGradientDot = currentInitGradientDot; +// objective.printLineSearchSteps(); + return currentStep; + } + + + + + + public void printWolfeStats(PrintStream out){ + for(int i = 0; i < steps.size(); i++){ + out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i)); + } + } + + public void printSmallWolfeStats(PrintStream out){ + for(int i = 0; i < steps.size(); i++){ + out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" "); + } + System.out.println(); + } + + + + /** + * Pick a step satisfying the strong wolfe condition from an given from lowerStep and higherStep + * picked on the routine above. + * + * Both lowerStep and higherStep have been evaluated, so we only need to pass the iteration where they have + * been evaluated and save extra evaluations. + * + * We know that lowerStepValue as to be smaller than higherStepValue, and that a point + * satisfying both conditions exists in such interval. + * + * LowerStep always satisfies at least the sufficient decrease + * @return + */ + public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep, + int lowerStepIter, int higherStepIter){ + + if(debugLevel >=2){ + System.out.println("Entering zoom with " + lowerStep+"-"+higherStep); + } + + double currentStep=-1; + + int zoomIter = 0; + while(zoomIter < 1000){ + if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){ + o.updateAlpha(lowerStep); + if(debugLevel >= 1){ + steps.add(lowerStep); + functionVals.add(o.getCurrentValue()); + gradientDots.add(o.getCurrentGradient()); + } + foudStep = true; + return lowerStep; + } + + //Cubic interpolation + currentStep = + Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter), + higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter)); + + //Safeguard.... should not be required check in what condtions it is required + if(currentStep < 0 ){ + currentStep = (lowerStep+higherStep)/2; + } + if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){ + currentStep = (lowerStep+higherStep)/2; + } +// currentStep = (lowerStep+higherStep)/2; +// System.out.println("Trying "+currentStep); + o.updateAlpha(currentStep); + if(debugLevel >=1){ + steps.add(currentStep); + functionVals.add(o.getCurrentValue()); + gradientDots.add(o.getCurrentGradient()); + } + if(!WolfeConditions.suficientDecrease(o,c1) + || o.getCurrentValue() >= o.getValue(lowerStepIter)){ + higherStepIter = o.nrIterations; + higherStep = currentStep; + } + //Note when entering here the new step satisfies the sufficent decrease and + // or as a function value that is better than the previous best (lowerStepFunctionValues) + // so we either leave or change the value of the alpha low. + else{ + if(WolfeConditions.sufficientCurvature(o,c1,c2)){ + //Satisfies the both wolf conditions + foudStep = true; + break; + } + //If does not satisfy curvature + if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){ + higherStep = lowerStep; + higherStepIter = lowerStepIter; + } + lowerStep = currentStep; + lowerStepIter = o.nrIterations; + } + zoomIter++; + } + return currentStep; + } + + public double getInitialGradient() { + return currentInitGradientDot; + + } + + public double getPreviousInitialGradient() { + return previousInitGradientDot; + } + + public double getPreviousStepUsed() { + return previousStepPicked; + } + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java new file mode 100644 index 00000000..dcc704eb --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java @@ -0,0 +1,45 @@ +package optimization.linesearch; + + +public class WolfeConditions { + + /** + * Sufficient Increase number. Default constant + */ + + + /** + * Value for suficient curvature: + * 0.9 - For newton and quase netwon methods + * 0.1 - Non linear conhugate gradient + */ + + int debugLevel = 0; + public void setDebugLevel(int level){ + debugLevel = level; + } + + public static boolean suficientDecrease(DifferentiableLineSearchObjective o, double c1){ + double value = o.getOriginalValue()+c1*o.getAlpha()*o.getInitialGradient(); +// System.out.println("Sufficient Decrease original "+value+" new "+ o.getCurrentValue()); + return o.getCurrentValue() <= value; + } + + + + + public static boolean sufficientCurvature(DifferentiableLineSearchObjective o, double c1, double c2){ +// if(debugLevel >= 2){ +// double current = Math.abs(o.getCurrentGradient()); +// double orig = -c2*o.getInitialGradient(); +// if(current <= orig){ +// return true; +// }else{ +// System.out.println("Not satistfying curvature condition curvature " + current + " wants " + orig); +// return false; +// } +// } + return Math.abs(o.getCurrentGradient()) <= -c2*o.getInitialGradient(); + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java new file mode 100644 index 00000000..0429d531 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java @@ -0,0 +1,104 @@ +package optimization.projections; + + +import java.util.Random; + +import optimization.util.MathUtils; +import optimization.util.MatrixOutput; + +/** + * Implements a projection into a box set defined by a and b. + * If either a or b are infinity then that bound is ignored. + * @author javg + * + */ +public class BoundsProjection extends Projection{ + + double a,b; + boolean ignoreA = false; + boolean ignoreB = false; + public BoundsProjection(double lowerBound, double upperBound) { + if(Double.isInfinite(lowerBound)){ + this.ignoreA = true; + }else{ + this.a =lowerBound; + } + if(Double.isInfinite(upperBound)){ + this.ignoreB = true; + }else{ + this.b =upperBound; + } + } + + + + /** + * Projects into the bounds + * a <= x_i <=b + */ + public void project(double[] original){ + for (int i = 0; i < original.length; i++) { + if(!ignoreA && original[i] < a){ + original[i] = a; + }else if(!ignoreB && original[i]>b){ + original[i]=b; + } + } + } + + /** + * Generates a random number between a and b. + */ + + Random r = new Random(); + + public double[] samplePoint(int numParams) { + double[] point = new double[numParams]; + for (int i = 0; i < point.length; i++) { + double rand = r.nextDouble(); + if(ignoreA && ignoreB){ + //Use const to avoid number near overflow + point[i] = rand*(1.E100+1.E100)-1.E100; + }else if(ignoreA){ + point[i] = rand*(b-1.E100)-1.E100; + }else if(ignoreB){ + point[i] = rand*(1.E100-a)-a; + }else{ + point[i] = rand*(b-a)-a; + } + } + return point; + } + + public static void main(String[] args) { + BoundsProjection sp = new BoundsProjection(0,Double.POSITIVE_INFINITY); + + + MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 1"); + MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 2"); + MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 3"); + + double[] d = {-1.1,1.2,1.4}; + double[] original = d.clone(); + MatrixOutput.printDoubleArray(d, "before"); + + sp.project(d); + MatrixOutput.printDoubleArray(d, "after"); + System.out.println("Test projection: " + sp.testProjection(original, d)); + } + + double epsilon = 1.E-10; + public double[] perturbePoint(double[] point, int parameter){ + double[] newPoint = point.clone(); + if(!ignoreA && MathUtils.almost(point[parameter], a)){ + newPoint[parameter]+=epsilon; + }else if(!ignoreB && MathUtils.almost(point[parameter], b)){ + newPoint[parameter]-=epsilon; + }else{ + newPoint[parameter]-=epsilon; + } + return newPoint; + } + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java new file mode 100644 index 00000000..b5a9f92f --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java @@ -0,0 +1,72 @@ +package optimization.projections; + +import optimization.util.MathUtils; +import optimization.util.MatrixOutput; +import util.ArrayMath; +import util.Printing; + + + +public abstract class Projection { + + + public abstract void project(double[] original); + + + /** + * From the projection theorem "Non-Linear Programming" page + * 201 fact 2. + * + * Given some z in R, and a vector x* in X; + * x* = z+ iif for all x in X + * (z-x*)'(x-x*) <= 0 where 0 is when x*=x + * See figure 2.16 in book + * + * @param original + * @param projected + * @return + */ + public boolean testProjection(double[] original, double[] projected){ + double[] original1 = original.clone(); + //System.out.println(Printing.doubleArrayToString(original1, null, "original")); + //System.out.println(Printing.doubleArrayToString(projected, null, "projected")); + MathUtils.minusEquals(original1, projected, 1); + //System.out.println(Printing.doubleArrayToString(original1, null, "minus1")); + for(int i = 0; i < 10; i++){ + double[] x = samplePoint(original.length); + // System.out.println(Printing.doubleArrayToString(x, null, "sample")); + //If the same this returns zero so we are there. + MathUtils.minusEquals(x, projected, 1); + // System.out.println(Printing.doubleArrayToString(x, null, "minus2")); + double dotProd = MathUtils.dotProduct(original1, x); + + // System.out.println("dot " + dotProd); + if(dotProd > 0) return false; + } + + //Perturbs the point a bit in all possible directions + for(int i = 0; i < original.length; i++){ + double[] x = perturbePoint(projected,i); + // System.out.println(Printing.doubleArrayToString(x, null, "perturbed")); + //If the same this returns zero so we are there. + MathUtils.minusEquals(x, projected, 1); + // System.out.println(Printing.doubleArrayToString(x, null, "minus2")); + double dotProd = MathUtils.dotProduct(original1, x); + + // System.out.println("dot " + dotProd); + if(dotProd > 0) return false; + } + + + + return true; + } + + //Samples a point from the constrained set + public abstract double[] samplePoint(int dimensions); + + //Perturbs a point a bit still leaving it at the constraints set + public abstract double[] perturbePoint(double[] point, int parameter); + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java new file mode 100644 index 00000000..eec11bcf --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java @@ -0,0 +1,127 @@ +package optimization.projections; + + + +import java.util.Random; + +import optimization.util.MathUtils; +import optimization.util.MatrixOutput; + +public class SimplexProjection extends Projection{ + + double scale; + public SimplexProjection(double scale) { + this.scale = scale; + } + + /** + * projects the numbers of the array + * into a simplex of size. + * We follow the description of the paper + * "Efficient Projetions onto the l1-Ball + * for learning in high dimensions" + */ + public void project(double[] original){ + double[] ds = new double[original.length]; + System.arraycopy(original, 0, ds, 0, ds.length); + //If sum is smaller then zero then its ok + for (int i = 0; i < ds.length; i++) ds[i] = ds[i]>0? ds[i]:0; + double sum = MathUtils.sum(ds); + if (scale - sum >= -1.E-10 ){ + System.arraycopy(ds, 0, original, 0, ds.length); + //System.out.println("Not projecting"); + return; + } + //System.out.println("projecting " + sum + " scontraints " + scale); + util.Array.sortDescending(ds); + double currentSum = 0; + double previousTheta = 0; + double theta = 0; + for (int i = 0; i < ds.length; i++) { + currentSum+=ds[i]; + theta = (currentSum-scale)/(i+1); + if(ds[i]-theta <= 0){ + break; + } + previousTheta = theta; + } + //DEBUG + if(previousTheta < 0){ + System.out.println("Simple Projection: Theta is smaller than zero: " + previousTheta); + System.exit(-1); + } + for (int i = 0; i < original.length; i++) { + original[i] = Math.max(original[i]-previousTheta, 0); + } + } + + + + + + + /** + * Samples a point from the simplex of scale. Just sample + * random number from 0-scale and then if + * their sum is bigger then sum make them normalize. + * This is probably not sampling uniformly from the simplex but it is + * enough for our goals in here. + */ + Random r = new Random(); + public double[] samplePoint(int dimensions) { + double[] newPoint = new double[dimensions]; + double sum =0; + for (int i = 0; i < newPoint.length; i++) { + double rand = r.nextDouble()*scale; + sum+=rand; + newPoint[i]=rand; + } + //Normalize + if(sum > scale){ + for (int i = 0; i < newPoint.length; i++) { + newPoint[i]=scale*newPoint[i]/sum; + } + } + return newPoint; + } + + public static void main(String[] args) { + SimplexProjection sp = new SimplexProjection(1); + + + double[] point = sp.samplePoint(3); + MatrixOutput.printDoubleArray(point , "random 1 sum:" + MathUtils.sum(point)); + point = sp.samplePoint(3); + MatrixOutput.printDoubleArray(point , "random 2 sum:" + MathUtils.sum(point)); + point = sp.samplePoint(3); + MatrixOutput.printDoubleArray(point , "random 3 sum:" + MathUtils.sum(point)); + + double[] d = {0,1.1,-10}; + double[] original = d.clone(); + MatrixOutput.printDoubleArray(d, "before"); + + sp.project(d); + MatrixOutput.printDoubleArray(d, "after"); + System.out.println("Test projection: " + sp.testProjection(original, d)); + + } + + + double epsilon = 1.E-10; + public double[] perturbePoint(double[] point, int parameter){ + double[] newPoint = point.clone(); + if(MathUtils.almost(MathUtils.sum(point), scale)){ + newPoint[parameter]-=epsilon; + } + else if(point[parameter]==0){ + newPoint[parameter]+=epsilon; + }else if(MathUtils.almost(point[parameter], scale)){ + newPoint[parameter]-=epsilon; + } + else{ + newPoint[parameter]-=epsilon; + } + return newPoint; + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java new file mode 100644 index 00000000..15760f18 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java @@ -0,0 +1,33 @@ +package optimization.stopCriteria; + +import java.util.ArrayList; + +import optimization.gradientBasedMethods.Objective; + +public class CompositeStopingCriteria implements StopingCriteria { + + ArrayList<StopingCriteria> criterias; + + public CompositeStopingCriteria() { + criterias = new ArrayList<StopingCriteria>(); + } + + public void add(StopingCriteria criteria){ + criterias.add(criteria); + } + + public boolean stopOptimization(Objective obj){ + for(StopingCriteria criteria: criterias){ + if(criteria.stopOptimization(obj)){ + return true; + } + } + return false; + } + + public void reset(){ + for(StopingCriteria criteria: criterias){ + criteria.reset(); + } + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java new file mode 100644 index 00000000..534ff833 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java @@ -0,0 +1,30 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; + +public class GradientL2Norm implements StopingCriteria{ + + /** + * Stop if gradientNorm/(originalGradientNorm) smaller + * than gradientConvergenceValue + */ + protected double gradientConvergenceValue; + + + public GradientL2Norm(double gradientConvergenceValue){ + this.gradientConvergenceValue = gradientConvergenceValue; + } + + public void reset(){} + + public boolean stopOptimization(Objective obj){ + double norm = MathUtils.L2Norm(obj.gradient); + if(norm < gradientConvergenceValue){ + System.out.println("Gradient norm below treshold"); + return true; + } + return false; + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java new file mode 100644 index 00000000..4a489641 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java @@ -0,0 +1,48 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; + +/** + * Divides the norm by the norm at the begining of the iteration + * @author javg + * + */ +public class NormalizedGradientL2Norm extends GradientL2Norm{ + + /** + * Stop if gradientNorm/(originalGradientNorm) smaller + * than gradientConvergenceValue + */ + protected double originalGradientNorm = -1; + + public void reset(){ + originalGradientNorm = -1; + } + public NormalizedGradientL2Norm(double gradientConvergenceValue){ + super(gradientConvergenceValue); + } + + + + + public boolean stopOptimization(Objective obj){ + double norm = MathUtils.L2Norm(obj.gradient); + if(originalGradientNorm == -1){ + originalGradientNorm = norm; + } + if(originalGradientNorm < 1E-10){ + System.out.println("Gradient norm is zero " + originalGradientNorm); + return true; + } + double normalizedNorm = 1.0*norm/originalGradientNorm; + if( normalizedNorm < gradientConvergenceValue){ + System.out.println("Gradient norm below normalized normtreshold: " + norm + " original: " + originalGradientNorm + " normalized norm: " + normalizedNorm); + return true; + }else{ +// System.out.println("projected gradient norm: " + norm); + return false; + } + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java new file mode 100644 index 00000000..5ae554c2 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java @@ -0,0 +1,60 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; + +/** + * Divides the norm by the norm at the begining of the iteration + * @author javg + * + */ +public class NormalizedProjectedGradientL2Norm extends ProjectedGradientL2Norm{ + + /** + * Stop if gradientNorm/(originalGradientNorm) smaller + * than gradientConvergenceValue + */ + double originalProjectedNorm = -1; + + public NormalizedProjectedGradientL2Norm(double gradientConvergenceValue){ + super(gradientConvergenceValue); + } + + public void reset(){ + originalProjectedNorm = -1; + } + + + double[] projectGradient(ProjectedObjective obj){ + + if(obj.auxParameters == null){ + obj.auxParameters = new double[obj.getNumParameters()]; + } + System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters()); + MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1); + obj.auxParameters = obj.projectPoint(obj.auxParameters); + MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1); + return obj.auxParameters; + } + + public boolean stopOptimization(Objective obj){ + if(obj instanceof ProjectedObjective) { + ProjectedObjective o = (ProjectedObjective) obj; + double norm = MathUtils.L2Norm(projectGradient(o)); + if(originalProjectedNorm == -1){ + originalProjectedNorm = norm; + } + double normalizedNorm = 1.0*norm/originalProjectedNorm; + if( normalizedNorm < gradientConvergenceValue){ + System.out.println("Gradient norm below normalized normtreshold: " + norm + " original: " + originalProjectedNorm + " normalized norm: " + normalizedNorm); + return true; + }else{ +// System.out.println("projected gradient norm: " + norm); + return false; + } + } + System.out.println("Not a projected objective"); + throw new RuntimeException(); + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java new file mode 100644 index 00000000..6dbbc50d --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java @@ -0,0 +1,54 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; + +public class NormalizedValueDifference implements StopingCriteria{ + + /** + * Stop if the different between values is smaller than a treshold + */ + protected double valueConvergenceValue=0.01; + protected double previousValue = Double.NaN; + protected double currentValue = Double.NaN; + + public NormalizedValueDifference(double valueConvergenceValue){ + this.valueConvergenceValue = valueConvergenceValue; + } + + public void reset(){ + previousValue = Double.NaN; + currentValue = Double.NaN; + } + + + public boolean stopOptimization(Objective obj){ + if(Double.isNaN(currentValue)){ + currentValue = obj.getValue(); + return false; + }else { + previousValue = currentValue; + currentValue = obj.getValue(); + if(previousValue != 0){ + double valueDiff = Math.abs(previousValue - currentValue)/Math.abs(previousValue); + if( valueDiff < valueConvergenceValue){ + System.out.println("Leaving different in values is to small: Prev " + + (previousValue/previousValue) + " Curr: " + (currentValue/previousValue) + + " diff: " + valueDiff); + return true; + } + }else{ + double valueDiff = Math.abs(previousValue - currentValue); + if( valueDiff < valueConvergenceValue){ + System.out.println("Leaving different in values is to small: Prev " + + (previousValue) + " Curr: " + (currentValue) + + " diff: " + valueDiff); + return true; + } + } + + return false; + } + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java new file mode 100644 index 00000000..aadf1fd5 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java @@ -0,0 +1,51 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; + +public class ProjectedGradientL2Norm implements StopingCriteria{ + + /** + * Stop if gradientNorm/(originalGradientNorm) smaller + * than gradientConvergenceValue + */ + protected double gradientConvergenceValue; + + + public ProjectedGradientL2Norm(double gradientConvergenceValue){ + this.gradientConvergenceValue = gradientConvergenceValue; + } + + public void reset(){ + + } + + double[] projectGradient(ProjectedObjective obj){ + + if(obj.auxParameters == null){ + obj.auxParameters = new double[obj.getNumParameters()]; + } + System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters()); + MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1); + obj.auxParameters = obj.projectPoint(obj.auxParameters); + MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1); + return obj.auxParameters; + } + + public boolean stopOptimization(Objective obj){ + if(obj instanceof ProjectedObjective) { + ProjectedObjective o = (ProjectedObjective) obj; + double norm = MathUtils.L2Norm(projectGradient(o)); + if(norm < gradientConvergenceValue){ + // System.out.println("Gradient norm below treshold: " + norm); + return true; + }else{ +// System.out.println("projected gradient norm: " + norm); + return false; + } + } + System.out.println("Not a projected objective"); + throw new RuntimeException(); + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java new file mode 100644 index 00000000..10cf0522 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java @@ -0,0 +1,8 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; + +public interface StopingCriteria { + public boolean stopOptimization(Objective obj); + public void reset(); +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java new file mode 100644 index 00000000..e5d07229 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java @@ -0,0 +1,41 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; + +public class ValueDifference implements StopingCriteria{ + + /** + * Stop if the different between values is smaller than a treshold + */ + protected double valueConvergenceValue=0.01; + protected double previousValue = Double.NaN; + protected double currentValue = Double.NaN; + + public ValueDifference(double valueConvergenceValue){ + this.valueConvergenceValue = valueConvergenceValue; + } + + public void reset(){ + previousValue = Double.NaN; + currentValue = Double.NaN; + } + + public boolean stopOptimization(Objective obj){ + if(Double.isNaN(currentValue)){ + currentValue = obj.getValue(); + return false; + }else { + previousValue = currentValue; + currentValue = obj.getValue(); + if(previousValue - currentValue < valueConvergenceValue){ +// System.out.println("Leaving different in values is to small: Prev " +// + previousValue + " Curr: " + currentValue +// + " diff: " + (previousValue - currentValue)); + return true; + } + return false; + } + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java new file mode 100644 index 00000000..cdbdefc6 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java @@ -0,0 +1,37 @@ +package optimization.util; + +public class Interpolation { + + /** + * Fits a cubic polinomyal to a function given two points, + * such that either gradB is bigger than zero or funcB >= funcA + * + * NonLinear Programming appendix C + * @param funcA + * @param gradA + * @param funcB + * @param gradB + */ + public final static double cubicInterpolation(double a, + double funcA, double gradA, double b,double funcB, double gradB ){ + if(gradB < 0 && funcA > funcB){ + System.out.println("Cannot call cubic interpolation"); + return -1; + } + + double z = 3*(funcA-funcB)/(b-a) + gradA + gradB; + double w = Math.sqrt(z*z - gradA*gradB); + double min = b -(gradB+w-z)*(b-a)/(gradB-gradA+2*w); + return min; + } + + public final static double quadraticInterpolation(double initFValue, + double initGrad, double point,double pointFValue){ + double min = -1*initGrad*point*point/(2*(pointFValue-initGrad*point-initFValue)); + return min; + } + + public static void main(String[] args) { + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java new file mode 100644 index 00000000..5343a39b --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java @@ -0,0 +1,7 @@ +package optimization.util; + +public class Logger { + + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java new file mode 100644 index 00000000..af66f82c --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java @@ -0,0 +1,339 @@ +package optimization.util; + +import java.util.Arrays; + + + +public class MathUtils { + + /** + * + * @param vector + * @return + */ + public static double L2Norm(double[] vector){ + double value = 0; + for(int i = 0; i < vector.length; i++){ + double v = vector[i]; + value+=v*v; + } + return Math.sqrt(value); + } + + public static double sum(double[] v){ + double sum = 0; + for (int i = 0; i < v.length; i++) { + sum+=v[i]; + } + return sum; + } + + + + + /** + * w = w + v + * @param w + * @param v + */ + public static void plusEquals(double[] w, double[] v) { + for(int i=0; i<w.length;i++){ + w[i] += w[i] + v[i]; + } + } + + /** + * w[i] = w[i] + v + * @param w + * @param v + */ + public static void plusEquals(double[] w, double v) { + for(int i=0; i<w.length;i++){ + w[i] += w[i] + v; + } + } + + /** + * w[i] = w[i] - v + * @param w + * @param v + */ + public static void minusEquals(double[] w, double v) { + for(int i=0; i<w.length;i++){ + w[i] -= w[i] + v; + } + } + + /** + * w = w + a*v + * @param w + * @param v + * @param a + */ + public static void plusEquals(double[] w, double[] v, double a) { + for(int i=0; i<w.length;i++){ + w[i] += a*v[i]; + } + } + + /** + * w = w - a*v + * @param w + * @param v + * @param a + */ + public static void minusEquals(double[] w, double[] v, double a) { + for(int i=0; i<w.length;i++){ + w[i] -= a*v[i]; + } + } + /** + * v = w - a*v + * @param w + * @param v + * @param a + */ + public static void minusEqualsInverse(double[] w, double[] v, double a) { + for(int i=0; i<w.length;i++){ + v[i] = w[i] - a*v[i]; + } + } + + public static double dotProduct(double[] w, double[] v){ + double accum = 0; + for(int i=0; i<w.length;i++){ + accum += w[i]*v[i]; + } + return accum; + } + + public static double[] arrayMinus(double[]w, double[]v){ + double result[] = w.clone(); + for(int i=0; i<w.length;i++){ + result[i] -= v[i]; + } + return result; + } + + public static double[] arrayMinus(double[] result , double[]w, double[]v){ + for(int i=0; i<w.length;i++){ + result[i] = w[i]-v[i]; + } + return result; + } + + public static double[] negation(double[]w){ + double result[] = new double[w.length]; + for(int i=0; i<w.length;i++){ + result[i] = -w[i]; + } + return result; + } + + public static double square(double value){ + return value*value; + } + public static double[][] outerProduct(double[] w, double[] v){ + double[][] result = new double[w.length][v.length]; + for(int i = 0; i < w.length; i++){ + for(int j = 0; j < v.length; j++){ + result[i][j] = w[i]*v[j]; + } + } + return result; + } + /** + * results = a*W*V + * @param w + * @param v + * @param a + * @return + */ + public static double[][] weightedouterProduct(double[] w, double[] v, double a){ + double[][] result = new double[w.length][v.length]; + for(int i = 0; i < w.length; i++){ + for(int j = 0; j < v.length; j++){ + result[i][j] = a*w[i]*v[j]; + } + } + return result; + } + + public static double[][] identity(int size){ + double[][] result = new double[size][size]; + for(int i = 0; i < size; i++){ + result[i][i] = 1; + } + return result; + } + + /** + * v -= w + * @param v + * @param w + */ + public static void minusEquals(double[][] w, double[][] v){ + for(int i = 0; i < w.length; i++){ + for(int j = 0; j < w[0].length; j++){ + w[i][j] -= v[i][j]; + } + } + } + + /** + * v[i][j] -= a*w[i][j] + * @param v + * @param w + */ + public static void minusEquals(double[][] w, double[][] v, double a){ + for(int i = 0; i < w.length; i++){ + for(int j = 0; j < w[0].length; j++){ + w[i][j] -= a*v[i][j]; + } + } + } + + /** + * v += w + * @param v + * @param w + */ + public static void plusEquals(double[][] w, double[][] v){ + for(int i = 0; i < w.length; i++){ + for(int j = 0; j < w[0].length; j++){ + w[i][j] += v[i][j]; + } + } + } + + /** + * v[i][j] += a*w[i][j] + * @param v + * @param w + */ + public static void plusEquals(double[][] w, double[][] v, double a){ + for(int i = 0; i < w.length; i++){ + for(int j = 0; j < w[0].length; j++){ + w[i][j] += a*v[i][j]; + } + } + } + + + /** + * results = w*v + * @param w + * @param v + * @return + */ + public static double[][] matrixMultiplication(double[][] w,double[][] v){ + int w1 = w.length; + int w2 = w[0].length; + int v1 = v.length; + int v2 = v[0].length; + + if(w2 != v1){ + System.out.println("Matrix dimensions do not agree..."); + System.exit(-1); + } + + double[][] result = new double[w1][v2]; + for(int w_i1 = 0; w_i1 < w1; w_i1++){ + for(int v_i2 = 0; v_i2 < v2; v_i2++){ + double sum = 0; + for(int w_i2 = 0; w_i2 < w2; w_i2++){ + sum += w[w_i1 ][w_i2]*v[w_i2][v_i2]; + } + result[w_i1][v_i2] = sum; + } + } + return result; + } + + /** + * w = w.*v + * @param w + * @param v + */ + public static void matrixScalarMultiplication(double[][] w,double v){ + int w1 = w.length; + int w2 = w[0].length; + for(int w_i1 = 0; w_i1 < w1; w_i1++){ + for(int w_i2 = 0; w_i2 < w2; w_i2++){ + w[w_i1 ][w_i2] *= v; + } + } + } + + public static void scalarMultiplication(double[] w,double v){ + int w1 = w.length; + for(int w_i1 = 0; w_i1 < w1; w_i1++){ + w[w_i1 ] *= v; + } + + } + + public static double[] matrixVector(double[][] w,double[] v){ + int w1 = w.length; + int w2 = w[0].length; + int v1 = v.length; + + if(w2 != v1){ + System.out.println("Matrix dimensions do not agree..."); + System.exit(-1); + } + + double[] result = new double[w1]; + for(int w_i1 = 0; w_i1 < w1; w_i1++){ + double sum = 0; + for(int w_i2 = 0; w_i2 < w2; w_i2++){ + sum += w[w_i1 ][w_i2]*v[w_i2]; + } + result[w_i1] = sum; + } + return result; + } + + public static boolean allPositive(double[] array){ + for (int i = 0; i < array.length; i++) { + if(array[i] < 0) return false; + } + return true; + } + + + + + + public static void main(String[] args) { + double[][] m1 = new double[2][2]; + m1[0][0]=2; + m1[1][0]=2; + m1[0][1]=2; + m1[1][1]=2; + MatrixOutput.printDoubleArray(m1, "m1"); + double[][] m2 = new double[2][2]; + m2[0][0]=3; + m2[1][0]=3; + m2[0][1]=3; + m2[1][1]=3; + MatrixOutput.printDoubleArray(m2, "m2"); + double[][] result = matrixMultiplication(m1, m2); + MatrixOutput.printDoubleArray(result, "result"); + matrixScalarMultiplication(result, 3); + MatrixOutput.printDoubleArray(result, "result after multiply by 3"); + } + + public static boolean almost(double a, double b, double prec){ + return Math.abs(a-b)/Math.abs(a+b) <= prec || (almostZero(a) && almostZero(b)); + } + + public static boolean almost(double a, double b){ + return Math.abs(a-b)/Math.abs(a+b) <= 1e-10 || (almostZero(a) && almostZero(b)); + } + + public static boolean almostZero(double a) { + return Math.abs(a) <= 1e-30; + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java new file mode 100644 index 00000000..9fbdf955 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java @@ -0,0 +1,28 @@ +package optimization.util; + + +public class MatrixOutput { + public static void printDoubleArray(double[][] array, String arrayName) { + int size1 = array.length; + int size2 = array[0].length; + System.out.println(arrayName); + for (int i = 0; i < size1; i++) { + for (int j = 0; j < size2; j++) { + System.out.print(" " + StaticTools.prettyPrint(array[i][j], + "00.00E00", 4) + " "); + + } + System.out.println(); + } + System.out.println(); + } + + public static void printDoubleArray(double[] array, String arrayName) { + System.out.println(arrayName); + for (int i = 0; i < array.length; i++) { + System.out.print(" " + StaticTools.prettyPrint(array[i], + "00.00E00", 4) + " "); + } + System.out.println(); + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java new file mode 100644 index 00000000..bcabee06 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java @@ -0,0 +1,180 @@ +package optimization.util; + + +import java.io.File; +import java.io.PrintStream; + +public class StaticTools { + + static java.text.DecimalFormat fmt = new java.text.DecimalFormat(); + + public static void createDir(String directory) { + + File dir = new File(directory); + if (!dir.isDirectory()) { + boolean success = dir.mkdirs(); + if (!success) { + System.out.println("Unable to create directory " + directory); + System.exit(0); + } + System.out.println("Created directory " + directory); + } else { + System.out.println("Reusing directory " + directory); + } + } + + /* + * q and p are indexed by source/foreign Sum_S(q) = 1 the same for p KL(q,p) = + * Eq*q/p + */ + public static double KLDistance(double[][] p, double[][] q, int sourceSize, + int foreignSize) { + double totalKL = 0; + // common.StaticTools.printMatrix(q, sourceSize, foreignSize, "q", + // System.out); + // common.StaticTools.printMatrix(p, sourceSize, foreignSize, "p", + // System.out); + for (int i = 0; i < sourceSize; i++) { + double kl = 0; + for (int j = 0; j < foreignSize; j++) { + assert !Double.isNaN(q[i][j]) : "KLDistance q: prob is NaN"; + assert !Double.isNaN(p[i][j]) : "KLDistance p: prob is NaN"; + if (p[i][j] == 0 || q[i][j] == 0) { + continue; + } else { + kl += q[i][j] * Math.log(q[i][j] / p[i][j]); + } + + } + totalKL += kl; + } + assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN"; + if (totalKL < -1.0E-10) { + System.out.println("KL Smaller than zero " + totalKL); + System.out.println("Source Size" + sourceSize); + System.out.println("Foreign Size" + foreignSize); + StaticTools.printMatrix(q, sourceSize, foreignSize, "q", + System.out); + StaticTools.printMatrix(p, sourceSize, foreignSize, "p", + System.out); + System.exit(-1); + } + return totalKL / sourceSize; + } + + /* + * indexed the by [fi][si] + */ + public static double KLDistancePrime(double[][] p, double[][] q, + int sourceSize, int foreignSize) { + double totalKL = 0; + for (int i = 0; i < sourceSize; i++) { + double kl = 0; + for (int j = 0; j < foreignSize; j++) { + assert !Double.isNaN(q[j][i]) : "KLDistance q: prob is NaN"; + assert !Double.isNaN(p[j][i]) : "KLDistance p: prob is NaN"; + if (p[j][i] == 0 || q[j][i] == 0) { + continue; + } else { + kl += q[j][i] * Math.log(q[j][i] / p[j][i]); + } + + } + totalKL += kl; + } + assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN"; + return totalKL / sourceSize; + } + + public static double Entropy(double[][] p, int sourceSize, int foreignSize) { + double totalE = 0; + for (int i = 0; i < foreignSize; i++) { + double e = 0; + for (int j = 0; j < sourceSize; j++) { + e += p[i][j] * Math.log(p[i][j]); + } + totalE += e; + } + return totalE / sourceSize; + } + + public static double[][] copyMatrix(double[][] original, int sourceSize, + int foreignSize) { + double[][] result = new double[sourceSize][foreignSize]; + for (int i = 0; i < sourceSize; i++) { + for (int j = 0; j < foreignSize; j++) { + result[i][j] = original[i][j]; + } + } + return result; + } + + public static void printMatrix(double[][] matrix, int sourceSize, + int foreignSize, String info, PrintStream out) { + + java.text.DecimalFormat fmt = new java.text.DecimalFormat(); + fmt.setMaximumFractionDigits(3); + fmt.setMaximumIntegerDigits(3); + fmt.setMinimumFractionDigits(3); + fmt.setMinimumIntegerDigits(3); + + out.println(info); + + for (int i = 0; i < foreignSize; i++) { + for (int j = 0; j < sourceSize; j++) { + out.print(prettyPrint(matrix[j][i], ".00E00", 6) + " "); + } + out.println(); + } + out.println(); + out.println(); + } + + public static void printMatrix(int[][] matrix, int sourceSize, + int foreignSize, String info, PrintStream out) { + + out.println(info); + for (int i = 0; i < foreignSize; i++) { + for (int j = 0; j < sourceSize; j++) { + out.print(matrix[j][i] + " "); + } + out.println(); + } + out.println(); + out.println(); + } + + public static String formatTime(long duration) { + StringBuilder sb = new StringBuilder(); + double d = duration / 1000; + fmt.applyPattern("00"); + sb.append(fmt.format((int) (d / (60 * 60))) + ":"); + d -= ((int) d / (60 * 60)) * 60 * 60; + sb.append(fmt.format((int) (d / 60)) + ":"); + d -= ((int) d / 60) * 60; + fmt.applyPattern("00.0"); + sb.append(fmt.format(d)); + return sb.toString(); + } + + public static String prettyPrint(double d, String patt, int len) { + fmt.applyPattern(patt); + String s = fmt.format(d); + while (s.length() < len) { + s = " " + s; + } + return s; + } + + + public static long getUsedMemory(){ + System.gc(); + return (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/ (1024 * 1024); + } + + public final static boolean compareDoubles(double d1, double d2){ + return Math.abs(d1-d2) <= 1.E-10; + } + + +} |