diff options
author | desaicwtf <desaicwtf@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-09 16:59:55 +0000 |
---|---|---|
committer | desaicwtf <desaicwtf@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-09 16:59:55 +0000 |
commit | bdea91300c85539ab7153ccba58689612f66bb4d (patch) | |
tree | e778ffa1ea4d04a239b58c6e6191c0d4549006f0 /gi/posterior-regularisation/prjava/src/optimization/linesearch | |
parent | 0d1d84630a08f1c901cf09b4bcc9356c4165302f (diff) |
add optimization library source code
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@204 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/posterior-regularisation/prjava/src/optimization/linesearch')
10 files changed, 1002 insertions, 0 deletions
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java new file mode 100644 index 00000000..c9f9b8df --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java @@ -0,0 +1,102 @@ +package optimization.linesearch; + +import optimization.util.Interpolation; + + +/** + * Implements Back Tracking Line Search as described on page 37 of Numerical Optimization. + * Also known as armijo rule + * @author javg + * + */ +public class ArmijoLineSearchMinimization implements LineSearchMethod{ + + /** + * How much should the step size decrease at each iteration. + */ + double contractionFactor = 0.5; + double c1 = 0.0001; + + double sigma1 = 0.1; + double sigma2 = 0.9; + + + + double initialStep; + int maxIterations = 10; + + + public ArmijoLineSearchMinimization(){ + this.initialStep = 1; + } + + //Experiment + double previousStepPicked = -1;; + double previousInitGradientDot = -1; + double currentInitGradientDot = -1; + + + public void reset(){ + previousStepPicked = -1;; + previousInitGradientDot = -1; + currentInitGradientDot = -1; + } + + public void setInitialStep(double initial){ + initialStep = initial; + } + + /** + * + */ + + public double getStepSize(DifferentiableLineSearchObjective o) { + currentInitGradientDot = o.getInitialGradient(); + //Should update all in the objective + o.updateAlpha(initialStep); + int nrIterations = 0; + //System.out.println("tried alpha" + initialStep + " value " + o.getCurrentValue()); + while(!WolfeConditions.suficientDecrease(o,c1)){ + if(nrIterations >= maxIterations){ + o.printLineSearchSteps(); + return -1; + } + double alpha=o.getAlpha(); + double alphaTemp = + Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); + if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ +// System.out.println("using alpha temp " + alphaTemp); + alpha = alphaTemp; + }else{ +// System.out.println("Discarding alpha temp " + alphaTemp); + alpha = alpha*contractionFactor; + } +// double alpha =o.getAlpha()*contractionFactor; + + o.updateAlpha(alpha); + //System.out.println("tried alpha" + alpha+ " value " + o.getCurrentValue()); + nrIterations++; + } + + //System.out.println("Leavning line search used:"); + //o.printLineSearchSteps(); + + previousInitGradientDot = currentInitGradientDot; + previousStepPicked = o.getAlpha(); + return o.getAlpha(); + } + + public double getInitialGradient() { + return currentInitGradientDot; + + } + + public double getPreviousInitialGradient() { + return previousInitGradientDot; + } + + public double getPreviousStepUsed() { + return previousStepPicked; + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java new file mode 100644 index 00000000..e153f2da --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java @@ -0,0 +1,141 @@ +package optimization.linesearch; + +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.Interpolation; +import optimization.util.MathUtils; + + + + + +/** + * Implements Armijo Rule Line search along the projection arc (Non-Linear Programming page 230) + * To be used with Projected gradient Methods. + * + * Recall that armijo tries successive step sizes alpha until the sufficient decrease is satisfied: + * f(x+alpha*direction) < f(x) + alpha*c1*grad(f)*direction + * + * In this case we are optimizing over a convex set X so we must guarantee that the new point stays inside the + * constraints. + * First the direction as to be feasible (inside constraints) and will be define as: + * d = (x_k_f - x_k) where x_k_f is a feasible point. + * so the armijo condition can be rewritten as: + * f(x+alpha(x_k_f - x_k)) < f(x) + c1*grad(f)*(x_k_f - x_k) + * and x_k_f is defined as: + * [x_k-alpha*grad(f)]+ + * where []+ mean a projection to the feasibility set. + * So this means that we take a step on the negative gradient (gradient descent) and then obtain then project + * that point to the feasibility set. + * Note that if the point is already feasible then we are back to the normal armijo rule. + * + * @author javg + * + */ +public class ArmijoLineSearchMinimizationAlongProjectionArc implements LineSearchMethod{ + + /** + * How much should the step size decrease at each iteration. + */ + double contractionFactor = 0.5; + double c1 = 0.0001; + + + double initialStep; + int maxIterations = 100; + + + double sigma1 = 0.1; + double sigma2 = 0.9; + + //Experiment + double previousStepPicked = -1;; + double previousInitGradientDot = -1; + double currentInitGradientDot = -1; + + GenericPickFirstStep strategy; + + + public void reset(){ + previousStepPicked = -1;; + previousInitGradientDot = -1; + currentInitGradientDot = -1; + } + + + public ArmijoLineSearchMinimizationAlongProjectionArc(){ + this.initialStep = 1; + } + + public ArmijoLineSearchMinimizationAlongProjectionArc(GenericPickFirstStep strategy){ + this.strategy = strategy; + this.initialStep = strategy.getFirstStep(this); + } + + + public void setInitialStep(double initial){ + this.initialStep = initial; + } + + /** + * + */ + + public double getStepSize(DifferentiableLineSearchObjective o) { + + + //Should update all in the objective + initialStep = strategy.getFirstStep(this); + o.updateAlpha(initialStep); + previousInitGradientDot=currentInitGradientDot; + currentInitGradientDot=o.getCurrentGradient(); + int nrIterations = 0; + + //Armijo rule, the current value has to be smaller than the original value plus a small step of the gradient + while(o.getCurrentValue() > + o.getOriginalValue() + c1*(o.getCurrentGradient())){ +// System.out.println("curr value "+o.getCurrentValue()); +// System.out.println("original value "+o.getOriginalValue()); +// System.out.println("GRADIENT decrease" +(MathUtils.dotProduct(o.o.gradient, +// MathUtils.arrayMinus(o.originalParameters,((ProjectedObjective)o.o).auxParameters)))); +// System.out.println("GRADIENT SAVED" + o.getCurrentGradient()); + if(nrIterations >= maxIterations){ + System.out.println("Could not find a step leaving line search with -1"); + o.printLineSearchSteps(); + return -1; + } + double alpha=o.getAlpha(); + double alphaTemp = + Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); + if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ + alpha = alphaTemp; + }else{ + alpha = alpha*contractionFactor; + } +// double alpha =obj.getAlpha()*contractionFactor; + o.updateAlpha(alpha); + nrIterations++; + } +// System.out.println("curr value "+o.getCurrentValue()); +// System.out.println("original value "+o.getOriginalValue()); +// System.out.println("sufficient decrease" +c1*o.getCurrentGradient()); +// System.out.println("Leavning line search used:"); +// o.printSmallLineSearchSteps(); + + previousStepPicked = o.getAlpha(); + return o.getAlpha(); + } + + public double getInitialGradient() { + return currentInitGradientDot; + + } + + public double getPreviousInitialGradient() { + return previousInitGradientDot; + } + + public double getPreviousStepUsed() { + return previousStepPicked; + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java new file mode 100644 index 00000000..a5bc958e --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java @@ -0,0 +1,185 @@ +package optimization.linesearch; + +import gnu.trove.TDoubleArrayList; +import gnu.trove.TIntArrayList; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; +import optimization.util.StaticTools; + + + +import util.MathUtil; +import util.Printing; + + +/** + * A wrapper class for the actual objective in order to perform + * line search. The optimization code assumes that this does a lot + * of caching in order to simplify legibility. For the applications + * we use it for, caching the entire history of evaluations should be + * a win. + * + * Note: the lastEvaluatedAt value is very important, since we will use + * it to avoid doing an evaluation of the gradient after the line search. + * + * The differentiable line search objective defines a search along the ray + * given by a direction of the main objective. + * It defines the following function, + * where f is the original objective function: + * g(alpha) = f(x_0 + alpha*direction) + * g'(alpha) = f'(x_0 + alpha*direction)*direction + * + * @author joao + * + */ +public class DifferentiableLineSearchObjective { + + + + Objective o; + int nrIterations; + TDoubleArrayList steps; + TDoubleArrayList values; + TDoubleArrayList gradients; + + //This variables cannot change + public double[] originalParameters; + public double[] searchDirection; + + + /** + * Defines a line search objective: + * Receives: + * Objective to each we are performing the line search, is used to calculate values and gradients + * Direction where to do the ray search, note that the direction does not depend of the + * objective but depends from the method. + * @param o + * @param direction + */ + public DifferentiableLineSearchObjective(Objective o) { + this.o = o; + originalParameters = new double[o.getNumParameters()]; + searchDirection = new double[o.getNumParameters()]; + steps = new TDoubleArrayList(); + values = new TDoubleArrayList(); + gradients = new TDoubleArrayList(); + } + /** + * Called whenever we start a new iteration. + * Receives the ray where we are searching for and resets all values + * + */ + public void reset(double[] direction){ + //Copy initial values + System.arraycopy(o.getParameters(), 0, originalParameters, 0, o.getNumParameters()); + System.arraycopy(direction, 0, searchDirection, 0, o.getNumParameters()); + + //Initialize variables + nrIterations = 0; + steps.clear(); + values.clear(); + gradients.clear(); + + values.add(o.getValue()); + gradients.add(MathUtils.dotProduct(o.getGradient(),direction)); + steps.add(0); + } + + + /** + * update the current value of alpha. + * Takes a step with that alpha in direction + * Get the real objective value and gradient and calculate all required information. + */ + public void updateAlpha(double alpha){ + if(alpha < 0){ + System.out.println("alpha may not be smaller that zero"); + throw new RuntimeException(); + } + nrIterations++; + steps.add(alpha); + //x_t+1 = x_t + alpha*direction + System.arraycopy(originalParameters,0, o.getParameters(), 0, originalParameters.length); + MathUtils.plusEquals(o.getParameters(), searchDirection, alpha); + o.setParameters(o.getParameters()); +// System.out.println("Took a step of " + alpha + " new value " + o.getValue()); + values.add(o.getValue()); + gradients.add(MathUtils.dotProduct(o.getGradient(),searchDirection)); + } + + + + public int getNrIterations(){ + return nrIterations; + } + + /** + * return g(alpha) for the current value of alpha + * @param iter + * @return + */ + public double getValue(int iter){ + return values.get(iter); + } + + public double getCurrentValue(){ + return values.get(nrIterations); + } + + public double getOriginalValue(){ + return values.get(0); + } + + /** + * return g'(alpha) for the current value of alpha + * @param iter + * @return + */ + public double getGradient(int iter){ + return gradients.get(iter); + } + + public double getCurrentGradient(){ + return gradients.get(nrIterations); + } + + public double getInitialGradient(){ + return gradients.get(0); + } + + + + + public double getAlpha(){ + return steps.get(nrIterations); + } + + public void printLineSearchSteps(){ + System.out.println( + " Steps size "+steps.size() + + "Values size "+values.size() + + "Gradeients size "+gradients.size()); + for(int i =0; i < steps.size();i++){ + System.out.println("Iter " + i + " step " + steps.get(i) + + " value " + values.get(i) + " grad " + gradients.get(i)); + } + } + + public void printSmallLineSearchSteps(){ + for(int i =0; i < steps.size();i++){ + System.out.print(StaticTools.prettyPrint(steps.get(i), "0.0000E00",8) + " "); + } + System.out.println(); + } + + public static void main(String[] args) { + + } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java new file mode 100644 index 00000000..a33eb311 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java @@ -0,0 +1,20 @@ +package optimization.linesearch; + + +public class GenericPickFirstStep{ + double _initValue; + public GenericPickFirstStep(double initValue) { + _initValue = initValue; + } + + public double getFirstStep(LineSearchMethod ls){ + return _initValue; + } + public void collectInitValues(LineSearchMethod ls){ + + } + + public void collectFinalValues(LineSearchMethod ls){ + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java new file mode 100644 index 00000000..0deebcdb --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java @@ -0,0 +1,25 @@ +package optimization.linesearch; + + +public class InterpolationPickFirstStep extends GenericPickFirstStep{ + public InterpolationPickFirstStep(double initValue) { + super(initValue); + } + + public double getFirstStep(LineSearchMethod ls){ + if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ + double newStep = Math.min(300, 1.02*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient()); + // System.out.println("proposing " + newStep); + return newStep; + + } + return _initValue; + } + public void collectInitValues(WolfRuleLineSearch ls){ + + } + + public void collectFinalValues(WolfRuleLineSearch ls){ + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java new file mode 100644 index 00000000..80cd7f39 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java @@ -0,0 +1,14 @@ +package optimization.linesearch; + + +public interface LineSearchMethod { + + double getStepSize(DifferentiableLineSearchObjective o); + + public double getInitialGradient(); + public double getPreviousInitialGradient(); + public double getPreviousStepUsed(); + + public void setInitialStep(double initial); + public void reset(); +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java new file mode 100644 index 00000000..4b354fd9 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java @@ -0,0 +1,33 @@ +package optimization.linesearch; + +/** + * Non newtwon since we don't always try 1... + * Not sure if that is even usefull for newton + * @author javg + * + */ +public class NonNewtonInterpolationPickFirstStep extends GenericPickFirstStep{ + public NonNewtonInterpolationPickFirstStep(double initValue) { + super(initValue); + } + + public double getFirstStep(LineSearchMethod ls){ +// System.out.println("Previous step used " + ls.getPreviousStepUsed()); +// System.out.println("PreviousGradinebt " + ls.getPreviousInitialGradient()); +// System.out.println("CurrentGradinebt " + ls.getInitialGradient()); + if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ + double newStep = 1.01*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient(); + //System.out.println("Suggesting " + newStep); + return newStep; + + } + return _initValue; + } + public void collectInitValues(WolfRuleLineSearch ls){ + + } + + public void collectFinalValues(WolfRuleLineSearch ls){ + + } +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java new file mode 100644 index 00000000..29ccbc32 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java @@ -0,0 +1,137 @@ +package optimization.linesearch; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; +import optimization.util.MatrixOutput; + + +/** + * See ArmijoLineSearchMinimizationAlongProjectionArc for description + * @author javg + * + */ +public class ProjectedDifferentiableLineSearchObjective extends DifferentiableLineSearchObjective{ + + + + ProjectedObjective obj; + public ProjectedDifferentiableLineSearchObjective(Objective o) { + super(o); + if(!(o instanceof ProjectedObjective)){ + System.out.println("Must receive a projected objective"); + throw new RuntimeException(); + } + obj = (ProjectedObjective) o; + } + + + + public double[] projectPoint (double[] point){ + return ((ProjectedObjective)o).projectPoint(point); + } + public void updateAlpha(double alpha){ + if(alpha < 0){ + System.out.println("alpha may not be smaller that zero"); + throw new RuntimeException(); + } + + if(obj.auxParameters == null){ + obj.auxParameters = new double[obj.getParameters().length]; + } + + nrIterations++; + + steps.add(alpha); + System.arraycopy(originalParameters, 0, obj.auxParameters, 0, obj.auxParameters.length); + + //Take a step into the search direction + +// MatrixOutput.printDoubleArray(obj.getGradient(), "gradient"); + +// alpha=gradients.get(0)*alpha/(gradients.get(gradients.size()-1)); + + //x_t+1 = x_t - alpha*gradient = x_t + alpha*direction + MathUtils.plusEquals(obj.auxParameters, searchDirection, alpha); +// MatrixOutput.printDoubleArray(obj.auxParameters, "before projection"); + obj.auxParameters = projectPoint(obj.auxParameters); +// MatrixOutput.printDoubleArray(obj.auxParameters, "after projection"); + o.setParameters(obj.auxParameters); +// System.out.println("new parameters"); +// o.printParameters(); + values.add(o.getValue()); + //Computes the new gradient x_k-[x_k-alpha*Gradient(x_k)]+ + MathUtils.minusEqualsInverse(originalParameters,obj.auxParameters,1); +// MatrixOutput.printDoubleArray(obj.auxParameters, "new gradient"); + //Dot product between the new direction and the new gradient + double gradient = MathUtils.dotProduct(obj.auxParameters,searchDirection); + gradients.add(gradient); + if(gradient > 0){ + System.out.println("Gradient on line search has to be smaller than zero"); + System.out.println("Iter: " + nrIterations); + MatrixOutput.printDoubleArray(obj.auxParameters, "new direction"); + MatrixOutput.printDoubleArray(searchDirection, "search direction"); + throw new RuntimeException(); + + } + + } + + /** + * + */ +// public void updateAlpha(double alpha){ +// +// if(alpha < 0){ +// System.out.println("alpha may not be smaller that zero"); +// throw new RuntimeException(); +// } +// +// nrIterations++; +// steps.add(alpha); +// //x_t+1 = x_t - alpha*direction +// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); +//// MatrixOutput.printDoubleArray(parametersChange, "parameters before step"); +//// System.out.println("Step" + alpha); +// MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha); +// +// MathUtils.minusEquals(parametersChange, originalGradient, alpha); +// +// //Project the points into the feasibility set +//// MatrixOutput.printDoubleArray(parametersChange, "before projection"); +// //x_k(alpha) = [x_k - alpha*grad f(x_k)]+ +// parametersChange = projectPoint(parametersChange); +//// MatrixOutput.printDoubleArray(parametersChange, "after projection"); +// o.setParameters(parametersChange); +// values.add(o.getValue()); +// //Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+ +// +// direction=MathUtils.arrayMinus(parametersChange,originalParameters); +//// MatrixOutput.printDoubleArray(direction, "new direction"); +// +// double gradient = MathUtils.dotProduct(originalGradient,direction); +// gradients.add(gradient); +// if(gradient > 1E-10){ +// System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction))); +// +// +// System.out.println("not a descent direction for alpha " + alpha); +// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); +// MathUtils.minusEquals(parametersChange, originalGradient, 1E-20); +// +// parametersChange = projectPoint(parametersChange); +// direction=MathUtils.arrayMinus(parametersChange,originalParameters); +// gradient = MathUtils.dotProduct(originalGradient,direction); +// if(gradient > 0){ +// System.out.println("Direction is really non-descent evern for small alphas:" + gradient); +// } +// System.out.println("ProjecteLineSearchObjective: Should be a descent direction at " + nrIterations + ": "+ gradient); +//// System.out.println(Printing.doubleArrayToString(originalGradient, null,"Original gradient")); +//// System.out.println(Printing.doubleArrayToString(originalParameters, null,"Original parameters")); +//// System.out.println(Printing.doubleArrayToString(parametersChange, null,"Projected parameters")); +//// System.out.println(Printing.doubleArrayToString(direction, null,"Direction")); +// throw new RuntimeException(); +// } +// } + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java new file mode 100644 index 00000000..5489f2d0 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java @@ -0,0 +1,300 @@ +package optimization.linesearch; + +import java.io.PrintStream; +import java.util.ArrayList; + +import optimization.util.Interpolation; + + + + +/** + * + * @author javg + * + */ +public class WolfRuleLineSearch implements LineSearchMethod{ + + GenericPickFirstStep pickFirstStep; + + double c1 = 1.0E-4; + double c2 = 0.9; + + //Application dependent + double maxStep=100; + + int extrapolationIteration; + int maxExtrapolationIteration = 1000; + + + double minZoomDiffTresh = 10E-10; + + + ArrayList<Double> steps; + ArrayList<Double> gradientDots; + ArrayList<Double> functionVals; + + int debugLevel = 0; + boolean foudStep = false; + + public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){ + this.pickFirstStep = pickFirstStep; + + } + + + + + public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep, double c1, double c2){ + this.pickFirstStep = pickFirstStep; + initialStep = pickFirstStep.getFirstStep(this); + this.c1 = c1; + this.c2 = c2; + } + + public void setDebugLevel(int level){ + debugLevel = level; + } + + //Experiment + double previousStepPicked = -1;; + double previousInitGradientDot = -1; + double currentInitGradientDot = -1; + + double initialStep; + + + public void reset(){ + previousStepPicked = -1;; + previousInitGradientDot = -1; + currentInitGradientDot = -1; + if(steps != null) + steps.clear(); + if(gradientDots != null) + gradientDots.clear(); + if(functionVals != null) + functionVals.clear(); + } + + public void setInitialStep(double initial){ + initialStep = pickFirstStep.getFirstStep(this); + } + + + + /** + * Implements Wolf Line search as described in nocetal. + * This process consists in two stages. The first stage we try to satisfy the + * biggest step size that still satisfies the curvature condition. We keep increasing + * the initial step size until we find a step satisfying the curvature condition, we return + * success, we failed the sufficient increase so we cannot increase more and we can call zoom with + * that maximum step, or we pass the minimum in which case we can call zoom the same way. + * + */ + public double getStepSize(DifferentiableLineSearchObjective objective){ + //System.out.println("entering line search"); + + foudStep = false; + if(debugLevel >= 1){ + steps = new ArrayList<Double>(); + gradientDots = new ArrayList<Double>(); + functionVals =new ArrayList<Double>(); + } + + //test + currentInitGradientDot = objective.getInitialGradient(); + + + double previousValue = objective.getCurrentValue(); + double previousStep = 0; + double currentStep =pickFirstStep.getFirstStep(this); + for(extrapolationIteration = 0; + extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){ + + objective.updateAlpha(currentStep); + double currentValue = objective.getCurrentValue(); + if(debugLevel >= 1){ + steps.add(currentStep); + functionVals.add(currentValue); + gradientDots.add(objective.getCurrentGradient()); + } + + + //The current step does not satisfy the sufficient decrease condition anymore + // so we cannot get bigger than that calling zoom. + if(!WolfeConditions.suficientDecrease(objective,c1)|| + (extrapolationIteration > 0 && currentValue >= previousValue)){ + currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations); + break; + } + + //Satisfying both conditions ready to leave + if(WolfeConditions.sufficientCurvature(objective,c1,c2)){ + //Found step + foudStep = true; + break; + } + + /** + * This means that we passed the minimum already since the dot product that should be + * negative (descent direction) is now positive. So we cannot increase more. On the other hand + * since we know the direction is a descent direction the value the objective at the current step + * is for sure smaller than the preivous step so we change the order. + */ + if(objective.getCurrentGradient() >= 0){ + currentStep = zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1); + break; + } + + + //Ok, so we can still get a bigger step, + double aux = currentStep; + //currentStep = currentStep*2; + if(Math.abs(currentStep-maxStep)>1.1e-2){ + currentStep = (currentStep+maxStep)/2; + }else{ + currentStep = currentStep*2; + } + previousStep = aux; + previousValue = currentValue; + //Could be done better + if(currentStep >= maxStep){ + System.out.println("Excedded max step...calling zoom with maxStepSize"); + currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations); + } + } + if(!foudStep){ + System.out.println("Wolfe Rule exceed number of iterations"); + if(debugLevel >= 1){ + printSmallWolfeStats(System.out); +// System.out.println("Line search values"); +// DebugHelpers.getLineSearchGraph(o, direction, originalParameters,origValue, origGradDirectionDot,c1,c2); + } + return -1; + } + if(debugLevel >= 1){ + printSmallWolfeStats(System.out); + } + + previousStepPicked = currentStep; + previousInitGradientDot = currentInitGradientDot; +// objective.printLineSearchSteps(); + return currentStep; + } + + + + + + public void printWolfeStats(PrintStream out){ + for(int i = 0; i < steps.size(); i++){ + out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i)); + } + } + + public void printSmallWolfeStats(PrintStream out){ + for(int i = 0; i < steps.size(); i++){ + out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" "); + } + System.out.println(); + } + + + + /** + * Pick a step satisfying the strong wolfe condition from an given from lowerStep and higherStep + * picked on the routine above. + * + * Both lowerStep and higherStep have been evaluated, so we only need to pass the iteration where they have + * been evaluated and save extra evaluations. + * + * We know that lowerStepValue as to be smaller than higherStepValue, and that a point + * satisfying both conditions exists in such interval. + * + * LowerStep always satisfies at least the sufficient decrease + * @return + */ + public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep, + int lowerStepIter, int higherStepIter){ + + if(debugLevel >=2){ + System.out.println("Entering zoom with " + lowerStep+"-"+higherStep); + } + + double currentStep=-1; + + int zoomIter = 0; + while(zoomIter < 1000){ + if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){ + o.updateAlpha(lowerStep); + if(debugLevel >= 1){ + steps.add(lowerStep); + functionVals.add(o.getCurrentValue()); + gradientDots.add(o.getCurrentGradient()); + } + foudStep = true; + return lowerStep; + } + + //Cubic interpolation + currentStep = + Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter), + higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter)); + + //Safeguard.... should not be required check in what condtions it is required + if(currentStep < 0 ){ + currentStep = (lowerStep+higherStep)/2; + } + if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){ + currentStep = (lowerStep+higherStep)/2; + } +// currentStep = (lowerStep+higherStep)/2; +// System.out.println("Trying "+currentStep); + o.updateAlpha(currentStep); + if(debugLevel >=1){ + steps.add(currentStep); + functionVals.add(o.getCurrentValue()); + gradientDots.add(o.getCurrentGradient()); + } + if(!WolfeConditions.suficientDecrease(o,c1) + || o.getCurrentValue() >= o.getValue(lowerStepIter)){ + higherStepIter = o.nrIterations; + higherStep = currentStep; + } + //Note when entering here the new step satisfies the sufficent decrease and + // or as a function value that is better than the previous best (lowerStepFunctionValues) + // so we either leave or change the value of the alpha low. + else{ + if(WolfeConditions.sufficientCurvature(o,c1,c2)){ + //Satisfies the both wolf conditions + foudStep = true; + break; + } + //If does not satisfy curvature + if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){ + higherStep = lowerStep; + higherStepIter = lowerStepIter; + } + lowerStep = currentStep; + lowerStepIter = o.nrIterations; + } + zoomIter++; + } + return currentStep; + } + + public double getInitialGradient() { + return currentInitGradientDot; + + } + + public double getPreviousInitialGradient() { + return previousInitGradientDot; + } + + public double getPreviousStepUsed() { + return previousStepPicked; + } + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java new file mode 100644 index 00000000..dcc704eb --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java @@ -0,0 +1,45 @@ +package optimization.linesearch; + + +public class WolfeConditions { + + /** + * Sufficient Increase number. Default constant + */ + + + /** + * Value for suficient curvature: + * 0.9 - For newton and quase netwon methods + * 0.1 - Non linear conhugate gradient + */ + + int debugLevel = 0; + public void setDebugLevel(int level){ + debugLevel = level; + } + + public static boolean suficientDecrease(DifferentiableLineSearchObjective o, double c1){ + double value = o.getOriginalValue()+c1*o.getAlpha()*o.getInitialGradient(); +// System.out.println("Sufficient Decrease original "+value+" new "+ o.getCurrentValue()); + return o.getCurrentValue() <= value; + } + + + + + public static boolean sufficientCurvature(DifferentiableLineSearchObjective o, double c1, double c2){ +// if(debugLevel >= 2){ +// double current = Math.abs(o.getCurrentGradient()); +// double orig = -c2*o.getInitialGradient(); +// if(current <= orig){ +// return true; +// }else{ +// System.out.println("Not satistfying curvature condition curvature " + current + " wants " + orig); +// return false; +// } +// } + return Math.abs(o.getCurrentGradient()) <= -c2*o.getInitialGradient(); + } + +} |