diff options
Diffstat (limited to 'gi/posterior-regularisation')
42 files changed, 3577 insertions, 0 deletions
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java new file mode 100644 index 00000000..25fa7f09 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java @@ -0,0 +1,110 @@ +package optimization.examples; + + +import optimization.gradientBasedMethods.ConjugateGradient; +import optimization.gradientBasedMethods.GradientDescent; +import optimization.gradientBasedMethods.LBFGS; +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.Optimizer; +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.ArmijoLineSearchMinimization; +import optimization.linesearch.LineSearchMethod; +import optimization.stopCriteria.GradientL2Norm; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + +/** + *  + * @author javg + * f(x) = \sum_{i=1}^{N-1} \left[ (1-x_i)^2+ 100 (x_{i+1} - x_i^2 )^2 \right] \quad \forall x\in\mathbb{R}^N. + */ +public class GeneralizedRosenbrock extends Objective{ + +	 +	 +	public GeneralizedRosenbrock(int dimensions){ +		parameters = new double[dimensions];		 +		java.util.Arrays.fill(parameters, 0); +		gradient = new double[dimensions]; +		 +	} +	 +	public GeneralizedRosenbrock(int dimensions, double[] params){ +		parameters = params;	 +		gradient = new double[dimensions]; +	} +	 +	 +	public double getValue() { +		functionCalls++; +		double value = 0; +		for(int i = 0; i < parameters.length-1; i++){ +			value += MathUtils.square(1-parameters[i]) + 100*MathUtils.square(parameters[i+1] - MathUtils.square(parameters[i])); +		} +		 +		return value; +	} + +	/** +	 * gx = -2(1-x) -2x200(y-x^2) +	 * gy = 200(y-x^2) +	 */ +	public double[] getGradient() { +		gradientCalls++; +		java.util.Arrays.fill(gradient,0); +		for(int i = 0; i < parameters.length-1; i++){ +			gradient[i]+=-2*(1-parameters[i]) - 400*parameters[i]*(parameters[i+1] - MathUtils.square(parameters[i])); +			gradient[i+1]+=200*(parameters[i+1] - MathUtils.square(parameters[i])); +		}	 +		return gradient; +	} + +	 + +	 + +	 +	 +	public String toString(){ +		String  res =""; +		for(int i = 0; i < parameters.length; i++){ +			res += "P" + i+ " " + parameters[i]; +		} +		res += " Value " + getValue(); +		return res; +	} +	 +	public static void main(String[] args) { +		 +		GeneralizedRosenbrock o = new GeneralizedRosenbrock(2); +		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +		; + +		System.out.println("Doing Gradient descent"); +		//LineSearchMethod wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1); +		StopingCriteria stop = new GradientL2Norm(0.001);		 +		LineSearchMethod ls = new ArmijoLineSearchMinimization(); +		Optimizer optimizer = new GradientDescent(ls);		 +		OptimizerStats stats = new OptimizerStats(); +		optimizer.setMaxIterations(1000); +		boolean succed = optimizer.optimize(o,stats, stop); +		System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); +		System.out.println("Doing Conjugate Gradient descent"); +		o = new GeneralizedRosenbrock(2); +	//	wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1); +		optimizer = new ConjugateGradient(ls); +		stats = new OptimizerStats(); +		optimizer.setMaxIterations(1000); +		succed = optimizer.optimize(o,stats,stop); +		System.out.println("Suceess " + succed 
+ "/n"+stats.prettyPrint(1)); +		System.out.println("Doing Quasi newton descent"); +		o = new GeneralizedRosenbrock(2); +		optimizer = new LBFGS(ls,10); +		stats = new OptimizerStats(); +		optimizer.setMaxIterations(1000); +		succed = optimizer.optimize(o,stats,stop); +		System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); + +	} +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java new file mode 100644 index 00000000..f087681e --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java @@ -0,0 +1,128 @@ +package optimization.examples; + + +import optimization.gradientBasedMethods.ConjugateGradient; + +import optimization.gradientBasedMethods.GradientDescent; +import optimization.gradientBasedMethods.LBFGS; +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.GenericPickFirstStep; +import optimization.linesearch.LineSearchMethod; +import optimization.linesearch.WolfRuleLineSearch; +import optimization.stopCriteria.GradientL2Norm; +import optimization.stopCriteria.StopingCriteria; + + +/** + * @author javg + * + */ +public class x2y2 extends Objective{ + +	 +	//Implements function ax2+ by2  +	double a, b; +	public x2y2(double a, double b){ +		this.a = a; +		this.b = b; +		parameters = new double[2]; +		parameters[0] = 4; +		parameters[1] = 4; +		gradient = new double[2]; +	} +	 +	public double getValue() { +		functionCalls++; +		return a*parameters[0]*parameters[0]+b*parameters[1]*parameters[1]; +	} + +	public double[] getGradient() { +		gradientCalls++; +		gradient[0]=2*a*parameters[0]; +		gradient[1]=2*b*parameters[1]; +		return gradient; +//		if(debugLevel >=2){ +//			double[] numericalGradient = DebugHelpers.getNumericalGradient(this, parameters, 0.000001); +//			for(int i = 0; i < parameters.length; i++){ +//				double diff = Math.abs(gradient[i]-numericalGradient[i]); +//				if(diff > 0.00001){ +//					System.out.println("Numerical Gradient does not match"); +//					System.exit(1); +//				} +//			} +//		} +	} + +	 +	 +	public void optimizeWithGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ +		GradientDescent optimizer = new GradientDescent(ls); +		StopingCriteria stop = new GradientL2Norm(0.001); +//		optimizer.setGradientConvergenceValue(0.001); +		optimizer.setMaxIterations(100); +		boolean succed = optimizer.optimize(o,stats,stop); +		System.out.println("Ended optimzation Gradient Descent\n" + stats.prettyPrint(1)); +		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +		if(succed){ +			System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); +		}else{ +			System.out.println("Failed to optimize"); +		} +	} +	 +	public void optimizeWithConjugateGradient(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ +		ConjugateGradient optimizer = new ConjugateGradient(ls); +		StopingCriteria stop = new GradientL2Norm(0.001); + +		optimizer.setMaxIterations(10); +		boolean succed = optimizer.optimize(o,stats,stop); +		System.out.println("Ended optimzation Conjugate Gradient\n" + stats.prettyPrint(1)); +		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +		if(succed){ +			System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); +		}else{ +			System.out.println("Failed to optimize"); +		} +	} +	 +	public void 
optimizeWithLBFGS(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ +		LBFGS optimizer = new LBFGS(ls,10); +		StopingCriteria stop = new GradientL2Norm(0.001); +		optimizer.setMaxIterations(10); +		boolean succed = optimizer.optimize(o,stats,stop); +		System.out.println("Ended optimzation LBFGS\n" + stats.prettyPrint(1)); +		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +		if(succed){ +			System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); +		}else{ +			System.out.println("Failed to optimize"); +		} +	} +	 +	public static void main(String[] args) { +		x2y2 o = new x2y2(1,10); +		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +		o.setDebugLevel(4); +		LineSearchMethod wolfe = new WolfRuleLineSearch(new GenericPickFirstStep(1),0.001,0.9);; +//		LineSearchMethod ls = new ArmijoLineSearchMinimization(); +		OptimizerStats stats = new OptimizerStats(); +		o.optimizeWithGradientDescent(wolfe, stats, o); +		o = new x2y2(1,10); +		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +//		ls = new ArmijoLineSearchMinimization(); +		stats = new OptimizerStats(); +		o.optimizeWithConjugateGradient(wolfe, stats, o); +		o = new x2y2(1,10); +		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +//		ls = new ArmijoLineSearchMinimization(); +		stats = new OptimizerStats(); +		o.optimizeWithLBFGS(wolfe, stats, o);	 +	} +	 +	public String toString(){ +		return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue(); +	} +	 +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java new file mode 100644 index 00000000..391775b7 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java @@ -0,0 +1,127 @@ +package optimization.examples; + + +import optimization.gradientBasedMethods.ProjectedGradientDescent; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; +import optimization.linesearch.InterpolationPickFirstStep; +import optimization.linesearch.LineSearchMethod; +import optimization.projections.BoundsProjection; +import optimization.projections.Projection; +import optimization.projections.SimplexProjection; +import optimization.stopCriteria.CompositeStopingCriteria; +import optimization.stopCriteria.GradientL2Norm; +import optimization.stopCriteria.ProjectedGradientL2Norm; +import optimization.stopCriteria.StopingCriteria; +import optimization.stopCriteria.ValueDifference; + + +/** + * @author javg + *  + *  + *ax2+ b(y2 -displacement) + */ +public class x2y2WithConstraints extends ProjectedObjective{ + + +	double a, b; +	double dx; +	double dy; +	Projection projection; +	 +	 +	public x2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){ +		//projection = new BoundsProjection(0.2,Double.MAX_VALUE); +		super(); +		projection = proj;	 +		this.a = a; +		this.b = b; +		this.dx = dx; +		this.dy = dy; +		setInitialParameters(params); +		System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2"); +		System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")"); +		printParameters(); +		
projection.project(parameters); +		printParameters(); +		gradient = new double[2]; +	} +	 +	public double getValue() { +		functionCalls++; +		return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy)); +	} + +	public double[] getGradient() { +		if(gradient == null){ +			gradient = new double[2]; +		} +		gradientCalls++; +		gradient[0]=2*a*(parameters[0]-dx); +		gradient[1]=2*b*(parameters[1]-dy); +		return gradient; +	} +	 +	 +	public double[] projectPoint(double[] point) { +		double[] newPoint = point.clone(); +		projection.project(newPoint); +		return newPoint; +	}	 +	 +	public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2WithConstraints o){ +		ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls); +		StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001); +		StopingCriteria stopValue = new ValueDifference(0.001); +		CompositeStopingCriteria compositeStop = new CompositeStopingCriteria(); +		compositeStop.add(stopGrad); +		compositeStop.add(stopValue); +		 +		optimizer.setMaxIterations(5); +		boolean succed = optimizer.optimize(o,stats,compositeStop); +		System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1)); +		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); +		if(succed){ +			System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); +		}else{ +			System.out.println("Failed to optimize"); +		} +	} +	 +	 +	 +	public String toString(){ +		 +		return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")"; +	} +	 +	public static void main(String[] args) { +		double a = 1; +		double b=1; +		double x0 = 0; +		double y0  =1; +		double dx = 0.5; +		double dy = 0.5	; +		double [] parameters = new double[2]; +		parameters[0] = x0; +		parameters[1] = y0; +		x2y2WithConstraints o = new x2y2WithConstraints(a,b,parameters,dx,dy, new SimplexProjection(0.5)); +		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b ); +		o.setDebugLevel(4); +		 +		LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1)); +		 +		OptimizerStats stats = new OptimizerStats(); +		o.optimizeWithProjectedGradientDescent(ls, stats, o); +		 +//		o = new x2y2WithConstraints(a,b,x0,y0,dx,dy); +//		stats = new OptimizerStats(); +//		o.optimizeWithSpectralProjectedGradientDescent(stats, o); +	} +	 +	 +	 +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java new file mode 100644 index 00000000..0a4a5445 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java @@ -0,0 +1,119 @@ +package optimization.gradientBasedMethods; + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.DifferentiableLineSearchObjective; +import optimization.linesearch.LineSearchMethod; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + +/** + *  + * @author javg + * + */ +public abstract class AbstractGradientBaseMethod implements Optimizer{ +	 +	protected int maxNumberOfIterations=10000; +	 +	 +	 +	protected int currentProjectionIteration; +	protected double 
currValue;	 +	protected double previousValue = Double.MAX_VALUE;; +	protected double step; +	protected double[] gradient; +	public double[] direction; +	 +	//Original values +	protected double originalGradientL2Norm; +	 +	protected LineSearchMethod lineSearch; +	DifferentiableLineSearchObjective lso; +	 +	 +	public void reset(){ +		direction = null; +		gradient = null; +		previousValue = Double.MAX_VALUE; +		currentProjectionIteration = 0; +		originalGradientL2Norm = 0; +		step = 0; +		currValue = 0; +	} +	 +	public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ +		lso =   new DifferentiableLineSearchObjective(o); +	} +	public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ +	} +	 +	public void updateStructuresAfterStep(Objective o,OptimizerStats stats, StopingCriteria stop){ +	} +	 +	public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ +		//Initialize structures +			 +		stats.collectInitStats(this, o); +		direction = new double[o.getNumParameters()]; +		initializeStructures(o, stats, stop); +		for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){		 +//			System.out.println("starting iterations: parameters:" ); +//			o.printParameters(); +			previousValue = currValue; +			currValue = o.getValue(); +			gradient = o.getGradient(); +			if(stop.stopOptimization(o)){ +				stats.collectFinalStats(this, o); +				return true; +			}	 +			 +			getDirection(); +			if(MathUtils.dotProduct(gradient, direction) > 0){ +				System.out.println("Not a descent direction"); +				System.out.println(" current stats " + stats.prettyPrint(1)); +				System.exit(-1); +			} +			updateStructuresBeforeStep(o, stats, stop); +			lso.reset(direction); +			step = lineSearch.getStepSize(lso); +//			System.out.println("Leave with step: " + step); +			if(step==-1){ +				System.out.println("Failed to find step"); +				stats.collectFinalStats(this, o); +				return false;		 +			} +			updateStructuresAfterStep( o, stats,  stop); +//			previousValue = currValue; +//			currValue = o.getValue(); +//			gradient = o.getGradient(); +			stats.collectIterationStats(this, o); +		} +		stats.collectFinalStats(this, o); +		return false; +	} +	 +	 +	public int getCurrentIteration() { +		return currentProjectionIteration; +	} + +	 +	/** +	 * Method specific +	 */ +	public abstract double[] getDirection(); + +	public double getCurrentStep() { +		return step; +	} + + + +	public void setMaxIterations(int max) { +		maxNumberOfIterations = max; +	} + +	public double getCurrentValue() { +		return currValue; +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java new file mode 100644 index 00000000..28295729 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java @@ -0,0 +1,92 @@ +package optimization.gradientBasedMethods; + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.DifferentiableLineSearchObjective; +import optimization.linesearch.LineSearchMethod; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + + + +public class ConjugateGradient extends AbstractGradientBaseMethod{ +	 +	 +	double[] previousGradient; +	double[] previousDirection; + +	public 
ConjugateGradient(LineSearchMethod lineSearch) { +		this.lineSearch = lineSearch; +	} +	 +	public void reset(){ +		super.reset(); +		java.util.Arrays.fill(previousDirection, 0); +		java.util.Arrays.fill(previousGradient, 0); +	} +	 +	public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ +		super.initializeStructures(o, stats, stop); +		previousGradient = new double[o.getNumParameters()]; +		previousDirection = new double[o.getNumParameters()]; +	} +	public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ +		System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); +		System.arraycopy(direction, 0, previousDirection, 0, direction.length);	 +	} +	 +//	public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ +//		DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o); +//		stats.collectInitStats(this, o); +//		direction = new double[o.getNumParameters()]; +//		initializeStructures(o, stats, stop); +//		for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ +//			previousValue = currValue; +//			currValue = o.getValue(); +//			gradient =o.getGradient(); +//			if(stop.stopOptimization(gradient)){ +//				stats.collectFinalStats(this, o); +//				return true; +//			} +//			getDirection(); +//			updateStructures(o, stats, stop); +//			lso.reset(direction); +//			step = lineSearch.getStepSize(lso);	 +//			if(step==-1){ +//				System.out.println("Failed to find a step size"); +//				System.out.println("Failed to find step"); +//				stats.collectFinalStats(this, o); +//				return false;	 +//			} +//			 +//			stats.collectIterationStats(this, o); +//		} +//		stats.collectFinalStats(this, o); +//		return false; +//	} +	 +	public double[] getDirection(){ +		direction = MathUtils.negation(gradient); +		if(currentProjectionIteration != 1){ +			//Using Polak-Ribiere method (book equation 5.45) +			double b = MathUtils.dotProduct(gradient, MathUtils.arrayMinus(gradient, previousGradient)) +			/MathUtils.dotProduct(previousGradient, previousGradient); +			if(b<0){ +				System.out.println("Defaulting to gradient descent"); +				b = Math.max(b, 0); +			} +			MathUtils.plusEquals(direction, previousDirection, b); +			//Debug code +			if(MathUtils.dotProduct(direction, gradient) > 0){ +				System.out.println("Not an descent direction reseting to gradien"); +				direction = MathUtils.negation(gradient); +			} +		} +		return direction; +	} +	 +	 +	 + + + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java new file mode 100644 index 00000000..6dc4ef6c --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java @@ -0,0 +1,65 @@ +package optimization.gradientBasedMethods; + +import java.util.ArrayList; + +import optimization.util.MathUtils; + + + +public class DebugHelpers { +	public static void getLineSearchGraph(Objective o, double[] direction,  +			double[] parameters, double originalObj, +			double originalDot, double c1, double c2){ +		ArrayList<Double> stepS = new ArrayList<Double>(); +		ArrayList<Double> obj = new ArrayList<Double>(); +		ArrayList<Double> norm = new ArrayList<Double>(); +		double[] gradient = new double[o.getNumParameters()]; +		double[] newParameters = parameters.clone(); +		
MathUtils.plusEquals(newParameters,direction,0); +		o.setParameters(newParameters); +		double minValue = o.getValue(); +		int valuesBiggerThanMax = 0; +		for(double step = 0; step < 2; step +=0.01 ){ +			newParameters = parameters.clone(); +			MathUtils.plusEquals(newParameters,direction,step); +			o.setParameters(newParameters); +			double newValue = o.getValue(); +			gradient = o.getGradient(); +			double newgradDirectionDot = MathUtils.dotProduct(gradient,direction); +			stepS.add(step); +			obj.add(newValue); +			norm.add(newgradDirectionDot); +			if(newValue <= minValue){ +				minValue = newValue; +			}else{ +				valuesBiggerThanMax++; +			} +			 +			if(valuesBiggerThanMax > 10){ +				break; +			} +			 +		} +		System.out.println("step\torigObj\tobj\tsuffdec\tnorm\tcurvature1"); +		for(int i = 0; i < stepS.size(); i++){ +			double cnorm= norm.get(i);  +			System.out.println(stepS.get(i)+"\t"+originalObj +"\t"+obj.get(i) + "\t" +  +					(originalObj + originalDot*((Double)stepS.get(i))*c1) +"\t"+Math.abs(cnorm) +"\t"+c2*Math.abs(originalDot)); +		} +	} +	 +	public static double[] getNumericalGradient(Objective o, double[] parameters, double epsilon){ +		int nrParameters = o.getNumParameters(); +		double[] gradient = new double[nrParameters]; +		double[] newParameters; +		double originalValue = o.getValue(); +		for(int parameter = 0; parameter < nrParameters; parameter++){ +			newParameters = parameters.clone(); +			newParameters[parameter]+=epsilon; +			o.setParameters(newParameters); +			double newValue = o.getValue(); +			gradient[parameter]=(newValue-originalValue)/epsilon; +		}	 +		return gradient; +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java new file mode 100644 index 00000000..9a53cef4 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java @@ -0,0 +1,19 @@ +package optimization.gradientBasedMethods; + +import optimization.linesearch.LineSearchMethod; + + + +public class GradientDescent extends AbstractGradientBaseMethod{ +	 +	public GradientDescent(LineSearchMethod lineSearch) { +		this.lineSearch = lineSearch; +	} +		 +	public double[] getDirection(){ +		for(int i = 0; i< gradient.length; i++){ +			direction[i] = -gradient[i]; +		} +		return direction; +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java new file mode 100644 index 00000000..dedbc942 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java @@ -0,0 +1,234 @@ +package optimization.gradientBasedMethods; + + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.DifferentiableLineSearchObjective; +import optimization.linesearch.LineSearchMethod; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + +public class LBFGS extends AbstractGradientBaseMethod{ + +	//How many previous values are being saved +	int history; +	double[][] skList; +	double[][] ykList; +	double initialHessianParameters; +	double[] previousGradient; +	double[] previousParameters; +	 +	//auxiliar structures +	double q[]; +	double[] roi; +	double[] alphai; +	 +	public LBFGS(LineSearchMethod ls, int history) { +		lineSearch = ls; +		this.history = history; +		skList = new 
double[history][]; +		ykList = new double[history][]; + +	} +	 +	public void reset(){ +		super.reset(); +		initialHessianParameters = 0; +		previousParameters = null; +		previousGradient = null; +		skList = new double[history][]; +		ykList = new double[history][]; +		q = null; +		roi = null; +		alphai = null; +	} +	 +	public double[] LBFGSTwoLoopRecursion(double hessianConst){ +		//Only create array once +		if(q == null){ +			 q = new double[gradient.length]; +		} +		System.arraycopy(gradient, 0, q, 0, gradient.length); +		//Only create array once +		if(roi == null){ +			roi = new double[history];  +		} +		//Only create array once +		if(alphai == null){ +			alphai = new double[history]; +		} +		 +		for(int i = history-1; i >=0 && skList[i]!= null && ykList[i]!=null; i-- ){			 +		//	System.out.println("New to Old proj " + currentProjectionIteration + " history "+history + " index " + i); +			double[] si =  skList[i]; +			double[] yi = ykList[i]; +			roi[i]= 1.0/MathUtils.dotProduct(yi,si); +			alphai[i] = MathUtils.dotProduct(si, q)*roi[i]; +			MathUtils.plusEquals(q, yi, -alphai[i]); +		} +		//Initial Hessian is just a constant +		MathUtils.scalarMultiplication(q, hessianConst); +		for(int i = 0; i <history && skList[i]!= null && ykList[i]!=null; i++ ){ +		//	System.out.println("Old to New proj " + currentProjectionIteration + " history "+history + " index " + i); +			double beta = MathUtils.dotProduct(ykList[i], q)*roi[i]; +			MathUtils.plusEquals(q, skList[i], (alphai[i]-beta)); +		} +		return q; +	} +	 +	 +	 +	 +	@Override +	public double[] getDirection() { +		 +		calculateInitialHessianParameter(); +//		System.out.println("Initial hessian " + initialHessianParameters); +		return direction = MathUtils.negation(LBFGSTwoLoopRecursion(initialHessianParameters));		 +	} +	 +	public void calculateInitialHessianParameter(){ +		if(currentProjectionIteration == 1){ +			//Use gradient +			initialHessianParameters = 1; +		}else if(currentProjectionIteration <= history){ +			double[] sk = skList[currentProjectionIteration-2]; +			double[] yk = ykList[currentProjectionIteration-2]; +			initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); +		}else{ +			//get the last one +			double[] sk = skList[history-1]; +			double[] yk = ykList[history-1]; +			initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); +		} +	} +	 +	//TODO if structures exit just reset them to zero +	public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ +		super.initializeStructures(o, stats, stop); +		previousParameters = new double[o.getNumParameters()]; +		previousGradient = new double[o.getNumParameters()]; +	} +	public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){	 +		super.initializeStructures(o, stats, stop); +		System.arraycopy(o.getParameters(), 0, previousParameters, 0, previousParameters.length); +		System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); +	} + +	public void 	updateStructuresAfterStep( Objective o,OptimizerStats stats, StopingCriteria stop){ +		double[] diffX = MathUtils.arrayMinus(o.getParameters(), previousParameters); +		double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient); +		//Save new values and discard new ones +		if(currentProjectionIteration > history){ +			for(int i = 0; i < history-1;i++){ +				skList[i]=skList[i+1]; +				ykList[i]=ykList[i+1]; +			} +			skList[history-1]=diffX; +			ykList[history-1]=diffGrad; +		}else{ +			
skList[currentProjectionIteration-1]=diffX; +			ykList[currentProjectionIteration-1]=diffGrad; +		}	 +	} +	 +//	public boolean optimize(Objective o, OptimizerStats stats, StopingCriteria stop) {		 +//		DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o);		 +//		gradient = o.getGradient(); +//		direction = new double[o.getNumParameters()]; +//		previousGradient = new double[o.getNumParameters()]; +//		 +//		previousParameters = new double[o.getNumParameters()]; +//	 +//		stats.collectInitStats(this, o); +//		previousValue = Double.MAX_VALUE; +//		currValue= o.getValue(); +//		//Used for stopping criteria +//		double[] originalGradient = o.getGradient(); +//		 +//		originalGradientL2Norm = MathUtils.L2Norm(originalGradient); +//		if(stop.stopOptimization(originalGradient)){ +//			stats.collectFinalStats(this, o); +//			return true; +//		} +//		for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ +//			 +//			 +//			currValue = o.getValue(); +//			gradient  = o.getGradient(); +//			currParameters = o.getParameters(); +//			 +//			 +//			if(currentProjectionIteration == 1){ +//				//Use gradient +//				initialHessianParameters = 1; +//			}else if(currentProjectionIteration <= history){ +//				double[] sk = skList[currentProjectionIteration-2]; +//				double[] yk = ykList[currentProjectionIteration-2]; +//				initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); +//			}else{ +//				//get the last one +//				double[] sk = skList[history-1]; +//				double[] yk = ykList[history-1]; +//				initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); +//			} +//			 +//			getDirection(); +//			 +//			//MatrixOutput.printDoubleArray(direction, "direction"); +//			double dot = MathUtils.dotProduct(direction, gradient); +//			if(dot > 0){				 +//				throw new RuntimeException("Not a descent direction"); +//			} if (Double.isNaN(dot)){ +//				throw new RuntimeException("dot is not a number!!"); +//			} +//			System.arraycopy(currParameters, 0, previousParameters, 0, currParameters.length); +//			System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); +//			lso.reset(direction); +//			step = lineSearch.getStepSize(lso); +//			if(step==-1){ +//				System.out.println("Failed to find a step size"); +////				lso.printLineSearchSteps(); +////				System.out.println(stats.prettyPrint(1)); +//				stats.collectFinalStats(this, o); +//				return false;	 +//			} +//			stats.collectIterationStats(this, o); +//			 +//			//We are not updating the alpha since it is done in line search already +//			currParameters = o.getParameters(); +//			gradient = o.getGradient(); +//			 +//			if(stop.stopOptimization(gradient)){ +//				stats.collectFinalStats(this, o); +//				return true; +//			} +//			double[] diffX = MathUtils.arrayMinus(currParameters, previousParameters); +//			double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient); +//			//Save new values and discard new ones +//			if(currentProjectionIteration > history){ +//				for(int i = 0; i < history-1;i++){ +//					skList[i]=skList[i+1]; +//					ykList[i]=ykList[i+1]; +//				} +//				skList[history-1]=diffX; +//				ykList[history-1]=diffGrad; +//			}else{ +//				skList[currentProjectionIteration-1]=diffX; +//				ykList[currentProjectionIteration-1]=diffGrad; +//			}		 +//			previousValue = currValue; +//		} +//		stats.collectFinalStats(this, o); +//		return false;	 +//	} +	 + + +	 + +	 + +	 +	 
+ +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java new file mode 100644 index 00000000..0e2e27ac --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java @@ -0,0 +1,83 @@ +package optimization.gradientBasedMethods; + + +/** + * Defines an optimization objective: + *  + *  + * @author javg + * + */ +public abstract  class Objective { + +	protected int functionCalls = 0; +	protected int gradientCalls = 0; +	protected int updateCalls = 0; +	 +	protected double[] parameters; +	 +	//Contains a cache with the gradient +	public double[] gradient; +	int debugLevel = 0; +	 +	public void setDebugLevel(int level){ +		debugLevel = level; +	} +	 +	public int getNumParameters() { +		return parameters.length; +	} + +	public double getParameter(int index) { +		return parameters[index]; +	} + +	public double[] getParameters() { +		return parameters; +	} + +	public abstract double[] getGradient( ); +	 +	public void setParameter(int index, double value) { +		parameters[index]=value; +	} + +	public void setParameters(double[] params) { +		if(parameters == null){ +			parameters = new double[params.length]; +		} +		updateCalls++; +		System.arraycopy(params, 0, parameters, 0, params.length); +	} + +	 +	public int getNumberFunctionCalls() { +		return functionCalls; +	} + +	public int getNumberGradientCalls() { +		return gradientCalls; +	} +	 +	public String finalInfoString() { +		return "FE: " + functionCalls + " GE " + gradientCalls + " Params updates" + +		updateCalls; +	} +	public void printParameters() { +		System.out.println(toString()); +	}	 +	 +	public abstract String toString();	 +	public abstract double getValue (); +	 +	/** +	 * Sets the initial objective parameters +	 * For unconstrained models this just sets the objective params = argument no copying +	 * For a constrained objective project the parameters and then set +	 * @param params +	 */ +	public  void setInitialParameters(double[] params){ +		parameters = params; +	} + +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java new file mode 100644 index 00000000..96fce5b0 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java @@ -0,0 +1,19 @@ +package optimization.gradientBasedMethods; + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.stopCriteria.StopingCriteria; + +public interface Optimizer { +	public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stoping); +	 +	 +	public double[] getDirection(); +	public double getCurrentStep(); +	public double getCurrentValue(); +	public int getCurrentIteration(); +	public void reset(); +	 +	public void setMaxIterations(int max); +	 +		 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java new file mode 100644 index 00000000..afb29d04 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java @@ -0,0 +1,11 @@ +package optimization.gradientBasedMethods; + + +/** + *  + * @author javg + * + */ +public abstract 
class ProjectedAbstractGradientBaseMethod extends AbstractGradientBaseMethod implements ProjectedOptimizer{ +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java new file mode 100644 index 00000000..0186e945 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java @@ -0,0 +1,154 @@ +package optimization.gradientBasedMethods; + +import java.io.IOException; + +import optimization.gradientBasedMethods.stats.OptimizerStats; +import optimization.linesearch.DifferentiableLineSearchObjective; +import optimization.linesearch.LineSearchMethod; +import optimization.linesearch.ProjectedDifferentiableLineSearchObjective; +import optimization.stopCriteria.StopingCriteria; +import optimization.util.MathUtils; + + +/** + * This class implements the projected gradiend + * as described in Bertsekas "Non Linear Programming" + * section 2.3. + *  + * The update is given by: + * x_k+1 = x_k + alpha^k(xbar_k-x_k) + * Where xbar is: + * xbar = [x_k -s_k grad(f(x_k))]+ + * where []+ is the projection into the feasibility set + *  + * alpha is the step size  + * s_k - is a positive scalar which can be view as a step size as well, by  + * setting alpha to 1, then x_k+1 = [x_k -s_k grad(f(x_k))]+ + * This is called taking a step size along the projection arc (Bertsekas) which + * we will use by default. + *  + * Note that the only place where we actually take a step size is on pick a step size + * so this is going to be just like a normal gradient descent but use a different  + * armijo line search where we project after taking a step. + *  + *  + * @author javg + * + */ +public class ProjectedGradientDescent extends ProjectedAbstractGradientBaseMethod{ +	 + +	 +	 +	public ProjectedGradientDescent(LineSearchMethod lineSearch) { +		this.lineSearch = lineSearch; +	} +	 +	//Use projected differential objective instead +	public void initializeStructures(Objective o, OptimizerStats stats, StopingCriteria stop) { +		lso = new ProjectedDifferentiableLineSearchObjective(o); +	}; +	 +	 +	ProjectedObjective obj; +	public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){ +		obj = o; +		return super.optimize(o, stats, stop); +	} +	 +	public double[] getDirection(){ +		for(int i = 0; i< gradient.length; i++){ +			direction[i] = -gradient[i]; +		} +		return direction; +	} +	 +	 + +		 +} + + + + + + + +///OLD CODE + +//Use projected gradient norm +//public boolean stopCriteria(double[] gradient){ +//	if(originalDirenctionL2Norm == 0){ +//		System.out.println("Leaving original direction norm is zero"); +//		return true;	 +//	} +//	if(MathUtils.L2Norm(direction)/originalDirenctionL2Norm < gradientConvergenceValue){ +//		System.out.println("Leaving projected gradient Norm smaller than epsilon"); +//		return true;	 +//	} +//	if((previousValue - currValue)/Math.abs(previousValue) < valueConvergenceValue) { +//		System.out.println("Leaving value change below treshold " + previousValue + " - " + currValue); +//		System.out.println(previousValue/currValue + " - " + currValue/currValue  +//				+ " = " + (previousValue - currValue)/Math.abs(previousValue)); +//		return true; +//	} +//	return false; +//} +// + +//public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){ +//		stats.collectInitStats(this, o); +//		obj = o; +//		step = 0; +//		
currValue = o.getValue(); +//		previousValue = Double.MAX_VALUE; +//		gradient = o.getGradient(); +//		originalGradientL2Norm = MathUtils.L2Norm(gradient); +//		parameterChange = new double[gradient.length]; +//		getDirection(); +//		ProjectedDifferentiableLineSearchObjective lso = new ProjectedDifferentiableLineSearchObjective(o,direction); +//		 +//		originalDirenctionL2Norm = MathUtils.L2Norm(direction); +//		//MatrixOutput.printDoubleArray(currParameters, "parameters"); +//		for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){		 +//		//	System.out.println("Iter " + currentProjectionIteration); +//			//o.printParameters(); +//			 +//			 +//			 +//			if(stop.stopOptimization(gradient)){ +//				stats.collectFinalStats(this, o); +//				lastStepUsed = step; +//				return true; +//			}			 +//			lso.reset(direction); +//			step = lineSearch.getStepSize(lso); +//			if(step==-1){ +//				System.out.println("Failed to find step"); +//				stats.collectFinalStats(this, o); +//				return false;	 +//				 +//			} +//			 +//			//Update the direction for stopping criteria +//			previousValue = currValue; +//			currValue = o.getValue(); +//			gradient = o.getGradient(); +//			direction = getDirection();		 +//			if(MathUtils.dotProduct(gradient, direction) > 0){ +//				System.out.println("Not a descent direction"); +//				System.out.println(" current stats " + stats.prettyPrint(1)); +//				System.exit(-1); +//			} +//			stats.collectIterationStats(this, o); +//		} +//		lastStepUsed = step; +//		stats.collectFinalStats(this, o); +//		return false; +//	} + +//public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ +//	System.out.println("Objective is not a projected objective"); +//	throw new RuntimeException(); +//} + diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java new file mode 100644 index 00000000..c3d21393 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java @@ -0,0 +1,29 @@ +package optimization.gradientBasedMethods; + +import optimization.util.MathUtils; + + +/** + * Computes a projected objective + * When we tell it to set some parameters it automatically projects the parameters back into the simplex: + *  + *  + * When we tell it to get the gradient in automatically returns the projected gradient: + * @author javg + * + */ +public abstract class ProjectedObjective extends Objective{ +	 +	public abstract double[] projectPoint (double[] point); +	 +	public double[] auxParameters; +	 +	 +	public  void setInitialParameters(double[] params){ +		setParameters(projectPoint(params)); +	} +	 +	 +	 +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java new file mode 100644 index 00000000..81d8403e --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java @@ -0,0 +1,10 @@ +package optimization.gradientBasedMethods; + + + +public interface ProjectedOptimizer extends Optimizer{ +	 +	 +	 +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java 
b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java new file mode 100644 index 00000000..6340ef73 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java @@ -0,0 +1,86 @@ +package optimization.gradientBasedMethods.stats; + +import java.util.ArrayList; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.Optimizer; +import optimization.util.MathUtils; +import optimization.util.StaticTools; + + +public class OptimizerStats { +	 +	double start = 0; +	double totalTime = 0; +	 +	String objectiveFinalStats; +	 +	ArrayList<Double> gradientNorms = new ArrayList<Double>(); +	ArrayList<Double> steps = new ArrayList<Double>(); +	ArrayList<Double> value = new ArrayList<Double>(); +	ArrayList<Integer> iterations = new ArrayList<Integer>(); +	double prevValue =0; +	 +	public void reset(){ +		start = 0; +		totalTime = 0; +		 +		objectiveFinalStats=""; +		 +		gradientNorms.clear(); +		steps.clear(); +		value.clear(); +		iterations.clear(); +		prevValue =0; +	} +	 +	public void startTime() { +		start = System.currentTimeMillis(); +	} +	public void stopTime() { +		totalTime += System.currentTimeMillis() - start; +	} +	 +	public String prettyPrint(int level){ +		StringBuffer res = new StringBuffer(); +		res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n"); +		res.append(objectiveFinalStats+"\n"); +		if(level > 0){ +			if(iterations.size() > 0){ +			res.append("\tIteration"+iterations.get(0)+"\tstep: "+StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+  +					StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)+ "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n"); +			} +			for(int i = 1; i < iterations.size(); i++){ +			res.append("\tIteration:\t"+iterations.get(i)+"\tstep:"+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+  +					StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+  +					"\tvalue:\t"+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+ +					"\tvalueDiff:\t"+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+ +					"\n"); +			} +		} +		return res.toString(); +	} +	 +	 +	public void collectInitStats(Optimizer optimizer, Objective objective){ +		startTime(); +		iterations.add(-1); +		gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); +		steps.add(0.0); +		value.add(objective.getValue()); +	} +	 +	public void collectIterationStats(Optimizer optimizer, Objective objective){ +		iterations.add(optimizer.getCurrentIteration()); +		gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); +		steps.add(optimizer.getCurrentStep()); +		value.add(optimizer.getCurrentValue()); +	} +	 +	 +	public void collectFinalStats(Optimizer optimizer, Objective objective){ +		stopTime(); +		objectiveFinalStats = objective.finalInfoString(); +	} +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java new file mode 100644 index 00000000..d65a1267 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java @@ -0,0 +1,70 @@ +package optimization.gradientBasedMethods.stats; + +import java.util.ArrayList; + +import 
optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.Optimizer; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.gradientBasedMethods.ProjectedOptimizer; +import optimization.util.MathUtils; +import optimization.util.StaticTools; + + +public class ProjectedOptimizerStats extends OptimizerStats{ +	 +	 +	 +	public void reset(){ +		super.reset(); +		projectedGradientNorms.clear(); +	} +	 +	ArrayList<Double> projectedGradientNorms = new ArrayList<Double>(); + +	public String prettyPrint(int level){ +		StringBuffer res = new StringBuffer(); +		res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n"); +		res.append(objectiveFinalStats+"\n"); +		if(level > 0){ +			if(iterations.size() > 0){ +			res.append("\tIteration"+iterations.get(0)+"\tstep: "+ +					StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+  +					StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10) +					+ "\tdirection"+ +					StaticTools.prettyPrint(projectedGradientNorms.get(0), "0.00000E00", 10)+ +					"\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n"); +			} +			for(int i = 1; i < iterations.size(); i++){ +			res.append("\tIteration"+iterations.get(i)+"\tstep: "+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+  +					StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+  +					"\t direction "+ +					StaticTools.prettyPrint(projectedGradientNorms.get(i), "0.00000E00", 10)+ +					"\tvalue "+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+ +					"\tvalueDiff "+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+ +					"\n"); +			} +		} +		return res.toString(); +	} +	 +	 +	public void collectInitStats(Optimizer optimizer, Objective objective){ +		startTime(); +	} +	 +	public void collectIterationStats(Optimizer optimizer, Objective objective){		 +		iterations.add(optimizer.getCurrentIteration()); +		gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); +		projectedGradientNorms.add(MathUtils.L2Norm(optimizer.getDirection())); +		steps.add(optimizer.getCurrentStep()); +		value.add(optimizer.getCurrentValue()); +	} +	 +	 +	 +	public void collectFinalStats(Optimizer optimizer, Objective objective){ +		stopTime(); +		objectiveFinalStats = objective.finalInfoString(); +	} +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java new file mode 100644 index 00000000..c9f9b8df --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java @@ -0,0 +1,102 @@ +package optimization.linesearch; + +import optimization.util.Interpolation; + + +/** + * Implements Back Tracking Line Search as described on page 37 of Numerical Optimization. + * Also known as armijo rule + * @author javg + * + */ +public class ArmijoLineSearchMinimization implements LineSearchMethod{ + +	/** +	 * How much should the step size decrease at each iteration. 
+	 */ +	double contractionFactor = 0.5; +	double c1 = 0.0001; +	 +	double sigma1 = 0.1; +	double sigma2 = 0.9; + + +	 +	double initialStep; +	int maxIterations = 10; +	 +			 +	public ArmijoLineSearchMinimization(){ +		this.initialStep = 1; +	} +	 +	//Experiment +	double previousStepPicked = -1;; +	double previousInitGradientDot = -1; +	double currentInitGradientDot = -1; +	 +	 +	public void reset(){ +		previousStepPicked = -1;; +		previousInitGradientDot = -1; +		currentInitGradientDot = -1; +	} +	 +	public void setInitialStep(double initial){ +		initialStep = initial; +	} +	 +	/** +	 *  +	 */ +	 +	public double getStepSize(DifferentiableLineSearchObjective o) {	 +		currentInitGradientDot = o.getInitialGradient(); +		//Should update all in the objective +		o.updateAlpha(initialStep);	 +		int nrIterations = 0; +		//System.out.println("tried alpha" + initialStep + " value " + o.getCurrentValue()); +		while(!WolfeConditions.suficientDecrease(o,c1)){			 +			if(nrIterations >= maxIterations){ +				o.printLineSearchSteps();	 +				return -1; +			} +			double alpha=o.getAlpha(); +			double alphaTemp =  +				Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); +			if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ +//				System.out.println("using alpha temp " + alphaTemp); +				alpha = alphaTemp; +			}else{ +//				System.out.println("Discarding alpha temp " + alphaTemp); +				alpha = alpha*contractionFactor; +			} +//			double alpha =o.getAlpha()*contractionFactor; + +			o.updateAlpha(alpha); +			//System.out.println("tried alpha" + alpha+ " value " + o.getCurrentValue()); +			nrIterations++;			 +		} +		 +		//System.out.println("Leavning line search used:"); +		//o.printLineSearchSteps();	 +		 +		previousInitGradientDot = currentInitGradientDot; +		previousStepPicked = o.getAlpha(); +		return o.getAlpha(); +	} + +	public double getInitialGradient() { +		return currentInitGradientDot; +		 +	} + +	public double getPreviousInitialGradient() { +		return previousInitGradientDot; +	} + +	public double getPreviousStepUsed() { +		return previousStepPicked; +	} +		 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java new file mode 100644 index 00000000..e153f2da --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java @@ -0,0 +1,141 @@ +package optimization.linesearch; + +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.Interpolation; +import optimization.util.MathUtils; + + + + + +/** + * Implements Armijo Rule Line search along the projection arc (Non-Linear Programming page 230) + * To be used with Projected gradient Methods. + *  + * Recall that armijo tries successive step sizes alpha until the sufficient decrease is satisfied: + * f(x+alpha*direction) < f(x) + alpha*c1*grad(f)*direction + *  + * In this case we are optimizing over a convex set X so we must guarantee that the new point stays inside the  + * constraints. + * First the direction as to be feasible (inside constraints) and will be define as: + * d = (x_k_f - x_k) where x_k_f is a feasible point. 
+ * so the armijo condition can be rewritten as: + * f(x+alpha(x_k_f - x_k)) < f(x) + c1*grad(f)*(x_k_f - x_k) + * and x_k_f is defined as: + * [x_k-alpha*grad(f)]+ + * where []+ mean a projection to the feasibility set. + * So this means that we take a step on the negative gradient (gradient descent) and then obtain then project + * that point to the feasibility set.  + * Note that if the point is already feasible then we are back to the normal armijo rule. + *  + * @author javg + * + */ +public class ArmijoLineSearchMinimizationAlongProjectionArc implements LineSearchMethod{ + +	/** +	 * How much should the step size decrease at each iteration. +	 */ +	double contractionFactor = 0.5; +	double c1 = 0.0001; +	 +	 +	double initialStep; +	int maxIterations = 100; +			 +	 +	double sigma1 = 0.1; +	double sigma2 = 0.9; +	 +	//Experiment +	double previousStepPicked = -1;; +	double previousInitGradientDot = -1; +	double currentInitGradientDot = -1; +	 +	GenericPickFirstStep strategy; +	 +	 +	public void reset(){ +		previousStepPicked = -1;; +		previousInitGradientDot = -1; +		currentInitGradientDot = -1; +	} + +	 +	public ArmijoLineSearchMinimizationAlongProjectionArc(){ +		this.initialStep = 1; +	} +	 +	public ArmijoLineSearchMinimizationAlongProjectionArc(GenericPickFirstStep strategy){ +		this.strategy = strategy; +		this.initialStep = strategy.getFirstStep(this); +	} +	 +	 +	public void setInitialStep(double initial){ +		this.initialStep = initial; +	} +	 +	/** +	 *  +	 */ +	 +	public double getStepSize(DifferentiableLineSearchObjective o) {	 + +		 +		//Should update all in the objective +		initialStep = strategy.getFirstStep(this); +		o.updateAlpha(initialStep);	 +		previousInitGradientDot=currentInitGradientDot; +		currentInitGradientDot=o.getCurrentGradient(); +		int nrIterations = 0; +	 +		//Armijo rule, the current value has to be smaller than the original value plus a small step of the gradient +		while(o.getCurrentValue()  > +			o.getOriginalValue() + c1*(o.getCurrentGradient())){			 +//			System.out.println("curr value "+o.getCurrentValue()); +//			System.out.println("original value "+o.getOriginalValue()); +//			System.out.println("GRADIENT decrease" +(MathUtils.dotProduct(o.o.gradient, +//					MathUtils.arrayMinus(o.originalParameters,((ProjectedObjective)o.o).auxParameters)))); +//			System.out.println("GRADIENT SAVED" + o.getCurrentGradient()); +			if(nrIterations >= maxIterations){ +				System.out.println("Could not find a step leaving line search with -1"); +				o.printLineSearchSteps(); +				return -1; +			} +			double alpha=o.getAlpha(); +			double alphaTemp =  +				Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); +			if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ +				alpha = alphaTemp; +			}else{ +				alpha = alpha*contractionFactor; +			} +//			double alpha =obj.getAlpha()*contractionFactor; +			o.updateAlpha(alpha); +			nrIterations++;			 +		} +//		System.out.println("curr value "+o.getCurrentValue()); +//		System.out.println("original value "+o.getOriginalValue()); +//		System.out.println("sufficient decrease" +c1*o.getCurrentGradient()); +//		System.out.println("Leavning line search used:"); +//		o.printSmallLineSearchSteps();	 +		 +		previousStepPicked = o.getAlpha(); +		return o.getAlpha(); +	} +	 +	public double getInitialGradient() { +		return currentInitGradientDot; +		 +	} + +	public double getPreviousInitialGradient() { +		return previousInitGradientDot; +	} + +	public double 
getPreviousStepUsed() { +		return previousStepPicked; +	} +		 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java new file mode 100644 index 00000000..a5bc958e --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java @@ -0,0 +1,185 @@ +package optimization.linesearch; + +import gnu.trove.TDoubleArrayList; +import gnu.trove.TIntArrayList; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; +import optimization.util.StaticTools; + + + +import util.MathUtil; +import util.Printing; + + +/** + * A wrapper class for the actual objective in order to perform  + * line search.  The optimization code assumes that this does a lot  + * of caching in order to simplify legibility.  For the applications  + * we use it for, caching the entire history of evaluations should be  + * a win.  + *  + * Note: the lastEvaluatedAt value is very important, since we will use + * it to avoid doing an evaluation of the gradient after the line search.   + *  + * The differentiable line search objective defines a search along the ray + * given by a direction of the main objective. + * It defines the following function,  + * where f is the original objective function: + * g(alpha) = f(x_0 + alpha*direction) + * g'(alpha) = f'(x_0 + alpha*direction)*direction + *  + * @author joao + * + */ +public class DifferentiableLineSearchObjective { + +	 +	 +	Objective o; +	int nrIterations; +	TDoubleArrayList steps; +	TDoubleArrayList values; +	TDoubleArrayList gradients; +	 +	//This variables cannot change +	public double[] originalParameters; +	public double[] searchDirection; + +	 +	/** +	 * Defines a line search objective: +	 * Receives: +	 * Objective to each we are performing the line search, is used to calculate values and gradients +	 * Direction where to do the ray search, note that the direction does not depend of the  +	 * objective but depends from the method. +	 * @param o +	 * @param direction +	 */ +	public DifferentiableLineSearchObjective(Objective o) { +		this.o = o; +		originalParameters = new double[o.getNumParameters()]; +		searchDirection = new double[o.getNumParameters()]; +		steps = new TDoubleArrayList(); +		values = new TDoubleArrayList(); +		gradients = new TDoubleArrayList(); +	} +	/** +	 * Called whenever we start a new iteration.  +	 * Receives the ray where we are searching for and resets all values +	 *  +	 */ +	public void reset(double[] direction){ +		//Copy initial values +		System.arraycopy(o.getParameters(), 0, originalParameters, 0, o.getNumParameters()); +		System.arraycopy(direction, 0, searchDirection, 0, o.getNumParameters()); +		 +		//Initialize variables +		nrIterations = 0; +		steps.clear(); +		values.clear(); +		gradients.clear(); +	 +		values.add(o.getValue()); +		gradients.add(MathUtils.dotProduct(o.getGradient(),direction));	 +		steps.add(0); +	} +	 +	 +	/** +	 * update the current value of alpha. +	 * Takes a step with that alpha in direction +	 * Get the real objective value and gradient and calculate all required information. 
+	 */ +	public void updateAlpha(double alpha){ +		if(alpha < 0){ +			System.out.println("alpha may not be smaller that zero"); +			throw new RuntimeException(); +		} +		nrIterations++; +		steps.add(alpha); +		//x_t+1 = x_t + alpha*direction +		System.arraycopy(originalParameters,0, o.getParameters(), 0, originalParameters.length); +		MathUtils.plusEquals(o.getParameters(), searchDirection, alpha); +		o.setParameters(o.getParameters()); +//		System.out.println("Took a step of " + alpha + " new value " + o.getValue()); +		values.add(o.getValue()); +		gradients.add(MathUtils.dotProduct(o.getGradient(),searchDirection));		 +	} + +	 +	 +	public int getNrIterations(){ +		return nrIterations; +	} +	 +	/** +	 * return g(alpha) for the current value of alpha +	 * @param iter +	 * @return +	 */ +	public double getValue(int iter){ +		return values.get(iter); +	} +	 +	public double getCurrentValue(){ +		return values.get(nrIterations); +	} +	 +	public double getOriginalValue(){ +		return values.get(0); +	} + +	/** +	 * return g'(alpha) for the current value of alpha +	 * @param iter +	 * @return +	 */ +	public double getGradient(int iter){ +		return gradients.get(iter); +	} +	 +	public double getCurrentGradient(){ +		return gradients.get(nrIterations); +	} +	 +	public double getInitialGradient(){ +		return gradients.get(0); +	} +	 +	 +	 +	 +	public double getAlpha(){ +		return steps.get(nrIterations); +	} +	 +	public void printLineSearchSteps(){ +		System.out.println( +				" Steps size "+steps.size() +  +				"Values size "+values.size() + +				"Gradeients size "+gradients.size()); +		for(int i =0; i < steps.size();i++){ +			System.out.println("Iter " + i + " step " + steps.get(i) + +					" value " + values.get(i) + " grad "  + gradients.get(i)); +		} +	} +	 +	public void printSmallLineSearchSteps(){ +		for(int i =0; i < steps.size();i++){ +			System.out.print(StaticTools.prettyPrint(steps.get(i), "0.0000E00",8) + " "); +		} +		System.out.println(); +	} +	 +	public static void main(String[] args) { +		 +	} +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java new file mode 100644 index 00000000..a33eb311 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java @@ -0,0 +1,20 @@ +package optimization.linesearch; + + +public class GenericPickFirstStep{ +	double _initValue; +	public GenericPickFirstStep(double initValue) { +		_initValue = initValue; +	} +	 +	public double getFirstStep(LineSearchMethod ls){ +		return _initValue; +	} +	public void collectInitValues(LineSearchMethod ls){ +		 +	} +	 +	public void collectFinalValues(LineSearchMethod ls){ +		 +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java new file mode 100644 index 00000000..0deebcdb --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java @@ -0,0 +1,25 @@ +package optimization.linesearch; + + +public class InterpolationPickFirstStep extends GenericPickFirstStep{ +	public InterpolationPickFirstStep(double initValue) { +		super(initValue); +	} +	 +	public double getFirstStep(LineSearchMethod ls){ +		if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ +			double newStep = Math.min(300, 
1.02*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient()); +		//	System.out.println("proposing " + newStep); +			return newStep; +			 +		} +		return _initValue; +	} +	public void collectInitValues(WolfRuleLineSearch ls){ +		 +	} +	 +	public void collectFinalValues(WolfRuleLineSearch ls){ +		 +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java new file mode 100644 index 00000000..80cd7f39 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java @@ -0,0 +1,14 @@ +package optimization.linesearch; + + +public interface LineSearchMethod { +	 +	double getStepSize(DifferentiableLineSearchObjective o); +	 +	public double getInitialGradient(); +	public double getPreviousInitialGradient(); +	public double getPreviousStepUsed(); +	 +	public void setInitialStep(double initial); +	public void reset(); +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java new file mode 100644 index 00000000..4b354fd9 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java @@ -0,0 +1,33 @@ +package optimization.linesearch; + +/** + * Non newtwon since we don't always try 1... + * Not sure if that is even usefull for newton + * @author javg + * + */ +public class NonNewtonInterpolationPickFirstStep extends GenericPickFirstStep{ +	public NonNewtonInterpolationPickFirstStep(double initValue) { +		super(initValue); +	} +	 +	public double getFirstStep(LineSearchMethod ls){ +//		System.out.println("Previous step used " + ls.getPreviousStepUsed()); +//		System.out.println("PreviousGradinebt " + ls.getPreviousInitialGradient()); +//		System.out.println("CurrentGradinebt " + ls.getInitialGradient()); +		if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ +			double newStep = 1.01*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient(); +			//System.out.println("Suggesting " + newStep); +			return newStep; +			 +		} +		return _initValue; +	} +	public void collectInitValues(WolfRuleLineSearch ls){ +		 +	} +	 +	public void collectFinalValues(WolfRuleLineSearch ls){ +		 +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java new file mode 100644 index 00000000..29ccbc32 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java @@ -0,0 +1,137 @@ +package optimization.linesearch; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; +import optimization.util.MatrixOutput; + + +/** + * See ArmijoLineSearchMinimizationAlongProjectionArc for description + * @author javg + * + */ +public class ProjectedDifferentiableLineSearchObjective extends DifferentiableLineSearchObjective{ + +	 +	 +	ProjectedObjective obj; +	public ProjectedDifferentiableLineSearchObjective(Objective o) { +		super(o); +		if(!(o instanceof ProjectedObjective)){ +			System.out.println("Must receive a projected 
objective"); +			throw new RuntimeException(); +		} +		obj = (ProjectedObjective) o; +	} + +	 +	 +	public double[] projectPoint (double[] point){ +		return ((ProjectedObjective)o).projectPoint(point); +	} +	public void updateAlpha(double alpha){ +		if(alpha < 0){ +			System.out.println("alpha may not be smaller that zero"); +			throw new RuntimeException(); +		} +		 +		if(obj.auxParameters == null){ +			obj.auxParameters = new double[obj.getParameters().length]; +		} +		 +		nrIterations++; +		 +		steps.add(alpha);		 +		System.arraycopy(originalParameters, 0, obj.auxParameters, 0, obj.auxParameters.length); +		 +		//Take a step into the search direction +		 +//		MatrixOutput.printDoubleArray(obj.getGradient(), "gradient"); +		 +//		alpha=gradients.get(0)*alpha/(gradients.get(gradients.size()-1)); +	 +		//x_t+1 = x_t - alpha*gradient = x_t + alpha*direction +		MathUtils.plusEquals(obj.auxParameters, searchDirection, alpha); +//		MatrixOutput.printDoubleArray(obj.auxParameters, "before projection"); +		obj.auxParameters = projectPoint(obj.auxParameters); +//		MatrixOutput.printDoubleArray(obj.auxParameters, "after projection"); +		o.setParameters(obj.auxParameters); +//		System.out.println("new parameters"); +//		o.printParameters(); +		values.add(o.getValue()); +		//Computes the new gradient x_k-[x_k-alpha*Gradient(x_k)]+  +		MathUtils.minusEqualsInverse(originalParameters,obj.auxParameters,1); +//		MatrixOutput.printDoubleArray(obj.auxParameters, "new gradient"); +		//Dot product between the new direction and the new gradient +		double gradient = MathUtils.dotProduct(obj.auxParameters,searchDirection); +		gradients.add(gradient);	 +		if(gradient > 0){ +			System.out.println("Gradient on line search has to be smaller than zero"); +			System.out.println("Iter: " + nrIterations); +			MatrixOutput.printDoubleArray(obj.auxParameters, "new direction"); +			MatrixOutput.printDoubleArray(searchDirection, "search direction"); +			throw new RuntimeException(); +			 +		} +		 +	} +	 +	/** +	 *  +	 */ +//	public void updateAlpha(double alpha){ +//		 +//		if(alpha < 0){ +//			System.out.println("alpha may not be smaller that zero"); +//			throw new RuntimeException(); +//		} +//		 +//		nrIterations++; +//		steps.add(alpha); +//		//x_t+1 = x_t - alpha*direction +//		System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); +////		MatrixOutput.printDoubleArray(parametersChange, "parameters before step"); +////		System.out.println("Step" + alpha); +//		MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha); +// +//		MathUtils.minusEquals(parametersChange, originalGradient, alpha); +//		 +//		//Project the points into the feasibility set +////		MatrixOutput.printDoubleArray(parametersChange, "before projection"); +//		//x_k(alpha) = [x_k - alpha*grad f(x_k)]+ +//		parametersChange = projectPoint(parametersChange); +////		MatrixOutput.printDoubleArray(parametersChange, "after projection"); +//		o.setParameters(parametersChange); +//		values.add(o.getValue()); +//		//Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+ +//		 +//		direction=MathUtils.arrayMinus(parametersChange,originalParameters); +////		MatrixOutput.printDoubleArray(direction, "new direction"); +//		 +//		double gradient = MathUtils.dotProduct(originalGradient,direction); +//		gradients.add(gradient);		 +//		if(gradient > 1E-10){ +//			System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction))); +//			 +//			 +//			System.out.println("not a descent 
direction for alpha " + alpha); +//			System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); +//			MathUtils.minusEquals(parametersChange, originalGradient, 1E-20); +//			 +//			parametersChange = projectPoint(parametersChange); +//			direction=MathUtils.arrayMinus(parametersChange,originalParameters); +//			gradient = MathUtils.dotProduct(originalGradient,direction); +//			if(gradient > 0){ +//				System.out.println("Direction is really non-descent evern for small alphas:" + gradient); +//			} +//			System.out.println("ProjecteLineSearchObjective: Should be a descent direction at " + nrIterations + ": "+ gradient); +////			System.out.println(Printing.doubleArrayToString(originalGradient, null,"Original gradient")); +////			System.out.println(Printing.doubleArrayToString(originalParameters, null,"Original parameters")); +////			System.out.println(Printing.doubleArrayToString(parametersChange, null,"Projected parameters")); +////			System.out.println(Printing.doubleArrayToString(direction, null,"Direction")); +//			throw new RuntimeException(); +//		} +//	} +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java new file mode 100644 index 00000000..5489f2d0 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java @@ -0,0 +1,300 @@ +package optimization.linesearch; + +import java.io.PrintStream; +import java.util.ArrayList; + +import optimization.util.Interpolation; + + + + +/** + *  + * @author javg + * + */ +public class WolfRuleLineSearch implements LineSearchMethod{ + +	GenericPickFirstStep pickFirstStep; +	 +	double c1 = 1.0E-4; +	double c2 = 0.9; +	 +	//Application dependent +	double maxStep=100; +	 +	int extrapolationIteration; +	int maxExtrapolationIteration = 1000; +	 +	 +	double minZoomDiffTresh = 10E-10; + +	 +	ArrayList<Double> steps; +	ArrayList<Double> gradientDots; +	ArrayList<Double> functionVals; +	 +	int debugLevel = 0; +	boolean foudStep = false; +	 +	public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){ +		this.pickFirstStep = pickFirstStep; +		 +	} +	 +	 + +	 +	public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep,  double c1, double c2){ +		this.pickFirstStep = pickFirstStep; +		initialStep = pickFirstStep.getFirstStep(this); +		this.c1 = c1; +		this.c2 = c2; +	} +	 +	public void setDebugLevel(int level){ +		debugLevel = level; +	} +	 +	//Experiment +	double previousStepPicked = -1;; +	double previousInitGradientDot = -1; +	double currentInitGradientDot = -1; +	 +	double initialStep; + +	 +	public void reset(){ +		previousStepPicked = -1;; +		previousInitGradientDot = -1; +		currentInitGradientDot = -1; +		if(steps != null) +			steps.clear(); +		if(gradientDots != null) +			gradientDots.clear(); +		if(functionVals != null) +			functionVals.clear(); +	} +	 +	public void setInitialStep(double initial){ +		initialStep = pickFirstStep.getFirstStep(this); +	} +	 +	 +	 +	/** +	 * Implements Wolf Line search as described in nocetal. +	 * This process consists in two stages. The first stage we try to satisfy the +	 * biggest step size that still satisfies the curvature condition. 
We keep increasing
+	 * the step size until one of three things happens: the step satisfies both conditions and
+	 * we return it; the step violates the sufficient-decrease condition (or the value stopped
+	 * decreasing), in which case the bracket [previousStep, currentStep] must contain an
+	 * acceptable step and we call zoom on it; or the directional derivative becomes
+	 * non-negative, meaning we passed a minimum, and we call zoom with the endpoints swapped.
+	 */
+	public double getStepSize(DifferentiableLineSearchObjective objective){
+		//System.out.println("entering line search");
+		
+		foudStep = false;
+		if(debugLevel >= 1){
+			steps = new ArrayList<Double>();
+			gradientDots = new ArrayList<Double>();
+			functionVals = new ArrayList<Double>();
+		}
+		
+		currentInitGradientDot = objective.getInitialGradient();
+		
+		double previousValue = objective.getCurrentValue();
+		double previousStep = 0;
+		double currentStep = pickFirstStep.getFirstStep(this);
+		for(extrapolationIteration = 0; 
+			extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){
+			
+			objective.updateAlpha(currentStep);
+			double currentValue = objective.getCurrentValue();
+			if(debugLevel >= 1){
+				steps.add(currentStep);
+				functionVals.add(currentValue);
+				gradientDots.add(objective.getCurrentGradient());
+			}
+			
+			//The current step no longer satisfies the sufficient-decrease condition,
+			//so we cannot go further out; call zoom on the bracket.
+			if(!WolfeConditions.suficientDecrease(objective,c1)||
+					(extrapolationIteration > 0 && currentValue >= previousValue)){
+				currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
+				break;
+			}
+			
+			//Both conditions are satisfied; ready to leave
+			if(WolfeConditions.sufficientCurvature(objective,c1,c2)){
+				//Found step
+				foudStep = true;
+				break;
+			}
+			
+			/**
+			 * This means we already passed the minimum, since the dot product that should be
+			 * negative (descent direction) is now positive. So we cannot increase further. On
+			 * the other hand, because the direction is a descent direction, the objective value
+			 * at the current step is certainly smaller than at the previous step, so we swap
+			 * the order of the endpoints.
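+			 * Concretely (made-up numbers): if g(0) = 5 with g'(0) < 0, g(1) = 3 and
+			 * g(2) = 2.5 but g'(2) > 0, the minimum was passed between steps 1 and 2,
+			 * so zoom is entered with lowerStep = 2 (the smaller function value) and
+			 * higherStep = 1, i.e. with the endpoints swapped.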
+			 */ +			if(objective.getCurrentGradient() >= 0){ +				currentStep =  zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1); +				break; +			} +			 +			 +			//Ok, so we can still get a bigger step,  +			double aux = currentStep; +			//currentStep = currentStep*2; +			if(Math.abs(currentStep-maxStep)>1.1e-2){ +				currentStep = (currentStep+maxStep)/2; +			}else{ +				currentStep = currentStep*2; +			} +			previousStep = aux; +			previousValue = currentValue; +			//Could be done better +			if(currentStep >= maxStep){ +				System.out.println("Excedded max step...calling zoom with maxStepSize"); +				currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations); +			} +		} +		if(!foudStep){ +			System.out.println("Wolfe Rule exceed number of iterations"); +			if(debugLevel >= 1){ +				printSmallWolfeStats(System.out); +//				System.out.println("Line search values"); +//				DebugHelpers.getLineSearchGraph(o,  direction, originalParameters,origValue, origGradDirectionDot,c1,c2);			 +			} +			return -1; +		} +		if(debugLevel >= 1){ +			printSmallWolfeStats(System.out); +		} + +		previousStepPicked = currentStep; +		previousInitGradientDot = currentInitGradientDot; +//		objective.printLineSearchSteps(); +		return currentStep; +	} +	 +	 +	 +	 +	 +	public void printWolfeStats(PrintStream out){ +		for(int i = 0; i < steps.size(); i++){		 +			out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i)); +		} +	} +	 +	public void printSmallWolfeStats(PrintStream out){ +		for(int i = 0; i < steps.size(); i++){		 +			out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" "); +		} +		System.out.println(); +	} +	 +	 +	 +	/** +	 * Pick a step satisfying the strong wolfe condition from an given from lowerStep and higherStep +	 * picked on the routine above. +	 *  +	 * Both lowerStep and higherStep have been evaluated, so we only need to pass the iteration where they have +	 * been evaluated and save extra evaluations. +	 *  +	 * We know that lowerStepValue as to be smaller than higherStepValue, and that a point  +	 * satisfying both conditions exists in such interval. +	 *  +	 * LowerStep always satisfies at least the sufficient decrease +	 * @return +	 */ +	public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep, +			int lowerStepIter, int higherStepIter){ +		 +		if(debugLevel >=2){ +			System.out.println("Entering zoom with " + lowerStep+"-"+higherStep); +		} +		 +		double currentStep=-1; +		 +		int zoomIter = 0; +		while(zoomIter < 1000){		 +			if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){ +				o.updateAlpha(lowerStep); +				if(debugLevel >= 1){ +					steps.add(lowerStep); +					functionVals.add(o.getCurrentValue()); +					gradientDots.add(o.getCurrentGradient()); +				} +				foudStep = true; +				return lowerStep; +			}	 +	 +			//Cubic interpolation +			currentStep =  +				Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter),  +						higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter)); +			 +			//Safeguard.... 
should not be required check in what condtions it is required +			if(currentStep < 0 ){ +				currentStep = (lowerStep+higherStep)/2; +			} +			if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){ +				currentStep = (lowerStep+higherStep)/2; +			} +//			currentStep = (lowerStep+higherStep)/2; +//			System.out.println("Trying "+currentStep); +			o.updateAlpha(currentStep); +			if(debugLevel >=1){ +				steps.add(currentStep); +				functionVals.add(o.getCurrentValue()); +				gradientDots.add(o.getCurrentGradient()); +			} +			if(!WolfeConditions.suficientDecrease(o,c1) +					|| o.getCurrentValue() >= o.getValue(lowerStepIter)){ +				higherStepIter = o.nrIterations; +				higherStep = currentStep; +			} +			//Note when entering here the new step satisfies the sufficent decrease and +			// or as a function value that is better than the previous best (lowerStepFunctionValues) +			// so we either leave or change the value of the alpha low. +			else{ +				if(WolfeConditions.sufficientCurvature(o,c1,c2)){ +					//Satisfies the both wolf conditions +					foudStep = true; +					break; +				} +				//If does not satisfy curvature  +				if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){ +					higherStep = lowerStep; +					higherStepIter = lowerStepIter; +				} +				lowerStep = currentStep; +				lowerStepIter = o.nrIterations; +			} +			zoomIter++; +		} +		return currentStep; +	} + +	public double getInitialGradient() { +		return currentInitGradientDot; +		 +	} + +	public double getPreviousInitialGradient() { +		return previousInitGradientDot; +	} + +	public double getPreviousStepUsed() { +		return previousStepPicked; +	} +	 +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java new file mode 100644 index 00000000..dcc704eb --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java @@ -0,0 +1,45 @@ +package optimization.linesearch; + + +public class WolfeConditions { +	 +	/** +	 * Sufficient Increase number. 
Default constant (c1 in the conditions below).
+	 */
+	
+	/**
+	 * Typical values for the sufficient-curvature constant c2:
+	 * 0.9 - for Newton and quasi-Newton methods
+	 * 0.1 - for nonlinear conjugate gradient
+	 */
+	
+	int debugLevel = 0;
+	public void setDebugLevel(int level){
+		debugLevel = level;
+	}
+	
+	public static boolean suficientDecrease(DifferentiableLineSearchObjective o, double c1){
+		double value = o.getOriginalValue()+c1*o.getAlpha()*o.getInitialGradient();
+//		System.out.println("Sufficient Decrease original "+value+" new "+ o.getCurrentValue());
+		return o.getCurrentValue() <= value;
+	}
+	
+	public static boolean sufficientCurvature(DifferentiableLineSearchObjective o, double c1, double c2){
+//		if(debugLevel >= 2){
+//			double current = Math.abs(o.getCurrentGradient());
+//			double orig = -c2*o.getInitialGradient();
+//			if(current <= orig){
+//				return true;
+//			}else{
+//				System.out.println("Not satisfying curvature condition: curvature " + current + " wants " + orig);
+//				return false;
+//			}
+//		}
+		return Math.abs(o.getCurrentGradient()) <= -c2*o.getInitialGradient();
+	}
+	
+}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
new file mode 100644
index 00000000..0429d531
--- /dev/null
+++ b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
@@ -0,0 +1,104 @@
+package optimization.projections;
+
+
+import java.util.Random;
+
+import optimization.util.MathUtils;
+import optimization.util.MatrixOutput;
+
+/**
+ * Implements a projection onto a box set defined by bounds a and b.
+ * If either a or b is infinite, that bound is ignored.
+ * @author javg
+ *
+ */
+public class BoundsProjection extends Projection{
+
+	double a,b;
+	boolean ignoreA = false;
+	boolean ignoreB = false;
+	public BoundsProjection(double lowerBound, double upperBound) {
+		if(Double.isInfinite(lowerBound)){
+			this.ignoreA = true;
+		}else{
+			this.a = lowerBound;
+		}
+		if(Double.isInfinite(upperBound)){
+			this.ignoreB = true;
+		}else{
+			this.b = upperBound;
+		}
+	}
+	
+	/**
+	 * Projects onto the bounds
+	 * a <= x_i <= b
+	 */
+	public void project(double[] original){
+		for (int i = 0; i < original.length; i++) {
+			if(!ignoreA && original[i] < a){
+				original[i] = a;
+			}else if(!ignoreB && original[i] > b){
+				original[i] = b;
+			}
+		}
+	}
+	
+	/**
+	 * Generates a random number between a and b.
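+	 * 
+	 * A hypothetical usage sketch of this projection (values made up):
+	 *   BoundsProjection box = new BoundsProjection(0, 1);
+	 *   double[] x = {-0.5, 0.3, 2.0};
+	 *   box.project(x);                  // x is clamped to {0.0, 0.3, 1.0}
+	 *   double[] s = box.samplePoint(3); // each coordinate uniform in [0, 1)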
+	 */
+
+	Random r = new Random();
+	
+	public double[] samplePoint(int numParams) {
+		double[] point = new double[numParams];
+		for (int i = 0; i < point.length; i++) {
+			double rand = r.nextDouble();
+			if(ignoreA && ignoreB){
+				//Use a large constant instead of infinity to avoid overflow
+				point[i] = rand*(1.E100+1.E100)-1.E100;
+			}else if(ignoreA){
+				point[i] = rand*(b+1.E100)-1.E100;
+			}else if(ignoreB){
+				point[i] = rand*(1.E100-a)+a;
+			}else{
+				point[i] = rand*(b-a)+a;
+			}
+		}
+		return point;
+	}
+	
+	public static void main(String[] args) {
+		BoundsProjection sp = new BoundsProjection(0,Double.POSITIVE_INFINITY);
+		
+		MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 1");
+		MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 2");
+		MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 3");
+		
+		double[] d = {-1.1,1.2,1.4};
+		double[] original = d.clone();
+		MatrixOutput.printDoubleArray(d, "before");
+		
+		sp.project(d);
+		MatrixOutput.printDoubleArray(d, "after");
+		System.out.println("Test projection: " + sp.testProjection(original, d));
+	}
+	
+	double epsilon = 1.E-10;
+	public double[] perturbePoint(double[] point, int parameter){
+		double[] newPoint = point.clone();
+		if(!ignoreA && MathUtils.almost(point[parameter], a)){
+			newPoint[parameter] += epsilon;
+		}else if(!ignoreB && MathUtils.almost(point[parameter], b)){
+			newPoint[parameter] -= epsilon;
+		}else{
+			newPoint[parameter] -= epsilon;
+		}
+		return newPoint;
+	}
+
+}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
new file mode 100644
index 00000000..b5a9f92f
--- /dev/null
+++ b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
@@ -0,0 +1,72 @@
+package optimization.projections;
+
+import optimization.util.MathUtils;
+import optimization.util.MatrixOutput;
+import util.ArrayMath;
+import util.Printing;
+
+
+
+public abstract class Projection {
+
+	public abstract void project(double[] original);
+	
+	/**
+	 * From the projection theorem, "Nonlinear Programming", page 201,
+	 * fact 2: given some z in R^n and a vector x* in X,
+	 * x* = [z]+ iff for all x in X:
+	 * (z-x*)'(x-x*) <= 0, with equality when x = x*.
+	 * See figure 2.16 in the book.
+	 * 
+	 * @param original
+	 * @param projected
+	 * @return
+	 */
+	public boolean testProjection(double[] original, double[] projected){
+		double[] original1 = original.clone();
+		//System.out.println(Printing.doubleArrayToString(original1, null, "original"));
+		//System.out.println(Printing.doubleArrayToString(projected, null, "projected"));
+		MathUtils.minusEquals(original1, projected, 1);
+		//System.out.println(Printing.doubleArrayToString(original1, null, "minus1"));
+		for(int i = 0; i < 10; i++){
+			double[] x = samplePoint(original.length);
+		//	System.out.println(Printing.doubleArrayToString(x, null, "sample"));
+			//If x equals the projection this difference is zero, so the test passes trivially.
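+			// Worked check (made-up numbers): z = (2, 0) projected onto the unit
+			// box gives x* = (1, 0); for the feasible sample x = (0, 0),
+			// (z - x*).(x - x*) = (1, 0).(-1, 0) = -1 <= 0, as required.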
+			MathUtils.minusEquals(x, projected, 1);
+		//	System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
+			double dotProd = MathUtils.dotProduct(original1, x);
+		//	System.out.println("dot " + dotProd);
+			if(dotProd > 0) return false;
+		}
+		
+		//Perturb the point a bit in all possible directions
+		for(int i = 0; i < original.length; i++){
+			double[] x = perturbePoint(projected,i);
+		//	System.out.println(Printing.doubleArrayToString(x, null, "perturbed"));
+			//If x equals the projection this difference is zero, so the test passes trivially.
+			MathUtils.minusEquals(x, projected, 1);
+		//	System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
+			double dotProd = MathUtils.dotProduct(original1, x);
+		//	System.out.println("dot " + dotProd);
+			if(dotProd > 0) return false;
+		}
+		
+		return true;
+	}
+
+	//Samples a point from the constrained set
+	public abstract double[] samplePoint(int dimensions);
+	
+	//Perturbs a point a bit while keeping it inside the constraint set
+	public abstract double[] perturbePoint(double[] point, int parameter);
+	
+}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
new file mode 100644
index 00000000..eec11bcf
--- /dev/null
+++ b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
@@ -0,0 +1,127 @@
+package optimization.projections;
+
+
+
+import java.util.Random;
+
+import optimization.util.MathUtils;
+import optimization.util.MatrixOutput;
+
+public class SimplexProjection extends Projection{
+
+	double scale;
+	public SimplexProjection(double scale) {
+		this.scale = scale;
+	}
+	
+	/**
+	 * Projects the array onto the simplex
+	 * {x : x_i >= 0, sum_i x_i = scale}.
+	 * We follow the description in the paper
+	 * "Efficient Projections onto the l1-Ball
+	 * for Learning in High Dimensions" (Duchi et al.).
+	 */
+	public void project(double[] original){
+		double[] ds = new double[original.length];
+		System.arraycopy(original, 0, ds, 0, ds.length);
+		//If the clipped sum is already within scale there is nothing to project
+		for (int i = 0; i < ds.length; i++) ds[i] = ds[i]>0? ds[i]:0;
+		double sum = MathUtils.sum(ds);
+		if (scale - sum >= -1.E-10 ){
+			System.arraycopy(ds, 0, original, 0, ds.length);
+			//System.out.println("Not projecting");
+			return;
+		}
+		//System.out.println("projecting " + sum + " constraint " + scale);	
+		util.Array.sortDescending(ds);
+		double currentSum = 0;
+		double previousTheta = 0;
+		double theta = 0;
+		for (int i = 0; i < ds.length; i++) {
+			currentSum += ds[i];
+			theta = (currentSum-scale)/(i+1);
+			if(ds[i]-theta <= 0){
+				break;
+			}
+			previousTheta = theta;
+		}
+		//DEBUG
+		if(previousTheta < 0){
+			System.out.println("Simplex Projection: theta is smaller than zero: " + previousTheta);
+			System.exit(-1);
+		}
+		for (int i = 0; i < original.length; i++) {
+			original[i] = Math.max(original[i]-previousTheta, 0);
+		}
+	}
+	
+	/**
+	 * Samples a point from the simplex of the given scale: sample each
+	 * coordinate uniformly from [0, scale] and normalize if the sum
+	 * exceeds scale. This is probably not uniform over the simplex,
+	 * but it is enough for our goals here.
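+	 * 
+	 * For reference, a worked instance of project() above (scale = 1, made-up
+	 * input): original = (0.5, 0.8) gives sorted ds = (0.8, 0.5); the theta
+	 * candidates are (0.8-1)/1 = -0.2 and (1.3-1)/2 = 0.15, both with
+	 * ds[i]-theta > 0, so previousTheta = 0.15 and the result is
+	 * (max(0.5-0.15, 0), max(0.8-0.15, 0)) = (0.35, 0.65), which sums to 1.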
+	 */ +	Random r = new Random(); +	public double[] samplePoint(int dimensions) { +		double[] newPoint = new double[dimensions]; +		double sum =0; +		for (int i = 0; i < newPoint.length; i++) { +			double rand = r.nextDouble()*scale; +			sum+=rand; +			newPoint[i]=rand; +		} +		//Normalize +		if(sum > scale){ +			for (int i = 0; i < newPoint.length; i++) { +				newPoint[i]=scale*newPoint[i]/sum; +			} +		} +		return newPoint; +	} +	 +	public static void main(String[] args) { +		SimplexProjection sp = new SimplexProjection(1); +		 +		 +		double[] point = sp.samplePoint(3); +		MatrixOutput.printDoubleArray(point , "random 1 sum:" + MathUtils.sum(point)); +		point = sp.samplePoint(3); +		MatrixOutput.printDoubleArray(point , "random 2 sum:" + MathUtils.sum(point)); +		point = sp.samplePoint(3); +		MatrixOutput.printDoubleArray(point , "random 3 sum:" + MathUtils.sum(point)); +		 +		double[] d = {0,1.1,-10}; +		double[] original = d.clone(); +		MatrixOutput.printDoubleArray(d, "before"); +		 +		sp.project(d); +		MatrixOutput.printDoubleArray(d, "after"); +		System.out.println("Test projection: " + sp.testProjection(original, d)); +		 +	} +	 +	 +	double epsilon = 1.E-10; +	public double[] perturbePoint(double[] point, int parameter){ +		double[] newPoint = point.clone(); +		if(MathUtils.almost(MathUtils.sum(point), scale)){ +			newPoint[parameter]-=epsilon; +		} +		else if(point[parameter]==0){ +			newPoint[parameter]+=epsilon; +		}else if(MathUtils.almost(point[parameter], scale)){ +			newPoint[parameter]-=epsilon; +		} +		else{ +			newPoint[parameter]-=epsilon; +		} +		return newPoint; +	} +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java new file mode 100644 index 00000000..15760f18 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java @@ -0,0 +1,33 @@ +package optimization.stopCriteria; + +import java.util.ArrayList; + +import optimization.gradientBasedMethods.Objective; + +public class CompositeStopingCriteria implements StopingCriteria { +	 +	ArrayList<StopingCriteria> criterias; +	 +	public CompositeStopingCriteria() { +		criterias = new ArrayList<StopingCriteria>(); +	} +	 +	public void add(StopingCriteria criteria){ +		criterias.add(criteria); +	} +	 +	public boolean stopOptimization(Objective obj){ +		for(StopingCriteria criteria: criterias){ +			if(criteria.stopOptimization(obj)){ +				return true; +			} +		} +		return false; +	} +	 +	public void reset(){ +		for(StopingCriteria criteria: criterias){ +			criteria.reset(); +		} +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java new file mode 100644 index 00000000..534ff833 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java @@ -0,0 +1,30 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; + +public class GradientL2Norm implements StopingCriteria{ +	 +	/** +	 * Stop if gradientNorm/(originalGradientNorm) smaller +	 * than gradientConvergenceValue +	 */ +	protected double gradientConvergenceValue; +	 +	 +	public GradientL2Norm(double gradientConvergenceValue){ +		this.gradientConvergenceValue = gradientConvergenceValue; +	} +	 +	public void reset(){} +	 
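+	/*
+	 * A minimal wiring sketch (all types from this package; the optimize
+	 * call mirrors the examples elsewhere in this commit):
+	 *   StopingCriteria stop = new GradientL2Norm(0.001);
+	 *   CompositeStopingCriteria both = new CompositeStopingCriteria();
+	 *   both.add(stop);
+	 *   both.add(new ValueDifference(1e-6));
+	 *   // optimizer.optimize(objective, stats, both) halts when either fires
+	 */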
+	public boolean stopOptimization(Objective obj){ +		double norm = MathUtils.L2Norm(obj.gradient); +		if(norm < gradientConvergenceValue){ +			System.out.println("Gradient norm below treshold"); +			return true; +		} +		return false; +		 +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java new file mode 100644 index 00000000..4a489641 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java @@ -0,0 +1,48 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; + +/** + * Divides the norm by the norm at the begining of the iteration + * @author javg + * + */ +public class NormalizedGradientL2Norm extends GradientL2Norm{ +	 +	/** +	 * Stop if gradientNorm/(originalGradientNorm) smaller +	 * than gradientConvergenceValue +	 */ +	protected double originalGradientNorm = -1; +	 +	public void reset(){ +		originalGradientNorm = -1; +	} +	public NormalizedGradientL2Norm(double gradientConvergenceValue){ +		super(gradientConvergenceValue); +	} +	 +	 +	  +	 +	public boolean stopOptimization(Objective obj){ +			double norm = MathUtils.L2Norm(obj.gradient); +			if(originalGradientNorm == -1){ +				originalGradientNorm = norm; +			} +			if(originalGradientNorm < 1E-10){ +				System.out.println("Gradient norm is zero " +  originalGradientNorm); +				return true; +			} +			double normalizedNorm = 1.0*norm/originalGradientNorm; +			if( normalizedNorm < gradientConvergenceValue){ +				System.out.println("Gradient norm below normalized normtreshold: " + norm + " original: " + originalGradientNorm + " normalized norm: " + normalizedNorm); +				return true; +			}else{ +//				System.out.println("projected gradient norm: " + norm); +				return false; +			} +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java new file mode 100644 index 00000000..5ae554c2 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java @@ -0,0 +1,60 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; + +/** + * Divides the norm by the norm at the begining of the iteration + * @author javg + * + */ +public class NormalizedProjectedGradientL2Norm extends ProjectedGradientL2Norm{ +	 +	/** +	 * Stop if gradientNorm/(originalGradientNorm) smaller +	 * than gradientConvergenceValue +	 */ +	double originalProjectedNorm = -1; +	 +	public NormalizedProjectedGradientL2Norm(double gradientConvergenceValue){ +		super(gradientConvergenceValue); +	} +	 +	public void reset(){ +		originalProjectedNorm = -1; +	} +	 +	 +	 double[] projectGradient(ProjectedObjective obj){ +		 +		if(obj.auxParameters == null){ +			obj.auxParameters = new double[obj.getNumParameters()]; +		} +		System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters()); +		MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1); +		obj.auxParameters = obj.projectPoint(obj.auxParameters); +		
MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1); +		return obj.auxParameters; +	} +	 +	public boolean stopOptimization(Objective obj){ +		if(obj instanceof ProjectedObjective) { +			ProjectedObjective o = (ProjectedObjective) obj; +			double norm = MathUtils.L2Norm(projectGradient(o)); +			if(originalProjectedNorm == -1){ +				originalProjectedNorm = norm; +			} +			double normalizedNorm = 1.0*norm/originalProjectedNorm; +			if( normalizedNorm < gradientConvergenceValue){ +				System.out.println("Gradient norm below normalized normtreshold: " + norm + " original: " + originalProjectedNorm + " normalized norm: " + normalizedNorm); +				return true; +			}else{ +//				System.out.println("projected gradient norm: " + norm); +				return false; +			} +		} +		System.out.println("Not a projected objective"); +		throw new RuntimeException(); +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java new file mode 100644 index 00000000..6dbbc50d --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java @@ -0,0 +1,54 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; + +public class NormalizedValueDifference implements StopingCriteria{ +	 +	/** +	 * Stop if the different between values is smaller than a treshold +	 */ +	protected double valueConvergenceValue=0.01; +	protected double previousValue = Double.NaN; +	protected double currentValue = Double.NaN; +	 +	public NormalizedValueDifference(double valueConvergenceValue){ +		this.valueConvergenceValue = valueConvergenceValue; +	} +	 +	public void reset(){ +		previousValue = Double.NaN; +		currentValue = Double.NaN; +	} + +	 +	public boolean stopOptimization(Objective obj){ +		if(Double.isNaN(currentValue)){ +			currentValue = obj.getValue(); +			return false; +		}else { +			previousValue = currentValue; +			currentValue = obj.getValue(); +			if(previousValue != 0){ +				double valueDiff = Math.abs(previousValue - currentValue)/Math.abs(previousValue); +				if( valueDiff  < valueConvergenceValue){ +					System.out.println("Leaving different in values is to small: Prev "  +							+ (previousValue/previousValue) + " Curr: " + (currentValue/previousValue)  +							+ " diff: " + valueDiff); +					return true; +				} +			}else{ +				double valueDiff = Math.abs(previousValue - currentValue); +				if( valueDiff  < valueConvergenceValue){ +					System.out.println("Leaving different in values is to small: Prev "  +							+ (previousValue) + " Curr: " + (currentValue)  +							+ " diff: " + valueDiff); +					return true; +				} +			} + +			return false; +		} +		 +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java new file mode 100644 index 00000000..aadf1fd5 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java @@ -0,0 +1,51 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.gradientBasedMethods.ProjectedObjective; +import optimization.util.MathUtils; + +public class ProjectedGradientL2Norm implements StopingCriteria{ +	 +	/** +	 * Stop if gradientNorm/(originalGradientNorm) 
smaller +	 * than gradientConvergenceValue +	 */ +	protected double gradientConvergenceValue; +	 +	 +	public ProjectedGradientL2Norm(double gradientConvergenceValue){ +		this.gradientConvergenceValue = gradientConvergenceValue; +	} +	 +	public void reset(){ +		 +	} +	 +	 double[] projectGradient(ProjectedObjective obj){ +		 +		if(obj.auxParameters == null){ +			obj.auxParameters = new double[obj.getNumParameters()]; +		} +		System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters()); +		MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1); +		obj.auxParameters = obj.projectPoint(obj.auxParameters); +		MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1); +		return obj.auxParameters; +	} +	 +	public boolean stopOptimization(Objective obj){ +		if(obj instanceof ProjectedObjective) { +			ProjectedObjective o = (ProjectedObjective) obj; +			double norm = MathUtils.L2Norm(projectGradient(o)); +			if(norm < gradientConvergenceValue){ +	//			System.out.println("Gradient norm below treshold: " + norm); +				return true; +			}else{ +//				System.out.println("projected gradient norm: " + norm); +				return false; +			} +		} +		System.out.println("Not a projected objective"); +		throw new RuntimeException(); +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java new file mode 100644 index 00000000..10cf0522 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java @@ -0,0 +1,8 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; + +public interface StopingCriteria { +	public boolean stopOptimization(Objective obj); +	public void reset(); +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java new file mode 100644 index 00000000..e5d07229 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java @@ -0,0 +1,41 @@ +package optimization.stopCriteria; + +import optimization.gradientBasedMethods.Objective; +import optimization.util.MathUtils; + +public class ValueDifference implements StopingCriteria{ +	 +	/** +	 * Stop if the different between values is smaller than a treshold +	 */ +	protected double valueConvergenceValue=0.01; +	protected double previousValue = Double.NaN; +	protected double currentValue = Double.NaN; +	 +	public ValueDifference(double valueConvergenceValue){ +		this.valueConvergenceValue = valueConvergenceValue; +	} +	 +	public void reset(){ +		previousValue = Double.NaN; +		currentValue = Double.NaN; +	} +	 +	public boolean stopOptimization(Objective obj){ +		if(Double.isNaN(currentValue)){ +			currentValue = obj.getValue(); +			return false; +		}else { +			previousValue = currentValue; +			currentValue = obj.getValue(); +			if(previousValue - currentValue   < valueConvergenceValue){ +//				System.out.println("Leaving different in values is to small: Prev "  +//						+ previousValue + " Curr: " + currentValue  +//						+ " diff: " + (previousValue - currentValue)); +				return true; +			} +			return false; +		} +		 +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java new file mode 100644 index 00000000..cdbdefc6 --- 
/dev/null
+++ b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
@@ -0,0 +1,37 @@
+package optimization.util;
+
+public class Interpolation {
+
+	/**
+	 * Fits a cubic polynomial to a function given values and gradients
+	 * at two points; requires that either gradB is positive or
+	 * funcB >= funcA. See Nonlinear Programming, appendix C.
+	 * @param funcA
+	 * @param gradA
+	 * @param funcB
+	 * @param gradB
+	 */
+	public final static double cubicInterpolation(double a, 
+			double funcA, double gradA, double b, double funcB, double gradB ){
+		if(gradB < 0 && funcA > funcB){
+			System.out.println("Cannot call cubic interpolation");
+			return -1;
+		}
+		
+		double z = 3*(funcA-funcB)/(b-a) + gradA + gradB;
+		double w = Math.sqrt(z*z - gradA*gradB);
+		double min = b - (gradB+w-z)*(b-a)/(gradB-gradA+2*w);
+		return min;
+	}
+	
+	public final static double quadraticInterpolation(double initFValue, 
+			double initGrad, double point, double pointFValue){
+		double min = -1*initGrad*point*point/(2*(pointFValue-initGrad*point-initFValue));
+		return min;
+	}
+	
+	public static void main(String[] args) {
+		
+	}
+}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
new file mode 100644
index 00000000..5343a39b
--- /dev/null
+++ b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
@@ -0,0 +1,7 @@
+package optimization.util;
+
+public class Logger {
+	
+	
+	
+}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
new file mode 100644
index 00000000..af66f82c
--- /dev/null
+++ b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
@@ -0,0 +1,339 @@
+package optimization.util;
+
+import java.util.Arrays;
+
+
+
+public class MathUtils {
+	
+	/**
+	 * @param vector
+	 * @return the L2 (Euclidean) norm of vector
+	 */
+	public static double L2Norm(double[] vector){
+		double value = 0;
+		for(int i = 0; i < vector.length; i++){
+			double v = vector[i];
+			value += v*v;
+		}
+		return Math.sqrt(value);
+	}
+	
+	public static double sum(double[] v){
+		double sum = 0;
+		for (int i = 0; i < v.length; i++) {
+			sum += v[i];
+		}
+		return sum;
+	}
+	
+	/**
+	 * w = w + v
+	 * @param w
+	 * @param v
+	 */
+	public static void plusEquals(double[] w, double[] v) {
+		for(int i=0; i<w.length;i++){
+			w[i] += v[i];
+		}		
+	}
+	
+	/**
+	 * w[i] = w[i] + v
+	 * @param w
+	 * @param v
+	 */
+	public static void plusEquals(double[] w, double v) {
+		for(int i=0; i<w.length;i++){
+			w[i] += v;
+		}		
+	}
+	
+	/**
+	 * w[i] = w[i] - v
+	 * @param w
+	 * @param v
+	 */
+	public static void minusEquals(double[] w, double v) {
+		for(int i=0; i<w.length;i++){
+			w[i] -= v;
+		}		
+	}
+	
+	/**
+	 * w = w + a*v
+	 * @param w
+	 * @param v
+	 * @param a
+	 */
+	public static void plusEquals(double[] w, double[] v, double a) {
+		for(int i=0; i<w.length;i++){
+			w[i] += a*v[i];
+		}		
+	}
+	
+	/**
+	 * w = w - a*v
+	 * @param w
+	 * @param v
+	 * @param a
+	 */
+	public static void minusEquals(double[] w, double[] v, double a) {
+		for(int i=0; i<w.length;i++){
+			w[i] -= a*v[i];
+		}		
+	}
+	/**
+	 * v = w - a*v
+	 * @param w
+	 * @param v
+	 * @param a
+	 */
+	public static void minusEqualsInverse(double[] w, double[] v, double a) {
+		for(int i=0; i<w.length;i++){
+			v[i] = w[i] - a*v[i];
+		}		
+	}
+	
+	public static double dotProduct(double[] w, double[] v){
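+		// Returns sum_i w[i]*v[i]. Assumes w and v have the same length;
+		// no bounds checking is performed.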
+		double accum = 0; +		for(int i=0; i<w.length;i++){ +			accum += w[i]*v[i]; +		} +		return accum; +	} +	 +	public static double[] arrayMinus(double[]w, double[]v){ +		double result[] = w.clone(); +		for(int i=0; i<w.length;i++){ +			result[i] -= v[i]; +		} +		return result; +	} +	 +	public static double[] arrayMinus(double[] result , double[]w, double[]v){ +		for(int i=0; i<w.length;i++){ +			result[i] = w[i]-v[i]; +		} +		return result; +	} +	 +	public static double[] negation(double[]w){ +		double result[]  = new double[w.length]; +		for(int i=0; i<w.length;i++){ +			result[i] = -w[i]; +		} +		return result; +	} +	 +	public static double square(double value){ +		return value*value; +	} +	public static double[][] outerProduct(double[] w, double[] v){ +		double[][] result = new double[w.length][v.length]; +		for(int i = 0; i < w.length; i++){ +			for(int j = 0; j < v.length; j++){ +				result[i][j] = w[i]*v[j]; +			} +		} +		return result; +	} +	/** +	 * results = a*W*V +	 * @param w +	 * @param v +	 * @param a +	 * @return +	 */ +	public static double[][] weightedouterProduct(double[] w, double[] v, double a){ +		double[][] result = new double[w.length][v.length]; +		for(int i = 0; i < w.length; i++){ +			for(int j = 0; j < v.length; j++){ +				result[i][j] = a*w[i]*v[j]; +			} +		} +		return result; +	} +	 +	public static double[][] identity(int size){ +		double[][] result = new double[size][size]; +		for(int i = 0; i < size; i++){ +			result[i][i] = 1; +		} +		return result; +	} +	 +	/** +	 * v -= w +	 * @param v +	 * @param w +	 */ +	public static void minusEquals(double[][] w, double[][] v){ +		for(int i = 0; i < w.length; i++){ +			for(int j = 0; j < w[0].length; j++){ +				w[i][j] -= v[i][j]; +			} +		} +	} +	 +	/** +	 * v[i][j] -= a*w[i][j] +	 * @param v +	 * @param w +	 */ +	public static void minusEquals(double[][] w, double[][] v, double a){ +		for(int i = 0; i < w.length; i++){ +			for(int j = 0; j < w[0].length; j++){ +				w[i][j] -= a*v[i][j]; +			} +		} +	} +	 +	/** +	 * v += w +	 * @param v +	 * @param w +	 */ +	public static void plusEquals(double[][] w, double[][] v){ +		for(int i = 0; i < w.length; i++){ +			for(int j = 0; j < w[0].length; j++){ +				w[i][j] += v[i][j]; +			} +		} +	} +	 +	/** +	 * v[i][j] += a*w[i][j] +	 * @param v +	 * @param w +	 */ +	public static void plusEquals(double[][] w, double[][] v, double a){ +		for(int i = 0; i < w.length; i++){ +			for(int j = 0; j < w[0].length; j++){ +				w[i][j] += a*v[i][j]; +			} +		} +	} +	 +	 +	/** +	 * results = w*v +	 * @param w +	 * @param v +	 * @return +	 */ +	public static  double[][] matrixMultiplication(double[][] w,double[][] v){ +		int w1 = w.length; +		int w2 = w[0].length; +		int v1 = v.length; +		int v2 = v[0].length; +		 +		if(w2 != v1){ +			System.out.println("Matrix dimensions do not agree..."); +			System.exit(-1); +		} +		 +		double[][] result = new double[w1][v2]; +		for(int w_i1 = 0; w_i1 < w1; w_i1++){ +			for(int v_i2 = 0; v_i2 < v2; v_i2++){ +				double sum = 0; +				for(int w_i2 = 0; w_i2 < w2; w_i2++){ +						sum += w[w_i1 ][w_i2]*v[w_i2][v_i2];	 +				} +				result[w_i1][v_i2] = sum; +			} +		} +		return result; +	} +	 +	/** +	 * w = w.*v +	 * @param w +	 * @param v +	 */ +	public static  void matrixScalarMultiplication(double[][] w,double v){ +		int w1 = w.length; +		int w2 = w[0].length;	 +		for(int w_i1 = 0; w_i1 < w1; w_i1++){ +				for(int w_i2 = 0; w_i2 < w2; w_i2++){ +						w[w_i1 ][w_i2] *= v;	 +				} +		} +	} +	 +	public static  void scalarMultiplication(double[] w,double v){ +		
int w1 = w.length; +		for(int w_i1 = 0; w_i1 < w1; w_i1++){ +			w[w_i1 ] *= v;	 +		} +		 +	} +	 +	public static  double[] matrixVector(double[][] w,double[] v){ +		int w1 = w.length; +		int w2 = w[0].length; +		int v1 = v.length; +		 +		if(w2 != v1){ +			System.out.println("Matrix dimensions do not agree..."); +			System.exit(-1); +		} +		 +		double[] result = new double[w1]; +		for(int w_i1 = 0; w_i1 < w1; w_i1++){ +				double sum = 0; +				for(int w_i2 = 0; w_i2 < w2; w_i2++){ +						sum += w[w_i1 ][w_i2]*v[w_i2];	 +				} +				result[w_i1] = sum; +		} +		return result; +	} +	 +	public static boolean allPositive(double[] array){ +		for (int i = 0; i < array.length; i++) { +			if(array[i] < 0) return false; +		} +		return true; +	} +	 +	 +	 +	 +	 +		public static void main(String[] args) { +			double[][] m1 = new double[2][2]; +			m1[0][0]=2; +			m1[1][0]=2; +			m1[0][1]=2; +			m1[1][1]=2; +			MatrixOutput.printDoubleArray(m1, "m1"); +			double[][] m2 = new double[2][2]; +			m2[0][0]=3; +			m2[1][0]=3; +			m2[0][1]=3; +			m2[1][1]=3; +			MatrixOutput.printDoubleArray(m2, "m2"); +			double[][] result = matrixMultiplication(m1, m2); +			MatrixOutput.printDoubleArray(result, "result"); +			matrixScalarMultiplication(result, 3); +			MatrixOutput.printDoubleArray(result, "result after multiply by 3"); +		} +	 +		public static boolean almost(double a, double b, double prec){ +			return Math.abs(a-b)/Math.abs(a+b) <= prec || (almostZero(a) && almostZero(b)); +		} + +		public static boolean almost(double a, double b){ +			return Math.abs(a-b)/Math.abs(a+b) <= 1e-10 || (almostZero(a) && almostZero(b)); +		} + +		public static boolean almostZero(double a) { +			return Math.abs(a) <= 1e-30; +		} +	 +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java new file mode 100644 index 00000000..9fbdf955 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java @@ -0,0 +1,28 @@ +package optimization.util; + + +public class MatrixOutput { +	public static void printDoubleArray(double[][] array, String arrayName) { +		int size1 = array.length; +		int size2 = array[0].length; +		System.out.println(arrayName); +		for (int i = 0; i < size1; i++) { +			for (int j = 0; j < size2; j++) { +				System.out.print(" " + StaticTools.prettyPrint(array[i][j], +						"00.00E00", 4) + " "); + +			} +			System.out.println(); +		} +		System.out.println(); +	} +	 +	public static void printDoubleArray(double[] array, String arrayName) { +		System.out.println(arrayName); +		for (int i = 0; i < array.length; i++) { +				System.out.print(" " + StaticTools.prettyPrint(array[i], +						"00.00E00", 4) + " "); +		} +		System.out.println(); +	} +} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java new file mode 100644 index 00000000..bcabee06 --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java @@ -0,0 +1,180 @@ +package optimization.util; + + +import java.io.File; +import java.io.PrintStream; + +public class StaticTools { + +	static java.text.DecimalFormat fmt = new java.text.DecimalFormat(); + +	public static void createDir(String directory) { + +		File dir = new File(directory); +		if (!dir.isDirectory()) { +			boolean success = dir.mkdirs(); +			if (!success) { +				System.out.println("Unable to create directory " + directory); +				
System.exit(0); +			} +			System.out.println("Created directory " + directory); +		} else { +			System.out.println("Reusing directory " + directory); +		} +	} + +	/* +	 * q and p are indexed by source/foreign Sum_S(q) = 1 the same for p KL(q,p) = +	 * Eq*q/p +	 */ +	public static double KLDistance(double[][] p, double[][] q, int sourceSize, +			int foreignSize) { +		double totalKL = 0; +		// common.StaticTools.printMatrix(q, sourceSize, foreignSize, "q", +		// System.out); +		// common.StaticTools.printMatrix(p, sourceSize, foreignSize, "p", +		// System.out); +		for (int i = 0; i < sourceSize; i++) { +			double kl = 0; +			for (int j = 0; j < foreignSize; j++) { +				assert !Double.isNaN(q[i][j]) : "KLDistance q:  prob is NaN"; +				assert !Double.isNaN(p[i][j]) : "KLDistance p:  prob is NaN"; +				if (p[i][j] == 0 || q[i][j] == 0) { +					continue; +				} else { +					kl += q[i][j] * Math.log(q[i][j] / p[i][j]); +				} + +			} +			totalKL += kl; +		} +		assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN"; +		if (totalKL < -1.0E-10) { +			System.out.println("KL Smaller than zero " + totalKL); +			System.out.println("Source Size" + sourceSize); +			System.out.println("Foreign Size" + foreignSize); +			StaticTools.printMatrix(q, sourceSize, foreignSize, "q", +					System.out); +			StaticTools.printMatrix(p, sourceSize, foreignSize, "p", +					System.out); +			System.exit(-1); +		} +		return totalKL / sourceSize; +	} + +	/* +	 * indexed the by [fi][si] +	 */ +	public static double KLDistancePrime(double[][] p, double[][] q, +			int sourceSize, int foreignSize) { +		double totalKL = 0; +		for (int i = 0; i < sourceSize; i++) { +			double kl = 0; +			for (int j = 0; j < foreignSize; j++) { +				assert !Double.isNaN(q[j][i]) : "KLDistance q:  prob is NaN"; +				assert !Double.isNaN(p[j][i]) : "KLDistance p:  prob is NaN"; +				if (p[j][i] == 0 || q[j][i] == 0) { +					continue; +				} else { +					kl += q[j][i] * Math.log(q[j][i] / p[j][i]); +				} + +			} +			totalKL += kl; +		} +		assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN"; +		return totalKL / sourceSize; +	} + +	public static double Entropy(double[][] p, int sourceSize, int foreignSize) { +		double totalE = 0; +		for (int i = 0; i < foreignSize; i++) { +			double e = 0; +			for (int j = 0; j < sourceSize; j++) { +				e += p[i][j] * Math.log(p[i][j]); +			} +			totalE += e; +		} +		return totalE / sourceSize; +	} + +	public static double[][] copyMatrix(double[][] original, int sourceSize, +			int foreignSize) { +		double[][] result = new double[sourceSize][foreignSize]; +		for (int i = 0; i < sourceSize; i++) { +			for (int j = 0; j < foreignSize; j++) { +				result[i][j] = original[i][j]; +			} +		} +		return result; +	} + +	public static void printMatrix(double[][] matrix, int sourceSize, +			int foreignSize, String info, PrintStream out) { + +		java.text.DecimalFormat fmt = new java.text.DecimalFormat(); +		fmt.setMaximumFractionDigits(3); +		fmt.setMaximumIntegerDigits(3); +		fmt.setMinimumFractionDigits(3); +		fmt.setMinimumIntegerDigits(3); + +		out.println(info); + +		for (int i = 0; i < foreignSize; i++) { +			for (int j = 0; j < sourceSize; j++) { +				out.print(prettyPrint(matrix[j][i], ".00E00", 6) + " "); +			} +			out.println(); +		} +		out.println(); +		out.println(); +	} + +	public static void printMatrix(int[][] matrix, int sourceSize, +			int foreignSize, String info, PrintStream out) { + +		out.println(info); +		for (int i = 0; i < foreignSize; i++) { +			for (int j = 0; j < sourceSize; j++) { +				
out.print(matrix[j][i] + " ");
+			}
+			out.println();
+		}
+		out.println();
+		out.println();
+	}
+
+	public static String formatTime(long duration) {
+		StringBuilder sb = new StringBuilder();
+		double d = duration / 1000.0;
+		fmt.applyPattern("00");
+		sb.append(fmt.format((int) (d / (60 * 60))) + ":");
+		d -= ((int) d / (60 * 60)) * 60 * 60;
+		sb.append(fmt.format((int) (d / 60)) + ":");
+		d -= ((int) d / 60) * 60;
+		fmt.applyPattern("00.0");
+		sb.append(fmt.format(d));
+		return sb.toString();
+	}
+
+	public static String prettyPrint(double d, String patt, int len) {
+		fmt.applyPattern(patt);
+		String s = fmt.format(d);
+		while (s.length() < len) {
+			s = " " + s;
+		}
+		return s;
+	}
+	
+	public static long getUsedMemory(){
+		System.gc();
+		return (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/ (1024 * 1024);
+	}
+	
+	public final static boolean compareDoubles(double d1, double d2){
+		return Math.abs(d1-d2) <= 1.E-10;
+	}
+	
+}
