gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154

package optimization.gradientBasedMethods;

import java.io.IOException;

import optimization.gradientBasedMethods.stats.OptimizerStats;
import optimization.linesearch.DifferentiableLineSearchObjective;
import optimization.linesearch.LineSearchMethod;
import optimization.linesearch.ProjectedDifferentiableLineSearchObjective;
import optimization.stopCriteria.StopingCriteria;
import optimization.util.MathUtils;


/**
 * This class implements the projected gradiend
 * as described in Bertsekas "Non Linear Programming"
 * section 2.3.
 * 
 * The update is given by:
 * x_k+1 = x_k + alpha^k(xbar_k-x_k)
 * Where xbar is:
 * xbar = [x_k -s_k grad(f(x_k))]+
 * where []+ is the projection into the feasibility set
 * 
 * alpha is the step size 
 * s_k - is a positive scalar which can be view as a step size as well, by 
 * setting alpha to 1, then x_k+1 = [x_k -s_k grad(f(x_k))]+
 * This is called taking a step size along the projection arc (Bertsekas) which
 * we will use by default.
 * 
 * Note that the only place where we actually take a step size is on pick a step size
 * so this is going to be just like a normal gradient descent but use a different 
 * armijo line search where we project after taking a step.
 * 
 * 
 * @author javg
 *
 */
public class ProjectedGradientDescent extends ProjectedAbstractGradientBaseMethod{
	

	public ProjectedGradientDescent(LineSearchMethod lineSearch) {
		this.lineSearch = lineSearch;
	}
	
	//Use projected differential objective instead
	public void initializeStructures(Objective o, OptimizerStats stats, StopingCriteria stop) {
		lso = new ProjectedDifferentiableLineSearchObjective(o);
	};
	
	
	ProjectedObjective obj;
	public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){
		obj = o;
		return super.optimize(o, stats, stop);
	}
	
	public double[] getDirection(){
		for(int i = 0; i< gradient.length; i++){
			direction[i] = -gradient[i];
		}
		return direction;
	}
	
	
}


///OLD CODE

//Use projected gradient norm
//public boolean stopCriteria(double[] gradient){
//	if(originalDirenctionL2Norm == 0){
//		System.out.println("Leaving original direction norm is zero");
//		return true;	
//	}
//	if(MathUtils.L2Norm(direction)/originalDirenctionL2Norm < gradientConvergenceValue){
//		System.out.println("Leaving projected gradient Norm smaller than epsilon");
//		return true;	
//	}
//	if((previousValue - currValue)/Math.abs(previousValue) < valueConvergenceValue) {
//		System.out.println("Leaving value change below treshold " + previousValue + " - " + currValue);
//		System.out.println(previousValue/currValue + " - " + currValue/currValue 
//				+ " = " + (previousValue - currValue)/Math.abs(previousValue));
//		return true;
//	}
//	return false;
//}
//

//public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){
//		stats.collectInitStats(this, o);
//		obj = o;
//		step = 0;
//		currValue = o.getValue();
//		previousValue = Double.MAX_VALUE;
//		gradient = o.getGradient();
//		originalGradientL2Norm = MathUtils.L2Norm(gradient);
//		parameterChange = new double[gradient.length];
//		getDirection();
//		ProjectedDifferentiableLineSearchObjective lso = new ProjectedDifferentiableLineSearchObjective(o,direction);
//		
//		originalDirenctionL2Norm = MathUtils.L2Norm(direction);
//		//MatrixOutput.printDoubleArray(currParameters, "parameters");
//		for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){		
//		//	System.out.println("Iter " + currentProjectionIteration);
//			//o.printParameters();
//			
//			
//			
//			if(stop.stopOptimization(gradient)){
//				stats.collectFinalStats(this, o);
//				lastStepUsed = step;
//				return true;
//			}			
//			lso.reset(direction);
//			step = lineSearch.getStepSize(lso);
//			if(step==-1){
//				System.out.println("Failed to find step");
//				stats.collectFinalStats(this, o);
//				return false;	
//				
//			}
//			
//			//Update the direction for stopping criteria
//			previousValue = currValue;
//			currValue = o.getValue();
//			gradient = o.getGradient();
//			direction = getDirection();		
//			if(MathUtils.dotProduct(gradient, direction) > 0){
//				System.out.println("Not a descent direction");
//				System.out.println(" current stats " + stats.prettyPrint(1));
//				System.exit(-1);
//			}
//			stats.collectIterationStats(this, o);
//		}
//		lastStepUsed = step;
//		stats.collectFinalStats(this, o);
//		return false;
//	}

//public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
//	System.out.println("Objective is not a projected objective");
//	throw new RuntimeException();
//}