gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300

package optimization.linesearch;

import java.io.PrintStream;
import java.util.ArrayList;

import optimization.util.Interpolation;


/**
 * 
 * @author javg
 *
 */
public class WolfRuleLineSearch implements LineSearchMethod{

	GenericPickFirstStep pickFirstStep;
	
	double c1 = 1.0E-4;
	double c2 = 0.9;
	
	//Application dependent
	double maxStep=100;
	
	int extrapolationIteration;
	int maxExtrapolationIteration = 1000;
	
	
	double minZoomDiffTresh = 10E-10;

	
	ArrayList<Double> steps;
	ArrayList<Double> gradientDots;
	ArrayList<Double> functionVals;
	
	int debugLevel = 0;
	boolean foudStep = false;
	
	public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){
		this.pickFirstStep = pickFirstStep;
		
	}
	
	
	public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep,  double c1, double c2){
		this.pickFirstStep = pickFirstStep;
		initialStep = pickFirstStep.getFirstStep(this);
		this.c1 = c1;
		this.c2 = c2;
	}
	
	public void setDebugLevel(int level){
		debugLevel = level;
	}
	
	//Experiment
	double previousStepPicked = -1;;
	double previousInitGradientDot = -1;
	double currentInitGradientDot = -1;
	
	double initialStep;

	
	public void reset(){
		previousStepPicked = -1;;
		previousInitGradientDot = -1;
		currentInitGradientDot = -1;
		if(steps != null)
			steps.clear();
		if(gradientDots != null)
			gradientDots.clear();
		if(functionVals != null)
			functionVals.clear();
	}
	
	public void setInitialStep(double initial){
		initialStep = pickFirstStep.getFirstStep(this);
	}
	
	
	/**
	 * Implements Wolf Line search as described in nocetal.
	 * This process consists in two stages. The first stage we try to satisfy the
	 * biggest step size that still satisfies the curvature condition. We keep increasing
	 * the initial step size until we find a step satisfying the curvature condition, we return 
	 * success, we failed the sufficient increase so we cannot increase more and we can call zoom with 
	 * that maximum step, or we pass the minimum in which case we can call zoom the same way. 
	 * 
	 */
	public double getStepSize(DifferentiableLineSearchObjective objective){
		//System.out.println("entering line search");
		
		foudStep = false;
		if(debugLevel >= 1){
			steps = new ArrayList<Double>();
			gradientDots = new ArrayList<Double>();
			functionVals  =new ArrayList<Double>();
		}
		
		//test
		currentInitGradientDot = objective.getInitialGradient();
		
		
		double previousValue = objective.getCurrentValue();
		double previousStep = 0;
		double currentStep =pickFirstStep.getFirstStep(this);
		for(extrapolationIteration = 0; 
		extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){	
			
			objective.updateAlpha(currentStep);
			double currentValue = objective.getCurrentValue();
			if(debugLevel >= 1){
				steps.add(currentStep);
				functionVals.add(currentValue);
				gradientDots.add(objective.getCurrentGradient());
			}
			
			
			//The current step does not satisfy the sufficient decrease condition anymore
			// so we cannot get bigger than that calling zoom.
			if(!WolfeConditions.suficientDecrease(objective,c1)||					
					(extrapolationIteration > 0 && currentValue >= previousValue)){
				currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
				break;
			}
			
			//Satisfying both conditions ready to leave
			if(WolfeConditions.sufficientCurvature(objective,c1,c2)){
				//Found step
				foudStep = true;
				break;
			}
			
			/**
			 * This means that we passed the minimum already since the dot product that should be 
			 * negative (descent direction) is now positive. So we cannot increase more. On the other hand
			 * since we know the direction is a descent direction the value the objective at the current step
			 * is for sure smaller than the preivous step so we change the order.
			 */
			if(objective.getCurrentGradient() >= 0){
				currentStep =  zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1);
				break;
			}
			
			
			//Ok, so we can still get a bigger step, 
			double aux = currentStep;
			//currentStep = currentStep*2;
			if(Math.abs(currentStep-maxStep)>1.1e-2){
				currentStep = (currentStep+maxStep)/2;
			}else{
				currentStep = currentStep*2;
			}
			previousStep = aux;
			previousValue = currentValue;
			//Could be done better
			if(currentStep >= maxStep){
				System.out.println("Excedded max step...calling zoom with maxStepSize");
				currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
			}
		}
		if(!foudStep){
			System.out.println("Wolfe Rule exceed number of iterations");
			if(debugLevel >= 1){
				printSmallWolfeStats(System.out);
//				System.out.println("Line search values");
//				DebugHelpers.getLineSearchGraph(o,  direction, originalParameters,origValue, origGradDirectionDot,c1,c2);			
			}
			return -1;
		}
		if(debugLevel >= 1){
			printSmallWolfeStats(System.out);
		}

		previousStepPicked = currentStep;
		previousInitGradientDot = currentInitGradientDot;
//		objective.printLineSearchSteps();
		return currentStep;
	}
	
	
	public void printWolfeStats(PrintStream out){
		for(int i = 0; i < steps.size(); i++){		
			out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i));
		}
	}
	
	public void printSmallWolfeStats(PrintStream out){
		for(int i = 0; i < steps.size(); i++){		
			out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" ");
		}
		System.out.println();
	}
	
	
	/**
	 * Pick a step satisfying the strong wolfe condition from an given from lowerStep and higherStep
	 * picked on the routine above.
	 * 
	 * Both lowerStep and higherStep have been evaluated, so we only need to pass the iteration where they have
	 * been evaluated and save extra evaluations.
	 * 
	 * We know that lowerStepValue as to be smaller than higherStepValue, and that a point 
	 * satisfying both conditions exists in such interval.
	 * 
	 * LowerStep always satisfies at least the sufficient decrease
	 * @return
	 */
	public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep,
			int lowerStepIter, int higherStepIter){
		
		if(debugLevel >=2){
			System.out.println("Entering zoom with " + lowerStep+"-"+higherStep);
		}
		
		double currentStep=-1;
		
		int zoomIter = 0;
		while(zoomIter < 1000){		
			if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){
				o.updateAlpha(lowerStep);
				if(debugLevel >= 1){
					steps.add(lowerStep);
					functionVals.add(o.getCurrentValue());
					gradientDots.add(o.getCurrentGradient());
				}
				foudStep = true;
				return lowerStep;
			}	
	
			//Cubic interpolation
			currentStep = 
				Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter), 
						higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter));
			
			//Safeguard.... should not be required check in what condtions it is required
			if(currentStep < 0 ){
				currentStep = (lowerStep+higherStep)/2;
			}
			if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){
				currentStep = (lowerStep+higherStep)/2;
			}
//			currentStep = (lowerStep+higherStep)/2;
//			System.out.println("Trying "+currentStep);
			o.updateAlpha(currentStep);
			if(debugLevel >=1){
				steps.add(currentStep);
				functionVals.add(o.getCurrentValue());
				gradientDots.add(o.getCurrentGradient());
			}
			if(!WolfeConditions.suficientDecrease(o,c1)
					|| o.getCurrentValue() >= o.getValue(lowerStepIter)){
				higherStepIter = o.nrIterations;
				higherStep = currentStep;
			}
			//Note when entering here the new step satisfies the sufficent decrease and
			// or as a function value that is better than the previous best (lowerStepFunctionValues)
			// so we either leave or change the value of the alpha low.
			else{
				if(WolfeConditions.sufficientCurvature(o,c1,c2)){
					//Satisfies the both wolf conditions
					foudStep = true;
					break;
				}
				//If does not satisfy curvature 
				if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){
					higherStep = lowerStep;
					higherStepIter = lowerStepIter;
				}
				lowerStep = currentStep;
				lowerStepIter = o.nrIterations;
			}
			zoomIter++;
		}
		return currentStep;
	}

	public double getInitialGradient() {
		return currentInitGradientDot;
		
	}

	public double getPreviousInitialGradient() {
		return previousInitGradientDot;
	}

	public double getPreviousStepUsed() {
		return previousStepPicked;
	}
	
	
}