sfa.h
/* This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye
 */

#ifndef SFA_SLEP
#define SFA_SLEP

/*
 Revision History

 First version available on October 10, 2009

 A runnable version on October 15, 2009

 Major revision on October 29, 2009
 (Some functions that appeared in a previous version have been deleted; please refer to the previous version for the old functions.
 Some new functions have been added as well.)

*/

/*

 Functions contained in this header file sfa.h:

 1. Algorithms for solving the linear system A A^T z0 = A v (see the description of A in the context below)

     void Thomas(double *zMax, double *z0,
         double *Av, int nn)

     void Rose(double *zMax, double *z0,
         double *Av, int nn)

     int supportSet(double *x, double *v, double *z,
         double *g, int *S, double lambda, int nn)

     void dualityGap(double *gap, double *z,
         double *g, double *s, double *Av,
         double lambda, int nn)

     void dualityGap2(double *gap, double *z,
         double *g, double *s, double *Av,
         double lambda, int nn)

 2. The Subgradient Finding Algorithm (SFA) for solving problem (4) (refer to the description of the problem for details)

     int sfa(double *x, double *gap, int *activeS,
         double *z, double *z0, double *v, double *Av,
         double lambda, int nn, int maxStep,
         double *s, double *g,
         double tol, int tau, int flag)

     int sfa_special(double *x, double *gap, int *activeS,
         double *z, double *v, double *Av,
         double lambda, int nn, int maxStep,
         double *s, double *g,
         double tol, int tau)

     int sfa_one(double *x, double *gap, int *activeS,
         double *z, double *v, double *Av,
         double lambda, int nn, int maxStep,
         double *s, double *g,
         double tol, int tau)


*/


/*

 Some mathematical background.

 In this file, we discuss how to solve the following subproblem,

     min_x  1/2 \|x-v\|^2 + lambda \|A x\|_1,                      (1)

 which is a key problem used in the Fused Lasso Signal Approximator (FLSA).

 Also, note that FLSA is a building block for solving optimization problems with the fused Lasso penalty.

 In (1), x and v are n-dimensional vectors,
 and A is a matrix of size (n-1) x n, defined as follows (e.g., n=4):

     A = [ -1  1  0  0;
            0 -1  1  0;
            0  0 -1  1 ]

 The above problem can be reformulated as the following equivalent min-max optimization problem

     min_x max_z  1/2 \|x-v\|^2 + <A x, z>
     subject to   \|z\|_{infty} \leq lambda                        (2)


 It is easy to see that, at the optimal point,

     x = v - A^T z,                                                (3)

 where z is the optimal solution to the following optimization problem

     min_z  1/2 z^T A A^T z - <z, A v>,
     subject to   \|z\|_{infty} \leq lambda                        (4)



 Let B = A A^T. It is easy to verify that B is an (n-1) x (n-1) tridiagonal matrix.
 When n=5, B is defined as:

     B = [  2 -1  0  0;
           -1  2 -1  0;
            0 -1  2 -1;
            0  0 -1  2 ]

 Let z0 be the solution to the linear system:

     A A^T * z0 = A * v                                            (5)

 Problem (5) can be solved by the Thomas algorithm, in about 5n multiplications and 4n additions.

 It can also be solved by Rose's algorithm, in about 2n multiplications and 2n additions.

 Moreover, considering the special structure of the matrix A (and B),
 it can be solved in about n multiplications and 3n additions.

 If lambda \geq \|z0\|_{infty}, problem (1) admits a nearly analytical solution:
 x_i = mean(v) for all i.

 (Illustrative sketches of applying A and A^T, and of the tridiagonal solve in (5),
 follow this comment.)

 We have also added the restart technique; please refer to our paper for details!

*/
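
/*
 The following is a minimal illustrative sketch (not part of the SLEP interface)
 of how the operators used above can be applied: computing Av = A*v for the
 (n-1) x n difference matrix A defined in (1), and recovering x = v - A^T z as
 in (3). The names flsa_Av_sketch and flsa_x_from_z_sketch are hypothetical and
 chosen only for this example; here nn = n-1 is the length of Av and z, while v
 and x have length nn+1.
*/
static void flsa_Av_sketch(double *Av, const double *v, int nn)
{
    int i;
    /* (A v)_i = v_{i+1} - v_i, for i = 0, ..., nn-1 */
    for (i = 0; i < nn; i++)
        Av[i] = v[i + 1] - v[i];
}

static void flsa_x_from_z_sketch(double *x, const double *v, const double *z, int nn)
{
    int i;
    /* x = v - A^T z, where (A^T z)_0 = -z_0, (A^T z)_j = z_{j-1} - z_j,
       and (A^T z)_nn = z_{nn-1} */
    x[0] = v[0] + z[0];
    for (i = 1; i < nn; i++)
        x[i] = v[i] + z[i] - z[i - 1];
    x[nn] = v[nn] - z[nn - 1];
}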


void Thomas(double *zMax, double *z0, double *Av, int nn);

void Rose(double *zMax, double *z0, double *Av, int nn);
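
/*
 A minimal illustrative sketch (not part of the SLEP interface) of solving the
 tridiagonal system B z0 = Av in (5), where B = A A^T has 2 on the diagonal and
 -1 on the off-diagonals, using the Thomas algorithm. The name thomas_sketch,
 the use of <stdlib.h>/<math.h>, and the convention that *zMax returns
 \|z0\|_{infty} are assumptions made only for this example.
*/
#include <stdlib.h>
#include <math.h>

static void thomas_sketch(double *zMax, double *z0, const double *Av, int nn)
{
    int i;
    double m, t = 0.0;
    double *c = (double *) malloc(sizeof(double) * nn); /* modified super-diagonal */
    double *d = (double *) malloc(sizeof(double) * nn); /* modified right-hand side */

    /* forward elimination: the diagonal is 2, the off-diagonals are -1 */
    c[0] = -1.0 / 2.0;
    d[0] = Av[0] / 2.0;
    for (i = 1; i < nn; i++)
    {
        m = 2.0 + c[i - 1];            /* pivot after eliminating the sub-diagonal -1 */
        c[i] = -1.0 / m;
        d[i] = (Av[i] + d[i - 1]) / m;
    }

    /* back substitution, then track the infinity norm of the solution */
    z0[nn - 1] = d[nn - 1];
    for (i = nn - 2; i >= 0; i--)
        z0[i] = d[i] - c[i] * z0[i + 1];
    for (i = 0; i < nn; i++)
        if (fabs(z0[i]) > t)
            t = fabs(z0[i]);
    *zMax = t;

    free(c);
    free(d);
}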

/*

 x = omega(z)

 v: the vector to be projected
 z: the approximate solution
 g: the gradient at z (g should be computed before calling this function)

 nn: the length of z, g, and S (maximal length for S)

 n: the length of x and v

 S: records the indices of the elements in the support set
*/
int supportSet(double *x, double *v, double *z, double *g, int *S, double lambda, int nn);

/*

 we compute the duality gap corresponding to the solution z

 z: the approximate solution
 g: the gradient at z (we recompute the gradient)
 s: an auxiliary variable
 Av: A*v

 nn: the length of z, g, s, and Av

 The variables g and s will be modified.

 The variables z and Av remain unchanged.
*/
void dualityGap(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn);

/*
 Similar to dualityGap.

 The difference is that we assume that g has already been computed.
 */
void dualityGap2(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn);

/*
 generateSolution:

 generate the solution x based on the information of z and g
 (!!!! we assume that g has been computed as the gradient at z !!!!)

*/
int generateSolution(double *x, double *z, double *gap,
    double *v, double *Av,
    double *g, double *s, int *S,
    double lambda, int nn);

void restartMapping(double *g, double *z, double *v,
    double lambda, int nn);

/*

 Our objective is to solve the fused Lasso signal approximator (flsa) problem:

     min_x  g(x) = 1/2 \|x-v\|^2 + lambda \|A x\|_1                (1)

 Let x* be the solution (which is unique); it satisfies

     0 in x* - v + A^T * lambda * SGN(A x*)                        (2)

 To solve for x*, it suffices to find

     y* in A^T * lambda * SGN(A x*)                                (3)

 that satisfies

     x* - v + y* = 0                                               (4)

 which leads to

     x* = v - y*                                                   (5)

 Due to the uniqueness of x*, we conclude that y* is unique.

 As y* is a subgradient of lambda \|A x*\|_1,
 we name our method the Subgradient Finding Algorithm (sfa).

 y* in (3) can be further written as

     y* = A^T * z*                                                 (6)

 where

     z* in lambda * SGN(A x*)                                      (7)

 From (6), we have

     z* = (A A^T)^{-1} A * y*                                      (8)

 Therefore, from the uniqueness of y*, we conclude that z* is also unique.
 Next, we discuss how to solve for this unique z*.

 Problem (1) can be reformulated as the following equivalent problem:

     min_x max_z  f(x, z) = 1/2 \|x-v\|^2 + <A x, z>
     subject to   \|z\|_{infty} \leq lambda                        (9)

 At the saddle point, we have

     x = v - A^T z,                                                (10)

 which coincides with (5) and (6).

 Plugging (10) into (9), we obtain the problem

     min_z  1/2 z^T A A^T z - <z, A v>,
     subject to   \|z\|_{infty} \leq lambda                        (11)

 In this program, we apply Nesterov's method for solving (11).


 Duality gap:

 At a given point z0, we compute x0 = v - A^T z0.
 It is easy to show that

     min_x f(x, z0) = f(x0, z0) <= max_z f(x0, z)                  (12)

 Moreover, we have

     max_z f(x0, z) - min_x f(x, z0)
         <= lambda * \|A x0\|_1 - <z0, Av - A A^T z0>              (13)

 It is also easy to see that

     f(x0, z0) <= f(x*, z*) <= max_z f(x0, z)                      (14)

     g(x*) = f(x*, z*)                                             (15)

     g(x0) = max_z f(x0, z)                                        (17)

 Therefore, we have

     g(x0) - g(x*) <= lambda * \|A x0\|_1 - <z0, Av - A A^T z0>    (18)

 (An illustrative sketch of evaluating this bound follows this comment.)

 We have applied a restarting technique, which is quite involved; thus, we do not explain it here.

 */
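
/*
 A minimal illustrative sketch (not part of the SLEP interface) of evaluating
 the duality-gap bound in (13)/(18): given z, compute
 gap = lambda * \|A x0\|_1 - <z, Av - A A^T z>, using that x0 = v - A^T z
 implies A x0 = Av - A A^T z. The name duality_gap_sketch is hypothetical; the
 library's dualityGap/dualityGap2 routines work on g and s instead. fabs comes
 from <math.h>, included above for the earlier sketch.
*/
static void duality_gap_sketch(double *gap, const double *z,
        const double *Av, double lambda, int nn)
{
    int i;
    double Ax_i, Bz_i, sum = 0.0;

    for (i = 0; i < nn; i++)
    {
        /* (B z)_i with B = A A^T tridiagonal: 2 on the diagonal, -1 off it */
        Bz_i = 2.0 * z[i];
        if (i > 0)
            Bz_i -= z[i - 1];
        if (i < nn - 1)
            Bz_i -= z[i + 1];

        /* (A x0)_i = Av_i - (B z)_i */
        Ax_i = Av[i] - Bz_i;

        /* accumulate lambda * |(A x0)_i| - z_i * (Av_i - (B z)_i) */
        sum += lambda * fabs(Ax_i) - z[i] * Ax_i;
    }

    *gap = sum;
}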


 /*

 For sfa, the stepsize of Nesterov's method is fixed to 1/4, so that no line search is needed
 (a minimal sketch of this update follows the declaration of sfa below).



 Explanation of the parameters:

 Output parameters
 x: the solution to the primal problem
 gap: the duality gap (pointer)

 Input parameters
 z: the solution to the dual problem (before calling this function, z contains a starting point)
 !!!! we assume that the starting point has been successfully initialized in z !!!!
 z0: a variable used for multiple purposes:
 1) the previous solution z0
 2) the difference between z and z0, i.e., z0 = z - z0

 lambda: the regularization parameter (and the radius of the infinity ball, see (11))
 nn: the length of z, z0, Av, g, and s
 maxStep: the maximal number of iterations

 v: the point to be projected (not changed by the program)
 Av: A*v (not changed by the program)

 s: the search point (used for multiple purposes)
 g: the gradient at the search point s (also used for multiple purposes)

 tol: the tolerance of the gap
 tau: the duality gap is computed (or the restarting technique applied) every tau steps
 flag: if flag=1, we apply the restart technique
 flag=2, just run the SFA algorithm and terminate it when the absolute change is less than tol
 flag=3, just run the SFA algorithm and terminate it when the duality gap is less than tol
 flag=4, just run the SFA algorithm and terminate it when the relative duality gap is less than tol


 We would like to emphasize that the following assumptions
 have been checked in the functions that call this function:
 1) 0 < lambda < z_max
 2) nn >= 2
 3) z has been initialized with a starting point
 4) z0 has been initialized with all zeros

 The termination condition is checked every tau iterations.

 For the duality gap, please refer to (12-18).
 */
int sfa(double *x, double *gap, int *activeS,
    double *z, double *z0, double *v, double *Av,
    double lambda, int nn, int maxStep,
    double *s, double *g,
    double tol, int tau, int flag);
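
/*
 A minimal illustrative sketch (not part of the SLEP interface) of the core
 update used when solving (11) with the fixed stepsize 1/4 mentioned above:
 a gradient step at the search point s, g = A A^T s - Av, followed by
 projection onto the infinity ball \|z\|_{infty} <= lambda (entrywise
 clipping). The name nesterov_step_sketch is hypothetical; the actual sfa
 routine additionally maintains the search point from two successive iterates
 and handles restarting and termination.
*/
static void nesterov_step_sketch(double *z, double *g, const double *s,
        const double *Av, double lambda, int nn)
{
    int i;
    double zi;

    for (i = 0; i < nn; i++)
    {
        /* gradient of 1/2 s^T B s - <s, Av> at s, with B = A A^T tridiagonal */
        g[i] = 2.0 * s[i] - Av[i];
        if (i > 0)
            g[i] -= s[i - 1];
        if (i < nn - 1)
            g[i] -= s[i + 1];
    }

    for (i = 0; i < nn; i++)
    {
        /* gradient step with stepsize 1/4, then clip to [-lambda, lambda] */
        zi = s[i] - g[i] / 4.0;
        if (zi > lambda)
            zi = lambda;
        else if (zi < -lambda)
            zi = -lambda;
        z[i] = zi;
    }
}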

/*

 Refer to sfa for the definitions of the variables.

 In this function, we restart the program at every step and neglect the gradient step.

 It seems that this variant does not converge.

 This function shows that the gradient step is necessary.
 */
int sfa_special(double *x, double *gap, int *activeS,
    double *z, double *v, double *Av,
    double lambda, int nn, int maxStep,
    double *s, double *g,
    double tol, int tau);

/*
 We perform one gradient descent step, and then restart the program.
 */
int sfa_one(double *x, double *gap, int *activeS,
    double *z, double *v, double *Av,
    double lambda, int nn, int maxStep,
    double *s, double *g,
    double tol, int tau);
#endif /* ----- #ifndef SFA_SLEP ----- */

