sfa.h

Go to the documentation of this file.
00001 /*   This program is free software: you can redistribute it and/or modify
00002  *   it under the terms of the GNU General Public License as published by
00003  *   the Free Software Foundation, either version 3 of the License, or
00004  *   (at your option) any later version.
00005  *
00006  *   This program is distributed in the hope that it will be useful,
00007  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00008  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00009  *   GNU General Public License for more details.
00010  *
00011  *   You should have received a copy of the GNU General Public License
00012  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00013  *
00014  *   Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye 
00015  */
00016 
00017 #ifndef  SFA_SLEP
00018 #define  SFA_SLEP
00019 
00020 /* 
00021    Revision History
00022 
00023    First Version available on October 10, 2009 
00024 
00025    A runnable version on October 15, 2009
00026 
00027    Major revision on October 29, 2009
00028    (Some functions appearing in a previous version have deleted, please refer to the previous version for the old functions.
00029    Some new functions have been added as well)
00030 
00031 */
00032 
00033 /*
00034 
00035    Files contained in this header file sfa.h:
00036 
00037    1. Algorithms for solving the linear system A A^T z0 = Av (see the description of A from the following context)
00038 
00039    void Thomas(double *zMax, double *z0, 
00040    double * Av, int nn)
00041 
00042    void Rose(double *zMax, double *z0, 
00043    double * Av, int nn)
00044 
00045    int supportSet(double *x, double *v, double *z, 
00046    double *g, int * S, double lambda, int nn)
00047 
00048    void dualityGap(double *gap, double *z, 
00049    double *g, double *s, double *Av, 
00050    double lambda, int nn)
00051 
00052    void dualityGap2(double *gap, double *z, 
00053    double *g, double *s, double *Av, 
00054    double lambda, int nn)
00055 
00056 
00057    2. The Subgraident Finding Algorithm (SFA) for solving problem (4) (refer to the description of the problem for detail) 
00058 
00059    int sfa(double *x,     double *gap,
00060    double *z,     double *z0,   double * v,   double * Av, 
00061    double lambda, int nn,       int maxStep,
00062    double *s,     double *g,
00063    double tol,    int tau,       int flag)
00064 
00065    int sfa_special(double *x,     double *gap,
00066    double *z,     double * v,   double * Av, 
00067    double lambda, int nn,       int maxStep,
00068    double *s,     double *g,
00069    double tol,    int tau)
00070 
00071    int sfa_one(double *x,     double *gap,
00072    double *z,     double * v,   double * Av, 
00073    double lambda, int nn,       int maxStep,
00074    double *s,     double *g,
00075    double tol,    int tau)
00076 
00077 
00078 */
00079 
00080 
00081 /*
00082 
00083    Some mathematical background.
00084 
00085    In this file, we discuss how to solve the following subproblem,
00086 
00087    min_x  1/2 \|x-v\|^2  + lambda \|A x\|_1,                 (1)
00088 
00089    which is a key problem used in the Fused Lasso Signal Approximator (FLSA).
00090 
00091    Also, note that, FLSA is a building block for solving the optimation problmes with fused Lasso penalty.
00092 
00093    In (1), x and v are n-dimensional vectors, 
00094    and A is a matrix with size (n-1) x n, and is defined as follows (e.g., n=4):
00095    A= [ -1  1  0  0;
00096    0  -1 1  0;
00097    0  0  -1 1]
00098 
00099    The above problem can be reformulated as the following equivalent min-max optimization problem
00100 
00101    min_x  max_z  1/2 \|x-v\|^2  + <A x, z>
00102    subject to   \|z\|_{infty} \leq lambda                     (2)
00103 
00104 
00105    It is easy to get that, at the optimal point
00106 
00107    x = v - AT z,                             (3)
00108 
00109    where z is the optimal solution to the following optimization problem
00110 
00111    min_z  1/2  z^T A AT z - < z, A v>,
00112    subject to  \|z\|_{infty} \leq lambda                      (4)
00113 
00114 
00115 
00116    Let B=A A^T. It is easy to get that B is a (n-1) x (n-1) tridiagonal matrix.
00117    When n=5, B is defined as:
00118    B= [ 2  -1   0    0;
00119    -1  2   -1   0;
00120    0  -1   2    -1;
00121    0   0   -1   2]
00122 
00123    Let z0 be the solution to the linear system:
00124 
00125    A A^T * z0 = A * v                  (5)
00126 
00127    The problem (5) can be solve by the Thomas Algorithm, in about 5n multiplications and 4n additions.
00128 
00129    It can also be solved by the Rose's Algorithm, in about 2n multiplications and 2n additions.
00130 
00131    Moreover, considering the special structure of the matrix A (and B), 
00132    it can be solved in about n multiplications and 3n additions
00133 
00134    If lambda \geq \|z0\|_{infty}, x_i= mean(v), for all i, 
00135    the problem (1) admits near analytical solution
00136 
00137 
00138    We have also added the restart technique, please refer to our paper for detail!
00139 
00140 */
00141 
00142 
00143 void Thomas(double *zMax, double *z0, double * Av, int nn);
00144 
00145 void Rose(double *zMax, double *z0, double * Av, int nn);
00146 
00147 /*
00149 
00150 x=omega(z)
00151 
00152 v: the vector to be projected
00153 z: the approximate solution
00154 g: the gradient at z (g should be computed before calling this function
00155 
00156 nn: the length of z, g, and S (maximal length for S)
00157 
00158 n:  the length of x and v
00159 
00160 S: records the indices of the elements in the support set
00161 */
00162 int supportSet(double *x, double *v, double *z, double *g, int * S, double lambda, int nn);
00163 
00164 /*
00166 
00167 we compute the duality corresponding the solution z
00168 
00169 z: the approximate solution
00170 g: the gradient at z (we recompute the gradient)
00171 s: an auxiliary variable
00172 Av: A*v
00173 
00174 nn: the lenght for z, g, s, and Av
00175 
00176 The variables g and s shall be revised.
00177 
00178 The variables z and Av remain unchanged.
00179 */
00180 void dualityGap(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn);
00181 
00182 /*
00183    Similar to dualityGap,
00184 
00185    The difference is that, we assume that g has been computed.
00186    */
00187 void dualityGap2(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn);
00188 
00189 /*
00190 generateSolution:
00191 
00192 generate the solution x based on the information of z and g 
00193 (!!!!we assume that g has been computed as the gradient of z!!!!)
00194 
00195 */
00196 int generateSolution(double *x, double *z, double *gap,
00197         double *v, double *Av,
00198         double *g, double *s, int *S,
00199         double lambda, int nn);
00200 
00201 void restartMapping(double *g, double *z,  double * v, 
00202         double lambda, int nn);
00203 
00204 /*
00206 
00207 Our objective is to solve the fused Lasso signal approximator (flsa) problem:
00208 
00209 min_x  g(x) 1/2 \|x-v\|^2  + lambda \|A x\|_1,                      (1)
00210 
00211 Let x* be the solution (which is unique), it satisfies
00212 
00213 0 in  x* - v +  A^T * lambda *SGN(Ax*)                     (2)
00214 
00215 To solve x*, it suffices to find
00216 
00217 y*  in A^T * lambda *SGN(Ax*)                              (3)
00218 that satisfies
00219 
00220 x* - v + y* =0                                             (4)
00221 which leads to
00222 x*= v - y*                                                 (5)
00223 
00224 Due to the uniqueness of x*, we conclude that y* is unique. 
00225 
00226 As y* is a subgradient of lambda \|A x*\|_1, 
00227 we name our method as Subgradient Finding Algorithm (sfa).
00228 
00229 y* in (3) can be further written as
00230 
00231 y*= A^T * z*                                               (6)
00232 where
00233 
00234 z* in lambda* SGN (Ax*)                                    (7)
00235 
00236 From (6), we have
00237 z* = (A A^T)^{-1} A * y*                                   (8)
00238 
00239 Therefore, from the uqniueness of y*, we conclude that z* is also unique.
00240 Next, we discuss how to solve this unique z*.
00241 
00242 The problem (1) can reformulated as the following equivalent problem:    
00243 
00244 min_x  max_z  f(x, z)= 1/2 \|x-v\|^2  + <A x, z>
00245 subject to   \|z\|_{infty} \leq lambda                                  (9)
00246 
00247 At the saddle point, we have
00248 
00249 x = v - AT z,                                            (10)
00250 
00251 which somehow concides with (5) and (6)
00252 
00253 Plugging (10) into (9), we obtain the problem
00254 
00255 min_z  1/2  z^T A AT z - < z, A v>,
00256 subject to  \|z\|_{infty} \leq lambda,                             (11)
00257 
00258 In this program, we apply the Nesterov's method for solving (11).
00259 
00260 
00261 Duality gap:
00262 
00263 At a given point z0, we compute x0= v - A^T z0.
00264 It is easy to show that
00265 min_x f(x, z0) = f(x0, z0) <= max_z f(x0, z)               (12)
00266 
00267 Moreover, we have
00268 max_z f(x0, z) - min_x f(x, z0) 
00269 <= lambda * \|A x0\|_1 - < z0, Av - A A^T z0>           (13)
00270 
00271 It is also to get that
00272 
00273 f(x0, z0) <= f(x*, z*) <= max_z f(x0, z)                   (14)
00274 
00275 g(x*)=f(x*, z*)                                            (15)
00276 
00277 g(x0)=max_z f(x0, z)                                       (17)
00278 
00279     Therefore, we have
00280 
00281 g(x0)-g(x*) <= lambda * \|A x0\|_1 - < z0, Av - A A^T z0>  (18)
00282 
00283 
00284     We have applied a restarting technique, which is quite involved; and thus, we do not explain here.
00285 
00287         */
00288 
00289 
00290         /*
00292 
00293         For sfa, the stepsize of the Nesterov's method is fixed to 1/4, so that no line search is needed.
00294 
00295 
00296 
00297         Explanation of the parameters:
00298 
00299         Output parameters
00300         x:    the solution to the primal problem
00301         gap:  the duality gap (pointer)
00302 
00303         Input parameters
00304         z:    the solution to the dual problem (before calling this function, z contains a starting point)
00305         !!!!we assume that the starting point has been successfully initialized in z !!!!
00306         z0:   a variable used for multiple purposes:
00307         1) the previous solution z0
00308         2) the difference between z and z0, i.e., z0=z- z0
00309 
00310         lambda:   the regularization parameter (and the radius of the infity ball, see (11)).
00311         nn:       the length of z, z0, Av, g, and s
00312         maxStep:  the maximal number of iterations
00313 
00314         v:    the point to be projected (not changed after the program)
00315         Av:   A*v (not changed after the program)
00316 
00317         s:        the search point (used for multiple purposes)
00318         g:        the gradient at g (and it is also used for multiple purposes)
00319 
00320         tol:      the tolerance of the gap
00321         tau:  the duality gap or the restarting technique is done every tau steps
00322         flag: if flag=1,  we apply the resart technique
00323         flag=2,  just run the SFA algorithm, terminate it when the absolution change is less than tol
00324         flag=3,  just run the SFA algorithm, terminate it when the duality gap is less than tol
00325         flag=4,  just run the SFA algorithm, terminate it when the relative duality gap is less than tol
00326 
00327 
00328         We would like to emphasis that the following assumptions 
00329         have been checked in the functions that call this function:
00330         1) 0< lambda < z_max
00331         2) nn >=2
00332         3) z has been initialized with a starting point
00333         4) z0 has been initialized with all zeros
00334 
00335         The termination condition is checked every tau iterations.
00336 
00337         For the duality gap, please refer to (12-18)
00338         */
00339 int sfa(double *x,     double *gap, int * activeS,
00340         double *z,     double *z0,   double * v,   double * Av, 
00341         double lambda, int nn,       int maxStep,
00342         double *s,     double *g,
00343         double tol,    int tau,       int flag);
00344 
00345 /*
00346 
00347    Refer to sfa for the defintions of the variables  
00348 
00349    In this file, we restart the program every step, and neglect the gradient step.
00350 
00351    It seems that, this program does not converge.
00352 
00353    This function shows that the gradient step is necessary.
00354    */
00355 int sfa_special(double *x,     double *gap,  int * activeS,
00356         double *z,     double * v,   double * Av, 
00357         double lambda, int nn,       int maxStep,
00358         double *s,     double *g,
00359         double tol,    int tau);
00360 
00361 /*
00362    We do one gradient descent, and then restart the program
00363    */
00364 int sfa_one(double *x,     double *gap, int * activeS,
00365         double *z,     double * v,   double * Av, 
00366         double lambda, int nn,       int maxStep,
00367         double *s,     double *g,
00368         double tol,    int tau);
00369 #endif   /* ----- #ifndef SFA_SLEP  ----- */
00370 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation