00001 /****************************************************************************** 00002 *** GPDT - Gradient Projection Decomposition Technique *** 00003 ****************************************************************************** 00004 *** *** 00005 *** GPDT is a C++ software designed to train large-scale Support Vector *** 00006 *** Machines for binary classification in both scalar and distributed *** 00007 *** memory parallel environments. It uses the Joachims' problem *** 00008 *** decomposition technique to split the whole quadratic programming (QP) *** 00009 *** problem into a sequence of smaller QP subproblems, each one being *** 00010 *** solved by a suitable gradient projection method (GPM). The presently *** 00011 *** implemented GPMs are the Generalized Variable Projection Method *** 00012 *** GVPM (T. Serafini, G. Zanghirati, L. Zanni, "Gradient Projection *** 00013 *** Methods for Quadratic Programs and Applications in Training Support *** 00014 *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the *** 00015 *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for *** 00016 *** Singly Linear Constrained Quadratic Programs Subject to Lower and *** 00017 *** Upper Bounds"; Math. Prog. to appear). *** 00018 *** *** 00019 *** Authors: *** 00020 *** Thomas Serafini, Luca Zanni *** 00021 *** Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY *** 00022 *** serafini.thomas@unimo.it, zanni.luca@unimo.it *** 00023 *** Gaetano Zanghirati *** 00024 *** Dept. of Mathematics, University of Ferrara - ITALY *** 00025 *** g.zanghirati@unife.it *** 00026 *** *** 00027 *** Software homepage: http://dm.unife.it/gpdt *** 00028 *** *** 00029 *** This work is supported by the Italian FIRB Projects *** 00030 *** 'Statistical Learning: Theory, Algorithms and Applications' *** 00031 *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA *** 00032 *** and *** 00033 *** 'Parallel Algorithms and Numerical Nonlinear Optimization' *** 00034 *** (grant RBAU01JYPN), http://dm.unife.it/pn2o *** 00035 *** *** 00036 *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni. *** 00037 *** *** 00038 *** COPYRIGHT NOTIFICATION *** 00039 *** *** 00040 *** Permission to copy and modify this software and its documentation *** 00041 *** for internal research use is granted, provided that this notice is *** 00042 *** retained thereon and on all copies or modifications. The authors and *** 00043 *** their respective Universities makes no representations as to the *** 00044 *** suitability and operability of this software for any purpose. It is *** 00045 *** provided "as is" without express or implied warranty. *** 00046 *** Use of this software for commercial purposes is expressly prohibited *** 00047 *** without contacting the authors. *** 00048 *** *** 00049 *** This program is free software; you can redistribute it and/or modify *** 00050 *** it under the terms of the GNU General Public License as published by *** 00051 *** the Free Software Foundation; either version 3 of the License, or *** 00052 *** (at your option) any later version. *** 00053 *** *** 00054 *** This program is distributed in the hope that it will be useful, *** 00055 *** but WITHOUT ANY WARRANTY; without even the implied warranty of *** 00056 *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *** 00057 *** GNU General Public License for more details. *** 00058 *** *** 00059 *** You should have received a copy of the GNU General Public License *** 00060 *** along with this program; if not, write to the Free Software *** 00061 *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *** 00062 *** *** 00063 *** File: gpdt.cpp *** 00064 *** Type: scalar *** 00065 *** Version: 1.0 *** 00066 *** Date: October, 2005 *** 00067 *** Revision: 1 *** 00068 *** *** 00069 *** SHOGUN adaptions Written (W) 2006-2008 Soeren Sonnenburg *** 00070 ******************************************************************************/ 00071 #include <stdio.h> 00072 #include <stdlib.h> 00073 #include <string.h> 00074 #include <ctype.h> 00075 #include <math.h> 00076 #include <shogun/lib/external/gpdt.h> 00077 #include <shogun/lib/external/gpdtsolve.h> 00078 00079 using namespace shogun; 00080 00081 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00082 void fatalError(const char *msg1, const char *msg2); 00083 00084 /******************************************************************************/ 00085 /*** Class constructor ***/ 00086 /******************************************************************************/ 00087 QPproblem::QPproblem() 00088 : CSGObject() 00089 { 00090 /*** set problem defaults ***/ 00091 maxmw = 40; 00092 c_const = 10.0; 00093 projection_solver = SOLVER_FLETCHER; 00094 projection_projector = 1; 00095 PreprocessMode = 0; 00096 delta = 1.0e-3; 00097 DELTAsv = EPS_SV; 00098 ker_type = 2; 00099 chunk_size = 400; 00100 q = -1; 00101 y = NULL; 00102 tau_proximal = 0.0; 00103 dim = 1; 00104 } 00105 00106 /******************************************************************************/ 00107 /*** Class destructor ***/ 00108 /******************************************************************************/ 00109 QPproblem::~QPproblem() 00110 { 00111 //if (y != NULL) free(y); 00112 } 00113 00114 /******************************************************************************/ 00115 /*** Setter method for the subproblem features ***/ 00116 /******************************************************************************/ 00117 void QPproblem::Subproblem(QPproblem &p, int32_t len, int32_t *perm) 00118 { 00119 int32_t k; 00120 00121 memcpy(this, &p, sizeof(QPproblem)); 00122 ell = len; 00123 00124 KER->SetSubproblem(p.KER, len, perm); 00125 y = SG_MALLOC(int32_t, len); 00126 for (k = 0; k < ell; k++) 00127 y[k] = p.y[perm[k]]; 00128 } 00129 00130 namespace shogun 00131 { 00132 /******************************************************************************/ 00133 /*** Extract the samples information from an SVMlight-compliant data file ***/ 00134 /******************************************************************************/ 00135 int32_t prescan_document(char *file, int32_t *lines, int32_t *vlen, int32_t *ll) 00136 { 00137 FILE *fl; 00138 int32_t ic; 00139 char c; 00140 int64_t current_length, current_vlen; 00141 00142 if ((fl = fopen (file, "r")) == NULL) 00143 return(-1); 00144 current_length = 0; 00145 current_vlen = 0; 00146 00147 *ll = 0; /* length of the longest input line (the read buffer should 00148 be allocated with this size) */ 00149 *lines = 1; /* number of lines in the file */ 00150 *vlen = 0; /* max number of nonzero components in a vector */ 00151 00152 while ((ic = getc(fl)) != EOF) 00153 { 00154 c = (char)ic; 00155 current_length++; 00156 00157 if (c == ' ') 00158 current_vlen++; 00159 00160 if (c == '\n') 00161 { 00162 (*lines)++; 00163 if (current_length > (*ll)) 00164 *ll = current_length; 00165 if (current_vlen > (*vlen)) 00166 *vlen = current_vlen; 00167 current_length = 0; 00168 current_vlen = 0; 00169 } 00170 } 00171 fclose(fl); 00172 return(0); 00173 } 00174 } 00175 /******************************************************************************/ 00176 /*** return 1 if problem is single class, 0 if two-class ***/ 00177 /******************************************************************************/ 00178 int32_t QPproblem::Check2Class() 00179 { 00180 int32_t i; 00181 00182 for (i = 1; i < ell; i++) 00183 if (y[i] != y[0]) 00184 return 0; 00185 return 1; 00186 } 00187 00188 namespace shogun 00189 { 00190 /******************************************************************************/ 00191 /*** Compute the size of data splitting for preprocessing ***/ 00192 /******************************************************************************/ 00193 void SplitParts( 00194 int32_t n, int32_t part, int32_t parts, int32_t *dim, int32_t *off) 00195 { 00196 int32_t r; 00197 00198 r = n % parts; 00199 *dim = n / parts; 00200 00201 if (part < r) 00202 { 00203 (*dim)++; 00204 *off = *dim * part; 00205 } 00206 else 00207 *off = *dim * part + r; 00208 } 00209 } 00210 /******************************************************************************/ 00211 /*** Kernel class constructor ***/ 00212 /******************************************************************************/ 00213 sKernel::sKernel (CKernel* k, int32_t l) 00214 { 00215 kernel=k; 00216 ell=l; 00217 nor = NULL; 00218 vaux = NULL; 00219 lx = NULL; 00220 ix = NULL; 00221 x = NULL; 00222 IsSubproblem = 0; 00223 KernelEvaluations = 0.0; 00224 } 00225 00226 /******************************************************************************/ 00227 /*** Set the problem data for kernel evaluation ***/ 00228 /******************************************************************************/ 00229 void sKernel::SetData( 00230 float32_t **x_, int32_t **ix_, int32_t *lx_, int32_t _ell, int32_t _dim) 00231 { 00232 int32_t i, j, k; 00233 00234 dim = _dim; 00235 ell = _ell; 00236 nor = SG_MALLOC(float64_t, ell); 00237 vaux = SG_CALLOC(float32_t, dim); 00238 00239 IsSubproblem = 0; 00240 x = x_; 00241 ix = ix_; 00242 lx = lx_; 00243 00244 // unroll one (sparse) vector 00245 vauxRow = 0; 00246 i = vauxRow; 00247 for (k = 0; k < lx[i]; k++) 00248 vaux[ix[i][k]] = x[i][k]; 00249 00250 // compute the squared Euclidean norm of each vector 00251 for (i = 0; i < ell; i++) 00252 { 00253 nor[i] = 0.0; 00254 for (j = 0; j < lx[i]; j++) 00255 nor[i] += (float64_t)(x[i][j]*x[i][j]); 00256 } 00257 } 00258 00259 /******************************************************************************/ 00260 /*** Set the subproblem data ***/ 00261 /******************************************************************************/ 00262 void sKernel::SetSubproblem(sKernel* ker, int32_t len, int32_t *perm) 00263 { 00264 int32_t k; 00265 00266 /* arrays allocations */ 00267 nor = SG_MALLOC(float64_t, len); 00268 vaux = SG_CALLOC(float32_t, ker->dim); 00269 00270 lx = SG_MALLOC(int32_t, len); 00271 ix = SG_MALLOC(int32_t*, len); 00272 x = SG_MALLOC(float32_t*, len); 00273 IsSubproblem = 1; 00274 00275 for (k = 0; k < len; k++) 00276 { 00277 x[k] = ker->x[perm[k]]; 00278 ix[k] = ker->ix[perm[k]]; 00279 lx[k] = ker->lx[perm[k]]; 00280 nor[k] = ker->nor[perm[k]]; 00281 } 00282 00283 // unroll one (sparse) vector 00284 vauxRow = 0; 00285 for (k = 0; k < lx[vauxRow]; k++) 00286 vaux[ix[vauxRow][k]] = x[vauxRow][k]; 00287 } 00288 00289 /******************************************************************************/ 00290 /*** Kernel class destructor ***/ 00291 /******************************************************************************/ 00292 sKernel::~sKernel() 00293 { 00294 int32_t i; 00295 00296 SG_FREE(nor); 00297 SG_FREE(vaux); 00298 00299 SG_FREE(lx); 00300 if (ix != NULL) 00301 { 00302 if (!IsSubproblem) 00303 for (i = 0; i < ell; i++) 00304 SG_FREE(ix[i]); 00305 SG_FREE(ix); 00306 } 00307 if (x != NULL) 00308 { 00309 if (!IsSubproblem) 00310 for (i = 0; i < ell; i++) 00311 SG_FREE(x[i]); 00312 SG_FREE(x); 00313 } 00314 } 00315 00316 #endif // DOXYGEN_SHOULD_SKIP_THIS 00317 00318 /******************************************************************************/ 00319 /*** End of gpdt.cpp file ***/ 00320 /******************************************************************************/