SHOGUN: gpdt.cpp Source File

Go to the documentation of this file.
00001 /******************************************************************************
00002  ***        GPDT - Gradient Projection Decomposition Technique              ***
00003  ******************************************************************************
00004  ***                                                                        ***
00005  *** GPDT is a C++ software designed to train large-scale Support Vector    ***
00006  *** Machines for binary classification in both scalar and distributed      ***
00007  *** memory parallel environments. It uses the Joachims' problem            ***
00008  *** decomposition technique to split the whole quadratic programming (QP)  ***
00009  *** problem into a sequence of smaller QP subproblems, each one being      ***
00010  *** solved by a suitable gradient projection method (GPM). The presently   ***
00011  *** implemented GPMs are the Generalized Variable Projection Method        ***
00012  *** GVPM (T. Serafini, G. Zanghirati, L. Zanni, "Gradient Projection       ***
00013  *** Methods for Quadratic Programs and Applications in Training Support    ***
00014  *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the        ***
00015  *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for  ***
00016  *** Singly Linear Constrained Quadratic Programs Subject to Lower and      ***
00017  *** Upper Bounds"; Math. Prog. to appear).                                 ***
00018  ***                                                                        ***
00019  *** Authors:                                                               ***
00020  ***  Thomas Serafini, Luca Zanni                                           ***
00021  ***   Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY ***
00022  ***   serafini.thomas@unimo.it, zanni.luca@unimo.it                        ***
00023  ***  Gaetano Zanghirati                                                    ***
00024  ***   Dept. of Mathematics, University of Ferrara - ITALY                  ***
00025  ***   g.zanghirati@unife.it                                                ***
00026  ***                                                                        ***
00027  *** Software homepage: http://dm.unife.it/gpdt                             ***
00028  ***                                                                        ***
00029  *** This work is supported by the Italian FIRB Projects                    ***
00030  ***      'Statistical Learning: Theory, Algorithms and Applications'       ***
00031  ***      (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA            ***
00032  *** and                                                                    ***
00033  ***      'Parallel Algorithms and Numerical Nonlinear Optimization'        ***
00034  ***      (grant RBAU01JYPN), http://dm.unife.it/pn2o                       ***
00035  ***                                                                        ***
00036  *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni.       ***
00037  ***                                                                        ***
00038  ***                     COPYRIGHT NOTIFICATION                             ***
00039  ***                                                                        ***
00040  *** Permission to copy and modify this software and its documentation      ***
00041  *** for internal research use is granted, provided that this notice is     ***
00042  *** retained thereon and on all copies or modifications. The authors and   ***
00043  *** their respective Universities makes no representations as to the       ***
00044  *** suitability and operability of this software for any purpose. It is    ***
00045  *** provided "as is" without express or implied warranty.                  ***
00046  *** Use of this software for commercial purposes is expressly prohibited   ***
00047  *** without contacting the authors.                                        ***
00048  ***                                                                        ***
00049  *** This program is free software; you can redistribute it and/or modify   ***
00050  *** it under the terms of the GNU General Public License as published by   ***
00051  *** the Free Software Foundation; either version 3 of the License, or      ***
00052  *** (at your option) any later version.                                    ***
00053  ***                                                                        ***
00054  *** This program is distributed in the hope that it will be useful,        ***
00055  *** but WITHOUT ANY WARRANTY; without even the implied warranty of         ***
00056  *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ***
00057  *** GNU General Public License for more details.                           ***
00058  ***                                                                        ***
00059  *** You should have received a copy of the GNU General Public License      ***
00060  *** along with this program; if not, write to the Free Software            ***
00061  *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              ***
00062  ***                                                                        ***
00063  *** File:     gpdt.cpp                                                     ***
00064  *** Type:     scalar                                                       ***
00065  *** Version:  1.0                                                          ***
00066  *** Date:     October, 2005                                                ***
00067  *** Revision: 1                                                            ***
00068  ***                                                                        ***
00069  *** SHOGUN adaptions  Written (W) 2006-2008 Soeren Sonnenburg              ***
00070  ******************************************************************************/
00071 #include <stdio.h>
00072 #include <stdlib.h>
00073 #include <string.h>
00074 #include <ctype.h>
00075 #include <math.h>
00076 #include <shogun/lib/external/gpdt.h>
00077 #include <shogun/lib/external/gpdtsolve.h>
00078 
00079 using namespace shogun;
00080 
00081 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00082 void    fatalError(const char *msg1, const char *msg2);
00083 
00084 /******************************************************************************/
00085 /*** Class constructor                                                      ***/
00086 /******************************************************************************/
00087 QPproblem::QPproblem()
00088 : CSGObject()
00089 {
00090   /*** set problem defaults ***/
00091   maxmw                = 40;
00092   c_const              = 10.0;
00093   projection_solver    = SOLVER_FLETCHER;
00094   projection_projector = 1;
00095   PreprocessMode       = 0;
00096   delta                = 1.0e-3;
00097   DELTAsv              = EPS_SV;
00098   ker_type             = 2;
00099   chunk_size           = 400;
00100   q                    = -1;
00101   y                    = NULL;
00102   tau_proximal         = 0.0;
00103   dim = 1;
00104 }
00105 
00106 /******************************************************************************/
00107 /*** Class destructor                                                       ***/
00108 /******************************************************************************/
00109 QPproblem::~QPproblem()
00110 {
00111   //if (y != NULL) free(y);
00112 }
00113 
00114 /******************************************************************************/
00115 /*** Setter method for the subproblem features                              ***/
00116 /******************************************************************************/
00117 void QPproblem::Subproblem(QPproblem &p, int32_t len, int32_t *perm)
00118 {
00119   int32_t k;
00120 
00121   memcpy(this, &p, sizeof(QPproblem));
00122   ell = len;
00123 
00124   KER->SetSubproblem(p.KER, len, perm);
00125   y = SG_MALLOC(int32_t, len);
00126   for (k = 0; k < ell; k++)
00127       y[k] = p.y[perm[k]];
00128 }
00129 
00130 namespace shogun
00131 {
00132 /******************************************************************************/
00133 /*** Extract the samples information from an SVMlight-compliant data file   ***/
00134 /******************************************************************************/
00135 int32_t prescan_document(char *file, int32_t *lines, int32_t *vlen, int32_t *ll)
00136 {
00137   FILE    *fl;
00138   int32_t ic;
00139   char    c;
00140   int64_t    current_length, current_vlen;
00141 
00142   if ((fl = fopen (file, "r")) == NULL)
00143       return(-1);
00144   current_length = 0;
00145   current_vlen   = 0;
00146 
00147   *ll    = 0;  /* length of the longest input line (the read buffer should
00148                   be allocated with this size)                              */
00149   *lines = 1;  /* number of lines in the file                               */
00150   *vlen  = 0;  /* max number of nonzero components in a vector              */
00151 
00152   while ((ic = getc(fl)) != EOF)
00153   {
00154     c = (char)ic;
00155     current_length++;
00156 
00157     if (c == ' ')
00158         current_vlen++;
00159 
00160     if (c == '\n')
00161     {
00162         (*lines)++;
00163         if (current_length > (*ll))
00164             *ll = current_length;
00165         if (current_vlen > (*vlen))
00166             *vlen = current_vlen;
00167         current_length = 0;
00168         current_vlen   = 0;
00169     }
00170   }
00171   fclose(fl);
00172   return(0);
00173 }
00174 }
00175 /******************************************************************************/
00176 /*** return 1 if problem is single class, 0 if two-class                    ***/
00177 /******************************************************************************/
00178 int32_t QPproblem::Check2Class()
00179 {
00180   int32_t i;
00181 
00182   for (i = 1; i < ell; i++)
00183       if (y[i] != y[0])
00184           return 0;
00185   return 1;
00186 }
00187 
00188 namespace shogun
00189 {
00190 /******************************************************************************/
00191 /*** Compute the size of data splitting for preprocessing                   ***/
00192 /******************************************************************************/
00193 void SplitParts(
00194     int32_t n, int32_t part, int32_t parts, int32_t *dim, int32_t *off)
00195 {
00196   int32_t r;
00197 
00198   r    = n % parts;
00199   *dim = n / parts;
00200 
00201   if (part < r)
00202   {
00203      (*dim)++;
00204      *off = *dim * part;
00205   }
00206   else
00207      *off = *dim * part + r;
00208 }
00209 }
00210 /******************************************************************************/
00211 /*** Kernel class constructor                                               ***/
00212 /******************************************************************************/
00213 sKernel::sKernel (CKernel* k, int32_t l)
00214 {
00215   kernel=k;
00216   ell=l;
00217   nor   = NULL;
00218   vaux  = NULL;
00219   lx    = NULL;
00220   ix    = NULL;
00221   x     = NULL;
00222   IsSubproblem      = 0;
00223   KernelEvaluations = 0.0;
00224 }
00225 
00226 /******************************************************************************/
00227 /*** Set the problem data for kernel evaluation                             ***/
00228 /******************************************************************************/
00229 void sKernel::SetData(
00230     float32_t **x_, int32_t **ix_, int32_t *lx_, int32_t _ell, int32_t _dim)
00231 {
00232   int32_t i, j, k;
00233 
00234   dim  = _dim;
00235   ell  = _ell;
00236   nor  = SG_MALLOC(float64_t, ell);
00237   vaux = SG_CALLOC(float32_t, dim);
00238 
00239   IsSubproblem = 0;
00240   x  = x_;
00241   ix = ix_;
00242   lx = lx_;
00243 
00244   // unroll one (sparse) vector
00245   vauxRow = 0;
00246   i       = vauxRow;
00247   for (k = 0; k < lx[i]; k++)
00248       vaux[ix[i][k]] = x[i][k];
00249 
00250   // compute the squared Euclidean norm of each vector
00251   for (i = 0; i < ell; i++)
00252   {
00253       nor[i] = 0.0;
00254       for (j = 0; j < lx[i]; j++)
00255           nor[i] += (float64_t)(x[i][j]*x[i][j]);
00256   }
00257 }
00258 
00259 /******************************************************************************/
00260 /*** Set the subproblem data                                                ***/
00261 /******************************************************************************/
00262 void sKernel::SetSubproblem(sKernel* ker, int32_t len, int32_t *perm)
00263 {
00264   int32_t k;
00265 
00266   /* arrays allocations */
00267   nor  = SG_MALLOC(float64_t, len);
00268   vaux = SG_CALLOC(float32_t, ker->dim);
00269 
00270   lx = SG_MALLOC(int32_t, len);
00271   ix = SG_MALLOC(int32_t*, len);
00272   x  = SG_MALLOC(float32_t*, len);
00273   IsSubproblem = 1;
00274 
00275   for (k = 0; k < len; k++)
00276   {
00277       x[k]   = ker->x[perm[k]];
00278       ix[k]  = ker->ix[perm[k]];
00279       lx[k]  = ker->lx[perm[k]];
00280       nor[k] = ker->nor[perm[k]];
00281   }
00282 
00283   // unroll one (sparse) vector
00284   vauxRow = 0;
00285   for (k = 0; k < lx[vauxRow]; k++)
00286       vaux[ix[vauxRow][k]] = x[vauxRow][k];
00287 }
00288 
00289 /******************************************************************************/
00290 /*** Kernel class destructor                                                ***/
00291 /******************************************************************************/
00292 sKernel::~sKernel()
00293 {
00294   int32_t i;
00295 
00296   SG_FREE(nor);
00297   SG_FREE(vaux);
00298 
00299   SG_FREE(lx);
00300   if (ix != NULL)
00301   {
00302       if (!IsSubproblem)
00303           for (i = 0; i < ell; i++)
00304               SG_FREE(ix[i]);
00305       SG_FREE(ix);
00306   }
00307   if (x != NULL)
00308   {
00309       if (!IsSubproblem)
00310           for (i = 0; i < ell; i++)
00311               SG_FREE(x[i]);
00312       SG_FREE(x);
00313   }
00314 }
00315 
00316 #endif // DOXYGEN_SHOULD_SKIP_THIS
00317 
00318 /******************************************************************************/
00319 /*** End of gpdt.cpp file                                                   ***/
00320 /******************************************************************************/