SHOGUN: MKLMulticlassGradient.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Alexander Binder
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/classifier/mkl/MKLMulticlassGradient.h>
00012 
00013 using namespace shogun;
00014 
00015 MKLMulticlassGradient::MKLMulticlassGradient()
00016 {
00017     numkernels = 0;
00018     pnorm=2;
00019 
00020 }
00021 MKLMulticlassGradient::~MKLMulticlassGradient()
00022 {
00023 
00024 }
00025 
00026 MKLMulticlassGradient MKLMulticlassGradient::operator=(MKLMulticlassGradient & gl)
00027 {
00028     numkernels=gl.numkernels;
00029     pnorm=gl.pnorm;
00030     return (*this);
00031 
00032 }
00033 MKLMulticlassGradient::MKLMulticlassGradient(MKLMulticlassGradient & gl)
00034 {
00035     numkernels=gl.numkernels;
00036     pnorm=gl.pnorm;
00037 
00038 }
00039 
00040 void MKLMulticlassGradient::setup(const int32_t numkernels2)
00041 {
00042     numkernels=numkernels2;
00043     if (numkernels<=1)
00044     {
00045         SG_ERROR("void glpkwrapper::setup(const int32_tnumkernels): input "
00046                 "numkernels out of bounds: %d\n",numkernels);
00047     }
00048 
00049 
00050 }
00051 
00052 void MKLMulticlassGradient::set_mkl_norm(float64_t norm)
00053 {
00054     pnorm=norm;
00055     if(pnorm<1 )
00056         SG_ERROR("MKLMulticlassGradient::set_mkl_norm(float64_t norm) : parameter pnorm<1");
00057 }
00058 
00059 
00060 void MKLMulticlassGradient::addconstraint(const ::std::vector<float64_t> & normw2,
00061         const float64_t sumofpositivealphas)
00062 {
00063     normsofsubkernels.push_back(normw2);
00064     sumsofalphas.push_back(sumofpositivealphas);
00065 }
00066 
00067 void MKLMulticlassGradient::genbetas( ::std::vector<float64_t> & weights ,const ::std::vector<float64_t> & gammas)
00068 {
00069 
00070     assert((int32_t)gammas.size()+1==numkernels);
00071 
00072     double pi4=3.151265358979238/4;
00073 
00074     weights.resize(numkernels);
00075 
00076 
00077     // numkernels-dimensional polar transform
00078     weights[0]=1;
00079 
00080     for(int32_t i=0; i< numkernels-1 ;++i)
00081     {
00082         for(int32_t k=0; k< i+1 ;++k)
00083         {
00084             weights[k]*=cos( std::min(std::max(0.0,gammas[i]),pi4) );
00085         }
00086         weights[i+1]=sin( std::min(std::max(0.0,gammas[i]),pi4) );
00087     }
00088 
00089     // pnorm- manifold adjustment
00090     if(pnorm!=2.0)
00091     {
00092         for(int32_t i=0; i< numkernels ;++i)
00093             weights[i]=pow(weights[i],2.0/pnorm);
00094     }
00095 
00096 }
00097 
00098 void MKLMulticlassGradient::gengammagradient( ::std::vector<float64_t> & gammagradient ,const ::std::vector<float64_t> & gammas,const int32_t dim)
00099 {
00100 
00101     assert((int32_t)gammas.size()+1==numkernels);
00102 
00103     double pi4=3.151265358979238/2;
00104 
00105     gammagradient.resize(numkernels);
00106     std::fill(gammagradient.begin(),gammagradient.end(),0.0);
00107 
00108     // numkernels-dimensional polar transform
00109     gammagradient[0]=1;
00110 
00111     for(int32_t i=0; i< numkernels-1 ;++i)
00112     {
00113         if(i!=dim)
00114         {
00115             for(int32_t k=0; k< std::min(i+1,dim+2) ;++k)
00116             {
00117                 gammagradient[k]*=pow( cos( std::min(std::max(0.0,gammas[i]),pi4) ), 2.0/pnorm) ;
00118             }
00119             if(i<dim)
00120                 gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pi4) ),2.0/pnorm);
00121         }
00122         else if(i==dim)
00123         {
00124             // i==dim, higher dims are 0
00125             for(int32_t k=0; k< i+1 ;++k)
00126             {
00127                 gammagradient[k]*= pow( cos( std::min(std::max(0.0,gammas[i]),pi4) ), 2.0/pnorm-1.0)*(-1)*sin( std::min(std::max(0.0,gammas[i]),pi4) );
00128             }
00129             gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pi4) ),2.0/pnorm-1)*cos( std::min(std::max(0.0,gammas[i]),pi4) );
00130         }
00131     }
00132 
00133 
00134 }
00135 
00136 
00137 
00138 float64_t MKLMulticlassGradient::objectives(const ::std::vector<float64_t> & weights, const int32_t index)
00139 {
00140     assert(index>=0);
00141     assert(index < (int32_t) sumsofalphas.size());
00142     assert(index < (int32_t) normsofsubkernels.size());
00143 
00144 
00145     float64_t obj= -sumsofalphas[index];
00146     for(int32_t i=0; i< numkernels ;++i)
00147     {
00148         obj+=0.5*normsofsubkernels[index][i]*weights[i];
00149     }
00150     return(obj);
00151 }
00152 
00153 
00154 void MKLMulticlassGradient::linesearch(std::vector<float64_t> & finalbeta,const std::vector<float64_t> & oldweights)
00155 {
00156 
00157     float64_t pi4=3.151265358979238/2;
00158 
00159     float64_t fingrad=1e-7;
00160     int32_t maxhalfiter=20;
00161     int32_t totaliters=6;
00162     float64_t maxrelobjdiff=1e-6;
00163 
00164 
00165 
00166     std::vector<float64_t> finalgamma,curgamma;
00167 
00168     curgamma.resize(numkernels-1);
00169     if(oldweights.empty())
00170     {
00171     std::fill(curgamma.begin(),curgamma.end(),pi4/2);
00172     }
00173     else
00174     {
00175     // curgamma from init: arcsin on highest dim ^p/2 !!! and divided everthing by its cos
00176         std::vector<float64_t> tmpbeta(numkernels);
00177         for(int32_t i=numkernels-1; i>= 0 ;--i)
00178         {
00179         tmpbeta[i]=pow(oldweights[i],pnorm/2);
00180         }
00181 
00182         for(int32_t i=numkernels-1; i>= 1 ;--i)
00183         {
00184             curgamma[i-1]=asin(tmpbeta[i]);
00185             for(int32_t k=numkernels-2; k>= 1 ;--k) // k==0 not necessary
00186             {
00187                 if(cos(curgamma[i-1])>0)
00188                     tmpbeta[k]/=cos(curgamma[i-1]);
00189             }
00190         }
00191 
00192     }
00193     bool finished=false;
00194     int32_t longiters=0;
00195     while(!finished)
00196     {
00197         ++longiters;
00198         std::vector<float64_t> curbeta;
00199         genbetas( curbeta ,curgamma);
00200         //find smallest objective
00201         int32_t minind=0;
00202         float64_t minval=objectives(curbeta,  minind);
00203         for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
00204         {
00205             float64_t tmpval=objectives(curbeta, i);
00206             if(tmpval<minval)
00207             {
00208                 minval=tmpval;
00209                 minind=i;
00210             }
00211         }
00212         float64_t lobj=minval;
00213         //compute gradient for smallest objective
00214         std::vector<float64_t> curgrad;
00215         for(int32_t i=0; i< numkernels-1 ;++i)
00216         {
00217             ::std::vector<float64_t> gammagradient;
00218             gengammagradient(  gammagradient ,curgamma,i);
00219 
00220             curgrad.push_back(objectives(gammagradient, minind));
00221         }
00222         //find boundary hit point (check for each dim) to [0, pi/4]
00223         std::vector<float64_t> maxalphas(numkernels-1,0);
00224         float64_t maxgrad=0;
00225         for(int32_t i=0; i< numkernels-1 ;++i)
00226         {
00227             maxgrad=std::max(maxgrad,fabs(curgrad[i]) );
00228             if(curgrad[i]<0)
00229             {
00230                 maxalphas[i]=(0-curgamma[i])/curgrad[i];
00231             }
00232             else if(curgrad[i]>0)
00233             {
00234                 maxalphas[i]=(pi4-curgamma[i])/curgrad[i];
00235             }
00236             else
00237             {
00238                 maxalphas[i]=1024*1024;
00239             }
00240         }
00241 
00242         float64_t maxalpha=maxalphas[0];
00243         for(int32_t i=1; i< numkernels-1 ;++i)
00244         {
00245             maxalpha=std::min(maxalpha,maxalphas[i]);
00246         }
00247 
00248         if((maxalpha>1024*1023)|| (maxgrad<fingrad))
00249         {
00250             //curgrad[i] approx 0 for all i terminate
00251             finished=true;
00252             finalgamma=curgamma;
00253         }
00254         else // of if(maxalpha>1024*1023)
00255         {
00256         //linesearch: shrink until min of all objective increases compared to starting point, then check left half and right halve until finish
00257         // curgamma + al*curgrad ,aximizes al in [0, maxal]
00258             float64_t leftalpha=0, rightalpha=maxalpha, midalpha=(leftalpha+rightalpha)/2;
00259 
00260             std::vector<float64_t> tmpgamma=curgamma, tmpbeta;
00261             for(int32_t i=1; i< numkernels-1 ;++i)
00262             {
00263                 tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
00264             }
00265             genbetas( tmpbeta ,tmpgamma);
00266             float64_t curobj=objectives(tmpbeta, 0);
00267             for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
00268             {
00269                 curobj=std::min(curobj,objectives(tmpbeta, i));
00270             }
00271 
00272             int curhalfiter=0;
00273             while((curobj < minval)&&(curhalfiter<maxhalfiter)&&(fabs(curobj/minval-1 ) > maxrelobjdiff ))
00274             {
00275                 rightalpha=midalpha;
00276                 midalpha=(leftalpha+rightalpha)/2;
00277                 ++curhalfiter;
00278 
00279                 tmpgamma=curgamma;
00280                 for(int32_t i=1; i< numkernels-1 ;++i)
00281                 {
00282                     tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
00283                 }
00284                 genbetas( tmpbeta ,tmpgamma);
00285                 curobj=objectives(tmpbeta, 0);
00286                 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
00287                 {
00288                     curobj=std::min(curobj,objectives(tmpbeta, i));
00289                 }
00290 
00291             }
00292 
00293             float64_t robj=curobj;
00294         float64_t tmpobj=std::max(lobj,robj);
00295             do
00296             {
00297 
00298                 tmpobj=std::max(lobj,robj);
00299 
00300 
00301 
00302                 tmpgamma=curgamma;
00303                 for(int32_t i=1; i< numkernels-1 ;++i)
00304                 {
00305                     tmpgamma[i]=tmpgamma[i]+midalpha*curgrad[i];
00306                 }
00307                 genbetas( tmpbeta ,tmpgamma);
00308                 curobj=objectives(tmpbeta, 0);
00309                 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
00310                 {
00311                     curobj=std::min(curobj,objectives(tmpbeta, i));
00312                 }
00313 
00314                 if(lobj>robj)
00315                 {
00316                     rightalpha=midalpha;
00317                     robj=curobj;
00318                 }
00319                 else
00320                 {
00321                     leftalpha=midalpha;
00322                     lobj=curobj;
00323                 }
00324                 midalpha=(leftalpha+rightalpha)/2;
00325 
00326 
00327             }
00328             while(  fabs(curobj/tmpobj-1 ) > maxrelobjdiff  );
00329             finalgamma=tmpgamma;
00330             curgamma=tmpgamma;
00331         } // else // of if(maxalpha>1024*1023)
00332 
00333         if(longiters>= totaliters)
00334         {
00335             finished=true;
00336         }
00337     }
00338     genbetas(finalbeta,finalgamma);
00339     float64_t nor=0;
00340     for(int32_t i=0; i< numkernels ;++i)
00341     {
00342         nor+=pow(finalbeta[i],pnorm);
00343     }
00344     if(nor>0)
00345     {
00346         nor=pow(nor,1.0/pnorm);
00347         for(int32_t i=0; i< numkernels ;++i)
00348         {
00349             finalbeta[i]/=nor;
00350         }
00351     }
00352 }
00353 
00354 
00355 void MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2)
00356 {
00357     if(pnorm<1 )
00358         SG_ERROR("MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2) : parameter pnorm<1");
00359 
00360     SG_SDEBUG("MKLMulticlassGradient::computeweights(...): pnorm %f\n",pnorm);
00361 
00362     int maxnumlinesrch=15;
00363     float64_t maxdiff=1e-6;
00364 
00365     bool finished =false;
00366     int numiter=0;
00367     do
00368     {
00369         ++numiter;
00370         std::vector<float64_t> initw(weights2);
00371         linesearch(weights2,initw);
00372         float64_t norm=0;
00373         if(!initw.empty())
00374         {
00375         for(size_t i=0;i<weights2.size();++i)
00376         {
00377             norm+=(weights2[i]-initw[i])*(weights2[i]-initw[i]);
00378         }
00379             norm=sqrt(norm);
00380         }
00381         else
00382         {
00383             norm=maxdiff+1;
00384         }
00385 
00386         if((norm < maxdiff) ||(numiter>=maxnumlinesrch ))
00387         {
00388             finished=true;
00389         }
00390     }
00391     while(false==finished);
00392 
00393 
00394 
00395 
00396 }