00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/classifier/mkl/MKLMulticlassGradient.h>
00012
00013 using namespace shogun;
00014
00015 MKLMulticlassGradient::MKLMulticlassGradient()
00016 {
00017 numkernels = 0;
00018 pnorm=2;
00019
00020 }
00021 MKLMulticlassGradient::~MKLMulticlassGradient()
00022 {
00023
00024 }
00025
00026 MKLMulticlassGradient MKLMulticlassGradient::operator=(MKLMulticlassGradient & gl)
00027 {
00028 numkernels=gl.numkernels;
00029 pnorm=gl.pnorm;
00030 return (*this);
00031
00032 }
00033 MKLMulticlassGradient::MKLMulticlassGradient(MKLMulticlassGradient & gl)
00034 {
00035 numkernels=gl.numkernels;
00036 pnorm=gl.pnorm;
00037
00038 }
00039
00040 void MKLMulticlassGradient::setup(const int32_t numkernels2)
00041 {
00042 numkernels=numkernels2;
00043 if (numkernels<=1)
00044 {
00045 SG_ERROR("void glpkwrapper::setup(const int32_tnumkernels): input "
00046 "numkernels out of bounds: %d\n",numkernels);
00047 }
00048
00049
00050 }
00051
00052 void MKLMulticlassGradient::set_mkl_norm(float64_t norm)
00053 {
00054 pnorm=norm;
00055 if(pnorm<1 )
00056 SG_ERROR("MKLMulticlassGradient::set_mkl_norm(float64_t norm) : parameter pnorm<1");
00057 }
00058
00059
00060 void MKLMulticlassGradient::addconstraint(const ::std::vector<float64_t> & normw2,
00061 const float64_t sumofpositivealphas)
00062 {
00063 normsofsubkernels.push_back(normw2);
00064 sumsofalphas.push_back(sumofpositivealphas);
00065 }
00066
00067 void MKLMulticlassGradient::genbetas( ::std::vector<float64_t> & weights ,const ::std::vector<float64_t> & gammas)
00068 {
00069
00070 assert((int32_t)gammas.size()+1==numkernels);
00071
00072 double pi4=3.151265358979238/4;
00073
00074 weights.resize(numkernels);
00075
00076
00077
00078 weights[0]=1;
00079
00080 for(int32_t i=0; i< numkernels-1 ;++i)
00081 {
00082 for(int32_t k=0; k< i+1 ;++k)
00083 {
00084 weights[k]*=cos( std::min(std::max(0.0,gammas[i]),pi4) );
00085 }
00086 weights[i+1]=sin( std::min(std::max(0.0,gammas[i]),pi4) );
00087 }
00088
00089
00090 if(pnorm!=2.0)
00091 {
00092 for(int32_t i=0; i< numkernels ;++i)
00093 weights[i]=pow(weights[i],2.0/pnorm);
00094 }
00095
00096 }
00097
00098 void MKLMulticlassGradient::gengammagradient( ::std::vector<float64_t> & gammagradient ,const ::std::vector<float64_t> & gammas,const int32_t dim)
00099 {
00100
00101 assert((int32_t)gammas.size()+1==numkernels);
00102
00103 double pi4=3.151265358979238/2;
00104
00105 gammagradient.resize(numkernels);
00106 std::fill(gammagradient.begin(),gammagradient.end(),0.0);
00107
00108
00109 gammagradient[0]=1;
00110
00111 for(int32_t i=0; i< numkernels-1 ;++i)
00112 {
00113 if(i!=dim)
00114 {
00115 for(int32_t k=0; k< std::min(i+1,dim+2) ;++k)
00116 {
00117 gammagradient[k]*=pow( cos( std::min(std::max(0.0,gammas[i]),pi4) ), 2.0/pnorm) ;
00118 }
00119 if(i<dim)
00120 gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pi4) ),2.0/pnorm);
00121 }
00122 else if(i==dim)
00123 {
00124
00125 for(int32_t k=0; k< i+1 ;++k)
00126 {
00127 gammagradient[k]*= pow( cos( std::min(std::max(0.0,gammas[i]),pi4) ), 2.0/pnorm-1.0)*(-1)*sin( std::min(std::max(0.0,gammas[i]),pi4) );
00128 }
00129 gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pi4) ),2.0/pnorm-1)*cos( std::min(std::max(0.0,gammas[i]),pi4) );
00130 }
00131 }
00132
00133
00134 }
00135
00136
00137
00138 float64_t MKLMulticlassGradient::objectives(const ::std::vector<float64_t> & weights, const int32_t index)
00139 {
00140 assert(index>=0);
00141 assert(index < (int32_t) sumsofalphas.size());
00142 assert(index < (int32_t) normsofsubkernels.size());
00143
00144
00145 float64_t obj= -sumsofalphas[index];
00146 for(int32_t i=0; i< numkernels ;++i)
00147 {
00148 obj+=0.5*normsofsubkernels[index][i]*weights[i];
00149 }
00150 return(obj);
00151 }
00152
00153
00154 void MKLMulticlassGradient::linesearch(std::vector<float64_t> & finalbeta,const std::vector<float64_t> & oldweights)
00155 {
00156
00157 float64_t pi4=3.151265358979238/2;
00158
00159 float64_t fingrad=1e-7;
00160 int32_t maxhalfiter=20;
00161 int32_t totaliters=6;
00162 float64_t maxrelobjdiff=1e-6;
00163
00164
00165
00166 std::vector<float64_t> finalgamma,curgamma;
00167
00168 curgamma.resize(numkernels-1);
00169 if(oldweights.empty())
00170 {
00171 std::fill(curgamma.begin(),curgamma.end(),pi4/2);
00172 }
00173 else
00174 {
00175
00176 std::vector<float64_t> tmpbeta(numkernels);
00177 for(int32_t i=numkernels-1; i>= 0 ;--i)
00178 {
00179 tmpbeta[i]=pow(oldweights[i],pnorm/2);
00180 }
00181
00182 for(int32_t i=numkernels-1; i>= 1 ;--i)
00183 {
00184 curgamma[i-1]=asin(tmpbeta[i]);
00185 for(int32_t k=numkernels-2; k>= 1 ;--k)
00186 {
00187 if(cos(curgamma[i-1])>0)
00188 tmpbeta[k]/=cos(curgamma[i-1]);
00189 }
00190 }
00191
00192 }
00193 bool finished=false;
00194 int32_t longiters=0;
00195 while(!finished)
00196 {
00197 ++longiters;
00198 std::vector<float64_t> curbeta;
00199 genbetas( curbeta ,curgamma);
00200
00201 int32_t minind=0;
00202 float64_t minval=objectives(curbeta, minind);
00203 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
00204 {
00205 float64_t tmpval=objectives(curbeta, i);
00206 if(tmpval<minval)
00207 {
00208 minval=tmpval;
00209 minind=i;
00210 }
00211 }
00212 float64_t lobj=minval;
00213
00214 std::vector<float64_t> curgrad;
00215 for(int32_t i=0; i< numkernels-1 ;++i)
00216 {
00217 ::std::vector<float64_t> gammagradient;
00218 gengammagradient( gammagradient ,curgamma,i);
00219
00220 curgrad.push_back(objectives(gammagradient, minind));
00221 }
00222
00223 std::vector<float64_t> maxalphas(numkernels-1,0);
00224 float64_t maxgrad=0;
00225 for(int32_t i=0; i< numkernels-1 ;++i)
00226 {
00227 maxgrad=std::max(maxgrad,fabs(curgrad[i]) );
00228 if(curgrad[i]<0)
00229 {
00230 maxalphas[i]=(0-curgamma[i])/curgrad[i];
00231 }
00232 else if(curgrad[i]>0)
00233 {
00234 maxalphas[i]=(pi4-curgamma[i])/curgrad[i];
00235 }
00236 else
00237 {
00238 maxalphas[i]=1024*1024;
00239 }
00240 }
00241
00242 float64_t maxalpha=maxalphas[0];
00243 for(int32_t i=1; i< numkernels-1 ;++i)
00244 {
00245 maxalpha=std::min(maxalpha,maxalphas[i]);
00246 }
00247
00248 if((maxalpha>1024*1023)|| (maxgrad<fingrad))
00249 {
00250
00251 finished=true;
00252 finalgamma=curgamma;
00253 }
00254 else
00255 {
00256
00257
00258 float64_t leftalpha=0, rightalpha=maxalpha, midalpha=(leftalpha+rightalpha)/2;
00259
00260 std::vector<float64_t> tmpgamma=curgamma, tmpbeta;
00261 for(int32_t i=1; i< numkernels-1 ;++i)
00262 {
00263 tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
00264 }
00265 genbetas( tmpbeta ,tmpgamma);
00266 float64_t curobj=objectives(tmpbeta, 0);
00267 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
00268 {
00269 curobj=std::min(curobj,objectives(tmpbeta, i));
00270 }
00271
00272 int curhalfiter=0;
00273 while((curobj < minval)&&(curhalfiter<maxhalfiter)&&(fabs(curobj/minval-1 ) > maxrelobjdiff ))
00274 {
00275 rightalpha=midalpha;
00276 midalpha=(leftalpha+rightalpha)/2;
00277 ++curhalfiter;
00278
00279 tmpgamma=curgamma;
00280 for(int32_t i=1; i< numkernels-1 ;++i)
00281 {
00282 tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
00283 }
00284 genbetas( tmpbeta ,tmpgamma);
00285 curobj=objectives(tmpbeta, 0);
00286 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
00287 {
00288 curobj=std::min(curobj,objectives(tmpbeta, i));
00289 }
00290
00291 }
00292
00293 float64_t robj=curobj;
00294 float64_t tmpobj=std::max(lobj,robj);
00295 do
00296 {
00297
00298 tmpobj=std::max(lobj,robj);
00299
00300
00301
00302 tmpgamma=curgamma;
00303 for(int32_t i=1; i< numkernels-1 ;++i)
00304 {
00305 tmpgamma[i]=tmpgamma[i]+midalpha*curgrad[i];
00306 }
00307 genbetas( tmpbeta ,tmpgamma);
00308 curobj=objectives(tmpbeta, 0);
00309 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
00310 {
00311 curobj=std::min(curobj,objectives(tmpbeta, i));
00312 }
00313
00314 if(lobj>robj)
00315 {
00316 rightalpha=midalpha;
00317 robj=curobj;
00318 }
00319 else
00320 {
00321 leftalpha=midalpha;
00322 lobj=curobj;
00323 }
00324 midalpha=(leftalpha+rightalpha)/2;
00325
00326
00327 }
00328 while( fabs(curobj/tmpobj-1 ) > maxrelobjdiff );
00329 finalgamma=tmpgamma;
00330 curgamma=tmpgamma;
00331 }
00332
00333 if(longiters>= totaliters)
00334 {
00335 finished=true;
00336 }
00337 }
00338 genbetas(finalbeta,finalgamma);
00339 float64_t nor=0;
00340 for(int32_t i=0; i< numkernels ;++i)
00341 {
00342 nor+=pow(finalbeta[i],pnorm);
00343 }
00344 if(nor>0)
00345 {
00346 nor=pow(nor,1.0/pnorm);
00347 for(int32_t i=0; i< numkernels ;++i)
00348 {
00349 finalbeta[i]/=nor;
00350 }
00351 }
00352 }
00353
00354
00355 void MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2)
00356 {
00357 if(pnorm<1 )
00358 SG_ERROR("MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2) : parameter pnorm<1");
00359
00360 SG_SDEBUG("MKLMulticlassGradient::computeweights(...): pnorm %f\n",pnorm);
00361
00362 int maxnumlinesrch=15;
00363 float64_t maxdiff=1e-6;
00364
00365 bool finished =false;
00366 int numiter=0;
00367 do
00368 {
00369 ++numiter;
00370 std::vector<float64_t> initw(weights2);
00371 linesearch(weights2,initw);
00372 float64_t norm=0;
00373 if(!initw.empty())
00374 {
00375 for(size_t i=0;i<weights2.size();++i)
00376 {
00377 norm+=(weights2[i]-initw[i])*(weights2[i]-initw[i]);
00378 }
00379 norm=sqrt(norm);
00380 }
00381 else
00382 {
00383 norm=maxdiff+1;
00384 }
00385
00386 if((norm < maxdiff) ||(numiter>=maxnumlinesrch ))
00387 {
00388 finished=true;
00389 }
00390 }
00391 while(false==finished);
00392
00393
00394
00395
00396 }