MKLMulticlassGradient.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2009 Alexander Binder
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 *
 * Update to patch 0.10.0 - thanks to Eric aka Yoo (thereisnoknife@gmail.com)
 *
 */

#include <shogun/classifier/mkl/MKLMulticlassGradient.h>
#include <shogun/mathematics/Math.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>

using namespace shogun;

MKLMulticlassGradient::MKLMulticlassGradient()
{
    numkernels = 0;
    pnorm = 2;
}

MKLMulticlassGradient::~MKLMulticlassGradient()
{
}

MKLMulticlassGradient MKLMulticlassGradient::operator=(MKLMulticlassGradient& gl)
{
    numkernels = gl.numkernels;
    pnorm = gl.pnorm;
    return (*this);
}

MKLMulticlassGradient::MKLMulticlassGradient(MKLMulticlassGradient& gl)
{
    numkernels = gl.numkernels;
    pnorm = gl.pnorm;
}

void MKLMulticlassGradient::setup(const int32_t numkernels2)
{
    numkernels = numkernels2;
    if (numkernels <= 1)
    {
        SG_ERROR("MKLMulticlassGradient::setup(const int32_t numkernels): input "
                "numkernels out of bounds: %d\n", numkernels);
    }
}

void MKLMulticlassGradient::set_mkl_norm(float64_t norm)
{
    pnorm = norm;
    if (pnorm < 1)
        SG_ERROR("MKLMulticlassGradient::set_mkl_norm(float64_t norm): parameter pnorm<1")
}


void MKLMulticlassGradient::addconstraint(const ::std::vector<float64_t>& normw2,
        const float64_t sumofpositivealphas)
{
    normsofsubkernels.push_back(normw2);
    sumsofalphas.push_back(sumofpositivealphas);
}

void MKLMulticlassGradient::genbetas(::std::vector<float64_t>& weights, const ::std::vector<float64_t>& gammas)
{
    assert((int32_t)gammas.size()+1 == numkernels);

    const float64_t pi4 = 3.141592653589793/2; // pi/2: upper bound for each polar angle

    weights.resize(numkernels);

    // numkernels-dimensional polar transform
    weights[0] = 1;

    for (int32_t i=0; i<numkernels-1; ++i)
    {
        for (int32_t k=0; k<i+1; ++k)
        {
            weights[k] *= cos(std::min(std::max(0.0, gammas[i]), pi4));
        }
        weights[i+1] = sin(std::min(std::max(0.0, gammas[i]), pi4));
    }

    // p-norm manifold adjustment
    if (pnorm != 2.0)
    {
        for (int32_t i=0; i<numkernels; ++i)
            weights[i] = pow(weights[i], 2.0/pnorm);
    }
}
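
The loop above is an n-dimensional polar transform (writing n for numkernels, beta for weights, and gamma for gammas; this notation is introduced here for exposition only). For angles clamped to [0, pi/2] it produces

\[
\beta_0 = \prod_{i=0}^{n-2}\cos\gamma_i,
\qquad
\beta_k = \sin\gamma_{k-1}\prod_{i=k}^{n-2}\cos\gamma_i \quad (1 \le k \le n-1),
\]

so that \(\sum_k \beta_k^2 = 1\) by telescoping \(\sin^2+\cos^2=1\). The final adjustment \(\beta_k \mapsto \beta_k^{2/p}\) then gives \(\sum_k \beta_k^p = 1\): the weights always lie on the nonnegative part of the unit p-norm sphere, the feasible set for p-norm MKL weights. This lets the optimizer below search over unconstrained angles instead of constrained weights.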

void MKLMulticlassGradient::gengammagradient(::std::vector<float64_t>& gammagradient, const ::std::vector<float64_t>& gammas, const int32_t dim)
{
    assert((int32_t)gammas.size()+1 == numkernels);

    const float64_t pi4 = 3.141592653589793/2; // pi/2: upper bound for each polar angle

    gammagradient.resize(numkernels);
    std::fill(gammagradient.begin(), gammagradient.end(), 0.0);

    // numkernels-dimensional polar transform
    gammagradient[0] = 1;

    for (int32_t i=0; i<numkernels-1; ++i)
    {
        if (i != dim)
        {
            for (int32_t k=0; k<std::min(i+1, dim+2); ++k)
            {
                gammagradient[k] *= pow(cos(std::min(std::max(0.0, gammas[i]), pi4)), 2.0/pnorm);
            }
            if (i < dim)
                gammagradient[i+1] = pow(sin(std::min(std::max(0.0, gammas[i]), pi4)), 2.0/pnorm);
        }
        else // i == dim: differentiate this factor; higher dimensions stay 0
        {
            for (int32_t k=0; k<i+1; ++k)
            {
                gammagradient[k] *= pow(cos(std::min(std::max(0.0, gammas[i]), pi4)), 2.0/pnorm-1.0)*(-1)*sin(std::min(std::max(0.0, gammas[i]), pi4));
            }
            gammagradient[i+1] = pow(sin(std::min(std::max(0.0, gammas[i]), pi4)), 2.0/pnorm-1)*cos(std::min(std::max(0.0, gammas[i]), pi4));
        }
    }
}
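
The gradient code mirrors genbetas(): each adjusted weight \(\beta_k^{2/p}\) is a product in which at most one factor depends on \(\gamma_d\) (d being the dim argument), so the partial derivative replaces that single factor by its derivative,

\[
\frac{\partial}{\partial\gamma_d}\cos^{2/p}\gamma_d
= -\frac{2}{p}\,\cos^{2/p-1}\gamma_d\,\sin\gamma_d,
\qquad
\frac{\partial}{\partial\gamma_d}\sin^{2/p}\gamma_d
= \frac{2}{p}\,\sin^{2/p-1}\gamma_d\,\cos\gamma_d,
\]

while components with index above d+1 are identically zero. The constant factor 2/p is omitted in the code; since every nonzero gradient component contains exactly one differentiated factor, this scales all components by the same positive constant and leaves the ascent direction used in linesearch() unchanged.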

float64_t MKLMulticlassGradient::objectives(const ::std::vector<float64_t>& weights, const int32_t index)
{
    assert(index >= 0);
    assert(index < (int32_t)sumsofalphas.size());
    assert(index < (int32_t)normsofsubkernels.size());

    float64_t obj = -sumsofalphas[index];
    for (int32_t i=0; i<numkernels; ++i)
    {
        obj += 0.5*normsofsubkernels[index][i]*weights[i];
    }
    return obj;
}
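
Writing \(A_j\) for sumsofalphas[j] and \(N_{j,i}\) for normsofsubkernels[j][i], both recorded by addconstraint(), this evaluates one linear function of the weights per stored constraint:

\[
\mathrm{obj}_j(\beta) = \frac{1}{2}\sum_{i=0}^{n-1}\beta_i\,N_{j,i} \;-\; A_j .
\]

The optimizers below work with the pointwise minimum \(\min_j \mathrm{obj}_j(\beta)\): linesearch() performs gradient ascent on it over the angle parametrization, while linesearch2() uses only the most recent constraint.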


void MKLMulticlassGradient::linesearch(std::vector<float64_t>& finalbeta, const std::vector<float64_t>& oldweights)
{
    const float64_t pi4 = 3.141592653589793/2; // pi/2: upper bound for each polar angle

    const float64_t fingrad = 1e-7;
    const int32_t maxhalfiter = 20;
    const int32_t totaliters = 6;
    const float64_t maxrelobjdiff = 1e-6;

    std::vector<float64_t> finalgamma, curgamma;

    curgamma.resize(numkernels-1);
    if (oldweights.empty())
    {
        std::fill(curgamma.begin(), curgamma.end(), pi4/2);
    }
    else
    {
        // initialize curgamma by inverting the polar transform: take arcsin of
        // the highest dimension of beta^(p/2), then divide everything by its cos
        std::vector<float64_t> tmpbeta(numkernels);
        for (int32_t i=numkernels-1; i>=0; --i)
        {
            tmpbeta[i] = pow(oldweights[i], pnorm/2);
        }

        for (int32_t i=numkernels-1; i>=1; --i)
        {
            curgamma[i-1] = asin(tmpbeta[i]);

            if (i < numkernels-1)
            {
                if (cos(curgamma[i]) <= 0)
                {
                    SG_SINFO("linesearch(...): at i %d cos(curgamma[i])<=0 %f\n", i, cos(curgamma[i]))
                    //curgamma[i-1]=pi4/2;
                }
            }

            for (int32_t k=numkernels-2; k>=1; --k) // k==0 not necessary
            {
                if (cos(curgamma[i-1]) > 0)
                {
                    tmpbeta[k] /= cos(curgamma[i-1]);
                    if (tmpbeta[k] > 1)
                    {
                        SG_SINFO("linesearch(...): at k %d tmpbeta[k]>1 %f\n", k, tmpbeta[k])
                    }
                    tmpbeta[k] = std::min(1.0, std::max(0.0, tmpbeta[k]));
                }
            }
        }
    }

    for (size_t i=0; i<curgamma.size(); ++i)
    {
        SG_SINFO("linesearch(...): curgamma[%d] %f\n", (int)i, curgamma[i])
    }

    bool finished = false;
    int32_t longiters = 0;
    while (!finished)
    {
        ++longiters;
        std::vector<float64_t> curbeta;
        genbetas(curbeta, curgamma);

        // find the smallest objective over all stored constraints
        int32_t minind = 0;
        float64_t minval = objectives(curbeta, minind);
        SG_SINFO("linesearch(...): objective at index %d: %f\n", minind, minval)
        for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
        {
            float64_t tmpval = objectives(curbeta, i);
            SG_SINFO("linesearch(...): objective at index %d: %f\n", i, tmpval)
            if (tmpval < minval)
            {
                minval = tmpval;
                minind = i;
            }
        }
        float64_t lobj = minval;

        // compute the gradient for the smallest objective; note that reusing
        // objectives() adds the constant -sumsofalphas[minind] to every component
        std::vector<float64_t> curgrad;
        for (int32_t i=0; i<numkernels-1; ++i)
        {
            ::std::vector<float64_t> gammagradient;
            gengammagradient(gammagradient, curgamma, i);
            curgrad.push_back(objectives(gammagradient, minind));
        }

        // find the boundary hit point (checked for each dim) of [0, pi/2]
        std::vector<float64_t> maxalphas(numkernels-1, 0);
        float64_t maxgrad = 0;
        for (int32_t i=0; i<numkernels-1; ++i)
        {
            maxgrad = std::max(maxgrad, fabs(curgrad[i]));
            if (curgrad[i] < 0)
            {
                maxalphas[i] = (0-curgamma[i])/curgrad[i];
            }
            else if (curgrad[i] > 0)
            {
                maxalphas[i] = (pi4-curgamma[i])/curgrad[i];
            }
            else
            {
                maxalphas[i] = 1024*1024;
            }
        }

        float64_t maxalpha = maxalphas[0];
        for (int32_t i=1; i<numkernels-1; ++i)
        {
            maxalpha = std::min(maxalpha, maxalphas[i]);
        }

        if ((maxalpha > 1024*1023) || (maxgrad < fingrad))
        {
            // curgrad[i] approximately 0 for all i: terminate
            finished = true;
            finalgamma = curgamma;
        }
        else // of if(maxalpha>1024*1023)
        {
            // line search: shrink the step until the minimum over all objectives
            // increases compared to the starting point, then bisect between the
            // left and right halves until converged;
            // curgamma + alpha*curgrad, maximizing over alpha in [0, maxalpha]
            float64_t leftalpha = 0, rightalpha = maxalpha, midalpha = (leftalpha+rightalpha)/2;

            std::vector<float64_t> tmpgamma = curgamma, tmpbeta;
            for (int32_t i=0; i<numkernels-1; ++i)
            {
                tmpgamma[i] = tmpgamma[i]+rightalpha*curgrad[i];
            }
            genbetas(tmpbeta, tmpgamma);
            float64_t curobj = objectives(tmpbeta, 0);
            for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
            {
                curobj = std::min(curobj, objectives(tmpbeta, i));
            }

            int32_t curhalfiter = 0;
            while ((curobj < minval) && (curhalfiter < maxhalfiter) && (fabs(curobj/minval-1) > maxrelobjdiff))
            {
                rightalpha = midalpha;
                midalpha = (leftalpha+rightalpha)/2;
                ++curhalfiter;
                tmpgamma = curgamma;
                for (int32_t i=0; i<numkernels-1; ++i)
                {
                    tmpgamma[i] = tmpgamma[i]+rightalpha*curgrad[i];
                }
                genbetas(tmpbeta, tmpgamma);
                curobj = objectives(tmpbeta, 0);
                for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
                {
                    curobj = std::min(curobj, objectives(tmpbeta, i));
                }
            }

            float64_t robj = curobj;
            float64_t tmpobj = std::max(lobj, robj);
            do
            {
                tmpobj = std::max(lobj, robj);

                tmpgamma = curgamma;
                for (int32_t i=0; i<numkernels-1; ++i)
                {
                    tmpgamma[i] = tmpgamma[i]+midalpha*curgrad[i];
                }
                genbetas(tmpbeta, tmpgamma);
                curobj = objectives(tmpbeta, 0);
                for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
                {
                    curobj = std::min(curobj, objectives(tmpbeta, i));
                }

                if (lobj > robj)
                {
                    rightalpha = midalpha;
                    robj = curobj;
                }
                else
                {
                    leftalpha = midalpha;
                    lobj = curobj;
                }
                midalpha = (leftalpha+rightalpha)/2;
            }
            while (fabs(curobj/tmpobj-1) > maxrelobjdiff);

            finalgamma = tmpgamma;
            curgamma = tmpgamma;
        } // else of if(maxalpha>1024*1023)

        if (longiters >= totaliters)
        {
            finished = true;
        }
    }

    // map back to weights and normalize to unit p-norm
    genbetas(finalbeta, finalgamma);
    float64_t nor = 0;
    for (int32_t i=0; i<numkernels; ++i)
    {
        nor += pow(finalbeta[i], pnorm);
    }
    if (nor > 0)
    {
        nor = pow(nor, 1.0/pnorm);
        for (int32_t i=0; i<numkernels; ++i)
        {
            finalbeta[i] /= nor;
        }
    }
}
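
In summary, one outer iteration of linesearch() takes an ascent step on the minimum objective in angle space:

\[
\gamma \leftarrow \gamma + \alpha\,g,
\qquad
g_d = \frac{\partial}{\partial\gamma_d}\,\mathrm{obj}_{j^\ast}(\beta(\gamma)),
\qquad
\alpha \in [0, \alpha_{\max}],
\]

where \(j^\ast\) is the constraint with the currently smallest objective, \(\alpha_{\max}\) is the largest step for which every angle stays inside \([0,\pi/2]\), and \(\alpha\) itself is chosen by the bisection above (g is formed by contracting the per-angle derivative vectors with the norms via objectives(), which, as noted in the code, also adds the constant \(-A_{j^\ast}\) to every component). After at most totaliters outer iterations the resulting weights are renormalized to \(\sum_i \beta_i^p = 1\).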

void MKLMulticlassGradient::linesearch2(std::vector<float64_t>& finalbeta, const std::vector<float64_t>& oldweights)
{
    const float64_t epsRegul = 0.01;

    int32_t num_kernels = (int32_t)oldweights.size();
    int32_t nofKernelsGood = num_kernels;

    finalbeta = oldweights;

    for (int32_t p=0; p<num_kernels; ++p)
    {
        //SG_PRINT("MKL-direct: sumw[%3d] = %e ( oldbeta = %e )\n", p, sumw[p], old_beta[p] )
        if (oldweights[p] >= 0.0)
        {
            finalbeta[p] = normsofsubkernels.back()[p] * oldweights[p]*oldweights[p] / pnorm;
            finalbeta[p] = CMath::pow(finalbeta[p], 1.0 / (pnorm+1.0));
        }
        else
        {
            finalbeta[p] = 0.0;
            --nofKernelsGood;
        }
        ASSERT(finalbeta[p] >= 0)
    }

    // --- normalize
    float64_t Z = 0.0;
    for (int32_t p=0; p<num_kernels; ++p)
        Z += CMath::pow(finalbeta[p], pnorm);

    Z = CMath::pow(Z, -1.0/pnorm);
    ASSERT(Z >= 0)
    for (int32_t p=0; p<num_kernels; ++p)
        finalbeta[p] *= Z;

    // --- regularize & renormalize
    float64_t preR = 0.0;
    for (int32_t p=0; p<num_kernels; ++p)
        preR += CMath::pow(oldweights[p] - finalbeta[p], 2.0);

    const float64_t R = CMath::sqrt(preR / pnorm) * epsRegul;
    if (!(R >= 0))
    {
        SG_PRINT("MKL-direct: p = %.3f\n", pnorm)
        SG_PRINT("MKL-direct: nofKernelsGood = %d\n", nofKernelsGood)
        SG_PRINT("MKL-direct: Z = %e\n", Z)
        SG_PRINT("MKL-direct: eps = %e\n", epsRegul)
        for (int32_t p=0; p<num_kernels; ++p)
        {
            const float64_t t = CMath::pow(oldweights[p] - finalbeta[p], 2.0);
            SG_PRINT("MKL-direct: t[%3d] = %e ( diff = %e = %e - %e )\n", p, t, oldweights[p]-finalbeta[p], oldweights[p], finalbeta[p])
        }
        SG_PRINT("MKL-direct: preR = %e\n", preR)
        SG_PRINT("MKL-direct: preR/p = %e\n", preR/pnorm)
        SG_PRINT("MKL-direct: sqrt(preR/p) = %e\n", CMath::sqrt(preR/pnorm))
        SG_PRINT("MKL-direct: R = %e\n", R)
        SG_ERROR("Assertion R >= 0 failed!\n")
    }

    Z = 0.0;
    for (int32_t p=0; p<num_kernels; ++p)
    {
        finalbeta[p] += R;
        Z += CMath::pow(finalbeta[p], pnorm);
        ASSERT(finalbeta[p] >= 0)
    }
    Z = CMath::pow(Z, -1.0/pnorm);
    ASSERT(Z >= 0)
    for (int32_t p=0; p<num_kernels; ++p)
    {
        finalbeta[p] *= Z;
        ASSERT(finalbeta[p] >= 0.0)
        if (finalbeta[p] > 1.0)
            finalbeta[p] = 1.0;
    }
}
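
In terms of the most recent constraint \(N_p =\) normsofsubkernels.back()[p], the unregularized update computed above is

\[
\beta_p \;\propto\; \Bigl(\frac{N_p\,(\beta_p^{\mathrm{old}})^2}{p_{\mathrm{norm}}}\Bigr)^{\frac{1}{p_{\mathrm{norm}}+1}},
\]

followed by normalization to \(\sum_p \beta_p^{p_{\mathrm{norm}}} = 1\). The subsequent step adds the same constant \(R = \epsilon\,\sqrt{\|\beta^{\mathrm{old}}-\beta\|^2/p_{\mathrm{norm}}}\) to every coordinate and renormalizes, which keeps the weights away from hard zeros and damps oscillation between successive calls.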

void MKLMulticlassGradient::computeweights(std::vector<float64_t>& weights2)
{
    if (pnorm < 1)
        SG_ERROR("MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2): parameter pnorm<1")

    SG_SDEBUG("MKLMulticlassGradient::computeweights(...): pnorm %f\n", pnorm)

    std::vector<float64_t> initw(weights2);
    linesearch2(weights2, initw);

    SG_SINFO("MKLMulticlassGradient::computeweights(...): newweights \n")
    for (size_t i=0; i<weights2.size(); ++i)
    {
        SG_SINFO(" %f", weights2[i])
    }
    SG_SINFO(" \n")

    /*
    // gradient-based alternative, kept for reference: iterate linesearch()
    // until the weight vector stops changing or maxnumlinesrch is reached
    int maxnumlinesrch=15;
    float64_t maxdiff=1e-6;

    bool finished=false;
    int numiter=0;
    do
    {
        ++numiter;
        std::vector<float64_t> initw(weights2);
        linesearch(weights2,initw);
        float64_t norm=0;
        if(!initw.empty())
        {
            for(size_t i=0;i<weights2.size();++i)
            {
                norm+=(weights2[i]-initw[i])*(weights2[i]-initw[i]);
            }
            norm=sqrt(norm);
        }
        else
        {
            norm=maxdiff+1;
        }

        if((norm < maxdiff) || (numiter>=maxnumlinesrch))
        {
            finished=true;
        }
        // for(size_t i=0;i<weights2.size();++i)
        // {
        //     SG_SINFO("MKLMulticlassGradient::computeweights(...): oldweights %f\n",initw[i])
        // }
        SG_SINFO("MKLMulticlassGradient::computeweights(...): newweights at iter %d normdiff %f\n",numiter,norm)
        for(size_t i=0;i<weights2.size();++i)
        {
            SG_SINFO(" %f",weights2[i])
        }
        SG_SINFO(" \n")
    }
    while(false==finished);
    */
}
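
For orientation, a minimal driver sketch (the numeric values are made up for illustration; in real use the surrounding MKLMulticlass machinery supplies the per-kernel norms \(\|w_k\|^2\) and the alpha sum from its SVM solutions, so this class is not normally driven by hand):

#include <shogun/classifier/mkl/MKLMulticlassGradient.h>
#include <vector>

using namespace shogun;

int main()
{
    MKLMulticlassGradient opt;
    opt.setup(3);          // three base kernels
    opt.set_mkl_norm(2.0); // p-norm constraint on the weights

    // one recorded constraint: per-kernel ||w_k||^2 and the sum of alphas
    std::vector<float64_t> normw2;
    normw2.push_back(0.9);
    normw2.push_back(0.4);
    normw2.push_back(0.1);
    opt.addconstraint(normw2, 1.5);

    // weights serves as both the initial guess and the output buffer
    std::vector<float64_t> weights(3, 1.0/3);
    opt.computeweights(weights);
    return 0;
}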
