MKLMulticlassGradient.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2009 Alexander Binder
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/classifier/mkl/MKLMulticlassGradient.h>

using namespace shogun;

MKLMulticlassGradient::MKLMulticlassGradient()
{
    numkernels = 0;
    pnorm = 2;
}

MKLMulticlassGradient::~MKLMulticlassGradient()
{
}

MKLMulticlassGradient MKLMulticlassGradient::operator=(MKLMulticlassGradient& gl)
{
    numkernels = gl.numkernels;
    pnorm = gl.pnorm;
    return (*this);
}

MKLMulticlassGradient::MKLMulticlassGradient(MKLMulticlassGradient& gl)
{
    numkernels = gl.numkernels;
    pnorm = gl.pnorm;
}

void MKLMulticlassGradient::setup(const int32_t numkernels2)
{
    numkernels = numkernels2;
    if (numkernels <= 1)
    {
        SG_ERROR("MKLMulticlassGradient::setup(const int32_t numkernels2): "
                "input numkernels out of bounds: %d\n", numkernels);
    }
}

void MKLMulticlassGradient::set_mkl_norm(float64_t norm)
{
    pnorm = norm;
    if (pnorm < 1)
        SG_ERROR("MKLMulticlassGradient::set_mkl_norm(float64_t norm): "
                "parameter pnorm<1\n");
}

void MKLMulticlassGradient::addconstraint(const ::std::vector<float64_t>& normw2,
        const float64_t sumofpositivealphas)
{
    normsofsubkernels.push_back(normw2);
    sumsofalphas.push_back(sumofpositivealphas);
}

void MKLMulticlassGradient::genbetas(::std::vector<float64_t>& weights,
        const ::std::vector<float64_t>& gammas)
{
    assert((int32_t)gammas.size()+1 == numkernels);

    const double pi4 = 3.141592653589793/4;

    weights.resize(numkernels);

    // numkernels-dimensional polar transform; each angle is clamped to
    // [0, pi/4] here, while the line search allows angles up to pi/2
    weights[0] = 1;

    for (int32_t i=0; i<numkernels-1; ++i)
    {
        for (int32_t k=0; k<i+1; ++k)
        {
            weights[k] *= cos(std::min(std::max(0.0, gammas[i]), pi4));
        }
        weights[i+1] = sin(std::min(std::max(0.0, gammas[i]), pi4));
    }

    // p-norm manifold adjustment: the polar weights have unit 2-norm, so
    // raising them to the power 2/p yields unit p-norm
    if (pnorm != 2.0)
    {
        for (int32_t i=0; i<numkernels; ++i)
            weights[i] = pow(weights[i], 2.0/pnorm);
    }
}
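
/* Editor's sketch, not part of the original file: a standalone copy of the
 * polar transform above, checking that the generated weights land on the
 * unit p-norm sphere, i.e. sum_i beta_i^p == 1. Everything here is
 * hypothetical illustration code and assumes <cmath>, <vector> and
 * <cassert> are available. */
#if 0 // illustrative sketch, not compiled
static void example_check_pnorm_sphere()
{
    const double pnorm = 3.0;                // hypothetical p-norm
    const double pi4 = 3.141592653589793/4;
    std::vector<double> gammas(3, pi4/3);    // 3 angles -> 4 weights
    std::vector<double> beta(gammas.size()+1, 1.0);

    // same polar transform as genbetas() (clamping omitted, the angles are
    // in range); by induction the weights have unit 2-norm after each step
    for (size_t i=0; i<gammas.size(); ++i)
    {
        for (size_t k=0; k<=i; ++k)
            beta[k] *= cos(gammas[i]);
        beta[i+1] = sin(gammas[i]);
    }

    // the exponent 2/p maps the unit 2-norm vector to unit p-norm
    double norm = 0;
    for (size_t i=0; i<beta.size(); ++i)
    {
        beta[i] = pow(beta[i], 2.0/pnorm);
        norm += pow(beta[i], pnorm);
    }
    assert(fabs(norm-1.0) < 1e-12); // holds up to rounding
}
#endif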

void MKLMulticlassGradient::gengammagradient(::std::vector<float64_t>& gammagradient,
        const ::std::vector<float64_t>& gammas, const int32_t dim)
{
    assert((int32_t)gammas.size()+1 == numkernels);

    const double pihalf = 3.141592653589793/2;

    gammagradient.resize(numkernels);
    std::fill(gammagradient.begin(), gammagradient.end(), 0.0);

    // derivative of the numkernels-dimensional polar transform with respect
    // to the angle gammas[dim]
    gammagradient[0] = 1;

    for (int32_t i=0; i<numkernels-1; ++i)
    {
        const float64_t ang = std::min(std::max(0.0, gammas[i]), pihalf);
        if (i != dim)
        {
            // factors that do not involve gammas[dim] stay as in genbetas
            for (int32_t k=0; k<std::min(i+1, dim+2); ++k)
            {
                gammagradient[k] *= pow(cos(ang), 2.0/pnorm);
            }
            if (i < dim)
                gammagradient[i+1] = pow(sin(ang), 2.0/pnorm);
        }
        else // i == dim: differentiate this factor; higher dims stay 0
        {
            for (int32_t k=0; k<i+1; ++k)
            {
                gammagradient[k] *= pow(cos(ang), 2.0/pnorm-1.0)*(-1)*sin(ang);
            }
            gammagradient[i+1] = pow(sin(ang), 2.0/pnorm-1.0)*cos(ang);
        }
    }
}
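
/* Editor's sketch, not part of the original file: gengammagradient() fills
 * gammagradient with d beta_k / d gamma_dim (up to a constant positive
 * factor when pnorm != 2) in the same layout genbetas() uses for beta.
 * Because objectives() below is affine in the weights, the directional
 * derivative of constraint `index` is objectives() evaluated on the
 * gradient vector with its constant -sumsofalphas term removed. A
 * hypothetical test subclass could verify this against a finite difference
 * (assumes pnorm==2 and angles inside (0, pi/4) so the clamps are inactive): */
#if 0 // illustrative sketch, not compiled
struct GradientCheck : public MKLMulticlassGradient // hypothetical subclass
{
    // returns |analytic - numeric|, which should be O(h)
    float64_t check(std::vector<float64_t> gammas, int32_t dim, int32_t index)
    {
        const float64_t h = 1e-6;
        std::vector<float64_t> beta, grad;

        gengammagradient(grad, gammas, dim);
        // objectives() adds -sumsofalphas[index] to every evaluation;
        // add it back to obtain the pure directional derivative
        const float64_t analytic = objectives(grad, index) + sumsofalphas[index];

        genbetas(beta, gammas);
        const float64_t f0 = objectives(beta, index);
        gammas[dim] += h;
        genbetas(beta, gammas);
        const float64_t f1 = objectives(beta, index);

        return fabs(analytic - (f1-f0)/h);
    }
};
#endif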

float64_t MKLMulticlassGradient::objectives(const ::std::vector<float64_t>& weights,
        const int32_t index)
{
    assert(index >= 0);
    assert(index < (int32_t)sumsofalphas.size());
    assert(index < (int32_t)normsofsubkernels.size());

    float64_t obj = -sumsofalphas[index];
    for (int32_t i=0; i<numkernels; ++i)
    {
        obj += 0.5*normsofsubkernels[index][i]*weights[i];
    }
    return obj;
}
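
/* Editor's note, not part of the original file: each stored constraint pairs
 * the per-kernel squared norms ||w_k||^2 of one intermediate solution with
 * its sum of alphas, so objectives() evaluates the dual value
 *
 *     obj(beta) = 0.5 * sum_k beta_k * ||w_k||^2 - sum_i alpha_i
 *
 * for constraint `index`. linesearch() below maximizes the minimum of these
 * affine functions over the angles parametrizing the p-norm sphere. */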

void MKLMulticlassGradient::linesearch(std::vector<float64_t>& finalbeta,
        const std::vector<float64_t>& oldweights)
{
    const float64_t pihalf = 3.141592653589793/2;

    const float64_t fingrad = 1e-7;
    const int32_t maxhalfiter = 20;
    const int32_t totaliters = 6;
    const float64_t maxrelobjdiff = 1e-6;

    std::vector<float64_t> finalgamma, curgamma;

    curgamma.resize(numkernels-1);
    if (oldweights.empty())
    {
        std::fill(curgamma.begin(), curgamma.end(), pihalf/2);
    }
    else
    {
        // initialize curgamma from the old weights: map beta to beta^(p/2)
        // (unit 2-norm), then peel off the angles from the highest dimension
        // downwards: arcsin for the current dimension, then divide the
        // remaining entries by its cosine
        std::vector<float64_t> tmpbeta(numkernels);
        for (int32_t i=numkernels-1; i>=0; --i)
        {
            tmpbeta[i] = pow(oldweights[i], pnorm/2);
        }

        for (int32_t i=numkernels-1; i>=1; --i)
        {
            curgamma[i-1] = asin(tmpbeta[i]);
            for (int32_t k=numkernels-2; k>=1; --k) // k==0 not necessary
            {
                if (cos(curgamma[i-1]) > 0)
                    tmpbeta[k] /= cos(curgamma[i-1]);
            }
        }
    }

    bool finished = false;
    int32_t longiters = 0;
    while (!finished)
    {
        ++longiters;
        std::vector<float64_t> curbeta;
        genbetas(curbeta, curgamma);

        // find the smallest objective over all stored constraints
        int32_t minind = 0;
        float64_t minval = objectives(curbeta, minind);
        for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
        {
            float64_t tmpval = objectives(curbeta, i);
            if (tmpval < minval)
            {
                minval = tmpval;
                minind = i;
            }
        }
        float64_t lobj = minval;

        // compute the gradient of the smallest objective
        std::vector<float64_t> curgrad;
        for (int32_t i=0; i<numkernels-1; ++i)
        {
            ::std::vector<float64_t> gammagradient;
            gengammagradient(gammagradient, curgamma, i);
            curgrad.push_back(objectives(gammagradient, minind));
        }

        // find the boundary hit point (checked per dimension) of [0, pi/2]
        std::vector<float64_t> maxalphas(numkernels-1, 0);
        float64_t maxgrad = 0;
        for (int32_t i=0; i<numkernels-1; ++i)
        {
            maxgrad = std::max(maxgrad, fabs(curgrad[i]));
            if (curgrad[i] < 0)
            {
                maxalphas[i] = (0-curgamma[i])/curgrad[i];
            }
            else if (curgrad[i] > 0)
            {
                maxalphas[i] = (pihalf-curgamma[i])/curgrad[i];
            }
            else
            {
                maxalphas[i] = 1024*1024;
            }
        }

        float64_t maxalpha = maxalphas[0];
        for (int32_t i=1; i<numkernels-1; ++i)
        {
            maxalpha = std::min(maxalpha, maxalphas[i]);
        }

        if ((maxalpha > 1024*1023) || (maxgrad < fingrad))
        {
            // curgrad[i] is approximately 0 for all i: terminate
            finished = true;
            finalgamma = curgamma;
        }
        else
        {
            // line search: halve the step until the minimum over all
            // objectives no longer falls below the starting value, then
            // bisect between the left and right halves until converged.
            // We maximize over the step size al in [0, maxalpha] along
            // curgamma + al*curgrad.
            float64_t leftalpha = 0, rightalpha = maxalpha,
                    midalpha = (leftalpha+rightalpha)/2;

            std::vector<float64_t> tmpgamma = curgamma, tmpbeta;
            for (int32_t i=0; i<numkernels-1; ++i) // step every angle
            {
                tmpgamma[i] = tmpgamma[i]+rightalpha*curgrad[i];
            }
            genbetas(tmpbeta, tmpgamma);
            float64_t curobj = objectives(tmpbeta, 0);
            for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
            {
                curobj = std::min(curobj, objectives(tmpbeta, i));
            }

            int32_t curhalfiter = 0;
            while ((curobj < minval) && (curhalfiter < maxhalfiter)
                    && (fabs(curobj/minval-1) > maxrelobjdiff))
            {
                rightalpha = midalpha;
                midalpha = (leftalpha+rightalpha)/2;
                ++curhalfiter;

                tmpgamma = curgamma;
                for (int32_t i=0; i<numkernels-1; ++i)
                {
                    tmpgamma[i] = tmpgamma[i]+rightalpha*curgrad[i];
                }
                genbetas(tmpbeta, tmpgamma);
                curobj = objectives(tmpbeta, 0);
                for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
                {
                    curobj = std::min(curobj, objectives(tmpbeta, i));
                }
            }

            float64_t robj = curobj;
            float64_t tmpobj = std::max(lobj, robj);
            do
            {
                tmpobj = std::max(lobj, robj);

                tmpgamma = curgamma;
                for (int32_t i=0; i<numkernels-1; ++i)
                {
                    tmpgamma[i] = tmpgamma[i]+midalpha*curgrad[i];
                }
                genbetas(tmpbeta, tmpgamma);
                curobj = objectives(tmpbeta, 0);
                for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
                {
                    curobj = std::min(curobj, objectives(tmpbeta, i));
                }

                // keep the half interval whose endpoint has the larger objective
                if (lobj > robj)
                {
                    rightalpha = midalpha;
                    robj = curobj;
                }
                else
                {
                    leftalpha = midalpha;
                    lobj = curobj;
                }
                midalpha = (leftalpha+rightalpha)/2;
            }
            while (fabs(curobj/tmpobj-1) > maxrelobjdiff);

            finalgamma = tmpgamma;
            curgamma = tmpgamma;
        }

        if (longiters >= totaliters)
        {
            finished = true;
        }
    }

    genbetas(finalbeta, finalgamma);

    // renormalize onto the unit p-norm sphere
    float64_t nor = 0;
    for (int32_t i=0; i<numkernels; ++i)
    {
        nor += pow(finalbeta[i], pnorm);
    }
    if (nor > 0)
    {
        nor = pow(nor, 1.0/pnorm);
        for (int32_t i=0; i<numkernels; ++i)
        {
            finalbeta[i] /= nor;
        }
    }
}
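
/* Editor's sketch, not part of the original file: the inner loops above are
 * a plain bisection on the step size. A minimal standalone version of the
 * same idea, with hypothetical names, for a one-dimensional objective f on
 * [0, maxalpha]: */
#if 0 // illustrative sketch, not compiled
static float64_t bisect_step(float64_t (*f)(float64_t), float64_t maxalpha)
{
    float64_t left = 0, right = maxalpha, mid = (left+right)/2;
    float64_t fleft = f(left), fright = f(right);
    for (int32_t iter=0; iter<30; ++iter)
    {
        const float64_t fmid = f(mid);
        // keep the half whose endpoint currently looks better, exactly as
        // the do-while loop above does with lobj and robj
        if (fleft > fright) { right = mid; fright = fmid; }
        else                { left = mid;  fleft = fmid; }
        mid = (left+right)/2;
    }
    return mid; // approximate maximizer of f on [0, maxalpha]
}
#endif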

void MKLMulticlassGradient::computeweights(std::vector<float64_t>& weights2)
{
    if (pnorm < 1)
        SG_ERROR("MKLMulticlassGradient::computeweights(std::vector<float64_t>"
                " & weights2): parameter pnorm<1\n");

    SG_SDEBUG("MKLMulticlassGradient::computeweights(...): pnorm %f\n", pnorm);

    const int32_t maxnumlinesrch = 15;
    const float64_t maxdiff = 1e-6;

    bool finished = false;
    int32_t numiter = 0;
    do
    {
        ++numiter;
        std::vector<float64_t> initw(weights2);
        linesearch(weights2, initw);

        // measure how far the line search moved the weights
        float64_t norm = 0;
        if (!initw.empty())
        {
            for (size_t i=0; i<weights2.size(); ++i)
            {
                norm += (weights2[i]-initw[i])*(weights2[i]-initw[i]);
            }
            norm = sqrt(norm);
        }
        else
        {
            norm = maxdiff+1;
        }

        if ((norm < maxdiff) || (numiter >= maxnumlinesrch))
        {
            finished = true;
        }
    }
    while (!finished);
}
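
/* Editor's sketch, not part of the original file: the typical driving
 * sequence for this class from the MKL multiclass training loop. All
 * numeric values below are hypothetical placeholders. */
#if 0 // illustrative sketch, not compiled
static void example_usage()
{
    MKLMulticlassGradient opt;
    opt.setup(4);          // four subkernels
    opt.set_mkl_norm(2.0); // p-norm constraint on the kernel weights

    // after each intermediate SVM solve, add one constraint: the per-kernel
    // squared norms ||w_k||^2 and the sum of the positive alphas
    std::vector<float64_t> normw2(4, 0.25); // hypothetical values
    opt.addconstraint(normw2, 1.0);

    // the current weights serve as starting point and receive the update
    std::vector<float64_t> weights(4, 0.5);
    opt.computeweights(weights);
}
#endif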
