SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SubGradientSVM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2009 Soeren Sonnenburg
8  * Written (W) 2007-2008 Vojtech Franc
9  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
14 #include <shogun/lib/Signal.h>
15 #include <shogun/lib/Time.h>
20 #include <shogun/labels/Labels.h>
22 
23 #undef DEBUG_SUBGRADIENTSVM
24 
25 using namespace shogun;
26 
// Default constructor: C1=C2=1, epsilon=1e-5, initial working-set size qpsize=42,
// QP size cap qpsize_max=2000, bias disabled, active/bound change counters zeroed.
// NOTE(review): the signature line (presumably CSubGradientSVM::CSubGradientSVM())
// is missing from this extraction — confirm against the repository source.
28 : CLinearMachine(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
29  qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
30 {
31 }
32 
// Convenience constructor: sets the same regularization constant C for both
// classes (C1 and C2) and attaches training features/labels immediately.
// NOTE(review): the first line of the signature is missing from this extraction.
34  float64_t C, CDotFeatures* traindat, CLabels* trainlab)
35 : CLinearMachine(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
36  qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
37 {
38  set_features(traindat);
39  set_labels(trainlab);
40 }
41 
42 
// Destructor — intentionally empty in the visible code; working buffers are
// released by cleanup(), which train_machine() calls before returning.
// NOTE(review): the destructor signature line is missing from this extraction.
44 {
45 }
46 
47 /*
48 int32_t CSubGradientSVM::find_active(int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
49 {
50  int32_t delta_active=0;
51  num_active=0;
52  num_bound=0;
53 
54  for (int32_t i=0; i<num_vec; i++)
55  {
56  active[i]=0;
57 
58  //within margin/wrong side
59  if (proj[i] < 1-work_epsilon)
60  {
61  idx_active[num_active++]=i;
62  active[i]=1;
63  }
64 
65  //on margin
66  if (CMath::abs(proj[i]-1) <= work_epsilon)
67  {
68  idx_bound[num_bound++]=i;
69  active[i]=2;
70  }
71 
72  if (active[i]!=old_active[i])
73  delta_active++;
74  }
75 
76  return delta_active;
77 }
78 */
79 
// Classify every training vector against the current margin proj[i]=y_i*(w.x_i+b):
//   active[i]=1  — margin violator (proj[i] < 1-autoselected_epsilon), goes to idx_active
//   active[i]=2  — on the margin (|proj[i]-1| <= autoselected_epsilon), goes to idx_bound
//   active[i]=0  — otherwise
// Returns the number of vectors whose status changed (delta_active); 0 together with
// work_epsilon<=epsilon signals convergence. If nothing changed but work_epsilon is
// still above epsilon, the tolerance is tightened and the sets are re-selected using
// the sorted |proj[i]-1| values, capping the QP working set at qpsize_max.
// NOTE(review): the function signature line and several statements (original lines
// 117-119 work_epsilon shrink, 134-136 sort/epsilon auto-selection, 142-147, 160-163
// QP-size reduction) are missing from this extraction — hedged comments below.
81  int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
82 {
83  delta_bound=0;
84  delta_active=0;
85  num_active=0;
86  num_bound=0;
87 
// First pass: classify against the current autoselected_epsilon.
88  for (int32_t i=0; i<num_vec; i++)
89  {
90  active[i]=0;
91 
92  //within margin/wrong side
93  if (proj[i] < 1-autoselected_epsilon)
94  {
95  idx_active[num_active++]=i;
96  active[i]=1;
97  }
98 
99  //on margin
100  if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
101  {
102  idx_bound[num_bound++]=i;
103  active[i]=2;
104  }
105 
106  if (active[i]!=old_active[i])
107  delta_active++;
108 
109  if (active[i]==2 && old_active[i]==2)
110  delta_bound++;
111  }
112 
113 
114  if (delta_active==0 && work_epsilon<=epsilon) //we converged
115  return 0;
116  else if (delta_active==0) //lets decrease work_epsilon
117  {
// (missing lines here — presumably shrink work_epsilon before re-selection; verify.)
120  num_bound=qpsize;
121  }
122 
// Re-selection pass: reset counters and rebuild the sets with a refreshed epsilon.
123  delta_bound=0;
124  delta_active=0;
125  num_active=0;
126  num_bound=0;
127 
// Distances to the margin, with original indices, for rank-based epsilon selection.
128  for (int32_t i=0; i<num_vec; i++)
129  {
130  tmp_proj[i]=CMath::abs(proj[i]-1);
131  tmp_proj_idx[i]=i;
132  }
133 
// (missing line — presumably sorts tmp_proj/tmp_proj_idx; the scan below relies on order.)
135 
// (missing line — presumably picks autoselected_epsilon from the sorted distances.)
137 
138 #ifdef DEBUG_SUBGRADIENTSVM
139  //SG_PRINT("autoseleps: %15.15f\n", autoselected_epsilon);
140 #endif
141 
// (missing lines — condition guarding the qpsize_max enforcement below; verify.)
144 
146  {
148 
// Count how many vectors the chosen epsilon would put in the QP.
149  int32_t i=0;
150  while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
151  i++;
152 
153  //SG_PRINT("lower bound on epsilon requires %d variables in qp\n", i);
154 
155  if (i>=qpsize_max && autoselected_epsilon>epsilon) //qpsize limit
156  {
157  SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
158  int32_t num_in_qp=i;
// Walk back from the largest distances, shrinking the working set (and presumably
// autoselected_epsilon — interior lines missing) until it fits in qpsize_max.
159  while (--i>=0 && num_in_qp>=qpsize_max)
160  {
162  {
164  num_in_qp--;
165  }
166  }
167 
168  //SG_PRINT("new qpsize will be %d, autoeps:%15.15f\n", num_in_qp, autoselected_epsilon);
169  }
170  }
171 
// Final pass: classify again with the (possibly tightened) autoselected_epsilon.
172  for (int32_t i=0; i<num_vec; i++)
173  {
174  active[i]=0;
175 
176  //within margin/wrong side
177  if (proj[i] < 1-autoselected_epsilon)
178  {
179  idx_active[num_active++]=i;
180  active[i]=1;
181  }
182 
183  //on margin
184  if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
185  {
186  idx_bound[num_bound++]=i;
187  active[i]=2;
188  }
189 
190  if (active[i]!=old_active[i])
191  delta_active++;
192 
193  if (active[i]==2 && old_active[i]==2)
194  delta_bound++;
195  }
196 
197  //SG_PRINT("delta_bound: %d of %d (%02.2f)\n", delta_bound, num_bound, 100.0*delta_bound/num_bound);
198  return delta_active;
199 }
200 
201 
// Incrementally maintain sum_CXy_active = C1 * sum_{i active} y_i*x_i (and, when
// use_bias is set, sum_Cy_active = C1 * sum_{i active} y_i) as vectors enter or
// leave the active set (status 1) between iterations.
// NOTE(review): original line 220 is missing from this extraction — presumably it
// saves the current active flags into old_active (swap or copy); verify upstream.
202 void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec)
203 {
204  for (int32_t i=0; i<num_vec; i++)
205  {
// Vector just became active: add its C1*y_i-scaled features to the running sum.
206  if (active[i]==1 && old_active[i]!=1)
207  {
208  features->add_to_dense_vec(C1*((CBinaryLabels*) m_labels)->get_label(i), i, sum_CXy_active, num_feat);
209  if (use_bias)
210  sum_Cy_active+=C1*((CBinaryLabels*) m_labels)->get_label(i);
211  }
// Vector just left the active set: subtract its contribution again.
212  else if (old_active[i]==1 && active[i]!=1)
213  {
214  features->add_to_dense_vec(-C1*((CBinaryLabels*) m_labels)->get_label(i), i, sum_CXy_active, num_feat);
215  if (use_bias)
216  sum_Cy_active-=C1*((CBinaryLabels*) m_labels)->get_label(i);
217  }
218  }
219 
221 }
222 
// Exact line search along the descent direction (-grad_w, -grad_b): the objective
// restricted to the line is piecewise quadratic; its derivative is
//   2*A_zero*alpha + B_zero + sum_B,
// where A_zero = 0.5*|grad_w|^2 and B_zero = -w.grad_w, and sum_B changes at the
// "hinge points" alpha_i = (proj[i]-1)/p_i where example i crosses the margin
// (p_i = y_i*(grad_w.x_i + grad_b)). Returns the step size alpha at which the
// directional derivative becomes zero.
// NOTE(review): original line 247 is missing from this extraction — presumably it
// sorts hinge_point/hinge_idx jointly; the scan below relies on ascending order.
223 float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec)
224 {
225  float64_t sum_B = 0;
226  float64_t A_zero = 0.5*SGVector<float64_t>::dot(grad_w, grad_w, num_feat);
227  float64_t B_zero = -SGVector<float64_t>::dot(w.vector, grad_w, num_feat);
228 
229  int32_t num_hinge=0;
230 
// Collect hinge points: per-example derivative p of the margin along the direction.
231  for (int32_t i=0; i<num_vec; i++)
232  {
233  float64_t p=((CBinaryLabels*) m_labels)->get_label(i)*(features->dense_dot(i, grad_w, num_feat)+grad_b);
234  grad_proj[i]=p;
235  if (p!=0)
236  {
237  hinge_point[num_hinge]=(proj[i]-1)/p;
238  hinge_idx[num_hinge]=i;
239  num_hinge++;
240 
241  if (p<0)
242  sum_B+=p;
243  }
244  }
245  sum_B*=C1;
246 
// (missing line — presumably joint sort of hinge_point/hinge_idx; see note above.)
248 
249 
250  float64_t alpha = hinge_point[0];
251  float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B;
252 
253  //CMath::display_vector(grad_w, num_feat, "grad_w");
254  //CMath::display_vector(grad_proj, num_vec, "grad_proj");
255  //CMath::display_vector(hinge_point, num_vec, "hinge_point");
256  //SG_PRINT("A_zero=%f\n", A_zero);
257  //SG_PRINT("B_zero=%f\n", B_zero);
258  //SG_PRINT("sum_B=%f\n", sum_B);
259  //SG_PRINT("alpha=%f\n", alpha);
260  //SG_PRINT("grad_val=%f\n", grad_val);
261 
262  float64_t old_grad_val = grad_val;
263  float64_t old_alpha = alpha;
264 
// Walk the hinge points while the derivative stays negative; when it turns
// positive, interpolate linearly between the bracketing points for the zero.
265  for (int32_t i=1; i < num_hinge && grad_val < 0; i++)
266  {
267  alpha = hinge_point[i];
268  grad_val = 2*A_zero*alpha + B_zero + sum_B;
269 
270  if (grad_val > 0)
271  {
272  ASSERT(old_grad_val-grad_val != 0);
273  float64_t gamma = -grad_val/(old_grad_val-grad_val);
274  alpha = old_alpha*gamma + (1-gamma)*alpha;
275  }
276  else
277  {
278  old_grad_val = grad_val;
279  old_alpha = alpha;
280 
// Crossing this hinge flips the sign of that example's loss slope.
281  sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
282  grad_val = 2*A_zero*alpha + B_zero + sum_B;
283  }
284  }
285 
286  return alpha;
287 }
288 
// Compute the minimum-norm subgradient of the objective at the current (w, bias)
// and return its directional derivative. Examples exactly on the margin (the
// "bound" set) have a set-valued subdifferential; the convex combination weights
// beta in [0,1] are chosen by solving a small box-constrained QP
// (min 0.5*beta'Z*beta + Zv'beta) — or picked at random when the QP would exceed
// qpsize_max and we are not in the no-improvement fallback.
// NOTE(review): the function signature line (original 289) and line 366
// (presumably grad_b initialization from sum_Cy_active) are missing from this
// extraction — verify against the repository source.
290  int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
291 {
292  float64_t dir_deriv=0;
293 
294  if (num_bound > 0)
295  {
296 
297  CTime t2;
// v = w - sum_CXy_active (gradient contribution of the strictly active set).
298  SGVector<float64_t>::add(v, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);
299 
300  if (num_bound>=qpsize_max && num_it_noimprovement!=10) // if qp gets to large, lets just choose a random beta
301  {
302  //SG_PRINT("qpsize too large (%d>=%d) choosing random subgradient/beta\n", num_bound, qpsize_max);
303  for (int32_t i=0; i<num_bound; i++)
304  beta[i]=CMath::random(0.0,1.0);
305  }
306  else
307  {
308  memset(beta, 0, sizeof(float64_t)*num_bound);
309 
310  float64_t bias_const=0;
311 
312  if (use_bias)
313  bias_const=1;
314 
// Build the symmetric QP matrix Z and linear term Zv over the bound set.
315  for (int32_t i=0; i<num_bound; i++)
316  {
317  for (int32_t j=i; j<num_bound; j++)
318  {
319  Z[i*num_bound+j]= 2.0*C1*C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*((CBinaryLabels*) m_labels)->get_label(idx_bound[j])*
320  (features->dot(idx_bound[i], features, idx_bound[j]) + bias_const);
321 
322  Z[j*num_bound+i]=Z[i*num_bound+j];
323 
324  }
325 
326  Zv[i]=-2.0*C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*
327  (features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active);
328  }
329 
330  //CMath::display_matrix(Z, num_bound, num_bound, "Z");
331  //CMath::display_vector(Zv, num_bound, "Zv");
332  t2.stop();
333 #ifdef DEBUG_SUBGRADIENTSVM
334  t2.time_diff_sec(true);
335 #endif
336 
337  CTime t;
338  CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);
339  //solver.set_solver(QPB_SOLVER_GRADDESC);
340  //solver.set_solver(QPB_SOLVER_GS);
341 #ifdef USE_CPLEX
342  solver.set_solver(QPB_SOLVER_CPLEX);
343 #else
344  solver.set_solver(QPB_SOLVER_SCAS);
345 #endif
346 
347  solver.solve_qp(beta, num_bound);
348 
349  t.stop();
// Accumulate QP solver wall time into `tim` (reported at end of training).
350 #ifdef DEBUG_SUBGRADIENTSVM
351  tim+=t.time_diff_sec(true);
352 #else
353  tim+=t.time_diff_sec(false);
354 #endif
355 
356  //CMath::display_vector(beta, num_bound, "beta gs");
357  //solver.set_solver(QPB_SOLVER_CPLEX);
358  //solver.solve_qp(beta, num_bound);
359  //CMath::display_vector(beta, num_bound, "beta cplex");
360 
361  //CMath::display_vector(grad_w, num_feat, "grad_w");
362  //SG_PRINT("grad_b:%f\n", grad_b);
363  }
364 
// Assemble the subgradient: grad_w = w - sum_CXy_active - sum_i beta_i*C1*y_i*x_i.
365  SGVector<float64_t>::add(grad_w, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);
// (missing line — presumably initializes grad_b before the loop below; verify.)
367 
368  for (int32_t i=0; i<num_bound; i++)
369  {
370  features->add_to_dense_vec(-C1*beta[i]*((CBinaryLabels*) m_labels)->get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
371  if (use_bias)
372  grad_b -= C1 * ((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*beta[i];
373  }
374 
375  dir_deriv = SGVector<float64_t>::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
376  for (int32_t i=0; i<num_bound; i++)
377  {
378  float64_t val= C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], grad_w, num_feat)+grad_b);
379  dir_deriv += CMath::max(0.0, val);
380  }
381  }
382  else
383  {
// No bound points: the (sub)gradient is unique and the directional derivative
// is simply its squared norm.
384  SGVector<float64_t>::add(grad_w, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);
386 
387  dir_deriv = SGVector<float64_t>::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
388  }
389 
390  return dir_deriv;
391 }
392 
393 float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec)
394 {
395  float64_t result= 0.5 * SGVector<float64_t>::dot(w.vector,w.vector, num_feat);
396 
397  for (int32_t i=0; i<num_vec; i++)
398  {
399  if (proj[i]<1.0)
400  result += C1 * (1.0-proj[i]);
401  }
402 
403  return result;
404 }
405 
406 void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec)
407 {
408  for (int32_t i=0; i<num_vec; i++)
409  proj[i]=((CBinaryLabels*) m_labels)->get_label(i)*(features->dense_dot(i, w.vector, num_feat)+bias);
410 }
411 
// Incrementally update the cached margins after a step of size alpha.
// NOTE(review): the entire body (original line 414) is missing from this
// extraction — presumably proj is updated in place using alpha and grad_proj
// (cheaper than a full compute_projection()); verify against the repository.
412 void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec)
413 {
415 }
416 
// Allocate and zero-initialize all per-training-run work buffers:
//  - w/bias and gradient accumulators (sized num_feat),
//  - per-vector arrays: proj, tmp_proj(+idx), grad_proj, hinge data, active flags,
//  - QP scratch (Z, Zv, beta and their old_* copies) sized by qpsize_limit.
// NOTE(review): original lines 424, 431 (presumably the sum_CXy_active alloc) and
// 472 (presumably the Z alloc) are missing from this extraction.
417 void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat)
418 {
419  // alloc normal and bias inited with 0
420  w=SGVector<float64_t>(num_feat);
421  w.zero();
422  //CMath::random_vector(w, num_feat, -1.0, 1.0);
423  bias=0;
425  grad_b=0;
426  qpsize_limit=5000;
427 
428  grad_w=SG_MALLOC(float64_t, num_feat);
429  memset(grad_w,0,sizeof(float64_t)*num_feat);
430 
// (allocation line for sum_CXy_active missing in this extraction; only its memset remains)
432  memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
433 
434  v=SG_MALLOC(float64_t, num_feat);
435  memset(v,0,sizeof(float64_t)*num_feat);
436 
437  old_v=SG_MALLOC(float64_t, num_feat);
438  memset(old_v,0,sizeof(float64_t)*num_feat);
439 
440  sum_Cy_active=0;
441 
442  proj= SG_MALLOC(float64_t, num_vec);
443  memset(proj,0,sizeof(float64_t)*num_vec);
444 
445  tmp_proj=SG_MALLOC(float64_t, num_vec);
// BUG(review): this memset zeroes `proj` (already zeroed above) instead of the
// freshly allocated `tmp_proj` — copy-paste error; tmp_proj stays uninitialized
// (it is fully overwritten in find_active before being read, but should be fixed).
446  memset(proj,0,sizeof(float64_t)*num_vec);
447 
448  tmp_proj_idx= SG_MALLOC(int32_t, num_vec);
449  memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
450 
451  grad_proj= SG_MALLOC(float64_t, num_vec);
452  memset(grad_proj,0,sizeof(float64_t)*num_vec);
453 
454  hinge_point= SG_MALLOC(float64_t, num_vec);
455  memset(hinge_point,0,sizeof(float64_t)*num_vec);
456 
457  hinge_idx= SG_MALLOC(int32_t, num_vec);
458  memset(hinge_idx,0,sizeof(int32_t)*num_vec);
459 
460  active=SG_MALLOC(uint8_t, num_vec);
461  memset(active,0,sizeof(uint8_t)*num_vec);
462 
463  old_active=SG_MALLOC(uint8_t, num_vec);
464  memset(old_active,0,sizeof(uint8_t)*num_vec);
465 
466  idx_bound=SG_MALLOC(int32_t, num_vec);
467  memset(idx_bound,0,sizeof(int32_t)*num_vec);
468 
469  idx_active=SG_MALLOC(int32_t, num_vec);
470  memset(idx_active,0,sizeof(int32_t)*num_vec);
471 
// (allocation line for Z missing in this extraction; only its memset remains)
473  memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
474 
475  Zv=SG_MALLOC(float64_t, qpsize_limit);
476  memset(Zv,0,sizeof(float64_t)*qpsize_limit);
477 
478  beta=SG_MALLOC(float64_t, qpsize_limit);
479  memset(beta,0,sizeof(float64_t)*qpsize_limit);
480 
481  old_Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit);
482  memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
483 
484  old_Zv=SG_MALLOC(float64_t, qpsize_limit);
485  memset(old_Zv,0,sizeof(float64_t)*qpsize_limit);
486 
487  old_beta=SG_MALLOC(float64_t, qpsize_limit);
488  memset(old_beta,0,sizeof(float64_t)*qpsize_limit);
489 
490 }
491 
// Release all work buffers allocated by init() and null the pointers so a
// subsequent train() can safely re-initialize.
// NOTE(review): the signature line (presumably void CSubGradientSVM::cleanup())
// and several SG_FREE lines (original 494-496, 499, 501-504 — likely hinge_idx,
// hinge_point, grad_proj, tmp_proj_idx, old_active, idx_bound, idx_active,
// sum_CXy_active) are missing from this extraction; the NULL assignments below
// suggest they are freed in the full source — verify.
493 {
497  SG_FREE(proj);
498  SG_FREE(tmp_proj);
500  SG_FREE(active);
505  SG_FREE(grad_w);
506  SG_FREE(v);
507  SG_FREE(Z);
508  SG_FREE(Zv);
509  SG_FREE(beta);
510  SG_FREE(old_v);
511  SG_FREE(old_Z);
512  SG_FREE(old_Zv);
513  SG_FREE(old_beta);
514 
515  hinge_idx=NULL;
516  proj=NULL;
517  active=NULL;
518  old_active=NULL;
519  idx_bound=NULL;
520  idx_active=NULL;
521  sum_CXy_active=NULL;
522  grad_w=NULL;
523  v=NULL;
524  Z=NULL;
525  Zv=NULL;
526  beta=NULL;
527 }
528 
// Main training loop: projected-subgradient descent with an active-set strategy.
// Per iteration: (1) find_active() reclassifies vectors and adapts the working
// tolerance, (2) update_active() refreshes the cached gradient sums, (3)
// compute_min_subgradient() builds the minimum-norm subgradient (small QP over
// the bound set), (4) line_search() picks the exact step alpha, then w and bias
// are updated and the cached margins refreshed. Stops on convergence
// (work_epsilon<=epsilon, no active-set changes, negligible alpha*|grad|^2) or
// user cancellation; frees buffers via cleanup() and returns true.
// NOTE(review): the signature line (presumably
// bool CSubGradientSVM::train_machine(CFeatures* data)) and several statements
// (original lines 558, 561, 582, 610, 616, 624, 627, 632 — including the actual
// w update and the no-improvement counter bookkeeping — also 647, likely a
// max_train_time check) are missing from this extraction; hedged notes inline.
530 {
531  tim=0;
532  SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
533  ASSERT(m_labels);
534 
535  if (data)
536  {
537  if (!data->has_property(FP_DOT))
538  SG_ERROR("Specified features are not of type CDotFeatures\n");
539  set_features((CDotFeatures*) data);
540  }
541  ASSERT(get_features());
542 
543  int32_t num_iterations=0;
544  int32_t num_train_labels=m_labels->get_num_labels();
545  int32_t num_feat=features->get_dim_feature_space();
546  int32_t num_vec=features->get_num_vectors();
547 
548  ASSERT(num_vec==num_train_labels);
549 
550  init(num_vec, num_feat);
551 
552  int32_t num_active=0;
553  int32_t num_bound=0;
554  float64_t alpha=0;
555  float64_t dir_deriv=0;
556  float64_t obj=0;
// Force the first find_active() pass to treat everything as changed.
557  delta_active=num_vec;
559 
// Start with a loose working tolerance; find_active() tightens it over time.
560  work_epsilon=0.99;
562 
563  compute_projection(num_feat, num_vec);
564 
565  CTime time;
566 #ifdef DEBUG_SUBGRADIENTSVM
567  float64_t loop_time=0;
568 #endif
569  while (!(CSignal::cancel_computations()))
570  {
571  CTime t;
572  delta_active=find_active(num_feat, num_vec, num_active, num_bound);
573 
574  update_active(num_feat, num_vec);
575 
576 #ifdef DEBUG_SUBGRADIENTSVM
577  SG_PRINT("==================================================\niteration: %d ", num_iterations);
578  obj=compute_objective(num_feat, num_vec);
579  SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
580  obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
581 #else
583 #endif
584  //CMath::display_vector(w, w_dim, "w");
585  //SG_PRINT("bias: %f\n", bias);
586  //CMath::display_vector(proj, num_vec, "proj");
587  //CMath::display_vector(idx_active, num_active, "idx_active");
588  //SG_PRINT("num_active: %d\n", num_active);
589  //CMath::display_vector(idx_bound, num_bound, "idx_bound");
590  //SG_PRINT("num_bound: %d\n", num_bound);
591  //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active");
592  //SG_PRINT("sum_Cy_active: %f\n", sum_Cy_active);
593  //CMath::display_vector(grad_w, num_feat, "grad_w");
594  //SG_PRINT("grad_b:%f\n", grad_b);
595 
596  dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);
597 
598  alpha=line_search(num_feat, num_vec);
599 
// Optimality check: only meaningful when the QP was solved exactly (small
// bound set) or after the no-improvement fallback has been exhausted.
600  if (num_it_noimprovement==10 || num_bound<qpsize_max)
601  {
602  float64_t norm_grad=SGVector<float64_t>::dot(grad_w, grad_w, num_feat) +
603  grad_b*grad_b;
604 
605 #ifdef DEBUG_SUBGRADIENTSVM
606  SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
607  "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
608  work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
609 #else
611 #endif
612 
613  if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
614  break;
615  else
// (missing line 616 — presumably resets num_it_noimprovement; verify.)
617  }
618 
// Track consecutive iterations without progress at the final tolerance.
619  if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
620  {
621  if (last_it_noimprovement==num_iterations-1)
622  {
623  SG_PRINT("no improvement...\n");
// (missing line 624 — presumably increments num_it_noimprovement; verify.)
625  }
626  else
// (missing line 627 — presumably resets num_it_noimprovement; verify.)
628 
629  last_it_noimprovement=num_iterations;
630  }
631 
// (missing line 632 — presumably the w update, w -= alpha*grad_w; verify.)
633  bias-=alpha*grad_b;
634 
635  update_projection(alpha, num_vec);
636  //compute_projection(num_feat, num_vec);
637  //CMath::display_vector(w.vector, w_dim, "w");
638  //SG_PRINT("bias: %f\n", bias);
639  //CMath::display_vector(proj, num_vec, "proj");
640 
641  t.stop();
642 #ifdef DEBUG_SUBGRADIENTSVM
643  loop_time=t.time_diff_sec();
644 #endif
645  num_iterations++;
646 
// (missing line 647 — presumably a max_train_time guard on `time`; verify.)
648  break;
649  }
650  SG_DONE();
651 
652  SG_INFO("converged after %d iterations\n", num_iterations);
653 
654  obj=compute_objective(num_feat, num_vec);
655  SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
656  obj, alpha, dir_deriv, num_bound, num_active);
657 
658 #ifdef DEBUG_SUBGRADIENTSVM
659  CMath::display_vector(w.vector, w.vlen, "w");
660  SG_PRINT("bias: %f\n", bias);
661 #endif
662  SG_INFO("solver time:%f s\n", tim);
663 
664  cleanup();
665 
666  return true;
667 }

SHOGUN Machine Learning Toolbox - Documentation