SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SubGradientLPM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2009 Soeren Sonnenburg
8  * Written (W) 2007-2008 Vojtech Franc
9  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
13 
14 #ifdef USE_CPLEX
15 
17 #include <shogun/lib/Signal.h>
18 #include <shogun/lib/Time.h>
23 #include <shogun/labels/Labels.h>
24 
25 using namespace shogun;
26 
27 #define DEBUG_SUBGRADIENTLPM
28 
30 : CLinearMachine(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
31  qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
32 {
33 }
34 
36  float64_t C, CDotFeatures* traindat, CLabels* trainlab)
37 : CLinearMachine(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
38  qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
39 {
40  CLinearMachine::features=traindat;
41  CLinearMachine::m_labels=trainlab;
42 }
43 
44 
46 {
47  cleanup();
48 }
49 
51  int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
52 {
53  //delta_active=0;
54  //num_active=0;
55  //num_bound=0;
56 
57  //for (int32_t i=0; i<num_vec; i++)
58  //{
59  // active[i]=0;
60 
61  // //within margin/wrong side
62  // if (proj[i] < 1-work_epsilon)
63  // {
64  // idx_active[num_active++]=i;
65  // active[i]=1;
66  // }
67 
68  // //on margin
69  // if (CMath::abs(proj[i]-1) <= work_epsilon)
70  // {
71  // idx_bound[num_bound++]=i;
72  // active[i]=2;
73  // }
74 
75  // if (active[i]!=old_active[i])
76  // delta_active++;
77  //}
78 
79  delta_bound=0;
80  delta_active=0;
81  num_active=0;
82  num_bound=0;
83 
84  for (int32_t i=0; i<num_vec; i++)
85  {
86  active[i]=0;
87 
88  //within margin/wrong side
89  if (proj[i] < 1-autoselected_epsilon)
90  {
91  idx_active[num_active++]=i;
92  active[i]=1;
93  }
94 
95  //on margin
97  {
98  idx_bound[num_bound++]=i;
99  active[i]=2;
100  }
101 
102  if (active[i]!=old_active[i])
103  delta_active++;
104 
105  if (active[i]==2 && old_active[i]==2)
106  delta_bound++;
107  }
108 
109 
110  if (delta_active==0 && work_epsilon<=epsilon) //we converged
111  return 0;
112  else if (delta_active==0) //lets decrease work_epsilon
113  {
116  num_bound=qpsize;
117  }
118 
119  delta_bound=0;
120  delta_active=0;
121  num_active=0;
122  num_bound=0;
123 
124  for (int32_t i=0; i<num_vec; i++)
125  {
126  tmp_proj[i]=CMath::abs(proj[i]-1);
127  tmp_proj_idx[i]=i;
128  }
129 
131 
133 
134 #ifdef DEBUG_SUBGRADIENTSVM
135  //SG_PRINT("autoseleps: %15.15f\n", autoselected_epsilon);
136 #endif
137 
140 
142  {
144 
145  int32_t i=0;
146  while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
147  i++;
148 
149  //SG_PRINT("lower bound on epsilon requires %d variables in qp\n", i);
150 
151  if (i>=qpsize_max && autoselected_epsilon>epsilon) //qpsize limit
152  {
153  SG_PRINT("qpsize limit (%d) reached\n", qpsize_max);
154  int32_t num_in_qp=i;
155  while (--i>=0 && num_in_qp>=qpsize_max)
156  {
158  {
160  num_in_qp--;
161  }
162  }
163 
164  //SG_PRINT("new qpsize will be %d, autoeps:%15.15f\n", num_in_qp, autoselected_epsilon);
165  }
166  }
167 
168  for (int32_t i=0; i<num_vec; i++)
169  {
170  active[i]=0;
171 
172  //within margin/wrong side
173  if (proj[i] < 1-autoselected_epsilon)
174  {
175  idx_active[num_active++]=i;
176  active[i]=1;
177  }
178 
179  //on margin
180  if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
181  {
182  idx_bound[num_bound++]=i;
183  active[i]=2;
184  }
185 
186  if (active[i]!=old_active[i])
187  delta_active++;
188 
189  if (active[i]==2 && old_active[i]==2)
190  delta_bound++;
191  }
192 
193  pos_idx=0;
194  neg_idx=0;
195  zero_idx=0;
196 
197  for (int32_t i=0; i<num_feat; i++)
198  {
199  if (w[i]>work_epsilon)
200  {
201  w_pos[pos_idx++]=i;
202  grad_w[i]=1;
203  }
204  else if (w[i]<-work_epsilon)
205  {
206  w_neg[neg_idx++]=i;
207  grad_w[i]=-1;
208  }
209 
210  if (CMath::abs(w[i])<=work_epsilon)
211  {
212  w_zero[zero_idx++]=i;
213  grad_w[i]=-1;
214  }
215  }
216 
217  return delta_active;
218 }
219 
220 
221 void CSubGradientLPM::update_active(int32_t num_feat, int32_t num_vec)
222 {
223  for (int32_t i=0; i<num_vec; i++)
224  {
225  int32_t lab = ((CBinaryLabels*) m_labels)->get_int_label(i);
226  if (active[i]==1 && old_active[i]!=1)
227  {
228  features->add_to_dense_vec(C1*lab, i, sum_CXy_active, num_feat);
229  if (use_bias)
230  sum_Cy_active+=C1*lab;
231  }
232  else if (old_active[i]==1 && active[i]!=1)
233  {
234  features->add_to_dense_vec(-C1*lab, i, sum_CXy_active, num_feat);
235  if (use_bias)
236  sum_Cy_active-=C1*lab;
237  }
238  }
239 
241 }
242 
243 float64_t CSubGradientLPM::line_search(int32_t num_feat, int32_t num_vec)
244 {
245  int32_t num_hinge=0;
246  float64_t alpha=0;
247  float64_t sgrad=0;
248 
249  float64_t* A=SG_MALLOC(float64_t, num_feat+num_vec);
250  float64_t* B=SG_MALLOC(float64_t, num_feat+num_vec);
251  float64_t* C=SG_MALLOC(float64_t, num_feat+num_vec);
252  float64_t* D=SG_MALLOC(float64_t, num_feat+num_vec);
253 
254  for (int32_t i=0; i<num_feat+num_vec; i++)
255  {
256  if (i<num_feat)
257  {
258  A[i]=-grad_w[i];
259  B[i]=w[i];
260  C[i]=+grad_w[i];
261  D[i]=-w[i];
262  }
263  else
264  {
265  float64_t p=get_label(i-num_feat)*(features->dense_dot(i-num_feat, grad_w, num_feat)+grad_b);
266  grad_proj[i-num_feat]=p;
267 
268  A[i]=0;
269  B[i]=0;
270  C[i]=C1*p;
271  D[i]=C1*(1-proj[i-num_feat]);
272  }
273 
274  if (A[i]==C[i] && B[i]>D[i])
275  sgrad+=A[i]+C[i];
276  else if (A[i]==C[i] && B[i]==D[i])
277  sgrad+=CMath::max(A[i],C[i]);
278  else if (A[i]!=C[i])
279  {
280  hinge_point[num_hinge]=(D[i]-B[i])/(A[i]-C[i]);
281  hinge_idx[num_hinge]=i; // index into A,B,C,D arrays
282  num_hinge++;
283 
284  if (A[i]>C[i])
285  sgrad+=C[i];
286  if (A[i]<C[i])
287  sgrad+=A[i];
288  }
289  }
290 
291  //SG_PRINT("sgrad:%f\n", sgrad);
292  //CMath::display_vector(A, num_feat+num_vec, "A");
293  //CMath::display_vector(B, num_feat+num_vec, "B");
294  //CMath::display_vector(C, num_feat+num_vec, "C");
295  //CMath::display_vector(D, num_feat+num_vec, "D");
296  //CMath::display_vector(hinge_point, num_feat+num_vec, "hinge_point");
297  //CMath::display_vector(hinge_idx, num_feat+num_vec, "hinge_idx");
298  //ASSERT(0);
299 
301  //CMath::display_vector(hinge_point, num_feat+num_vec, "hinge_point_sorted");
302 
303 
304  int32_t i=-1;
305  while (i < num_hinge-1 && sgrad < 0)
306  {
307  i+=1;
308 
309  if (A[hinge_idx[i]] > C[hinge_idx[i]])
310  sgrad += A[hinge_idx[i]] - C[hinge_idx[i]];
311  else
312  sgrad += C[hinge_idx[i]] - A[hinge_idx[i]];
313  }
314 
315  alpha = hinge_point[i];
316 
317  SG_FREE(D);
318  SG_FREE(C);
319  SG_FREE(B);
320  SG_FREE(A);
321 
322  //SG_PRINT("alpha=%f\n", alpha);
323  return alpha;
324 }
325 
327  int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
328 {
329  float64_t dir_deriv=0;
330  solver->init(E_QP);
331 
332  if (zero_idx+num_bound > 0)
333  {
334  //SG_PRINT("num_var:%d (zero:%d, bound:%d) num_feat:%d\n", zero_idx+num_bound, zero_idx,num_bound, num_feat);
335  //CMath::display_vector(grad_w, num_feat+1, "grad_w");
336  CMath::add(grad_w, 1.0, grad_w, -1.0, sum_CXy_active, num_feat);
337  grad_w[num_feat]= -sum_Cy_active;
338 
340 
341  //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active");
342  //SG_PRINT("sum_Cy_active=%10.10f\n", sum_Cy_active);
343 
344  //CMath::display_vector(grad_w, num_feat+1, "grad_w");
345 
347  w_zero, zero_idx,
348  grad_w, num_feat+1,
349  use_bias);
350 
351  solver->optimize(beta);
352  //CMath::display_vector(beta, num_feat+1, "v");
353 
354  //compute dir_deriv here, variable grad_w constains still 'v' and beta
355  //contains the future gradient
356  dir_deriv = CMath::dot(beta, grad_w, num_feat);
357  dir_deriv-=beta[num_feat]*sum_Cy_active;
358 
359  for (int32_t i=0; i<num_bound; i++)
360  {
361  float64_t val= C1*get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], beta, num_feat)+ beta[num_feat]);
362  dir_deriv += CMath::max(0.0, val);
363  }
364 
365  for (int32_t i=0; i<num_feat; i++)
366  grad_w[i]=beta[i];
367 
368  if (use_bias)
369  grad_b=beta[num_feat];
370 
371  //for (int32_t i=0; i<zero_idx+num_bound; i++)
372  // beta[i]=beta[i+num_feat+1];
373 
374  //CMath::display_vector(beta, zero_idx+num_bound, "beta");
375  //SG_PRINT("beta[0]=%10.16f\n", beta[0]);
376  //ASSERT(0);
377 
378  //for (int32_t i=0; i<zero_idx+num_bound; i++)
379  //{
380  // if (i<zero_idx)
381  // grad_w[w_zero[i]]+=beta[w_zero[i]];
382  // else
383  // {
384  // features->add_to_dense_vec(-C1*beta[i]*get_label(idx_bound[i-zero_idx]), idx_bound[i-zero_idx], grad_w, num_feat);
385  // if (use_bias)
386  // grad_b -= C1 * get_label(idx_bound[i-zero_idx])*beta[i-zero_idx];
387  // }
388  //}
389 
390  //CMath::display_vector(w_zero, zero_idx, "w_zero");
391  //CMath::display_vector(grad_w, num_feat, "grad_w");
392  //SG_PRINT("grad_b=%f\n", grad_b);
393  //
394  }
395  else
396  {
397  CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
399 
400  dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
401  }
402 
403  solver->cleanup();
404 
405 
406  //SG_PRINT("Gradient : |subgrad_W|^2=%f, |subgrad_b|^2=%f\n",
407  // CMath::dot(grad_w, grad_w, num_feat), grad_b*grad_b);
408 
409  return dir_deriv;
410 }
411 
412 float64_t CSubGradientLPM::compute_objective(int32_t num_feat, int32_t num_vec)
413 {
414  float64_t result= CMath::sum_abs(w, num_feat);
415 
416  for (int32_t i=0; i<num_vec; i++)
417  {
418  if (proj[i]<1.0)
419  result += C1 * (1.0-proj[i]);
420  }
421 
422  return result;
423 }
424 
425 void CSubGradientLPM::compute_projection(int32_t num_feat, int32_t num_vec)
426 {
427  for (int32_t i=0; i<num_vec; i++)
428  proj[i]=get_label(i)*(features->dense_dot(i, w, num_feat) + bias);
429 }
430 
431 void CSubGradientLPM::update_projection(float64_t alpha, int32_t num_vec)
432 {
433  CMath::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
434 }
435 
436 void CSubGradientLPM::init(int32_t num_vec, int32_t num_feat)
437 {
438  // alloc normal and bias inited with 0
439  SG_FREE(w);
440  w=SG_MALLOC(float64_t, num_feat);
441  w_dim=num_feat;
442  for (int32_t i=0; i<num_feat; i++)
443  w[i]=1.0;
444  //CMath::random_vector(w, num_feat, -1.0, 1.0);
445  bias=0;
447  grad_b=0;
448 
449  w_pos=SG_MALLOC(int32_t, num_feat);
450  memset(w_pos,0,sizeof(int32_t)*num_feat);
451 
452  w_zero=SG_MALLOC(int32_t, num_feat);
453  memset(w_zero,0,sizeof(int32_t)*num_feat);
454 
455  w_neg=SG_MALLOC(int32_t, num_feat);
456  memset(w_neg,0,sizeof(int32_t)*num_feat);
457 
458  grad_w=SG_MALLOC(float64_t, num_feat+1);
459  memset(grad_w,0,sizeof(float64_t)*(num_feat+1));
460 
462  memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
463 
464  sum_Cy_active=0;
465 
466  proj=SG_MALLOC(float64_t, num_vec);
467  memset(proj,0,sizeof(float64_t)*num_vec);
468 
469  tmp_proj=SG_MALLOC(float64_t, num_vec);
470  memset(proj,0,sizeof(float64_t)*num_vec);
471 
472  tmp_proj_idx=SG_MALLOC(int32_t, num_vec);
473  memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
474 
475  grad_proj=SG_MALLOC(float64_t, num_vec);
476  memset(grad_proj,0,sizeof(float64_t)*num_vec);
477 
478  hinge_point=SG_MALLOC(float64_t, num_vec+num_feat);
479  memset(hinge_point,0,sizeof(float64_t)*(num_vec+num_feat));
480 
481  hinge_idx=SG_MALLOC(int32_t, num_vec+num_feat);
482  memset(hinge_idx,0,sizeof(int32_t)*(num_vec+num_feat));
483 
484  active=SG_MALLOC(uint8_t, num_vec);
485  memset(active,0,sizeof(uint8_t)*num_vec);
486 
487  old_active=SG_MALLOC(uint8_t, num_vec);
488  memset(old_active,0,sizeof(uint8_t)*num_vec);
489 
490  idx_bound=SG_MALLOC(int32_t, num_vec);
491  memset(idx_bound,0,sizeof(int32_t)*num_vec);
492 
493  idx_active=SG_MALLOC(int32_t, num_vec);
494  memset(idx_active,0,sizeof(int32_t)*num_vec);
495 
496  beta=SG_MALLOC(float64_t, num_feat+1+num_feat+num_vec);
497  memset(beta,0,sizeof(float64_t)*num_feat+1+num_feat+num_vec);
498 
499  solver=new CCplex();
500 }
501 
503 {
507  SG_FREE(proj);
508  SG_FREE(tmp_proj);
510  SG_FREE(active);
515  SG_FREE(w_pos);
516  SG_FREE(w_zero);
517  SG_FREE(w_neg);
518  SG_FREE(grad_w);
519  SG_FREE(beta);
520 
521  hinge_idx=NULL;
522  hinge_point=NULL;
523  grad_proj=NULL;
524  proj=NULL;
525  tmp_proj=NULL;
526  tmp_proj_idx=NULL;
527  active=NULL;
528  old_active=NULL;
529  idx_bound=NULL;
530  idx_active=NULL;
531  sum_CXy_active=NULL;
532  w_pos=NULL;
533  w_zero=NULL;
534  w_neg=NULL;
535  grad_w=NULL;
536  beta=NULL;
537 
538  delete solver;
539  solver=NULL;
540 }
541 
543 {
544  lpmtim=0;
545  SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
546  ASSERT(m_labels);
547  if (data)
548  {
549  if (!data->has_property(FP_DOT))
550  SG_ERROR("Specified features are not of type CDotFeatures\n");
551  set_features((CDotFeatures*) data);
552  }
553  ASSERT(features);
554 
555  int32_t num_iterations=0;
556  int32_t num_train_labels=m_labels->get_num_labels();
557  int32_t num_feat=features->get_dim_feature_space();
558  int32_t num_vec=features->get_num_vectors();
559 
560  ASSERT(num_vec==num_train_labels);
561 
562  init(num_vec, num_feat);
563 
564  int32_t num_active=0;
565  int32_t num_bound=0;
566  float64_t alpha=0;
567  float64_t dir_deriv=0;
568  float64_t obj=0;
569  delta_active=num_vec;
571 
572  work_epsilon=0.99;
574 
575  compute_projection(num_feat, num_vec);
576 
577  CTime time;
578  float64_t loop_time=0;
579  while (!(CSignal::cancel_computations()))
580  {
581  CTime t;
582  delta_active=find_active(num_feat, num_vec, num_active, num_bound);
583 
584  update_active(num_feat, num_vec);
585 
586 #ifdef DEBUG_SUBGRADIENTLPM
587  SG_PRINT("==================================================\niteration: %d ", num_iterations);
588  obj=compute_objective(num_feat, num_vec);
589  SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
590  obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
591 #else
593 #endif
594  //CMath::display_vector(w, w_dim, "w");
595  //SG_PRINT("bias: %f\n", bias);
596  //CMath::display_vector(proj, num_vec, "proj");
597  //CMath::display_vector(idx_active, num_active, "idx_active");
598  //SG_PRINT("num_active: %d\n", num_active);
599  //CMath::display_vector(idx_bound, num_bound, "idx_bound");
600  //SG_PRINT("num_bound: %d\n", num_bound);
601  //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active");
602  //SG_PRINT("sum_Cy_active: %f\n", sum_Cy_active);
603  //CMath::display_vector(grad_w, num_feat, "grad_w");
604  //SG_PRINT("grad_b:%f\n", grad_b);
605 
606  dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);
607 
608  alpha=line_search(num_feat, num_vec);
609 
610  if (num_it_noimprovement==10 || num_bound<qpsize_max)
611  {
612  float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
613  grad_b*grad_b;
614 
615  SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
616  "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
617  work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
618 
619  if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
620  break;
621  else
623  }
624 
625  //if (work_epsilon<=epsilon && delta_active==0 && num_it_noimprovement)
626  if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
627  {
628  if (last_it_noimprovement==num_iterations-1)
629  {
630  SG_PRINT("no improvement...\n");
632  }
633  else
635 
636  last_it_noimprovement=num_iterations;
637  }
638 
639  CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
640  bias-=alpha*grad_b;
641 
642  update_projection(alpha, num_vec);
643 
644  t.stop();
645  loop_time=t.time_diff_sec();
646  num_iterations++;
647 
649  break;
650  }
651 
652  SG_INFO("converged after %d iterations\n", num_iterations);
653 
654  obj=compute_objective(num_feat, num_vec);
655  SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
656  obj, alpha, dir_deriv, num_bound, num_active);
657 
658 #ifdef DEBUG_SUBGRADIENTLPM
659  CMath::display_vector(w, w_dim, "w");
660  SG_PRINT("bias: %f\n", bias);
661 #endif
662  SG_PRINT("solver time:%f s\n", lpmtim);
663 
664  cleanup();
665 
666  return true;
667 }
668 #endif //USE_CPLEX

SHOGUN Machine Learning Toolbox - Documentation