SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
libncbm.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Viktor Gal
8  *
9  */
10 
12 #include <shogun/lib/Time.h>
13 
14 #include <shogun/lib/external/libqp.h>
16 
17 #include <vector>
18 
19 namespace shogun
20 {
21 
23 static uint32_t maxCPs;
24 static const float64_t epsilon=0.0;
25 
26 static const float64_t *get_col(uint32_t i)
27 {
28  return (&HMatrix[maxCPs*i]);
29 }
30 
32 {
33  /* */
39 };
40 
/** Zoom phase of a strong-Wolfe line search (cf. Nocedal & Wright,
 * "Numerical Optimization", Alg. 3.6): repeatedly shrinks the bracket
 * [a_lo, a_hi] until a step satisfying both Wolfe conditions is found,
 * or until the bracket collapses / 5 iterations elapse.
 *
 * @param machine          SO-SVM machine used to evaluate risk/subgradient
 * @param lambda           regularization constant
 * @param a_lo, a_hi       current step-length bracket (order may be reversed)
 * @param initial_fval     objective value at step 0
 * @param initial_solution w at step 0; trial points are initial + a*search_dir
 * @param search_dir       line-search direction
 * @param wolfe_c1         sufficient-decrease constant (Armijo)
 * @param wolfe_c2         curvature-condition constant
 * @param init_lgrad       directional derivative at step 0 (negative for descent)
 * @param f_lo, g_lo       objective and directional derivative at a_lo
 * @param f_hi, g_hi       objective and directional derivative at a_hi
 * @return                 best step found (step, fval, reg, w, gradient)
 */
inline static line_search_res zoom
(
	CDualLibQPBMSOSVM *machine,
	float64_t lambda,
	float64_t a_lo,
	float64_t a_hi,
	float64_t initial_fval,
	SGVector<float64_t>& initial_solution,
	SGVector<float64_t>& search_dir,
	float64_t wolfe_c1,
	float64_t wolfe_c2,
	float64_t init_lgrad,
	float64_t f_lo,
	float64_t g_lo,
	float64_t f_hi,
	float64_t g_hi
)
{
	line_search_res ls_res;
	ls_res.solution.resize_vector(initial_solution.vlen);
	ls_res.gradient.resize_vector(initial_solution.vlen);

	SGVector<float64_t> cur_solution(initial_solution.vlen);
	cur_solution.zero();
	SGVector<float64_t> cur_grad(initial_solution.vlen);

	uint32_t iter = 0;
	while (1)
	{
		/* cubic interpolation of a trial step a_j inside the bracket */
		float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
		float64_t d2 = CMath::sqrt(d1*d1 - g_lo*g_hi);
		float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);

		/* fall back to bisection if the interpolated step leaves the
		 * bracket (handles both bracket orderings) */
		if (a_lo < a_hi)
		{
			if ((a_j < a_lo) || (a_j > a_hi))
			{
				a_j = 0.5*(a_lo+a_hi);
			}
		}
		else
		{
			if ((a_j > a_lo) || (a_j < a_hi))
			{
				a_j = 0.5*(a_lo+a_hi);
			}
		}

		/* trial point: cur_solution = initial_solution + a_j*search_dir */
		cur_solution.add(cur_solution.vector, 1.0,
			initial_solution.vector, a_j,
			search_dir.vector, cur_solution.vlen);

		/* objective = risk + 0.5*lambda*||w||^2 */
		float64_t cur_fval = machine->risk(cur_grad.vector, cur_solution.vector);
		float64_t cur_reg
			= 0.5*lambda*cur_solution.dot(cur_solution.vector,
					cur_solution.vector, cur_solution.vlen);
		cur_fval += cur_reg;

		/* full (sub)gradient: risk subgradient + lambda*w */
		cur_grad.vec1_plus_scalar_times_vec2(cur_grad.vector, lambda, cur_solution.vector, cur_grad.vlen);

		/* Armijo violated (or no improvement over the low end):
		 * shrink the bracket from the high side */
		if
		(
			(cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
			||
			(cur_fval > f_lo)
		)
		{
			a_hi = a_j;
			f_hi = cur_fval;
			g_hi = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
		}
		else
		{
			float64_t cur_lgrad
				= cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);

			/* strong curvature condition satisfied -> done
			 * (init_lgrad < 0 for a descent direction, so the rhs > 0) */
			if (CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
			{
				ls_res.a = a_j;
				ls_res.fval = cur_fval;
				ls_res.reg = cur_reg;
				ls_res.gradient = cur_grad;
				ls_res.solution = cur_solution;
//				SG_SPRINT("in zoom (wolfe2): %f\n", cur_fval)
				return ls_res;
			}

			/* keep the bracket pointing downhill, then move the low end */
			if (cur_lgrad*(a_hi - a_lo) >= 0)
			{
				a_hi = a_lo;
				f_hi = f_lo;
				g_hi = g_lo;
			}
			a_lo = a_j;
			f_lo = cur_fval;
			g_lo = cur_lgrad;
		}

		/* give up when the bracket has collapsed (to within 1% of a_lo)
		 * or after 5 iterations; return the last trial point */
		if
		(
			(CMath::abs(a_lo - a_hi) <= 0.01*a_lo)
			||
			(iter >= 5)
		)
		{
			ls_res.a = a_j;
			ls_res.fval = cur_fval;
			ls_res.reg = cur_reg;
			ls_res.gradient = cur_grad;
			ls_res.solution = cur_solution;
//			SG_SPRINT("in zoom iter: %d %f\n", iter, cur_fval)
			return ls_res;
		}
		iter++;
	}
}
157 
/** Line search enforcing the strong Wolfe conditions (Nocedal & Wright,
 * Alg. 3.5).  Probes increasing step lengths from @p astart towards
 * @p amax, delegating to zoom() once a bracket is found.
 *
 * @param machine          SO-SVM machine used to evaluate risk/subgradient
 * @param lambda           regularization constant
 * @param initial_val      objective value at step 0
 * @param initial_solution w at step 0
 * @param initial_grad     risk subgradient at step 0; modified in place to
 *                         become the full gradient (+ lambda*w)
 * @param search_dir       line-search direction
 * @param astart           first trial step length
 * @param amax             maximum step length
 * @param wolfe_c1         sufficient-decrease constant
 * @param wolfe_c2         curvature-condition constant
 * @param max_iter         maximum number of trial steps
 * @return vector with 2 entries: [0] the evaluation at the first trial
 *         step, [1] the accepted line-search result.
 */
inline std::vector<line_search_res> line_search_with_strong_wolfe
(
	CDualLibQPBMSOSVM *machine,
	float64_t lambda,
	float64_t initial_val,
	SGVector<float64_t>& initial_solution,
	SGVector<float64_t>& initial_grad,
	SGVector<float64_t>& search_dir,
	float64_t astart,
	float64_t amax = 1.1,
	float64_t wolfe_c1 = 1E-4,
	float64_t wolfe_c2 = 0.9,
	float64_t max_iter = 5
)
{
	/* NOTE:
	 * model->risk returns only the risk as it's name says
	 * have to cur_fval = model.risk + (lambda*0.5*w*w')
	 *
	 * subgrad should be: subgrad + (lambda*w)
	 *
	 */

	uint32_t iter = 0;

	/* turn the risk subgradient into the full objective gradient */
	initial_grad.vec1_plus_scalar_times_vec2(initial_grad.vector, lambda, initial_solution.vector, initial_grad.vlen);

	/* directional derivative at step 0 (negative for a descent direction) */
	float64_t initial_lgrad = initial_grad.dot(initial_grad.vector, search_dir.vector, initial_grad.vlen);
	float64_t prev_lgrad = initial_lgrad;
	float64_t prev_fval = initial_val;

	float64_t prev_a = 0;
	float64_t cur_a = astart;

	std::vector<line_search_res> ret;
	while (1)
	{
		SGVector<float64_t> x(initial_solution.vlen);
		SGVector<float64_t> cur_subgrad(initial_solution.vlen);

		/* trial point x = initial_solution + cur_a*search_dir and its
		 * objective value / full gradient */
		x.add(x.vector, 1.0, initial_solution.vector, cur_a, search_dir.vector, x.vlen);
		float64_t cur_fval = machine->risk(cur_subgrad.vector, x.vector);
		float64_t cur_reg = 0.5*lambda*x.dot(x.vector, x.vector, x.vlen);
		cur_fval += cur_reg;

		cur_subgrad.vec1_plus_scalar_times_vec2(cur_subgrad.vector, lambda, x.vector, x.vlen);
		/* always record the very first trial point as ret[0]
		 * NOTE(review): initial_step.a is left uninitialized here —
		 * callers appear to use only fval/reg/solution/gradient of ret[0];
		 * confirm before relying on ret[0].a */
		if (iter == 0)
		{
			line_search_res initial_step;
			initial_step.fval = cur_fval;
			initial_step.reg = cur_reg;
			initial_step.gradient = cur_subgrad;
			initial_step.solution = x;
			ret.push_back(initial_step);
		}

		float64_t cur_lgrad
			= cur_subgrad.dot(cur_subgrad.vector, search_dir.vector,
					cur_subgrad.vlen);
		/* Armijo violated, or objective did not decrease: a minimizer is
		 * bracketed in [prev_a, cur_a] -> zoom */
		if
		(
			(cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
			||
			(cur_fval >= prev_fval && iter > 0)
		)
		{
			ret.push_back(
				zoom(machine, lambda, prev_a, cur_a, initial_val,
					initial_solution, search_dir, wolfe_c1, wolfe_c2,
					initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
			);
			return ret;
		}

		/* strong curvature condition holds -> accept cur_a */
		if (CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
		{
			line_search_res ls_res;
			ls_res.a = cur_a;
			ls_res.fval = cur_fval;
			ls_res.reg = cur_reg;
			ls_res.solution = x;
			ls_res.gradient = cur_subgrad;
			ret.push_back(ls_res);
			return ret;
		}

		/* slope turned non-negative: minimizer bracketed in reversed
		 * order [cur_a, prev_a] -> zoom */
		if (cur_lgrad >= 0)
		{
			ret.push_back(
				zoom(machine, lambda, cur_a, prev_a, initial_val,
					initial_solution, search_dir, wolfe_c1, wolfe_c2,
					initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
			);
			return ret;
		}
		iter++;
		/* reached amax (within 1%) or iteration cap: accept cur_a */
		if ((CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
		{
			line_search_res ls_res;
			ls_res.a = cur_a;
			ls_res.fval = cur_fval;
			ls_res.reg = cur_reg;
			ls_res.solution = x;
			ls_res.gradient = cur_subgrad;
			ret.push_back(ls_res);
			return ret;
		}

		prev_a = cur_a;
		prev_fval = cur_fval;
		prev_lgrad = cur_lgrad;

		/* step towards amax */
		cur_a = (cur_a + amax)*0.5;
	}
}
273 
274 inline void update_H(BmrmStatistics& ncbm,
275  bmrm_ll* head,
276  bmrm_ll* tail,
278  SGVector<float64_t>& diag_H,
279  float64_t lambda,
280  uint32_t maxCP,
281  int32_t w_dim)
282 {
283  float64_t* a_2 = get_cutting_plane(tail);
284  bmrm_ll* cp_ptr=head;
285 
286  for (uint32_t i=0; i < ncbm.nCP; ++i)
287  {
288  float64_t* a_1 = get_cutting_plane(cp_ptr);
289  cp_ptr=cp_ptr->next;
290 
291  float64_t dot_val = SGVector<float64_t>::dot(a_2, a_1, w_dim);
292 
293  H.matrix[LIBBMRM_INDEX(ncbm.nCP, i, maxCP)]
294  = H.matrix[LIBBMRM_INDEX(i, ncbm.nCP, maxCP)]
295  = dot_val/lambda;
296  }
297 
298  /* set the diagonal element, i.e. subgrad_i*subgrad_i' */
299  float64_t dot_val = SGVector<float64_t>::dot(a_2, a_2, w_dim);
300  H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]=dot_val/lambda;
301 
302  diag_H[ncbm.nCP]=H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)];
303 
304  ncbm.nCP++;
305 }
306 
307 
309  CDualLibQPBMSOSVM *machine,
310  float64_t *w,
311  float64_t TolRel,
312  float64_t TolAbs,
313  float64_t _lambda,
314  uint32_t _BufSize,
315  bool cleanICP,
316  uint32_t cleanAfter,
317  bool is_convex,
318  bool line_search,
319  bool verbose
320  )
321 {
322  BmrmStatistics ncbm;
323  libqp_state_T qp_exitflag={0, 0, 0, 0};
324  int32_t w_dim = machine->get_model()->get_dim();
325 
326  maxCPs = _BufSize;
327 
328  ncbm.nCP=0;
329  ncbm.nIter=0;
330  ncbm.exitflag=0;
331 
332  /* variables for timing the algorithm*/
333  CTime ttime;
334  float64_t tstart, tstop;
335  tstart=ttime.cur_time_diff(false);
336 
337  /* matrix of subgradiends */
338  SGMatrix<float64_t> A(w_dim, maxCPs);
339 
340  /* bias vector */
342  bias.zero();
343 
344  /* Matrix for storing H = A*A' */
346  HMatrix = H.matrix;
347 
348  /* diag_H */
349  SGVector<float64_t> diag_H(maxCPs);
350  diag_H.zero();
351 
352  /* x the solution vector of the dual problem:
353  * 1/lambda x*H*x' - x*bias
354  */
356  x.zero();
357 
358  /* for sum_{i in I_k} x[i] <= b[k] for all k such that S[k] == 1 */
359  float64_t b = 1.0;
360  uint8_t S = 1;
362  I.set_const(1);
363 
364  /* libqp paramteres */
365  uint32_t QPSolverMaxIter = 0xFFFFFFFF;
366  float64_t QPSolverTolRel = 1E-9;
367 
368  /* variables for maintaining inactive planes */
369  SGVector<bool> map(maxCPs);
370  map.set_const(true);
371  ICP_stats icp_stats;
372  icp_stats.maxCPs = maxCPs;
373  icp_stats.ICPcounter = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
374  icp_stats.ICPs = (float64_t**) LIBBMRM_CALLOC(maxCPs, float64_t*);
375  icp_stats.ACPs = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
377  if
378  (
379  icp_stats.ICPcounter == NULL || icp_stats.ICPs == NULL
380  || icp_stats.ACPs == NULL || icp_stats.H_buff == NULL
381  )
382  {
383  ncbm.exitflag=-2;
384  LIBBMRM_FREE(icp_stats.ICPcounter);
385  LIBBMRM_FREE(icp_stats.ICPs);
386  LIBBMRM_FREE(icp_stats.ACPs);
387  LIBBMRM_FREE(icp_stats.H_buff);
388  return ncbm;
389  }
390 
391  /* best */
392  float64_t best_Fp = CMath::INFTY;
393  float64_t best_risk = CMath::INFTY;
394  SGVector<float64_t> best_w(w_dim);
395  best_w.zero();
396  SGVector<float64_t> best_subgrad(w_dim);
397  best_subgrad.zero();
398 
399  /* initial solution */
400  SGVector<float64_t> cur_subgrad(w_dim);
401  SGVector<float64_t> cur_w(w_dim);
402  memcpy(cur_w.vector, w, sizeof(float64_t)*w_dim);
403 
404  float64_t cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
405  bias[0] = -cur_risk;
406  best_Fp = 0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen) + cur_risk;
407  best_risk = cur_risk;
408  memcpy(best_w.vector, cur_w.vector, sizeof(float64_t)*w_dim);
409  memcpy(best_subgrad.vector, cur_subgrad.vector, sizeof(float64_t)*w_dim);
410 
411  /* create a double-linked list over the A the subgrad matrix */
412  bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
413  cp_list = (bmrm_ll*) SG_CALLOC(bmrm_ll, 1);
414  if (cp_list==NULL)
415  {
416  ncbm.exitflag=-2;
417  return ncbm;
418  }
419  /* save the subgradient */
420  memcpy(A.matrix, cur_subgrad.vector, sizeof(float64_t)*w_dim);
421  map[0] = false;
422  cp_list->address=&A[0];
423  cp_list->idx=0;
424  cp_list->prev=NULL;
425  cp_list->next=NULL;
426  CPList_head=cp_list;
427  CPList_tail=cp_list;
428 
429  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
430  tstop=ttime.cur_time_diff(false);
431  if (verbose)
432  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
433  ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk);
434 
435  float64_t astar = 0.01;
436 
437  SG_SPRINT("clean icps: %d\n", cleanICP)
438  while (ncbm.exitflag==0)
439  {
440  tstart=ttime.cur_time_diff(false);
441  ncbm.nIter++;
442 
443  //diag_H.display_vector();
444  //bias.display_vector();
445 
446  /* solve the dual of the problem, namely:
447  *
448  */
449  qp_exitflag =
450  libqp_splx_solver(&get_col, diag_H.vector, bias.vector, &b, I.vector, &S, x.vector,
451  ncbm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
452 
453  ncbm.Fd = -qp_exitflag.QP;
454 
455  ncbm.qp_exitflag=qp_exitflag.exitflag;
456 
457  /* Update ICPcounter (add one to unused and reset used)
458  * + compute number of active CPs */
459  ncbm.nzA=0;
460 
461  for (uint32_t i=0; i < ncbm.nCP; ++i)
462  {
463  if (x[i] > epsilon)
464  {
465  /* cp was active => reset counter */
466  ++ncbm.nzA;
467  icp_stats.ICPcounter[i]=0;
468  }
469  else
470  {
471  icp_stats.ICPcounter[i]++;
472  }
473  }
474 
475  /* Inactive Cutting Planes (ICP) removal */
476  if (cleanICP)
477  {
478  clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
479  H.matrix, diag_H.vector, x.vector,
480  map.vector, cleanAfter, bias.vector, I.vector);
481  }
482 
483  /* calculate the new w
484  * w[i] = -1/lambda*A[i]*x[i]
485  */
486  cur_w.zero();
487  cp_ptr=CPList_head;
488  for (uint32_t i=0; i < ncbm.nCP; ++i)
489  {
490  float64_t* A_1 = get_cutting_plane(cp_ptr);
491  cp_ptr=cp_ptr->next;
492  SGVector<float64_t>::vec1_plus_scalar_times_vec2(cur_w.vector, -x[i]/_lambda, A_1, w_dim);
493  }
494 
495  bool calc_gap = false;
496  if (calc_gap)
497  {
498  SGVector<float64_t> scores(ncbm.nCP);
499  cp_ptr=CPList_head;
500 
501  for (uint32_t i=0; i < ncbm.nCP; ++i)
502  {
503  float64_t* a_1 = get_cutting_plane(cp_ptr);
504  cp_ptr = cp_ptr->next;
505  scores[i] = cur_w.dot(cur_w.vector, a_1, w_dim);
506  }
507  scores.vec1_plus_scalar_times_vec2(scores.vector, -1.0, bias.vector, scores.vlen);
508 
509  float64_t w_norm = cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
510  float64_t PO = 0.5*_lambda*w_norm + scores.max(scores.vector, scores.vlen);
511  float64_t QP_gap = PO - ncbm.Fd;
512 
513  SG_SPRINT("%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap)
514  }
515 
516  /* Stopping conditions */
517  if ((best_Fp - ncbm.Fd) <= TolRel*LIBBMRM_ABS(best_Fp))
518  ncbm.exitflag = 1;
519 
520  if ((best_Fp - ncbm.Fd) <= TolAbs)
521  ncbm.exitflag = 2;
522 
523  if (ncbm.nCP >= maxCPs)
524  ncbm.exitflag = -1;
525 
526  tstop=ttime.cur_time_diff(false);
527 
528  /* Verbose output */
529  if (verbose)
530  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
531  ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, ncbm.Fp-ncbm.Fd,
532  (ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp);
533 
534  std::vector<line_search_res> wbest_candidates;
535  if (!line_search)
536  {
537  cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
538 
539  add_cutting_plane(&CPList_tail, map, A.matrix,
540  find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
541 
542  bias[ncbm.nCP] = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
543 
544  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
545 
546  // add as a new wbest candidate
547  line_search_res ls;
548  ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
549  ls.solution = cur_w;
550  ls.gradient = cur_subgrad;
551 
552  wbest_candidates.push_back(ls);
553  }
554  else
555  {
556  tstart=ttime.cur_time_diff(false);
557  /* do line searching */
558  SGVector<float64_t> search_dir(w_dim);
559  search_dir.add(search_dir.vector, 1.0, cur_w.vector, -1.0, best_w.vector, w_dim);
560 
561  float64_t norm_dir = search_dir.twonorm(search_dir.vector, search_dir.vlen);
562  float64_t astart;
563  uint32_t cp_min_approx = 0;
564  if (cp_min_approx || (ncbm.nIter == 1))
565  {
566  astart = 1.0;
567  }
568  else
569  {
570  astart = CMath::min(astar/norm_dir,1.0);
571  if (astart == 0)
572  astart = 1.0;
573  }
574 
575  /* line search */
576  std::vector<line_search_res> ls_res
577  = line_search_with_strong_wolfe(machine, _lambda, best_Fp, best_w, best_subgrad, search_dir, astart);
578 
579  if (ls_res[0].fval != ls_res[1].fval)
580  {
581  ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);
582 
583  add_cutting_plane(&CPList_tail, map, A.matrix,
584  find_free_idx(map, maxCPs), ls_res[0].gradient, w_dim);
585 
586  bias[ncbm.nCP]
587  = SGVector<float64_t>::dot(ls_res[0].solution.vector, ls_res[0].gradient, w_dim)
588  - (ls_res[0].fval - ls_res[0].reg);
589 
590  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
591 
592  wbest_candidates.push_back(ls_res[0]);
593  }
594 
595  ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);
596 
597  add_cutting_plane(&CPList_tail, map, A.matrix,
598  find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);
599 
600  bias[ncbm.nCP]
601  = ls_res[1].solution.dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
602  - (ls_res[1].fval - ls_res[1].reg);
603 
604  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
605 
606  wbest_candidates.push_back(ls_res[1]);
607 
608  if ((best_Fp <= ls_res[1].fval) && (astart != 1))
609  {
610  cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
611 
612  add_cutting_plane(&CPList_tail, map, A.matrix,
613  find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
614 
615  bias[ncbm.nCP]
616  = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
617 
618  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
619 
620  /* add as a new wbest candidate */
621  line_search_res ls;
622  ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
623  ls.solution = cur_w;
624  ls.gradient = cur_subgrad;
625  SG_SPRINT("%lf\n", ls.fval)
626 
627  wbest_candidates.push_back(ls);
628  }
629 
630  astar = ls_res[1].a * norm_dir;
631 
632  tstop=ttime.cur_time_diff(false);
633  SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart)
634  }
635 
636  /* search for the best w among the new candidates */
637  if (verbose)
638  SG_SPRINT("\t searching for the best Fp:\n")
639  for (size_t i = 0; i < wbest_candidates.size(); i++)
640  {
641  if (verbose)
642  SG_SPRINT("\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
643 
644  if (wbest_candidates[i].fval < best_Fp)
645  {
646  best_Fp = wbest_candidates[i].fval;
647  best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
648  memcpy(best_w, wbest_candidates[i].solution.vector, sizeof(float64_t)*w_dim);
649  memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector, sizeof(float64_t)*w_dim);
650 
651  ncbm.Fp = best_Fp;
652 
653  if (verbose)
654  SG_SPRINT("\t\t new best norm: %f\n",
655  best_w.twonorm(best_w.vector, w_dim));
656  }
657 
658  if (!is_convex)
659  {
660  index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i);
661 
662  /* conflict */
663  float64_t score
665  wbest_candidates[i].gradient.vector, w_dim)
666  + (-1.0*bias[cp_idx]);
667  if (score > best_risk)
668  {
669  float64_t U
670  = best_risk
672  wbest_candidates[i].gradient.vector, w_dim);
673 
674  float64_t L
675  = best_Fp - wbest_candidates[i].reg
676  - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector,
677  wbest_candidates[i].gradient.vector, w_dim);
678 
679  if (verbose)
680  SG_SPRINT("CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
681  if (L <= U)
682  {
683  if (verbose)
684  SG_SPRINT("%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
685  bias[cp_idx]= -L;
686  }
687  else
688  {
689  wbest_candidates[i].gradient.zero();
690  SGVector<float64_t>::vec1_plus_scalar_times_vec2(wbest_candidates[i].gradient.vector, -_lambda, best_w.vector, w_dim);
691 
692  cp_ptr = CPList_tail;
693  for (size_t j = wbest_candidates.size()-1; i < j; --j)
694  {
695  cp_ptr = cp_ptr->prev;
696  SG_SPRINT("tail - %d\n (%d)", j, i)
697  }
698 
699  float64_t* cp = get_cutting_plane(cp_ptr);
700  LIBBMRM_MEMCPY(cp, wbest_candidates[i].gradient.vector, w_dim*sizeof(float64_t));
701 
702  /* update the corresponding column and row in H */
703  cp_ptr = CPList_head;
704  for (uint32_t j = 0; j < ncbm.nCP-1; ++j)
705  {
706  float64_t* a = get_cutting_plane(cp_ptr);
707  cp_ptr = cp_ptr->next;
708  float64_t dot_val
709  = SGVector<float64_t>::dot(a, wbest_candidates[i].gradient.vector, w_dim);
710 
711  H.matrix[LIBBMRM_INDEX(cp_idx, j, maxCPs)]
712  = H.matrix[LIBBMRM_INDEX(j, cp_idx, maxCPs)]
713  = dot_val/_lambda;
714  }
715 
716  diag_H[LIBBMRM_INDEX(cp_idx, cp_idx, maxCPs)]
717  = SGVector<float64_t>::dot(wbest_candidates[i].gradient.vector,
718  wbest_candidates[i].gradient.vector, w_dim);
719 
720 
721  bias[cp_idx]
722  = best_Fp - wbest_candidates[i].reg
723  - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector,
724  wbest_candidates[i].gradient.vector, w_dim);
725 
726  if (verbose)
727  SG_SPRINT("solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)
728  }
729  }
730  }
731  }
732 
733  /* Inactive Cutting Planes (ICP) removal
734  if (cleanICP)
735  {
736  clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
737  H.matrix, diag_H.vector, x.vector,
738  map.vector, cleanAfter, bias.vector, I.vector);
739  }
740  */
741  }
742 
743  memcpy(w, best_w.vector, sizeof(float64_t)*w_dim);
744 
745  /* free ICP_stats variables */
746  LIBBMRM_FREE(icp_stats.ICPcounter);
747  LIBBMRM_FREE(icp_stats.ICPs);
748  LIBBMRM_FREE(icp_stats.ACPs);
749  LIBBMRM_FREE(icp_stats.H_buff);
750 
751  return ncbm;
752 }
753 
754 } /* namespace shogun */

SHOGUN Machine Learning Toolbox - Documentation