libncbm.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Viktor Gal
 *
 */

#include <shogun/structure/libncbm.h>
#include <shogun/lib/Time.h>

#include <shogun/lib/external/libqp.h>
#include <shogun/mathematics/Math.h>

#include <vector>

namespace shogun
{

static float64_t* HMatrix;
static uint32_t maxCPs;
static const float64_t epsilon=0.0;

static const float64_t *get_col(uint32_t i)
{
    return (&HMatrix[maxCPs*i]);
}
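/* Note: HMatrix is pointed at H.matrix inside svm_ncbm_solver() below, and
 * get_col() is handed to libqp_splx_solver() as its column-access callback,
 * so the QP solver reads column i of the maxCPs-by-maxCPs Hessian H directly
 * from that buffer. */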

struct line_search_res
{
    /* step size, objective and regularizer values, and the solution and
     * (sub)gradient vectors at the evaluated point */
    float64_t a;
    float64_t fval;
    float64_t reg;
    SGVector<float64_t> solution;
    SGVector<float64_t> gradient;
};

inline static line_search_res zoom
(
    CDualLibQPBMSOSVM *machine,
    float64_t lambda,
    float64_t a_lo,
    float64_t a_hi,
    float64_t initial_fval,
    SGVector<float64_t>& initial_solution,
    SGVector<float64_t>& search_dir,
    float64_t wolfe_c1,
    float64_t wolfe_c2,
    float64_t init_lgrad,
    float64_t f_lo,
    float64_t g_lo,
    float64_t f_hi,
    float64_t g_hi
)
{
    line_search_res ls_res;
    ls_res.solution.resize_vector(initial_solution.vlen);
    ls_res.gradient.resize_vector(initial_solution.vlen);

    SGVector<float64_t> cur_solution(initial_solution.vlen);
    cur_solution.zero();
    SGVector<float64_t> cur_grad(initial_solution.vlen);

    uint32_t iter = 0;
    while (1)
    {
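        /* Trial step: the standard safeguarded cubic interpolation between
         * the bracket endpoints (a_lo, f_lo, g_lo) and (a_hi, f_hi, g_hi)
         * (cf. Nocedal & Wright, Numerical Optimization); the trial point is
         * pushed back to the bisection point 0.5*(a_lo+a_hi) whenever the
         * interpolant lands outside the bracket. */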
        float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
        float64_t d2 = CMath::sqrt(d1*d1 - g_lo*g_hi);
        float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);

        if (a_lo < a_hi)
        {
            if ((a_j < a_lo) || (a_j > a_hi))
            {
                a_j = 0.5*(a_lo+a_hi);
            }
        }
        else
        {
            if ((a_j > a_lo) || (a_j < a_hi))
            {
                a_j = 0.5*(a_lo+a_hi);
            }
        }

        cur_solution.add(cur_solution.vector, 1.0,
            initial_solution.vector, a_j,
            search_dir.vector, cur_solution.vlen);

        float64_t cur_fval = machine->risk(cur_grad.vector, cur_solution.vector);
        float64_t cur_reg
            = 0.5*lambda*cur_solution.dot(cur_solution.vector,
                cur_solution.vector, cur_solution.vlen);
        cur_fval += cur_reg;

        cur_grad.vec1_plus_scalar_times_vec2(cur_grad.vector, lambda, cur_solution.vector, cur_grad.vlen);

        if
        (
            (cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
            ||
            (cur_fval > f_lo)
        )
        {
            a_hi = a_j;
            f_hi = cur_fval;
            g_hi = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
        }
        else
        {
            float64_t cur_lgrad
                = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);

            if (CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
            {
                ls_res.a = a_j;
                ls_res.fval = cur_fval;
                ls_res.reg = cur_reg;
                ls_res.gradient = cur_grad;
                ls_res.solution = cur_solution;
                // SG_SPRINT("in zoom (wolfe2): %f\n", cur_fval)
                return ls_res;
            }

            if (cur_lgrad*(a_hi - a_lo) >= 0)
            {
                a_hi = a_lo;
                f_hi = f_lo;
                g_hi = g_lo;
            }
            a_lo = a_j;
            f_lo = cur_fval;
            g_lo = cur_lgrad;
        }

        if
        (
            (CMath::abs(a_lo - a_hi) <= 0.01*a_lo)
            ||
            (iter >= 5)
        )
        {
            ls_res.a = a_j;
            ls_res.fval = cur_fval;
            ls_res.reg = cur_reg;
            ls_res.gradient = cur_grad;
            ls_res.solution = cur_solution;
            // SG_SPRINT("in zoom iter: %d %f\n", iter, cur_fval)
            return ls_res;
        }
        iter++;
    }
}

inline std::vector<line_search_res> line_search_with_strong_wolfe
(
    CDualLibQPBMSOSVM *machine,
    float64_t lambda,
    float64_t initial_val,
    SGVector<float64_t>& initial_solution,
    SGVector<float64_t>& initial_grad,
    SGVector<float64_t>& search_dir,
    float64_t astart,
    float64_t amax = 1.1,
    float64_t wolfe_c1 = 1E-4,
    float64_t wolfe_c2 = 0.9,
    float64_t max_iter = 5
)
{
    /* NOTE:
     * machine->risk() returns only the risk term, as its name says, so the
     * objective has to be completed as cur_fval = risk + 0.5*lambda*w*w',
     * and the subgradient as subgrad = subgrad + lambda*w.
     */
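
    /* For reference, the regularized objective assembled this way is
     *
     *     F(w)  = R(w) + 0.5*lambda*||w||^2,      dF(w) = dR(w) + lambda*w,
     *
     * and lgrad denotes its directional derivative along search_dir,
     * lgrad(a) = dF(initial_solution + a*search_dir)'*search_dir. The strong
     * Wolfe conditions tested below are
     *
     *     F(a)       <= F(0) + wolfe_c1*a*lgrad(0)    (sufficient decrease)
     *     |lgrad(a)| <= -wolfe_c2*lgrad(0)            (curvature)
     */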

    uint32_t iter = 0;

    initial_grad.vec1_plus_scalar_times_vec2(initial_grad.vector, lambda, initial_solution.vector, initial_grad.vlen);

    float64_t initial_lgrad = initial_grad.dot(initial_grad.vector, search_dir.vector, initial_grad.vlen);
    float64_t prev_lgrad = initial_lgrad;
    float64_t prev_fval = initial_val;

    float64_t prev_a = 0;
    float64_t cur_a = astart;

    std::vector<line_search_res> ret;
    while (1)
    {
        SGVector<float64_t> x(initial_solution.vlen);
        SGVector<float64_t> cur_subgrad(initial_solution.vlen);

        x.add(x.vector, 1.0, initial_solution.vector, cur_a, search_dir.vector, x.vlen);
        float64_t cur_fval = machine->risk(cur_subgrad.vector, x.vector);
        float64_t cur_reg = 0.5*lambda*x.dot(x.vector, x.vector, x.vlen);
        cur_fval += cur_reg;

        cur_subgrad.vec1_plus_scalar_times_vec2(cur_subgrad.vector, lambda, x.vector, x.vlen);
        if (iter == 0)
        {
            line_search_res initial_step;
            initial_step.fval = cur_fval;
            initial_step.reg = cur_reg;
            initial_step.gradient = cur_subgrad;
            initial_step.solution = x;
            ret.push_back(initial_step);
        }

        float64_t cur_lgrad
            = cur_subgrad.dot(cur_subgrad.vector, search_dir.vector,
                cur_subgrad.vlen);
        if
        (
            (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
            ||
            (cur_fval >= prev_fval && iter > 0)
        )
        {
            ret.push_back(
                zoom(machine, lambda, prev_a, cur_a, initial_val,
                    initial_solution, search_dir, wolfe_c1, wolfe_c2,
                    initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
                );
            return ret;
        }

        if (CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
        {
            line_search_res ls_res;
            ls_res.a = cur_a;
            ls_res.fval = cur_fval;
            ls_res.reg = cur_reg;
            ls_res.solution = x;
            ls_res.gradient = cur_subgrad;
            ret.push_back(ls_res);
            return ret;
        }

        if (cur_lgrad >= 0)
        {
            ret.push_back(
                zoom(machine, lambda, cur_a, prev_a, initial_val,
                    initial_solution, search_dir, wolfe_c1, wolfe_c2,
                    initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
                );
            return ret;
        }
        iter++;
        if ((CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
        {
            line_search_res ls_res;
            ls_res.a = cur_a;
            ls_res.fval = cur_fval;
            ls_res.reg = cur_reg;
            ls_res.solution = x;
            ls_res.gradient = cur_subgrad;
            ret.push_back(ls_res);
            return ret;
        }

        prev_a = cur_a;
        prev_fval = cur_fval;
        prev_lgrad = cur_lgrad;

        cur_a = (cur_a + amax)*0.5;
    }
}
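
/* Return convention (relied upon by svm_ncbm_solver() below): the vector
 * always comes back with two entries; ret[0] is the model evaluated at the
 * first trial step, and ret[1] is the point accepted by the strong Wolfe test
 * (or by zoom(), or the last trial when the iteration limit is hit). */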

inline void update_H(BmrmStatistics& ncbm,
        bmrm_ll* head,
        bmrm_ll* tail,
        SGMatrix<float64_t>& H,
        SGVector<float64_t>& diag_H,
        float64_t lambda,
        uint32_t maxCP,
        int32_t w_dim)
{
    float64_t* a_2 = get_cutting_plane(tail);
    bmrm_ll* cp_ptr=head;

    for (uint32_t i=0; i < ncbm.nCP; ++i)
    {
        float64_t* a_1 = get_cutting_plane(cp_ptr);
        cp_ptr=cp_ptr->next;

        float64_t dot_val = SGVector<float64_t>::dot(a_2, a_1, w_dim);

        H.matrix[LIBBMRM_INDEX(ncbm.nCP, i, maxCP)]
            = H.matrix[LIBBMRM_INDEX(i, ncbm.nCP, maxCP)]
            = dot_val/lambda;
    }

    /* set the diagonal element, i.e. subgrad_i*subgrad_i' */
    float64_t dot_val = SGVector<float64_t>::dot(a_2, a_2, w_dim);
    H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]=dot_val/lambda;

    diag_H[ncbm.nCP]=H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)];

    ncbm.nCP++;
}
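
/* update_H() appends the newest cutting plane (the list tail) as row/column
 * nCP of the QP Hessian, i.e. H[i][j] = a_i'*a_j / lambda where a_i is the
 * i-th stored subgradient; diag_H mirrors the diagonal because
 * libqp_splx_solver() expects it as a separate argument. */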


BmrmStatistics svm_ncbm_solver(
        CDualLibQPBMSOSVM *machine,
        float64_t *w,
        float64_t TolRel,
        float64_t TolAbs,
        float64_t _lambda,
        uint32_t _BufSize,
        bool cleanICP,
        uint32_t cleanAfter,
        bool is_convex,
        bool line_search,
        bool verbose
        )
{
    BmrmStatistics ncbm;
    libqp_state_T qp_exitflag={0, 0, 0, 0};

    CStructuredModel* model = machine->get_model();
    int32_t w_dim = model->get_dim();

    maxCPs = _BufSize;
    BufSize = _BufSize;

    ncbm.nCP=0;
    ncbm.nIter=0;
    ncbm.exitflag=0;

    /* variables for timing the algorithm */
    CTime ttime;
    float64_t tstart, tstop;
    tstart=ttime.cur_time_diff(false);

    /* matrix of subgradients */
    SGMatrix<float64_t> A(w_dim, maxCPs);

    /* bias vector */
    SGVector<float64_t> bias(maxCPs);
    bias.zero();

    /* matrix for storing H = A*A' */
    SGMatrix<float64_t> H(maxCPs, maxCPs);
    HMatrix = H.matrix;

    /* diag_H */
    SGVector<float64_t> diag_H(maxCPs);
    diag_H.zero();

    /* x, the solution vector of the dual problem:
     * 1/lambda x*H*x' - x*bias
     */
    SGVector<float64_t> x(maxCPs);
    x.zero();

    /* for sum_{i in I_k} x[i] <= b[k] for all k such that S[k] == 1 */
    float64_t b = 1.0;
    uint8_t S = 1;
    SGVector<uint32_t> I(maxCPs);
    I.set_const(1);
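
    /* With b = 1, S = 1 and I[i] = 1 for every plane there is a single
     * constraint group, so the reduced dual QP handed to libqp below takes
     * the standard libqp_splx form
     *
     *     min_x  0.5*x'*H*x + bias'*x
     *     s.t.   sum_i x[i] <= 1,  x[i] >= 0,
     *
     * with H accessed column-wise through the get_col() callback. */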

    /* libqp parameters */
    uint32_t QPSolverMaxIter = 0xFFFFFFFF;
    float64_t QPSolverTolRel = 1E-9;

    /* variables for maintaining inactive planes */
    SGVector<bool> map(maxCPs);
    map.set_const(true);
    ICP_stats icp_stats;
    icp_stats.maxCPs = maxCPs;
    icp_stats.ICPcounter = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
    icp_stats.ICPs = (float64_t**) LIBBMRM_CALLOC(maxCPs, float64_t*);
    icp_stats.ACPs = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
    icp_stats.H_buff = (float64_t*) LIBBMRM_CALLOC(maxCPs*maxCPs, float64_t);
    if
    (
        icp_stats.ICPcounter == NULL || icp_stats.ICPs == NULL
        || icp_stats.ACPs == NULL || icp_stats.H_buff == NULL
    )
    {
        ncbm.exitflag=-2;
        LIBBMRM_FREE(icp_stats.ICPcounter);
        LIBBMRM_FREE(icp_stats.ICPs);
        LIBBMRM_FREE(icp_stats.ACPs);
        LIBBMRM_FREE(icp_stats.H_buff);
        return ncbm;
    }

    /* best solution so far */
    float64_t best_Fp = CMath::INFTY;
    float64_t best_risk = CMath::INFTY;
    SGVector<float64_t> best_w(w_dim);
    best_w.zero();
    SGVector<float64_t> best_subgrad(w_dim);
    best_subgrad.zero();

    /* initial solution */
    SGVector<float64_t> cur_subgrad(w_dim);
    SGVector<float64_t> cur_w(w_dim);
    memcpy(cur_w.vector, w, sizeof(float64_t)*w_dim);

    float64_t cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
    bias[0] = -cur_risk;
    best_Fp = 0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen) + cur_risk;
    best_risk = cur_risk;
    memcpy(best_w.vector, cur_w.vector, sizeof(float64_t)*w_dim);
    memcpy(best_subgrad.vector, cur_subgrad.vector, sizeof(float64_t)*w_dim);

    /* create a doubly-linked list over the subgradient matrix A */
    bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
    cp_list = (bmrm_ll*) SG_CALLOC(bmrm_ll, 1);
    if (cp_list==NULL)
    {
        ncbm.exitflag=-2;
        return ncbm;
    }
    /* save the subgradient */
    memcpy(A.matrix, cur_subgrad.vector, sizeof(float64_t)*w_dim);
    map[0] = false;
    cp_list->address=&A[0];
    cp_list->idx=0;
    cp_list->prev=NULL;
    cp_list->next=NULL;
    CPList_head=cp_list;
    CPList_tail=cp_list;

    update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
    tstop=ttime.cur_time_diff(false);
    if (verbose)
        SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
                ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk);

    float64_t astar = 0.01;

    SG_SPRINT("clean icps: %d\n", cleanICP)
    while (ncbm.exitflag==0)
    {
        tstart=ttime.cur_time_diff(false);
        ncbm.nIter++;

        //diag_H.display_vector();
        //bias.display_vector();

        /* solve the dual of the reduced (cutting-plane) problem */
        qp_exitflag =
            libqp_splx_solver(&get_col, diag_H.vector, bias.vector, &b, I.vector, &S, x.vector,
                ncbm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);

        ncbm.Fd = -qp_exitflag.QP;

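        /* qp_exitflag.QP is the minimized libqp objective; its negative is
         * taken as the dual value Fd used in the stopping tests below. */
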
        ncbm.qp_exitflag=qp_exitflag.exitflag;

        /* update the ICP counters (increment the unused planes, reset the
         * used ones) and count the number of active CPs */
        ncbm.nzA=0;

        for (uint32_t i=0; i < ncbm.nCP; ++i)
        {
            if (x[i] > epsilon)
            {
                /* cp was active => reset counter */
                ++ncbm.nzA;
                icp_stats.ICPcounter[i]=0;
            }
            else
            {
                icp_stats.ICPcounter[i]++;
            }
        }

        /* Inactive Cutting Planes (ICP) removal */
        if (cleanICP)
        {
            clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
                H.matrix, diag_H.vector, x.vector,
                map.vector, cleanAfter, bias.vector, I.vector);
        }

        /* calculate the new w from the dual solution:
         * w = -(1/lambda) * sum_i x[i]*A[:,i]
         */
        cur_w.zero();
        cp_ptr=CPList_head;
        for (uint32_t i=0; i < ncbm.nCP; ++i)
        {
            float64_t* A_1 = get_cutting_plane(cp_ptr);
            cp_ptr=cp_ptr->next;
            SGVector<float64_t>::vec1_plus_scalar_times_vec2(cur_w.vector, -x[i]/_lambda, A_1, w_dim);
        }

        bool calc_gap = false;
        if (calc_gap)
        {
            SGVector<float64_t> scores(ncbm.nCP);
            cp_ptr=CPList_head;

            for (uint32_t i=0; i < ncbm.nCP; ++i)
            {
                float64_t* a_1 = get_cutting_plane(cp_ptr);
                cp_ptr = cp_ptr->next;
                scores[i] = cur_w.dot(cur_w.vector, a_1, w_dim);
            }
            scores.vec1_plus_scalar_times_vec2(scores.vector, -1.0, bias.vector, scores.vlen);

            float64_t w_norm = cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
            float64_t PO = 0.5*_lambda*w_norm + scores.max(scores.vector, scores.vlen);
            float64_t QP_gap = PO - ncbm.Fd;

            SG_SPRINT("%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap)
        }

        /* Stopping conditions */
        if ((best_Fp - ncbm.Fd) <= TolRel*LIBBMRM_ABS(best_Fp))
            ncbm.exitflag = 1;

        if ((best_Fp - ncbm.Fd) <= TolAbs)
            ncbm.exitflag = 2;

        if (ncbm.nCP >= maxCPs)
            ncbm.exitflag = -1;

        // the next iteration can add up to 3 new CPs, which would exceed BufSize/maxCPs
        if (ncbm.nCP+3 >= maxCPs)
            ncbm.exitflag=-1;

        tstop=ttime.cur_time_diff(false);

        /* Verbose output */
        if (verbose)
            SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
                    ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, ncbm.Fp-ncbm.Fd,
                    (ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp);

        if (ncbm.exitflag!=0)
            break;

        std::vector<line_search_res> wbest_candidates;
        if (!line_search)
        {
            cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);

            add_cutting_plane(&CPList_tail, map, A.matrix,
                find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);

            bias[ncbm.nCP] = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;

            update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);

            // add as a new wbest candidate
            line_search_res ls;
            ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
            ls.solution = cur_w;
            ls.gradient = cur_subgrad;

            wbest_candidates.push_back(ls);
        }
        else
        {
            tstart=ttime.cur_time_diff(false);
            /* do the line search */
            SGVector<float64_t> search_dir(w_dim);
            search_dir.add(search_dir.vector, 1.0, cur_w.vector, -1.0, best_w.vector, w_dim);

            float64_t norm_dir = search_dir.twonorm(search_dir.vector, search_dir.vlen);
            float64_t astart;
            uint32_t cp_min_approx = 0;
            if (cp_min_approx || (ncbm.nIter == 1))
            {
                astart = 1.0;
            }
            else
            {
                astart = CMath::min(astar/norm_dir,1.0);
                if (astart == 0)
                    astart = 1.0;
            }
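
            /* Note: astar holds the previously accepted step length scaled by
             * that iteration's direction norm (astar = ls_res[1].a*norm_dir
             * further below), so astar/norm_dir re-expresses it in the units
             * of the current search direction; it is capped at 1.0, and the
             * very first iteration simply starts from a full step. */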

            /* line search */
            std::vector<line_search_res> ls_res
                = line_search_with_strong_wolfe(machine, _lambda, best_Fp, best_w, best_subgrad, search_dir, astart);

            if (ls_res[0].fval != ls_res[1].fval)
            {
                ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);

                add_cutting_plane(&CPList_tail, map, A.matrix,
                    find_free_idx(map, maxCPs), ls_res[0].gradient.vector, w_dim);

                bias[ncbm.nCP]
                    = SGVector<float64_t>::dot(ls_res[0].solution.vector, ls_res[0].gradient.vector, w_dim)
                    - (ls_res[0].fval - ls_res[0].reg);

                update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);

                wbest_candidates.push_back(ls_res[0]);
            }

            ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);

            add_cutting_plane(&CPList_tail, map, A.matrix,
                find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);

            bias[ncbm.nCP]
                = ls_res[1].solution.dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
                - (ls_res[1].fval - ls_res[1].reg);

            update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);

            wbest_candidates.push_back(ls_res[1]);

            if ((best_Fp <= ls_res[1].fval) && (astart != 1))
            {
                cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);

                add_cutting_plane(&CPList_tail, map, A.matrix,
                    find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);

                bias[ncbm.nCP]
                    = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;

                update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);

                /* add as a new wbest candidate */
                line_search_res ls;
                ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
                ls.solution = cur_w;
                ls.gradient = cur_subgrad;
                SG_SPRINT("%lf\n", ls.fval)

                wbest_candidates.push_back(ls);
            }

            astar = ls_res[1].a * norm_dir;

            tstop=ttime.cur_time_diff(false);
            SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart)
        }

        /* search for the best w among the new candidates */
        if (verbose)
            SG_SPRINT("\t searching for the best Fp:\n")
        for (size_t i = 0; i < wbest_candidates.size(); i++)
        {
            if (verbose)
                SG_SPRINT("\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)

            if (wbest_candidates[i].fval < best_Fp)
            {
                best_Fp = wbest_candidates[i].fval;
                best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
                memcpy(best_w.vector, wbest_candidates[i].solution.vector, sizeof(float64_t)*w_dim);
                memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector, sizeof(float64_t)*w_dim);

                ncbm.Fp = best_Fp;

                if (verbose)
                    SG_SPRINT("\t\t new best norm: %f\n",
                            best_w.twonorm(best_w.vector, w_dim));
            }

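            /* Non-convex risk: a stored cutting plane may over-estimate the
             * risk at the current best solution ("conflict"). The block below
             * evaluates the plane at best_w and, if its value exceeds
             * best_risk, either adjusts the plane's offset bias[cp_idx] or,
             * when that is not possible (L > U), replaces the plane's slope by
             * -lambda*best_w and rebuilds the corresponding row/column of H,
             * so that the cutting-plane model stays consistent at best_w. */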
            if (!is_convex)
            {
                index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i);

                /* conflict */
                float64_t score
                    = SGVector<float64_t>::dot(best_w.vector,
                        wbest_candidates[i].gradient.vector, w_dim)
                    + (-1.0*bias[cp_idx]);
                if (score > best_risk)
                {
                    float64_t U
                        = best_risk
                        - SGVector<float64_t>::dot(best_w.vector,
                            wbest_candidates[i].gradient.vector, w_dim);

                    float64_t L
                        = best_Fp - wbest_candidates[i].reg
                        - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector,
                            wbest_candidates[i].gradient.vector, w_dim);

                    if (verbose)
                        SG_SPRINT("CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
                    if (L <= U)
                    {
                        if (verbose)
                            SG_SPRINT("%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
                        bias[cp_idx]= -L;
                    }
                    else
                    {
                        wbest_candidates[i].gradient.zero();
                        SGVector<float64_t>::vec1_plus_scalar_times_vec2(wbest_candidates[i].gradient.vector, -_lambda, best_w.vector, w_dim);

                        cp_ptr = CPList_tail;
                        for (size_t j = wbest_candidates.size()-1; i < j; --j)
                        {
                            cp_ptr = cp_ptr->prev;
                            SG_SPRINT("tail - %d\n (%d)", j, i)
                        }

                        float64_t* cp = get_cutting_plane(cp_ptr);
                        LIBBMRM_MEMCPY(cp, wbest_candidates[i].gradient.vector, w_dim*sizeof(float64_t));

                        /* update the corresponding column and row in H */
                        cp_ptr = CPList_head;
                        for (uint32_t j = 0; j < ncbm.nCP-1; ++j)
                        {
                            float64_t* a = get_cutting_plane(cp_ptr);
                            cp_ptr = cp_ptr->next;
                            float64_t dot_val
                                = SGVector<float64_t>::dot(a, wbest_candidates[i].gradient.vector, w_dim);

                            H.matrix[LIBBMRM_INDEX(cp_idx, j, maxCPs)]
                                = H.matrix[LIBBMRM_INDEX(j, cp_idx, maxCPs)]
                                = dot_val/_lambda;
                        }

                        /* refresh the diagonal entry of the replaced plane;
                         * diag_H is a plain vector indexed by the CP index and,
                         * as in update_H(), stores subgrad'*subgrad/lambda */
                        diag_H[cp_idx]
                            = H.matrix[LIBBMRM_INDEX(cp_idx, cp_idx, maxCPs)]
                            = SGVector<float64_t>::dot(wbest_candidates[i].gradient.vector,
                                wbest_candidates[i].gradient.vector, w_dim)/_lambda;

                        bias[cp_idx]
                            = best_Fp - wbest_candidates[i].reg
                            - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector,
                                wbest_candidates[i].gradient.vector, w_dim);

                        if (verbose)
                            SG_SPRINT("solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)
                    }
                }
            }
        }

        /* Inactive Cutting Planes (ICP) removal
        if (cleanICP)
        {
            clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
                H.matrix, diag_H.vector, x.vector,
                map.vector, cleanAfter, bias.vector, I.vector);
        }
        */
    }

    memcpy(w, best_w.vector, sizeof(float64_t)*w_dim);

    /* free ICP_stats variables */
    LIBBMRM_FREE(icp_stats.ICPcounter);
    LIBBMRM_FREE(icp_stats.ICPs);
    LIBBMRM_FREE(icp_stats.ACPs);
    LIBBMRM_FREE(icp_stats.H_buff);

    cp_ptr=CPList_head;
    while(cp_ptr!=NULL)
    {
        bmrm_ll * cp_ptr_this=cp_ptr;
        cp_ptr=cp_ptr->next;
        LIBBMRM_FREE(cp_ptr_this);
        cp_ptr_this=NULL;
    }

    SG_UNREF(model);

    return ncbm;
}

} /* namespace shogun */
