SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
libncbm.cpp
浏览该文件的文档.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Viktor Gal
8  *
9  */
10 
12 #include <shogun/lib/Time.h>
14 
15 #include <shogun/lib/external/libqp.h>
17 
18 #include <vector>
19 
20 namespace shogun
21 {
22 
24 static uint32_t maxCPs;
25 static const float64_t epsilon=0.0;
26 
27 static const float64_t *get_col(uint32_t i)
28 {
29  return (&HMatrix[maxCPs*i]);
30 }
31 
33 {
34  /* */
40 };
41 
42 inline static line_search_res zoom
43 (
44  CDualLibQPBMSOSVM *machine,
45  float64_t lambda,
46  float64_t a_lo,
47  float64_t a_hi,
48  float64_t initial_fval,
49  SGVector<float64_t>& initial_solution,
50  SGVector<float64_t>& search_dir,
51  float64_t wolfe_c1,
52  float64_t wolfe_c2,
53  float64_t init_lgrad,
54  float64_t f_lo,
55  float64_t g_lo,
56  float64_t f_hi,
57  float64_t g_hi
58 )
59 {
60  line_search_res ls_res;
61  ls_res.solution.resize_vector(initial_solution.vlen);
62  ls_res.gradient.resize_vector(initial_solution.vlen);
63 
64  SGVector<float64_t> cur_solution(initial_solution.vlen);
65  cur_solution.zero();
66  SGVector<float64_t> cur_grad(initial_solution.vlen);
67 
68  uint32_t iter = 0;
69  while (1)
70  {
71  float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
72  float64_t d2 = CMath::sqrt(d1*d1 - g_lo*g_hi);
73  float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);
74 
75  if (a_lo < a_hi)
76  {
77  if ((a_j < a_lo) || (a_j > a_hi))
78  {
79  a_j = 0.5*(a_lo+a_hi);
80  }
81  }
82  else
83  {
84  if ((a_j > a_lo) || (a_j < a_hi))
85  {
86  a_j = 0.5*(a_lo+a_hi);
87  }
88  }
89 
90  cur_solution.add(cur_solution.vector, 1.0,
91  initial_solution.vector, a_j,
92  search_dir.vector, cur_solution.vlen);
93 
94  float64_t cur_fval = machine->risk(cur_grad.vector, cur_solution.vector);
95  float64_t cur_reg
96  = 0.5*lambda*CMath::dot(cur_solution.vector,
97  cur_solution.vector, cur_solution.vlen);
98  cur_fval += cur_reg;
99 
100  cur_grad.vec1_plus_scalar_times_vec2(cur_grad.vector, lambda, cur_solution.vector, cur_grad.vlen);
101 
102  if
103  (
104  (cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
105  ||
106  (cur_fval > f_lo)
107  )
108  {
109  a_hi = a_j;
110  f_hi = cur_fval;
111  g_hi = CMath::dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
112  }
113  else
114  {
115  float64_t cur_lgrad
116  = CMath::dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
117 
118  if (CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
119  {
120  ls_res.a = a_j;
121  ls_res.fval = cur_fval;
122  ls_res.reg = cur_reg;
123  ls_res.gradient = cur_grad;
124  ls_res.solution = cur_solution;
125 // SG_SPRINT("in zoom (wolfe2): %f\n", cur_fval)
126  return ls_res;
127  }
128 
129  if (cur_lgrad*(a_hi - a_lo) >= 0)
130  {
131  a_hi = a_lo;
132  f_hi = f_lo;
133  g_hi = g_lo;
134  }
135  a_lo = a_j;
136  f_lo = cur_fval;
137  g_lo = cur_lgrad;
138  }
139 
140  if
141  (
142  (CMath::abs(a_lo - a_hi) <= 0.01*a_lo)
143  ||
144  (iter >= 5)
145  )
146  {
147  ls_res.a = a_j;
148  ls_res.fval = cur_fval;
149  ls_res.reg = cur_reg;
150  ls_res.gradient = cur_grad;
151  ls_res.solution = cur_solution;
152 // SG_SPRINT("in zoom iter: %d %f\n", iter, cur_fval)
153  return ls_res;
154  }
155  iter++;
156  }
157 }
158 
159 inline std::vector<line_search_res> line_search_with_strong_wolfe
160 (
161  CDualLibQPBMSOSVM *machine,
162  float64_t lambda,
163  float64_t initial_val,
164  SGVector<float64_t>& initial_solution,
165  SGVector<float64_t>& initial_grad,
166  SGVector<float64_t>& search_dir,
167  float64_t astart,
168  float64_t amax = 1.1,
169  float64_t wolfe_c1 = 1E-4,
170  float64_t wolfe_c2 = 0.9,
171  float64_t max_iter = 5
172 )
173 {
174  /* NOTE:
175  * model->risk returns only the risk as it's name says
176  * have to cur_fval = model.risk + (lambda*0.5*w*w')
177  *
178  * subgrad should be: subgrad + (lambda*w)
179  *
180  */
181 
182  uint32_t iter = 0;
183 
184  initial_grad.vec1_plus_scalar_times_vec2(initial_grad.vector, lambda, initial_solution.vector, initial_grad.vlen);
185 
186  float64_t initial_lgrad = CMath::dot(initial_grad.vector, search_dir.vector, initial_grad.vlen);
187  float64_t prev_lgrad = initial_lgrad;
188  float64_t prev_fval = initial_val;
189 
190  float64_t prev_a = 0;
191  float64_t cur_a = astart;
192 
193  std::vector<line_search_res> ret;
194  while (1)
195  {
196  SGVector<float64_t> x(initial_solution.vlen);
197  SGVector<float64_t> cur_subgrad(initial_solution.vlen);
198 
199  x.add(x.vector, 1.0, initial_solution.vector, cur_a, search_dir.vector, x.vlen);
200  float64_t cur_fval = machine->risk(cur_subgrad.vector, x.vector);
201  float64_t cur_reg = 0.5*lambda*CMath::dot(x.vector, x.vector, x.vlen);
202  cur_fval += cur_reg;
203 
204  cur_subgrad.vec1_plus_scalar_times_vec2(cur_subgrad.vector, lambda, x.vector, x.vlen);
205  if (iter == 0)
206  {
207  line_search_res initial_step;
208  initial_step.fval = cur_fval;
209  initial_step.reg = cur_reg;
210  initial_step.gradient = cur_subgrad;
211  initial_step.solution = x;
212  ret.push_back(initial_step);
213  }
214 
215  float64_t cur_lgrad
216  = CMath::dot(cur_subgrad.vector, search_dir.vector,
217  cur_subgrad.vlen);
218  if
219  (
220  (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
221  ||
222  (cur_fval >= prev_fval && iter > 0)
223  )
224  {
225  ret.push_back(
226  zoom(machine, lambda, prev_a, cur_a, initial_val,
227  initial_solution, search_dir, wolfe_c1, wolfe_c2,
228  initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
229  );
230  return ret;
231  }
232 
233  if (CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
234  {
235  line_search_res ls_res;
236  ls_res.a = cur_a;
237  ls_res.fval = cur_fval;
238  ls_res.reg = cur_reg;
239  ls_res.solution = x;
240  ls_res.gradient = cur_subgrad;
241  ret.push_back(ls_res);
242  return ret;
243  }
244 
245  if (cur_lgrad >= 0)
246  {
247  ret.push_back(
248  zoom(machine, lambda, cur_a, prev_a, initial_val,
249  initial_solution, search_dir, wolfe_c1, wolfe_c2,
250  initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
251  );
252  return ret;
253  }
254  iter++;
255  if ((CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
256  {
257  line_search_res ls_res;
258  ls_res.a = cur_a;
259  ls_res.fval = cur_fval;
260  ls_res.reg = cur_reg;
261  ls_res.solution = x;
262  ls_res.gradient = cur_subgrad;
263  ret.push_back(ls_res);
264  return ret;
265  }
266 
267  prev_a = cur_a;
268  prev_fval = cur_fval;
269  prev_lgrad = cur_lgrad;
270 
271  cur_a = (cur_a + amax)*0.5;
272  }
273 }
274 
275 inline void update_H(BmrmStatistics& ncbm,
276  bmrm_ll* head,
277  bmrm_ll* tail,
279  SGVector<float64_t>& diag_H,
280  float64_t lambda,
281  uint32_t maxCP,
282  int32_t w_dim)
283 {
284  float64_t* a_2 = get_cutting_plane(tail);
285  bmrm_ll* cp_ptr=head;
286 
287  for (uint32_t i=0; i < ncbm.nCP; ++i)
288  {
289  float64_t* a_1 = get_cutting_plane(cp_ptr);
290  cp_ptr=cp_ptr->next;
291 
292  float64_t dot_val = CMath::dot(a_2, a_1, w_dim);
293 
294  H.matrix[LIBBMRM_INDEX(ncbm.nCP, i, maxCP)]
295  = H.matrix[LIBBMRM_INDEX(i, ncbm.nCP, maxCP)]
296  = dot_val/lambda;
297  }
298 
299  /* set the diagonal element, i.e. subgrad_i*subgrad_i' */
300  float64_t dot_val = CMath::dot(a_2, a_2, w_dim);
301  H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]=dot_val/lambda;
302 
303  diag_H[ncbm.nCP]=H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)];
304 
305  ncbm.nCP++;
306 }
307 
308 
310  CDualLibQPBMSOSVM *machine,
311  float64_t *w,
312  float64_t TolRel,
313  float64_t TolAbs,
314  float64_t _lambda,
315  uint32_t _BufSize,
316  bool cleanICP,
317  uint32_t cleanAfter,
318  bool is_convex,
319  bool line_search,
320  bool verbose
321  )
322 {
323  BmrmStatistics ncbm;
324  libqp_state_T qp_exitflag={0, 0, 0, 0};
325 
326  CStructuredModel* model = machine->get_model();
327  int32_t w_dim = model->get_dim();
328 
329  maxCPs = _BufSize;
330  BufSize = _BufSize;
331 
332  ncbm.nCP=0;
333  ncbm.nIter=0;
334  ncbm.exitflag=0;
335 
336  /* variables for timing the algorithm*/
337  CTime ttime;
338  float64_t tstart, tstop;
339  tstart=ttime.cur_time_diff(false);
340 
341  /* matrix of subgradiends */
342  SGMatrix<float64_t> A(w_dim, maxCPs);
343 
344  /* bias vector */
345  SGVector<float64_t> bias(maxCPs);
346  bias.zero();
347 
348  /* Matrix for storing H = A*A' */
349  SGMatrix<float64_t> H(maxCPs,maxCPs);
350  HMatrix = H.matrix;
351 
352  /* diag_H */
353  SGVector<float64_t> diag_H(maxCPs);
354  diag_H.zero();
355 
356  /* x the solution vector of the dual problem:
357  * 1/lambda x*H*x' - x*bias
358  */
359  SGVector<float64_t> x(maxCPs);
360  x.zero();
361 
362  /* for sum_{i in I_k} x[i] <= b[k] for all k such that S[k] == 1 */
363  float64_t b = 1.0;
364  uint8_t S = 1;
365  SGVector<uint32_t> I(maxCPs);
366  I.set_const(1);
367 
368  /* libqp paramteres */
369  uint32_t QPSolverMaxIter = 0xFFFFFFFF;
370  float64_t QPSolverTolRel = 1E-9;
371 
372  /* variables for maintaining inactive planes */
373  SGVector<bool> map(maxCPs);
374  map.set_const(true);
375  ICP_stats icp_stats;
376  icp_stats.maxCPs = maxCPs;
377  icp_stats.ICPcounter = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
378  icp_stats.ICPs = (float64_t**) LIBBMRM_CALLOC(maxCPs, float64_t*);
379  icp_stats.ACPs = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
380  icp_stats.H_buff = (float64_t*) LIBBMRM_CALLOC(maxCPs*maxCPs,float64_t);
381  if
382  (
383  icp_stats.ICPcounter == NULL || icp_stats.ICPs == NULL
384  || icp_stats.ACPs == NULL || icp_stats.H_buff == NULL
385  )
386  {
387  ncbm.exitflag=-2;
388  LIBBMRM_FREE(icp_stats.ICPcounter);
389  LIBBMRM_FREE(icp_stats.ICPs);
390  LIBBMRM_FREE(icp_stats.ACPs);
391  LIBBMRM_FREE(icp_stats.H_buff);
392  return ncbm;
393  }
394 
395  /* best */
396  float64_t best_Fp = CMath::INFTY;
397  float64_t best_risk = CMath::INFTY;
398  SGVector<float64_t> best_w(w_dim);
399  best_w.zero();
400  SGVector<float64_t> best_subgrad(w_dim);
401  best_subgrad.zero();
402 
403  /* initial solution */
404  SGVector<float64_t> cur_subgrad(w_dim);
405  SGVector<float64_t> cur_w(w_dim);
406  memcpy(cur_w.vector, w, sizeof(float64_t)*w_dim);
407 
408  float64_t cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
409  bias[0] = -cur_risk;
410  best_Fp = 0.5*_lambda*CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen) + cur_risk;
411  best_risk = cur_risk;
412  memcpy(best_w.vector, cur_w.vector, sizeof(float64_t)*w_dim);
413  memcpy(best_subgrad.vector, cur_subgrad.vector, sizeof(float64_t)*w_dim);
414 
415  /* create a double-linked list over the A the subgrad matrix */
416  bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
417  cp_list = (bmrm_ll*) SG_CALLOC(bmrm_ll, 1);
418  if (cp_list==NULL)
419  {
420  ncbm.exitflag=-2;
421  return ncbm;
422  }
423  /* save the subgradient */
424  memcpy(A.matrix, cur_subgrad.vector, sizeof(float64_t)*w_dim);
425  map[0] = false;
426  cp_list->address=&A[0];
427  cp_list->idx=0;
428  cp_list->prev=NULL;
429  cp_list->next=NULL;
430  CPList_head=cp_list;
431  CPList_tail=cp_list;
432 
433  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
434  tstop=ttime.cur_time_diff(false);
435  if (verbose)
436  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
437  ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk);
438 
439  float64_t astar = 0.01;
440 
441  SG_SPRINT("clean icps: %d\n", cleanICP)
442  while (ncbm.exitflag==0)
443  {
444  tstart=ttime.cur_time_diff(false);
445  ncbm.nIter++;
446 
447  //diag_H.display_vector();
448  //bias.display_vector();
449 
450  /* solve the dual of the problem, namely:
451  *
452  */
453  qp_exitflag =
454  libqp_splx_solver(&get_col, diag_H.vector, bias.vector, &b, I.vector, &S, x.vector,
455  ncbm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
456 
457  ncbm.Fd = -qp_exitflag.QP;
458 
459  ncbm.qp_exitflag=qp_exitflag.exitflag;
460 
461  /* Update ICPcounter (add one to unused and reset used)
462  * + compute number of active CPs */
463  ncbm.nzA=0;
464 
465  for (uint32_t i=0; i < ncbm.nCP; ++i)
466  {
467  if (x[i] > epsilon)
468  {
469  /* cp was active => reset counter */
470  ++ncbm.nzA;
471  icp_stats.ICPcounter[i]=0;
472  }
473  else
474  {
475  icp_stats.ICPcounter[i]++;
476  }
477  }
478 
479  /* Inactive Cutting Planes (ICP) removal */
480  if (cleanICP)
481  {
482  clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
483  H.matrix, diag_H.vector, x.vector,
484  map.vector, cleanAfter, bias.vector, I.vector);
485  }
486 
487  /* calculate the new w
488  * w[i] = -1/lambda*A[i]*x[i]
489  */
490  cur_w.zero();
491  cp_ptr=CPList_head;
492  for (uint32_t i=0; i < ncbm.nCP; ++i)
493  {
494  float64_t* A_1 = get_cutting_plane(cp_ptr);
495  cp_ptr=cp_ptr->next;
496  SGVector<float64_t>::vec1_plus_scalar_times_vec2(cur_w.vector, -x[i]/_lambda, A_1, w_dim);
497  }
498 
499  bool calc_gap = false;
500  if (calc_gap)
501  {
502  SGVector<float64_t> scores(ncbm.nCP);
503  cp_ptr=CPList_head;
504 
505  for (uint32_t i=0; i < ncbm.nCP; ++i)
506  {
507  float64_t* a_1 = get_cutting_plane(cp_ptr);
508  cp_ptr = cp_ptr->next;
509  scores[i] = CMath::dot(cur_w.vector, a_1, w_dim);
510  }
511  scores.vec1_plus_scalar_times_vec2(scores.vector, -1.0, bias.vector, scores.vlen);
512 
513  float64_t w_norm = CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen);
514  float64_t PO = 0.5*_lambda*w_norm + CMath::max(scores.vector, scores.vlen);
515  float64_t QP_gap = PO - ncbm.Fd;
516 
517  SG_SPRINT("%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap)
518  }
519 
520  /* Stopping conditions */
521  if ((best_Fp - ncbm.Fd) <= TolRel*LIBBMRM_ABS(best_Fp))
522  ncbm.exitflag = 1;
523 
524  if ((best_Fp - ncbm.Fd) <= TolAbs)
525  ncbm.exitflag = 2;
526 
527  if (ncbm.nCP >= maxCPs)
528  ncbm.exitflag = -1;
529 
530  // next CP would exceed BufSize/maxCPs
531  if (ncbm.nCP+3 >= maxCPs)
532  ncbm.exitflag=-1;
533 
534  tstop=ttime.cur_time_diff(false);
535 
536  /* Verbose output */
537  if (verbose)
538  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
539  ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, ncbm.Fp-ncbm.Fd,
540  (ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp);
541 
542  if (ncbm.exitflag!=0)
543  break;
544 
545  std::vector<line_search_res> wbest_candidates;
546  if (!line_search)
547  {
548  cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
549 
550  add_cutting_plane(&CPList_tail, map, A.matrix,
551  find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
552 
553  bias[ncbm.nCP] = CMath::dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
554 
555  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
556 
557  // add as a new wbest candidate
558  line_search_res ls;
559  ls.fval = cur_risk+0.5*_lambda*CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen);
560  ls.solution = cur_w;
561  ls.gradient = cur_subgrad;
562 
563  wbest_candidates.push_back(ls);
564  }
565  else
566  {
567  tstart=ttime.cur_time_diff(false);
568  /* do line searching */
569  SGVector<float64_t> search_dir(w_dim);
570  search_dir.add(search_dir.vector, 1.0, cur_w.vector, -1.0, best_w.vector, w_dim);
571 
572  float64_t norm_dir = search_dir.twonorm(search_dir.vector, search_dir.vlen);
573  float64_t astart;
574  uint32_t cp_min_approx = 0;
575  if (cp_min_approx || (ncbm.nIter == 1))
576  {
577  astart = 1.0;
578  }
579  else
580  {
581  astart = CMath::min(astar/norm_dir,1.0);
582  if (astart == 0)
583  astart = 1.0;
584  }
585 
586  /* line search */
587  std::vector<line_search_res> ls_res
588  = line_search_with_strong_wolfe(machine, _lambda, best_Fp, best_w, best_subgrad, search_dir, astart);
589 
590  if (ls_res[0].fval != ls_res[1].fval)
591  {
592  ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);
593 
594  add_cutting_plane(&CPList_tail, map, A.matrix,
595  find_free_idx(map, maxCPs), ls_res[0].gradient, w_dim);
596 
597  bias[ncbm.nCP]
598  = CMath::dot(ls_res[0].solution.vector, ls_res[0].gradient, w_dim)
599  - (ls_res[0].fval - ls_res[0].reg);
600 
601  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
602 
603  wbest_candidates.push_back(ls_res[0]);
604  }
605 
606  ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);
607 
608  add_cutting_plane(&CPList_tail, map, A.matrix,
609  find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);
610 
611  bias[ncbm.nCP]
612  = CMath::dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
613  - (ls_res[1].fval - ls_res[1].reg);
614 
615  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
616 
617  wbest_candidates.push_back(ls_res[1]);
618 
619  if ((best_Fp <= ls_res[1].fval) && (astart != 1))
620  {
621  cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
622 
623  add_cutting_plane(&CPList_tail, map, A.matrix,
624  find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
625 
626  bias[ncbm.nCP]
627  = CMath::dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
628 
629  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
630 
631  /* add as a new wbest candidate */
632  line_search_res ls;
633  ls.fval = cur_risk+0.5*_lambda*CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen);
634  ls.solution = cur_w;
635  ls.gradient = cur_subgrad;
636  SG_SPRINT("%lf\n", ls.fval)
637 
638  wbest_candidates.push_back(ls);
639  }
640 
641  astar = ls_res[1].a * norm_dir;
642 
643  tstop=ttime.cur_time_diff(false);
644  SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart)
645  }
646 
647  /* search for the best w among the new candidates */
648  if (verbose)
649  SG_SPRINT("\t searching for the best Fp:\n")
650  for (size_t i = 0; i < wbest_candidates.size(); i++)
651  {
652  if (verbose)
653  SG_SPRINT("\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
654 
655  if (wbest_candidates[i].fval < best_Fp)
656  {
657  best_Fp = wbest_candidates[i].fval;
658  best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
659  memcpy(best_w, wbest_candidates[i].solution.vector, sizeof(float64_t)*w_dim);
660  memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector, sizeof(float64_t)*w_dim);
661 
662  ncbm.Fp = best_Fp;
663 
664  if (verbose)
665  SG_SPRINT("\t\t new best norm: %f\n",
666  best_w.twonorm(best_w.vector, w_dim));
667  }
668 
669  if (!is_convex)
670  {
671  index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i);
672 
673  /* conflict */
674  float64_t score
675  = CMath::dot(best_w.vector,
676  wbest_candidates[i].gradient.vector, w_dim)
677  + (-1.0*bias[cp_idx]);
678  if (score > best_risk)
679  {
680  float64_t U
681  = best_risk
682  - CMath::dot(best_w.vector,
683  wbest_candidates[i].gradient.vector, w_dim);
684 
685  float64_t L
686  = best_Fp - wbest_candidates[i].reg
687  - CMath::dot(wbest_candidates[i].solution.vector,
688  wbest_candidates[i].gradient.vector, w_dim);
689 
690  if (verbose)
691  SG_SPRINT("CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
692  if (L <= U)
693  {
694  if (verbose)
695  SG_SPRINT("%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
696  bias[cp_idx]= -L;
697  }
698  else
699  {
700  wbest_candidates[i].gradient.zero();
701  SGVector<float64_t>::vec1_plus_scalar_times_vec2(wbest_candidates[i].gradient.vector, -_lambda, best_w.vector, w_dim);
702 
703  cp_ptr = CPList_tail;
704  for (size_t j = wbest_candidates.size()-1; i < j; --j)
705  {
706  cp_ptr = cp_ptr->prev;
707  SG_SPRINT("tail - %d\n (%d)", j, i)
708  }
709 
710  float64_t* cp = get_cutting_plane(cp_ptr);
711  LIBBMRM_MEMCPY(cp, wbest_candidates[i].gradient.vector, w_dim*sizeof(float64_t));
712 
713  /* update the corresponding column and row in H */
714  cp_ptr = CPList_head;
715  for (uint32_t j = 0; j < ncbm.nCP-1; ++j)
716  {
717  float64_t* a = get_cutting_plane(cp_ptr);
718  cp_ptr = cp_ptr->next;
719  float64_t dot_val
720  = CMath::dot(a, wbest_candidates[i].gradient.vector, w_dim);
721 
722  H.matrix[LIBBMRM_INDEX(cp_idx, j, maxCPs)]
723  = H.matrix[LIBBMRM_INDEX(j, cp_idx, maxCPs)]
724  = dot_val/_lambda;
725  }
726 
727  diag_H[LIBBMRM_INDEX(cp_idx, cp_idx, maxCPs)]
728  = CMath::dot(wbest_candidates[i].gradient.vector,
729  wbest_candidates[i].gradient.vector, w_dim);
730 
731 
732  bias[cp_idx]
733  = best_Fp - wbest_candidates[i].reg
734  - CMath::dot(wbest_candidates[i].solution.vector,
735  wbest_candidates[i].gradient.vector, w_dim);
736 
737  if (verbose)
738  SG_SPRINT("solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)
739  }
740  }
741  }
742  }
743 
744  /* Inactive Cutting Planes (ICP) removal
745  if (cleanICP)
746  {
747  clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
748  H.matrix, diag_H.vector, x.vector,
749  map.vector, cleanAfter, bias.vector, I.vector);
750  }
751  */
752  }
753 
754  memcpy(w, best_w.vector, sizeof(float64_t)*w_dim);
755 
756  /* free ICP_stats variables */
757  LIBBMRM_FREE(icp_stats.ICPcounter);
758  LIBBMRM_FREE(icp_stats.ICPs);
759  LIBBMRM_FREE(icp_stats.ACPs);
760  LIBBMRM_FREE(icp_stats.H_buff);
761 
762  cp_ptr=CPList_head;
763  while(cp_ptr!=NULL)
764  {
765  bmrm_ll * cp_ptr_this=cp_ptr;
766  cp_ptr=cp_ptr->next;
767  LIBBMRM_FREE(cp_ptr_this);
768  cp_ptr_this=NULL;
769  }
770 
771  SG_UNREF(model);
772 
773  return ncbm;
774 }
775 
776 } /* namespace shogun */
#define LIBBMRM_CALLOC(x, y)
Definition: libbmrm.h:24
static T twonorm(const T *x, int32_t len)
|| x ||_2
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:47
static float64_t * H
Definition: libbmrm.cpp:27
static const double * get_col(uint32_t j)
float64_t * H_buff
Definition: libbmrm.h:65
Class DualLibQPBMSOSVM that uses Bundle Methods for Regularized Risk Minimization algorithms for stru...
float64_t * get_cutting_plane(bmrm_ll *ptr)
Definition: libbmrm.h:120
uint32_t idx
Definition: libbmrm.h:46
int32_t index_t
Definition: common.h:62
static const float64_t INFTY
infinity
Definition: Math.h:2048
void update_H(BmrmStatistics &ncbm, bmrm_ll *head, bmrm_ll *tail, SGMatrix< float64_t > &H, SGVector< float64_t > &diag_H, float64_t lambda, uint32_t maxCP, int32_t w_dim)
Definition: libncbm.cpp:275
bmrm_ll * next
Definition: libbmrm.h:42
uint32_t * ACPs
Definition: libbmrm.h:62
virtual int32_t get_dim() const =0
uint32_t find_free_idx(bool *map, uint32_t size)
Definition: libbmrm.h:128
static line_search_res zoom(CDualLibQPBMSOSVM *machine, float64_t lambda, float64_t a_lo, float64_t a_hi, float64_t initial_fval, SGVector< float64_t > &initial_solution, SGVector< float64_t > &search_dir, float64_t wolfe_c1, float64_t wolfe_c2, float64_t init_lgrad, float64_t f_lo, float64_t g_lo, float64_t f_hi, float64_t g_hi)
Definition: libncbm.cpp:43
SGVector< float64_t > solution
Definition: libncbm.cpp:38
static const float64_t epsilon
Definition: libbmrm.cpp:25
float64_t cur_time_diff(bool verbose=false)
Definition: Time.cpp:68
index_t vlen
Definition: SGVector.h:494
#define SG_SPRINT(...)
Definition: SGIO.h:180
#define LIBBMRM_FREE(x)
Definition: libbmrm.h:26
#define LIBBMRM_ABS(A)
Definition: libbmrm.h:30
static const uint32_t QPSolverMaxIter
Definition: libbmrm.cpp:24
bmrm_ll * prev
Definition: libbmrm.h:40
double float64_t
Definition: common.h:50
static float64_t * HMatrix
Definition: libncbm.cpp:23
virtual float64_t risk(float64_t *subgrad, float64_t *W, TMultipleCPinfo *info=0, EStructRiskType rtype=N_SLACK_MARGIN_RESCALING)
static T max(T a, T b)
Definition: Math.h:168
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
#define LIBBMRM_MEMCPY(x, y, z)
Definition: libbmrm.h:27
float64_t ** ICPs
Definition: libbmrm.h:59
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
Definition: SGVector.cpp:531
std::vector< line_search_res > line_search_with_strong_wolfe(CDualLibQPBMSOSVM *machine, float64_t lambda, float64_t initial_val, SGVector< float64_t > &initial_solution, SGVector< float64_t > &initial_grad, SGVector< float64_t > &search_dir, float64_t astart, float64_t amax=1.1, float64_t wolfe_c1=1E-4, float64_t wolfe_c2=0.9, float64_t max_iter=5)
Definition: libncbm.cpp:160
uint32_t maxCPs
Definition: libbmrm.h:53
Class CStructuredModel that represents the application specific model and contains most of the applic...
static uint32_t maxCPs
Definition: libncbm.cpp:24
#define LIBBMRM_INDEX(ROW, COL, NUM_ROWS)
Definition: libbmrm.h:29
#define SG_UNREF(x)
Definition: SGObject.h:52
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define IGNORE_IN_CLASSLIST
Definition: CPLEXSVM.h:24
void add_cutting_plane(bmrm_ll **tail, bool *map, float64_t *A, uint32_t free_idx, float64_t *cp_data, uint32_t dim)
Definition: libbmrm.cpp:30
uint32_t * ICPcounter
Definition: libbmrm.h:56
static T min(T a, T b)
Definition: Math.h:157
float64_t * address
Definition: libbmrm.h:44
BmrmStatistics svm_ncbm_solver(CDualLibQPBMSOSVM *machine, float64_t *w, float64_t TolRel, float64_t TolAbs, float64_t _lambda, uint32_t _BufSize, bool cleanICP, uint32_t cleanAfter, bool is_convex, bool line_search, bool verbose)
Definition: libncbm.cpp:309
CStructuredModel * get_model() const
SGVector< float64_t > gradient
Definition: libncbm.cpp:39
void resize_vector(int32_t n)
Definition: SGVector.cpp:259
static float32_t sqrt(float32_t x)
Definition: Math.h:459
#define LIBBMRM_PLUS_INF
Definition: libbmrm.h:23
void clean_icp(ICP_stats *icp_stats, BmrmStatistics &bmrm, bmrm_ll **head, bmrm_ll **tail, float64_t *&Hmat, float64_t *&diag_H, float64_t *&beta, bool *&map, uint32_t cleanAfter, float64_t *&b, uint32_t *&I, uint32_t cp_models)
Definition: libbmrm.cpp:93
void set_const(T const_elem)
Definition: SGVector.cpp:152
void add(const SGVector< T > x)
Definition: SGVector.cpp:281
static T abs(T a)
Definition: Math.h:179
uint32_t BufSize
Definition: libbmrm.cpp:28

SHOGUN 机器学习工具包 - 项目文档