16 #include <shogun/lib/external/libqp.h>
25 static uint32_t maxCPs;
28 static const float64_t *get_col(uint32_t i)
30 return (&HMatrix[maxCPs*i]);
39 SGVector<float64_t> solution;
40 SGVector<float64_t> gradient;
43 inline static line_search_res zoom
45 CDualLibQPBMSOSVM *machine,
50 SGVector<float64_t>& initial_solution,
51 SGVector<float64_t>& search_dir,
61 line_search_res ls_res;
62 ls_res.solution.resize_vector(initial_solution.vlen);
63 ls_res.gradient.resize_vector(initial_solution.vlen);
65 SGVector<float64_t> cur_solution(initial_solution.vlen);
67 SGVector<float64_t> cur_grad(initial_solution.vlen);
72 float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
74 float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);
78 if ((a_j < a_lo) || (a_j > a_hi))
80 a_j = 0.5*(a_lo+a_hi);
85 if ((a_j > a_lo) || (a_j < a_hi))
87 a_j = 0.5*(a_lo+a_hi);
91 cur_solution.add(cur_solution.vector, 1.0,
92 initial_solution.vector, a_j,
93 search_dir.vector, cur_solution.vlen);
95 float64_t cur_fval = machine->risk(cur_grad.vector, cur_solution.vector);
98 cur_solution.vector, cur_solution.vlen);
101 cur_grad.vec1_plus_scalar_times_vec2(cur_grad.vector, lambda, cur_solution.vector, cur_grad.vlen);
105 (cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
112 g_hi =
CMath::dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
117 =
CMath::dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
119 if (
CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
122 ls_res.fval = cur_fval;
123 ls_res.reg = cur_reg;
124 ls_res.gradient = cur_grad;
125 ls_res.solution = cur_solution;
130 if (cur_lgrad*(a_hi - a_lo) >= 0)
149 ls_res.fval = cur_fval;
150 ls_res.reg = cur_reg;
151 ls_res.gradient = cur_grad;
152 ls_res.solution = cur_solution;
160 inline std::vector<line_search_res> line_search_with_strong_wolfe
162 CDualLibQPBMSOSVM *machine,
165 SGVector<float64_t>& initial_solution,
166 SGVector<float64_t>& initial_grad,
167 SGVector<float64_t>& search_dir,
185 initial_grad.vec1_plus_scalar_times_vec2(initial_grad.vector, lambda, initial_solution.vector, initial_grad.vlen);
187 float64_t initial_lgrad =
CMath::dot(initial_grad.vector, search_dir.vector, initial_grad.vlen);
194 std::vector<line_search_res> ret;
197 SGVector<float64_t> x(initial_solution.vlen);
198 SGVector<float64_t> cur_subgrad(initial_solution.vlen);
200 x.add(x.vector, 1.0, initial_solution.vector, cur_a, search_dir.vector, x.vlen);
201 float64_t cur_fval = machine->risk(cur_subgrad.vector, x.vector);
205 cur_subgrad.vec1_plus_scalar_times_vec2(cur_subgrad.vector, lambda, x.vector, x.vlen);
208 line_search_res initial_step;
209 initial_step.fval = cur_fval;
210 initial_step.reg = cur_reg;
211 initial_step.gradient = cur_subgrad;
212 initial_step.solution = x;
213 ret.push_back(initial_step);
217 =
CMath::dot(cur_subgrad.vector, search_dir.vector,
221 (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
223 (cur_fval >= prev_fval && iter > 0)
227 zoom(machine, lambda, prev_a, cur_a, initial_val,
228 initial_solution, search_dir, wolfe_c1, wolfe_c2,
229 initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
234 if (
CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
236 line_search_res ls_res;
238 ls_res.fval = cur_fval;
239 ls_res.reg = cur_reg;
241 ls_res.gradient = cur_subgrad;
242 ret.push_back(ls_res);
249 zoom(machine, lambda, cur_a, prev_a, initial_val,
250 initial_solution, search_dir, wolfe_c1, wolfe_c2,
251 initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
256 if ((
CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
258 line_search_res ls_res;
260 ls_res.fval = cur_fval;
261 ls_res.reg = cur_reg;
263 ls_res.gradient = cur_subgrad;
264 ret.push_back(ls_res);
269 prev_fval = cur_fval;
270 prev_lgrad = cur_lgrad;
272 cur_a = (cur_a + amax)*0.5;
276 inline void update_H(BmrmStatistics& ncbm,
279 SGMatrix<float64_t>& H,
280 SGVector<float64_t>& diag_H,
285 float64_t* a_2 = get_cutting_plane(tail);
286 bmrm_ll* cp_ptr=head;
288 for (uint32_t i=0; i < ncbm.nCP; ++i)
290 float64_t* a_1 = get_cutting_plane(cp_ptr);
295 H.matrix[LIBBMRM_INDEX(ncbm.nCP, i, maxCP)]
296 = H.matrix[LIBBMRM_INDEX(i, ncbm.nCP, maxCP)]
302 H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]=dot_val/lambda;
304 diag_H[ncbm.nCP]=H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)];
310 BmrmStatistics svm_ncbm_solver(
311 CDualLibQPBMSOSVM *machine,
325 libqp_state_T qp_exitflag={0, 0, 0, 0};
327 CStructuredModel* model = machine->get_model();
328 int32_t w_dim = model->get_dim();
340 tstart=ttime.cur_time_diff(
false);
343 SGMatrix<float64_t> A(w_dim, maxCPs);
346 SGVector<float64_t> bias(maxCPs);
350 SGMatrix<float64_t> H(maxCPs,maxCPs);
354 SGVector<float64_t> diag_H(maxCPs);
360 SGVector<float64_t> x(maxCPs);
366 SGVector<uint32_t> I(maxCPs);
370 uint32_t QPSolverMaxIter = 0xFFFFFFFF;
374 SGVector<bool> map(maxCPs);
377 icp_stats.maxCPs = maxCPs;
378 icp_stats.ICPcounter = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
380 icp_stats.ACPs = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
384 icp_stats.ICPcounter == NULL || icp_stats.ICPs == NULL
385 || icp_stats.ACPs == NULL || icp_stats.H_buff == NULL
389 LIBBMRM_FREE(icp_stats.ICPcounter);
390 LIBBMRM_FREE(icp_stats.ICPs);
391 LIBBMRM_FREE(icp_stats.ACPs);
392 LIBBMRM_FREE(icp_stats.H_buff);
399 SGVector<float64_t> best_w(w_dim);
401 SGVector<float64_t> best_subgrad(w_dim);
405 SGVector<float64_t> cur_subgrad(w_dim);
406 SGVector<float64_t> cur_w(w_dim);
407 memcpy(cur_w.vector, w,
sizeof(
float64_t)*w_dim);
409 float64_t cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
411 best_Fp = 0.5*_lambda*
CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen) + cur_risk;
412 best_risk = cur_risk;
413 memcpy(best_w.vector, cur_w.vector,
sizeof(
float64_t)*w_dim);
414 memcpy(best_subgrad.vector, cur_subgrad.vector,
sizeof(
float64_t)*w_dim);
417 bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
418 cp_list = (bmrm_ll*) SG_CALLOC(bmrm_ll, 1);
425 memcpy(A.matrix, cur_subgrad.vector,
sizeof(
float64_t)*w_dim);
427 cp_list->address=&A[0];
434 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
435 tstop=ttime.cur_time_diff(
false);
437 SG_SPRINT(
"%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
438 ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk);
443 while (ncbm.exitflag==0)
445 tstart=ttime.cur_time_diff(
false);
455 libqp_splx_solver(&get_col, diag_H.vector, bias.vector, &b, I.vector, &S, x.vector,
456 ncbm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
458 ncbm.Fd = -qp_exitflag.QP;
460 ncbm.qp_exitflag=qp_exitflag.exitflag;
466 for (uint32_t i=0; i < ncbm.nCP; ++i)
472 icp_stats.ICPcounter[i]=0;
476 icp_stats.ICPcounter[i]++;
483 clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
484 H.matrix, diag_H.vector, x.vector,
485 map.vector, cleanAfter, bias.vector, I.vector);
493 for (uint32_t i=0; i < ncbm.nCP; ++i)
495 float64_t* A_1 = get_cutting_plane(cp_ptr);
500 bool calc_gap =
false;
503 SGVector<float64_t> scores(ncbm.nCP);
506 for (uint32_t i=0; i < ncbm.nCP; ++i)
508 float64_t* a_1 = get_cutting_plane(cp_ptr);
509 cp_ptr = cp_ptr->next;
510 scores[i] =
CMath::dot(cur_w.vector, a_1, w_dim);
512 scores.vec1_plus_scalar_times_vec2(scores.vector, -1.0, bias.vector, scores.vlen);
518 SG_SPRINT(
"%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap)
522 if ((best_Fp - ncbm.Fd) <= TolRel*LIBBMRM_ABS(best_Fp))
525 if ((best_Fp - ncbm.Fd) <= TolAbs)
528 if (ncbm.nCP >= maxCPs)
532 if (ncbm.nCP+3 >= maxCPs)
535 tstop=ttime.cur_time_diff(
false);
539 SG_SPRINT(
"%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
540 ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, ncbm.Fp-ncbm.Fd,
541 (ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp);
543 if (ncbm.exitflag!=0)
546 std::vector<line_search_res> wbest_candidates;
549 cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
551 add_cutting_plane(&CPList_tail, map, A.matrix,
552 find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
554 bias[ncbm.nCP] =
CMath::dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
556 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
560 ls.fval = cur_risk+0.5*_lambda*
CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen);
562 ls.gradient = cur_subgrad;
564 wbest_candidates.push_back(ls);
568 tstart=ttime.cur_time_diff(
false);
570 SGVector<float64_t> search_dir(w_dim);
571 search_dir.add(search_dir.vector, 1.0, cur_w.vector, -1.0, best_w.vector, w_dim);
573 float64_t norm_dir = search_dir.twonorm(search_dir.vector, search_dir.vlen);
575 uint32_t cp_min_approx = 0;
576 if (cp_min_approx || (ncbm.nIter == 1))
588 std::vector<line_search_res> ls_res
589 = line_search_with_strong_wolfe(machine, _lambda, best_Fp, best_w, best_subgrad, search_dir, astart);
591 if (ls_res[0].fval != ls_res[1].fval)
593 ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);
595 add_cutting_plane(&CPList_tail, map, A.matrix,
596 find_free_idx(map, maxCPs), ls_res[0].gradient, w_dim);
599 =
CMath::dot(ls_res[0].solution.vector, ls_res[0].gradient, w_dim)
600 - (ls_res[0].fval - ls_res[0].reg);
602 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
604 wbest_candidates.push_back(ls_res[0]);
607 ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);
609 add_cutting_plane(&CPList_tail, map, A.matrix,
610 find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);
613 =
CMath::dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
614 - (ls_res[1].fval - ls_res[1].reg);
616 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
618 wbest_candidates.push_back(ls_res[1]);
620 if ((best_Fp <= ls_res[1].fval) && (astart != 1))
622 cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
624 add_cutting_plane(&CPList_tail, map, A.matrix,
625 find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
628 =
CMath::dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
630 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
634 ls.fval = cur_risk+0.5*_lambda*
CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen);
636 ls.gradient = cur_subgrad;
639 wbest_candidates.push_back(ls);
642 astar = ls_res[1].a * norm_dir;
644 tstop=ttime.cur_time_diff(false);
645 SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart)
650 SG_SPRINT("\t searching for the best Fp:\n")
651 for (
size_t i = 0; i < wbest_candidates.size(); i++)
654 SG_SPRINT(
"\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
656 if (wbest_candidates[i].fval < best_Fp)
658 best_Fp = wbest_candidates[i].fval;
659 best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
660 memcpy(best_w, wbest_candidates[i].solution.vector,
sizeof(
float64_t)*w_dim);
661 memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector,
sizeof(
float64_t)*w_dim);
667 best_w.twonorm(best_w.vector, w_dim));
672 index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i);
677 wbest_candidates[i].gradient.vector, w_dim)
678 + (-1.0*bias[cp_idx]);
679 if (score > best_risk)
684 wbest_candidates[i].gradient.vector, w_dim);
687 = best_Fp - wbest_candidates[i].reg
688 -
CMath::dot(wbest_candidates[i].solution.vector,
689 wbest_candidates[i].gradient.vector, w_dim);
692 SG_SPRINT(
"CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
696 SG_SPRINT(
"%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
701 wbest_candidates[i].gradient.zero();
704 cp_ptr = CPList_tail;
705 for (
size_t j = wbest_candidates.size()-1; i < j; --j)
707 cp_ptr = cp_ptr->prev;
711 float64_t* cp = get_cutting_plane(cp_ptr);
712 LIBBMRM_MEMCPY(cp, wbest_candidates[i].gradient.vector, w_dim*
sizeof(
float64_t));
715 cp_ptr = CPList_head;
716 for (uint32_t j = 0; j < ncbm.nCP-1; ++j)
718 float64_t* a = get_cutting_plane(cp_ptr);
719 cp_ptr = cp_ptr->next;
721 =
CMath::dot(a, wbest_candidates[i].gradient.vector, w_dim);
723 H.matrix[LIBBMRM_INDEX(cp_idx, j, maxCPs)]
724 = H.matrix[LIBBMRM_INDEX(j, cp_idx, maxCPs)]
728 diag_H[LIBBMRM_INDEX(cp_idx, cp_idx, maxCPs)]
729 =
CMath::dot(wbest_candidates[i].gradient.vector,
730 wbest_candidates[i].gradient.vector, w_dim);
734 = best_Fp - wbest_candidates[i].reg
735 -
CMath::dot(wbest_candidates[i].solution.vector,
736 wbest_candidates[i].gradient.vector, w_dim);
739 SG_SPRINT(
"solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)
755 memcpy(w, best_w.vector,
sizeof(
float64_t)*w_dim);
758 LIBBMRM_FREE(icp_stats.ICPcounter);
759 LIBBMRM_FREE(icp_stats.ICPs);
760 LIBBMRM_FREE(icp_stats.ACPs);
761 LIBBMRM_FREE(icp_stats.H_buff);
766 bmrm_ll * cp_ptr_this=cp_ptr;
768 LIBBMRM_FREE(cp_ptr_this);
778 #endif //USE_GPL_SHOGUN
static const float64_t INFTY
infinity
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
all of classes and functions are contained in the shogun namespace
#define IGNORE_IN_CLASSLIST
static float32_t sqrt(float32_t x)