14 #include <shogun/lib/external/libqp.h>
70 float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
72 float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);
76 if ((a_j < a_lo) || (a_j > a_hi))
78 a_j = 0.5*(a_lo+a_hi);
83 if ((a_j > a_lo) || (a_j < a_hi))
85 a_j = 0.5*(a_lo+a_hi);
89 cur_solution.
add(cur_solution.
vector, 1.0,
90 initial_solution.
vector, a_j,
95 = 0.5*lambda*cur_solution.
dot(cur_solution.
vector,
103 (cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
117 if (
CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
120 ls_res.
fval = cur_fval;
121 ls_res.
reg = cur_reg;
128 if (cur_lgrad*(a_hi - a_lo) >= 0)
147 ls_res.
fval = cur_fval;
148 ls_res.
reg = cur_reg;
192 std::vector<line_search_res> ret;
207 initial_step.
fval = cur_fval;
208 initial_step.
reg = cur_reg;
209 initial_step.
gradient = cur_subgrad;
211 ret.push_back(initial_step);
219 (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
221 (cur_fval >= prev_fval && iter > 0)
225 zoom(machine, lambda, prev_a, cur_a, initial_val,
226 initial_solution, search_dir, wolfe_c1, wolfe_c2,
227 initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
232 if (
CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
236 ls_res.
fval = cur_fval;
237 ls_res.
reg = cur_reg;
240 ret.push_back(ls_res);
247 zoom(machine, lambda, cur_a, prev_a, initial_val,
248 initial_solution, search_dir, wolfe_c1, wolfe_c2,
249 initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
254 if ((
CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
258 ls_res.
fval = cur_fval;
259 ls_res.
reg = cur_reg;
262 ret.push_back(ls_res);
267 prev_fval = cur_fval;
268 prev_lgrad = cur_lgrad;
270 cur_a = (cur_a + amax)*0.5;
286 for (uint32_t i=0; i < ncbm.
nCP; ++i)
323 libqp_state_T qp_exitflag={0, 0, 0, 0};
380 || icp_stats.
ACPs == NULL || icp_stats.
H_buff == NULL
407 best_risk = cur_risk;
412 bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
429 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
432 SG_SPRINT(
"%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
433 ncbm.
nIter, tstop-tstart, ncbm.
Fp, ncbm.
Fd, cur_risk);
453 ncbm.
Fd = -qp_exitflag.QP;
461 for (uint32_t i=0; i < ncbm.
nCP; ++i)
478 clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
488 for (uint32_t i=0; i < ncbm.
nCP; ++i)
495 bool calc_gap =
false;
501 for (uint32_t i=0; i < ncbm.
nCP; ++i)
504 cp_ptr = cp_ptr->
next;
505 scores[i] = cur_w.
dot(cur_w.
vector, a_1, w_dim);
513 SG_SPRINT(
"%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.
nIter, PO, ncbm.
Fd, QP_gap)
520 if ((best_Fp - ncbm.
Fd) <= TolAbs)
523 if (ncbm.
nCP >= maxCPs)
530 SG_SPRINT(
"%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
532 (ncbm.
Fp-ncbm.
Fd)/ncbm.
Fp, cur_risk, ncbm.
nCP, ncbm.
nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.
Fd)/best_Fp);
534 std::vector<line_search_res> wbest_candidates;
544 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
552 wbest_candidates.push_back(ls);
563 uint32_t cp_min_approx = 0;
564 if (cp_min_approx || (ncbm.
nIter == 1))
576 std::vector<line_search_res> ls_res
579 if (ls_res[0].fval != ls_res[1].fval)
581 ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);
588 - (ls_res[0].fval - ls_res[0].reg);
590 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
592 wbest_candidates.push_back(ls_res[0]);
595 ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);
598 find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);
601 = ls_res[1].solution.
dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
602 - (ls_res[1].fval - ls_res[1].reg);
604 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
606 wbest_candidates.push_back(ls_res[1]);
608 if ((best_Fp <= ls_res[1].fval) && (astart != 1))
618 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
627 wbest_candidates.push_back(ls);
630 astar = ls_res[1].a * norm_dir;
633 SG_SPRINT(
"\t\tline search time: %.5lf\n", tstop-tstart)
638 SG_SPRINT(
"\t searching for the best Fp:\n")
639 for (
size_t i = 0; i < wbest_candidates.size(); i++)
642 SG_SPRINT(
"\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
644 if (wbest_candidates[i].fval < best_Fp)
646 best_Fp = wbest_candidates[i].fval;
647 best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
648 memcpy(best_w, wbest_candidates[i].solution.
vector,
sizeof(
float64_t)*w_dim);
649 memcpy(best_subgrad.
vector, wbest_candidates[i].gradient.vector,
sizeof(
float64_t)*w_dim);
660 index_t cp_idx = ncbm.
nCP-(wbest_candidates.size()-i);
665 wbest_candidates[i].gradient.vector, w_dim)
666 + (-1.0*bias[cp_idx]);
667 if (score > best_risk)
672 wbest_candidates[i].gradient.vector, w_dim);
675 = best_Fp - wbest_candidates[i].reg
677 wbest_candidates[i].gradient.vector, w_dim);
680 SG_SPRINT(
"CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
684 SG_SPRINT(
"%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
689 wbest_candidates[i].gradient.
zero();
692 cp_ptr = CPList_tail;
693 for (
size_t j = wbest_candidates.size()-1; i < j; --j)
695 cp_ptr = cp_ptr->
prev;
703 cp_ptr = CPList_head;
704 for (uint32_t j = 0; j < ncbm.
nCP-1; ++j)
707 cp_ptr = cp_ptr->
next;
718 wbest_candidates[i].gradient.vector, w_dim);
722 = best_Fp - wbest_candidates[i].reg
724 wbest_candidates[i].gradient.vector, w_dim);
727 SG_SPRINT(
"solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)