83 using namespace shogun;
85 #ifndef DOXYGEN_SHOULD_SKIP_THIS
88 #define y_in(i) y[index_in[(i)]]
89 #define y_out(i) y[index_out[(i)]]
90 #define alpha_in(i) alpha[index_in[(i)]]
91 #define alpha_out(i) alpha[index_out[(i)]]
92 #define minfty (-1.0e+30) // minus infinity
96 #define ThRand (Randnext = Randnext * 1103515245L + 12345L)
97 #define ThRandPos ((Randnext = Randnext * 1103515245L + 12345L) & 0x7fffffff)
102 void quick_si (int32_t a[], int32_t k);
103 void quick_s3 (int32_t a[], int32_t k, int32_t ia[]);
104 void quick_s2 (
float64_t a[], int32_t k, int32_t ia[]);
113 sCache (sKernel* sk, int32_t Mbyte, int32_t ell);
116 cachetype *FillRow (int32_t row, int32_t IsC = 0);
117 cachetype *GetRow (int32_t row);
119 int32_t DivideMP (int32_t *out, int32_t *
in, int32_t n);
122 void Iteration() { nit++; }
128 cache_entry *pt = first_free->next;
130 for (us = 0; pt != first_free; us++, pt = pt->next);
142 int32_t last_access_it;
143 cache_entry *prev, *next;
152 cache_entry *first_free;
153 cache_entry **pindmw;
156 cachetype *FindFree(int32_t row, int32_t IsC);
163 sCache::sCache(sKernel* sk, int32_t Mbyte, int32_t _ell) : KER(sk), ell(_ell)
168 maxmw = (
sizeof(cache_entry) +
sizeof(cache_entry *)
169 + ell*
sizeof(cachetype)) / 4;
171 maxmw = Mbyte*1024*(1024/4) / maxmw;
179 for (i = 0; i < maxmw; i++)
181 mw[i].prev = (i == 0 ? &mw[maxmw-1] : &mw[i-1]);
182 mw[i].next = (i == maxmw-1 ? &mw[0] : &mw[i+1]);
185 mw[i].last_access_it = -1;
187 for (i = 0; i < ell; i++)
201 for (i = maxmw-1; i >= 0; i--)
213 cachetype *sCache::GetRow(int32_t row)
221 c->last_access_it = nit;
224 first_free = first_free->next;
229 c->prev->next = c->next;
230 c->next->prev = c->prev;
231 c->next = first_free;
232 c->prev = first_free->prev;
233 first_free->prev = c;
245 cachetype *sCache::FindFree(int32_t row, int32_t IsC)
249 if (first_free->row != -1)
251 if (first_free->last_access_it == nit || IsC)
254 pindmw[first_free->row] = 0;
256 first_free->row = row;
257 first_free->last_access_it = nit;
258 pindmw[row] = first_free;
260 pt = first_free->data;
261 first_free = first_free->next;
270 cachetype *sCache::FillRow(int32_t row, int32_t IsC)
279 pt = FindFree(row, IsC);
284 for (j = 0; j < ell; j++)
285 pt[j] = (cachetype)KER->Get(row, j);
293 int32_t sCache::DivideMP(int32_t *out, int32_t *
in, int32_t n)
305 int32_t *remained, nremained, k;
312 for (i = 0; i < n; i++)
314 if (pindmw[in[i]] != NULL)
317 remained[nremained++] = i;
319 for (i = 0; i < nremained; i++)
320 out[k++] = remained[i];
329 int32_t QPproblem::optimal()
335 register int32_t i, j, margin_sv_number, z, k, s, kin, z1, znew=0, nnew;
340 for (j = 0; j < ell; j++)
342 grad[j] = y[j] - st[j];
346 quick_s2(grad,ell,ing);
349 margin_sv_number = 0;
351 for (i = chunk_size - 1; i >= 0; i--)
352 if (is_free(index_in[i]))
355 bee = y_in(i) - st[index_in[i]];
359 if (margin_sv_number > 0)
362 for (i = nb-1; i >= 0; i--)
364 gx_i = bee + st[index_out[i]];
365 if ((is_zero(index_out[i]) && (gx_i*y_out(i) < (1.0-
delta))) ||
366 (is_bound(index_out[i]) && (gx_i*y_out(i) > (1.0+
delta))) ||
367 (is_free(index_out[i]) &&
368 ((gx_i*y_out(i) < 1.0-
delta) || (gx_i*y_out(i) > 1.0+
delta))))
370 if (fabs(gx_i*y_out(i) - 1.0) > aux)
371 aux = fabs(gx_i*y_out(i) - 1.0);
377 for (i = ell - 1; i >= 0; i--)
384 if (margin_sv_number == 0)
388 for (j = 0; j < ell; j++)
389 if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] >= 0) )
394 for (j = 0; j < ell; j++)
395 if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] <= 0) )
407 for (j = ell-1; j >=0; j--)
408 if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] <= 0) )
413 for (j = ell-1; j >=0; j--)
414 if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] >= 0) )
419 if (s2 > s1) s1 = s2;
421 bee = 0.5 * (s1+aux);
425 for (i = ell-1; i >= 0; i--)
428 if ((is_zero(i) && (gx_i*y[i] < (1.0-
delta))) ||
429 (is_bound(i) && (gx_i*y[i] > (1.0+
delta))) ||
431 ((gx_i*y[i] < 1.0-
delta) || (gx_i*y[i] > 1.0+
delta))))
433 if (fabs(gx_i*y[i] - 1.0) > aux)
434 aux = fabs(gx_i*y[i] - 1.0);
444 SG_INFO(
" Max KKT violation: %lf\n", aux);
445 else if (verbosity > 0)
448 if (fabs(kktold-aux) <
delta*0.01 && aux <
delta*2.0)
450 if (DELTAvpm > InitialDELTAvpm*0.1)
452 DELTAvpm = (DELTAvpm*0.5 > InitialDELTAvpm*0.1 ?
453 DELTAvpm*0.5 : InitialDELTAvpm*0.1);
454 SG_INFO(
"Inner tolerance changed to: %lf\n", DELTAvpm);
465 for (j = 0; j < chunk_size; j++)
466 inold[j] = index_in[j];
477 if ( is_free(ing[i]) ||
478 (-y[ing[i]]>=0 && is_zero(ing[i])) ||
479 (-y[ing[i]]<=0 && is_bound(ing[i]))
492 if ( is_free(ing[s]) ||
493 (y[ing[s]]>=0 && is_zero(ing[s])) ||
494 (y[ing[s]]<=0 && is_bound(ing[s]))
503 if (znew == s)
break;
505 index_in[k++] = ing[z];
506 index_in[k++] = ing[z1];
516 quick_si(index_in, q);
520 for (k = 0; k < chunk_size; k++)
523 for (i = j; i < q; i++)
524 if (z <= index_in[i])
529 for (i = k; i < chunk_size; i++)
537 if (z == index_in[i])
547 for (i = 0; i < s; i++)
549 bmemrid[i] = bmem[ing[i]];
553 quick_s3(bmemrid, s, pbmr);
558 while (j < chunk_size && i < s)
560 if (is_free(ing[pbmr[i]]))
562 index_in[j] = ing[pbmr[i]];
572 while (j < chunk_size && i < s)
574 if (is_zero(ing[pbmr[i]]))
576 index_in[j] = ing[pbmr[i]];
584 while (j < chunk_size && i < s)
586 if (is_bound(ing[pbmr[i]]))
588 index_in[j] = ing[pbmr[i]];
596 quick_si(index_in, chunk_size);
598 for (i = 0; i < chunk_size; i++)
603 for (i = 0; i < chunk_size; i++)
605 for (z = j; z < index_in[i]; z++)
612 for (z = j; z < ell; z++)
618 for (i = 0; i < nb; i++)
619 bmem[index_out[i]] = 0;
623 for (k = 0; k < chunk_size; k++)
626 for (i = j; i < chunk_size; i++)
631 for (s = k; s < chunk_size; s++)
636 kin = kin + chunk_size - k ;
657 if (nnew < chunk_size/10)
658 nnew = chunk_size/10;
668 if (DELTAkin < 1.0e-6)
670 SG_INFO(
"\n***ERROR***: GPDT stops with tolerance");
672 " %lf because it is unable to change the working set.\n", kktold);
677 SG_INFO(
"Inner tolerance temporary changed to:");
678 SG_INFO(
" %e\n", DELTAvpm*DELTAkin);
686 SG_INFO(
" Working set: new components: %i", kin);
687 SG_INFO(
", new parameter n: %i\n", q);
697 int32_t QPproblem::Preprocess0(int32_t *aux, int32_t *sv)
702 memset(sv, 0, ell*
sizeof(int32_t));
703 for (i = 0; i < chunk_size; i++)
708 }
while (sv[j] != 0);
717 int32_t QPproblem::Preprocess1(sKernel* kernel, int32_t *aux, int32_t *sv)
721 int32_t n, i, off, j, k, ll;
723 int32_t *sv_loc, *bsv_loc, *sp_y;
729 n = (s + preprocess_size - 1) / preprocess_size;
734 SG_INFO(
" Preprocessing: examples = %d", s);
748 for (i = 0; i < sl; i++)
753 for (i = 0; i < ell; i++)
756 for (i = 0; i < ell; i++)
760 ll = aux[j]; aux[j] = aux[k]; aux[k] = ll;
764 for (i = 0; i < n; i++)
768 SplitParts(s, i, n, &ll, &off);
772 for (j = 0; j < ll; j++)
774 sp_y[j] = y[aux[j+off]];
775 for (k = j; k < ll; k++)
776 sp_D[k*sl + j] = sp_D[j*sl + k]
777 = y[aux[j+off]] * y[aux[k+off]]
778 * (
float32_t)kernel->Get(aux[j+off], aux[k+off]);
781 memset(sp_alpha, 0, sl*
sizeof(
float64_t));
784 gpm_solver(projection_solver, projection_projector, ll, sp_D, sp_h,
785 c_const, 0.0, sp_y, sp_alpha,
delta*10, NULL);
790 p2.Subproblem(*
this, ll, aux + off);
793 p2.maxmw = ll*ll*4 / (1024 * 1024);
794 if (p2.maxmw > maxmw / 2)
795 p2.maxmw = maxmw / 2;
797 p2.delta =
delta * 10.0;
798 p2.PreprocessMode = 0;
799 kernel->KernelEvaluations += p2.gpdtsolve(sp_alpha);
802 for (j = 0; j < ll; j++)
805 if (sp_alpha[j] < (c_const-DELTAsv))
808 sp_alpha[j] = c_const;
809 if (sp_alpha[j] > DELTAsv)
811 if (sp_alpha[j] < (c_const-DELTAsv))
812 sv_loc[nsv++] = aux[j+off];
814 bsv_loc[nbsv++] = aux[j+off];
815 alpha[aux[j+off]] = sp_alpha[j];
823 memset(sv, 0, ell*
sizeof(int32_t));
824 ll = (nsv < chunk_size ? nsv : chunk_size);
825 for (i = 0; i < ll; i++)
829 }
while (sv[j] != 0);
834 ll = ((nsv+nbsv) < chunk_size ? (nsv+nbsv) : chunk_size);
839 }
while (sv[j] != 0);
845 for (; i < chunk_size; i++)
849 }
while (sv[j] != 0);
864 SG_INFO(
"\n Preprocessing: SV = %d", nsv);
876 int32_t i, j, k, z, iin, jin, nit, tot_vpm_iter, lsCount;
877 int32_t tot_vpm_secant, projCount, proximal_count;
878 int32_t vpmWarningThreshold;
881 int32_t *indnzin, *indnzout;
885 float64_t sp_e, aux, fval, tau_proximal_this, dfval;
888 float64_t tot_prep_time, tot_vpm_time, tot_st_time, total_time;
893 Cache =
new sCache(KER, maxmw, ell);
894 if (chunk_size > ell) chunk_size = ell;
896 if (chunk_size <= 20)
897 vpmWarningThreshold = 30*chunk_size;
898 else if (chunk_size <= 200)
899 vpmWarningThreshold = 20*chunk_size + 200;
901 vpmWarningThreshold = 10*chunk_size + 2200;
906 if ( (chunk_size <= 20) | ((
float64_t)chunk_size/ell <= 0.001) )
907 DELTAvpm =
delta * 0.1;
908 else if ( (chunk_size <= 200) | ((
float64_t)chunk_size/ell <= 0.005) )
909 DELTAvpm =
delta * 0.5;
915 if ( (chunk_size <= 200) | ((
float64_t)chunk_size/ell <= 0.005) )
916 DELTAvpm = (1e-3 <
delta*0.1) ? 1e-3 :
delta*0.1;
921 InitialDELTAvpm = DELTAvpm;
922 DELTAsv = EPS_SV * c_const;
926 nb = ell - chunk_size;
930 tot_prep_time = tot_vpm_time = tot_st_time = total_time = 0.0;
935 SG_DEBUG(
"ell:%d, chunk_size:%d, nb:%d dim:%d\n", ell, chunk_size,nb, dim);
938 index_in =
SG_MALLOC(int32_t, chunk_size);
944 memset(ing, 0, ell*
sizeof(int32_t));
946 if (verbosity > 0 && PreprocessMode != 0)
947 SG_INFO(
"\n*********** Begin setup step...\n");
950 switch(PreprocessMode)
952 case 1: Preprocess1(KER, inaux, ing);
break;
955 Preprocess0(inaux, ing);
break;
958 for (j = k = i = 0; i < ell; i++)
965 if (verbosity > 0 && PreprocessMode != 0)
969 "\n\n*********** Begin decomposition technique...\n");
974 bmemrid =
SG_MALLOC(int32_t, chunk_size);
977 indnzin =
SG_MALLOC(int32_t, chunk_size);
986 for (i = 0; i < ell; i++)
999 for (i = 0; i < chunk_size; i++)
1000 cec[index_in[i]] = cec[index_in[i]]+1;
1002 for (i = chunk_size-1; i >= 0; i--)
1006 bmem[index_in[i]] = 1;
1011 SG_INFO(
" IT | Prep Time | Solver IT | Solver Time |");
1012 SG_INFO(
" Grad Time | KKT violation\n");
1013 SG_INFO(
"------+-----------+-----------+-------------+");
1014 SG_INFO(
"-----------+--------------\n");
1023 if ((nit % 10) == 9)
1026 total_time += (
float64_t)(t-ti) / CLOCKS_PER_SEC;
1028 total_time += (
float64_t)(ti-t) / CLOCKS_PER_SEC;
1033 SG_INFO(
"\n*********** ITERATION: %d\n", nit + 1);
1034 else if (verbosity > 0)
1041 for (k = 0; k < nb; k++)
1042 if (alpha_out(k)>DELTAsv)
1044 indnzout[nzout] = index_out[k];
1049 for (j = 0; j < nzout; j++)
1051 vau[j] = y[indnzout[j]]*alpha[indnzout[j]];
1058 for (i = 0; i < chunk_size; i++)
1062 for (i = 0; i < chunk_size; i++)
1065 ptmw = Cache->GetRow(iin);
1068 for (j = 0; j <= i; j++)
1069 sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j] * ptmw[index_in[j]];
1071 else if (incom[i] == -1)
1072 for (j = 0; j <= i; j++)
1073 sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j]
1074 * (
float32_t)KER->Get(iin, index_in[j]);
1077 for (j = 0; j < i; j++)
1079 sp_D[i*chunk_size + j]
1080 = sp_y[i]*sp_y[j] * (
float32_t)KER->Get(iin, index_in[j]);
1082 sp_D[i*chunk_size + j]
1083 = sp_D[incom[j]*chunk_size + incom[i]];
1084 sp_D[i*chunk_size + i]
1085 = sp_y[i]*sp_y[i] * (
float32_t)KER->Get(iin, index_in[i]);
1088 for (i = 0; i < chunk_size; i++)
1090 for (j = 0; j < i; j++)
1091 sp_D[j*chunk_size + i] = sp_D[i*chunk_size + j];
1094 if (nit == 0 && PreprocessMode > 0)
1096 for (i = 0; i < chunk_size; i++)
1100 ptmw = Cache->GetRow(iin);
1102 for (j = 0; j < nzout; j++)
1103 aux += vau[j] * KER->Get(iin, indnzout[j]);
1105 for (j = 0; j < nzout; j++)
1106 aux += vau[j] * ptmw[indnzout[j]];
1107 sp_h[i] = y_in(i) * aux - 1.0;
1112 for (i = 0; i < chunk_size; i++)
1113 vau[i] = alpha_in(i);
1114 for (i = 0; i < chunk_size; i++)
1117 for (j = 0; j < chunk_size; j++)
1118 aux += sp_D[i*chunk_size + j] * vau[j];
1119 sp_h[i] = st[index_in[i]] * y_in(i) - 1.0 - aux;
1126 " Preparation Time: %.2lf\n", (
float64_t)t/CLOCKS_PER_SEC);
1127 else if (verbosity > 0)
1129 tot_prep_time += (
float64_t)t/CLOCKS_PER_SEC;
1133 if (tau_proximal < 0.0)
1134 tau_proximal_this = 0.0;
1136 tau_proximal_this = tau_proximal;
1140 for (i = 0; i < chunk_size; i++)
1142 vau[i] = sp_D[i*chunk_size + i];
1143 sp_h[i] -= tau_proximal_this * alpha_in(i);
1144 sp_D[i*chunk_size + i] += (
float32_t)tau_proximal_this;
1147 if (kktold <
delta*100)
1148 for (i = 0; i < chunk_size; i++)
1149 sp_alpha[i] = alpha_in(i);
1151 for (i = 0; i < chunk_size; i++)
1155 i =
gpm_solver(projection_solver, projection_projector, chunk_size,
1156 sp_D, sp_h, c_const, sp_e, sp_y, sp_alpha,
1157 DELTAvpm*DELTAkin, &lsCount, &projCount);
1159 if (i > vpmWarningThreshold)
1163 SG_INFO(
"\n WARNING: inner subproblem hard to solve;");
1164 SG_INFO(
" setting a smaller -q or");
1165 SG_INFO(
" tuning -c and -g options might help.\n");
1169 SG_INFO(
"\n WARNING: inner subproblem hard to solve;");
1170 SG_INFO(
" set a smaller -q or");
1171 SG_INFO(
" try a better data scaling.\n");
1177 tot_vpm_secant += projCount;
1178 tot_vpm_time += (
float64_t)t/CLOCKS_PER_SEC;
1185 else if (verbosity > 0)
1193 for (i = 0; i < chunk_size; i++)
1194 sp_D[i*chunk_size + i] = (
float32_t)vau[i];
1195 tau_proximal_this = 0.0;
1196 if (tau_proximal < 0.0)
1199 for (i = 0; i < chunk_size; i++)
1202 for (j = 0; j < chunk_size; j++)
1203 aux += sp_D[i*chunk_size + j]*(alpha_in(j) - sp_alpha[j]);
1204 dfval += (0.5*aux - st[index_in[i]]*y_in(i) + 1.0) * (alpha_in(i) - sp_alpha[i]);
1208 for (i = 0; i < chunk_size; i++)
1209 aux += (alpha_in(i) - sp_alpha[i])*(alpha_in(i) - sp_alpha[i]);
1211 if ((-dfval/aux) < -0.5*tau_proximal)
1213 tau_proximal_this = -tau_proximal;
1215 SG_DEBUG(
"tau threshold: %lf ", -dfval/aux);
1219 }
while (tau_proximal_this != 0.0 && proximal_count < 2);
1224 for (j = 0; j < chunk_size; j++)
1229 aux = sp_alpha[j] - alpha_in(j);
1230 if (fabs(aux) > DELTAsv)
1232 indnzin[nzin] = index_in[j];
1233 grad[nzin] = aux * y_in(j);
1239 if (KER->get_kernel()->has_property(
KP_LINADD) && get_linadd_enabled())
1241 KER->get_kernel()->clear_normal() ;
1243 for (j = 0; j < nzin; j++)
1244 KER->get_kernel()->add_to_normal(indnzin[j], grad[j]);
1246 if (nit == 0 && PreprocessMode > 0)
1248 for (j = 0; j < nzout; j++)
1251 KER->get_kernel()->add_to_normal(jin, alpha[jin] * y[jin]);
1255 for (i = 0; i < ell; i++)
1256 st[i] += KER->get_kernel()->compute_optimized(i);
1260 k = Cache->DivideMP(ing, indnzin, nzin);
1261 for (j = 0; j < k; j++)
1263 ptmw = Cache->FillRow(indnzin[ing[j]]);
1264 for (i = 0; i < ell; i++)
1265 st[i] += grad[ing[j]] * ptmw[i];
1268 if (PreprocessMode > 0 && nit == 0)
1273 for (j = 0; j < nzout; j++)
1276 ptmw = Cache->FillRow(jin);
1277 for (i = 0; i < ell; i++)
1278 st[i] += alpha[jin] * y[jin] * ptmw[i];
1282 " G*x0 time: %.2lf\n", (
float64_t)(clock()-ti2)/CLOCKS_PER_SEC);
1291 " Gradient updating time: %.2lf\n", (
float64_t)t/CLOCKS_PER_SEC);
1292 else if (verbosity > 0)
1294 tot_st_time += (
float64_t)t/CLOCKS_PER_SEC;
1297 for (i = 0; i < chunk_size; i++)
1298 alpha_in(i) = sp_alpha[i];
1303 for (i = 0; i < ell; i++)
1305 if (is_free(i)) j++;
1306 if (is_bound(i)) k++;
1319 total_time += (
float64_t)(t-ti) / CLOCKS_PER_SEC;
1321 total_time += (
float64_t)(ti-t) / CLOCKS_PER_SEC;
1324 memcpy(solution, alpha, ell *
sizeof(
float64_t));
1328 for (i = 0; i < ell; i++)
1329 fval += alpha[i]*(y[i]*st[i]*0.5 - 1.0);
1331 SG_INFO(
"\n------+-----------+-----------+-------------+");
1332 SG_INFO(
"-----------+--------------\n");
1334 "\n- TOTAL ITERATIONS: %i\n", nit);
1341 for (i = ell - 1; i >= 0; i--)
1343 if (cec[i] > 0) j++;
1344 if (cec[i] > 1) k++;
1345 if (cec[i] > 2) z++;
1348 "- Variables entering the working set at least one time: %i\n", j);
1350 "- Variables entering the working set at least two times: %i\n", k);
1352 "- Variables entering the working set at least three times: %i\n", z);
1381 aux = KER->KernelEvaluations;
1382 SG_INFO(
"- Total CPU time: %lf\n", total_time);
1386 "- Total kernel evaluations: %.0lf\n", aux);
1388 "- Total inner solver iterations: %i\n", tot_vpm_iter);
1389 if (projection_projector == 1)
1391 "- Total projector iterations: %i\n", tot_vpm_secant);
1393 "- Total preparation time: %lf\n", tot_prep_time);
1395 "- Total inner solver time: %lf\n", tot_vpm_time);
1397 "- Total gradient updating time: %lf\n", tot_st_time);
1399 SG_INFO(
"- Objective function value: %lf\n", fval);
1400 objective_value=fval;
1407 void quick_si(int32_t a[], int32_t n)
1409 int32_t i, j, s, d, l, x, w, ps[20], pd[20];
1426 while (a[i] < x) i++;
1427 while (a[j] > x) j--;
1461 void quick_s2(
float64_t a[], int32_t n, int32_t ia[])
1463 int32_t i, j, s, d, l, iw, ps[20], pd[20];
1481 while (a[i] < x) i++;
1482 while (a[j] > x) j--;
1519 void quick_s3(int32_t a[], int32_t n, int32_t ia[])
1521 int32_t i, j, s, d, l, iw, w, x, ps[20], pd[20];
1538 while (a[i] < x) i++;
1539 while (a[j] > x) j--;
1574 #endif // DOXYGEN_SHOULD_SKIP_THIS