00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 #include <math.h>
00072 #include <stdio.h>
00073 #include <stdlib.h>
00074 #include <string.h>
00075 #include <time.h>
00076
00077 #include <shogun/classifier/svm/gpm.h>
00078 #include <shogun/classifier/svm/gpdt.h>
00079 #include <shogun/classifier/svm/gpdtsolve.h>
00080 #include <shogun/lib/Signal.h>
00081 #include <shogun/io/SGIO.h>
00082
00083 using namespace shogun;
00084
00085 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00086 namespace shogun
00087 {
00088 #define y_in(i) y[index_in[(i)]]
00089 #define y_out(i) y[index_out[(i)]]
00090 #define alpha_in(i) alpha[index_in[(i)]]
00091 #define alpha_out(i) alpha[index_out[(i)]]
00092 #define minfty (-1.0e+30) // minus infinity
00093
00094 uint32_t Randnext = 1;
00095
00096 #define ThRand (Randnext = Randnext * 1103515245L + 12345L)
00097 #define ThRandPos ((Randnext = Randnext * 1103515245L + 12345L) & 0x7fffffff)
00098
00099 FILE *fp;
00100
00101
00102 void quick_si (int32_t a[], int32_t k);
00103 void quick_s3 (int32_t a[], int32_t k, int32_t ia[]);
00104 void quick_s2 (float64_t a[], int32_t k, int32_t ia[]);
00105
00106
00107
00108
00109 class sCache
00110 {
00111
00112 public:
00113 sCache (sKernel* sk, int32_t Mbyte, int32_t ell);
00114 ~sCache ();
00115
00116 cachetype *FillRow (int32_t row, int32_t IsC = 0);
00117 cachetype *GetRow (int32_t row);
00118
00119 int32_t DivideMP (int32_t *out, int32_t *in, int32_t n);
00120
00121
00122 void Iteration(void) { nit++; }
00123
00124
00125 int32_t CheckCycle(void)
00126 {
00127 int32_t us;
00128 cache_entry *pt = first_free->next;
00129
00130 for (us = 0; pt != first_free; us++, pt = pt->next);
00131 if (us != maxmw-1)
00132 return 1;
00133 else
00134 return 0;
00135 }
00136
00137 private:
00138
00139 struct cache_entry
00140 {
00141 int32_t row;
00142 int32_t last_access_it;
00143 cache_entry *prev, *next;
00144 cachetype *data;
00145 };
00146
00147 sKernel* KER;
00148 int32_t maxmw, ell;
00149 int32_t nit;
00150
00151 cache_entry *mw;
00152 cache_entry *first_free;
00153 cache_entry **pindmw;
00154 cachetype *onerow;
00155
00156 cachetype *FindFree(int32_t row, int32_t IsC);
00157 };
00158
00159
00160
00161
00162
00163 sCache::sCache(sKernel* sk, int32_t Mbyte, int32_t _ell) : KER(sk), ell(_ell)
00164 {
00165 int32_t i;
00166
00167
00168 maxmw = (sizeof(cache_entry) + sizeof(cache_entry *)
00169 + ell*sizeof(cachetype)) / 4;
00170
00171 maxmw = Mbyte*1024*(1024/4) / maxmw;
00172
00173
00174 mw = SG_MALLOC(cache_entry, maxmw);
00175 pindmw = SG_MALLOC(cache_entry*, ell);
00176 onerow = SG_MALLOC(cachetype, ell);
00177
00178
00179 for (i = 0; i < maxmw; i++)
00180 {
00181 mw[i].prev = (i == 0 ? &mw[maxmw-1] : &mw[i-1]);
00182 mw[i].next = (i == maxmw-1 ? &mw[0] : &mw[i+1]);
00183 mw[i].data = SG_MALLOC(cachetype, ell);
00184 mw[i].row = -1;
00185 mw[i].last_access_it = -1;
00186 }
00187 for (i = 0; i < ell; i++)
00188 pindmw[i] = 0;
00189
00190 first_free = &mw[0];
00191 nit = 0;
00192 }
00193
00194
00195
00196
00197 sCache::~sCache()
00198 {
00199 int32_t i;
00200
00201 for (i = maxmw-1; i >= 0; i--)
00202 SG_FREE(mw[i].data);
00203
00204 SG_FREE(onerow);
00205 SG_FREE(pindmw);
00206 SG_FREE(mw);
00207 }
00208
00209
00210
00211
00212
00213 cachetype *sCache::GetRow(int32_t row)
00214 {
00215 cache_entry *c;
00216
00217 c = pindmw[row];
00218 if (c == NULL)
00219 return NULL;
00220
00221 c->last_access_it = nit;
00222 if (c == first_free)
00223 {
00224 first_free = first_free->next;
00225 }
00226 else
00227 {
00228
00229 c->prev->next = c->next;
00230 c->next->prev = c->prev;
00231 c->next = first_free;
00232 c->prev = first_free->prev;
00233 first_free->prev = c;
00234 c->prev->next = c;
00235 }
00236
00237 return c->data;
00238 }
00239
00240
00241
00242
00243
00244
00245 cachetype *sCache::FindFree(int32_t row, int32_t IsC)
00246 {
00247 cachetype *pt;
00248
00249 if (first_free->row != -1)
00250 {
00251 if (first_free->last_access_it == nit || IsC)
00252 return 0;
00253 else
00254 pindmw[first_free->row] = 0;
00255 }
00256 first_free->row = row;
00257 first_free->last_access_it = nit;
00258 pindmw[row] = first_free;
00259
00260 pt = first_free->data;
00261 first_free = first_free->next;
00262
00263 return pt;
00264 }
00265
00266
00267
00268
00269
00270 cachetype *sCache::FillRow(int32_t row, int32_t IsC)
00271 {
00272 int32_t j;
00273 cachetype *pt;
00274
00275 pt = GetRow(row);
00276 if (pt != NULL)
00277 return pt;
00278
00279 pt = FindFree(row, IsC);
00280 if (pt == 0)
00281 pt = onerow;
00282
00283
00284 for (j = 0; j < ell; j++)
00285 pt[j] = (cachetype)KER->Get(row, j);
00286 return pt;
00287 }
00288
00289
00290
00291
00292
00293 int32_t sCache::DivideMP(int32_t *out, int32_t *in, int32_t n)
00294 {
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305 int32_t *remained, nremained, k;
00306 int32_t i;
00307
00308 remained = SG_MALLOC(int32_t, n);
00309
00310 nremained = 0;
00311 k = 0;
00312 for (i = 0; i < n; i++)
00313 {
00314 if (pindmw[in[i]] != NULL)
00315 out[k++] = i;
00316 else
00317 remained[nremained++] = i;
00318 }
00319 for (i = 0; i < nremained; i++)
00320 out[k++] = remained[i];
00321
00322 SG_FREE(remained);
00323 return n;
00324 }
00325
00326
00327
00328
00329 int32_t QPproblem::optimal()
00330 {
00331
00332
00333
00334
00335 register int32_t i, j, margin_sv_number, z, k, s, kin, z1, znew=0, nnew;
00336
00337 float64_t gx_i, aux, s1, s2;
00338
00339
00340 for (j = 0; j < ell; j++)
00341 {
00342 grad[j] = y[j] - st[j];
00343 ing[j] = j;
00344 }
00345
00346 quick_s2(grad,ell,ing);
00347
00348
00349 margin_sv_number = 0;
00350
00351 for (i = chunk_size - 1; i >= 0; i--)
00352 if (is_free(index_in[i]))
00353 {
00354 margin_sv_number++;
00355 bee = y_in(i) - st[index_in[i]];
00356 break;
00357 }
00358
00359 if (margin_sv_number > 0)
00360 {
00361 aux=-1.0;
00362 for (i = nb-1; i >= 0; i--)
00363 {
00364 gx_i = bee + st[index_out[i]];
00365 if ((is_zero(index_out[i]) && (gx_i*y_out(i) < (1.0-delta))) ||
00366 (is_bound(index_out[i]) && (gx_i*y_out(i) > (1.0+delta))) ||
00367 (is_free(index_out[i]) &&
00368 ((gx_i*y_out(i) < 1.0-delta) || (gx_i*y_out(i) > 1.0+delta))))
00369 {
00370 if (fabs(gx_i*y_out(i) - 1.0) > aux)
00371 aux = fabs(gx_i*y_out(i) - 1.0);
00372 }
00373 }
00374 }
00375 else
00376 {
00377 for (i = ell - 1; i >= 0; i--)
00378 if (is_free(i))
00379 {
00380 margin_sv_number++;
00381 bee = y[i] - st[i];
00382 break;
00383 }
00384 if (margin_sv_number == 0)
00385 {
00386 s1 = -minfty;
00387 s2 = -minfty;
00388 for (j = 0; j < ell; j++)
00389 if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] >= 0) )
00390 {
00391 s1 = grad[j];
00392 break;
00393 }
00394 for (j = 0; j < ell; j++)
00395 if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] <= 0) )
00396 {
00397 s2 = grad[j];
00398 break;
00399 }
00400 if (s1 < s2)
00401 aux = s1;
00402 else
00403 aux = s2;
00404
00405 s1 = minfty;
00406 s2 = minfty;
00407 for (j = ell-1; j >=0; j--)
00408 if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] <= 0) )
00409 {
00410 s1 = grad[j];
00411 break;
00412 }
00413 for (j = ell-1; j >=0; j--)
00414 if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] >= 0) )
00415 {
00416 s2 = grad[j];
00417 break;
00418 }
00419 if (s2 > s1) s1 = s2;
00420
00421 bee = 0.5 * (s1+aux);
00422 }
00423
00424 aux = -1.0;
00425 for (i = ell-1; i >= 0; i--)
00426 {
00427 gx_i = bee + st[i];
00428 if ((is_zero(i) && (gx_i*y[i] < (1.0-delta))) ||
00429 (is_bound(i) && (gx_i*y[i] > (1.0+delta))) ||
00430 (is_free(i) &&
00431 ((gx_i*y[i] < 1.0-delta) || (gx_i*y[i] > 1.0+delta))))
00432 {
00433 if (fabs(gx_i*y[i] - 1.0) > aux)
00434 aux = fabs(gx_i*y[i] - 1.0);
00435 }
00436 }
00437 }
00438
00439 if (aux < 0.0)
00440 return 1;
00441 else
00442 {
00443 if (verbosity > 1)
00444 SG_INFO(" Max KKT violation: %lf\n", aux);
00445 else if (verbosity > 0)
00446 SG_INFO(" %lf\n", aux);
00447
00448 if (fabs(kktold-aux) < delta*0.01 && aux < delta*2.0)
00449 {
00450 if (DELTAvpm > InitialDELTAvpm*0.1)
00451 {
00452 DELTAvpm = (DELTAvpm*0.5 > InitialDELTAvpm*0.1 ?
00453 DELTAvpm*0.5 : InitialDELTAvpm*0.1);
00454 SG_INFO("Inner tolerance changed to: %lf\n", DELTAvpm);
00455 }
00456 }
00457
00458 kktold = aux;
00459
00460
00461
00462
00463
00464
00465 for (j = 0; j < chunk_size; j++)
00466 inold[j] = index_in[j];
00467
00468 z = -1;
00469 z1 = ell;
00470 k = 0;
00471 j = 0;
00472 while (k < q)
00473 {
00474 i = z + 1;
00475 while (i < z1)
00476 {
00477 if ( is_free(ing[i]) ||
00478 (-y[ing[i]]>=0 && is_zero(ing[i])) ||
00479 (-y[ing[i]]<=0 && is_bound(ing[i]))
00480 )
00481 {
00482 znew = i;
00483 break;
00484 }
00485 i++;
00486 }
00487 if (i == z1) break;
00488
00489 s = z1 - 1;
00490 while (znew < s)
00491 {
00492 if ( is_free(ing[s]) ||
00493 (y[ing[s]]>=0 && is_zero(ing[s])) ||
00494 (y[ing[s]]<=0 && is_bound(ing[s]))
00495 )
00496 {
00497 z1 = s;
00498 z = znew;
00499 break;
00500 }
00501 s--;
00502 }
00503 if (znew == s) break;
00504
00505 index_in[k++] = ing[z];
00506 index_in[k++] = ing[z1];
00507 }
00508
00509 if (k < q)
00510 {
00511 if (verbosity > 1)
00512 SG_INFO(" New q: %i\n", k);
00513 q = k;
00514 }
00515
00516 quick_si(index_in, q);
00517
00518 s = 0;
00519 j = 0;
00520 for (k = 0; k < chunk_size; k++)
00521 {
00522 z = inold[k];
00523 for (i = j; i < q; i++)
00524 if (z <= index_in[i])
00525 break;
00526
00527 if (i == q)
00528 {
00529 for (i = k; i < chunk_size; i++)
00530 {
00531 ing[s] = inold[i];
00532 s = s+1;
00533 }
00534 break;
00535 }
00536
00537 if (z == index_in[i])
00538 j = i + 1;
00539 else
00540 {
00541 ing[s] = z;
00542 s = s + 1;
00543 j = i;
00544 }
00545 }
00546
00547 for (i = 0; i < s; i++)
00548 {
00549 bmemrid[i] = bmem[ing[i]];
00550 pbmr[i] = i;
00551 }
00552
00553 quick_s3(bmemrid, s, pbmr);
00554
00555
00556 j = q;
00557 i = 0;
00558 while (j < chunk_size && i < s)
00559 {
00560 if (is_free(ing[pbmr[i]]))
00561 {
00562 index_in[j] = ing[pbmr[i]];
00563 j++;
00564 }
00565 i++;
00566 }
00567
00568
00569 if (j < chunk_size)
00570 {
00571 i = 0;
00572 while (j < chunk_size && i < s)
00573 {
00574 if (is_zero(ing[pbmr[i]]))
00575 {
00576 index_in[j] = ing[pbmr[i]];
00577 j++;
00578 }
00579 i++;
00580 }
00581 if (j < chunk_size)
00582 {
00583 i = 0;
00584 while (j < chunk_size && i < s)
00585 {
00586 if (is_bound(ing[pbmr[i]]))
00587 {
00588 index_in[j] = ing[pbmr[i]];
00589 j++;
00590 }
00591 i++;
00592 }
00593 }
00594 }
00595
00596 quick_si(index_in, chunk_size);
00597
00598 for (i = 0; i < chunk_size; i++)
00599 bmem[index_in[i]]++;
00600
00601 j = 0;
00602 k = 0;
00603 for (i = 0; i < chunk_size; i++)
00604 {
00605 for (z = j; z < index_in[i]; z++)
00606 {
00607 index_out[k] = z;
00608 k++;
00609 }
00610 j = index_in[i]+1;
00611 }
00612 for (z = j; z < ell; z++)
00613 {
00614 index_out[k] = z;
00615 k++;
00616 }
00617
00618 for (i = 0; i < nb; i++)
00619 bmem[index_out[i]] = 0;
00620
00621 kin = 0;
00622 j = 0;
00623 for (k = 0; k < chunk_size; k++)
00624 {
00625 z = index_in[k];
00626 for (i = j; i < chunk_size; i++)
00627 if (z <= inold[i])
00628 break;
00629 if (i == chunk_size)
00630 {
00631 for (s = k; s < chunk_size; s++)
00632 {
00633 incom[s] = -1;
00634 cec[index_in[s]]++;
00635 }
00636 kin = kin + chunk_size - k ;
00637 break;
00638 }
00639
00640 if (z == inold[i])
00641 {
00642 incom[k] = i;
00643 j = i+1;
00644 }
00645 else
00646 {
00647 incom[k] = -1;
00648 j = i;
00649 kin = kin + 1;
00650 cec[index_in[k]]++;
00651 }
00652 }
00653
00654 nnew = kin & (~1);
00655 if (nnew < 10)
00656 nnew = 10;
00657 if (nnew < chunk_size/10)
00658 nnew = chunk_size/10;
00659 if (nnew < q)
00660 {
00661 q = nnew;
00662 q = q & (~1);
00663 }
00664
00665 if (kin == 0)
00666 {
00667 DELTAkin *= 0.1;
00668 if (DELTAkin < 1.0e-6)
00669 {
00670 SG_INFO("\n***ERROR***: GPDT stops with tolerance");
00671 SG_INFO(
00672 " %lf because it is unable to change the working set.\n", kktold);
00673 return 1;
00674 }
00675 else
00676 {
00677 SG_INFO("Inner tolerance temporary changed to:");
00678 SG_INFO(" %e\n", DELTAvpm*DELTAkin);
00679 }
00680 }
00681 else
00682 DELTAkin = 1.0;
00683
00684 if (verbosity > 1)
00685 {
00686 SG_INFO(" Working set: new components: %i", kin);
00687 SG_INFO(", new parameter n: %i\n", q);
00688 }
00689
00690 return 0;
00691 }
00692 }
00693
00694
00695
00696
00697 int32_t QPproblem::Preprocess0(int32_t *aux, int32_t *sv)
00698 {
00699 int32_t i, j;
00700
00701 Randnext = 1;
00702 memset(sv, 0, ell*sizeof(int32_t));
00703 for (i = 0; i < chunk_size; i++)
00704 {
00705 do
00706 {
00707 j = ThRandPos % ell;
00708 } while (sv[j] != 0);
00709 sv[j] = 1;
00710 }
00711 return(chunk_size);
00712 }
00713
00714
00715
00716
00717 int32_t QPproblem::Preprocess1(sKernel* kernel, int32_t *aux, int32_t *sv)
00718 {
00719 int32_t s;
00720 int32_t sl;
00721 int32_t n, i, off, j, k, ll;
00722 int32_t nsv, nbsv;
00723 int32_t *sv_loc, *bsv_loc, *sp_y;
00724 float32_t *sp_D=NULL;
00725 float64_t *sp_alpha, *sp_h;
00726
00727 s = ell;
00728
00729 n = (s + preprocess_size - 1) / preprocess_size;
00730 sl = 1 + s / n;
00731
00732 if (verbosity > 0)
00733 {
00734 SG_INFO(" Preprocessing: examples = %d", s);
00735 SG_INFO(", subp. = %d", n);
00736 SG_INFO(", size = %d\n",sl);
00737 }
00738
00739 sv_loc = SG_MALLOC(int32_t, s);
00740 bsv_loc = SG_MALLOC(int32_t, s);
00741 sp_alpha = SG_MALLOC(float64_t, sl);
00742 sp_h = SG_MALLOC(float64_t, sl);
00743 sp_y = SG_MALLOC(int32_t, sl);
00744
00745 if (sl < 500)
00746 sp_D = SG_MALLOC(float32_t, sl*sl);
00747
00748 for (i = 0; i < sl; i++)
00749 sp_h[i] = -1.0;
00750 memset(alpha, 0, ell*sizeof(float64_t));
00751
00752
00753 for (i = 0; i < ell; i++)
00754 aux[i] = i;
00755 Randnext = 1;
00756 for (i = 0; i < ell; i++)
00757 {
00758 j = ThRandPos % ell;
00759 k = ThRandPos % ell;
00760 ll = aux[j]; aux[j] = aux[k]; aux[k] = ll;
00761 }
00762
00763 nbsv = nsv = 0;
00764 for (i = 0; i < n; i++)
00765 {
00766 if (verbosity > 0)
00767 SG_INFO("%d...", i);
00768 SplitParts(s, i, n, &ll, &off);
00769
00770 if (sl < 500)
00771 {
00772 for (j = 0; j < ll; j++)
00773 {
00774 sp_y[j] = y[aux[j+off]];
00775 for (k = j; k < ll; k++)
00776 sp_D[k*sl + j] = sp_D[j*sl + k]
00777 = y[aux[j+off]] * y[aux[k+off]]
00778 * (float32_t)kernel->Get(aux[j+off], aux[k+off]);
00779 }
00780
00781 memset(sp_alpha, 0, sl*sizeof(float64_t));
00782
00783
00784 gpm_solver(projection_solver, projection_projector, ll, sp_D, sp_h,
00785 c_const, 0.0, sp_y, sp_alpha, delta*10, NULL);
00786 }
00787 else
00788 {
00789 QPproblem p2;
00790 p2.Subproblem(*this, ll, aux + off);
00791 p2.chunk_size = (int32_t) ((float64_t)chunk_size / sqrt((float64_t)n));
00792 p2.q = (int32_t) ((float64_t)q / sqrt((float64_t)n));
00793 p2.maxmw = ll*ll*4 / (1024 * 1024);
00794 if (p2.maxmw > maxmw / 2)
00795 p2.maxmw = maxmw / 2;
00796 p2.verbosity = 0;
00797 p2.delta = delta * 10.0;
00798 p2.PreprocessMode = 0;
00799 kernel->KernelEvaluations += p2.gpdtsolve(sp_alpha);
00800 }
00801
00802 for (j = 0; j < ll; j++)
00803 {
00804
00805 if (sp_alpha[j] < (c_const-DELTAsv))
00806 sp_alpha[j] = 0.0;
00807 else
00808 sp_alpha[j] = c_const;
00809 if (sp_alpha[j] > DELTAsv)
00810 {
00811 if (sp_alpha[j] < (c_const-DELTAsv))
00812 sv_loc[nsv++] = aux[j+off];
00813 else
00814 bsv_loc[nbsv++] = aux[j+off];
00815 alpha[aux[j+off]] = sp_alpha[j];
00816 }
00817 }
00818 }
00819
00820 Randnext = 1;
00821
00822
00823 memset(sv, 0, ell*sizeof(int32_t));
00824 ll = (nsv < chunk_size ? nsv : chunk_size);
00825 for (i = 0; i < ll; i++)
00826 {
00827 do {
00828 j = sv_loc[ThRandPos % nsv];
00829 } while (sv[j] != 0);
00830 sv[j] = 1;
00831 }
00832
00833
00834 ll = ((nsv+nbsv) < chunk_size ? (nsv+nbsv) : chunk_size);
00835 for (; i < ll; i++)
00836 {
00837 do {
00838 j = bsv_loc[ThRandPos % nbsv];
00839 } while (sv[j] != 0);
00840 sv[j] = 1;
00841 }
00842
00843
00844
00845 for (; i < chunk_size; i++)
00846 {
00847 do {
00848 j = ThRandPos % ell;
00849 } while (sv[j] != 0);
00850 sv[j] = 1;
00851 }
00852
00853
00854
00855 if (sl < 500) SG_FREE(sp_D);
00856 SG_FREE(sp_y );
00857 SG_FREE(sp_h );
00858 SG_FREE(sv_loc );
00859 SG_FREE(bsv_loc );
00860 SG_FREE(sp_alpha);
00861
00862 if (verbosity > 0)
00863 {
00864 SG_INFO("\n Preprocessing: SV = %d", nsv);
00865 SG_INFO(", BSV = %d\n", nbsv);
00866 }
00867
00868 return(nsv);
00869 }
00870
00871
00872
00873
00874 float64_t QPproblem::gpdtsolve(float64_t *solution)
00875 {
00876 int32_t i, j, k, z, iin, jin, nit, tot_vpm_iter, lsCount;
00877 int32_t tot_vpm_secant, projCount, proximal_count;
00878 int32_t vpmWarningThreshold;
00879 int32_t nzin, nzout;
00880 int32_t *sp_y;
00881 int32_t *indnzin, *indnzout;
00882 float32_t *sp_D;
00883 float64_t *sp_h, *sp_hloc,
00884 *sp_alpha,*stloc;
00885 float64_t sp_e, aux, fval, tau_proximal_this, dfval;
00886 float64_t *vau;
00887 float64_t *weight;
00888 float64_t tot_prep_time, tot_vpm_time, tot_st_time, total_time;
00889 sCache *Cache;
00890 cachetype *ptmw;
00891 clock_t t, ti;
00892
00893 Cache = new sCache(KER, maxmw, ell);
00894 if (chunk_size > ell) chunk_size = ell;
00895
00896 if (chunk_size <= 20)
00897 vpmWarningThreshold = 30*chunk_size;
00898 else if (chunk_size <= 200)
00899 vpmWarningThreshold = 20*chunk_size + 200;
00900 else
00901 vpmWarningThreshold = 10*chunk_size + 2200;
00902
00903 kktold = 10000.0;
00904 if (delta <= 5e-3)
00905 {
00906 if ( (chunk_size <= 20) | ((float64_t)chunk_size/ell <= 0.001) )
00907 DELTAvpm = delta * 0.1;
00908 else if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
00909 DELTAvpm = delta * 0.5;
00910 else
00911 DELTAvpm = delta;
00912 }
00913 else
00914 {
00915 if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
00916 DELTAvpm = (1e-3 < delta*0.1) ? 1e-3 : delta*0.1;
00917 else
00918 DELTAvpm = 5e-3;
00919 }
00920
00921 InitialDELTAvpm = DELTAvpm;
00922 DELTAsv = EPS_SV * c_const;
00923 DELTAkin = 1.0;
00924
00925 q = q & (~1);
00926 nb = ell - chunk_size;
00927 tot_vpm_iter = 0;
00928 tot_vpm_secant = 0;
00929
00930 tot_prep_time = tot_vpm_time = tot_st_time = total_time = 0.0;
00931
00932 ti = clock();
00933
00934
00935 SG_DEBUG("ell:%d, chunk_size:%d, nb:%d dim:%d\n", ell, chunk_size,nb, dim);
00936 ing = SG_MALLOC(int32_t, ell);
00937 inaux = SG_MALLOC(int32_t, ell);
00938 index_in = SG_MALLOC(int32_t, chunk_size);
00939 index_out = SG_MALLOC(int32_t, ell);
00940 indnzout = SG_MALLOC(int32_t, nb);
00941 alpha = SG_MALLOC(float64_t, ell);
00942
00943 memset(alpha, 0, ell*sizeof(float64_t));
00944 memset(ing, 0, ell*sizeof(int32_t));
00945
00946 if (verbosity > 0 && PreprocessMode != 0)
00947 SG_INFO("\n*********** Begin setup step...\n");
00948 t = clock();
00949
00950 switch(PreprocessMode)
00951 {
00952 case 1: Preprocess1(KER, inaux, ing); break;
00953 case 0:
00954 default:
00955 Preprocess0(inaux, ing); break;
00956 }
00957
00958 for (j = k = i = 0; i < ell; i++)
00959 if (ing[i] == 0)
00960 index_out[j++] = i;
00961 else
00962 index_in[k++] = i;
00963
00964 t = clock() - t;
00965 if (verbosity > 0 && PreprocessMode != 0)
00966 {
00967 SG_INFO( " Time for setup: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
00968 SG_INFO(
00969 "\n\n*********** Begin decomposition technique...\n");
00970 }
00971
00972
00973 bmem = SG_MALLOC(int32_t, ell);
00974 bmemrid = SG_MALLOC(int32_t, chunk_size);
00975 pbmr = SG_MALLOC(int32_t, chunk_size);
00976 cec = SG_MALLOC(int32_t, ell);
00977 indnzin = SG_MALLOC(int32_t, chunk_size);
00978 inold = SG_MALLOC(int32_t, chunk_size);
00979 incom = SG_MALLOC(int32_t, chunk_size);
00980 vau = SG_MALLOC(float64_t, ell);
00981 grad = SG_MALLOC(float64_t, ell);
00982 weight = SG_MALLOC(float64_t, dim);
00983 st = SG_MALLOC(float64_t, ell);
00984 stloc = SG_MALLOC(float64_t, ell);
00985
00986 for (i = 0; i < ell; i++)
00987 {
00988 bmem[i] = 0;
00989 cec[i] = 0;
00990 st[i] = 0;
00991 }
00992
00993 sp_y = SG_MALLOC(int32_t, chunk_size);
00994 sp_D = SG_MALLOC(float32_t, chunk_size*chunk_size);
00995 sp_alpha = SG_MALLOC(float64_t, chunk_size);
00996 sp_h = SG_MALLOC(float64_t, chunk_size);
00997 sp_hloc = SG_MALLOC(float64_t, chunk_size);
00998
00999 for (i = 0; i < chunk_size; i++)
01000 cec[index_in[i]] = cec[index_in[i]]+1;
01001
01002 for (i = chunk_size-1; i >= 0; i--)
01003 {
01004 incom[i] = -1;
01005 sp_alpha[i] = 0.0;
01006 bmem[index_in[i]] = 1;
01007 }
01008
01009 if (verbosity == 1)
01010 {
01011 SG_INFO( " IT | Prep Time | Solver IT | Solver Time |");
01012 SG_INFO( " Grad Time | KKT violation\n");
01013 SG_INFO( "------+-----------+-----------+-------------+");
01014 SG_INFO( "-----------+--------------\n");
01015 }
01016
01017
01018
01019 nit = 0;
01020 do
01021 {
01022 t = clock();
01023 if ((nit % 10) == 9)
01024 {
01025 if ((t-ti) > 0)
01026 total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
01027 else
01028 total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
01029 ti = t;
01030 }
01031
01032 if (verbosity > 1)
01033 SG_INFO("\n*********** ITERATION: %d\n", nit + 1);
01034 else if (verbosity > 0)
01035 SG_INFO( "%5d |", nit + 1);
01036 else
01037 SG_INFO( ".");
01038 fflush(stdout);
01039
01040 nzout = 0;
01041 for (k = 0; k < nb; k++)
01042 if (alpha_out(k)>DELTAsv)
01043 {
01044 indnzout[nzout] = index_out[k];
01045 nzout++;
01046 }
01047
01048 sp_e = 0.;
01049 for (j = 0; j < nzout; j++)
01050 {
01051 vau[j] = y[indnzout[j]]*alpha[indnzout[j]];
01052 sp_e += vau[j];
01053 }
01054
01055 if (verbosity > 1)
01056 SG_INFO( " spe: %e ", sp_e);
01057
01058 for (i = 0; i < chunk_size; i++)
01059 sp_y[i] = y_in(i);
01060
01061
01062 for (i = 0; i < chunk_size; i++)
01063 {
01064 iin = index_in[i];
01065 ptmw = Cache->GetRow(iin);
01066 if (ptmw != 0)
01067 {
01068 for (j = 0; j <= i; j++)
01069 sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j] * ptmw[index_in[j]];
01070 }
01071 else if (incom[i] == -1)
01072 for (j = 0; j <= i; j++)
01073 sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j]
01074 * (float32_t)KER->Get(iin, index_in[j]);
01075 else
01076 {
01077 for (j = 0; j < i; j++)
01078 if (incom[j] == -1)
01079 sp_D[i*chunk_size + j]
01080 = sp_y[i]*sp_y[j] * (float32_t)KER->Get(iin, index_in[j]);
01081 else
01082 sp_D[i*chunk_size + j]
01083 = sp_D[incom[j]*chunk_size + incom[i]];
01084 sp_D[i*chunk_size + i]
01085 = sp_y[i]*sp_y[i] * (float32_t)KER->Get(iin, index_in[i]);
01086 }
01087 }
01088 for (i = 0; i < chunk_size; i++)
01089 {
01090 for (j = 0; j < i; j++)
01091 sp_D[j*chunk_size + i] = sp_D[i*chunk_size + j];
01092 }
01093
01094 if (nit == 0 && PreprocessMode > 0)
01095 {
01096 for (i = 0; i < chunk_size; i++)
01097 {
01098 iin = index_in[i];
01099 aux = 0.;
01100 ptmw = Cache->GetRow(iin);
01101 if (ptmw == NULL)
01102 for (j = 0; j < nzout; j++)
01103 aux += vau[j] * KER->Get(iin, indnzout[j]);
01104 else
01105 for (j = 0; j < nzout; j++)
01106 aux += vau[j] * ptmw[indnzout[j]];
01107 sp_h[i] = y_in(i) * aux - 1.0;
01108 }
01109 }
01110 else
01111 {
01112 for (i = 0; i < chunk_size; i++)
01113 vau[i] = alpha_in(i);
01114 for (i = 0; i < chunk_size; i++)
01115 {
01116 aux = 0.0;
01117 for (j = 0; j < chunk_size; j++)
01118 aux += sp_D[i*chunk_size + j] * vau[j];
01119 sp_h[i] = st[index_in[i]] * y_in(i) - 1.0 - aux;
01120 }
01121 }
01122
01123 t = clock() - t;
01124 if (verbosity > 1)
01125 SG_INFO(
01126 " Preparation Time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01127 else if (verbosity > 0)
01128 SG_INFO( " %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01129 tot_prep_time += (float64_t)t/CLOCKS_PER_SEC;
01130
01131
01132
01133 if (tau_proximal < 0.0)
01134 tau_proximal_this = 0.0;
01135 else
01136 tau_proximal_this = tau_proximal;
01137 proximal_count = 0;
01138 do {
01139 t = clock();
01140 for (i = 0; i < chunk_size; i++)
01141 {
01142 vau[i] = sp_D[i*chunk_size + i];
01143 sp_h[i] -= tau_proximal_this * alpha_in(i);
01144 sp_D[i*chunk_size + i] += (float32_t)tau_proximal_this;
01145 }
01146
01147 if (kktold < delta*100)
01148 for (i = 0; i < chunk_size; i++)
01149 sp_alpha[i] = alpha_in(i);
01150 else
01151 for (i = 0; i < chunk_size; i++)
01152 sp_alpha[i] = 0.0;
01153
01154
01155 i = gpm_solver(projection_solver, projection_projector, chunk_size,
01156 sp_D, sp_h, c_const, sp_e, sp_y, sp_alpha,
01157 DELTAvpm*DELTAkin, &lsCount, &projCount);
01158
01159 if (i > vpmWarningThreshold)
01160 {
01161 if (ker_type == 2)
01162 {
01163 SG_INFO("\n WARNING: inner subproblem hard to solve;");
01164 SG_INFO(" setting a smaller -q or");
01165 SG_INFO(" tuning -c and -g options might help.\n");
01166 }
01167 else
01168 {
01169 SG_INFO("\n WARNING: inner subproblem hard to solve;");
01170 SG_INFO(" set a smaller -q or");
01171 SG_INFO(" try a better data scaling.\n");
01172 }
01173 }
01174
01175 t = clock() - t;
01176 tot_vpm_iter += i;
01177 tot_vpm_secant += projCount;
01178 tot_vpm_time += (float64_t)t/CLOCKS_PER_SEC;
01179 if (verbosity > 1)
01180 {
01181 SG_INFO(" Solver it: %d", i);
01182 SG_INFO(", ls: %d", lsCount);
01183 SG_INFO(", time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01184 }
01185 else if (verbosity > 0)
01186 {
01187 SG_INFO(" %6d", i);
01188 SG_INFO(" | %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01189 }
01190
01191
01192
01193 for (i = 0; i < chunk_size; i++)
01194 sp_D[i*chunk_size + i] = (float32_t)vau[i];
01195 tau_proximal_this = 0.0;
01196 if (tau_proximal < 0.0)
01197 {
01198 dfval = 0.0;
01199 for (i = 0; i < chunk_size; i++)
01200 {
01201 aux = 0.0;
01202 for (j = 0; j < chunk_size; j++)
01203 aux += sp_D[i*chunk_size + j]*(alpha_in(j) - sp_alpha[j]);
01204 dfval += (0.5*aux - st[index_in[i]]*y_in(i) + 1.0) * (alpha_in(i) - sp_alpha[i]);
01205 }
01206
01207 aux=0.0;
01208 for (i = 0; i < chunk_size; i++)
01209 aux += (alpha_in(i) - sp_alpha[i])*(alpha_in(i) - sp_alpha[i]);
01210
01211 if ((-dfval/aux) < -0.5*tau_proximal)
01212 {
01213 tau_proximal_this = -tau_proximal;
01214 if (verbosity > 0)
01215 SG_DEBUG("tau threshold: %lf ", -dfval/aux);
01216 }
01217 }
01218 proximal_count++;
01219 } while (tau_proximal_this != 0.0 && proximal_count < 2);
01220
01221 t = clock();
01222
01223 nzin = 0;
01224 for (j = 0; j < chunk_size; j++)
01225 {
01226 if (nit == 0)
01227 aux = sp_alpha[j];
01228 else
01229 aux = sp_alpha[j] - alpha_in(j);
01230 if (fabs(aux) > DELTAsv)
01231 {
01232 indnzin[nzin] = index_in[j];
01233 grad[nzin] = aux * y_in(j);
01234 nzin++;
01235 }
01236 }
01237
01238
01239 if (KER->get_kernel()->has_property(KP_LINADD) && get_linadd_enabled())
01240 {
01241 KER->get_kernel()->clear_normal() ;
01242
01243 for (j = 0; j < nzin; j++)
01244 KER->get_kernel()->add_to_normal(indnzin[j], grad[j]);
01245
01246 if (nit == 0 && PreprocessMode > 0)
01247 {
01248 for (j = 0; j < nzout; j++)
01249 {
01250 jin = indnzout[j];
01251 KER->get_kernel()->add_to_normal(jin, alpha[jin] * y[jin]);
01252 }
01253 }
01254
01255 for (i = 0; i < ell; i++)
01256 st[i] += KER->get_kernel()->compute_optimized(i);
01257 }
01258 else
01259 {
01260 k = Cache->DivideMP(ing, indnzin, nzin);
01261 for (j = 0; j < k; j++)
01262 {
01263 ptmw = Cache->FillRow(indnzin[ing[j]]);
01264 for (i = 0; i < ell; i++)
01265 st[i] += grad[ing[j]] * ptmw[i];
01266 }
01267
01268 if (PreprocessMode > 0 && nit == 0)
01269 {
01270 clock_t ti2;
01271
01272 ti2 = clock();
01273 for (j = 0; j < nzout; j++)
01274 {
01275 jin = indnzout[j];
01276 ptmw = Cache->FillRow(jin);
01277 for (i = 0; i < ell; i++)
01278 st[i] += alpha[jin] * y[jin] * ptmw[i];
01279 }
01280 if (verbosity > 1)
01281 SG_INFO(
01282 " G*x0 time: %.2lf\n", (float64_t)(clock()-ti2)/CLOCKS_PER_SEC);
01283 }
01284 }
01285
01286
01287
01288 t = clock() - t;
01289 if (verbosity > 1)
01290 SG_INFO(
01291 " Gradient updating time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01292 else if (verbosity > 0)
01293 SG_INFO( " %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01294 tot_st_time += (float64_t)t/CLOCKS_PER_SEC;
01295
01296
01297 for (i = 0; i < chunk_size; i++)
01298 alpha_in(i) = sp_alpha[i];
01299
01300 if (verbosity > 1)
01301 {
01302 j = k = 0;
01303 for (i = 0; i < ell; i++)
01304 {
01305 if (is_free(i)) j++;
01306 if (is_bound(i)) k++;
01307 }
01308 SG_INFO(" SV: %d", j+k);
01309 SG_INFO(", BSV: %d\n", k);
01310 }
01311 Cache->Iteration();
01312 nit = nit+1;
01313 } while (!optimal() && !(CSignal::cancel_computations()));
01314
01315
01316
01317 t = clock();
01318 if ((t-ti) > 0)
01319 total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
01320 else
01321 total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
01322 ti = t;
01323
01324 memcpy(solution, alpha, ell * sizeof(float64_t));
01325
01326
01327 fval = 0.0;
01328 for (i = 0; i < ell; i++)
01329 fval += alpha[i]*(y[i]*st[i]*0.5 - 1.0);
01330
01331 SG_INFO("\n------+-----------+-----------+-------------+");
01332 SG_INFO("-----------+--------------\n");
01333 SG_INFO(
01334 "\n- TOTAL ITERATIONS: %i\n", nit);
01335
01336 if (verbosity > 1)
01337 {
01338 j = 0;
01339 k = 0;
01340 z = 0;
01341 for (i = ell - 1; i >= 0; i--)
01342 {
01343 if (cec[i] > 0) j++;
01344 if (cec[i] > 1) k++;
01345 if (cec[i] > 2) z++;
01346 }
01347 SG_INFO(
01348 "- Variables entering the working set at least one time: %i\n", j);
01349 SG_INFO(
01350 "- Variables entering the working set at least two times: %i\n", k);
01351 SG_INFO(
01352 "- Variables entering the working set at least three times: %i\n", z);
01353 }
01354
01355
01356 SG_FREE(bmem);
01357 SG_FREE(bmemrid);
01358 SG_FREE(pbmr);
01359 SG_FREE(cec);
01360 SG_FREE(ing);
01361 SG_FREE(inaux);
01362 SG_FREE(indnzin);
01363 SG_FREE(index_in);
01364 SG_FREE(inold);
01365 SG_FREE(incom);
01366 SG_FREE(indnzout);
01367 SG_FREE(index_out);
01368 SG_FREE(vau);
01369 SG_FREE(alpha);
01370 SG_FREE(weight);
01371 SG_FREE(grad);
01372 SG_FREE(stloc);
01373 SG_FREE(st);
01374 SG_FREE(sp_h);
01375 SG_FREE(sp_hloc);
01376 SG_FREE(sp_y);
01377 SG_FREE(sp_D);
01378 SG_FREE(sp_alpha);
01379 delete Cache;
01380
01381 aux = KER->KernelEvaluations;
01382 SG_INFO( "- Total CPU time: %lf\n", total_time);
01383 if (verbosity > 0)
01384 {
01385 SG_INFO(
01386 "- Total kernel evaluations: %.0lf\n", aux);
01387 SG_INFO(
01388 "- Total inner solver iterations: %i\n", tot_vpm_iter);
01389 if (projection_projector == 1)
01390 SG_INFO(
01391 "- Total projector iterations: %i\n", tot_vpm_secant);
01392 SG_INFO(
01393 "- Total preparation time: %lf\n", tot_prep_time);
01394 SG_INFO(
01395 "- Total inner solver time: %lf\n", tot_vpm_time);
01396 SG_INFO(
01397 "- Total gradient updating time: %lf\n", tot_st_time);
01398 }
01399 SG_INFO( "- Objective function value: %lf\n", fval);
01400 objective_value=fval;
01401 return aux;
01402 }
01403
01404
01405
01406
01407 void quick_si(int32_t a[], int32_t n)
01408 {
01409 int32_t i, j, s, d, l, x, w, ps[20], pd[20];
01410
01411 l = 0;
01412 ps[0] = 0;
01413 pd[0] = n-1;
01414 do
01415 {
01416 s = ps[l];
01417 d = pd[l];
01418 l--;
01419 do
01420 {
01421 i = s;
01422 j = d;
01423 x = a[(s+d)/2];
01424 do
01425 {
01426 while (a[i] < x) i++;
01427 while (a[j] > x) j--;
01428 if (i <= j)
01429 {
01430 w = a[i];
01431 a[i] = a[j];
01432 i++;
01433 a[j] = w;
01434 j--;
01435 }
01436 } while(i<=j);
01437 if (j-s > d-i)
01438 {
01439 l++;
01440 ps[l] = s;
01441 pd[l] = j;
01442 s = i;
01443 }
01444 else
01445 {
01446 if (i < d)
01447 {
01448 l++;
01449 ps[l] = i;
01450 pd[l] = d;
01451 }
01452 d = j;
01453 }
01454 } while (s < d);
01455 } while (l >= 0);
01456 }
01457
01458
01459
01460
01461 void quick_s2(float64_t a[], int32_t n, int32_t ia[])
01462 {
01463 int32_t i, j, s, d, l, iw, ps[20], pd[20];
01464 float64_t x, w;
01465
01466 l = 0;
01467 ps[0] = 0;
01468 pd[0] = n-1;
01469 do
01470 {
01471 s = ps[l];
01472 d = pd[l];
01473 l--;
01474 do
01475 {
01476 i = s;
01477 j = d;
01478 x = a[(s+d)/2];
01479 do
01480 {
01481 while (a[i] < x) i++;
01482 while (a[j] > x) j--;
01483 if (i <= j)
01484 {
01485 iw = ia[i];
01486 w = a[i];
01487 ia[i] = ia[j];
01488 a[i] = a[j];
01489 i++;
01490 ia[j] = iw;
01491 a[j] = w;
01492 j--;
01493 }
01494 } while (i <= j);
01495 if (j-s > d-i)
01496 {
01497 l++;
01498 ps[l] = s;
01499 pd[l] = j;
01500 s = i;
01501 }
01502 else
01503 {
01504 if (i < d)
01505 {
01506 l++;
01507 ps[l] = i;
01508 pd[l] = d;
01509 }
01510 d = j;
01511 }
01512 } while (s < d);
01513 } while(l>=0);
01514 }
01515
01516
01517
01518
01519 void quick_s3(int32_t a[], int32_t n, int32_t ia[])
01520 {
01521 int32_t i, j, s, d, l, iw, w, x, ps[20], pd[20];
01522
01523 l = 0;
01524 ps[0] = 0;
01525 pd[0] = n-1;
01526 do
01527 {
01528 s = ps[l];
01529 d = pd[l];
01530 l--;
01531 do
01532 {
01533 i = s;
01534 j = d;
01535 x = a[(s+d)/2];
01536 do
01537 {
01538 while (a[i] < x) i++;
01539 while (a[j] > x) j--;
01540 if (i <= j)
01541 {
01542 iw = ia[i];
01543 w = a[i];
01544 ia[i] = ia[j];
01545 a[i] = a[j];
01546 i++;
01547 ia[j] = iw;
01548 a[j] = w;
01549 j--;
01550 }
01551 } while (i <= j);
01552 if (j-s > d-i)
01553 {
01554 l++;
01555 ps[l] = s;
01556 pd[l] = j;
01557 s = i;
01558 }
01559 else
01560 {
01561 if (i < d)
01562 {
01563 l++;
01564 ps[l] = i;
01565 pd[l] = d;
01566 }
01567 d = j;
01568 }
01569 } while (s < d);
01570 } while (l >= 0);
01571 }
01572 }
01573
01574 #endif // DOXYGEN_SHOULD_SKIP_THIS
01575
01576
01577
01578