00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 #include <math.h>
00072 #include <stdio.h>
00073 #include <stdlib.h>
00074 #include <string.h>
00075 #include <time.h>
00076
00077 #include "classifier/svm/gpm.h"
00078 #include "classifier/svm/gpdt.h"
00079 #include "classifier/svm/gpdtsolve.h"
00080 #include "lib/Signal.h"
00081 #include "lib/io.h"
00082
00083 using namespace shogun;
00084
00085 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00086 namespace shogun
00087 {
00088 #define y_in(i) y[index_in[(i)]]
00089 #define y_out(i) y[index_out[(i)]]
00090 #define alpha_in(i) alpha[index_in[(i)]]
00091 #define alpha_out(i) alpha[index_out[(i)]]
00092 #define minfty (-1.0e+30) // minus infinity
00093
00094 uint32_t Randnext = 1;
00095
00096 #define ThRand (Randnext = Randnext * 1103515245L + 12345L)
00097 #define ThRandPos ((Randnext = Randnext * 1103515245L + 12345L) & 0x7fffffff)
00098
00099 FILE *fp;
00100
00101
00102 void quick_si (int32_t a[], int32_t k);
00103 void quick_s3 (int32_t a[], int32_t k, int32_t ia[]);
00104 void quick_s2 (float64_t a[], int32_t k, int32_t ia[]);
00105
00106
00107
00108
00109 class sCache
00110 {
00111
00112 public:
00113 sCache (sKernel* sk, int32_t Mbyte, int32_t ell);
00114 ~sCache ();
00115
00116 cachetype *FillRow (int32_t row, int32_t IsC = 0);
00117 cachetype *GetRow (int32_t row);
00118
00119 int32_t DivideMP (int32_t *out, int32_t *in, int32_t n);
00120
00121
00122 void Iteration(void) { nit++; }
00123
00124
00125 int32_t CheckCycle(void)
00126 {
00127 int32_t us;
00128 cache_entry *pt = first_free->next;
00129
00130 for (us = 0; pt != first_free; us++, pt = pt->next);
00131 if (us != maxmw-1)
00132 return 1;
00133 else
00134 return 0;
00135 }
00136
00137 private:
00138
00139 struct cache_entry
00140 {
00141 int32_t row;
00142 int32_t last_access_it;
00143 cache_entry *prev, *next;
00144 cachetype *data;
00145 };
00146
00147 sKernel* KER;
00148 int32_t maxmw, ell;
00149 int32_t nit;
00150
00151 cache_entry *mw;
00152 cache_entry *first_free;
00153 cache_entry **pindmw;
00154 cachetype *onerow;
00155
00156 cachetype *FindFree(int32_t row, int32_t IsC);
00157 };
00158
00159
00160
00161
00162
00163 sCache::sCache(sKernel* sk, int32_t Mbyte, int32_t _ell) : KER(sk), ell(_ell)
00164 {
00165 int32_t i;
00166
00167
00168 maxmw = (sizeof(cache_entry) + sizeof(cache_entry *)
00169 + ell*sizeof(cachetype)) / 4;
00170
00171 maxmw = Mbyte*1024*(1024/4) / maxmw;
00172
00173
00174 mw = (cache_entry *)malloc(maxmw * sizeof(cache_entry));
00175 pindmw = (cache_entry **)malloc(ell * sizeof(cache_entry *));
00176 onerow = (cachetype *)malloc(ell * sizeof(cachetype ));
00177
00178
00179 for (i = 0; i < maxmw; i++)
00180 {
00181 mw[i].prev = (i == 0 ? &mw[maxmw-1] : &mw[i-1]);
00182 mw[i].next = (i == maxmw-1 ? &mw[0] : &mw[i+1]);
00183 mw[i].data = (cachetype *)malloc(ell*sizeof(cachetype));
00184 mw[i].row = -1;
00185 mw[i].last_access_it = -1;
00186 }
00187 for (i = 0; i < ell; i++)
00188 pindmw[i] = 0;
00189
00190 first_free = &mw[0];
00191 nit = 0;
00192 }
00193
00194
00195
00196
00197 sCache::~sCache()
00198 {
00199 int32_t i;
00200
00201 for (i = maxmw-1; i >= 0; i--)
00202 if (mw[i].data) free(mw[i].data);
00203 if (onerow) free(onerow);
00204 if (pindmw) free(pindmw);
00205 if (mw) free(mw);
00206 }
00207
00208
00209
00210
00211
00212 cachetype *sCache::GetRow(int32_t row)
00213 {
00214 cache_entry *c;
00215
00216 c = pindmw[row];
00217 if (c == NULL)
00218 return NULL;
00219
00220 c->last_access_it = nit;
00221 if (c == first_free)
00222 {
00223 first_free = first_free->next;
00224 }
00225 else
00226 {
00227
00228 c->prev->next = c->next;
00229 c->next->prev = c->prev;
00230 c->next = first_free;
00231 c->prev = first_free->prev;
00232 first_free->prev = c;
00233 c->prev->next = c;
00234 }
00235
00236 return c->data;
00237 }
00238
00239
00240
00241
00242
00243
00244 cachetype *sCache::FindFree(int32_t row, int32_t IsC)
00245 {
00246 cachetype *pt;
00247
00248 if (first_free->row != -1)
00249 {
00250 if (first_free->last_access_it == nit || IsC)
00251 return 0;
00252 else
00253 pindmw[first_free->row] = 0;
00254 }
00255 first_free->row = row;
00256 first_free->last_access_it = nit;
00257 pindmw[row] = first_free;
00258
00259 pt = first_free->data;
00260 first_free = first_free->next;
00261
00262 return pt;
00263 }
00264
00265
00266
00267
00268
00269 cachetype *sCache::FillRow(int32_t row, int32_t IsC)
00270 {
00271 int32_t j;
00272 cachetype *pt;
00273
00274 pt = GetRow(row);
00275 if (pt != NULL)
00276 return pt;
00277
00278 pt = FindFree(row, IsC);
00279 if (pt == 0)
00280 pt = onerow;
00281
00282
00283 for (j = 0; j < ell; j++)
00284 pt[j] = (cachetype)KER->Get(row, j);
00285 return pt;
00286 }
00287
00288
00289
00290
00291
00292 int32_t sCache::DivideMP(int32_t *out, int32_t *in, int32_t n)
00293 {
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304 int32_t *remained, nremained, k;
00305 int32_t i;
00306
00307 remained = (int32_t *) malloc(n*sizeof(int32_t));
00308
00309 nremained = 0;
00310 k = 0;
00311 for (i = 0; i < n; i++)
00312 {
00313 if (pindmw[in[i]] != NULL)
00314 out[k++] = i;
00315 else
00316 remained[nremained++] = i;
00317 }
00318 for (i = 0; i < nremained; i++)
00319 out[k++] = remained[i];
00320
00321 free(remained);
00322 return n;
00323 }
00324
00325
00326
00327
00328 int32_t QPproblem::optimal()
00329 {
00330
00331
00332
00333
00334 register int32_t i, j, margin_sv_number, z, k, s, kin, z1, znew=0, nnew;
00335
00336 float64_t gx_i, aux, s1, s2;
00337
00338
00339 for (j = 0; j < ell; j++)
00340 {
00341 grad[j] = y[j] - st[j];
00342 ing[j] = j;
00343 }
00344
00345 quick_s2(grad,ell,ing);
00346
00347
00348 margin_sv_number = 0;
00349
00350 for (i = chunk_size - 1; i >= 0; i--)
00351 if (is_free(index_in[i]))
00352 {
00353 margin_sv_number++;
00354 bee = y_in(i) - st[index_in[i]];
00355 break;
00356 }
00357
00358 if (margin_sv_number > 0)
00359 {
00360 aux=-1.0;
00361 for (i = nb-1; i >= 0; i--)
00362 {
00363 gx_i = bee + st[index_out[i]];
00364 if ((is_zero(index_out[i]) && (gx_i*y_out(i) < (1.0-delta))) ||
00365 (is_bound(index_out[i]) && (gx_i*y_out(i) > (1.0+delta))) ||
00366 (is_free(index_out[i]) &&
00367 ((gx_i*y_out(i) < 1.0-delta) || (gx_i*y_out(i) > 1.0+delta))))
00368 {
00369 if (fabs(gx_i*y_out(i) - 1.0) > aux)
00370 aux = fabs(gx_i*y_out(i) - 1.0);
00371 }
00372 }
00373 }
00374 else
00375 {
00376 for (i = ell - 1; i >= 0; i--)
00377 if (is_free(i))
00378 {
00379 margin_sv_number++;
00380 bee = y[i] - st[i];
00381 break;
00382 }
00383 if (margin_sv_number == 0)
00384 {
00385 s1 = -minfty;
00386 s2 = -minfty;
00387 for (j = 0; j < ell; j++)
00388 if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] >= 0) )
00389 {
00390 s1 = grad[j];
00391 break;
00392 }
00393 for (j = 0; j < ell; j++)
00394 if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] <= 0) )
00395 {
00396 s2 = grad[j];
00397 break;
00398 }
00399 if (s1 < s2)
00400 aux = s1;
00401 else
00402 aux = s2;
00403
00404 s1 = minfty;
00405 s2 = minfty;
00406 for (j = ell-1; j >=0; j--)
00407 if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] <= 0) )
00408 {
00409 s1 = grad[j];
00410 break;
00411 }
00412 for (j = ell-1; j >=0; j--)
00413 if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] >= 0) )
00414 {
00415 s2 = grad[j];
00416 break;
00417 }
00418 if (s2 > s1) s1 = s2;
00419
00420 bee = 0.5 * (s1+aux);
00421 }
00422
00423 aux = -1.0;
00424 for (i = ell-1; i >= 0; i--)
00425 {
00426 gx_i = bee + st[i];
00427 if ((is_zero(i) && (gx_i*y[i] < (1.0-delta))) ||
00428 (is_bound(i) && (gx_i*y[i] > (1.0+delta))) ||
00429 (is_free(i) &&
00430 ((gx_i*y[i] < 1.0-delta) || (gx_i*y[i] > 1.0+delta))))
00431 {
00432 if (fabs(gx_i*y[i] - 1.0) > aux)
00433 aux = fabs(gx_i*y[i] - 1.0);
00434 }
00435 }
00436 }
00437
00438 if (aux < 0.0)
00439 return 1;
00440 else
00441 {
00442 if (verbosity > 1)
00443 SG_INFO(" Max KKT violation: %lf\n", aux);
00444 else if (verbosity > 0)
00445 SG_INFO(" %lf\n", aux);
00446
00447 if (fabs(kktold-aux) < delta*0.01 && aux < delta*2.0)
00448 {
00449 if (DELTAvpm > InitialDELTAvpm*0.1)
00450 {
00451 DELTAvpm = (DELTAvpm*0.5 > InitialDELTAvpm*0.1 ?
00452 DELTAvpm*0.5 : InitialDELTAvpm*0.1);
00453 SG_INFO("Inner tolerance changed to: %lf\n", DELTAvpm);
00454 }
00455 }
00456
00457 kktold = aux;
00458
00459
00460
00461
00462
00463
00464 for (j = 0; j < chunk_size; j++)
00465 inold[j] = index_in[j];
00466
00467 z = -1;
00468 z1 = ell;
00469 k = 0;
00470 j = 0;
00471 while (k < q)
00472 {
00473 i = z + 1;
00474 while (i < z1)
00475 {
00476 if ( is_free(ing[i]) ||
00477 (-y[ing[i]]>=0 && is_zero(ing[i])) ||
00478 (-y[ing[i]]<=0 && is_bound(ing[i]))
00479 )
00480 {
00481 znew = i;
00482 break;
00483 }
00484 i++;
00485 }
00486 if (i == z1) break;
00487
00488 s = z1 - 1;
00489 while (znew < s)
00490 {
00491 if ( is_free(ing[s]) ||
00492 (y[ing[s]]>=0 && is_zero(ing[s])) ||
00493 (y[ing[s]]<=0 && is_bound(ing[s]))
00494 )
00495 {
00496 z1 = s;
00497 z = znew;
00498 break;
00499 }
00500 s--;
00501 }
00502 if (znew == s) break;
00503
00504 index_in[k++] = ing[z];
00505 index_in[k++] = ing[z1];
00506 }
00507
00508 if (k < q)
00509 {
00510 if (verbosity > 1)
00511 SG_INFO(" New q: %i\n", k);
00512 q = k;
00513 }
00514
00515 quick_si(index_in, q);
00516
00517 s = 0;
00518 j = 0;
00519 for (k = 0; k < chunk_size; k++)
00520 {
00521 z = inold[k];
00522 for (i = j; i < q; i++)
00523 if (z <= index_in[i])
00524 break;
00525
00526 if (i == q)
00527 {
00528 for (i = k; i < chunk_size; i++)
00529 {
00530 ing[s] = inold[i];
00531 s = s+1;
00532 }
00533 break;
00534 }
00535
00536 if (z == index_in[i])
00537 j = i + 1;
00538 else
00539 {
00540 ing[s] = z;
00541 s = s + 1;
00542 j = i;
00543 }
00544 }
00545
00546 for (i = 0; i < s; i++)
00547 {
00548 bmemrid[i] = bmem[ing[i]];
00549 pbmr[i] = i;
00550 }
00551
00552 quick_s3(bmemrid, s, pbmr);
00553
00554
00555 j = q;
00556 i = 0;
00557 while (j < chunk_size && i < s)
00558 {
00559 if (is_free(ing[pbmr[i]]))
00560 {
00561 index_in[j] = ing[pbmr[i]];
00562 j++;
00563 }
00564 i++;
00565 }
00566
00567
00568 if (j < chunk_size)
00569 {
00570 i = 0;
00571 while (j < chunk_size && i < s)
00572 {
00573 if (is_zero(ing[pbmr[i]]))
00574 {
00575 index_in[j] = ing[pbmr[i]];
00576 j++;
00577 }
00578 i++;
00579 }
00580 if (j < chunk_size)
00581 {
00582 i = 0;
00583 while (j < chunk_size && i < s)
00584 {
00585 if (is_bound(ing[pbmr[i]]))
00586 {
00587 index_in[j] = ing[pbmr[i]];
00588 j++;
00589 }
00590 i++;
00591 }
00592 }
00593 }
00594
00595 quick_si(index_in, chunk_size);
00596
00597 for (i = 0; i < chunk_size; i++)
00598 bmem[index_in[i]]++;
00599
00600 j = 0;
00601 k = 0;
00602 for (i = 0; i < chunk_size; i++)
00603 {
00604 for (z = j; z < index_in[i]; z++)
00605 {
00606 index_out[k] = z;
00607 k++;
00608 }
00609 j = index_in[i]+1;
00610 }
00611 for (z = j; z < ell; z++)
00612 {
00613 index_out[k] = z;
00614 k++;
00615 }
00616
00617 for (i = 0; i < nb; i++)
00618 bmem[index_out[i]] = 0;
00619
00620 kin = 0;
00621 j = 0;
00622 for (k = 0; k < chunk_size; k++)
00623 {
00624 z = index_in[k];
00625 for (i = j; i < chunk_size; i++)
00626 if (z <= inold[i])
00627 break;
00628 if (i == chunk_size)
00629 {
00630 for (s = k; s < chunk_size; s++)
00631 {
00632 incom[s] = -1;
00633 cec[index_in[s]]++;
00634 }
00635 kin = kin + chunk_size - k ;
00636 break;
00637 }
00638
00639 if (z == inold[i])
00640 {
00641 incom[k] = i;
00642 j = i+1;
00643 }
00644 else
00645 {
00646 incom[k] = -1;
00647 j = i;
00648 kin = kin + 1;
00649 cec[index_in[k]]++;
00650 }
00651 }
00652
00653 nnew = kin & (~1);
00654 if (nnew < 10)
00655 nnew = 10;
00656 if (nnew < chunk_size/10)
00657 nnew = chunk_size/10;
00658 if (nnew < q)
00659 {
00660 q = nnew;
00661 q = q & (~1);
00662 }
00663
00664 if (kin == 0)
00665 {
00666 DELTAkin *= 0.1;
00667 if (DELTAkin < 1.0e-6)
00668 {
00669 SG_INFO("\n***ERROR***: GPDT stops with tolerance");
00670 SG_INFO(
00671 " %lf because it is unable to change the working set.\n", kktold);
00672 return 1;
00673 }
00674 else
00675 {
00676 SG_INFO("Inner tolerance temporary changed to:");
00677 SG_INFO(" %e\n", DELTAvpm*DELTAkin);
00678 }
00679 }
00680 else
00681 DELTAkin = 1.0;
00682
00683 if (verbosity > 1)
00684 {
00685 SG_INFO(" Working set: new components: %i", kin);
00686 SG_INFO(", new parameter n: %i\n", q);
00687 }
00688
00689 return 0;
00690 }
00691 }
00692
00693
00694
00695
00696 int32_t QPproblem::Preprocess0(int32_t *aux, int32_t *sv)
00697 {
00698 int32_t i, j;
00699
00700 Randnext = 1;
00701 memset(sv, 0, ell*sizeof(int32_t));
00702 for (i = 0; i < chunk_size; i++)
00703 {
00704 do
00705 {
00706 j = ThRandPos % ell;
00707 } while (sv[j] != 0);
00708 sv[j] = 1;
00709 }
00710 return(chunk_size);
00711 }
00712
00713
00714
00715
00716 int32_t QPproblem::Preprocess1(sKernel* kernel, int32_t *aux, int32_t *sv)
00717 {
00718 int32_t s;
00719 int32_t sl;
00720 int32_t n, i, off, j, k, ll;
00721 int32_t nsv, nbsv;
00722 int32_t *sv_loc, *bsv_loc, *sp_y;
00723 float32_t *sp_D=NULL;
00724 float64_t *sp_alpha, *sp_h;
00725
00726 s = ell;
00727
00728 n = (s + preprocess_size - 1) / preprocess_size;
00729 sl = 1 + s / n;
00730
00731 if (verbosity > 0)
00732 {
00733 SG_INFO(" Preprocessing: examples = %d", s);
00734 SG_INFO(", subp. = %d", n);
00735 SG_INFO(", size = %d\n",sl);
00736 }
00737
00738 sv_loc = (int32_t *) malloc(s*sizeof(int32_t));
00739 bsv_loc = (int32_t *)malloc(s*sizeof(int32_t));
00740 sp_alpha = (float64_t *)malloc(sl*sizeof(float64_t));
00741 sp_h = (float64_t *)malloc(sl*sizeof(float64_t));
00742 sp_y = (int32_t *)malloc(sl*sizeof(int32_t));
00743
00744 if (sl < 500)
00745 sp_D = (float32_t *)malloc(sl*sl * sizeof(float32_t));
00746
00747 for (i = 0; i < sl; i++)
00748 sp_h[i] = -1.0;
00749 memset(alpha, 0, ell*sizeof(float64_t));
00750
00751
00752 for (i = 0; i < ell; i++)
00753 aux[i] = i;
00754 Randnext = 1;
00755 for (i = 0; i < ell; i++)
00756 {
00757 j = ThRandPos % ell;
00758 k = ThRandPos % ell;
00759 ll = aux[j]; aux[j] = aux[k]; aux[k] = ll;
00760 }
00761
00762 nbsv = nsv = 0;
00763 for (i = 0; i < n; i++)
00764 {
00765 if (verbosity > 0)
00766 SG_INFO("%d...", i);
00767 SplitParts(s, i, n, &ll, &off);
00768
00769 if (sl < 500)
00770 {
00771 for (j = 0; j < ll; j++)
00772 {
00773 sp_y[j] = y[aux[j+off]];
00774 for (k = j; k < ll; k++)
00775 sp_D[k*sl + j] = sp_D[j*sl + k]
00776 = y[aux[j+off]] * y[aux[k+off]]
00777 * (float32_t)kernel->Get(aux[j+off], aux[k+off]);
00778 }
00779
00780 memset(sp_alpha, 0, sl*sizeof(float64_t));
00781
00782
00783 gpm_solver(projection_solver, projection_projector, ll, sp_D, sp_h,
00784 c_const, 0.0, sp_y, sp_alpha, delta*10, NULL);
00785 }
00786 else
00787 {
00788 QPproblem p2;
00789 p2.Subproblem(*this, ll, aux + off);
00790 p2.chunk_size = (int32_t) ((float64_t)chunk_size / sqrt((float64_t)n));
00791 p2.q = (int32_t) ((float64_t)q / sqrt((float64_t)n));
00792 p2.maxmw = ll*ll*4 / (1024 * 1024);
00793 if (p2.maxmw > maxmw / 2)
00794 p2.maxmw = maxmw / 2;
00795 p2.verbosity = 0;
00796 p2.delta = delta * 10.0;
00797 p2.PreprocessMode = 0;
00798 kernel->KernelEvaluations += p2.gpdtsolve(sp_alpha);
00799 }
00800
00801 for (j = 0; j < ll; j++)
00802 {
00803
00804 if (sp_alpha[j] < (c_const-DELTAsv))
00805 sp_alpha[j] = 0.0;
00806 else
00807 sp_alpha[j] = c_const;
00808 if (sp_alpha[j] > DELTAsv)
00809 {
00810 if (sp_alpha[j] < (c_const-DELTAsv))
00811 sv_loc[nsv++] = aux[j+off];
00812 else
00813 bsv_loc[nbsv++] = aux[j+off];
00814 alpha[aux[j+off]] = sp_alpha[j];
00815 }
00816 }
00817 }
00818
00819 Randnext = 1;
00820
00821
00822 memset(sv, 0, ell*sizeof(int32_t));
00823 ll = (nsv < chunk_size ? nsv : chunk_size);
00824 for (i = 0; i < ll; i++)
00825 {
00826 do {
00827 j = sv_loc[ThRandPos % nsv];
00828 } while (sv[j] != 0);
00829 sv[j] = 1;
00830 }
00831
00832
00833 ll = ((nsv+nbsv) < chunk_size ? (nsv+nbsv) : chunk_size);
00834 for (; i < ll; i++)
00835 {
00836 do {
00837 j = bsv_loc[ThRandPos % nbsv];
00838 } while (sv[j] != 0);
00839 sv[j] = 1;
00840 }
00841
00842
00843
00844 for (; i < chunk_size; i++)
00845 {
00846 do {
00847 j = ThRandPos % ell;
00848 } while (sv[j] != 0);
00849 sv[j] = 1;
00850 }
00851
00852
00853
00854 if (sl < 500) free(sp_D);
00855 free(sp_y );
00856 free(sp_h );
00857 free(sv_loc );
00858 free(bsv_loc );
00859 free(sp_alpha);
00860
00861 if (verbosity > 0)
00862 {
00863 SG_INFO("\n Preprocessing: SV = %d", nsv);
00864 SG_INFO(", BSV = %d\n", nbsv);
00865 }
00866
00867 return(nsv);
00868 }
00869
00870
00871
00872
00873 float64_t QPproblem::gpdtsolve(float64_t *solution)
00874 {
00875 int32_t i, j, k, z, iin, jin, nit, tot_vpm_iter, lsCount;
00876 int32_t tot_vpm_secant, projCount, proximal_count;
00877 int32_t vpmWarningThreshold;
00878 int32_t nzin, nzout;
00879 int32_t *sp_y;
00880 int32_t *indnzin, *indnzout;
00881 float32_t *sp_D;
00882 float64_t *sp_h, *sp_hloc,
00883 *sp_alpha,*stloc;
00884 float64_t sp_e, aux, fval, tau_proximal_this, dfval;
00885 float64_t *vau;
00886 float64_t *weight;
00887 float64_t tot_prep_time, tot_vpm_time, tot_st_time, total_time;
00888 sCache *Cache;
00889 cachetype *ptmw;
00890 clock_t t, ti;
00891
00892 Cache = new sCache(KER, maxmw, ell);
00893 if (chunk_size > ell) chunk_size = ell;
00894
00895 if (chunk_size <= 20)
00896 vpmWarningThreshold = 30*chunk_size;
00897 else if (chunk_size <= 200)
00898 vpmWarningThreshold = 20*chunk_size + 200;
00899 else
00900 vpmWarningThreshold = 10*chunk_size + 2200;
00901
00902 kktold = 10000.0;
00903 if (delta <= 5e-3)
00904 {
00905 if ( (chunk_size <= 20) | ((float64_t)chunk_size/ell <= 0.001) )
00906 DELTAvpm = delta * 0.1;
00907 else if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
00908 DELTAvpm = delta * 0.5;
00909 else
00910 DELTAvpm = delta;
00911 }
00912 else
00913 {
00914 if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
00915 DELTAvpm = (1e-3 < delta*0.1) ? 1e-3 : delta*0.1;
00916 else
00917 DELTAvpm = 5e-3;
00918 }
00919
00920 InitialDELTAvpm = DELTAvpm;
00921 DELTAsv = EPS_SV * c_const;
00922 DELTAkin = 1.0;
00923
00924 q = q & (~1);
00925 nb = ell - chunk_size;
00926 tot_vpm_iter = 0;
00927 tot_vpm_secant = 0;
00928
00929 tot_prep_time = tot_vpm_time = tot_st_time = total_time = 0.0;
00930
00931 ti = clock();
00932
00933
00934 SG_DEBUG("ell:%d, chunk_size:%d, nb:%d dim:%d\n", ell, chunk_size,nb, dim);
00935 ing = (int32_t *) malloc(ell*sizeof(int32_t));
00936 inaux = (int32_t *) malloc(ell*sizeof(int32_t));
00937 index_in = (int32_t *) malloc(chunk_size*sizeof(int32_t));
00938 index_out = (int32_t *) malloc(ell*sizeof(int32_t));
00939 indnzout = (int32_t *) malloc(nb*sizeof(int32_t));
00940 alpha = (float64_t *)malloc(ell*sizeof(float64_t ));
00941
00942 memset(alpha, 0, ell*sizeof(float64_t));
00943 memset(ing, 0, ell*sizeof(int32_t));
00944
00945 if (verbosity > 0 && PreprocessMode != 0)
00946 SG_INFO("\n*********** Begin setup step...\n");
00947 t = clock();
00948
00949 switch(PreprocessMode)
00950 {
00951 case 1: Preprocess1(KER, inaux, ing); break;
00952 case 0:
00953 default:
00954 Preprocess0(inaux, ing); break;
00955 }
00956
00957 for (j = k = i = 0; i < ell; i++)
00958 if (ing[i] == 0)
00959 index_out[j++] = i;
00960 else
00961 index_in[k++] = i;
00962
00963 t = clock() - t;
00964 if (verbosity > 0 && PreprocessMode != 0)
00965 {
00966 SG_INFO( " Time for setup: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
00967 SG_INFO(
00968 "\n\n*********** Begin decomposition technique...\n");
00969 }
00970
00971
00972 bmem = (int32_t *)malloc(ell*sizeof(int32_t));
00973 bmemrid = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00974 pbmr = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00975 cec = (int32_t *)malloc(ell*sizeof(int32_t));
00976 indnzin = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00977 inold = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00978 incom = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00979 vau = (float64_t *)malloc(ell*sizeof(float64_t ));
00980 grad = (float64_t *)malloc(ell*sizeof(float64_t ));
00981 weight = (float64_t *)malloc(dim*sizeof(float64_t ));
00982 st = (float64_t *)malloc(ell*sizeof(float64_t ));
00983 stloc = (float64_t *)malloc(ell*sizeof(float64_t ));
00984
00985 for (i = 0; i < ell; i++)
00986 {
00987 bmem[i] = 0;
00988 cec[i] = 0;
00989 st[i] = 0;
00990 }
00991
00992 sp_y = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00993 sp_D = (float32_t *)malloc(chunk_size*chunk_size * sizeof(float32_t));
00994 sp_alpha = (float64_t *)malloc(chunk_size*sizeof(float64_t ));
00995 sp_h = (float64_t *)malloc(chunk_size*sizeof(float64_t ));
00996 sp_hloc = (float64_t *)malloc(chunk_size*sizeof(float64_t ));
00997
00998 for (i = 0; i < chunk_size; i++)
00999 cec[index_in[i]] = cec[index_in[i]]+1;
01000
01001 for (i = chunk_size-1; i >= 0; i--)
01002 {
01003 incom[i] = -1;
01004 sp_alpha[i] = 0.0;
01005 bmem[index_in[i]] = 1;
01006 }
01007
01008 if (verbosity == 1)
01009 {
01010 SG_INFO( " IT | Prep Time | Solver IT | Solver Time |");
01011 SG_INFO( " Grad Time | KKT violation\n");
01012 SG_INFO( "------+-----------+-----------+-------------+");
01013 SG_INFO( "-----------+--------------\n");
01014 }
01015
01016
01017
01018 nit = 0;
01019 do
01020 {
01021 t = clock();
01022 if ((nit % 10) == 9)
01023 {
01024 if ((t-ti) > 0)
01025 total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
01026 else
01027 total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
01028 ti = t;
01029 }
01030
01031 if (verbosity > 1)
01032 SG_INFO("\n*********** ITERATION: %d\n", nit + 1);
01033 else if (verbosity > 0)
01034 SG_INFO( "%5d |", nit + 1);
01035 else
01036 SG_INFO( ".");
01037 fflush(stdout);
01038
01039 nzout = 0;
01040 for (k = 0; k < nb; k++)
01041 if (alpha_out(k)>DELTAsv)
01042 {
01043 indnzout[nzout] = index_out[k];
01044 nzout++;
01045 }
01046
01047 sp_e = 0.;
01048 for (j = 0; j < nzout; j++)
01049 {
01050 vau[j] = y[indnzout[j]]*alpha[indnzout[j]];
01051 sp_e += vau[j];
01052 }
01053
01054 if (verbosity > 1)
01055 SG_INFO( " spe: %e ", sp_e);
01056
01057 for (i = 0; i < chunk_size; i++)
01058 sp_y[i] = y_in(i);
01059
01060
01061 for (i = 0; i < chunk_size; i++)
01062 {
01063 iin = index_in[i];
01064 ptmw = Cache->GetRow(iin);
01065 if (ptmw != 0)
01066 {
01067 for (j = 0; j <= i; j++)
01068 sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j] * ptmw[index_in[j]];
01069 }
01070 else if (incom[i] == -1)
01071 for (j = 0; j <= i; j++)
01072 sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j]
01073 * (float32_t)KER->Get(iin, index_in[j]);
01074 else
01075 {
01076 for (j = 0; j < i; j++)
01077 if (incom[j] == -1)
01078 sp_D[i*chunk_size + j]
01079 = sp_y[i]*sp_y[j] * (float32_t)KER->Get(iin, index_in[j]);
01080 else
01081 sp_D[i*chunk_size + j]
01082 = sp_D[incom[j]*chunk_size + incom[i]];
01083 sp_D[i*chunk_size + i]
01084 = sp_y[i]*sp_y[i] * (float32_t)KER->Get(iin, index_in[i]);
01085 }
01086 }
01087 for (i = 0; i < chunk_size; i++)
01088 {
01089 for (j = 0; j < i; j++)
01090 sp_D[j*chunk_size + i] = sp_D[i*chunk_size + j];
01091 }
01092
01093 if (nit == 0 && PreprocessMode > 0)
01094 {
01095 for (i = 0; i < chunk_size; i++)
01096 {
01097 iin = index_in[i];
01098 aux = 0.;
01099 ptmw = Cache->GetRow(iin);
01100 if (ptmw == NULL)
01101 for (j = 0; j < nzout; j++)
01102 aux += vau[j] * KER->Get(iin, indnzout[j]);
01103 else
01104 for (j = 0; j < nzout; j++)
01105 aux += vau[j] * ptmw[indnzout[j]];
01106 sp_h[i] = y_in(i) * aux - 1.0;
01107 }
01108 }
01109 else
01110 {
01111 for (i = 0; i < chunk_size; i++)
01112 vau[i] = alpha_in(i);
01113 for (i = 0; i < chunk_size; i++)
01114 {
01115 aux = 0.0;
01116 for (j = 0; j < chunk_size; j++)
01117 aux += sp_D[i*chunk_size + j] * vau[j];
01118 sp_h[i] = st[index_in[i]] * y_in(i) - 1.0 - aux;
01119 }
01120 }
01121
01122 t = clock() - t;
01123 if (verbosity > 1)
01124 SG_INFO(
01125 " Preparation Time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01126 else if (verbosity > 0)
01127 SG_INFO( " %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01128 tot_prep_time += (float64_t)t/CLOCKS_PER_SEC;
01129
01130
01131
01132 if (tau_proximal < 0.0)
01133 tau_proximal_this = 0.0;
01134 else
01135 tau_proximal_this = tau_proximal;
01136 proximal_count = 0;
01137 do {
01138 t = clock();
01139 for (i = 0; i < chunk_size; i++)
01140 {
01141 vau[i] = sp_D[i*chunk_size + i];
01142 sp_h[i] -= tau_proximal_this * alpha_in(i);
01143 sp_D[i*chunk_size + i] += (float32_t)tau_proximal_this;
01144 }
01145
01146 if (kktold < delta*100)
01147 for (i = 0; i < chunk_size; i++)
01148 sp_alpha[i] = alpha_in(i);
01149 else
01150 for (i = 0; i < chunk_size; i++)
01151 sp_alpha[i] = 0.0;
01152
01153
01154 i = gpm_solver(projection_solver, projection_projector, chunk_size,
01155 sp_D, sp_h, c_const, sp_e, sp_y, sp_alpha,
01156 DELTAvpm*DELTAkin, &lsCount, &projCount);
01157
01158 if (i > vpmWarningThreshold)
01159 {
01160 if (ker_type == 2)
01161 {
01162 SG_INFO("\n WARNING: inner subproblem hard to solve;");
01163 SG_INFO(" setting a smaller -q or");
01164 SG_INFO(" tuning -c and -g options might help.\n");
01165 }
01166 else
01167 {
01168 SG_INFO("\n WARNING: inner subproblem hard to solve;");
01169 SG_INFO(" set a smaller -q or");
01170 SG_INFO(" try a better data scaling.\n");
01171 }
01172 }
01173
01174 t = clock() - t;
01175 tot_vpm_iter += i;
01176 tot_vpm_secant += projCount;
01177 tot_vpm_time += (float64_t)t/CLOCKS_PER_SEC;
01178 if (verbosity > 1)
01179 {
01180 SG_INFO(" Solver it: %d", i);
01181 SG_INFO(", ls: %d", lsCount);
01182 SG_INFO(", time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01183 }
01184 else if (verbosity > 0)
01185 {
01186 SG_INFO(" %6d", i);
01187 SG_INFO(" | %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01188 }
01189
01190
01191
01192 for (i = 0; i < chunk_size; i++)
01193 sp_D[i*chunk_size + i] = (float32_t)vau[i];
01194 tau_proximal_this = 0.0;
01195 if (tau_proximal < 0.0)
01196 {
01197 dfval = 0.0;
01198 for (i = 0; i < chunk_size; i++)
01199 {
01200 aux = 0.0;
01201 for (j = 0; j < chunk_size; j++)
01202 aux += sp_D[i*chunk_size + j]*(alpha_in(j) - sp_alpha[j]);
01203 dfval += (0.5*aux - st[index_in[i]]*y_in(i) + 1.0) * (alpha_in(i) - sp_alpha[i]);
01204 }
01205
01206 aux=0.0;
01207 for (i = 0; i < chunk_size; i++)
01208 aux += (alpha_in(i) - sp_alpha[i])*(alpha_in(i) - sp_alpha[i]);
01209
01210 if ((-dfval/aux) < -0.5*tau_proximal)
01211 {
01212 tau_proximal_this = -tau_proximal;
01213 if (verbosity > 0)
01214 SG_DEBUG("tau threshold: %lf ", -dfval/aux);
01215 }
01216 }
01217 proximal_count++;
01218 } while (tau_proximal_this != 0.0 && proximal_count < 2);
01219
01220 t = clock();
01221
01222 nzin = 0;
01223 for (j = 0; j < chunk_size; j++)
01224 {
01225 if (nit == 0)
01226 aux = sp_alpha[j];
01227 else
01228 aux = sp_alpha[j] - alpha_in(j);
01229 if (fabs(aux) > DELTAsv)
01230 {
01231 indnzin[nzin] = index_in[j];
01232 grad[nzin] = aux * y_in(j);
01233 nzin++;
01234 }
01235 }
01236
01237
01238 if (KER->get_kernel()->has_property(KP_LINADD) && get_linadd_enabled())
01239 {
01240 KER->get_kernel()->clear_normal() ;
01241
01242 for (j = 0; j < nzin; j++)
01243 KER->get_kernel()->add_to_normal(indnzin[j], grad[j]);
01244
01245 if (nit == 0 && PreprocessMode > 0)
01246 {
01247 for (j = 0; j < nzout; j++)
01248 {
01249 jin = indnzout[j];
01250 KER->get_kernel()->add_to_normal(jin, alpha[jin] * y[jin]);
01251 }
01252 }
01253
01254 for (i = 0; i < ell; i++)
01255 st[i] += KER->get_kernel()->compute_optimized(i);
01256 }
01257 else
01258 {
01259 k = Cache->DivideMP(ing, indnzin, nzin);
01260 for (j = 0; j < k; j++)
01261 {
01262 ptmw = Cache->FillRow(indnzin[ing[j]]);
01263 for (i = 0; i < ell; i++)
01264 st[i] += grad[ing[j]] * ptmw[i];
01265 }
01266
01267 if (PreprocessMode > 0 && nit == 0)
01268 {
01269 clock_t ti2;
01270
01271 ti2 = clock();
01272 for (j = 0; j < nzout; j++)
01273 {
01274 jin = indnzout[j];
01275 ptmw = Cache->FillRow(jin);
01276 for (i = 0; i < ell; i++)
01277 st[i] += alpha[jin] * y[jin] * ptmw[i];
01278 }
01279 if (verbosity > 1)
01280 SG_INFO(
01281 " G*x0 time: %.2lf\n", (float64_t)(clock()-ti2)/CLOCKS_PER_SEC);
01282 }
01283 }
01284
01285
01286
01287 t = clock() - t;
01288 if (verbosity > 1)
01289 SG_INFO(
01290 " Gradient updating time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01291 else if (verbosity > 0)
01292 SG_INFO( " %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01293 tot_st_time += (float64_t)t/CLOCKS_PER_SEC;
01294
01295
01296 for (i = 0; i < chunk_size; i++)
01297 alpha_in(i) = sp_alpha[i];
01298
01299 if (verbosity > 1)
01300 {
01301 j = k = 0;
01302 for (i = 0; i < ell; i++)
01303 {
01304 if (is_free(i)) j++;
01305 if (is_bound(i)) k++;
01306 }
01307 SG_INFO(" SV: %d", j+k);
01308 SG_INFO(", BSV: %d\n", k);
01309 }
01310 Cache->Iteration();
01311 nit = nit+1;
01312 } while (!optimal() && !(CSignal::cancel_computations()));
01313
01314
01315
01316 t = clock();
01317 if ((t-ti) > 0)
01318 total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
01319 else
01320 total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
01321 ti = t;
01322
01323 memcpy(solution, alpha, ell * sizeof(float64_t));
01324
01325
01326 fval = 0.0;
01327 for (i = 0; i < ell; i++)
01328 fval += alpha[i]*(y[i]*st[i]*0.5 - 1.0);
01329
01330 SG_INFO("\n------+-----------+-----------+-------------+");
01331 SG_INFO("-----------+--------------\n");
01332 SG_INFO(
01333 "\n- TOTAL ITERATIONS: %i\n", nit);
01334
01335 if (verbosity > 1)
01336 {
01337 j = 0;
01338 k = 0;
01339 z = 0;
01340 for (i = ell - 1; i >= 0; i--)
01341 {
01342 if (cec[i] > 0) j++;
01343 if (cec[i] > 1) k++;
01344 if (cec[i] > 2) z++;
01345 }
01346 SG_INFO(
01347 "- Variables entering the working set at least one time: %i\n", j);
01348 SG_INFO(
01349 "- Variables entering the working set at least two times: %i\n", k);
01350 SG_INFO(
01351 "- Variables entering the working set at least three times: %i\n", z);
01352 }
01353
01354
01355 free(bmem);
01356 free(bmemrid);
01357 free(pbmr);
01358 free(cec);
01359 free(ing);
01360 free(inaux);
01361 free(indnzin);
01362 free(index_in);
01363 free(inold);
01364 free(incom);
01365 free(indnzout);
01366 free(index_out);
01367 free(vau);
01368 free(alpha);
01369 free(weight);
01370 free(grad);
01371 free(stloc);
01372 free(st);
01373 free(sp_h);
01374 free(sp_hloc);
01375 free(sp_y);
01376 free(sp_D);
01377 free(sp_alpha);
01378 delete Cache;
01379
01380 aux = KER->KernelEvaluations;
01381 SG_INFO( "- Total CPU time: %lf\n", total_time);
01382 if (verbosity > 0)
01383 {
01384 SG_INFO(
01385 "- Total kernel evaluations: %.0lf\n", aux);
01386 SG_INFO(
01387 "- Total inner solver iterations: %i\n", tot_vpm_iter);
01388 if (projection_projector == 1)
01389 SG_INFO(
01390 "- Total projector iterations: %i\n", tot_vpm_secant);
01391 SG_INFO(
01392 "- Total preparation time: %lf\n", tot_prep_time);
01393 SG_INFO(
01394 "- Total inner solver time: %lf\n", tot_vpm_time);
01395 SG_INFO(
01396 "- Total gradient updating time: %lf\n", tot_st_time);
01397 }
01398 SG_INFO( "- Objective function value: %lf\n", fval);
01399 objective_value=fval;
01400 return aux;
01401 }
01402
01403
01404
01405
01406 void quick_si(int32_t a[], int32_t n)
01407 {
01408 int32_t i, j, s, d, l, x, w, ps[20], pd[20];
01409
01410 l = 0;
01411 ps[0] = 0;
01412 pd[0] = n-1;
01413 do
01414 {
01415 s = ps[l];
01416 d = pd[l];
01417 l--;
01418 do
01419 {
01420 i = s;
01421 j = d;
01422 x = a[(s+d)/2];
01423 do
01424 {
01425 while (a[i] < x) i++;
01426 while (a[j] > x) j--;
01427 if (i <= j)
01428 {
01429 w = a[i];
01430 a[i] = a[j];
01431 i++;
01432 a[j] = w;
01433 j--;
01434 }
01435 } while(i<=j);
01436 if (j-s > d-i)
01437 {
01438 l++;
01439 ps[l] = s;
01440 pd[l] = j;
01441 s = i;
01442 }
01443 else
01444 {
01445 if (i < d)
01446 {
01447 l++;
01448 ps[l] = i;
01449 pd[l] = d;
01450 }
01451 d = j;
01452 }
01453 } while (s < d);
01454 } while (l >= 0);
01455 }
01456
01457
01458
01459
01460 void quick_s2(float64_t a[], int32_t n, int32_t ia[])
01461 {
01462 int32_t i, j, s, d, l, iw, ps[20], pd[20];
01463 float64_t x, w;
01464
01465 l = 0;
01466 ps[0] = 0;
01467 pd[0] = n-1;
01468 do
01469 {
01470 s = ps[l];
01471 d = pd[l];
01472 l--;
01473 do
01474 {
01475 i = s;
01476 j = d;
01477 x = a[(s+d)/2];
01478 do
01479 {
01480 while (a[i] < x) i++;
01481 while (a[j] > x) j--;
01482 if (i <= j)
01483 {
01484 iw = ia[i];
01485 w = a[i];
01486 ia[i] = ia[j];
01487 a[i] = a[j];
01488 i++;
01489 ia[j] = iw;
01490 a[j] = w;
01491 j--;
01492 }
01493 } while (i <= j);
01494 if (j-s > d-i)
01495 {
01496 l++;
01497 ps[l] = s;
01498 pd[l] = j;
01499 s = i;
01500 }
01501 else
01502 {
01503 if (i < d)
01504 {
01505 l++;
01506 ps[l] = i;
01507 pd[l] = d;
01508 }
01509 d = j;
01510 }
01511 } while (s < d);
01512 } while(l>=0);
01513 }
01514
01515
01516
01517
01518 void quick_s3(int32_t a[], int32_t n, int32_t ia[])
01519 {
01520 int32_t i, j, s, d, l, iw, w, x, ps[20], pd[20];
01521
01522 l = 0;
01523 ps[0] = 0;
01524 pd[0] = n-1;
01525 do
01526 {
01527 s = ps[l];
01528 d = pd[l];
01529 l--;
01530 do
01531 {
01532 i = s;
01533 j = d;
01534 x = a[(s+d)/2];
01535 do
01536 {
01537 while (a[i] < x) i++;
01538 while (a[j] > x) j--;
01539 if (i <= j)
01540 {
01541 iw = ia[i];
01542 w = a[i];
01543 ia[i] = ia[j];
01544 a[i] = a[j];
01545 i++;
01546 ia[j] = iw;
01547 a[j] = w;
01548 j--;
01549 }
01550 } while (i <= j);
01551 if (j-s > d-i)
01552 {
01553 l++;
01554 ps[l] = s;
01555 pd[l] = j;
01556 s = i;
01557 }
01558 else
01559 {
01560 if (i < d)
01561 {
01562 l++;
01563 ps[l] = i;
01564 pd[l] = d;
01565 }
01566 d = j;
01567 }
01568 } while (s < d);
01569 } while (l >= 0);
01570 }
01571 }
01572
01573 #endif // DOXYGEN_SHOULD_SKIP_THIS
01574
01575
01576
01577