00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00062 
00063 
00064 
00065 
00066 
00067 
00068 
00069 
00070 
00071 #include <math.h>
00072 #include <stdio.h>
00073 #include <stdlib.h>
00074 #include <string.h>
00075 #include <time.h>
00076 
00077 #include <shogun/classifier/svm/gpm.h>
00078 #include <shogun/classifier/svm/gpdt.h>
00079 #include <shogun/classifier/svm/gpdtsolve.h>
00080 #include <shogun/lib/Signal.h>
00081 #include <shogun/io/SGIO.h>
00082 
00083 using namespace shogun;
00084 
00085 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00086 namespace shogun
00087 {
00088 #define y_in(i)      y[index_in[(i)]]
00089 #define y_out(i)     y[index_out[(i)]]
00090 #define alpha_in(i)  alpha[index_in[(i)]]
00091 #define alpha_out(i) alpha[index_out[(i)]]
00092 #define minfty       (-1.0e+30)  // minus infinity
00093 
00094 uint32_t Randnext = 1;
00095 
00096 #define ThRand    (Randnext = Randnext * 1103515245L + 12345L)
00097 #define ThRandPos ((Randnext = Randnext * 1103515245L + 12345L) & 0x7fffffff)
00098 
00099 FILE        *fp;
00100 
00101 
00102 void quick_si (int32_t a[], int32_t k);
00103 void quick_s3 (int32_t a[], int32_t k, int32_t ia[]);
00104 void quick_s2 (float64_t a[], int32_t k, int32_t ia[]);
00105 
00106 
00107 
00108 
00109 class sCache
00110 {
00111 
00112 public:
00113   sCache  (sKernel* sk, int32_t Mbyte, int32_t ell);
00114   ~sCache ();
00115 
00116   cachetype *FillRow (int32_t row, int32_t IsC = 0);
00117   cachetype *GetRow  (int32_t row);
00118 
00119   int32_t DivideMP (int32_t *out, int32_t *in, int32_t n);
00120 
00121   
00122   void Iteration() { nit++; }
00123 
00124   
00125   int32_t CheckCycle()
00126   {
00127     int32_t us;
00128     cache_entry *pt = first_free->next;
00129 
00130     for (us = 0; pt != first_free; us++, pt = pt->next);
00131     if (us != maxmw-1)
00132         return 1;
00133     else
00134         return 0;
00135   }
00136 
00137 private:
00138 
00139   struct cache_entry
00140   {
00141     int32_t row;      
00142     int32_t last_access_it;
00143     cache_entry *prev, *next;
00144     cachetype   *data;
00145   };
00146 
00147   sKernel* KER;
00148   int32_t maxmw, ell;
00149   int32_t nit;
00150 
00151   cache_entry *mw;
00152   cache_entry *first_free;
00153   cache_entry **pindmw;    
00154   cachetype   *onerow;
00155 
00156   cachetype   *FindFree(int32_t row, int32_t IsC);
00157 };
00158 
00159 
00160 
00161 
00162 
00163 sCache::sCache(sKernel* sk, int32_t Mbyte, int32_t _ell) : KER(sk), ell(_ell)
00164 {
00165   int32_t i;
00166 
00167   
00168   maxmw = (sizeof(cache_entry) + sizeof(cache_entry *)
00169            + ell*sizeof(cachetype)) / 4;
00170   
00171   maxmw = Mbyte*1024*(1024/4) / maxmw;
00172 
00173   
00174   mw     = SG_MALLOC(cache_entry, maxmw);
00175   pindmw = SG_MALLOC(cache_entry*,  ell);
00176   onerow = SG_MALLOC(cachetype,     ell);
00177 
00178   
00179   for (i = 0; i < maxmw; i++)
00180   {
00181       mw[i].prev           = (i == 0 ? &mw[maxmw-1] : &mw[i-1]);
00182       mw[i].next           = (i == maxmw-1 ? &mw[0] : &mw[i+1]);
00183       mw[i].data           = SG_MALLOC(cachetype, ell);
00184       mw[i].row            = -1;    
00185       mw[i].last_access_it = -1;
00186   }
00187   for (i = 0; i < ell; i++)
00188       pindmw[i] = 0;
00189 
00190   first_free = &mw[0];
00191   nit        = 0;
00192 }
00193 
00194 
00195 
00196 
00197 sCache::~sCache()
00198 {
00199   int32_t i;
00200 
00201   for (i = maxmw-1; i >= 0; i--)
00202       SG_FREE(mw[i].data);
00203 
00204   SG_FREE(onerow);
00205   SG_FREE(pindmw);
00206   SG_FREE(mw);
00207 }
00208 
00209 
00210 
00211 
00212 
00213 cachetype *sCache::GetRow(int32_t row)
00214 {
00215   cache_entry *c;
00216 
00217   c = pindmw[row];
00218   if (c == NULL)
00219       return NULL;
00220 
00221   c->last_access_it = nit;
00222   if (c == first_free)
00223   {
00224       first_free = first_free->next;
00225   }
00226   else
00227   {
00228       
00229       c->prev->next    = c->next;
00230       c->next->prev    = c->prev;
00231       c->next          = first_free;
00232       c->prev          = first_free->prev;
00233       first_free->prev = c;
00234       c->prev->next    = c;
00235   }
00236 
00237   return c->data;
00238 }
00239 
00240 
00241 
00242 
00243 
00244 
00245 cachetype *sCache::FindFree(int32_t row, int32_t IsC)
00246 {
00247   cachetype *pt;
00248 
00249   if (first_free->row != -1) 
00250   {
00251       if (first_free->last_access_it == nit || IsC)
00252           return 0;
00253       else
00254           pindmw[first_free->row] = 0;
00255   }
00256   first_free->row            = row;
00257   first_free->last_access_it = nit;
00258   pindmw[row]                = first_free;
00259 
00260   pt         = first_free->data;
00261   first_free = first_free->next;
00262 
00263   return pt;
00264 }
00265 
00266 
00267 
00268 
00269 
00270 cachetype *sCache::FillRow(int32_t row, int32_t IsC)
00271 {
00272   int32_t j;
00273   cachetype *pt;
00274 
00275   pt = GetRow(row);
00276   if (pt != NULL)
00277       return pt;
00278 
00279   pt = FindFree(row, IsC);
00280   if (pt == 0)
00281       pt = onerow;
00282 
00283   
00284   for (j = 0; j < ell; j++)
00285       pt[j] = (cachetype)KER->Get(row, j);
00286   return pt;
00287 }
00288 
00289 
00290 
00291 
00292 
00293 int32_t sCache::DivideMP(int32_t *out, int32_t *in, int32_t n)
00294 {
00295    
00296 
00297 
00298 
00299 
00300 
00301 
00302 
00303 
00304 
00305   int32_t *remained, nremained, k;
00306   int32_t i;
00307 
00308   remained = SG_MALLOC(int32_t, n);
00309 
00310   nremained = 0;
00311   k = 0;
00312   for (i = 0; i < n; i++)
00313   {
00314       if (pindmw[in[i]] != NULL)
00315           out[k++] = i;
00316       else
00317           remained[nremained++] = i;
00318   }
00319   for (i = 0; i < nremained; i++)
00320       out[k++] = remained[i];
00321 
00322   SG_FREE(remained);
00323   return n;
00324 }
00325 
00326 
00327 
00328 
00329 int32_t QPproblem::optimal()
00330 {
00331   
00332 
00333 
00334 
00335   register int32_t i, j, margin_sv_number, z, k, s, kin, z1, znew=0, nnew;
00336 
00337   float64_t gx_i, aux, s1, s2;
00338 
00339   
00340   for (j = 0; j < ell; j++)
00341   {
00342       grad[j] = y[j] - st[j];
00343       ing[j]  = j;
00344   }
00345 
00346   quick_s2(grad,ell,ing);
00347 
00348   
00349   margin_sv_number = 0;
00350 
00351   for (i = chunk_size - 1; i >= 0; i--)
00352       if (is_free(index_in[i]))
00353       {
00354           margin_sv_number++;
00355           bee = y_in(i) - st[index_in[i]];
00356           break;
00357       }
00358 
00359   if (margin_sv_number > 0)
00360   {
00361       aux=-1.0;
00362       for (i = nb-1; i >= 0; i--)
00363       {
00364           gx_i = bee + st[index_out[i]];
00365           if ((is_zero(index_out[i]) && (gx_i*y_out(i) < (1.0-delta))) ||
00366              (is_bound(index_out[i]) && (gx_i*y_out(i) > (1.0+delta))) ||
00367              (is_free(index_out[i])  &&
00368              ((gx_i*y_out(i) < 1.0-delta) || (gx_i*y_out(i) > 1.0+delta))))
00369           {
00370               if (fabs(gx_i*y_out(i) - 1.0) > aux)
00371                   aux = fabs(gx_i*y_out(i) - 1.0);
00372           }
00373       }
00374   }
00375   else
00376   {
00377       for (i = ell - 1; i >= 0; i--)
00378           if (is_free(i))
00379           {
00380               margin_sv_number++;
00381               bee = y[i] - st[i];
00382               break;
00383           }
00384       if (margin_sv_number == 0)
00385       {
00386           s1 = -minfty;
00387           s2 = -minfty;
00388           for (j = 0; j < ell; j++)
00389               if ( (alpha[ing[j]] > DELTAsv) &&  (y[ing[j]] >= 0) )
00390               {
00391                   s1 = grad[j];
00392                   break;
00393               }
00394           for (j = 0; j < ell; j++)
00395               if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] <= 0) )
00396               {
00397                   s2 = grad[j];
00398                   break;
00399               }
00400           if (s1 < s2)
00401               aux = s1;
00402           else
00403               aux = s2;
00404 
00405           s1 = minfty;
00406           s2 = minfty;
00407           for (j = ell-1; j >=0; j--)
00408               if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] <= 0) )
00409               {
00410                   s1 = grad[j];
00411                   break;
00412               }
00413           for (j = ell-1; j >=0; j--)
00414               if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] >= 0) )
00415               {
00416                   s2 = grad[j];
00417                   break;
00418               }
00419           if (s2 > s1) s1 = s2;
00420 
00421           bee = 0.5 * (s1+aux);
00422       }
00423 
00424       aux = -1.0;
00425       for (i = ell-1; i >= 0; i--)
00426       {
00427           gx_i = bee + st[i];
00428           if ((is_zero(i) && (gx_i*y[i] < (1.0-delta))) ||
00429              (is_bound(i) && (gx_i*y[i] > (1.0+delta))) ||
00430              (is_free(i)  &&
00431              ((gx_i*y[i] < 1.0-delta) || (gx_i*y[i] > 1.0+delta))))
00432           {
00433               if (fabs(gx_i*y[i] - 1.0) > aux)
00434                   aux = fabs(gx_i*y[i] - 1.0);
00435           }
00436       }
00437   }
00438 
00439   if (aux < 0.0)
00440       return 1;
00441   else
00442   {
00443       if (verbosity > 1)
00444           SG_INFO("  Max KKT violation: %lf\n", aux);
00445       else if (verbosity > 0)
00446           SG_INFO("  %lf\n", aux);
00447 
00448       if (fabs(kktold-aux) < delta*0.01 &&  aux < delta*2.0)
00449       {
00450           if (DELTAvpm > InitialDELTAvpm*0.1)
00451           {
00452               DELTAvpm = (DELTAvpm*0.5 > InitialDELTAvpm*0.1 ?
00453                                             DELTAvpm*0.5 : InitialDELTAvpm*0.1);
00454               SG_INFO("Inner tolerance changed to: %lf\n", DELTAvpm);
00455           }
00456       }
00457 
00458       kktold = aux;
00459 
00460  
00461 
00462 
00463 
00464 
00465       for (j = 0; j < chunk_size; j++)
00466           inold[j] = index_in[j];
00467 
00468       z  = -1;  
00469       z1 = ell; 
00470       k  = 0;
00471       j  = 0;
00472       while (k < q)
00473       {
00474           i = z + 1; 
00475           while (i < z1)
00476           {
00477               if ( is_free(ing[i]) ||
00478                    (-y[ing[i]]>=0 && is_zero(ing[i])) ||
00479                    (-y[ing[i]]<=0 && is_bound(ing[i]))
00480                  )
00481               {
00482                   znew = i; 
00483                   break;
00484               }
00485               i++;
00486           }
00487           if (i == z1) break;
00488 
00489           s = z1 - 1;
00490           while (znew < s)
00491           {
00492               if ( is_free(ing[s]) ||
00493                    (y[ing[s]]>=0 && is_zero(ing[s])) ||
00494                    (y[ing[s]]<=0 && is_bound(ing[s]))
00495                  )
00496               {
00497                   z1 = s;
00498                   z  = znew;
00499                   break;
00500               }
00501               s--;
00502           }
00503           if (znew == s) break;
00504 
00505           index_in[k++] = ing[z];
00506           index_in[k++] = ing[z1];
00507       }
00508 
00509       if (k < q)
00510       {
00511           if (verbosity > 1)
00512               SG_INFO("  New q: %i\n", k);
00513           q = k;
00514       }
00515 
00516       quick_si(index_in, q);
00517 
00518       s = 0;
00519       j = 0;
00520       for (k = 0; k < chunk_size; k++)
00521       {
00522           z = inold[k];
00523           for (i = j; i < q; i++)
00524               if (z <= index_in[i])
00525                   break;
00526 
00527           if (i == q)
00528           {
00529               for (i = k; i < chunk_size; i++)
00530               {
00531                   ing[s] = inold[i]; 
00532                   s      = s+1;
00533               }
00534               break;
00535           }
00536 
00537           if (z == index_in[i])
00538               j      = i + 1; 
00539           else
00540           {
00541               ing[s] = z;     
00542               s      = s + 1;
00543               j      = i;
00544           }
00545       }
00546 
00547       for (i = 0; i < s; i++)
00548       {
00549           bmemrid[i] = bmem[ing[i]];
00550           pbmr[i]    = i;
00551       }
00552 
00553       quick_s3(bmemrid, s, pbmr);
00554 
00555       
00556       j = q;
00557       i = 0;
00558       while (j < chunk_size && i < s)
00559       {
00560           if (is_free(ing[pbmr[i]]))
00561           {
00562               index_in[j] = ing[pbmr[i]];
00563               j++;
00564           }
00565           i++;
00566       }
00567 
00568       
00569       if (j < chunk_size)
00570       {
00571           i = 0;
00572           while (j < chunk_size && i < s)
00573           {
00574               if (is_zero(ing[pbmr[i]]))
00575               {
00576                   index_in[j] = ing[pbmr[i]];
00577                   j++;
00578               }
00579               i++;
00580           }
00581           if (j < chunk_size)
00582           {
00583               i = 0;
00584               while (j < chunk_size && i < s)
00585               {
00586                   if (is_bound(ing[pbmr[i]]))
00587                   {
00588                       index_in[j] = ing[pbmr[i]];
00589                       j++;
00590                   }
00591                   i++;
00592               }
00593           }
00594       }
00595 
00596       quick_si(index_in, chunk_size);
00597 
00598       for (i = 0; i < chunk_size; i++)
00599           bmem[index_in[i]]++;
00600 
00601       j = 0;
00602       k = 0;
00603       for (i = 0; i < chunk_size; i++)
00604       {
00605           for (z = j; z < index_in[i]; z++)
00606           {
00607               index_out[k] = z;
00608               k++;
00609           }
00610           j = index_in[i]+1;
00611       }
00612       for (z = j; z < ell; z++)
00613       {
00614           index_out[k] = z;
00615           k++;
00616       }
00617 
00618       for (i = 0; i < nb; i++)
00619           bmem[index_out[i]] = 0;
00620 
00621       kin = 0;
00622       j   = 0;
00623       for (k = 0; k < chunk_size; k++)
00624       {
00625           z = index_in[k];
00626           for (i = j; i < chunk_size; i++)
00627               if (z <= inold[i])
00628                   break;
00629           if (i == chunk_size)
00630           {
00631               for (s = k; s < chunk_size; s++)
00632               {
00633                   incom[s] = -1;
00634                   cec[index_in[s]]++;
00635               }
00636               kin = kin + chunk_size - k ;
00637               break;
00638           }
00639 
00640           if (z == inold[i])
00641           {
00642               incom[k] = i;
00643               j        = i+1;
00644           }
00645           else
00646           {
00647               incom[k] = -1;
00648               j        = i;
00649               kin      = kin + 1;
00650               cec[index_in[k]]++;
00651           }
00652       }
00653 
00654       nnew = kin & (~1);
00655       if (nnew < 10)
00656           nnew = 10;
00657       if (nnew < chunk_size/10)
00658           nnew = chunk_size/10;
00659       if (nnew < q)
00660       {
00661           q = nnew;
00662           q = q & (~1);
00663       }
00664 
00665       if (kin == 0)
00666       {
00667           DELTAkin *= 0.1;
00668           if (DELTAkin < 1.0e-6)
00669           {
00670               SG_INFO("\n***ERROR***: GPDT stops with tolerance"); 
00671               SG_INFO( 
00672               " %lf because it is unable to change the working set.\n", kktold);
00673               return 1;
00674           }
00675           else
00676           {
00677               SG_INFO("Inner tolerance temporary changed to:");
00678               SG_INFO(" %e\n", DELTAvpm*DELTAkin);
00679           }
00680       }
00681       else
00682           DELTAkin = 1.0;
00683 
00684       if (verbosity > 1)
00685       {
00686           SG_INFO("  Working set: new components: %i", kin);
00687           SG_INFO(",  new parameter n: %i\n", q);
00688       }
00689 
00690       return 0;
00691    }
00692 }
00693 
00694 
00695 
00696 
00697 int32_t QPproblem::Preprocess0(int32_t *aux, int32_t *sv)
00698 {
00699   int32_t i, j;
00700 
00701   Randnext = 1;
00702   memset(sv, 0, ell*sizeof(int32_t));
00703   for (i = 0; i < chunk_size; i++)
00704   {
00705       do
00706       {
00707           j = ThRandPos % ell;
00708       } while (sv[j] != 0);
00709       sv[j] = 1;
00710   }
00711   return(chunk_size);
00712 }
00713 
00714 
00715 
00716 
00717 int32_t QPproblem::Preprocess1(sKernel* kernel, int32_t *aux, int32_t *sv)
00718 {
00719   int32_t    s;    
00720   int32_t    sl;   
00721   int32_t    n, i, off, j, k, ll;
00722   int32_t    nsv, nbsv;
00723   int32_t    *sv_loc, *bsv_loc, *sp_y;
00724   float32_t  *sp_D=NULL;
00725   float64_t *sp_alpha, *sp_h;
00726 
00727   s  = ell;
00728   
00729   n  = (s + preprocess_size - 1) / preprocess_size;
00730   sl = 1 + s / n;
00731 
00732   if (verbosity > 0)
00733   {
00734       SG_INFO("  Preprocessing: examples = %d", s);
00735       SG_INFO(", subp. = %d", n);
00736       SG_INFO(", size = %d\n",sl);
00737   }
00738 
00739   sv_loc   = SG_MALLOC(int32_t, s);
00740   bsv_loc  = SG_MALLOC(int32_t, s);
00741   sp_alpha = SG_MALLOC(float64_t, sl);
00742   sp_h     = SG_MALLOC(float64_t, sl);
00743   sp_y     = SG_MALLOC(int32_t, sl);
00744 
00745   if (sl < 500)
00746       sp_D = SG_MALLOC(float32_t, sl*sl);
00747 
00748   for (i = 0; i < sl; i++)
00749        sp_h[i] = -1.0;
00750   memset(alpha, 0, ell*sizeof(float64_t));
00751 
00752   
00753   for (i = 0; i < ell; i++)
00754       aux[i] = i;
00755   Randnext = 1;
00756   for (i = 0; i < ell; i++)
00757   {
00758       j  = ThRandPos % ell;
00759       k  = ThRandPos % ell;
00760       ll = aux[j]; aux[j] = aux[k]; aux[k] = ll;
00761   }
00762 
00763   nbsv = nsv = 0;
00764   for (i = 0; i < n; i++)
00765   {
00766       if (verbosity > 0)
00767           SG_INFO("%d...", i);
00768       SplitParts(s, i, n, &ll, &off);
00769 
00770       if (sl < 500)
00771       {
00772           for (j = 0; j < ll; j++)
00773           {
00774               sp_y[j] = y[aux[j+off]];
00775               for (k = j; k < ll; k++)
00776                   sp_D[k*sl + j] = sp_D[j*sl + k]
00777                                  = y[aux[j+off]] * y[aux[k+off]]
00778                                    * (float32_t)kernel->Get(aux[j+off], aux[k+off]);
00779           }
00780 
00781           memset(sp_alpha, 0, sl*sizeof(float64_t));
00782 
00783           
00784           gpm_solver(projection_solver, projection_projector, ll, sp_D, sp_h,
00785                      c_const, 0.0, sp_y, sp_alpha, delta*10, NULL);
00786       }
00787       else
00788       {
00789           QPproblem p2;
00790           p2.Subproblem(*this, ll, aux + off);
00791           p2.chunk_size     = (int32_t) ((float64_t)chunk_size / sqrt((float64_t)n));
00792           p2.q              = (int32_t) ((float64_t)q / sqrt((float64_t)n));
00793           p2.maxmw          = ll*ll*4 / (1024 * 1024);
00794           if (p2.maxmw > maxmw / 2)
00795               p2.maxmw = maxmw / 2;
00796           p2.verbosity      = 0;
00797           p2.delta          = delta * 10.0;
00798           p2.PreprocessMode = 0;
00799           kernel->KernelEvaluations += p2.gpdtsolve(sp_alpha);
00800       }
00801 
00802       for (j = 0; j < ll; j++)
00803       {
00804           
00805           if (sp_alpha[j] < (c_const-DELTAsv))
00806               sp_alpha[j] = 0.0;
00807           else
00808               sp_alpha[j] = c_const;
00809           if (sp_alpha[j] > DELTAsv)
00810           {
00811               if (sp_alpha[j] < (c_const-DELTAsv))
00812                   sv_loc[nsv++]   = aux[j+off];
00813               else
00814                   bsv_loc[nbsv++] = aux[j+off];
00815               alpha[aux[j+off]] = sp_alpha[j];
00816           }
00817       }
00818   }
00819 
00820   Randnext = 1;
00821 
00822   
00823   memset(sv, 0, ell*sizeof(int32_t));
00824   ll = (nsv < chunk_size ? nsv : chunk_size);
00825   for (i = 0; i < ll; i++)
00826   {
00827       do {
00828            j = sv_loc[ThRandPos % nsv];
00829       } while (sv[j] != 0);
00830       sv[j] = 1;
00831   }
00832 
00833   
00834   ll = ((nsv+nbsv) < chunk_size ? (nsv+nbsv) : chunk_size);
00835   for (; i < ll; i++)
00836   {
00837       do {
00838            j = bsv_loc[ThRandPos % nbsv];
00839       } while (sv[j] != 0);
00840       sv[j] = 1;
00841   }
00842 
00843   
00844 
00845   for (; i < chunk_size; i++)
00846   {
00847       do {
00848            j = ThRandPos % ell;
00849       } while (sv[j] != 0);
00850       sv[j] = 1;
00851   }
00852 
00853 
00854   
00855   if (sl < 500) SG_FREE(sp_D);
00856   SG_FREE(sp_y    );
00857   SG_FREE(sp_h    );
00858   SG_FREE(sv_loc  );
00859   SG_FREE(bsv_loc );
00860   SG_FREE(sp_alpha);
00861 
00862   if (verbosity > 0)
00863   {
00864       SG_INFO("\n  Preprocessing: SV = %d", nsv);
00865       SG_INFO(", BSV = %d\n", nbsv);
00866   }
00867 
00868   return(nsv);
00869 }
00870 
00871 
00872 
00873 
00874 float64_t QPproblem::gpdtsolve(float64_t *solution)
00875 {
00876   int32_t i, j, k, z, iin, jin, nit, tot_vpm_iter, lsCount;
00877   int32_t tot_vpm_secant, projCount, proximal_count;
00878   int32_t vpmWarningThreshold;
00879   int32_t  nzin, nzout;
00880   int32_t *sp_y;               
00881   int32_t *indnzin, *indnzout; 
00882   float32_t     *sp_D;               
00883   float64_t    *sp_h, *sp_hloc,     
00884             *sp_alpha,*stloc;    
00885   float64_t    sp_e, aux, fval, tau_proximal_this, dfval;
00886   float64_t    *vau;
00887   float64_t    *weight;
00888   float64_t    tot_prep_time, tot_vpm_time, tot_st_time, total_time;
00889   sCache    *Cache;
00890   cachetype *ptmw;
00891   clock_t   t, ti;
00892 
00893   Cache = new sCache(KER, maxmw, ell);
00894     if (chunk_size > ell) chunk_size = ell;
00895 
00896   if (chunk_size <= 20)
00897       vpmWarningThreshold = 30*chunk_size;
00898   else if (chunk_size <= 200)
00899       vpmWarningThreshold = 20*chunk_size + 200;
00900   else
00901       vpmWarningThreshold = 10*chunk_size + 2200;
00902 
00903   kktold = 10000.0;
00904   if (delta <= 5e-3)
00905   {
00906       if ( (chunk_size <= 20) | ((float64_t)chunk_size/ell <= 0.001) )
00907           DELTAvpm = delta * 0.1;
00908       else if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
00909           DELTAvpm = delta * 0.5;
00910       else
00911           DELTAvpm = delta;
00912   }
00913   else
00914   {
00915       if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
00916           DELTAvpm = (1e-3 < delta*0.1) ? 1e-3 : delta*0.1;
00917       else
00918           DELTAvpm = 5e-3;
00919   }
00920 
00921   InitialDELTAvpm = DELTAvpm;
00922   DELTAsv         = EPS_SV * c_const;
00923   DELTAkin        = 1.0;
00924 
00925   q               = q & (~1);
00926   nb              = ell - chunk_size;
00927   tot_vpm_iter    = 0;
00928   tot_vpm_secant  = 0;
00929 
00930   tot_prep_time = tot_vpm_time = tot_st_time = total_time = 0.0;
00931 
00932   ti = clock();
00933 
00934   
00935   SG_DEBUG("ell:%d, chunk_size:%d, nb:%d dim:%d\n", ell, chunk_size,nb, dim);
00936   ing       = SG_MALLOC(int32_t, ell);
00937   inaux     = SG_MALLOC(int32_t, ell);
00938   index_in  = SG_MALLOC(int32_t, chunk_size);
00939   index_out = SG_MALLOC(int32_t, ell);
00940   indnzout  = SG_MALLOC(int32_t, nb);
00941   alpha     = SG_MALLOC(float64_t, ell);
00942 
00943   memset(alpha, 0, ell*sizeof(float64_t));
00944   memset(ing,   0, ell*sizeof(int32_t));
00945 
00946   if (verbosity > 0 && PreprocessMode != 0)
00947       SG_INFO("\n*********** Begin setup step...\n");
00948   t = clock();
00949 
00950   switch(PreprocessMode)
00951   {
00952     case 1: Preprocess1(KER, inaux, ing); break;
00953     case 0:
00954     default:
00955             Preprocess0(inaux, ing); break;
00956   }
00957 
00958   for (j = k = i = 0; i < ell; i++)
00959       if (ing[i] == 0)
00960           index_out[j++] = i;
00961       else
00962           index_in[k++]  = i;
00963 
00964   t = clock() - t;
00965   if (verbosity > 0 && PreprocessMode != 0)
00966   {
00967       SG_INFO( "  Time for setup: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
00968       SG_INFO(
00969               "\n\n*********** Begin decomposition technique...\n");
00970   }
00971 
00972   
00973   bmem     = SG_MALLOC(int32_t, ell);
00974   bmemrid  = SG_MALLOC(int32_t, chunk_size);
00975   pbmr     = SG_MALLOC(int32_t, chunk_size);
00976   cec      = SG_MALLOC(int32_t, ell);
00977   indnzin  = SG_MALLOC(int32_t, chunk_size);
00978   inold    = SG_MALLOC(int32_t, chunk_size);
00979   incom    = SG_MALLOC(int32_t, chunk_size);
00980   vau      = SG_MALLOC(float64_t, ell);
00981   grad     = SG_MALLOC(float64_t, ell);
00982   weight   = SG_MALLOC(float64_t, dim);
00983   st       = SG_MALLOC(float64_t, ell);
00984   stloc    = SG_MALLOC(float64_t, ell);
00985 
00986   for (i = 0; i < ell; i++)
00987   {
00988       bmem[i] = 0;
00989       cec[i]  = 0;
00990       st[i]   = 0;
00991   }
00992 
00993   sp_y     = SG_MALLOC(int32_t, chunk_size);
00994   sp_D     = SG_MALLOC(float32_t, chunk_size*chunk_size);
00995   sp_alpha = SG_MALLOC(float64_t, chunk_size);
00996   sp_h     = SG_MALLOC(float64_t, chunk_size);
00997   sp_hloc  = SG_MALLOC(float64_t, chunk_size);
00998 
00999   for (i = 0; i < chunk_size; i++)
01000       cec[index_in[i]] = cec[index_in[i]]+1;
01001 
01002   for (i = chunk_size-1; i >= 0; i--)
01003   {
01004       incom[i]          = -1;
01005       sp_alpha[i]       = 0.0;
01006       bmem[index_in[i]] = 1;
01007   }
01008 
01009   if (verbosity == 1)
01010   {
01011       SG_INFO( "  IT  | Prep Time | Solver IT | Solver Time |");
01012       SG_INFO( " Grad Time | KKT violation\n");
01013       SG_INFO( "------+-----------+-----------+-------------+");
01014       SG_INFO( "-----------+--------------\n");
01015   }
01016 
01017   
01018   
01019   nit = 0;
01020   do
01021   {
01022       t = clock();
01023       if ((nit % 10) == 9)
01024       {
01025           if ((t-ti) > 0)
01026               total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
01027           else
01028               total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
01029           ti = t;
01030       }
01031 
01032       if (verbosity > 1)
01033           SG_INFO("\n*********** ITERATION: %d\n", nit + 1);
01034       else if (verbosity > 0)
01035           SG_INFO( "%5d |", nit + 1);
01036       else
01037           SG_INFO( ".");
01038       fflush(stdout);
01039 
01040       nzout = 0;
01041       for (k = 0; k < nb; k++)
01042           if (alpha_out(k)>DELTAsv)
01043           {
01044               indnzout[nzout] = index_out[k];
01045               nzout++;
01046           }
01047 
01048       sp_e = 0.;
01049       for (j = 0; j < nzout; j++)
01050       {
01051           vau[j] = y[indnzout[j]]*alpha[indnzout[j]];
01052           sp_e  += vau[j];
01053       }
01054 
01055       if (verbosity > 1)
01056           SG_INFO( "  spe: %e ", sp_e);
01057 
01058       for (i = 0; i < chunk_size; i++)
01059           sp_y[i] = y_in(i);
01060 
01061       
01062       for (i = 0; i < chunk_size; i++)
01063       {
01064           iin  = index_in[i];
01065           ptmw = Cache->GetRow(iin);
01066           if (ptmw != 0)
01067           {
01068               for (j = 0; j <= i; j++)
01069                   sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j] * ptmw[index_in[j]];
01070           }
01071           else if (incom[i] == -1)
01072               for (j = 0; j <= i; j++)
01073                   sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j]
01074                                            * (float32_t)KER->Get(iin, index_in[j]);
01075           else
01076           {
01077               for (j = 0; j < i; j++)
01078                   if (incom[j] == -1)
01079                       sp_D[i*chunk_size + j]
01080                          = sp_y[i]*sp_y[j] * (float32_t)KER->Get(iin, index_in[j]);
01081                   else
01082                       sp_D[i*chunk_size + j]
01083                          = sp_D[incom[j]*chunk_size + incom[i]];
01084               sp_D[i*chunk_size + i]
01085                   = sp_y[i]*sp_y[i] * (float32_t)KER->Get(iin, index_in[i]);
01086           }
01087       }
01088       for (i = 0; i < chunk_size; i++)
01089       {
01090           for (j = 0; j < i; j++)
01091               sp_D[j*chunk_size + i] = sp_D[i*chunk_size + j];
01092       }
01093 
01094       if (nit == 0 && PreprocessMode > 0)
01095       {
01096          for (i = 0; i < chunk_size; i++)
01097          {
01098              iin  = index_in[i];
01099              aux  = 0.;
01100              ptmw = Cache->GetRow(iin);
01101              if (ptmw == NULL)
01102                  for (j = 0; j < nzout; j++)
01103                      aux += vau[j] * KER->Get(iin, indnzout[j]);
01104              else
01105                  for (j = 0; j < nzout; j++)
01106                      aux += vau[j] * ptmw[indnzout[j]];
01107              sp_h[i] = y_in(i) * aux - 1.0;
01108          }
01109       }
01110       else
01111       {
01112          for (i = 0; i < chunk_size; i++)
01113              vau[i] = alpha_in(i);
01114          for (i = 0; i < chunk_size; i++)
01115          {
01116              aux = 0.0;
01117              for (j = 0; j < chunk_size; j++)
01118                  aux += sp_D[i*chunk_size + j] * vau[j];
01119              sp_h[i] = st[index_in[i]] * y_in(i) - 1.0 - aux;
01120          }
01121       }
01122 
01123     t = clock() - t;
01124     if (verbosity > 1)
01125         SG_INFO(
01126                 "  Preparation Time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01127     else if (verbosity > 0)
01128         SG_INFO( "  %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01129     tot_prep_time += (float64_t)t/CLOCKS_PER_SEC;
01130 
01131     
01132 
01133     if (tau_proximal < 0.0)
01134       tau_proximal_this = 0.0;
01135     else
01136       tau_proximal_this = tau_proximal;
01137     proximal_count = 0;
01138     do {
01139       t = clock();
01140       for (i = 0; i < chunk_size; i++)
01141       {
01142           vau[i]                  = sp_D[i*chunk_size + i];
01143           sp_h[i]                -= tau_proximal_this * alpha_in(i);
01144           sp_D[i*chunk_size + i] += (float32_t)tau_proximal_this;
01145       }
01146 
01147       if (kktold < delta*100)
01148           for (i = 0; i < chunk_size; i++)
01149               sp_alpha[i] = alpha_in(i);
01150       else
01151           for (i = 0; i < chunk_size; i++)
01152               sp_alpha[i] = 0.0;
01153 
01154       
01155       i = gpm_solver(projection_solver, projection_projector, chunk_size, 
01156                     sp_D, sp_h, c_const, sp_e, sp_y, sp_alpha, 
01157                     DELTAvpm*DELTAkin, &lsCount, &projCount);
01158 
01159       if (i > vpmWarningThreshold)
01160       {
01161         if (ker_type == 2)
01162         {
01163             SG_INFO("\n WARNING: inner subproblem hard to solve;");
01164             SG_INFO(" setting a smaller -q or");
01165             SG_INFO(" tuning -c and -g options might help.\n");
01166         }
01167         else
01168         {
01169             SG_INFO("\n WARNING: inner subproblem hard to solve;");
01170             SG_INFO(" set a smaller -q or");
01171             SG_INFO(" try a better data scaling.\n");
01172         }
01173       }
01174 
01175       t = clock() - t;
01176       tot_vpm_iter   += i;
01177       tot_vpm_secant += projCount;
01178       tot_vpm_time   += (float64_t)t/CLOCKS_PER_SEC;
01179       if (verbosity > 1)
01180       {
01181           SG_INFO("  Solver it: %d", i);
01182           SG_INFO(", ls: %d", lsCount);
01183           SG_INFO(", time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01184       }
01185       else if (verbosity > 0)
01186       {
01187           SG_INFO("    %6d", i);
01188           SG_INFO(" |    %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01189       }
01190 
01191       
01192 
01193       for (i = 0; i < chunk_size; i++)
01194           sp_D[i*chunk_size + i] = (float32_t)vau[i];
01195       tau_proximal_this = 0.0;
01196       if (tau_proximal < 0.0)
01197       {
01198         dfval = 0.0;
01199         for (i = 0; i < chunk_size; i++)
01200         {
01201           aux = 0.0;
01202           for (j = 0; j < chunk_size; j++)
01203             aux += sp_D[i*chunk_size + j]*(alpha_in(j) - sp_alpha[j]);
01204           dfval += (0.5*aux - st[index_in[i]]*y_in(i) + 1.0) * (alpha_in(i) - sp_alpha[i]);
01205         }
01206         
01207         aux=0.0;
01208         for (i = 0; i < chunk_size; i++)
01209             aux +=  (alpha_in(i) - sp_alpha[i])*(alpha_in(i) - sp_alpha[i]);
01210 
01211         if ((-dfval/aux) < -0.5*tau_proximal)
01212         {
01213           tau_proximal_this = -tau_proximal;
01214           if (verbosity > 0)
01215             SG_DEBUG("tau threshold: %lf  ", -dfval/aux);
01216         }
01217       }
01218       proximal_count++;
01219     } while (tau_proximal_this != 0.0 && proximal_count < 2); 
01220 
01221     t = clock();
01222 
01223     nzin = 0;
01224     for (j = 0; j < chunk_size; j++)
01225     {
01226         if (nit == 0)
01227             aux = sp_alpha[j];
01228         else
01229             aux = sp_alpha[j] - alpha_in(j);
01230         if (fabs(aux) > DELTAsv)
01231         {
01232             indnzin[nzin] = index_in[j];
01233             grad[nzin]    = aux * y_in(j);
01234             nzin++;
01235         }
01236     }
01237 
01238     
01239     if (KER->get_kernel()->has_property(KP_LINADD) && get_linadd_enabled())
01240     {
01241         KER->get_kernel()->clear_normal() ;
01242 
01243         for (j = 0; j < nzin; j++)
01244             KER->get_kernel()->add_to_normal(indnzin[j], grad[j]);
01245 
01246         if (nit == 0 && PreprocessMode > 0)
01247         {
01248             for (j = 0; j < nzout; j++)
01249             {
01250                 jin = indnzout[j];
01251                 KER->get_kernel()->add_to_normal(jin, alpha[jin] * y[jin]);
01252             }
01253         }
01254 
01255         for (i = 0; i < ell; i++)
01256             st[i] += KER->get_kernel()->compute_optimized(i);
01257     }
01258     else  
01259     {
01260         k = Cache->DivideMP(ing, indnzin, nzin);
01261         for (j = 0; j < k; j++)
01262         {
01263             ptmw = Cache->FillRow(indnzin[ing[j]]);
01264             for (i = 0; i < ell; i++)
01265                 st[i] += grad[ing[j]] * ptmw[i];
01266         }
01267 
01268         if (PreprocessMode > 0 && nit == 0)
01269         {
01270             clock_t ti2;
01271 
01272             ti2 = clock();
01273             for (j = 0; j < nzout; j++)
01274             {
01275                 jin  = indnzout[j];
01276                 ptmw = Cache->FillRow(jin);
01277                 for (i = 0; i < ell; i++)
01278                     st[i] += alpha[jin] * y[jin] * ptmw[i];
01279             }
01280             if (verbosity > 1)
01281                 SG_INFO(
01282                  "  G*x0 time: %.2lf\n", (float64_t)(clock()-ti2)/CLOCKS_PER_SEC);
01283         }
01284     }
01285 
01286     
01287 
01288     t = clock() - t;
01289     if (verbosity > 1)
01290         SG_INFO(
01291                 "  Gradient updating time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01292     else if (verbosity > 0)
01293         SG_INFO( "  %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01294     tot_st_time += (float64_t)t/CLOCKS_PER_SEC;
01295 
01296     
01297     for (i = 0; i < chunk_size; i++)
01298         alpha_in(i) = sp_alpha[i];
01299 
01300     if (verbosity > 1)
01301     {
01302         j = k = 0;
01303         for (i = 0; i < ell; i++)
01304         {
01305             if (is_free(i))  j++;
01306             if (is_bound(i)) k++;
01307         }
01308         SG_INFO("  SV: %d", j+k);
01309         SG_INFO(",  BSV: %d\n", k);
01310     }
01311     Cache->Iteration();
01312     nit = nit+1;
01313   } while (!optimal() && !(CSignal::cancel_computations()));
01314   
01315   
01316 
01317   t = clock();
01318   if ((t-ti) > 0)
01319       total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
01320   else
01321       total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
01322   ti = t;
01323 
01324   memcpy(solution, alpha, ell * sizeof(float64_t));
01325 
01326   
01327   fval = 0.0;
01328   for (i = 0; i < ell; i++)
01329       fval += alpha[i]*(y[i]*st[i]*0.5 - 1.0);
01330 
01331   SG_INFO("\n------+-----------+-----------+-------------+");
01332   SG_INFO("-----------+--------------\n");
01333   SG_INFO(
01334       "\n- TOTAL ITERATIONS: %i\n", nit);
01335 
01336   if (verbosity > 1)
01337   {
01338       j = 0;
01339       k = 0;
01340       z = 0;
01341       for (i = ell - 1; i >= 0; i--)
01342       {
01343            if (cec[i] > 0) j++;
01344            if (cec[i] > 1) k++;
01345            if (cec[i] > 2) z++;
01346       }
01347       SG_INFO(
01348         "- Variables entering the working set at least one time:  %i\n", j);
01349       SG_INFO(
01350         "- Variables entering the working set at least two times:  %i\n", k);
01351       SG_INFO(
01352         "- Variables entering the working set at least three times:  %i\n", z);
01353   }
01354 
01355 
01356   SG_FREE(bmem);
01357   SG_FREE(bmemrid);
01358   SG_FREE(pbmr);
01359   SG_FREE(cec);
01360   SG_FREE(ing);
01361   SG_FREE(inaux);
01362   SG_FREE(indnzin);
01363   SG_FREE(index_in);
01364   SG_FREE(inold);
01365   SG_FREE(incom);
01366   SG_FREE(indnzout);
01367   SG_FREE(index_out);
01368   SG_FREE(vau);
01369   SG_FREE(alpha);
01370   SG_FREE(weight);
01371   SG_FREE(grad);
01372   SG_FREE(stloc);
01373   SG_FREE(st);
01374   SG_FREE(sp_h);
01375   SG_FREE(sp_hloc);
01376   SG_FREE(sp_y);
01377   SG_FREE(sp_D);
01378   SG_FREE(sp_alpha);
01379   delete Cache;
01380 
01381   aux = KER->KernelEvaluations;
01382   SG_INFO( "- Total CPU time: %lf\n", total_time);
01383   if (verbosity > 0)
01384   {
01385       SG_INFO( 
01386               "- Total kernel evaluations: %.0lf\n", aux);
01387       SG_INFO( 
01388               "- Total inner solver iterations: %i\n", tot_vpm_iter);
01389       if (projection_projector == 1) 
01390           SG_INFO( 
01391               "- Total projector iterations: %i\n", tot_vpm_secant);
01392       SG_INFO( 
01393               "- Total preparation time: %lf\n", tot_prep_time);
01394       SG_INFO( 
01395               "- Total inner solver time: %lf\n", tot_vpm_time);
01396       SG_INFO( 
01397               "- Total gradient updating time: %lf\n", tot_st_time);
01398   }
01399   SG_INFO( "- Objective function value: %lf\n", fval);
01400   objective_value=fval;
01401   return aux;
01402 }
01403 
01404 
01405 
01406 
01407 void quick_si(int32_t a[], int32_t n)
01408 {
01409   int32_t i, j, s, d, l, x, w, ps[20], pd[20];
01410 
01411   l     = 0;
01412   ps[0] = 0;
01413   pd[0] = n-1;
01414   do
01415   {
01416       s = ps[l];
01417       d = pd[l];
01418       l--;
01419       do
01420       {
01421           i = s;
01422           j = d;
01423           x = a[(s+d)/2];
01424           do
01425           {
01426               while (a[i] < x) i++;
01427               while (a[j] > x) j--;
01428               if (i <= j)
01429               {
01430                   w    = a[i];
01431                   a[i] = a[j];
01432                   i++;
01433                   a[j] = w;
01434                   j--;
01435               }
01436           } while(i<=j);
01437           if (j-s > d-i)
01438           {
01439               l++;
01440               ps[l] = s;
01441               pd[l] = j;
01442               s     = i;
01443           }
01444           else
01445           {
01446               if (i < d)
01447               {
01448                   l++;
01449                   ps[l] = i;
01450                   pd[l] = d;
01451               }
01452           d = j;
01453           }
01454       } while (s < d);
01455   } while (l >= 0);
01456 }
01457 
01458 
01459 
01460 
01461 void quick_s2(float64_t a[], int32_t n, int32_t ia[])
01462 {
01463   int32_t     i, j, s, d, l, iw, ps[20], pd[20];
01464   float64_t  x, w;
01465 
01466   l     = 0;
01467   ps[0] = 0;
01468   pd[0] = n-1;
01469   do
01470   {
01471       s = ps[l];
01472       d = pd[l];
01473       l--;
01474       do
01475       {
01476           i = s;
01477           j = d;
01478           x = a[(s+d)/2];
01479           do
01480           {
01481               while (a[i] < x) i++;
01482               while (a[j] > x) j--;
01483               if (i <= j)
01484               {
01485                   iw    = ia[i];
01486                   w     = a[i];
01487                   ia[i] = ia[j];
01488                   a[i]  = a[j];
01489                   i++;
01490                   ia[j] = iw;
01491                   a[j]  = w;
01492                   j--;
01493               }
01494           } while (i <= j);
01495           if (j-s > d-i)
01496           {
01497               l++;
01498               ps[l] = s;
01499               pd[l] = j;
01500               s     = i;
01501           }
01502           else
01503           {
01504               if (i < d)
01505               {
01506                   l++;
01507                   ps[l] = i;
01508                   pd[l] = d;
01509               }
01510               d = j;
01511           }
01512       } while (s < d);
01513   } while(l>=0);
01514 }
01515 
01516 
01517 
01518 
01519 void quick_s3(int32_t a[], int32_t n, int32_t ia[])
01520 {
01521   int32_t i, j, s, d, l, iw, w, x, ps[20], pd[20];
01522 
01523   l     = 0;
01524   ps[0] = 0;
01525   pd[0] = n-1;
01526   do
01527   {
01528       s = ps[l];
01529       d = pd[l];
01530       l--;
01531       do
01532       {
01533           i = s;
01534           j = d;
01535           x = a[(s+d)/2];
01536           do
01537           {
01538               while (a[i] < x) i++;
01539               while (a[j] > x) j--;
01540               if (i <= j)
01541               {
01542                  iw    = ia[i];
01543                  w     = a[i];
01544                  ia[i] = ia[j];
01545                  a[i]  = a[j];
01546                  i++;
01547                  ia[j] = iw;
01548                  a[j]  = w;
01549                  j--;
01550               }
01551           } while (i <= j);
01552           if (j-s > d-i)
01553           {
01554               l++;
01555               ps[l] = s;
01556               pd[l] = j;
01557               s     = i;
01558           }
01559           else
01560           {
01561               if (i < d)
01562               {
01563                   l++;
01564                   ps[l] = i;
01565                   pd[l] = d;
01566               }
01567               d = j;
01568           }
01569       } while (s < d);
01570   } while (l >= 0);
01571 }
01572 }
01573 
01574 #endif // DOXYGEN_SHOULD_SKIP_THIS
01575 
01576 
01577 
01578