GMNPLib.cpp

Go to the documentation of this file.
00001 /*-----------------------------------------------------------------------
00002  *
00003  * This program is free software; you can redistribute it and/or modify
00004  * it under the terms of the GNU General Public License as published by
00005  * the Free Software Foundation; either version 3 of the License, or
00006  * (at your option) any later version.
00007  *
00008  * Library of solvers for Generalized Nearest Point Problem (GNPP).
00009  *
00010  * Written (W) 1999-2008 Vojtech Franc, xfrancv@cmp.felk.cvut.cz
00011  * Copyright (C) 1999-2008 Center for Machine Perception, CTU FEL Prague 
00012  *
00013  *
00014 gmnplib.c: Library of solvers for Generalized Minimal Norm Problem (GMNP).
00015  
00016  Generalized Minimal Norm Problem to solve is
00017   
00018   min 0.5*alpha'*H*alpha + c'*alpha
00019 
00020   subject to  sum(alpha) = 1,  alpha(i) >= 0
00021   
00022  H [dim x dim] is symmetric positive definite matrix.
00023  c [dim x 1] is an arbitrary vector.
00024 
00025  The precision of the found solution is given by
00026  the parameters tmax, tolabs and tolrel which
00027  define the stopping conditions:
00028  
00029  UB-LB <= tolabs      ->  exit_flag = 1   Abs. tolerance.
00030  UB-LB <= UB*tolrel   ->  exit_flag = 2   Relative tolerance.
00031  LB > th              ->  exit_flag = 3   Threshold on lower bound.
00032  t >= tmax            ->  exit_flag = 0   Number of iterations.
00033 
00034  UB ... Upper bound on the optimal solution.
00035  LB ... Lower bound on the optimal solution.
00036  t  ... Number of iterations.
00037  History ... Value of LB and UB wrt. number of iterations.
00038 
00039 
00040  The following algorithms are implemented:
00041  ..............................................
00042 
00043  - GMNP solver based on improved MDM algorithm 1 (u fixed v optimized)
00044     exitflag = gmnp_imdm( &get_col, diag_H, vector_c, dim,  
00045                  tmax, tolabs, tolrel, th, &alpha, &t, &History, verb  );
00046 
00047   For more info refer to V.Franc: Optimization Algorithms for Kernel 
00048   Methods. Research report. CTU-CMP-2005-22. CTU FEL Prague. 2005.
00049   ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf .
00050 
00051  Modifications:
00052  09-sep-2005, VF
00053  24-jan-2005, VF
00054  26-nov-2004, VF
00055  25-nov-2004, VF
00056  21-nov-2004, VF
00057  20-nov-2004, VF
00058  31-may-2004, VF
00059  23-Jan-2004, VF
00060 
00061 -------------------------------------------------------------------- */
00062 
00063 #include "classifier/svm/GMNPLib.h"
00064 #include "lib/Mathematics.h"
00065 
00066 #include <string.h>
00067 #include <limits.h>
00068 
00069 using namespace shogun;
00070 
00071 #define HISTORY_BUF 1000000
00072 
00073 #define MINUS_INF INT_MIN
00074 #define PLUS_INF  INT_MAX
00075 
00076 #define INDEX(ROW,COL,DIM) ((COL*DIM)+ROW)
00077 #define KDELTA(A,B) (A==B)
00078 #define KDELTA4(A1,A2,A3,A4) ((A1==A2)||(A1==A3)||(A1==A4)||(A2==A3)||(A2==A4)||(A3==A4))
00079 
00080 CGMNPLib::CGMNPLib(void)
00081 {
00082     SG_UNSTABLE("CGMNPLib::CGMNPLib(void)", "\n");
00083 
00084     diag_H = NULL;
00085     kernel_columns = NULL;
00086     cache_index = NULL;
00087     first_kernel_inx = 0;
00088     Cache_Size = 0;
00089     m_num_data = 0;
00090     m_reg_const = 0;
00091     m_vector_y = 0;
00092     m_kernel = NULL;
00093 
00094     first_virt_inx = 0;
00095     memset(&virt_columns, 0, sizeof (virt_columns));
00096     m_num_virt_data = 0;
00097     m_num_classes = 0;
00098 }
00099 
00100 CGMNPLib::CGMNPLib(
00101     float64_t* vector_y, CKernel* kernel, int32_t num_data,
00102     int32_t num_virt_data, int32_t num_classes, float64_t reg_const)
00103 : CSGObject()
00104 {
00105   m_num_classes=num_classes;
00106   m_num_virt_data=num_virt_data;
00107   m_reg_const = reg_const;
00108   m_num_data = num_data;
00109   m_vector_y = vector_y;
00110   m_kernel = kernel;
00111 
00112   Cache_Size = ((int64_t) kernel->get_cache_size())*1024*1024/(sizeof(float64_t)*num_data);
00113   Cache_Size = CMath::min(Cache_Size, (int64_t) num_data);
00114 
00115   SG_INFO("using %d kernel cache lines\n", Cache_Size);
00116   ASSERT(Cache_Size>=2);
00117 
00118   /* allocates memory for kernel cache */
00119   kernel_columns = new float64_t*[Cache_Size];
00120   cache_index = new float64_t[Cache_Size];
00121 
00122   for(int32_t i = 0; i < Cache_Size; i++ )
00123   {
00124     kernel_columns[i] = new float64_t[num_data];
00125     cache_index[i] = -2;
00126   }
00127   first_kernel_inx = 0;
00128 
00129 
00130 
00131   for(int32_t i = 0; i < 3; i++ )
00132   {
00133     virt_columns[i] = new float64_t[num_virt_data];
00134   }
00135   first_virt_inx = 0;
00136 
00137   diag_H = new float64_t[num_virt_data];
00138 
00139   for(int32_t i = 0; i < num_virt_data; i++ )
00140       diag_H[i] = kernel_fce(i,i);
00141 }
00142 
00143 CGMNPLib::~CGMNPLib()
00144 {
00145     for(int32_t i = 0; i < Cache_Size; i++ ) 
00146         delete[] kernel_columns[i];
00147 
00148     for(int32_t i = 0; i < 3; i++ ) 
00149         delete[] virt_columns[i];
00150 
00151     delete[] cache_index;
00152     delete[] kernel_columns;
00153 
00154     delete[] diag_H;
00155 }
00156 
00157 /* ------------------------------------------------------------
00158   Returns pointer at a-th column of the kernel matrix.
00159   This function maintains FIFO cache of kernel columns.
00160 ------------------------------------------------------------ */
00161 float64_t* CGMNPLib::get_kernel_col( int32_t a )
00162 {
00163   float64_t *col_ptr;
00164   int32_t i;
00165   int32_t inx;
00166 
00167   inx = -1;
00168   for( i=0; i < Cache_Size; i++ ) {
00169     if( cache_index[i] == a ) { inx = i; break; }
00170   }
00171     
00172   if( inx != -1 ) {
00173     col_ptr = kernel_columns[inx];
00174     return( col_ptr );
00175   }
00176    
00177   col_ptr = kernel_columns[first_kernel_inx];
00178   cache_index[first_kernel_inx] = a;
00179 
00180   first_kernel_inx++;
00181   if( first_kernel_inx >= Cache_Size ) first_kernel_inx = 0;
00182 
00183   for( i=0; i < m_num_data; i++ ) {
00184     col_ptr[i] = m_kernel->kernel(i,a);
00185   }
00186 
00187   return( col_ptr );
00188 }
00189 
00190 /* ------------------------------------------------------------
00191   Computes index of input example and its class label from 
00192   index of virtual "single-class" example.
00193 ------------------------------------------------------------ */
00194 void CGMNPLib::get_indices2( int32_t *index, int32_t *c, int32_t i )
00195 {
00196    *index = i / (m_num_classes-1);
00197  
00198    *c= (i % (m_num_classes-1))+1;
00199    if( *c>= m_vector_y[ *index ]) (*c)++;
00200 
00201    return;
00202 }
00203 
00204 
00205 /* ------------------------------------------------------------
00206   Returns pointer at the a-th column of the virtual K matrix.
00207 
00208   (note: the b-th column must be preserved in the cache during 
00209    updating but b is from (a(t-2), a(t-1)) where a=a(t) and
00210    thus FIFO with three columns does not have to take care of b.)
00211 ------------------------------------------------------------ */
00212 float64_t* CGMNPLib::get_col( int32_t a, int32_t b )
00213 {
00214   int32_t i;
00215   float64_t *col_ptr;
00216   float64_t *ker_ptr;
00217   float64_t value;
00218   int32_t i1,c1,i2,c2;
00219 
00220   col_ptr = virt_columns[first_virt_inx++];
00221   if( first_virt_inx >= 3 ) first_virt_inx = 0;
00222 
00223   get_indices2( &i1, &c1, a );
00224   ker_ptr = (float64_t*) get_kernel_col( i1 );
00225 
00226   for( i=0; i < m_num_virt_data; i++ ) {
00227     get_indices2( &i2, &c2, i );
00228 
00229     if( KDELTA4(m_vector_y[i1],m_vector_y[i2],c1,c2) ) {
00230       value = (+KDELTA(m_vector_y[i1],m_vector_y[i2]) 
00231                -KDELTA(m_vector_y[i1],c2)
00232                -KDELTA(m_vector_y[i2],c1)
00233                +KDELTA(c1,c2)
00234               )*(ker_ptr[i2]+1);
00235     }
00236     else
00237     {
00238       value = 0;
00239     }
00240 
00241     if(a==i) value += m_reg_const; 
00242 
00243     col_ptr[i] = value;
00244   }
00245   
00246   return( col_ptr );
00247 }
00248 
00249 
00250 /* --------------------------------------------------------------
00251  GMNP solver based on improved MDM algorithm 1.
00252 
00253  Search strategy: u determined by common rule and v is 
00254  optimized.
00255 
00256  Usage: exitflag = gmnp_imdm( &get_col, diag_H, vector_c, dim,  
00257                   tmax, tolabs, tolrel, th, &alpha, &t, &History );
00258 -------------------------------------------------------------- */
00259 
/* Solves min 0.5*alpha'*H*alpha + c'*alpha  s.t. sum(alpha)=1, alpha>=0
   by the improved MDM algorithm 1 (u by common rule, v optimized).
   Returns the exit flag (0..3, see file header); outputs the iteration
   count in *ptr_t and the LB/UB history (caller owns) in *ptr_History. */
int8_t CGMNPLib::gmnp_imdm(float64_t *vector_c,
            int32_t dim, 
            int32_t tmax,
            float64_t tolabs,
            float64_t tolrel,
            float64_t th,
            float64_t *alpha,
            int32_t  *ptr_t,
            float64_t **ptr_History,
            int32_t verb)
{
  float64_t LB;                  /* lower bound on the optimum */
  float64_t UB;                  /* upper bound = current objective value */
  float64_t aHa, ac;             /* running alpha'*H*alpha and c'*alpha */
  float64_t tmp, tmp1;
  float64_t Huu, Huv, Hvv;       /* H entries needed by the update rule */
  float64_t min_beta, beta;      /* beta(i) = (H*alpha + c)(i); argmin gives u */
  float64_t max_improv, improv;  /* objective improvement when pairing i with u */
  float64_t lambda;              /* analytic step size, clipped to [0,1] */
  float64_t *History;            /* 2 x History_size array of (LB,UB) per iter */
  float64_t *Ha;                 /* Ha[i] = (H*alpha)(i), updated incrementally */
  float64_t *tmp_ptr;
  float64_t *col_u, *col_v;      /* cached columns of H for indices u and v */
  int32_t u=0;
  int32_t v=0;
  int32_t new_u=0;
  int32_t i;
  int32_t t;                     /* iteration counter */
  int32_t History_size;
  int8_t exitflag;

  /* ------------------------------------------------------------ */
  /* Initialization                                               */
  /* ------------------------------------------------------------ */

  Ha = new float64_t[dim];
  /* NOTE(review): operator new throws on failure, so this NULL check
     (and the two below) is vestigial; kept for behavior parity. */
  if( Ha == NULL ) SG_ERROR("Not enough memory.");

  /* history grows in HISTORY_BUF-sized chunks if tmax exceeds the buffer */
  History_size = (tmax < HISTORY_BUF ) ? tmax+1 : HISTORY_BUF;
  History = new float64_t[History_size*2];
  if( History == NULL ) SG_ERROR("Not enough memory.");

  /* inx = argmin(0.5*diag_H + vector_c );
     PLUS_INF (INT_MAX) serves as a "+infinity" sentinel for the scan */
  for( tmp1 =  PLUS_INF, i = 0; i < dim; i++ ) {
    tmp = 0.5*diag_H[i] + vector_c[i];
    if( tmp1 > tmp) {
      tmp1 = tmp;
      v = i;
    }
  }

  col_v = (float64_t*)get_col(v,-1);

  /* start from alpha = e_v, so H*alpha is simply the v-th column;
     simultaneously pick u = argmin beta(i) */
  for( min_beta = PLUS_INF, i = 0; i < dim; i++ ) 
  {
    alpha[i] = 0;
    Ha[i] = col_v[i];

    beta = Ha[i] + vector_c[i];
    if( beta < min_beta ) {
      min_beta = beta;
      u = i;
    }
  }

  alpha[v] = 1;
  aHa = diag_H[v];
  ac = vector_c[v];

  UB = 0.5*aHa + ac;
  LB = min_beta - 0.5*aHa;

  t = 0;
  History[INDEX(0,0,2)] = LB;
  History[INDEX(1,0,2)] = UB;

  if( verb ) {
    SG_PRINT("Init: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
      UB, LB, UB-LB,(UB-LB)/UB);
  }  

  /* Stopping conditions */
  if( UB-LB <= tolabs ) exitflag = 1;
  else if(UB-LB <= CMath::abs(UB)*tolrel ) exitflag = 2;
  else if(LB > th ) exitflag = 3;
  else exitflag = -1;

  /* ------------------------------------------------------------ */
  /* Main optimization loop                                       */
  /* ------------------------------------------------------------ */

  col_u = (float64_t*)get_col(u,-1);
  while( exitflag == -1 ) 
  {
    t++;     

    /* passing u as second arg documents that col_u must stay cached
       while col_v is fetched (see note above get_col) */
    col_v = (float64_t*)get_col(v,u);

    /* Adaptation rule and update */
    Huu = diag_H[u];
    Hvv = diag_H[v];
    Huv = col_u[v];

    /* optimal step along the line segment moving mass from v to u */
    lambda = (Ha[v]-Ha[u]+vector_c[v]-vector_c[u])/(alpha[v]*(Huu-2*Huv+Hvv));
    if( lambda < 0 ) lambda = 0; else if (lambda > 1) lambda = 1;

    /* incremental update of alpha'*H*alpha and c'*alpha (avoids any
       O(dim^2) recomputation) */
    aHa = aHa + 2*alpha[v]*lambda*(Ha[u]-Ha[v])+
                lambda*lambda*alpha[v]*alpha[v]*(Huu-2*Huv+Hvv);

    ac = ac + lambda*alpha[v]*(vector_c[u]-vector_c[v]);

    /* tmp keeps the pre-update alpha[v]; it is needed for the Ha update */
    tmp = alpha[v];
    alpha[u]=alpha[u]+lambda*alpha[v];
    alpha[v]=alpha[v]-lambda*alpha[v];

    UB = 0.5*aHa + ac;
    
/*    max_beta = MINUS_INF;*/
    /* refresh Ha = H*alpha in O(dim) and pick the next u on the fly */
    for( min_beta = PLUS_INF, i = 0; i < dim; i++ ) 
    {
       Ha[i] = Ha[i] + lambda*tmp*(col_u[i] - col_v[i]);

       beta = Ha[i]+ vector_c[i];

       if( beta < min_beta )
       { 
         new_u = i;
         min_beta = beta;
       }
    }    

    LB = min_beta - 0.5*aHa; 
    u = new_u;    
    col_u = (float64_t*)get_col(u,-1);

    /* search for optimal v while u is fixed */
    for( max_improv =  MINUS_INF, i = 0; i < dim; i++ ) {

      /* only support vectors (alpha[i] != 0) are candidates for v */
      if( alpha[i] != 0 ) {
        beta = Ha[i] + vector_c[i];

        if( beta >= min_beta ) {

          /* curvature along the (u,i) direction; guard against division
             by zero when H[u,u] - 2*H[u,i] + H[i,i] vanishes */
          tmp = diag_H[u] - 2*col_u[i] + diag_H[i];
          if( tmp != 0 ) {
            improv = (0.5*(beta-min_beta)*(beta-min_beta))/tmp;

            if( improv > max_improv ) {
              max_improv = improv;
              v = i;
            }
          }
        }
      }
    }

    /* Stopping conditions */
    if( UB-LB <= tolabs ) exitflag = 1; 
    else if( UB-LB <= CMath::abs(UB)*tolrel) exitflag = 2;
    else if(LB > th ) exitflag = 3;
    else if(t >= tmax) exitflag = 0; 

    /* print info */
    SG_ABS_PROGRESS(CMath::abs((UB-LB)/UB),
            -CMath::log10(CMath::abs(UB-LB)),
            -CMath::log10(1.0),
            -CMath::log10(tolrel), 6);
    if(verb && (t % verb) == 0 ) {
      SG_PRINT("%d: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
        t, UB, LB, UB-LB,(UB-LB)/UB);
    }  

    /* Store selected values */
    if( t < History_size ) {
      History[INDEX(0,t,2)] = LB;
      History[INDEX(1,t,2)] = UB;
    }
    else {
      /* history buffer full: grow by another HISTORY_BUF entries */
      tmp_ptr = new float64_t[(History_size+HISTORY_BUF)*2];
      if( tmp_ptr == NULL ) SG_ERROR("Not enough memory.");
      for( i = 0; i < History_size; i++ ) {
        tmp_ptr[INDEX(0,i,2)] = History[INDEX(0,i,2)];
        tmp_ptr[INDEX(1,i,2)] = History[INDEX(1,i,2)];
      }
      tmp_ptr[INDEX(0,t,2)] = LB;
      tmp_ptr[INDEX(1,t,2)] = UB;
      
      History_size += HISTORY_BUF;
      delete[] History;
      History = tmp_ptr;
    }
  }

  /* print info about last iteration*/
  SG_DONE();
  if(verb && (t % verb) ) {
    SG_PRINT("exit: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
      UB, LB, UB-LB,(UB-LB)/UB);
  }  


  /*------------------------------------------------------- */
  /* Set outputs                                            */
  /*------------------------------------------------------- */
  (*ptr_t) = t;
  (*ptr_History) = History;   /* ownership passes to the caller */

  /* Free memory */
  delete[] Ha;
  
  return( exitflag ); 
}
00472 
00473 /* ------------------------------------------------------------
00474   Returns the (a,b)-th element of the virtual kernel matrix 
00475   of size [num_virt_data x num_virt_data]. 
00476 ------------------------------------------------------------ */
00477 float64_t CGMNPLib::kernel_fce( int32_t a, int32_t b )
00478 {
00479   float64_t value;
00480   int32_t i1,c1,i2,c2;
00481 
00482   get_indices2( &i1, &c1, a );
00483   get_indices2( &i2, &c2, b );
00484 
00485   if( KDELTA4(m_vector_y[i1],m_vector_y[i2],c1,c2) ) {
00486     value = (+KDELTA(m_vector_y[i1],m_vector_y[i2]) 
00487              -KDELTA(m_vector_y[i1],c2)
00488              -KDELTA(m_vector_y[i2],c1)
00489              +KDELTA(c1,c2)
00490             )*(m_kernel->kernel( i1, i2 )+1);
00491   }
00492   else
00493   {
00494     value = 0;
00495   }
00496 
00497   if(a==b) value += m_reg_const; 
00498 
00499   return( value );
00500 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation