00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063 #include "classifier/svm/GMNPLib.h"
00064 #include "lib/Mathematics.h"
00065
00066 #include <string.h>
00067 #include <limits.h>
00068
00069 using namespace shogun;
00070
00071 #define HISTORY_BUF 1000000
00072
00073 #define MINUS_INF INT_MIN
00074 #define PLUS_INF INT_MAX
00075
00076 #define INDEX(ROW,COL,DIM) ((COL*DIM)+ROW)
00077 #define KDELTA(A,B) (A==B)
00078 #define KDELTA4(A1,A2,A3,A4) ((A1==A2)||(A1==A3)||(A1==A4)||(A2==A3)||(A2==A4)||(A3==A4))
00079
00080 CGMNPLib::CGMNPLib(void)
00081 {
00082 SG_UNSTABLE("CGMNPLib::CGMNPLib(void)", "\n");
00083
00084 diag_H = NULL;
00085 kernel_columns = NULL;
00086 cache_index = NULL;
00087 first_kernel_inx = 0;
00088 Cache_Size = 0;
00089 m_num_data = 0;
00090 m_reg_const = 0;
00091 m_vector_y = 0;
00092 m_kernel = NULL;
00093
00094 first_virt_inx = 0;
00095 memset(&virt_columns, 0, sizeof (virt_columns));
00096 m_num_virt_data = 0;
00097 m_num_classes = 0;
00098 }
00099
00100 CGMNPLib::CGMNPLib(
00101 float64_t* vector_y, CKernel* kernel, int32_t num_data,
00102 int32_t num_virt_data, int32_t num_classes, float64_t reg_const)
00103 : CSGObject()
00104 {
00105 m_num_classes=num_classes;
00106 m_num_virt_data=num_virt_data;
00107 m_reg_const = reg_const;
00108 m_num_data = num_data;
00109 m_vector_y = vector_y;
00110 m_kernel = kernel;
00111
00112 Cache_Size = ((int64_t) kernel->get_cache_size())*1024*1024/(sizeof(float64_t)*num_data);
00113 Cache_Size = CMath::min(Cache_Size, (int64_t) num_data);
00114
00115 SG_INFO("using %d kernel cache lines\n", Cache_Size);
00116 ASSERT(Cache_Size>=2);
00117
00118
00119 kernel_columns = new float64_t*[Cache_Size];
00120 cache_index = new float64_t[Cache_Size];
00121
00122 for(int32_t i = 0; i < Cache_Size; i++ )
00123 {
00124 kernel_columns[i] = new float64_t[num_data];
00125 cache_index[i] = -2;
00126 }
00127 first_kernel_inx = 0;
00128
00129
00130
00131 for(int32_t i = 0; i < 3; i++ )
00132 {
00133 virt_columns[i] = new float64_t[num_virt_data];
00134 }
00135 first_virt_inx = 0;
00136
00137 diag_H = new float64_t[num_virt_data];
00138
00139 for(int32_t i = 0; i < num_virt_data; i++ )
00140 diag_H[i] = kernel_fce(i,i);
00141 }
00142
00143 CGMNPLib::~CGMNPLib()
00144 {
00145 for(int32_t i = 0; i < Cache_Size; i++ )
00146 delete[] kernel_columns[i];
00147
00148 for(int32_t i = 0; i < 3; i++ )
00149 delete[] virt_columns[i];
00150
00151 delete[] cache_index;
00152 delete[] kernel_columns;
00153
00154 delete[] diag_H;
00155 }
00156
00157
00158
00159
00160
00161 float64_t* CGMNPLib::get_kernel_col( int32_t a )
00162 {
00163 float64_t *col_ptr;
00164 int32_t i;
00165 int32_t inx;
00166
00167 inx = -1;
00168 for( i=0; i < Cache_Size; i++ ) {
00169 if( cache_index[i] == a ) { inx = i; break; }
00170 }
00171
00172 if( inx != -1 ) {
00173 col_ptr = kernel_columns[inx];
00174 return( col_ptr );
00175 }
00176
00177 col_ptr = kernel_columns[first_kernel_inx];
00178 cache_index[first_kernel_inx] = a;
00179
00180 first_kernel_inx++;
00181 if( first_kernel_inx >= Cache_Size ) first_kernel_inx = 0;
00182
00183 for( i=0; i < m_num_data; i++ ) {
00184 col_ptr[i] = m_kernel->kernel(i,a);
00185 }
00186
00187 return( col_ptr );
00188 }
00189
00190
00191
00192
00193
00194 void CGMNPLib::get_indices2( int32_t *index, int32_t *c, int32_t i )
00195 {
00196 *index = i / (m_num_classes-1);
00197
00198 *c= (i % (m_num_classes-1))+1;
00199 if( *c>= m_vector_y[ *index ]) (*c)++;
00200
00201 return;
00202 }
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212 float64_t* CGMNPLib::get_col( int32_t a, int32_t b )
00213 {
00214 int32_t i;
00215 float64_t *col_ptr;
00216 float64_t *ker_ptr;
00217 float64_t value;
00218 int32_t i1,c1,i2,c2;
00219
00220 col_ptr = virt_columns[first_virt_inx++];
00221 if( first_virt_inx >= 3 ) first_virt_inx = 0;
00222
00223 get_indices2( &i1, &c1, a );
00224 ker_ptr = (float64_t*) get_kernel_col( i1 );
00225
00226 for( i=0; i < m_num_virt_data; i++ ) {
00227 get_indices2( &i2, &c2, i );
00228
00229 if( KDELTA4(m_vector_y[i1],m_vector_y[i2],c1,c2) ) {
00230 value = (+KDELTA(m_vector_y[i1],m_vector_y[i2])
00231 -KDELTA(m_vector_y[i1],c2)
00232 -KDELTA(m_vector_y[i2],c1)
00233 +KDELTA(c1,c2)
00234 )*(ker_ptr[i2]+1);
00235 }
00236 else
00237 {
00238 value = 0;
00239 }
00240
00241 if(a==i) value += m_reg_const;
00242
00243 col_ptr[i] = value;
00244 }
00245
00246 return( col_ptr );
00247 }
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260 int8_t CGMNPLib::gmnp_imdm(float64_t *vector_c,
00261 int32_t dim,
00262 int32_t tmax,
00263 float64_t tolabs,
00264 float64_t tolrel,
00265 float64_t th,
00266 float64_t *alpha,
00267 int32_t *ptr_t,
00268 float64_t **ptr_History,
00269 int32_t verb)
00270 {
00271 float64_t LB;
00272 float64_t UB;
00273 float64_t aHa, ac;
00274 float64_t tmp, tmp1;
00275 float64_t Huu, Huv, Hvv;
00276 float64_t min_beta, beta;
00277 float64_t max_improv, improv;
00278 float64_t lambda;
00279 float64_t *History;
00280 float64_t *Ha;
00281 float64_t *tmp_ptr;
00282 float64_t *col_u, *col_v;
00283 int32_t u=0;
00284 int32_t v=0;
00285 int32_t new_u=0;
00286 int32_t i;
00287 int32_t t;
00288 int32_t History_size;
00289 int8_t exitflag;
00290
00291
00292
00293
00294
00295 Ha = new float64_t[dim];
00296 if( Ha == NULL ) SG_ERROR("Not enough memory.");
00297
00298 History_size = (tmax < HISTORY_BUF ) ? tmax+1 : HISTORY_BUF;
00299 History = new float64_t[History_size*2];
00300 if( History == NULL ) SG_ERROR("Not enough memory.");
00301
00302
00303 for( tmp1 = PLUS_INF, i = 0; i < dim; i++ ) {
00304 tmp = 0.5*diag_H[i] + vector_c[i];
00305 if( tmp1 > tmp) {
00306 tmp1 = tmp;
00307 v = i;
00308 }
00309 }
00310
00311 col_v = (float64_t*)get_col(v,-1);
00312
00313 for( min_beta = PLUS_INF, i = 0; i < dim; i++ )
00314 {
00315 alpha[i] = 0;
00316 Ha[i] = col_v[i];
00317
00318 beta = Ha[i] + vector_c[i];
00319 if( beta < min_beta ) {
00320 min_beta = beta;
00321 u = i;
00322 }
00323 }
00324
00325 alpha[v] = 1;
00326 aHa = diag_H[v];
00327 ac = vector_c[v];
00328
00329 UB = 0.5*aHa + ac;
00330 LB = min_beta - 0.5*aHa;
00331
00332 t = 0;
00333 History[INDEX(0,0,2)] = LB;
00334 History[INDEX(1,0,2)] = UB;
00335
00336 if( verb ) {
00337 SG_PRINT("Init: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
00338 UB, LB, UB-LB,(UB-LB)/UB);
00339 }
00340
00341
00342 if( UB-LB <= tolabs ) exitflag = 1;
00343 else if(UB-LB <= CMath::abs(UB)*tolrel ) exitflag = 2;
00344 else if(LB > th ) exitflag = 3;
00345 else exitflag = -1;
00346
00347
00348
00349
00350
00351 col_u = (float64_t*)get_col(u,-1);
00352 while( exitflag == -1 )
00353 {
00354 t++;
00355
00356 col_v = (float64_t*)get_col(v,u);
00357
00358
00359 Huu = diag_H[u];
00360 Hvv = diag_H[v];
00361 Huv = col_u[v];
00362
00363 lambda = (Ha[v]-Ha[u]+vector_c[v]-vector_c[u])/(alpha[v]*(Huu-2*Huv+Hvv));
00364 if( lambda < 0 ) lambda = 0; else if (lambda > 1) lambda = 1;
00365
00366 aHa = aHa + 2*alpha[v]*lambda*(Ha[u]-Ha[v])+
00367 lambda*lambda*alpha[v]*alpha[v]*(Huu-2*Huv+Hvv);
00368
00369 ac = ac + lambda*alpha[v]*(vector_c[u]-vector_c[v]);
00370
00371 tmp = alpha[v];
00372 alpha[u]=alpha[u]+lambda*alpha[v];
00373 alpha[v]=alpha[v]-lambda*alpha[v];
00374
00375 UB = 0.5*aHa + ac;
00376
00377
00378 for( min_beta = PLUS_INF, i = 0; i < dim; i++ )
00379 {
00380 Ha[i] = Ha[i] + lambda*tmp*(col_u[i] - col_v[i]);
00381
00382 beta = Ha[i]+ vector_c[i];
00383
00384 if( beta < min_beta )
00385 {
00386 new_u = i;
00387 min_beta = beta;
00388 }
00389 }
00390
00391 LB = min_beta - 0.5*aHa;
00392 u = new_u;
00393 col_u = (float64_t*)get_col(u,-1);
00394
00395
00396 for( max_improv = MINUS_INF, i = 0; i < dim; i++ ) {
00397
00398 if( alpha[i] != 0 ) {
00399 beta = Ha[i] + vector_c[i];
00400
00401 if( beta >= min_beta ) {
00402
00403 tmp = diag_H[u] - 2*col_u[i] + diag_H[i];
00404 if( tmp != 0 ) {
00405 improv = (0.5*(beta-min_beta)*(beta-min_beta))/tmp;
00406
00407 if( improv > max_improv ) {
00408 max_improv = improv;
00409 v = i;
00410 }
00411 }
00412 }
00413 }
00414 }
00415
00416
00417 if( UB-LB <= tolabs ) exitflag = 1;
00418 else if( UB-LB <= CMath::abs(UB)*tolrel) exitflag = 2;
00419 else if(LB > th ) exitflag = 3;
00420 else if(t >= tmax) exitflag = 0;
00421
00422
00423 SG_ABS_PROGRESS(CMath::abs((UB-LB)/UB),
00424 -CMath::log10(CMath::abs(UB-LB)),
00425 -CMath::log10(1.0),
00426 -CMath::log10(tolrel), 6);
00427 if(verb && (t % verb) == 0 ) {
00428 SG_PRINT("%d: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
00429 t, UB, LB, UB-LB,(UB-LB)/UB);
00430 }
00431
00432
00433 if( t < History_size ) {
00434 History[INDEX(0,t,2)] = LB;
00435 History[INDEX(1,t,2)] = UB;
00436 }
00437 else {
00438 tmp_ptr = new float64_t[(History_size+HISTORY_BUF)*2];
00439 if( tmp_ptr == NULL ) SG_ERROR("Not enough memory.");
00440 for( i = 0; i < History_size; i++ ) {
00441 tmp_ptr[INDEX(0,i,2)] = History[INDEX(0,i,2)];
00442 tmp_ptr[INDEX(1,i,2)] = History[INDEX(1,i,2)];
00443 }
00444 tmp_ptr[INDEX(0,t,2)] = LB;
00445 tmp_ptr[INDEX(1,t,2)] = UB;
00446
00447 History_size += HISTORY_BUF;
00448 delete[] History;
00449 History = tmp_ptr;
00450 }
00451 }
00452
00453
00454 SG_DONE();
00455 if(verb && (t % verb) ) {
00456 SG_PRINT("exit: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
00457 UB, LB, UB-LB,(UB-LB)/UB);
00458 }
00459
00460
00461
00462
00463
00464 (*ptr_t) = t;
00465 (*ptr_History) = History;
00466
00467
00468 delete[] Ha;
00469
00470 return( exitflag );
00471 }
00472
00473
00474
00475
00476
00477 float64_t CGMNPLib::kernel_fce( int32_t a, int32_t b )
00478 {
00479 float64_t value;
00480 int32_t i1,c1,i2,c2;
00481
00482 get_indices2( &i1, &c1, a );
00483 get_indices2( &i2, &c2, b );
00484
00485 if( KDELTA4(m_vector_y[i1],m_vector_y[i2],c1,c2) ) {
00486 value = (+KDELTA(m_vector_y[i1],m_vector_y[i2])
00487 -KDELTA(m_vector_y[i1],c2)
00488 -KDELTA(m_vector_y[i2],c1)
00489 +KDELTA(c1,c2)
00490 )*(m_kernel->kernel( i1, i2 )+1);
00491 }
00492 else
00493 {
00494 value = 0;
00495 }
00496
00497 if(a==b) value += m_reg_const;
00498
00499 return( value );
00500 }