00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include <shogun/lib/config.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/io/File.h>
00019 #include <shogun/lib/Time.h>
00020 #include <shogun/lib/Signal.h>
00021
00022 #include <shogun/base/Parallel.h>
00023
00024 #include <shogun/kernel/Kernel.h>
00025 #include <shogun/kernel/normalizer/IdentityKernelNormalizer.h>
00026 #include <shogun/features/Features.h>
00027 #include <shogun/base/Parameter.h>
00028
00029 #include <shogun/classifier/svm/SVM.h>
00030
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034
00035 #ifdef HAVE_PTHREAD
00036 #include <pthread.h>
00037 #endif
00038
00039 using namespace shogun;
00040
00041 CKernel::CKernel() : CSGObject()
00042 {
00043 init();
00044 register_params();
00045 }
00046
00047 CKernel::CKernel(int32_t size) : CSGObject()
00048 {
00049 init();
00050
00051 if (size<10)
00052 size=10;
00053
00054 cache_size=size;
00055 register_params();
00056 }
00057
00058
00059 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00060 {
00061 init();
00062
00063 if (size<10)
00064 size=10;
00065
00066 cache_size=size;
00067
00068 set_normalizer(new CIdentityKernelNormalizer());
00069 init(p_lhs, p_rhs);
00070 register_params();
00071 }
00072
00073 CKernel::~CKernel()
00074 {
00075 if (get_is_initialized())
00076 SG_ERROR("Kernel still initialized on destruction.\n");
00077
00078 remove_lhs_and_rhs();
00079 SG_UNREF(normalizer);
00080
00081 SG_INFO("Kernel deleted (%p).\n", this);
00082 }
00083
00084 #ifdef USE_SVMLIGHT
00085 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack)
00086 {
00087 if (size<10)
00088 size=10;
00089
00090 kernel_cache_cleanup();
00091 cache_size=size;
00092
00093 if (has_features() && get_num_vec_lhs())
00094 kernel_cache_init(cache_size, regression_hack);
00095 }
00096 #endif //USE_SVMLIGHT
00097
00098 bool CKernel::init(CFeatures* l, CFeatures* r)
00099 {
00100 SG_DEBUG("entering CKernel::init(%p, %p)\n", l, r);
00101
00102
00103 SG_REF(l);
00104 SG_REF(r);
00105
00106
00107 REQUIRE(l, "CKernel::init(%p, %p): LHS features required!\n", l, r);
00108 REQUIRE(r, "CKernel::init(%p, %p): RHS features required!\n", l, r);
00109
00110
00111 ASSERT(l->get_feature_class()==r->get_feature_class());
00112 ASSERT(l->get_feature_type()==r->get_feature_type());
00113
00114
00115 remove_lhs_and_rhs();
00116
00117
00118 SG_REF(l);
00119 if (l==r)
00120 lhs_equals_rhs=true;
00121 else
00122 SG_REF(r);
00123
00124 lhs=l;
00125 rhs=r;
00126
00127 ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00128 ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00129
00130 num_lhs=l->get_num_vectors();
00131 num_rhs=r->get_num_vectors();
00132
00133
00134 SG_UNREF(r);
00135 SG_UNREF(l);
00136
00137 SG_DEBUG("leaving CKernel::init(%p, %p)\n", l, r);
00138 return true;
00139 }
00140
00141 bool CKernel::set_normalizer(CKernelNormalizer* n)
00142 {
00143 SG_REF(n);
00144 if (lhs && rhs)
00145 n->init(this);
00146
00147 SG_UNREF(normalizer);
00148 normalizer=n;
00149
00150 return (normalizer!=NULL);
00151 }
00152
00153 CKernelNormalizer* CKernel::get_normalizer()
00154 {
00155 SG_REF(normalizer)
00156 return normalizer;
00157 }
00158
00159 bool CKernel::init_normalizer()
00160 {
00161 return normalizer->init(this);
00162 }
00163
00164 void CKernel::cleanup()
00165 {
00166 remove_lhs_and_rhs();
00167 }
00168
00169 #ifdef USE_SVMLIGHT
00170
00171
00172 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack)
00173 {
00174 int32_t totdoc=get_num_vec_lhs();
00175 if (totdoc<=0)
00176 {
00177 SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n",
00178 get_num_vec_lhs(), get_num_vec_rhs());
00179 }
00180 uint64_t buffer_size=0;
00181 int32_t i;
00182
00183
00184 if (regression_hack)
00185 totdoc*=2;
00186
00187 buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM);
00188 if (buffer_size>((uint64_t) totdoc)*totdoc)
00189 buffer_size=((uint64_t) totdoc)*totdoc;
00190
00191 SG_INFO( "using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name());
00192
00193
00194 ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1)));
00195
00196 kernel_cache.index = SG_MALLOC(int32_t, totdoc);
00197 kernel_cache.occu = SG_MALLOC(int32_t, totdoc);
00198 kernel_cache.lru = SG_MALLOC(int32_t, totdoc);
00199 kernel_cache.invindex = SG_MALLOC(int32_t, totdoc);
00200 kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc);
00201 kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc);
00202 kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size);
00203 kernel_cache.buffsize=buffer_size;
00204 kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc);
00205
00206 if(kernel_cache.max_elems>totdoc) {
00207 kernel_cache.max_elems=totdoc;
00208 }
00209
00210 kernel_cache.elems=0;
00211 for(i=0;i<totdoc;i++) {
00212 kernel_cache.index[i]=-1;
00213 kernel_cache.lru[i]=0;
00214 }
00215 for(i=0;i<totdoc;i++) {
00216 kernel_cache.occu[i]=0;
00217 kernel_cache.invindex[i]=-1;
00218 }
00219
00220 kernel_cache.activenum=totdoc;;
00221 for(i=0;i<totdoc;i++) {
00222 kernel_cache.active2totdoc[i]=i;
00223 kernel_cache.totdoc2active[i]=i;
00224 }
00225
00226 kernel_cache.time=0;
00227 }
00228
00229 void CKernel::get_kernel_row(
00230 int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line)
00231 {
00232 int32_t i,j;
00233 KERNELCACHE_IDX start;
00234
00235 int32_t num_vectors = get_num_vec_lhs();
00236 if (docnum>=num_vectors)
00237 docnum=2*num_vectors-1-docnum;
00238
00239
00240 if(kernel_cache.index[docnum] != -1)
00241 {
00242 kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time;
00243 start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum];
00244
00245 if (full_line)
00246 {
00247 for(j=0;j<get_num_vec_lhs();j++)
00248 {
00249 if(kernel_cache.totdoc2active[j] >= 0)
00250 buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00251 else
00252 buffer[j]=(float64_t) kernel(docnum, j);
00253 }
00254 }
00255 else
00256 {
00257 for(i=0;(j=active2dnum[i])>=0;i++)
00258 {
00259 if(kernel_cache.totdoc2active[j] >= 0)
00260 buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00261 else
00262 {
00263 int32_t k=j;
00264 if (k>=num_vectors)
00265 k=2*num_vectors-1-k;
00266 buffer[j]=(float64_t) kernel(docnum, k);
00267 }
00268 }
00269 }
00270 }
00271 else
00272 {
00273 if (full_line)
00274 {
00275 for(j=0;j<get_num_vec_lhs();j++)
00276 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j);
00277 }
00278 else
00279 {
00280 for(i=0;(j=active2dnum[i])>=0;i++)
00281 {
00282 int32_t k=j;
00283 if (k>=num_vectors)
00284 k=2*num_vectors-1-k;
00285 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k);
00286 }
00287 }
00288 }
00289 }
00290
00291
00292
00293 void CKernel::cache_kernel_row(int32_t m)
00294 {
00295 register int32_t j,k,l;
00296 register KERNELCACHE_ELEM *cache;
00297
00298 int32_t num_vectors = get_num_vec_lhs();
00299
00300 if (m>=num_vectors)
00301 m=2*num_vectors-1-m;
00302
00303 if(!kernel_cache_check(m))
00304 {
00305 cache = kernel_cache_clean_and_malloc(m);
00306 if(cache) {
00307 l=kernel_cache.totdoc2active[m];
00308
00309 for(j=0;j<kernel_cache.activenum;j++)
00310 {
00311 k=kernel_cache.active2totdoc[j];
00312
00313 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) {
00314 cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)
00315 *kernel_cache.index[k]+l];
00316 }
00317 else
00318 {
00319 if (k>=num_vectors)
00320 k=2*num_vectors-1-k;
00321
00322 cache[j]=kernel(m, k);
00323 }
00324 }
00325 }
00326 else
00327 perror("Error: Kernel cache full! => increase cache size");
00328 }
00329 }
00330
00331
00332 void* CKernel::cache_multiple_kernel_row_helper(void* p)
00333 {
00334 int32_t j,k,l;
00335 S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p;
00336
00337 for (int32_t i=params->start; i<params->end; i++)
00338 {
00339 KERNELCACHE_ELEM* cache=params->cache[i];
00340 int32_t m = params->uncached_rows[i];
00341 l=params->kernel_cache->totdoc2active[m];
00342
00343 for(j=0;j<params->kernel_cache->activenum;j++)
00344 {
00345 k=params->kernel_cache->active2totdoc[j];
00346
00347 if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) {
00348 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum)
00349 *params->kernel_cache->index[k]+l];
00350 }
00351 else
00352 {
00353 if (k>=params->num_vectors)
00354 k=2*params->num_vectors-1-k;
00355
00356 cache[j]=params->kernel->kernel(m, k);
00357 }
00358 }
00359
00360
00361 params->needs_computation[m]=0;
00362 }
00363 return NULL;
00364 }
00365
00366
00367 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows)
00368 {
00369 #ifdef HAVE_PTHREAD
00370 int32_t nthreads=parallel->get_num_threads();
00371
00372 if (nthreads<2)
00373 {
00374 #endif
00375 for(int32_t i=0;i<num_rows;i++)
00376 cache_kernel_row(rows[i]);
00377 #ifdef HAVE_PTHREAD
00378 }
00379 else
00380 {
00381
00382 int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows);
00383 KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows);
00384 pthread_t* threads = SG_MALLOC(pthread_t, nthreads-1);
00385 S_KTHREAD_PARAM* params = SG_MALLOC(S_KTHREAD_PARAM, nthreads-1);
00386 int32_t num_threads=nthreads-1;
00387 int32_t num_vec=get_num_vec_lhs();
00388 ASSERT(num_vec>0);
00389 uint8_t* needs_computation=SG_CALLOC(uint8_t, num_vec);
00390
00391 int32_t step=0;
00392 int32_t num=0;
00393 int32_t end=0;
00394
00395
00396 for (int32_t i=0; i<num_rows; i++)
00397 {
00398 int32_t idx=rows[i];
00399 if (idx>=num_vec)
00400 idx=2*num_vec-1-idx;
00401
00402 if (kernel_cache_check(idx))
00403 continue;
00404
00405 needs_computation[idx]=1;
00406 uncached_rows[num]=idx;
00407 cache[num]= kernel_cache_clean_and_malloc(idx);
00408
00409 if (!cache[num])
00410 SG_ERROR("Kernel cache full! => increase cache size\n");
00411
00412 num++;
00413 }
00414
00415 if (num>0)
00416 {
00417 step= num/nthreads;
00418
00419 if (step<1)
00420 {
00421 num_threads=num-1;
00422 step=1;
00423 }
00424
00425 for (int32_t t=0; t<num_threads; t++)
00426 {
00427 params[t].kernel = this;
00428 params[t].kernel_cache = &kernel_cache;
00429 params[t].cache = cache;
00430 params[t].uncached_rows = uncached_rows;
00431 params[t].needs_computation = needs_computation;
00432 params[t].num_uncached = num;
00433 params[t].start = t*step;
00434 params[t].end = (t+1)*step;
00435 params[t].num_vectors = get_num_vec_lhs();
00436 end=params[t].end;
00437
00438 int code=pthread_create(&threads[t], NULL,
00439 CKernel::cache_multiple_kernel_row_helper, (void*)¶ms[t]);
00440
00441 if (code != 0)
00442 {
00443 SG_WARNING("Thread creation failed (thread %d of %d) "
00444 "with error:'%s'\n",t, num_threads, strerror(code));
00445 num_threads=t;
00446 end=t*step;
00447 break;
00448 }
00449 }
00450 }
00451 else
00452 num_threads=-1;
00453
00454
00455 S_KTHREAD_PARAM last_param;
00456 last_param.kernel = this;
00457 last_param.kernel_cache = &kernel_cache;
00458 last_param.cache = cache;
00459 last_param.uncached_rows = uncached_rows;
00460 last_param.needs_computation = needs_computation;
00461 last_param.start = end;
00462 last_param.num_uncached = num;
00463 last_param.end = num;
00464 last_param.num_vectors = get_num_vec_lhs();
00465
00466 cache_multiple_kernel_row_helper(&last_param);
00467
00468
00469 for (int32_t t=0; t<num_threads; t++)
00470 {
00471 if (pthread_join(threads[t], NULL) != 0)
00472 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00473 }
00474
00475 SG_FREE(needs_computation);
00476 SG_FREE(params);
00477 SG_FREE(threads);
00478 SG_FREE(cache);
00479 SG_FREE(uncached_rows);
00480 }
00481 #endif
00482 }
00483
00484
00485
00486 void CKernel::kernel_cache_shrink(
00487 int32_t totdoc, int32_t numshrink, int32_t *after)
00488 {
00489 register int32_t i,j,jj,scount;
00490 KERNELCACHE_IDX from=0,to=0;
00491 int32_t *keep;
00492
00493 keep=SG_MALLOC(int32_t, totdoc);
00494 for(j=0;j<totdoc;j++) {
00495 keep[j]=1;
00496 }
00497 scount=0;
00498 for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) {
00499 j=kernel_cache.active2totdoc[jj];
00500 if(!after[j]) {
00501 scount++;
00502 keep[j]=0;
00503 }
00504 }
00505
00506 for(i=0;i<kernel_cache.max_elems;i++) {
00507 for(jj=0;jj<kernel_cache.activenum;jj++) {
00508 j=kernel_cache.active2totdoc[jj];
00509 if(!keep[j]) {
00510 from++;
00511 }
00512 else {
00513 kernel_cache.buffer[to]=kernel_cache.buffer[from];
00514 to++;
00515 from++;
00516 }
00517 }
00518 }
00519
00520 kernel_cache.activenum=0;
00521 for(j=0;j<totdoc;j++) {
00522 if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) {
00523 kernel_cache.active2totdoc[kernel_cache.activenum]=j;
00524 kernel_cache.totdoc2active[j]=kernel_cache.activenum;
00525 kernel_cache.activenum++;
00526 }
00527 else {
00528 kernel_cache.totdoc2active[j]=-1;
00529 }
00530 }
00531
00532 kernel_cache.max_elems=
00533 (int32_t)(kernel_cache.buffsize/kernel_cache.activenum);
00534 if(kernel_cache.max_elems>totdoc) {
00535 kernel_cache.max_elems=totdoc;
00536 }
00537
00538 SG_FREE(keep);
00539
00540 }
00541
00542 void CKernel::kernel_cache_reset_lru()
00543 {
00544 int32_t maxlru=0,k;
00545
00546 for(k=0;k<kernel_cache.max_elems;k++) {
00547 if(maxlru < kernel_cache.lru[k])
00548 maxlru=kernel_cache.lru[k];
00549 }
00550 for(k=0;k<kernel_cache.max_elems;k++) {
00551 kernel_cache.lru[k]-=maxlru;
00552 }
00553 }
00554
00555 void CKernel::kernel_cache_cleanup()
00556 {
00557 SG_FREE(kernel_cache.index);
00558 SG_FREE(kernel_cache.occu);
00559 SG_FREE(kernel_cache.lru);
00560 SG_FREE(kernel_cache.invindex);
00561 SG_FREE(kernel_cache.active2totdoc);
00562 SG_FREE(kernel_cache.totdoc2active);
00563 SG_FREE(kernel_cache.buffer);
00564 memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00565 }
00566
00567 int32_t CKernel::kernel_cache_malloc()
00568 {
00569 int32_t i;
00570
00571 if(kernel_cache_space_available()) {
00572 for(i=0;i<kernel_cache.max_elems;i++) {
00573 if(!kernel_cache.occu[i]) {
00574 kernel_cache.occu[i]=1;
00575 kernel_cache.elems++;
00576 return(i);
00577 }
00578 }
00579 }
00580 return(-1);
00581 }
00582
00583 void CKernel::kernel_cache_free(int32_t cacheidx)
00584 {
00585 kernel_cache.occu[cacheidx]=0;
00586 kernel_cache.elems--;
00587 }
00588
00589
00590
00591 int32_t CKernel::kernel_cache_free_lru()
00592 {
00593 register int32_t k,least_elem=-1,least_time;
00594
00595 least_time=kernel_cache.time+1;
00596 for(k=0;k<kernel_cache.max_elems;k++) {
00597 if(kernel_cache.invindex[k] != -1) {
00598 if(kernel_cache.lru[k]<least_time) {
00599 least_time=kernel_cache.lru[k];
00600 least_elem=k;
00601 }
00602 }
00603 }
00604
00605 if(least_elem != -1) {
00606 kernel_cache_free(least_elem);
00607 kernel_cache.index[kernel_cache.invindex[least_elem]]=-1;
00608 kernel_cache.invindex[least_elem]=-1;
00609 return(1);
00610 }
00611 return(0);
00612 }
00613
00614
00615
00616 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx)
00617 {
00618 int32_t result;
00619 if((result = kernel_cache_malloc()) == -1) {
00620 if(kernel_cache_free_lru()) {
00621 result = kernel_cache_malloc();
00622 }
00623 }
00624 kernel_cache.index[cacheidx]=result;
00625 if(result == -1) {
00626 return(0);
00627 }
00628 kernel_cache.invindex[result]=cacheidx;
00629 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
00630 return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]];
00631 }
00632 #endif //USE_SVMLIGHT
00633
00634 void CKernel::load(CFile* loader)
00635 {
00636 SG_SET_LOCALE_C;
00637 SG_RESET_LOCALE;
00638 }
00639
00640 void CKernel::save(CFile* writer)
00641 {
00642 SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
00643 SG_SET_LOCALE_C;
00644 writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
00645 SG_RESET_LOCALE;
00646 }
00647
00648 void CKernel::remove_lhs_and_rhs()
00649 {
00650 SG_DEBUG("entering CKernel::remove_lhs_and_rhs\n");
00651 if (rhs!=lhs)
00652 SG_UNREF(rhs);
00653 rhs = NULL;
00654 num_rhs=0;
00655
00656 SG_UNREF(lhs);
00657 lhs = NULL;
00658 num_lhs=0;
00659 lhs_equals_rhs=false;
00660
00661 #ifdef USE_SVMLIGHT
00662 cache_reset();
00663 #endif //USE_SVMLIGHT
00664 SG_DEBUG("leaving CKernel::remove_lhs_and_rhs\n");
00665 }
00666
00667 void CKernel::remove_lhs()
00668 {
00669 if (rhs==lhs)
00670 rhs=NULL;
00671 SG_UNREF(lhs);
00672 lhs = NULL;
00673 num_lhs=0;
00674 lhs_equals_rhs=false;
00675 #ifdef USE_SVMLIGHT
00676 cache_reset();
00677 #endif //USE_SVMLIGHT
00678 }
00679
00681 void CKernel::remove_rhs()
00682 {
00683 if (rhs!=lhs)
00684 SG_UNREF(rhs);
00685 rhs = NULL;
00686 num_rhs=0;
00687 lhs_equals_rhs=false;
00688
00689 #ifdef USE_SVMLIGHT
00690 cache_reset();
00691 #endif //USE_SVMLIGHT
00692 }
00693
00694 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break;
00695
00696 void CKernel::list_kernel()
00697 {
00698 SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00699 get_combined_kernel_weight(),
00700 get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00701 "SLOWBUTMEMEFFICIENT");
00702
00703 switch (get_kernel_type())
00704 {
00705 ENUM_CASE(K_UNKNOWN)
00706 ENUM_CASE(K_LINEAR)
00707 ENUM_CASE(K_POLY)
00708 ENUM_CASE(K_GAUSSIAN)
00709 ENUM_CASE(K_GAUSSIANSHIFT)
00710 ENUM_CASE(K_GAUSSIANMATCH)
00711 ENUM_CASE(K_HISTOGRAM)
00712 ENUM_CASE(K_SALZBERG)
00713 ENUM_CASE(K_LOCALITYIMPROVED)
00714 ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
00715 ENUM_CASE(K_FIXEDDEGREE)
00716 ENUM_CASE(K_WEIGHTEDDEGREE)
00717 ENUM_CASE(K_WEIGHTEDDEGREEPOS)
00718 ENUM_CASE(K_WEIGHTEDDEGREERBF)
00719 ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
00720 ENUM_CASE(K_POLYMATCH)
00721 ENUM_CASE(K_ALIGNMENT)
00722 ENUM_CASE(K_COMMWORDSTRING)
00723 ENUM_CASE(K_COMMULONGSTRING)
00724 ENUM_CASE(K_SPECTRUMRBF)
00725 ENUM_CASE(K_COMBINED)
00726 ENUM_CASE(K_AUC)
00727 ENUM_CASE(K_CUSTOM)
00728 ENUM_CASE(K_SIGMOID)
00729 ENUM_CASE(K_CHI2)
00730 ENUM_CASE(K_DIAG)
00731 ENUM_CASE(K_CONST)
00732 ENUM_CASE(K_DISTANCE)
00733 ENUM_CASE(K_LOCALALIGNMENT)
00734 ENUM_CASE(K_PYRAMIDCHI2)
00735 ENUM_CASE(K_OLIGO)
00736 ENUM_CASE(K_MATCHWORD)
00737 ENUM_CASE(K_TPPK)
00738 ENUM_CASE(K_REGULATORYMODULES)
00739 ENUM_CASE(K_SPARSESPATIALSAMPLE)
00740 ENUM_CASE(K_HISTOGRAMINTERSECTION)
00741 ENUM_CASE(K_WAVELET)
00742 ENUM_CASE(K_WAVE)
00743 ENUM_CASE(K_CAUCHY)
00744 ENUM_CASE(K_TSTUDENT)
00745 ENUM_CASE(K_MULTIQUADRIC)
00746 ENUM_CASE(K_EXPONENTIAL)
00747 ENUM_CASE(K_RATIONAL_QUADRATIC)
00748 ENUM_CASE(K_POWER)
00749 ENUM_CASE(K_SPHERICAL)
00750 ENUM_CASE(K_LOG)
00751 ENUM_CASE(K_SPLINE)
00752 ENUM_CASE(K_ANOVA)
00753 ENUM_CASE(K_CIRCULAR)
00754 ENUM_CASE(K_INVERSEMULTIQUADRIC)
00755 ENUM_CASE(K_SPECTRUMMISMATCHRBF)
00756 ENUM_CASE(K_DISTANTSEGMENTS)
00757 ENUM_CASE(K_BESSEL)
00758 ENUM_CASE(K_JENSENSHANNON)
00759 ENUM_CASE(K_DIRECTOR)
00760 ENUM_CASE(K_PRODUCT)
00761 ENUM_CASE(K_LINEARARD)
00762 ENUM_CASE(K_GAUSSIANARD)
00763 ENUM_CASE(K_STREAMING)
00764 }
00765
00766 switch (get_feature_class())
00767 {
00768 ENUM_CASE(C_UNKNOWN)
00769 ENUM_CASE(C_DENSE)
00770 ENUM_CASE(C_SPARSE)
00771 ENUM_CASE(C_STRING)
00772 ENUM_CASE(C_STREAMING_DENSE)
00773 ENUM_CASE(C_STREAMING_SPARSE)
00774 ENUM_CASE(C_STREAMING_STRING)
00775 ENUM_CASE(C_STREAMING_VW)
00776 ENUM_CASE(C_COMBINED)
00777 ENUM_CASE(C_COMBINED_DOT)
00778 ENUM_CASE(C_WD)
00779 ENUM_CASE(C_SPEC)
00780 ENUM_CASE(C_WEIGHTEDSPEC)
00781 ENUM_CASE(C_POLY)
00782 ENUM_CASE(C_BINNED_DOT)
00783 ENUM_CASE(C_DIRECTOR_DOT)
00784 ENUM_CASE(C_LATENT)
00785 ENUM_CASE(C_ANY)
00786 }
00787
00788 switch (get_feature_type())
00789 {
00790 ENUM_CASE(F_UNKNOWN)
00791 ENUM_CASE(F_BOOL)
00792 ENUM_CASE(F_CHAR)
00793 ENUM_CASE(F_BYTE)
00794 ENUM_CASE(F_SHORT)
00795 ENUM_CASE(F_WORD)
00796 ENUM_CASE(F_INT)
00797 ENUM_CASE(F_UINT)
00798 ENUM_CASE(F_LONG)
00799 ENUM_CASE(F_ULONG)
00800 ENUM_CASE(F_SHORTREAL)
00801 ENUM_CASE(F_DREAL)
00802 ENUM_CASE(F_LONGREAL)
00803 ENUM_CASE(F_ANY)
00804 }
00805 SG_INFO( "\n");
00806 }
00807 #undef ENUM_CASE
00808
00809 bool CKernel::init_optimization(
00810 int32_t count, int32_t *IDX, float64_t * weights)
00811 {
00812 SG_ERROR( "kernel does not support linadd optimization\n");
00813 return false ;
00814 }
00815
00816 bool CKernel::delete_optimization()
00817 {
00818 SG_ERROR( "kernel does not support linadd optimization\n");
00819 return false;
00820 }
00821
00822 float64_t CKernel::compute_optimized(int32_t vector_idx)
00823 {
00824 SG_ERROR( "kernel does not support linadd optimization\n");
00825 return 0;
00826 }
00827
00828 void CKernel::compute_batch(
00829 int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00830 int32_t* IDX, float64_t* weights, float64_t factor)
00831 {
00832 SG_ERROR( "kernel does not support batch computation\n");
00833 }
00834
00835 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00836 {
00837 SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00838 }
00839
00840 void CKernel::clear_normal()
00841 {
00842 SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00843 }
00844
00845 int32_t CKernel::get_num_subkernels()
00846 {
00847 return 1;
00848 }
00849
00850 void CKernel::compute_by_subkernel(
00851 int32_t vector_idx, float64_t * subkernel_contrib)
00852 {
00853 SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00854 }
00855
00856 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00857 {
00858 num_weights=1 ;
00859 return &combined_kernel_weight ;
00860 }
00861
00862 void CKernel::set_subkernel_weights(const SGVector<float64_t> weights)
00863 {
00864 ASSERT(weights.vector);
00865 if (weights.vlen!=1)
00866 SG_ERROR( "number of subkernel weights should be one ...\n");
00867
00868 combined_kernel_weight = weights.vector[0] ;
00869 }
00870
00871 bool CKernel::init_optimization_svm(CSVM * svm)
00872 {
00873 int32_t num_suppvec=svm->get_num_support_vectors();
00874 int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
00875 float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
00876
00877 for (int32_t i=0; i<num_suppvec; i++)
00878 {
00879 sv_idx[i] = svm->get_support_vector(i);
00880 sv_weight[i] = svm->get_alpha(i);
00881 }
00882 bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00883
00884 SG_FREE(sv_idx);
00885 SG_FREE(sv_weight);
00886 return ret;
00887 }
00888
00889 void CKernel::load_serializable_post() throw (ShogunException)
00890 {
00891 CSGObject::load_serializable_post();
00892 if (lhs_equals_rhs)
00893 rhs=lhs;
00894 }
00895
00896 void CKernel::save_serializable_pre() throw (ShogunException)
00897 {
00898 CSGObject::save_serializable_pre();
00899
00900 if (lhs_equals_rhs)
00901 rhs=NULL;
00902 }
00903
00904 void CKernel::save_serializable_post() throw (ShogunException)
00905 {
00906 CSGObject::save_serializable_post();
00907
00908 if (lhs_equals_rhs)
00909 rhs=lhs;
00910 }
00911
00912 void CKernel::register_params() {
00913 SG_ADD(&cache_size, "cache_size",
00914 "Cache size in MB.", MS_NOT_AVAILABLE);
00915 SG_ADD((CSGObject**) &lhs, "lhs",
00916 "Feature vectors to occur on left hand side.", MS_NOT_AVAILABLE);
00917 SG_ADD((CSGObject**) &rhs, "rhs",
00918 "Feature vectors to occur on right hand side.", MS_NOT_AVAILABLE);
00919 SG_ADD(&lhs_equals_rhs, "lhs_equals_rhs",
00920 "If features on lhs are the same as on rhs.", MS_NOT_AVAILABLE);
00921 SG_ADD(&num_lhs, "num_lhs", "Number of feature vectors on left hand side.",
00922 MS_NOT_AVAILABLE);
00923 SG_ADD(&num_rhs, "num_rhs", "Number of feature vectors on right hand side.",
00924 MS_NOT_AVAILABLE);
00925 SG_ADD(&combined_kernel_weight, "combined_kernel_weight",
00926 "Combined kernel weight.", MS_AVAILABLE);
00927 SG_ADD(&optimization_initialized, "optimization_initialized",
00928 "Optimization is initialized.", MS_NOT_AVAILABLE);
00929 SG_ADD((machine_int_t*) &opt_type, "opt_type",
00930 "Optimization type.", MS_NOT_AVAILABLE);
00931 SG_ADD(&properties, "properties", "Kernel properties.", MS_NOT_AVAILABLE);
00932 SG_ADD((CSGObject**) &normalizer, "normalizer", "Normalize the kernel.",
00933 MS_AVAILABLE);
00934 }
00935
00936
00937 void CKernel::init()
00938 {
00939 cache_size=10;
00940 kernel_matrix=NULL;
00941 lhs=NULL;
00942 rhs=NULL;
00943 num_lhs=0;
00944 num_rhs=0;
00945 combined_kernel_weight=1;
00946 optimization_initialized=false;
00947 opt_type=FASTBUTMEMHUNGRY;
00948 properties=KP_NONE;
00949 normalizer=NULL;
00950
00951 #ifdef USE_SVMLIGHT
00952 memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00953 #endif //USE_SVMLIGHT
00954
00955 set_normalizer(new CIdentityKernelNormalizer());
00956 }
00957
00958 SGMatrix<float64_t> CKernel::get_parameter_gradient(TParameter* param,
00959 CSGObject* obj, index_t index)
00960 {
00961 return SGMatrix<float64_t>();
00962 }