Kernel.cpp

Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #include <shogun/lib/config.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/io/File.h>
00019 #include <shogun/lib/Time.h>
00020 #include <shogun/lib/Signal.h>
00021 
00022 #include <shogun/base/Parallel.h>
00023 
00024 #include <shogun/kernel/Kernel.h>
00025 #include <shogun/kernel/IdentityKernelNormalizer.h>
00026 #include <shogun/features/Features.h>
00027 #include <shogun/base/Parameter.h>
00028 
00029 #include <shogun/classifier/svm/SVM.h>
00030 
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #ifdef HAVE_PTHREAD
00036 #include <pthread.h>
00037 #endif
00038 
00039 using namespace shogun;
00040 
00041 CKernel::CKernel() : CSGObject()
00042 {
00043     init();
00044     register_params();
00045 }
00046 
00047 CKernel::CKernel(int32_t size) : CSGObject()
00048 {
00049     init();
00050     
00051     if (size<10)
00052         size=10;
00053 
00054     cache_size=size;
00055     register_params();
00056 }
00057 
00058 
00059 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00060 {
00061     init();
00062 
00063     if (size<10)
00064         size=10;
00065 
00066     cache_size=size;
00067 
00068     set_normalizer(new CIdentityKernelNormalizer());
00069     init(p_lhs, p_rhs);
00070     register_params();
00071 }
00072 
00073 CKernel::~CKernel()
00074 {
00075     if (get_is_initialized())
00076         SG_ERROR("Kernel still initialized on destruction.\n");
00077 
00078     remove_lhs_and_rhs();
00079     SG_UNREF(normalizer);
00080 
00081     SG_INFO("Kernel deleted (%p).\n", this);
00082 }
00083 
00084 #ifdef USE_SVMLIGHT
00085 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack)
00086 {
00087     if (size<10)
00088         size=10;
00089 
00090     kernel_cache_cleanup();
00091     cache_size=size;
00092 
00093     if (has_features() && get_num_vec_lhs())
00094         kernel_cache_init(cache_size, regression_hack);
00095 }
00096 #endif //USE_SVMLIGHT
00097 
00098 bool CKernel::init(CFeatures* l, CFeatures* r)
00099 {
00100     //make sure features were indeed supplied
00101     ASSERT(l);
00102     ASSERT(r);
00103 
00104     //make sure features are compatible
00105     ASSERT(l->get_feature_class()==r->get_feature_class());
00106     ASSERT(l->get_feature_type()==r->get_feature_type());
00107 
00108     //remove references to previous features
00109     remove_lhs_and_rhs();
00110 
00111     //increase reference counts
00112     SG_REF(l);
00113     if (l==r)
00114         lhs_equals_rhs=true;
00115     else // l!=r
00116         SG_REF(r);
00117 
00118     lhs=l;
00119     rhs=r;
00120 
00121     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00122     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00123 
00124     num_lhs=l->get_num_vectors();
00125     num_rhs=r->get_num_vectors();
00126 
00127     return true;
00128 }
00129 
00130 bool CKernel::set_normalizer(CKernelNormalizer* n)
00131 {
00132     SG_REF(n);
00133     if (lhs && rhs)
00134         n->init(this);
00135 
00136     SG_UNREF(normalizer);
00137     normalizer=n;
00138 
00139     return (normalizer!=NULL);
00140 }
00141 
00142 CKernelNormalizer* CKernel::get_normalizer()
00143 {
00144     SG_REF(normalizer)
00145     return normalizer;
00146 }
00147 
00148 bool CKernel::init_normalizer()
00149 {
00150     return normalizer->init(this);
00151 }
00152 
00153 void CKernel::cleanup()
00154 {
00155     remove_lhs_and_rhs();
00156 }
00157 
00158 #ifdef USE_SVMLIGHT
00159 /****************************** Cache handling *******************************/
00160 
00161 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack)
00162 {
00163     int32_t totdoc=get_num_vec_lhs();
00164     if (totdoc<=0)
00165     {
00166         SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n",
00167                 get_num_vec_lhs(), get_num_vec_rhs());
00168     }
00169     uint64_t buffer_size=0;
00170     int32_t i;
00171 
00172     //in regression the additional constraints are made by doubling the training data
00173     if (regression_hack)
00174         totdoc*=2;
00175 
00176     buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM);
00177     if (buffer_size>((uint64_t) totdoc)*totdoc)
00178         buffer_size=((uint64_t) totdoc)*totdoc;
00179 
00180     SG_INFO( "using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name());
00181 
00182     //make sure it fits in the *signed* KERNELCACHE_IDX type
00183     ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1)));
00184 
00185     kernel_cache.index = SG_MALLOC(int32_t, totdoc);
00186     kernel_cache.occu = SG_MALLOC(int32_t, totdoc);
00187     kernel_cache.lru = SG_MALLOC(int32_t, totdoc);
00188     kernel_cache.invindex = SG_MALLOC(int32_t, totdoc);
00189     kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc);
00190     kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc);
00191     kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size);
00192     kernel_cache.buffsize=buffer_size;
00193     kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc);
00194 
00195     if(kernel_cache.max_elems>totdoc) {
00196         kernel_cache.max_elems=totdoc;
00197     }
00198 
00199     kernel_cache.elems=0;   // initialize cache
00200     for(i=0;i<totdoc;i++) {
00201         kernel_cache.index[i]=-1;
00202         kernel_cache.lru[i]=0;
00203     }
00204     for(i=0;i<totdoc;i++) {
00205         kernel_cache.occu[i]=0;
00206         kernel_cache.invindex[i]=-1;
00207     }
00208 
00209     kernel_cache.activenum=totdoc;;
00210     for(i=0;i<totdoc;i++) {
00211         kernel_cache.active2totdoc[i]=i;
00212         kernel_cache.totdoc2active[i]=i;
00213     }
00214 
00215     kernel_cache.time=0;
00216 }
00217 
00218 void CKernel::get_kernel_row(
00219     int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line)
00220 {
00221     int32_t i,j;
00222     KERNELCACHE_IDX start;
00223 
00224     int32_t num_vectors = get_num_vec_lhs();
00225     if (docnum>=num_vectors)
00226         docnum=2*num_vectors-1-docnum;
00227 
00228     /* is cached? */
00229     if(kernel_cache.index[docnum] != -1)
00230     {
00231         kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */
00232         start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum];
00233 
00234         if (full_line)
00235         {
00236             for(j=0;j<get_num_vec_lhs();j++)
00237             {
00238                 if(kernel_cache.totdoc2active[j] >= 0)
00239                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00240                 else
00241                     buffer[j]=(float64_t) kernel(docnum, j);
00242             }
00243         }
00244         else
00245         {
00246             for(i=0;(j=active2dnum[i])>=0;i++)
00247             {
00248                 if(kernel_cache.totdoc2active[j] >= 0)
00249                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00250                 else
00251                 {
00252                     int32_t k=j;
00253                     if (k>=num_vectors)
00254                         k=2*num_vectors-1-k;
00255                     buffer[j]=(float64_t) kernel(docnum, k);
00256                 }
00257             }
00258         }
00259     }
00260     else
00261     {
00262         if (full_line)
00263         {
00264             for(j=0;j<get_num_vec_lhs();j++)
00265                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j);
00266         }
00267         else
00268         {
00269             for(i=0;(j=active2dnum[i])>=0;i++)
00270             {
00271                 int32_t k=j;
00272                 if (k>=num_vectors)
00273                     k=2*num_vectors-1-k;
00274                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k);
00275             }
00276         }
00277     }
00278 }
00279 
00280 
00281 // Fills cache for the row m
00282 void CKernel::cache_kernel_row(int32_t m)
00283 {
00284     register int32_t j,k,l;
00285     register KERNELCACHE_ELEM *cache;
00286 
00287     int32_t num_vectors = get_num_vec_lhs();
00288 
00289     if (m>=num_vectors)
00290         m=2*num_vectors-1-m;
00291 
00292     if(!kernel_cache_check(m))   // not cached yet
00293     {
00294         cache = kernel_cache_clean_and_malloc(m);
00295         if(cache) {
00296             l=kernel_cache.totdoc2active[m];
00297 
00298             for(j=0;j<kernel_cache.activenum;j++)  // fill cache
00299             {
00300                 k=kernel_cache.active2totdoc[j];
00301 
00302                 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) {
00303                     cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)
00304                         *kernel_cache.index[k]+l];
00305                 }
00306                 else
00307                 {
00308                     if (k>=num_vectors)
00309                         k=2*num_vectors-1-k;
00310 
00311                     cache[j]=kernel(m, k);
00312                 }
00313             }
00314         }
00315         else
00316             perror("Error: Kernel cache full! => increase cache size");
00317     }
00318 }
00319 
00320 
00321 void* CKernel::cache_multiple_kernel_row_helper(void* p)
00322 {
00323     int32_t j,k,l;
00324     S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p;
00325 
00326     for (int32_t i=params->start; i<params->end; i++)
00327     {
00328         KERNELCACHE_ELEM* cache=params->cache[i];
00329         int32_t m = params->uncached_rows[i];
00330         l=params->kernel_cache->totdoc2active[m];
00331 
00332         for(j=0;j<params->kernel_cache->activenum;j++)  // fill cache
00333         {
00334             k=params->kernel_cache->active2totdoc[j];
00335 
00336             if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) {
00337                 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum)
00338                     *params->kernel_cache->index[k]+l];
00339             }
00340             else
00341                 {
00342                     if (k>=params->num_vectors)
00343                         k=2*params->num_vectors-1-k;
00344 
00345                     cache[j]=params->kernel->kernel(m, k);
00346                 }
00347         }
00348 
00349         //now line m is cached
00350         params->needs_computation[m]=0;
00351     }
00352     return NULL;
00353 }
00354 
00355 // Fills cache for the rows in key
00356 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows)
00357 {
00358 #ifdef HAVE_PTHREAD
00359     int32_t nthreads=parallel->get_num_threads();
00360 
00361     if (nthreads<2)
00362     {
00363 #endif
00364         for(int32_t i=0;i<num_rows;i++)
00365             cache_kernel_row(rows[i]);
00366 #ifdef HAVE_PTHREAD
00367     }
00368     else
00369     {
00370         // fill up kernel cache
00371         int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows);
00372         KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows);
00373         pthread_t* threads = SG_MALLOC(pthread_t, nthreads-1);
00374         S_KTHREAD_PARAM* params = SG_MALLOC(S_KTHREAD_PARAM, nthreads-1);
00375         int32_t num_threads=nthreads-1;
00376         int32_t num_vec=get_num_vec_lhs();
00377         ASSERT(num_vec>0);
00378         uint8_t* needs_computation=SG_CALLOC(uint8_t, num_vec);
00379 
00380         int32_t step=0;
00381         int32_t num=0;
00382         int32_t end=0;
00383 
00384         // allocate cachelines if necessary
00385         for (int32_t i=0; i<num_rows; i++)
00386         {
00387             int32_t idx=rows[i];
00388             if (idx>=num_vec)
00389                 idx=2*num_vec-1-idx;
00390 
00391             if (kernel_cache_check(idx))
00392                 continue;
00393 
00394             needs_computation[idx]=1;
00395             uncached_rows[num]=idx;
00396             cache[num]= kernel_cache_clean_and_malloc(idx);
00397 
00398             if (!cache[num])
00399                 SG_ERROR("Kernel cache full! => increase cache size\n");
00400 
00401             num++;
00402         }
00403 
00404         if (num>0)
00405         {
00406             step= num/nthreads;
00407 
00408             if (step<1)
00409             {
00410                 num_threads=num-1;
00411                 step=1;
00412             }
00413 
00414             for (int32_t t=0; t<num_threads; t++)
00415             {
00416                 params[t].kernel = this;
00417                 params[t].kernel_cache = &kernel_cache;
00418                 params[t].cache = cache;
00419                 params[t].uncached_rows = uncached_rows;
00420                 params[t].needs_computation = needs_computation;
00421                 params[t].num_uncached = num;
00422                 params[t].start = t*step;
00423                 params[t].end = (t+1)*step;
00424                 params[t].num_vectors = get_num_vec_lhs();
00425                 end=params[t].end;
00426 
00427                 int code=pthread_create(&threads[t], NULL,
00428                         CKernel::cache_multiple_kernel_row_helper, (void*)&params[t]);
00429 
00430                 if (code != 0)
00431                 {
00432                     SG_WARNING("Thread creation failed (thread %d of %d) "
00433                             "with error:'%s'\n",t, num_threads, strerror(code));
00434                     num_threads=t;
00435                     end=t*step;
00436                     break;
00437                 }
00438             }
00439         }
00440         else
00441             num_threads=-1;
00442 
00443 
00444         S_KTHREAD_PARAM last_param;
00445         last_param.kernel = this;
00446         last_param.kernel_cache = &kernel_cache;
00447         last_param.cache = cache;
00448         last_param.uncached_rows = uncached_rows;
00449         last_param.needs_computation = needs_computation;
00450         last_param.start = end;
00451         last_param.num_uncached = num;
00452         last_param.end = num;
00453         last_param.num_vectors = get_num_vec_lhs();
00454 
00455         cache_multiple_kernel_row_helper(&last_param);
00456 
00457 
00458         for (int32_t t=0; t<num_threads; t++)
00459         {
00460             if (pthread_join(threads[t], NULL) != 0)
00461                 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00462         }
00463 
00464         SG_FREE(needs_computation);
00465         SG_FREE(params);
00466         SG_FREE(threads);
00467         SG_FREE(cache);
00468         SG_FREE(uncached_rows);
00469     }
00470 #endif
00471 }
00472 
00473 // remove numshrink columns in the cache
00474 // which correspond to examples marked
00475 void CKernel::kernel_cache_shrink(
00476     int32_t totdoc, int32_t numshrink, int32_t *after)
00477 {
00478     register int32_t i,j,jj,scount;     // 0 in after.
00479     KERNELCACHE_IDX from=0,to=0;
00480     int32_t *keep;
00481 
00482     keep=SG_MALLOC(int32_t, totdoc);
00483     for(j=0;j<totdoc;j++) {
00484         keep[j]=1;
00485     }
00486     scount=0;
00487     for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) {
00488         j=kernel_cache.active2totdoc[jj];
00489         if(!after[j]) {
00490             scount++;
00491             keep[j]=0;
00492         }
00493     }
00494 
00495     for(i=0;i<kernel_cache.max_elems;i++) {
00496         for(jj=0;jj<kernel_cache.activenum;jj++) {
00497             j=kernel_cache.active2totdoc[jj];
00498             if(!keep[j]) {
00499                 from++;
00500             }
00501             else {
00502                 kernel_cache.buffer[to]=kernel_cache.buffer[from];
00503                 to++;
00504                 from++;
00505             }
00506         }
00507     }
00508 
00509     kernel_cache.activenum=0;
00510     for(j=0;j<totdoc;j++) {
00511         if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) {
00512             kernel_cache.active2totdoc[kernel_cache.activenum]=j;
00513             kernel_cache.totdoc2active[j]=kernel_cache.activenum;
00514             kernel_cache.activenum++;
00515         }
00516         else {
00517             kernel_cache.totdoc2active[j]=-1;
00518         }
00519     }
00520 
00521     kernel_cache.max_elems=
00522         (int32_t)(kernel_cache.buffsize/kernel_cache.activenum);
00523     if(kernel_cache.max_elems>totdoc) {
00524         kernel_cache.max_elems=totdoc;
00525     }
00526 
00527     SG_FREE(keep);
00528 
00529 }
00530 
00531 void CKernel::kernel_cache_reset_lru()
00532 {
00533     int32_t maxlru=0,k;
00534 
00535     for(k=0;k<kernel_cache.max_elems;k++) {
00536         if(maxlru < kernel_cache.lru[k])
00537             maxlru=kernel_cache.lru[k];
00538     }
00539     for(k=0;k<kernel_cache.max_elems;k++) {
00540         kernel_cache.lru[k]-=maxlru;
00541     }
00542 }
00543 
00544 void CKernel::kernel_cache_cleanup()
00545 {
00546     SG_FREE(kernel_cache.index);
00547     SG_FREE(kernel_cache.occu);
00548     SG_FREE(kernel_cache.lru);
00549     SG_FREE(kernel_cache.invindex);
00550     SG_FREE(kernel_cache.active2totdoc);
00551     SG_FREE(kernel_cache.totdoc2active);
00552     SG_FREE(kernel_cache.buffer);
00553     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00554 }
00555 
00556 int32_t CKernel::kernel_cache_malloc()
00557 {
00558   int32_t i;
00559 
00560   if(kernel_cache_space_available()) {
00561     for(i=0;i<kernel_cache.max_elems;i++) {
00562       if(!kernel_cache.occu[i]) {
00563     kernel_cache.occu[i]=1;
00564     kernel_cache.elems++;
00565     return(i);
00566       }
00567     }
00568   }
00569   return(-1);
00570 }
00571 
00572 void CKernel::kernel_cache_free(int32_t cacheidx)
00573 {
00574     kernel_cache.occu[cacheidx]=0;
00575     kernel_cache.elems--;
00576 }
00577 
00578 // remove least recently used cache
00579 // element
00580 int32_t CKernel::kernel_cache_free_lru()
00581 {
00582   register int32_t k,least_elem=-1,least_time;
00583 
00584   least_time=kernel_cache.time+1;
00585   for(k=0;k<kernel_cache.max_elems;k++) {
00586     if(kernel_cache.invindex[k] != -1) {
00587       if(kernel_cache.lru[k]<least_time) {
00588     least_time=kernel_cache.lru[k];
00589     least_elem=k;
00590       }
00591     }
00592   }
00593 
00594   if(least_elem != -1) {
00595     kernel_cache_free(least_elem);
00596     kernel_cache.index[kernel_cache.invindex[least_elem]]=-1;
00597     kernel_cache.invindex[least_elem]=-1;
00598     return(1);
00599   }
00600   return(0);
00601 }
00602 
00603 // Get a free cache entry. In case cache is full, the lru
00604 // element is removed.
00605 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx)
00606 {
00607     int32_t result;
00608     if((result = kernel_cache_malloc()) == -1) {
00609         if(kernel_cache_free_lru()) {
00610             result = kernel_cache_malloc();
00611         }
00612     }
00613     kernel_cache.index[cacheidx]=result;
00614     if(result == -1) {
00615         return(0);
00616     }
00617     kernel_cache.invindex[result]=cacheidx;
00618     kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru
00619     return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]];
00620 }
00621 #endif //USE_SVMLIGHT
00622 
00623 void CKernel::load(CFile* loader)
00624 {
00625     SG_SET_LOCALE_C;
00626     SG_RESET_LOCALE;
00627 }
00628 
00629 void CKernel::save(CFile* writer)
00630 {
00631     SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
00632     SG_SET_LOCALE_C;
00633     writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
00634     SG_FREE(k_matrix.matrix);
00635     SG_RESET_LOCALE;
00636 }
00637 
00638 void CKernel::remove_lhs_and_rhs()
00639 {
00640     if (rhs!=lhs)
00641         SG_UNREF(rhs);
00642     rhs = NULL;
00643     num_rhs=0;
00644 
00645     SG_UNREF(lhs);
00646     lhs = NULL;
00647     num_lhs=0;
00648     lhs_equals_rhs=false;
00649 
00650 #ifdef USE_SVMLIGHT
00651     cache_reset();
00652 #endif //USE_SVMLIGHT
00653 }
00654 
00655 void CKernel::remove_lhs()
00656 {
00657     if (rhs==lhs)
00658         rhs=NULL;
00659     SG_UNREF(lhs);
00660     lhs = NULL;
00661     num_lhs=0;
00662     lhs_equals_rhs=false;
00663 #ifdef USE_SVMLIGHT
00664     cache_reset();
00665 #endif //USE_SVMLIGHT
00666 }
00667 
00669 void CKernel::remove_rhs()
00670 {
00671     if (rhs!=lhs)
00672         SG_UNREF(rhs);
00673     rhs = NULL;
00674     num_rhs=0;
00675     lhs_equals_rhs=false;
00676 
00677 #ifdef USE_SVMLIGHT
00678     cache_reset();
00679 #endif //USE_SVMLIGHT
00680 }
00681 
00682 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break;
00683 
00684 void CKernel::list_kernel()
00685 {
00686     SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00687             get_combined_kernel_weight(),
00688             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00689             "SLOWBUTMEMEFFICIENT");
00690 
00691     switch (get_kernel_type())
00692     {
00693         ENUM_CASE(K_UNKNOWN)
00694         ENUM_CASE(K_LINEAR)
00695         ENUM_CASE(K_POLY)
00696         ENUM_CASE(K_GAUSSIAN)
00697         ENUM_CASE(K_GAUSSIANSHIFT)
00698         ENUM_CASE(K_GAUSSIANMATCH)
00699         ENUM_CASE(K_HISTOGRAM)
00700         ENUM_CASE(K_SALZBERG)
00701         ENUM_CASE(K_LOCALITYIMPROVED)
00702         ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
00703         ENUM_CASE(K_FIXEDDEGREE)
00704         ENUM_CASE(K_WEIGHTEDDEGREE)
00705         ENUM_CASE(K_WEIGHTEDDEGREEPOS)
00706         ENUM_CASE(K_WEIGHTEDDEGREERBF)
00707         ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
00708         ENUM_CASE(K_POLYMATCH)
00709         ENUM_CASE(K_ALIGNMENT)
00710         ENUM_CASE(K_COMMWORDSTRING)
00711         ENUM_CASE(K_COMMULONGSTRING)
00712         ENUM_CASE(K_SPECTRUMRBF)
00713         ENUM_CASE(K_COMBINED)
00714         ENUM_CASE(K_AUC)
00715         ENUM_CASE(K_CUSTOM)
00716         ENUM_CASE(K_SIGMOID)
00717         ENUM_CASE(K_CHI2)
00718         ENUM_CASE(K_DIAG)
00719         ENUM_CASE(K_CONST)
00720         ENUM_CASE(K_DISTANCE)
00721         ENUM_CASE(K_LOCALALIGNMENT)
00722         ENUM_CASE(K_PYRAMIDCHI2)
00723         ENUM_CASE(K_OLIGO)
00724         ENUM_CASE(K_MATCHWORD)
00725         ENUM_CASE(K_TPPK)
00726         ENUM_CASE(K_REGULATORYMODULES)
00727         ENUM_CASE(K_SPARSESPATIALSAMPLE)
00728         ENUM_CASE(K_HISTOGRAMINTERSECTION)
00729         ENUM_CASE(K_WAVELET)
00730         ENUM_CASE(K_WAVE)
00731         ENUM_CASE(K_CAUCHY)
00732         ENUM_CASE(K_TSTUDENT)
00733         ENUM_CASE(K_MULTIQUADRIC)
00734         ENUM_CASE(K_EXPONENTIAL)
00735         ENUM_CASE(K_RATIONAL_QUADRATIC)
00736         ENUM_CASE(K_POWER)
00737         ENUM_CASE(K_SPHERICAL)
00738         ENUM_CASE(K_LOG)
00739         ENUM_CASE(K_SPLINE)
00740         ENUM_CASE(K_ANOVA)
00741         ENUM_CASE(K_CIRCULAR)
00742         ENUM_CASE(K_INVERSEMULTIQUADRIC)
00743         ENUM_CASE(K_SPECTRUMMISMATCHRBF)
00744         ENUM_CASE(K_DISTANTSEGMENTS)
00745         ENUM_CASE(K_BESSEL)
00746     }
00747 
00748     switch (get_feature_class())
00749     {
00750         ENUM_CASE(C_UNKNOWN)
00751         ENUM_CASE(C_SIMPLE)
00752         ENUM_CASE(C_SPARSE)
00753         ENUM_CASE(C_STRING)
00754         ENUM_CASE(C_STREAMING_SIMPLE)
00755         ENUM_CASE(C_STREAMING_SPARSE)
00756         ENUM_CASE(C_STREAMING_STRING)
00757         ENUM_CASE(C_STREAMING_VW)
00758         ENUM_CASE(C_COMBINED)
00759         ENUM_CASE(C_COMBINED_DOT)
00760         ENUM_CASE(C_WD)
00761         ENUM_CASE(C_SPEC)
00762         ENUM_CASE(C_WEIGHTEDSPEC)
00763         ENUM_CASE(C_POLY)
00764         ENUM_CASE(C_ANY)
00765     }
00766 
00767     switch (get_feature_type())
00768     {
00769         ENUM_CASE(F_UNKNOWN)
00770         ENUM_CASE(F_BOOL)
00771         ENUM_CASE(F_CHAR)
00772         ENUM_CASE(F_BYTE)
00773         ENUM_CASE(F_SHORT)
00774         ENUM_CASE(F_WORD)
00775         ENUM_CASE(F_INT)
00776         ENUM_CASE(F_UINT)
00777         ENUM_CASE(F_LONG)
00778         ENUM_CASE(F_ULONG)
00779         ENUM_CASE(F_SHORTREAL)
00780         ENUM_CASE(F_DREAL)
00781         ENUM_CASE(F_LONGREAL)
00782         ENUM_CASE(F_ANY)
00783     }
00784     SG_INFO( "\n");
00785 }
00786 #undef ENUM_CASE
00787 
00788 bool CKernel::init_optimization(
00789     int32_t count, int32_t *IDX, float64_t * weights)
00790 {
00791    SG_ERROR( "kernel does not support linadd optimization\n");
00792     return false ;
00793 }
00794 
00795 bool CKernel::delete_optimization()
00796 {
00797    SG_ERROR( "kernel does not support linadd optimization\n");
00798     return false;
00799 }
00800 
00801 float64_t CKernel::compute_optimized(int32_t vector_idx)
00802 {
00803    SG_ERROR( "kernel does not support linadd optimization\n");
00804     return 0;
00805 }
00806 
00807 void CKernel::compute_batch(
00808     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00809     int32_t* IDX, float64_t* weights, float64_t factor)
00810 {
00811    SG_ERROR( "kernel does not support batch computation\n");
00812 }
00813 
00814 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00815 {
00816    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00817 }
00818 
00819 void CKernel::clear_normal()
00820 {
00821    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00822 }
00823 
00824 int32_t CKernel::get_num_subkernels()
00825 {
00826     return 1;
00827 }
00828 
00829 void CKernel::compute_by_subkernel(
00830     int32_t vector_idx, float64_t * subkernel_contrib)
00831 {
00832    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00833 }
00834 
00835 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00836 {
00837     num_weights=1 ;
00838     return &combined_kernel_weight ;
00839 }
00840 
00841 void CKernel::set_subkernel_weights(SGVector<float64_t> weights)
00842 {
00843     ASSERT(weights.vector);
00844     if (weights.vlen!=1)
00845       SG_ERROR( "number of subkernel weights should be one ...\n");
00846 
00847     combined_kernel_weight = weights.vector[0] ;
00848 }
00849 
00850 bool CKernel::init_optimization_svm(CSVM * svm)
00851 {
00852     int32_t num_suppvec=svm->get_num_support_vectors();
00853     int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
00854     float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
00855 
00856     for (int32_t i=0; i<num_suppvec; i++)
00857     {
00858         sv_idx[i]    = svm->get_support_vector(i);
00859         sv_weight[i] = svm->get_alpha(i);
00860     }
00861     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00862 
00863     SG_FREE(sv_idx);
00864     SG_FREE(sv_weight);
00865     return ret;
00866 }
00867 
00868 void CKernel::load_serializable_post() throw (ShogunException)
00869 {
00870     CSGObject::load_serializable_post();
00871     if (lhs_equals_rhs)
00872         rhs=lhs;
00873 }
00874 
00875 void CKernel::save_serializable_pre() throw (ShogunException)
00876 {
00877     CSGObject::save_serializable_pre();
00878 
00879     if (lhs_equals_rhs)
00880         rhs=NULL;
00881 }
00882 
00883 void CKernel::save_serializable_post() throw (ShogunException)
00884 {
00885     CSGObject::save_serializable_post();
00886 
00887     if (lhs_equals_rhs)
00888         rhs=lhs;
00889 }
00890 
00891 void CKernel::register_params()   {
00892     m_parameters->add(&cache_size, "cache_size",
00893                       "Cache size in MB.");
00894     m_parameters->add((CSGObject**) &lhs, "lhs",
00895                       "Feature vectors to occur on left hand side.");
00896     m_parameters->add((CSGObject**) &rhs, "rhs",
00897                       "Feature vectors to occur on right hand side.");
00898     m_parameters->add(&lhs_equals_rhs, "lhs_equals_rhs",
00899                       "If features on lhs are the same as on rhs.");
00900     m_parameters->add(&num_lhs, "num_lhs",
00901                       "Number of feature vectors on left hand side.");
00902     m_parameters->add(&num_rhs, "num_rhs",
00903                       "Number of feature vectors on right hand side.");
00904     m_parameters->add(&combined_kernel_weight, "combined_kernel_weight",
00905                       "Combined kernel weight.");
00906     m_parameters->add(&optimization_initialized,
00907                       "optimization_initialized",
00908                       "Optimization is initialized.");
00909     m_parameters->add((machine_int_t*) &opt_type, "opt_type",
00910                       "Optimization type.");
00911     m_parameters->add(&properties, "properties",
00912                       "Kernel properties.");
00913     m_parameters->add((CSGObject**) &normalizer, "normalizer",
00914                       "Normalize the kernel.");
00915 }
00916 
00917 
00918 void CKernel::init()
00919 {
00920     cache_size=10;
00921     kernel_matrix=NULL;
00922     lhs=NULL;
00923     rhs=NULL;
00924     num_lhs=0;
00925     num_rhs=0;
00926     combined_kernel_weight=1;
00927     optimization_initialized=false;
00928     opt_type=FASTBUTMEMHUNGRY;
00929     properties=KP_NONE;
00930     normalizer=NULL;
00931 
00932 #ifdef USE_SVMLIGHT
00933     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00934 #endif //USE_SVMLIGHT
00935 
00936     set_normalizer(new CIdentityKernelNormalizer());
00937 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation