Kernel.cpp

Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #include <shogun/lib/config.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/io/File.h>
00019 #include <shogun/lib/Time.h>
00020 #include <shogun/lib/Signal.h>
00021 
00022 #include <shogun/base/Parallel.h>
00023 
00024 #include <shogun/kernel/Kernel.h>
00025 #include <shogun/kernel/IdentityKernelNormalizer.h>
00026 #include <shogun/features/Features.h>
00027 #include <shogun/base/Parameter.h>
00028 
00029 #include <shogun/classifier/svm/SVM.h>
00030 
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #ifdef HAVE_PTHREAD
00036 #include <pthread.h>
00037 #endif
00038 
00039 using namespace shogun;
00040 
00041 CKernel::CKernel() : CSGObject()
00042 {
00043     init();
00044     register_params();
00045 }
00046 
00047 CKernel::CKernel(int32_t size) : CSGObject()
00048 {
00049     init();
00050     
00051     if (size<10)
00052         size=10;
00053 
00054     cache_size=size;
00055     register_params();
00056 }
00057 
00058 
00059 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00060 {
00061     init();
00062 
00063     if (size<10)
00064         size=10;
00065 
00066     cache_size=size;
00067 
00068     set_normalizer(new CIdentityKernelNormalizer());
00069     init(p_lhs, p_rhs);
00070     register_params();
00071 }
00072 
00073 CKernel::~CKernel()
00074 {
00075     if (get_is_initialized())
00076         SG_ERROR("Kernel still initialized on destruction.\n");
00077 
00078     remove_lhs_and_rhs();
00079     SG_UNREF(normalizer);
00080 
00081     SG_INFO("Kernel deleted (%p).\n", this);
00082 }
00083 
00084 #ifdef USE_SVMLIGHT
00085 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack)
00086 {
00087     if (size<10)
00088         size=10;
00089 
00090     kernel_cache_cleanup();
00091     cache_size=size;
00092 
00093     if (has_features() && get_num_vec_lhs())
00094         kernel_cache_init(cache_size, regression_hack);
00095 }
00096 #endif //USE_SVMLIGHT
00097 
00098 bool CKernel::init(CFeatures* l, CFeatures* r)
00099 {
00100     //make sure features were indeed supplied
00101     ASSERT(l);
00102     ASSERT(r);
00103 
00104     //make sure features are compatible
00105     ASSERT(l->get_feature_class()==r->get_feature_class());
00106     ASSERT(l->get_feature_type()==r->get_feature_type());
00107 
00108     //remove references to previous features
00109     remove_lhs_and_rhs();
00110 
00111     //increase reference counts
00112     SG_REF(l);
00113     if (l==r)
00114         lhs_equals_rhs=true;
00115     else // l!=r
00116         SG_REF(r);
00117 
00118     lhs=l;
00119     rhs=r;
00120 
00121     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00122     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00123 
00124     num_lhs=l->get_num_vectors();
00125     num_rhs=r->get_num_vectors();
00126 
00127     return true;
00128 }
00129 
00130 bool CKernel::set_normalizer(CKernelNormalizer* n)
00131 {
00132     SG_REF(n);
00133     if (lhs && rhs)
00134         n->init(this);
00135 
00136     SG_UNREF(normalizer);
00137     normalizer=n;
00138 
00139     return (normalizer!=NULL);
00140 }
00141 
00142 CKernelNormalizer* CKernel::get_normalizer()
00143 {
00144     SG_REF(normalizer)
00145     return normalizer;
00146 }
00147 
00148 bool CKernel::init_normalizer()
00149 {
00150     return normalizer->init(this);
00151 }
00152 
00153 void CKernel::cleanup()
00154 {
00155     remove_lhs_and_rhs();
00156 }
00157 
00158 #ifdef USE_SVMLIGHT
00159 /****************************** Cache handling *******************************/
00160 
00161 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack)
00162 {
00163     int32_t totdoc=get_num_vec_lhs();
00164     if (totdoc<=0)
00165     {
00166         SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n",
00167                 get_num_vec_lhs(), get_num_vec_rhs());
00168     }
00169     uint64_t buffer_size=0;
00170     int32_t i;
00171 
00172     //in regression the additional constraints are made by doubling the training data
00173     if (regression_hack)
00174         totdoc*=2;
00175 
00176     buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM);
00177     if (buffer_size>((uint64_t) totdoc)*totdoc)
00178         buffer_size=((uint64_t) totdoc)*totdoc;
00179 
00180     SG_INFO( "using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name());
00181 
00182     //make sure it fits in the *signed* KERNELCACHE_IDX type
00183     ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1)));
00184 
00185     kernel_cache.index = SG_MALLOC(int32_t, totdoc);
00186     kernel_cache.occu = SG_MALLOC(int32_t, totdoc);
00187     kernel_cache.lru = SG_MALLOC(int32_t, totdoc);
00188     kernel_cache.invindex = SG_MALLOC(int32_t, totdoc);
00189     kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc);
00190     kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc);
00191     kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size);
00192     kernel_cache.buffsize=buffer_size;
00193     kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc);
00194 
00195     if(kernel_cache.max_elems>totdoc) {
00196         kernel_cache.max_elems=totdoc;
00197     }
00198 
00199     kernel_cache.elems=0;   // initialize cache
00200     for(i=0;i<totdoc;i++) {
00201         kernel_cache.index[i]=-1;
00202         kernel_cache.lru[i]=0;
00203     }
00204     for(i=0;i<totdoc;i++) {
00205         kernel_cache.occu[i]=0;
00206         kernel_cache.invindex[i]=-1;
00207     }
00208 
00209     kernel_cache.activenum=totdoc;;
00210     for(i=0;i<totdoc;i++) {
00211         kernel_cache.active2totdoc[i]=i;
00212         kernel_cache.totdoc2active[i]=i;
00213     }
00214 
00215     kernel_cache.time=0;
00216 }
00217 
00218 void CKernel::get_kernel_row(
00219     int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line)
00220 {
00221     int32_t i,j;
00222     KERNELCACHE_IDX start;
00223 
00224     int32_t num_vectors = get_num_vec_lhs();
00225     if (docnum>=num_vectors)
00226         docnum=2*num_vectors-1-docnum;
00227 
00228     /* is cached? */
00229     if(kernel_cache.index[docnum] != -1)
00230     {
00231         kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */
00232         start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum];
00233 
00234         if (full_line)
00235         {
00236             for(j=0;j<get_num_vec_lhs();j++)
00237             {
00238                 if(kernel_cache.totdoc2active[j] >= 0)
00239                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00240                 else
00241                     buffer[j]=(float64_t) kernel(docnum, j);
00242             }
00243         }
00244         else
00245         {
00246             for(i=0;(j=active2dnum[i])>=0;i++)
00247             {
00248                 if(kernel_cache.totdoc2active[j] >= 0)
00249                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00250                 else
00251                 {
00252                     int32_t k=j;
00253                     if (k>=num_vectors)
00254                         k=2*num_vectors-1-k;
00255                     buffer[j]=(float64_t) kernel(docnum, k);
00256                 }
00257             }
00258         }
00259     }
00260     else
00261     {
00262         if (full_line)
00263         {
00264             for(j=0;j<get_num_vec_lhs();j++)
00265                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j);
00266         }
00267         else
00268         {
00269             for(i=0;(j=active2dnum[i])>=0;i++)
00270             {
00271                 int32_t k=j;
00272                 if (k>=num_vectors)
00273                     k=2*num_vectors-1-k;
00274                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k);
00275             }
00276         }
00277     }
00278 }
00279 
00280 
00281 // Fills cache for the row m
00282 void CKernel::cache_kernel_row(int32_t m)
00283 {
00284     register int32_t j,k,l;
00285     register KERNELCACHE_ELEM *cache;
00286 
00287     int32_t num_vectors = get_num_vec_lhs();
00288 
00289     if (m>=num_vectors)
00290         m=2*num_vectors-1-m;
00291 
00292     if(!kernel_cache_check(m))   // not cached yet
00293     {
00294         cache = kernel_cache_clean_and_malloc(m);
00295         if(cache) {
00296             l=kernel_cache.totdoc2active[m];
00297 
00298             for(j=0;j<kernel_cache.activenum;j++)  // fill cache
00299             {
00300                 k=kernel_cache.active2totdoc[j];
00301 
00302                 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) {
00303                     cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)
00304                         *kernel_cache.index[k]+l];
00305                 }
00306                 else
00307                 {
00308                     if (k>=num_vectors)
00309                         k=2*num_vectors-1-k;
00310 
00311                     cache[j]=kernel(m, k);
00312                 }
00313             }
00314         }
00315         else
00316             perror("Error: Kernel cache full! => increase cache size");
00317     }
00318 }
00319 
00320 
00321 void* CKernel::cache_multiple_kernel_row_helper(void* p)
00322 {
00323     int32_t j,k,l;
00324     S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p;
00325 
00326     for (int32_t i=params->start; i<params->end; i++)
00327     {
00328         KERNELCACHE_ELEM* cache=params->cache[i];
00329         int32_t m = params->uncached_rows[i];
00330         l=params->kernel_cache->totdoc2active[m];
00331 
00332         for(j=0;j<params->kernel_cache->activenum;j++)  // fill cache
00333         {
00334             k=params->kernel_cache->active2totdoc[j];
00335 
00336             if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) {
00337                 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum)
00338                     *params->kernel_cache->index[k]+l];
00339             }
00340             else
00341                 {
00342                     if (k>=params->num_vectors)
00343                         k=2*params->num_vectors-1-k;
00344 
00345                     cache[j]=params->kernel->kernel(m, k);
00346                 }
00347         }
00348 
00349         //now line m is cached
00350         params->needs_computation[m]=0;
00351     }
00352     return NULL;
00353 }
00354 
00355 // Fills cache for the rows in key
00356 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows)
00357 {
00358 #ifdef HAVE_PTHREAD
00359     if (parallel->get_num_threads()<2)
00360     {
00361 #endif
00362         for(int32_t i=0;i<num_rows;i++)
00363             cache_kernel_row(rows[i]);
00364 #ifdef HAVE_PTHREAD
00365     }
00366     else
00367     {
00368         // fill up kernel cache
00369         int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows);
00370         KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows);
00371         pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
00372         S_KTHREAD_PARAM* params = SG_MALLOC(S_KTHREAD_PARAM, parallel->get_num_threads()-1);
00373         int32_t num_threads=parallel->get_num_threads()-1;
00374         int32_t num_vec=get_num_vec_lhs();
00375         ASSERT(num_vec>0);
00376         uint8_t* needs_computation=SG_MALLOC(uint8_t, num_vec);
00377         memset(needs_computation, 0, sizeof(uint8_t)*num_vec);
00378         int32_t step=0;
00379         int32_t num=0;
00380         int32_t end=0;
00381 
00382         // allocate cachelines if necessary
00383         for (int32_t i=0; i<num_rows; i++)
00384         {
00385             int32_t idx=rows[i];
00386             if (kernel_cache_check(idx))
00387                 continue;
00388 
00389             if (idx>=num_vec)
00390                 idx=2*num_vec-1-idx;
00391 
00392             needs_computation[idx]=1;
00393             uncached_rows[num]=idx;
00394             cache[num]= kernel_cache_clean_and_malloc(idx);
00395 
00396             if (!cache[num])
00397                 SG_ERROR("Kernel cache full! => increase cache size\n");
00398 
00399             num++;
00400         }
00401 
00402         if (num>0)
00403         {
00404             step= num/parallel->get_num_threads();
00405 
00406             if (step<1)
00407             {
00408                 num_threads=num-1;
00409                 step=1;
00410             }
00411 
00412             for (int32_t t=0; t<num_threads; t++)
00413             {
00414                 params[t].kernel = this;
00415                 params[t].kernel_cache = &kernel_cache;
00416                 params[t].cache = cache;
00417                 params[t].uncached_rows = uncached_rows;
00418                 params[t].needs_computation = needs_computation;
00419                 params[t].num_uncached = num;
00420                 params[t].start = t*step;
00421                 params[t].end = (t+1)*step;
00422                 params[t].num_vectors = get_num_vec_lhs();
00423                 end=params[t].end;
00424 
00425                 int code=pthread_create(&threads[t], NULL,
00426                         CKernel::cache_multiple_kernel_row_helper, (void*)&params[t]);
00427 
00428                 if (!code)
00429                 {
00430                     SG_WARNING("Thread creation failed (thread %d of %d) "
00431                             "with error:'%s'\n",t, num_threads, strerror(code));
00432                     num_threads=t;
00433                     end=t*step;
00434                     break;
00435                 }
00436             }
00437         }
00438         else
00439             num_threads=-1;
00440 
00441 
00442         S_KTHREAD_PARAM last_param;
00443         last_param.kernel = this;
00444         last_param.kernel_cache = &kernel_cache;
00445         last_param.cache = cache;
00446         last_param.uncached_rows = uncached_rows;
00447         last_param.needs_computation = needs_computation;
00448         last_param.start = end;
00449         last_param.num_uncached = num;
00450         last_param.end = num;
00451         last_param.num_vectors = get_num_vec_lhs();
00452 
00453         cache_multiple_kernel_row_helper(&last_param);
00454 
00455 
00456         for (int32_t t=0; t<num_threads; t++)
00457         {
00458             if (pthread_join(threads[t], NULL) != 0)
00459                 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00460         }
00461 
00462         SG_FREE(needs_computation);
00463         SG_FREE(params);
00464         SG_FREE(threads);
00465         SG_FREE(cache);
00466         SG_FREE(uncached_rows);
00467     }
00468 #endif
00469 }
00470 
00471 // remove numshrink columns in the cache
00472 // which correspond to examples marked
00473 void CKernel::kernel_cache_shrink(
00474     int32_t totdoc, int32_t numshrink, int32_t *after)
00475 {
00476     register int32_t i,j,jj,scount;     // 0 in after.
00477     KERNELCACHE_IDX from=0,to=0;
00478     int32_t *keep;
00479 
00480     keep=SG_MALLOC(int32_t, totdoc);
00481     for(j=0;j<totdoc;j++) {
00482         keep[j]=1;
00483     }
00484     scount=0;
00485     for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) {
00486         j=kernel_cache.active2totdoc[jj];
00487         if(!after[j]) {
00488             scount++;
00489             keep[j]=0;
00490         }
00491     }
00492 
00493     for(i=0;i<kernel_cache.max_elems;i++) {
00494         for(jj=0;jj<kernel_cache.activenum;jj++) {
00495             j=kernel_cache.active2totdoc[jj];
00496             if(!keep[j]) {
00497                 from++;
00498             }
00499             else {
00500                 kernel_cache.buffer[to]=kernel_cache.buffer[from];
00501                 to++;
00502                 from++;
00503             }
00504         }
00505     }
00506 
00507     kernel_cache.activenum=0;
00508     for(j=0;j<totdoc;j++) {
00509         if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) {
00510             kernel_cache.active2totdoc[kernel_cache.activenum]=j;
00511             kernel_cache.totdoc2active[j]=kernel_cache.activenum;
00512             kernel_cache.activenum++;
00513         }
00514         else {
00515             kernel_cache.totdoc2active[j]=-1;
00516         }
00517     }
00518 
00519     kernel_cache.max_elems=
00520         (int32_t)(kernel_cache.buffsize/kernel_cache.activenum);
00521     if(kernel_cache.max_elems>totdoc) {
00522         kernel_cache.max_elems=totdoc;
00523     }
00524 
00525     SG_FREE(keep);
00526 
00527 }
00528 
00529 void CKernel::kernel_cache_reset_lru()
00530 {
00531     int32_t maxlru=0,k;
00532 
00533     for(k=0;k<kernel_cache.max_elems;k++) {
00534         if(maxlru < kernel_cache.lru[k])
00535             maxlru=kernel_cache.lru[k];
00536     }
00537     for(k=0;k<kernel_cache.max_elems;k++) {
00538         kernel_cache.lru[k]-=maxlru;
00539     }
00540 }
00541 
00542 void CKernel::kernel_cache_cleanup()
00543 {
00544     SG_FREE(kernel_cache.index);
00545     SG_FREE(kernel_cache.occu);
00546     SG_FREE(kernel_cache.lru);
00547     SG_FREE(kernel_cache.invindex);
00548     SG_FREE(kernel_cache.active2totdoc);
00549     SG_FREE(kernel_cache.totdoc2active);
00550     SG_FREE(kernel_cache.buffer);
00551     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00552 }
00553 
00554 int32_t CKernel::kernel_cache_malloc()
00555 {
00556   int32_t i;
00557 
00558   if(kernel_cache_space_available()) {
00559     for(i=0;i<kernel_cache.max_elems;i++) {
00560       if(!kernel_cache.occu[i]) {
00561     kernel_cache.occu[i]=1;
00562     kernel_cache.elems++;
00563     return(i);
00564       }
00565     }
00566   }
00567   return(-1);
00568 }
00569 
00570 void CKernel::kernel_cache_free(int32_t cacheidx)
00571 {
00572     kernel_cache.occu[cacheidx]=0;
00573     kernel_cache.elems--;
00574 }
00575 
00576 // remove least recently used cache
00577 // element
00578 int32_t CKernel::kernel_cache_free_lru()
00579 {
00580   register int32_t k,least_elem=-1,least_time;
00581 
00582   least_time=kernel_cache.time+1;
00583   for(k=0;k<kernel_cache.max_elems;k++) {
00584     if(kernel_cache.invindex[k] != -1) {
00585       if(kernel_cache.lru[k]<least_time) {
00586     least_time=kernel_cache.lru[k];
00587     least_elem=k;
00588       }
00589     }
00590   }
00591 
00592   if(least_elem != -1) {
00593     kernel_cache_free(least_elem);
00594     kernel_cache.index[kernel_cache.invindex[least_elem]]=-1;
00595     kernel_cache.invindex[least_elem]=-1;
00596     return(1);
00597   }
00598   return(0);
00599 }
00600 
00601 // Get a free cache entry. In case cache is full, the lru
00602 // element is removed.
00603 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx)
00604 {
00605     int32_t result;
00606     if((result = kernel_cache_malloc()) == -1) {
00607         if(kernel_cache_free_lru()) {
00608             result = kernel_cache_malloc();
00609         }
00610     }
00611     kernel_cache.index[cacheidx]=result;
00612     if(result == -1) {
00613         return(0);
00614     }
00615     kernel_cache.invindex[result]=cacheidx;
00616     kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru
00617     return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]];
00618 }
00619 #endif //USE_SVMLIGHT
00620 
00621 void CKernel::load(CFile* loader)
00622 {
00623     SG_SET_LOCALE_C;
00624     SG_RESET_LOCALE;
00625 }
00626 
00627 void CKernel::save(CFile* writer)
00628 {
00629     SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
00630     SG_SET_LOCALE_C;
00631     writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
00632     SG_FREE(k_matrix.matrix);
00633     SG_RESET_LOCALE;
00634 }
00635 
00636 void CKernel::remove_lhs_and_rhs()
00637 {
00638     if (rhs!=lhs)
00639         SG_UNREF(rhs);
00640     rhs = NULL;
00641     num_rhs=0;
00642 
00643     SG_UNREF(lhs);
00644     lhs = NULL;
00645     num_lhs=0;
00646     lhs_equals_rhs=false;
00647 
00648 #ifdef USE_SVMLIGHT
00649     cache_reset();
00650 #endif //USE_SVMLIGHT
00651 }
00652 
00653 void CKernel::remove_lhs()
00654 {
00655     if (rhs==lhs)
00656         rhs=NULL;
00657     SG_UNREF(lhs);
00658     lhs = NULL;
00659     num_lhs=0;
00660     lhs_equals_rhs=false;
00661 #ifdef USE_SVMLIGHT
00662     cache_reset();
00663 #endif //USE_SVMLIGHT
00664 }
00665 
00667 void CKernel::remove_rhs()
00668 {
00669     if (rhs!=lhs)
00670         SG_UNREF(rhs);
00671     rhs = NULL;
00672     num_rhs=0;
00673     lhs_equals_rhs=false;
00674 
00675 #ifdef USE_SVMLIGHT
00676     cache_reset();
00677 #endif //USE_SVMLIGHT
00678 }
00679 
00680 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break;
00681 
00682 void CKernel::list_kernel()
00683 {
00684     SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00685             get_combined_kernel_weight(),
00686             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00687             "SLOWBUTMEMEFFICIENT");
00688 
00689     switch (get_kernel_type())
00690     {
00691         ENUM_CASE(K_UNKNOWN)
00692         ENUM_CASE(K_LINEAR)
00693         ENUM_CASE(K_POLY)
00694         ENUM_CASE(K_GAUSSIAN)
00695         ENUM_CASE(K_GAUSSIANSHIFT)
00696         ENUM_CASE(K_GAUSSIANMATCH)
00697         ENUM_CASE(K_HISTOGRAM)
00698         ENUM_CASE(K_SALZBERG)
00699         ENUM_CASE(K_LOCALITYIMPROVED)
00700         ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
00701         ENUM_CASE(K_FIXEDDEGREE)
00702         ENUM_CASE(K_WEIGHTEDDEGREE)
00703         ENUM_CASE(K_WEIGHTEDDEGREEPOS)
00704         ENUM_CASE(K_WEIGHTEDDEGREERBF)
00705         ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
00706         ENUM_CASE(K_POLYMATCH)
00707         ENUM_CASE(K_ALIGNMENT)
00708         ENUM_CASE(K_COMMWORDSTRING)
00709         ENUM_CASE(K_COMMULONGSTRING)
00710         ENUM_CASE(K_SPECTRUMRBF)
00711         ENUM_CASE(K_COMBINED)
00712         ENUM_CASE(K_AUC)
00713         ENUM_CASE(K_CUSTOM)
00714         ENUM_CASE(K_SIGMOID)
00715         ENUM_CASE(K_CHI2)
00716         ENUM_CASE(K_DIAG)
00717         ENUM_CASE(K_CONST)
00718         ENUM_CASE(K_DISTANCE)
00719         ENUM_CASE(K_LOCALALIGNMENT)
00720         ENUM_CASE(K_PYRAMIDCHI2)
00721         ENUM_CASE(K_OLIGO)
00722         ENUM_CASE(K_MATCHWORD)
00723         ENUM_CASE(K_TPPK)
00724         ENUM_CASE(K_REGULATORYMODULES)
00725         ENUM_CASE(K_SPARSESPATIALSAMPLE)
00726         ENUM_CASE(K_HISTOGRAMINTERSECTION)
00727         ENUM_CASE(K_WAVELET)
00728         ENUM_CASE(K_WAVE)
00729         ENUM_CASE(K_CAUCHY)
00730         ENUM_CASE(K_TSTUDENT)
00731         ENUM_CASE(K_MULTIQUADRIC)
00732         ENUM_CASE(K_EXPONENTIAL)
00733         ENUM_CASE(K_RATIONAL_QUADRATIC)
00734         ENUM_CASE(K_POWER)
00735         ENUM_CASE(K_SPHERICAL)
00736         ENUM_CASE(K_LOG)
00737         ENUM_CASE(K_SPLINE)
00738         ENUM_CASE(K_ANOVA)
00739         ENUM_CASE(K_CIRCULAR)
00740         ENUM_CASE(K_INVERSEMULTIQUADRIC)
00741         ENUM_CASE(K_SPECTRUMMISMATCHRBF)
00742         ENUM_CASE(K_DISTANTSEGMENTS)
00743         ENUM_CASE(K_BESSEL)
00744     }
00745 
00746     switch (get_feature_class())
00747     {
00748         ENUM_CASE(C_UNKNOWN)
00749         ENUM_CASE(C_SIMPLE)
00750         ENUM_CASE(C_SPARSE)
00751         ENUM_CASE(C_STRING)
00752         ENUM_CASE(C_STREAMING_SIMPLE)
00753         ENUM_CASE(C_STREAMING_SPARSE)
00754         ENUM_CASE(C_STREAMING_STRING)
00755         ENUM_CASE(C_STREAMING_VW)
00756         ENUM_CASE(C_COMBINED)
00757         ENUM_CASE(C_COMBINED_DOT)
00758         ENUM_CASE(C_WD)
00759         ENUM_CASE(C_SPEC)
00760         ENUM_CASE(C_WEIGHTEDSPEC)
00761         ENUM_CASE(C_POLY)
00762         ENUM_CASE(C_ANY)
00763     }
00764 
00765     switch (get_feature_type())
00766     {
00767         ENUM_CASE(F_UNKNOWN)
00768         ENUM_CASE(F_BOOL)
00769         ENUM_CASE(F_CHAR)
00770         ENUM_CASE(F_BYTE)
00771         ENUM_CASE(F_SHORT)
00772         ENUM_CASE(F_WORD)
00773         ENUM_CASE(F_INT)
00774         ENUM_CASE(F_UINT)
00775         ENUM_CASE(F_LONG)
00776         ENUM_CASE(F_ULONG)
00777         ENUM_CASE(F_SHORTREAL)
00778         ENUM_CASE(F_DREAL)
00779         ENUM_CASE(F_LONGREAL)
00780         ENUM_CASE(F_ANY)
00781     }
00782     SG_INFO( "\n");
00783 }
00784 #undef ENUM_CASE
00785 
00786 bool CKernel::init_optimization(
00787     int32_t count, int32_t *IDX, float64_t * weights)
00788 {
00789    SG_ERROR( "kernel does not support linadd optimization\n");
00790     return false ;
00791 }
00792 
00793 bool CKernel::delete_optimization()
00794 {
00795    SG_ERROR( "kernel does not support linadd optimization\n");
00796     return false;
00797 }
00798 
00799 float64_t CKernel::compute_optimized(int32_t vector_idx)
00800 {
00801    SG_ERROR( "kernel does not support linadd optimization\n");
00802     return 0;
00803 }
00804 
00805 void CKernel::compute_batch(
00806     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00807     int32_t* IDX, float64_t* weights, float64_t factor)
00808 {
00809    SG_ERROR( "kernel does not support batch computation\n");
00810 }
00811 
00812 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00813 {
00814    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00815 }
00816 
00817 void CKernel::clear_normal()
00818 {
00819    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00820 }
00821 
00822 int32_t CKernel::get_num_subkernels()
00823 {
00824     return 1;
00825 }
00826 
00827 void CKernel::compute_by_subkernel(
00828     int32_t vector_idx, float64_t * subkernel_contrib)
00829 {
00830    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00831 }
00832 
00833 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00834 {
00835     num_weights=1 ;
00836     return &combined_kernel_weight ;
00837 }
00838 
00839 void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights)
00840 {
00841     combined_kernel_weight = weights[0] ;
00842     if (num_weights!=1)
00843       SG_ERROR( "number of subkernel weights should be one ...\n");
00844 }
00845 
00846 bool CKernel::init_optimization_svm(CSVM * svm)
00847 {
00848     int32_t num_suppvec=svm->get_num_support_vectors();
00849     int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
00850     float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
00851 
00852     for (int32_t i=0; i<num_suppvec; i++)
00853     {
00854         sv_idx[i]    = svm->get_support_vector(i);
00855         sv_weight[i] = svm->get_alpha(i);
00856     }
00857     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00858 
00859     SG_FREE(sv_idx);
00860     SG_FREE(sv_weight);
00861     return ret;
00862 }
00863 
00864 void CKernel::load_serializable_post() throw (ShogunException)
00865 {
00866     CSGObject::load_serializable_post();
00867     if (lhs_equals_rhs)
00868         rhs=lhs;
00869 }
00870 
00871 void CKernel::save_serializable_pre() throw (ShogunException)
00872 {
00873     CSGObject::save_serializable_pre();
00874 
00875     if (lhs_equals_rhs)
00876         rhs=NULL;
00877 }
00878 
00879 void CKernel::save_serializable_post() throw (ShogunException)
00880 {
00881     CSGObject::save_serializable_post();
00882 
00883     if (lhs_equals_rhs)
00884         rhs=lhs;
00885 }
00886 
00887 void CKernel::register_params()   {
00888     m_parameters->add(&cache_size, "cache_size",
00889                       "Cache size in MB.");
00890     m_parameters->add((CSGObject**) &lhs, "lhs",
00891                       "Feature vectors to occur on left hand side.");
00892     m_parameters->add((CSGObject**) &rhs, "rhs",
00893                       "Feature vectors to occur on right hand side.");
00894     m_parameters->add(&lhs_equals_rhs, "lhs_equals_rhs",
00895                       "If features on lhs are the same as on rhs.");
00896     m_parameters->add(&num_lhs, "num_lhs",
00897                       "Number of feature vectors on left hand side.");
00898     m_parameters->add(&num_rhs, "num_rhs",
00899                       "Number of feature vectors on right hand side.");
00900     m_parameters->add(&combined_kernel_weight, "combined_kernel_weight",
00901                       "Combined kernel weight.");
00902     m_parameters->add(&optimization_initialized,
00903                       "optimization_initialized",
00904                       "Optimization is initialized.");
00905     m_parameters->add((machine_int_t*) &opt_type, "opt_type",
00906                       "Optimization type.");
00907     m_parameters->add(&properties, "properties",
00908                       "Kernel properties.");
00909     m_parameters->add((CSGObject**) &normalizer, "normalizer",
00910                       "Normalize the kernel.");
00911 }
00912 
00913 
00914 void CKernel::init()
00915 {
00916     cache_size=10;
00917     kernel_matrix=NULL;
00918     lhs=NULL;
00919     rhs=NULL;
00920     num_lhs=0;
00921     num_rhs=0;
00922     combined_kernel_weight=1;
00923     optimization_initialized=false;
00924     opt_type=FASTBUTMEMHUNGRY;
00925     properties=KP_NONE;
00926     normalizer=NULL;
00927 
00928 #ifdef USE_SVMLIGHT
00929     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00930 #endif //USE_SVMLIGHT
00931 
00932     set_normalizer(new CIdentityKernelNormalizer());
00933 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation