Kernel.cpp

Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #include "lib/config.h"
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/File.h"
00019 #include "lib/Time.h"
00020 #include "lib/Signal.h"
00021 
00022 #include "base/Parallel.h"
00023 
00024 #include "kernel/Kernel.h"
00025 #include "kernel/IdentityKernelNormalizer.h"
00026 #include "features/Features.h"
00027 #include "base/Parameter.h"
00028 
00029 #include "classifier/svm/SVM.h"
00030 
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #ifndef WIN32
00036 #include <pthread.h>
00037 #endif
00038 
00039 using namespace shogun;
00040 
00041 CKernel::CKernel() : CSGObject()
00042 {
00043     init();
00044 }
00045 
00046 CKernel::CKernel(int32_t size) : CSGObject()
00047 {
00048     init();
00049 
00050     if (size<10)
00051         size=10;
00052 
00053     cache_size=size;
00054 }
00055 
00056 
00057 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00058 {
00059     init();
00060 
00061     if (size<10)
00062         size=10;
00063 
00064     cache_size=size;
00065 
00066     set_normalizer(new CIdentityKernelNormalizer());
00067     init(p_lhs, p_rhs);
00068 }
00069 
00070 CKernel::~CKernel()
00071 {
00072     if (get_is_initialized())
00073         SG_ERROR("Kernel still initialized on destruction.\n");
00074 
00075     remove_lhs_and_rhs();
00076     SG_UNREF(normalizer);
00077 
00078     SG_INFO("Kernel deleted (%p).\n", this);
00079 }
00080 
00081 void CKernel::get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n)
00082 {
00083     ASSERT(dst && m && n);
00084 
00085     float64_t* result = NULL;
00086 
00087     if (has_features())
00088     {
00089         int32_t num_vec1=get_num_vec_lhs();
00090         int32_t num_vec2=get_num_vec_rhs();
00091         *m=num_vec1;
00092         *n=num_vec2;
00093 
00094         int64_t total_num = ((int64_t) num_vec1) * num_vec2;
00095         SG_DEBUG( "allocating memory for a kernel matrix"
00096                 " of size %dx%d\n", num_vec1, num_vec2);
00097 
00098         result=(float64_t*) malloc(sizeof(float64_t)*total_num);
00099         ASSERT(result);
00100         get_kernel_matrix<float64_t>(num_vec1,num_vec2, result);
00101     }
00102     else
00103         SG_ERROR( "no features assigned to kernel\n");
00104 
00105     *dst=result;
00106 }
00107 
00108 #ifdef USE_SVMLIGHT
00109 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack)
00110 {
00111     if (size<10)
00112         size=10;
00113 
00114     kernel_cache_cleanup();
00115     cache_size=size;
00116 
00117     if (has_features() && get_num_vec_lhs())
00118         kernel_cache_init(cache_size, regression_hack);
00119 }
00120 #endif //USE_SVMLIGHT
00121 
00122 bool CKernel::init(CFeatures* l, CFeatures* r)
00123 {
00124     //make sure features were indeed supplied
00125     ASSERT(l);
00126     ASSERT(r);
00127 
00128     //make sure features are compatible
00129     ASSERT(l->get_feature_class()==r->get_feature_class());
00130     ASSERT(l->get_feature_type()==r->get_feature_type());
00131 
00132     //remove references to previous features
00133     remove_lhs_and_rhs();
00134 
00135     //increase reference counts
00136     SG_REF(l);
00137     if (l==r)
00138         lhs_equals_rhs=true;
00139     else // l!=r
00140         SG_REF(r);
00141 
00142     lhs=l;
00143     rhs=r;
00144 
00145     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00146     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00147 
00148     num_lhs=l->get_num_vectors();
00149     num_rhs=r->get_num_vectors();
00150 
00151     return true;
00152 }
00153 
00154 bool CKernel::set_normalizer(CKernelNormalizer* n)
00155 {
00156     SG_REF(n);
00157     if (lhs && rhs)
00158         n->init(this);
00159 
00160     SG_UNREF(normalizer);
00161     normalizer=n;
00162 
00163     return (normalizer!=NULL);
00164 }
00165 
00166 CKernelNormalizer* CKernel::get_normalizer()
00167 {
00168     SG_REF(normalizer)
00169     return normalizer;
00170 }
00171 
00172 bool CKernel::init_normalizer()
00173 {
00174     return normalizer->init(this);
00175 }
00176 
00177 void CKernel::cleanup()
00178 {
00179     remove_lhs_and_rhs();
00180 }
00181 
00182 #ifdef USE_SVMLIGHT
00183 /****************************** Cache handling *******************************/
00184 
00185 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack)
00186 {
00187     int32_t totdoc=get_num_vec_lhs();
00188     if (totdoc<=0)
00189     {
00190         SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n",
00191                 get_num_vec_lhs(), get_num_vec_rhs());
00192     }
00193     uint64_t buffer_size=0;
00194     int32_t i;
00195 
00196     //in regression the additional constraints are made by doubling the training data
00197     if (regression_hack)
00198         totdoc*=2;
00199 
00200     buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM);
00201     if (buffer_size>((uint64_t) totdoc)*totdoc)
00202         buffer_size=((uint64_t) totdoc)*totdoc;
00203 
00204     SG_INFO( "using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name());
00205 
00206     //make sure it fits in the *signed* KERNELCACHE_IDX type
00207     ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1)));
00208 
00209     kernel_cache.index = new int32_t[totdoc];
00210     kernel_cache.occu = new int32_t[totdoc];
00211     kernel_cache.lru = new int32_t[totdoc];
00212     kernel_cache.invindex = new int32_t[totdoc];
00213     kernel_cache.active2totdoc = new int32_t[totdoc];
00214     kernel_cache.totdoc2active = new int32_t[totdoc];
00215     kernel_cache.buffer = new KERNELCACHE_ELEM[buffer_size];
00216     kernel_cache.buffsize=buffer_size;
00217     kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc);
00218 
00219     if(kernel_cache.max_elems>totdoc) {
00220         kernel_cache.max_elems=totdoc;
00221     }
00222 
00223     kernel_cache.elems=0;   // initialize cache
00224     for(i=0;i<totdoc;i++) {
00225         kernel_cache.index[i]=-1;
00226         kernel_cache.lru[i]=0;
00227     }
00228     for(i=0;i<totdoc;i++) {
00229         kernel_cache.occu[i]=0;
00230         kernel_cache.invindex[i]=-1;
00231     }
00232 
00233     kernel_cache.activenum=totdoc;;
00234     for(i=0;i<totdoc;i++) {
00235         kernel_cache.active2totdoc[i]=i;
00236         kernel_cache.totdoc2active[i]=i;
00237     }
00238 
00239     kernel_cache.time=0;
00240 }
00241 
00242 void CKernel::get_kernel_row(
00243     int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line)
00244 {
00245     int32_t i,j;
00246     KERNELCACHE_IDX start;
00247 
00248     int32_t num_vectors = get_num_vec_lhs();
00249     if (docnum>=num_vectors)
00250         docnum=2*num_vectors-1-docnum;
00251 
00252     /* is cached? */
00253     if(kernel_cache.index[docnum] != -1)
00254     {
00255         kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */
00256         start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum];
00257 
00258         if (full_line)
00259         {
00260             for(j=0;j<get_num_vec_lhs();j++)
00261             {
00262                 if(kernel_cache.totdoc2active[j] >= 0)
00263                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00264                 else
00265                     buffer[j]=(float64_t) kernel(docnum, j);
00266             }
00267         }
00268         else
00269         {
00270             for(i=0;(j=active2dnum[i])>=0;i++)
00271             {
00272                 if(kernel_cache.totdoc2active[j] >= 0)
00273                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00274                 else
00275                 {
00276                     int32_t k=j;
00277                     if (k>=num_vectors)
00278                         k=2*num_vectors-1-k;
00279                     buffer[j]=(float64_t) kernel(docnum, k);
00280                 }
00281             }
00282         }
00283     }
00284     else
00285     {
00286         if (full_line)
00287         {
00288             for(j=0;j<get_num_vec_lhs();j++)
00289                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j);
00290         }
00291         else
00292         {
00293             for(i=0;(j=active2dnum[i])>=0;i++)
00294             {
00295                 int32_t k=j;
00296                 if (k>=num_vectors)
00297                     k=2*num_vectors-1-k;
00298                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k);
00299             }
00300         }
00301     }
00302 }
00303 
00304 
00305 // Fills cache for the row m
00306 void CKernel::cache_kernel_row(int32_t m)
00307 {
00308     register int32_t j,k,l;
00309     register KERNELCACHE_ELEM *cache;
00310 
00311     int32_t num_vectors = get_num_vec_lhs();
00312 
00313     if (m>=num_vectors)
00314         m=2*num_vectors-1-m;
00315 
00316     if(!kernel_cache_check(m))   // not cached yet
00317     {
00318         cache = kernel_cache_clean_and_malloc(m);
00319         if(cache) {
00320             l=kernel_cache.totdoc2active[m];
00321 
00322             for(j=0;j<kernel_cache.activenum;j++)  // fill cache
00323             {
00324                 k=kernel_cache.active2totdoc[j];
00325 
00326                 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) {
00327                     cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)
00328                         *kernel_cache.index[k]+l];
00329                 }
00330                 else
00331                 {
00332                     if (k>=num_vectors)
00333                         k=2*num_vectors-1-k;
00334 
00335                     cache[j]=kernel(m, k);
00336                 }
00337             }
00338         }
00339         else
00340             perror("Error: Kernel cache full! => increase cache size");
00341     }
00342 }
00343 
00344 
00345 void* CKernel::cache_multiple_kernel_row_helper(void* p)
00346 {
00347     int32_t j,k,l;
00348     S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p;
00349 
00350     for (int32_t i=params->start; i<params->end; i++)
00351     {
00352         KERNELCACHE_ELEM* cache=params->cache[i];
00353         int32_t m = params->uncached_rows[i];
00354         l=params->kernel_cache->totdoc2active[m];
00355 
00356         for(j=0;j<params->kernel_cache->activenum;j++)  // fill cache
00357         {
00358             k=params->kernel_cache->active2totdoc[j];
00359 
00360             if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) {
00361                 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum)
00362                     *params->kernel_cache->index[k]+l];
00363             }
00364             else
00365                 {
00366                     if (k>=params->num_vectors)
00367                         k=2*params->num_vectors-1-k;
00368 
00369                     cache[j]=params->kernel->kernel(m, k);
00370                 }
00371         }
00372 
00373         //now line m is cached
00374         params->needs_computation[m]=0;
00375     }
00376     return NULL;
00377 }
00378 
00379 // Fills cache for the rows in key
00380 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows)
00381 {
00382 #ifndef WIN32
00383     if (parallel->get_num_threads()<2)
00384     {
00385 #endif
00386         for(int32_t i=0;i<num_rows;i++)
00387             cache_kernel_row(rows[i]);
00388 #ifndef WIN32
00389     }
00390     else
00391     {
00392         // fill up kernel cache
00393         int32_t* uncached_rows = new int32_t[num_rows];
00394         KERNELCACHE_ELEM** cache = new KERNELCACHE_ELEM*[num_rows];
00395         pthread_t* threads = new pthread_t[parallel->get_num_threads()-1];
00396         S_KTHREAD_PARAM* params = new S_KTHREAD_PARAM[parallel->get_num_threads()-1];
00397         int32_t num_threads=parallel->get_num_threads()-1;
00398         int32_t num_vec=get_num_vec_lhs();
00399         ASSERT(num_vec>0);
00400         uint8_t* needs_computation=new uint8_t[num_vec];
00401         memset(needs_computation, 0, sizeof(uint8_t)*num_vec);
00402         int32_t step=0;
00403         int32_t num=0;
00404         int32_t end=0;
00405 
00406         // allocate cachelines if necessary
00407         for (int32_t i=0; i<num_rows; i++)
00408         {
00409             int32_t idx=rows[i];
00410             if (kernel_cache_check(idx))
00411                 continue;
00412 
00413             if (idx>=num_vec)
00414                 idx=2*num_vec-1-idx;
00415 
00416             needs_computation[idx]=1;
00417             uncached_rows[num]=idx;
00418             cache[num]= kernel_cache_clean_and_malloc(idx);
00419 
00420             if (!cache[num])
00421                 SG_ERROR("Kernel cache full! => increase cache size\n");
00422 
00423             num++;
00424         }
00425 
00426         if (num>0)
00427         {
00428             step= num/parallel->get_num_threads();
00429 
00430             if (step<1)
00431             {
00432                 num_threads=num-1;
00433                 step=1;
00434             }
00435 
00436             for (int32_t t=0; t<num_threads; t++)
00437             {
00438                 params[t].kernel = this;
00439                 params[t].kernel_cache = &kernel_cache;
00440                 params[t].cache = cache;
00441                 params[t].uncached_rows = uncached_rows;
00442                 params[t].needs_computation = needs_computation;
00443                 params[t].num_uncached = num;
00444                 params[t].start = t*step;
00445                 params[t].end = (t+1)*step;
00446                 params[t].num_vectors = get_num_vec_lhs();
00447                 end=params[t].end;
00448 
00449                 if (pthread_create(&threads[t], NULL, CKernel::cache_multiple_kernel_row_helper, (void*)&params[t]) != 0)
00450                 {
00451                     num_threads=t;
00452                     end=t*step;
00453                     SG_WARNING("thread creation failed\n");
00454                     break;
00455                 }
00456             }
00457         }
00458         else
00459             num_threads=-1;
00460 
00461 
00462         S_KTHREAD_PARAM last_param;
00463         last_param.kernel = this;
00464         last_param.kernel_cache = &kernel_cache;
00465         last_param.cache = cache;
00466         last_param.uncached_rows = uncached_rows;
00467         last_param.needs_computation = needs_computation;
00468         last_param.start = end;
00469         last_param.num_uncached = num;
00470         last_param.end = num;
00471         last_param.num_vectors = get_num_vec_lhs();
00472 
00473         cache_multiple_kernel_row_helper(&last_param);
00474 
00475 
00476         for (int32_t t=0; t<num_threads; t++)
00477         {
00478             if (pthread_join(threads[t], NULL) != 0)
00479                 SG_WARNING( "pthread_join failed\n");
00480         }
00481 
00482         delete[] needs_computation;
00483         delete[] params;
00484         delete[] threads;
00485         delete[] cache;
00486         delete[] uncached_rows;
00487     }
00488 #endif
00489 }
00490 
00491 // remove numshrink columns in the cache
00492 // which correspond to examples marked
00493 void CKernel::kernel_cache_shrink(
00494     int32_t totdoc, int32_t numshrink, int32_t *after)
00495 {
00496     register int32_t i,j,jj,scount;     // 0 in after.
00497     KERNELCACHE_IDX from=0,to=0;
00498     int32_t *keep;
00499 
00500     keep=new int32_t[totdoc];
00501     for(j=0;j<totdoc;j++) {
00502         keep[j]=1;
00503     }
00504     scount=0;
00505     for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) {
00506         j=kernel_cache.active2totdoc[jj];
00507         if(!after[j]) {
00508             scount++;
00509             keep[j]=0;
00510         }
00511     }
00512 
00513     for(i=0;i<kernel_cache.max_elems;i++) {
00514         for(jj=0;jj<kernel_cache.activenum;jj++) {
00515             j=kernel_cache.active2totdoc[jj];
00516             if(!keep[j]) {
00517                 from++;
00518             }
00519             else {
00520                 kernel_cache.buffer[to]=kernel_cache.buffer[from];
00521                 to++;
00522                 from++;
00523             }
00524         }
00525     }
00526 
00527     kernel_cache.activenum=0;
00528     for(j=0;j<totdoc;j++) {
00529         if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) {
00530             kernel_cache.active2totdoc[kernel_cache.activenum]=j;
00531             kernel_cache.totdoc2active[j]=kernel_cache.activenum;
00532             kernel_cache.activenum++;
00533         }
00534         else {
00535             kernel_cache.totdoc2active[j]=-1;
00536         }
00537     }
00538 
00539     kernel_cache.max_elems=
00540         (int32_t)(kernel_cache.buffsize/kernel_cache.activenum);
00541     if(kernel_cache.max_elems>totdoc) {
00542         kernel_cache.max_elems=totdoc;
00543     }
00544 
00545     delete[] keep;
00546 
00547 }
00548 
00549 void CKernel::kernel_cache_reset_lru()
00550 {
00551     int32_t maxlru=0,k;
00552 
00553     for(k=0;k<kernel_cache.max_elems;k++) {
00554         if(maxlru < kernel_cache.lru[k])
00555             maxlru=kernel_cache.lru[k];
00556     }
00557     for(k=0;k<kernel_cache.max_elems;k++) {
00558         kernel_cache.lru[k]-=maxlru;
00559     }
00560 }
00561 
00562 void CKernel::kernel_cache_cleanup()
00563 {
00564     delete[] kernel_cache.index;
00565     delete[] kernel_cache.occu;
00566     delete[] kernel_cache.lru;
00567     delete[] kernel_cache.invindex;
00568     delete[] kernel_cache.active2totdoc;
00569     delete[] kernel_cache.totdoc2active;
00570     delete[] kernel_cache.buffer;
00571     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00572 }
00573 
00574 int32_t CKernel::kernel_cache_malloc()
00575 {
00576   int32_t i;
00577 
00578   if(kernel_cache_space_available()) {
00579     for(i=0;i<kernel_cache.max_elems;i++) {
00580       if(!kernel_cache.occu[i]) {
00581     kernel_cache.occu[i]=1;
00582     kernel_cache.elems++;
00583     return(i);
00584       }
00585     }
00586   }
00587   return(-1);
00588 }
00589 
00590 void CKernel::kernel_cache_free(int32_t cacheidx)
00591 {
00592     kernel_cache.occu[cacheidx]=0;
00593     kernel_cache.elems--;
00594 }
00595 
00596 // remove least recently used cache
00597 // element
00598 int32_t CKernel::kernel_cache_free_lru()
00599 {
00600   register int32_t k,least_elem=-1,least_time;
00601 
00602   least_time=kernel_cache.time+1;
00603   for(k=0;k<kernel_cache.max_elems;k++) {
00604     if(kernel_cache.invindex[k] != -1) {
00605       if(kernel_cache.lru[k]<least_time) {
00606     least_time=kernel_cache.lru[k];
00607     least_elem=k;
00608       }
00609     }
00610   }
00611 
00612   if(least_elem != -1) {
00613     kernel_cache_free(least_elem);
00614     kernel_cache.index[kernel_cache.invindex[least_elem]]=-1;
00615     kernel_cache.invindex[least_elem]=-1;
00616     return(1);
00617   }
00618   return(0);
00619 }
00620 
00621 // Get a free cache entry. In case cache is full, the lru
00622 // element is removed.
00623 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx)
00624 {
00625     int32_t result;
00626     if((result = kernel_cache_malloc()) == -1) {
00627         if(kernel_cache_free_lru()) {
00628             result = kernel_cache_malloc();
00629         }
00630     }
00631     kernel_cache.index[cacheidx]=result;
00632     if(result == -1) {
00633         return(0);
00634     }
00635     kernel_cache.invindex[result]=cacheidx;
00636     kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru
00637     return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]];
00638 }
00639 #endif //USE_SVMLIGHT
00640 
00641 void CKernel::load(CFile* loader)
00642 {
00643     SG_SET_LOCALE_C;
00644     SG_RESET_LOCALE;
00645 }
00646 
00647 void CKernel::save(CFile* writer)
00648 {
00649     int32_t m,n;
00650     float64_t* km=get_kernel_matrix<float64_t>(m,n, NULL);
00651     SG_SET_LOCALE_C;
00652     writer->set_real_matrix(km, m,n);
00653     delete[] km;
00654     SG_RESET_LOCALE;
00655 }
00656 
00657 void CKernel::remove_lhs_and_rhs()
00658 {
00659     if (rhs!=lhs)
00660         SG_UNREF(rhs);
00661     rhs = NULL;
00662     num_rhs=0;
00663 
00664     SG_UNREF(lhs);
00665     lhs = NULL;
00666     num_lhs=0;
00667     lhs_equals_rhs=false;
00668 
00669 #ifdef USE_SVMLIGHT
00670     cache_reset();
00671 #endif //USE_SVMLIGHT
00672 }
00673 
00674 void CKernel::remove_lhs()
00675 {
00676     if (rhs==lhs)
00677         rhs=NULL;
00678     SG_UNREF(lhs);
00679     lhs = NULL;
00680     num_lhs=NULL;
00681     lhs_equals_rhs=false;
00682 #ifdef USE_SVMLIGHT
00683     cache_reset();
00684 #endif //USE_SVMLIGHT
00685 }
00686 
00688 void CKernel::remove_rhs()
00689 {
00690     if (rhs!=lhs)
00691         SG_UNREF(rhs);
00692     rhs = NULL;
00693     num_rhs=NULL;
00694     lhs_equals_rhs=false;
00695 
00696 #ifdef USE_SVMLIGHT
00697     cache_reset();
00698 #endif //USE_SVMLIGHT
00699 }
00700 
00701 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break;
00702 
00703 void CKernel::list_kernel()
00704 {
00705     SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00706             get_combined_kernel_weight(),
00707             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00708             "SLOWBUTMEMEFFICIENT");
00709 
00710     switch (get_kernel_type())
00711     {
00712         ENUM_CASE(K_UNKNOWN)
00713         ENUM_CASE(K_LINEAR)
00714         ENUM_CASE(K_POLY)
00715         ENUM_CASE(K_GAUSSIAN)
00716         ENUM_CASE(K_GAUSSIANSHIFT)
00717         ENUM_CASE(K_GAUSSIANMATCH)
00718         ENUM_CASE(K_HISTOGRAM)
00719         ENUM_CASE(K_SALZBERG)
00720         ENUM_CASE(K_LOCALITYIMPROVED)
00721         ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
00722         ENUM_CASE(K_FIXEDDEGREE)
00723         ENUM_CASE(K_WEIGHTEDDEGREE)
00724         ENUM_CASE(K_WEIGHTEDDEGREEPOS)
00725         ENUM_CASE(K_WEIGHTEDDEGREERBF)
00726         ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
00727         ENUM_CASE(K_POLYMATCH)
00728         ENUM_CASE(K_ALIGNMENT)
00729         ENUM_CASE(K_COMMWORDSTRING)
00730         ENUM_CASE(K_COMMULONGSTRING)
00731         ENUM_CASE(K_SPECTRUMMISMATCHRBF)
00732         ENUM_CASE(K_COMBINED)
00733         ENUM_CASE(K_AUC)
00734         ENUM_CASE(K_CUSTOM)
00735         ENUM_CASE(K_SIGMOID)
00736         ENUM_CASE(K_CHI2)
00737         ENUM_CASE(K_DIAG)
00738         ENUM_CASE(K_CONST)
00739         ENUM_CASE(K_DISTANCE)
00740         ENUM_CASE(K_LOCALALIGNMENT)
00741         ENUM_CASE(K_PYRAMIDCHI2)
00742         ENUM_CASE(K_OLIGO)
00743         ENUM_CASE(K_MATCHWORD)
00744         ENUM_CASE(K_TPPK)
00745         ENUM_CASE(K_REGULATORYMODULES)
00746         ENUM_CASE(K_SPARSESPATIALSAMPLE)
00747         ENUM_CASE(K_HISTOGRAMINTERSECTION)
00748     }
00749 
00750     switch (get_feature_class())
00751     {
00752         ENUM_CASE(C_UNKNOWN)
00753         ENUM_CASE(C_SIMPLE)
00754         ENUM_CASE(C_SPARSE)
00755         ENUM_CASE(C_STRING)
00756         ENUM_CASE(C_COMBINED)
00757         ENUM_CASE(C_COMBINED_DOT)
00758         ENUM_CASE(C_WD)
00759         ENUM_CASE(C_SPEC)
00760         ENUM_CASE(C_WEIGHTEDSPEC)
00761         ENUM_CASE(C_POLY)
00762         ENUM_CASE(C_ANY)
00763     }
00764 
00765     switch (get_feature_type())
00766     {
00767         ENUM_CASE(F_UNKNOWN)
00768         ENUM_CASE(F_BOOL)
00769         ENUM_CASE(F_CHAR)
00770         ENUM_CASE(F_BYTE)
00771         ENUM_CASE(F_SHORT)
00772         ENUM_CASE(F_WORD)
00773         ENUM_CASE(F_INT)
00774         ENUM_CASE(F_UINT)
00775         ENUM_CASE(F_LONG)
00776         ENUM_CASE(F_ULONG)
00777         ENUM_CASE(F_SHORTREAL)
00778         ENUM_CASE(F_DREAL)
00779         ENUM_CASE(F_LONGREAL)
00780         ENUM_CASE(F_ANY)
00781     }
00782     SG_INFO( "\n");
00783 }
00784 #undef ENUM_CASE
00785 
00786 bool CKernel::init_optimization(
00787     int32_t count, int32_t *IDX, float64_t * weights)
00788 {
00789    SG_ERROR( "kernel does not support linadd optimization\n");
00790     return false ;
00791 }
00792 
00793 bool CKernel::delete_optimization()
00794 {
00795    SG_ERROR( "kernel does not support linadd optimization\n");
00796     return false;
00797 }
00798 
00799 float64_t CKernel::compute_optimized(int32_t vector_idx)
00800 {
00801    SG_ERROR( "kernel does not support linadd optimization\n");
00802     return 0;
00803 }
00804 
00805 void CKernel::compute_batch(
00806     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00807     int32_t* IDX, float64_t* weights, float64_t factor)
00808 {
00809    SG_ERROR( "kernel does not support batch computation\n");
00810 }
00811 
00812 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00813 {
00814    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00815 }
00816 
00817 void CKernel::clear_normal()
00818 {
00819    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00820 }
00821 
00822 int32_t CKernel::get_num_subkernels()
00823 {
00824     return 1;
00825 }
00826 
00827 void CKernel::compute_by_subkernel(
00828     int32_t vector_idx, float64_t * subkernel_contrib)
00829 {
00830    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00831 }
00832 
00833 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00834 {
00835     num_weights=1 ;
00836     return &combined_kernel_weight ;
00837 }
00838 
00839 void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights)
00840 {
00841     combined_kernel_weight = weights[0] ;
00842     if (num_weights!=1)
00843       SG_ERROR( "number of subkernel weights should be one ...\n");
00844 }
00845 
00846 bool CKernel::init_optimization_svm(CSVM * svm)
00847 {
00848     int32_t num_suppvec=svm->get_num_support_vectors();
00849     int32_t* sv_idx=new int32_t[num_suppvec];
00850     float64_t* sv_weight=new float64_t[num_suppvec];
00851 
00852     for (int32_t i=0; i<num_suppvec; i++)
00853     {
00854         sv_idx[i]    = svm->get_support_vector(i);
00855         sv_weight[i] = svm->get_alpha(i);
00856     }
00857     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00858 
00859     delete[] sv_idx;
00860     delete[] sv_weight;
00861     return ret;
00862 }
00863 
00864 void CKernel::load_serializable_post() throw (ShogunException)
00865 {
00866     CSGObject::load_serializable_post();
00867     if (lhs_equals_rhs)
00868         rhs=lhs;
00869 }
00870 
00871 void CKernel::save_serializable_pre() throw (ShogunException)
00872 {
00873     CSGObject::save_serializable_pre();
00874 
00875     if (lhs_equals_rhs)
00876         rhs=NULL;
00877 }
00878 
00879 void CKernel::save_serializable_post() throw (ShogunException)
00880 {
00881     CSGObject::save_serializable_post();
00882 
00883     if (lhs_equals_rhs)
00884         rhs=lhs;
00885 }
00886 
00887 void CKernel::init()
00888 {
00889     cache_size=10;
00890     kernel_matrix=NULL;
00891     lhs=NULL;
00892     rhs=NULL;
00893     num_lhs=0;
00894     num_rhs=0;
00895     combined_kernel_weight=1;
00896     optimization_initialized=false;
00897     opt_type=FASTBUTMEMHUNGRY;
00898     properties=KP_NONE;
00899     normalizer=NULL;
00900 
00901 #ifdef USE_SVMLIGHT
00902     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00903 #endif //USE_SVMLIGHT
00904 
00905     set_normalizer(new CIdentityKernelNormalizer());
00906 
00907     m_parameters->add(&cache_size, "cache_size",
00908                       "Cache size in MB.");
00909     m_parameters->add((CSGObject**) &lhs, "lhs",
00910                       "Feature vectors to occur on left hand side.");
00911     m_parameters->add((CSGObject**) &rhs, "rhs",
00912                       "Feature vectors to occur on right hand side.");
00913     m_parameters->add(&lhs_equals_rhs, "lhs_equals_rhs",
00914                       "If features on lhs are the same as on rhs.");
00915     m_parameters->add(&num_lhs, "num_lhs",
00916                       "Number of feature vectors on left hand side.");
00917     m_parameters->add(&num_rhs, "num_rhs",
00918                       "Number of feature vectors on right hand side.");
00919     m_parameters->add(&combined_kernel_weight, "combined_kernel_weight",
00920                       "Combined kernel weight.");
00921     m_parameters->add(&optimization_initialized,
00922                       "optimization_initialized",
00923                       "Optimization is initialized.");
00924     m_parameters->add((machine_int_t*) &opt_type, "opt_type",
00925                       "Optimization type.");
00926     m_parameters->add(&properties, "properties",
00927                       "Kernel properties.");
00928     m_parameters->add((CSGObject**) &normalizer, "normalizer",
00929                       "Normalize the kernel.");
00930 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation