Kernel.cpp

Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #include <shogun/lib/config.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/io/File.h>
00019 #include <shogun/lib/Time.h>
00020 #include <shogun/lib/Signal.h>
00021 
00022 #include <shogun/base/Parallel.h>
00023 
00024 #include <shogun/kernel/Kernel.h>
00025 #include <shogun/kernel/normalizer/IdentityKernelNormalizer.h>
00026 #include <shogun/features/Features.h>
00027 #include <shogun/base/Parameter.h>
00028 
00029 #include <shogun/classifier/svm/SVM.h>
00030 
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #ifdef HAVE_PTHREAD
00036 #include <pthread.h>
00037 #endif
00038 
00039 using namespace shogun;
00040 
00041 CKernel::CKernel() : CSGObject()
00042 {
00043     init();
00044     register_params();
00045 }
00046 
00047 CKernel::CKernel(int32_t size) : CSGObject()
00048 {
00049     init();
00050 
00051     if (size<10)
00052         size=10;
00053 
00054     cache_size=size;
00055     register_params();
00056 }
00057 
00058 
00059 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00060 {
00061     init();
00062 
00063     if (size<10)
00064         size=10;
00065 
00066     cache_size=size;
00067 
00068     set_normalizer(new CIdentityKernelNormalizer());
00069     init(p_lhs, p_rhs);
00070     register_params();
00071 }
00072 
00073 CKernel::~CKernel()
00074 {
00075     if (get_is_initialized())
00076         SG_ERROR("Kernel still initialized on destruction.\n");
00077 
00078     remove_lhs_and_rhs();
00079     SG_UNREF(normalizer);
00080 
00081     SG_INFO("Kernel deleted (%p).\n", this);
00082 }
00083 
00084 #ifdef USE_SVMLIGHT
00085 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack)
00086 {
00087     if (size<10)
00088         size=10;
00089 
00090     kernel_cache_cleanup();
00091     cache_size=size;
00092 
00093     if (has_features() && get_num_vec_lhs())
00094         kernel_cache_init(cache_size, regression_hack);
00095 }
00096 #endif //USE_SVMLIGHT
00097 
00098 bool CKernel::init(CFeatures* l, CFeatures* r)
00099 {
00100     SG_DEBUG("entering CKernel::init(%p, %p)\n", l, r);
00101 
00102     /* make sure that features are not deleted if same ones are used */
00103     SG_REF(l);
00104     SG_REF(r);
00105 
00106     //make sure features were indeed supplied
00107     REQUIRE(l, "CKernel::init(%p, %p): LHS features required!\n", l, r);
00108     REQUIRE(r, "CKernel::init(%p, %p): RHS features required!\n", l, r);
00109 
00110     //make sure features are compatible
00111     ASSERT(l->get_feature_class()==r->get_feature_class());
00112     ASSERT(l->get_feature_type()==r->get_feature_type());
00113 
00114     //remove references to previous features
00115     remove_lhs_and_rhs();
00116 
00117     //increase reference counts
00118     SG_REF(l);
00119     if (l==r)
00120         lhs_equals_rhs=true;
00121     else // l!=r
00122         SG_REF(r);
00123 
00124     lhs=l;
00125     rhs=r;
00126 
00127     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00128     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00129 
00130     num_lhs=l->get_num_vectors();
00131     num_rhs=r->get_num_vectors();
00132 
00133     /* unref "safety" refs from beginning */
00134     SG_UNREF(r);
00135     SG_UNREF(l);
00136 
00137     SG_DEBUG("leaving CKernel::init(%p, %p)\n", l, r);
00138     return true;
00139 }
00140 
00141 bool CKernel::set_normalizer(CKernelNormalizer* n)
00142 {
00143     SG_REF(n);
00144     if (lhs && rhs)
00145         n->init(this);
00146 
00147     SG_UNREF(normalizer);
00148     normalizer=n;
00149 
00150     return (normalizer!=NULL);
00151 }
00152 
00153 CKernelNormalizer* CKernel::get_normalizer()
00154 {
00155     SG_REF(normalizer)
00156     return normalizer;
00157 }
00158 
00159 bool CKernel::init_normalizer()
00160 {
00161     return normalizer->init(this);
00162 }
00163 
00164 void CKernel::cleanup()
00165 {
00166     remove_lhs_and_rhs();
00167 }
00168 
00169 #ifdef USE_SVMLIGHT
00170 /****************************** Cache handling *******************************/
00171 
00172 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack)
00173 {
00174     int32_t totdoc=get_num_vec_lhs();
00175     if (totdoc<=0)
00176     {
00177         SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n",
00178                 get_num_vec_lhs(), get_num_vec_rhs());
00179     }
00180     uint64_t buffer_size=0;
00181     int32_t i;
00182 
00183     //in regression the additional constraints are made by doubling the training data
00184     if (regression_hack)
00185         totdoc*=2;
00186 
00187     buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM);
00188     if (buffer_size>((uint64_t) totdoc)*totdoc)
00189         buffer_size=((uint64_t) totdoc)*totdoc;
00190 
00191     SG_INFO( "using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name());
00192 
00193     //make sure it fits in the *signed* KERNELCACHE_IDX type
00194     ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1)));
00195 
00196     kernel_cache.index = SG_MALLOC(int32_t, totdoc);
00197     kernel_cache.occu = SG_MALLOC(int32_t, totdoc);
00198     kernel_cache.lru = SG_MALLOC(int32_t, totdoc);
00199     kernel_cache.invindex = SG_MALLOC(int32_t, totdoc);
00200     kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc);
00201     kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc);
00202     kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size);
00203     kernel_cache.buffsize=buffer_size;
00204     kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc);
00205 
00206     if(kernel_cache.max_elems>totdoc) {
00207         kernel_cache.max_elems=totdoc;
00208     }
00209 
00210     kernel_cache.elems=0;   // initialize cache
00211     for(i=0;i<totdoc;i++) {
00212         kernel_cache.index[i]=-1;
00213         kernel_cache.lru[i]=0;
00214     }
00215     for(i=0;i<totdoc;i++) {
00216         kernel_cache.occu[i]=0;
00217         kernel_cache.invindex[i]=-1;
00218     }
00219 
00220     kernel_cache.activenum=totdoc;;
00221     for(i=0;i<totdoc;i++) {
00222         kernel_cache.active2totdoc[i]=i;
00223         kernel_cache.totdoc2active[i]=i;
00224     }
00225 
00226     kernel_cache.time=0;
00227 }
00228 
00229 void CKernel::get_kernel_row(
00230     int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line)
00231 {
00232     int32_t i,j;
00233     KERNELCACHE_IDX start;
00234 
00235     int32_t num_vectors = get_num_vec_lhs();
00236     if (docnum>=num_vectors)
00237         docnum=2*num_vectors-1-docnum;
00238 
00239     /* is cached? */
00240     if(kernel_cache.index[docnum] != -1)
00241     {
00242         kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */
00243         start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum];
00244 
00245         if (full_line)
00246         {
00247             for(j=0;j<get_num_vec_lhs();j++)
00248             {
00249                 if(kernel_cache.totdoc2active[j] >= 0)
00250                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00251                 else
00252                     buffer[j]=(float64_t) kernel(docnum, j);
00253             }
00254         }
00255         else
00256         {
00257             for(i=0;(j=active2dnum[i])>=0;i++)
00258             {
00259                 if(kernel_cache.totdoc2active[j] >= 0)
00260                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00261                 else
00262                 {
00263                     int32_t k=j;
00264                     if (k>=num_vectors)
00265                         k=2*num_vectors-1-k;
00266                     buffer[j]=(float64_t) kernel(docnum, k);
00267                 }
00268             }
00269         }
00270     }
00271     else
00272     {
00273         if (full_line)
00274         {
00275             for(j=0;j<get_num_vec_lhs();j++)
00276                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j);
00277         }
00278         else
00279         {
00280             for(i=0;(j=active2dnum[i])>=0;i++)
00281             {
00282                 int32_t k=j;
00283                 if (k>=num_vectors)
00284                     k=2*num_vectors-1-k;
00285                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k);
00286             }
00287         }
00288     }
00289 }
00290 
00291 
00292 // Fills cache for the row m
00293 void CKernel::cache_kernel_row(int32_t m)
00294 {
00295     register int32_t j,k,l;
00296     register KERNELCACHE_ELEM *cache;
00297 
00298     int32_t num_vectors = get_num_vec_lhs();
00299 
00300     if (m>=num_vectors)
00301         m=2*num_vectors-1-m;
00302 
00303     if(!kernel_cache_check(m))   // not cached yet
00304     {
00305         cache = kernel_cache_clean_and_malloc(m);
00306         if(cache) {
00307             l=kernel_cache.totdoc2active[m];
00308 
00309             for(j=0;j<kernel_cache.activenum;j++)  // fill cache
00310             {
00311                 k=kernel_cache.active2totdoc[j];
00312 
00313                 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) {
00314                     cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)
00315                         *kernel_cache.index[k]+l];
00316                 }
00317                 else
00318                 {
00319                     if (k>=num_vectors)
00320                         k=2*num_vectors-1-k;
00321 
00322                     cache[j]=kernel(m, k);
00323                 }
00324             }
00325         }
00326         else
00327             perror("Error: Kernel cache full! => increase cache size");
00328     }
00329 }
00330 
00331 
00332 void* CKernel::cache_multiple_kernel_row_helper(void* p)
00333 {
00334     int32_t j,k,l;
00335     S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p;
00336 
00337     for (int32_t i=params->start; i<params->end; i++)
00338     {
00339         KERNELCACHE_ELEM* cache=params->cache[i];
00340         int32_t m = params->uncached_rows[i];
00341         l=params->kernel_cache->totdoc2active[m];
00342 
00343         for(j=0;j<params->kernel_cache->activenum;j++)  // fill cache
00344         {
00345             k=params->kernel_cache->active2totdoc[j];
00346 
00347             if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) {
00348                 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum)
00349                     *params->kernel_cache->index[k]+l];
00350             }
00351             else
00352                 {
00353                     if (k>=params->num_vectors)
00354                         k=2*params->num_vectors-1-k;
00355 
00356                     cache[j]=params->kernel->kernel(m, k);
00357                 }
00358         }
00359 
00360         //now line m is cached
00361         params->needs_computation[m]=0;
00362     }
00363     return NULL;
00364 }
00365 
/** Fills the cache for all rows listed in rows.
 *
 * Without HAVE_PTHREAD, or when fewer than two threads are configured,
 * the rows are cached one after another through cache_kernel_row().
 * Otherwise cache lines are first allocated for every row that is not
 * cached yet, and the uncached rows are then split into slices that are
 * processed by cache_multiple_kernel_row_helper() in worker threads,
 * with the calling thread handling the remaining slice.
 *
 * @param rows row indices; values >= the number of lhs vectors are
 *        mirrored back via 2*num_vec-1-idx
 * @param num_rows number of entries in rows
 */
void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows)
{
#ifdef HAVE_PTHREAD
    int32_t nthreads=parallel->get_num_threads();

    if (nthreads<2)
    {
#endif
        // serial path (also the only path without pthread support)
        for(int32_t i=0;i<num_rows;i++)
            cache_kernel_row(rows[i]);
#ifdef HAVE_PTHREAD
    }
    else
    {
        // fill up kernel cache
        int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows);
        KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows);
        // the calling thread does part of the work itself, hence only
        // nthreads-1 worker threads are ever created
        pthread_t* threads = SG_MALLOC(pthread_t, nthreads-1);
        S_KTHREAD_PARAM* params = SG_MALLOC(S_KTHREAD_PARAM, nthreads-1);
        int32_t num_threads=nthreads-1;
        int32_t num_vec=get_num_vec_lhs();
        ASSERT(num_vec>0);
        // one flag per lhs vector, set while that row still has to be computed
        uint8_t* needs_computation=SG_CALLOC(uint8_t, num_vec);

        int32_t step=0;
        int32_t num=0;   // number of rows that actually need computing
        int32_t end=0;   // first row index left for the calling thread

        // allocate cachelines if necessary
        for (int32_t i=0; i<num_rows; i++)
        {
            int32_t idx=rows[i];
            if (idx>=num_vec)
                idx=2*num_vec-1-idx;

            // already cached rows need no work
            if (kernel_cache_check(idx))
                continue;

            needs_computation[idx]=1;
            uncached_rows[num]=idx;
            cache[num]= kernel_cache_clean_and_malloc(idx);

            if (!cache[num])
                SG_ERROR("Kernel cache full! => increase cache size\n");

            num++;
        }

        if (num>0)
        {
            step= num/nthreads;

            // fewer rows than threads: one row per worker, the last row
            // is left for the calling thread
            if (step<1)
            {
                num_threads=num-1;
                step=1;
            }

            for (int32_t t=0; t<num_threads; t++)
            {
                params[t].kernel = this;
                params[t].kernel_cache = &kernel_cache;
                params[t].cache = cache;
                params[t].uncached_rows = uncached_rows;
                params[t].needs_computation = needs_computation;
                params[t].num_uncached = num;
                params[t].start = t*step;
                params[t].end = (t+1)*step;
                params[t].num_vectors = get_num_vec_lhs();
                end=params[t].end;

                int code=pthread_create(&threads[t], NULL,
                        CKernel::cache_multiple_kernel_row_helper, (void*)&params[t]);

                if (code != 0)
                {
                    SG_WARNING("Thread creation failed (thread %d of %d) "
                            "with error:'%s'\n",t, num_threads, strerror(code));
                    // only the threads created so far are joined later; the
                    // calling thread takes over from the failed slice onwards
                    num_threads=t;
                    end=t*step;
                    break;
                }
            }
        }
        else
            num_threads=-1;   // nothing to compute, no threads to join


        // the calling thread processes the remaining rows [end, num)
        S_KTHREAD_PARAM last_param;
        last_param.kernel = this;
        last_param.kernel_cache = &kernel_cache;
        last_param.cache = cache;
        last_param.uncached_rows = uncached_rows;
        last_param.needs_computation = needs_computation;
        last_param.start = end;
        last_param.num_uncached = num;
        last_param.end = num;
        last_param.num_vectors = get_num_vec_lhs();

        cache_multiple_kernel_row_helper(&last_param);


        // wait for all worker threads that were started
        for (int32_t t=0; t<num_threads; t++)
        {
            if (pthread_join(threads[t], NULL) != 0)
                SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
        }

        SG_FREE(needs_computation);
        SG_FREE(params);
        SG_FREE(threads);
        SG_FREE(cache);
        SG_FREE(uncached_rows);
    }
#endif
}
00483 
00484 // remove numshrink columns in the cache
00485 // which correspond to examples marked
00486 void CKernel::kernel_cache_shrink(
00487     int32_t totdoc, int32_t numshrink, int32_t *after)
00488 {
00489     register int32_t i,j,jj,scount;     // 0 in after.
00490     KERNELCACHE_IDX from=0,to=0;
00491     int32_t *keep;
00492 
00493     keep=SG_MALLOC(int32_t, totdoc);
00494     for(j=0;j<totdoc;j++) {
00495         keep[j]=1;
00496     }
00497     scount=0;
00498     for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) {
00499         j=kernel_cache.active2totdoc[jj];
00500         if(!after[j]) {
00501             scount++;
00502             keep[j]=0;
00503         }
00504     }
00505 
00506     for(i=0;i<kernel_cache.max_elems;i++) {
00507         for(jj=0;jj<kernel_cache.activenum;jj++) {
00508             j=kernel_cache.active2totdoc[jj];
00509             if(!keep[j]) {
00510                 from++;
00511             }
00512             else {
00513                 kernel_cache.buffer[to]=kernel_cache.buffer[from];
00514                 to++;
00515                 from++;
00516             }
00517         }
00518     }
00519 
00520     kernel_cache.activenum=0;
00521     for(j=0;j<totdoc;j++) {
00522         if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) {
00523             kernel_cache.active2totdoc[kernel_cache.activenum]=j;
00524             kernel_cache.totdoc2active[j]=kernel_cache.activenum;
00525             kernel_cache.activenum++;
00526         }
00527         else {
00528             kernel_cache.totdoc2active[j]=-1;
00529         }
00530     }
00531 
00532     kernel_cache.max_elems=
00533         (int32_t)(kernel_cache.buffsize/kernel_cache.activenum);
00534     if(kernel_cache.max_elems>totdoc) {
00535         kernel_cache.max_elems=totdoc;
00536     }
00537 
00538     SG_FREE(keep);
00539 
00540 }
00541 
00542 void CKernel::kernel_cache_reset_lru()
00543 {
00544     int32_t maxlru=0,k;
00545 
00546     for(k=0;k<kernel_cache.max_elems;k++) {
00547         if(maxlru < kernel_cache.lru[k])
00548             maxlru=kernel_cache.lru[k];
00549     }
00550     for(k=0;k<kernel_cache.max_elems;k++) {
00551         kernel_cache.lru[k]-=maxlru;
00552     }
00553 }
00554 
00555 void CKernel::kernel_cache_cleanup()
00556 {
00557     SG_FREE(kernel_cache.index);
00558     SG_FREE(kernel_cache.occu);
00559     SG_FREE(kernel_cache.lru);
00560     SG_FREE(kernel_cache.invindex);
00561     SG_FREE(kernel_cache.active2totdoc);
00562     SG_FREE(kernel_cache.totdoc2active);
00563     SG_FREE(kernel_cache.buffer);
00564     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00565 }
00566 
00567 int32_t CKernel::kernel_cache_malloc()
00568 {
00569   int32_t i;
00570 
00571   if(kernel_cache_space_available()) {
00572     for(i=0;i<kernel_cache.max_elems;i++) {
00573       if(!kernel_cache.occu[i]) {
00574     kernel_cache.occu[i]=1;
00575     kernel_cache.elems++;
00576     return(i);
00577       }
00578     }
00579   }
00580   return(-1);
00581 }
00582 
00583 void CKernel::kernel_cache_free(int32_t cacheidx)
00584 {
00585     kernel_cache.occu[cacheidx]=0;
00586     kernel_cache.elems--;
00587 }
00588 
00589 // remove least recently used cache
00590 // element
00591 int32_t CKernel::kernel_cache_free_lru()
00592 {
00593   register int32_t k,least_elem=-1,least_time;
00594 
00595   least_time=kernel_cache.time+1;
00596   for(k=0;k<kernel_cache.max_elems;k++) {
00597     if(kernel_cache.invindex[k] != -1) {
00598       if(kernel_cache.lru[k]<least_time) {
00599     least_time=kernel_cache.lru[k];
00600     least_elem=k;
00601       }
00602     }
00603   }
00604 
00605   if(least_elem != -1) {
00606     kernel_cache_free(least_elem);
00607     kernel_cache.index[kernel_cache.invindex[least_elem]]=-1;
00608     kernel_cache.invindex[least_elem]=-1;
00609     return(1);
00610   }
00611   return(0);
00612 }
00613 
00614 // Get a free cache entry. In case cache is full, the lru
00615 // element is removed.
00616 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx)
00617 {
00618     int32_t result;
00619     if((result = kernel_cache_malloc()) == -1) {
00620         if(kernel_cache_free_lru()) {
00621             result = kernel_cache_malloc();
00622         }
00623     }
00624     kernel_cache.index[cacheidx]=result;
00625     if(result == -1) {
00626         return(0);
00627     }
00628     kernel_cache.invindex[result]=cacheidx;
00629     kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru
00630     return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]];
00631 }
00632 #endif //USE_SVMLIGHT
00633 
00634 void CKernel::load(CFile* loader)
00635 {
00636     SG_SET_LOCALE_C;
00637     SG_RESET_LOCALE;
00638 }
00639 
00640 void CKernel::save(CFile* writer)
00641 {
00642     SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
00643     SG_SET_LOCALE_C;
00644     writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
00645     SG_RESET_LOCALE;
00646 }
00647 
00648 void CKernel::remove_lhs_and_rhs()
00649 {
00650     SG_DEBUG("entering CKernel::remove_lhs_and_rhs\n");
00651     if (rhs!=lhs)
00652         SG_UNREF(rhs);
00653     rhs = NULL;
00654     num_rhs=0;
00655 
00656     SG_UNREF(lhs);
00657     lhs = NULL;
00658     num_lhs=0;
00659     lhs_equals_rhs=false;
00660 
00661 #ifdef USE_SVMLIGHT
00662     cache_reset();
00663 #endif //USE_SVMLIGHT
00664     SG_DEBUG("leaving CKernel::remove_lhs_and_rhs\n");
00665 }
00666 
00667 void CKernel::remove_lhs()
00668 {
00669     if (rhs==lhs)
00670         rhs=NULL;
00671     SG_UNREF(lhs);
00672     lhs = NULL;
00673     num_lhs=0;
00674     lhs_equals_rhs=false;
00675 #ifdef USE_SVMLIGHT
00676     cache_reset();
00677 #endif //USE_SVMLIGHT
00678 }
00679 
00681 void CKernel::remove_rhs()
00682 {
00683     if (rhs!=lhs)
00684         SG_UNREF(rhs);
00685     rhs = NULL;
00686     num_rhs=0;
00687     lhs_equals_rhs=false;
00688 
00689 #ifdef USE_SVMLIGHT
00690     cache_reset();
00691 #endif //USE_SVMLIGHT
00692 }
00693 
// Expands to a switch case that logs the enumerator's own name followed
// by a space; only defined for the duration of list_kernel().
#define ENUM_CASE(n) case n: SG_INFO(#n " "); break;

/** Logs a one-line description of this kernel via SG_INFO: its address,
 * name, combined kernel weight and optimization type, followed by the
 * names of its kernel type, feature class and feature type. */
void CKernel::list_kernel()
{
    SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
            get_combined_kernel_weight(),
            get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
            "SLOWBUTMEMEFFICIENT");

    // kernel type
    switch (get_kernel_type())
    {
        ENUM_CASE(K_UNKNOWN)
        ENUM_CASE(K_LINEAR)
        ENUM_CASE(K_POLY)
        ENUM_CASE(K_GAUSSIAN)
        ENUM_CASE(K_GAUSSIANSHIFT)
        ENUM_CASE(K_GAUSSIANMATCH)
        ENUM_CASE(K_HISTOGRAM)
        ENUM_CASE(K_SALZBERG)
        ENUM_CASE(K_LOCALITYIMPROVED)
        ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
        ENUM_CASE(K_FIXEDDEGREE)
        ENUM_CASE(K_WEIGHTEDDEGREE)
        ENUM_CASE(K_WEIGHTEDDEGREEPOS)
        ENUM_CASE(K_WEIGHTEDDEGREERBF)
        ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
        ENUM_CASE(K_POLYMATCH)
        ENUM_CASE(K_ALIGNMENT)
        ENUM_CASE(K_COMMWORDSTRING)
        ENUM_CASE(K_COMMULONGSTRING)
        ENUM_CASE(K_SPECTRUMRBF)
        ENUM_CASE(K_COMBINED)
        ENUM_CASE(K_AUC)
        ENUM_CASE(K_CUSTOM)
        ENUM_CASE(K_SIGMOID)
        ENUM_CASE(K_CHI2)
        ENUM_CASE(K_DIAG)
        ENUM_CASE(K_CONST)
        ENUM_CASE(K_DISTANCE)
        ENUM_CASE(K_LOCALALIGNMENT)
        ENUM_CASE(K_PYRAMIDCHI2)
        ENUM_CASE(K_OLIGO)
        ENUM_CASE(K_MATCHWORD)
        ENUM_CASE(K_TPPK)
        ENUM_CASE(K_REGULATORYMODULES)
        ENUM_CASE(K_SPARSESPATIALSAMPLE)
        ENUM_CASE(K_HISTOGRAMINTERSECTION)
        ENUM_CASE(K_WAVELET)
        ENUM_CASE(K_WAVE)
        ENUM_CASE(K_CAUCHY)
        ENUM_CASE(K_TSTUDENT)
        ENUM_CASE(K_MULTIQUADRIC)
        ENUM_CASE(K_EXPONENTIAL)
        ENUM_CASE(K_RATIONAL_QUADRATIC)
        ENUM_CASE(K_POWER)
        ENUM_CASE(K_SPHERICAL)
        ENUM_CASE(K_LOG)
        ENUM_CASE(K_SPLINE)
        ENUM_CASE(K_ANOVA)
        ENUM_CASE(K_CIRCULAR)
        ENUM_CASE(K_INVERSEMULTIQUADRIC)
        ENUM_CASE(K_SPECTRUMMISMATCHRBF)
        ENUM_CASE(K_DISTANTSEGMENTS)
        ENUM_CASE(K_BESSEL)
        ENUM_CASE(K_JENSENSHANNON)
        ENUM_CASE(K_DIRECTOR)
        ENUM_CASE(K_PRODUCT)
        ENUM_CASE(K_LINEARARD)
        ENUM_CASE(K_GAUSSIANARD)
        ENUM_CASE(K_STREAMING)
    }

    // feature class
    switch (get_feature_class())
    {
        ENUM_CASE(C_UNKNOWN)
        ENUM_CASE(C_DENSE)
        ENUM_CASE(C_SPARSE)
        ENUM_CASE(C_STRING)
        ENUM_CASE(C_STREAMING_DENSE)
        ENUM_CASE(C_STREAMING_SPARSE)
        ENUM_CASE(C_STREAMING_STRING)
        ENUM_CASE(C_STREAMING_VW)
        ENUM_CASE(C_COMBINED)
        ENUM_CASE(C_COMBINED_DOT)
        ENUM_CASE(C_WD)
        ENUM_CASE(C_SPEC)
        ENUM_CASE(C_WEIGHTEDSPEC)
        ENUM_CASE(C_POLY)
        ENUM_CASE(C_BINNED_DOT)
        ENUM_CASE(C_DIRECTOR_DOT)
        ENUM_CASE(C_LATENT)
        ENUM_CASE(C_ANY)
    }

    // feature type
    switch (get_feature_type())
    {
        ENUM_CASE(F_UNKNOWN)
        ENUM_CASE(F_BOOL)
        ENUM_CASE(F_CHAR)
        ENUM_CASE(F_BYTE)
        ENUM_CASE(F_SHORT)
        ENUM_CASE(F_WORD)
        ENUM_CASE(F_INT)
        ENUM_CASE(F_UINT)
        ENUM_CASE(F_LONG)
        ENUM_CASE(F_ULONG)
        ENUM_CASE(F_SHORTREAL)
        ENUM_CASE(F_DREAL)
        ENUM_CASE(F_LONGREAL)
        ENUM_CASE(F_ANY)
    }
    SG_INFO( "\n");
}
#undef ENUM_CASE
00808 
00809 bool CKernel::init_optimization(
00810     int32_t count, int32_t *IDX, float64_t * weights)
00811 {
00812    SG_ERROR( "kernel does not support linadd optimization\n");
00813     return false ;
00814 }
00815 
00816 bool CKernel::delete_optimization()
00817 {
00818    SG_ERROR( "kernel does not support linadd optimization\n");
00819     return false;
00820 }
00821 
00822 float64_t CKernel::compute_optimized(int32_t vector_idx)
00823 {
00824    SG_ERROR( "kernel does not support linadd optimization\n");
00825     return 0;
00826 }
00827 
00828 void CKernel::compute_batch(
00829     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00830     int32_t* IDX, float64_t* weights, float64_t factor)
00831 {
00832    SG_ERROR( "kernel does not support batch computation\n");
00833 }
00834 
00835 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00836 {
00837    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00838 }
00839 
00840 void CKernel::clear_normal()
00841 {
00842    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00843 }
00844 
00845 int32_t CKernel::get_num_subkernels()
00846 {
00847     return 1;
00848 }
00849 
00850 void CKernel::compute_by_subkernel(
00851     int32_t vector_idx, float64_t * subkernel_contrib)
00852 {
00853    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00854 }
00855 
00856 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00857 {
00858     num_weights=1 ;
00859     return &combined_kernel_weight ;
00860 }
00861 
00862 void CKernel::set_subkernel_weights(const SGVector<float64_t> weights)
00863 {
00864     ASSERT(weights.vector);
00865     if (weights.vlen!=1)
00866       SG_ERROR( "number of subkernel weights should be one ...\n");
00867 
00868     combined_kernel_weight = weights.vector[0] ;
00869 }
00870 
00871 bool CKernel::init_optimization_svm(CSVM * svm)
00872 {
00873     int32_t num_suppvec=svm->get_num_support_vectors();
00874     int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
00875     float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
00876 
00877     for (int32_t i=0; i<num_suppvec; i++)
00878     {
00879         sv_idx[i]    = svm->get_support_vector(i);
00880         sv_weight[i] = svm->get_alpha(i);
00881     }
00882     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00883 
00884     SG_FREE(sv_idx);
00885     SG_FREE(sv_weight);
00886     return ret;
00887 }
00888 
00889 void CKernel::load_serializable_post() throw (ShogunException)
00890 {
00891     CSGObject::load_serializable_post();
00892     if (lhs_equals_rhs)
00893         rhs=lhs;
00894 }
00895 
00896 void CKernel::save_serializable_pre() throw (ShogunException)
00897 {
00898     CSGObject::save_serializable_pre();
00899 
00900     if (lhs_equals_rhs)
00901         rhs=NULL;
00902 }
00903 
00904 void CKernel::save_serializable_post() throw (ShogunException)
00905 {
00906     CSGObject::save_serializable_post();
00907 
00908     if (lhs_equals_rhs)
00909         rhs=lhs;
00910 }
00911 
00912 void CKernel::register_params()   {
00913     SG_ADD(&cache_size, "cache_size",
00914         "Cache size in MB.", MS_NOT_AVAILABLE);
00915     SG_ADD((CSGObject**) &lhs, "lhs",
00916       "Feature vectors to occur on left hand side.", MS_NOT_AVAILABLE);
00917     SG_ADD((CSGObject**) &rhs, "rhs",
00918       "Feature vectors to occur on right hand side.", MS_NOT_AVAILABLE);
00919     SG_ADD(&lhs_equals_rhs, "lhs_equals_rhs",
00920         "If features on lhs are the same as on rhs.", MS_NOT_AVAILABLE);
00921     SG_ADD(&num_lhs, "num_lhs", "Number of feature vectors on left hand side.",
00922         MS_NOT_AVAILABLE);
00923     SG_ADD(&num_rhs, "num_rhs", "Number of feature vectors on right hand side.",
00924         MS_NOT_AVAILABLE);
00925     SG_ADD(&combined_kernel_weight, "combined_kernel_weight",
00926             "Combined kernel weight.", MS_AVAILABLE);
00927     SG_ADD(&optimization_initialized, "optimization_initialized",
00928           "Optimization is initialized.", MS_NOT_AVAILABLE);
00929     SG_ADD((machine_int_t*) &opt_type, "opt_type",
00930           "Optimization type.", MS_NOT_AVAILABLE);
00931     SG_ADD(&properties, "properties", "Kernel properties.", MS_NOT_AVAILABLE);
00932     SG_ADD((CSGObject**) &normalizer, "normalizer", "Normalize the kernel.",
00933         MS_AVAILABLE);
00934 }
00935 
00936 
00937 void CKernel::init()
00938 {
00939     cache_size=10;
00940     kernel_matrix=NULL;
00941     lhs=NULL;
00942     rhs=NULL;
00943     num_lhs=0;
00944     num_rhs=0;
00945     combined_kernel_weight=1;
00946     optimization_initialized=false;
00947     opt_type=FASTBUTMEMHUNGRY;
00948     properties=KP_NONE;
00949     normalizer=NULL;
00950 
00951 #ifdef USE_SVMLIGHT
00952     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00953 #endif //USE_SVMLIGHT
00954 
00955     set_normalizer(new CIdentityKernelNormalizer());
00956 }
00957 
00958 SGMatrix<float64_t> CKernel::get_parameter_gradient(TParameter* param,
00959         CSGObject* obj, index_t index)
00960 {
00961     return SGMatrix<float64_t>();
00962 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation