Kernel.h

Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #ifndef _KERNEL_H___
00016 #define _KERNEL_H___
00017 
00018 #include <shogun/lib/common.h>
00019 #include <shogun/lib/Signal.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/mathematics/Math.h>
00022 #include <shogun/features/FeatureTypes.h>
00023 #include <shogun/base/SGObject.h>
00024 #include <shogun/features/Features.h>
00025 #include <shogun/kernel/KernelNormalizer.h>
00026 
00027 #include <vector>
00028 
00029 namespace shogun
00030 {
00031     class CFile;
00032     class CFeatures;
00033     class CKernelNormalizer;
00034 
00035 #ifdef USE_SHORTREAL_KERNELCACHE
00036 
00037     typedef float32_t KERNELCACHE_ELEM;
00038 #else
00039 
00040     typedef float64_t KERNELCACHE_ELEM;
00041 #endif
00042 
00044 typedef int64_t KERNELCACHE_IDX;
00045 
00046 
00048 enum EOptimizationType
00049 {
00050     FASTBUTMEMHUNGRY,
00051     SLOWBUTMEMEFFICIENT
00052 };
00053 
00055 enum EKernelType
00056 {
00057     K_UNKNOWN = 0,
00058     K_LINEAR = 10,
00059     K_POLY = 20,
00060     K_GAUSSIAN = 30,
00061     K_GAUSSIANSHIFT = 32,
00062     K_GAUSSIANMATCH = 33,
00063     K_HISTOGRAM = 40,
00064     K_SALZBERG = 41,
00065     K_LOCALITYIMPROVED = 50,
00066     K_SIMPLELOCALITYIMPROVED = 60,
00067     K_FIXEDDEGREE = 70,
00068     K_WEIGHTEDDEGREE =    80,
00069     K_WEIGHTEDDEGREEPOS = 81,
00070     K_WEIGHTEDDEGREERBF = 82,
00071     K_WEIGHTEDCOMMWORDSTRING = 90,
00072     K_POLYMATCH = 100,
00073     K_ALIGNMENT = 110,
00074     K_COMMWORDSTRING = 120,
00075     K_COMMULONGSTRING = 121,
00076     K_SPECTRUMRBF = 122,
00077     K_SPECTRUMMISMATCHRBF = 123,
00078     K_COMBINED = 140,
00079     K_AUC = 150,
00080     K_CUSTOM = 160,
00081     K_SIGMOID = 170,
00082     K_CHI2 = 180,
00083     K_DIAG = 190,
00084     K_CONST = 200,
00085     K_DISTANCE = 220,
00086     K_LOCALALIGNMENT = 230,
00087     K_PYRAMIDCHI2 = 240,
00088     K_OLIGO = 250,
00089     K_MATCHWORD = 260,
00090     K_TPPK = 270,
00091     K_REGULATORYMODULES = 280,
00092     K_SPARSESPATIALSAMPLE = 290,
00093     K_HISTOGRAMINTERSECTION = 300,
00094     K_WAVELET = 310,
00095     K_WAVE = 320,
00096     K_CAUCHY = 330,
00097     K_TSTUDENT = 340,
00098     K_RATIONAL_QUADRATIC = 350,
00099     K_MULTIQUADRIC = 360,
00100     K_EXPONENTIAL = 370,
00101     K_SPHERICAL = 380,
00102     K_SPLINE = 390,
00103     K_ANOVA = 400,
00104     K_POWER = 410,
00105     K_LOG = 420,
00106     K_CIRCULAR = 430,
00107     K_INVERSEMULTIQUADRIC = 440,
00108     K_DISTANTSEGMENTS = 450,
00109     K_BESSEL = 460,
00110 };
00111 
00113 enum EKernelProperty
00114 {
00115     KP_NONE = 0,
00116     KP_LINADD = 1,  // Kernels that can be optimized via doing normal updates w + dw
00117     KP_KERNCOMBINATION = 2, // Kernels that are infact a linear combination of subkernels K=\sum_i b_i*K_i
00118     KP_BATCHEVALUATION = 4  // Kernels that can on the fly generate normals in linadd and more quickly/memory efficient process batches instead of single examples
00119 };
00120 
00121 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00122 
00123 template <class T> struct K_THREAD_PARAM
00124 {
00126     CKernel* kernel;
00128     int32_t start;
00130     int32_t end;
00132     int32_t total_start;
00134     int32_t total_end;
00136     int32_t m;
00138     int32_t n;
00140     T* result;
00142     bool symmetric;
00144     bool verbose;
00145 };
00146 #endif
00147 
00148 class CSVM;
00149 
00175 class CKernel : public CSGObject
00176 {
00177     friend class CVarianceKernelNormalizer;
00178     friend class CSqrtDiagKernelNormalizer;
00179     friend class CAvgDiagKernelNormalizer;
00180     friend class CRidgeKernelNormalizer;
00181     friend class CFirstElementKernelNormalizer;
00182     friend class CMultitaskKernelNormalizer;
00183     friend class CMultitaskKernelMklNormalizer;
00184     friend class CMultitaskKernelMaskNormalizer;
00185     friend class CMultitaskKernelMaskPairNormalizer;
00186     friend class CTanimotoKernelNormalizer;
00187     friend class CDiceKernelNormalizer;
00188     friend class CZeroMeanCenterKernelNormalizer;
00189 
00190     public:
00191 
00195         CKernel();
00196 
00197 
00202         CKernel(int32_t size);
00203 
00210         CKernel(CFeatures* l, CFeatures* r, int32_t size);
00211 
00212         virtual ~CKernel();
00213 
00221         inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00222         {
00223             if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
00224             {
00225                 SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00226                         idx_a,num_lhs, idx_b,num_rhs);
00227             }
00228 
00229             return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00230         }
00231 
00236         SGMatrix<float64_t> get_kernel_matrix()
00237         {
00238             return get_kernel_matrix<float64_t>();
00239         }
00240 
00246         virtual SGVector<float64_t> get_kernel_col(int32_t j)
00247         {
00248 
00249             SGVector<float64_t> col = SGVector<float64_t>(num_rhs);
00250 
00251             for (int32_t i=0; i!=num_rhs; i++)
00252                 col[i] = kernel(i,j);
00253 
00254             return col;
00255         }
00256 
00257 
00263         virtual SGVector<float64_t> get_kernel_row(int32_t i)
00264         {
00265             SGVector<float64_t> row = SGVector<float64_t>(num_lhs);
00266 
00267             for (int32_t j=0; j!=num_lhs; j++)
00268                 row[j] = kernel(i,j);
00269 
00270             return row;
00271         }
00272 
00277         template <class T>
00278         SGMatrix<T> get_kernel_matrix()
00279         {
00280             T* result = NULL;
00281 
00282             if (!has_features())
00283                 SG_ERROR( "no features assigned to kernel\n");
00284 
00285             int32_t m=get_num_vec_lhs();
00286             int32_t n=get_num_vec_rhs();
00287 
00288             int64_t total_num = int64_t(m)*n;
00289 
00290             // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
00291             bool symmetric= (lhs && lhs==rhs && m==n);
00292 
00293             SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00294 
00295             result=SG_MALLOC(T, total_num);
00296 
00297             int32_t num_threads=parallel->get_num_threads();
00298             if (num_threads < 2)
00299             {
00300                 K_THREAD_PARAM<T> params;
00301                 params.kernel=this;
00302                 params.result=result;
00303                 params.start=0;
00304                 params.end=m;
00305                 params.total_start=0;
00306                 params.total_end=total_num;
00307                 params.n=n;
00308                 params.m=m;
00309                 params.symmetric=symmetric;
00310                 params.verbose=true;
00311                 get_kernel_matrix_helper<T>((void*) &params);
00312             }
00313             else
00314             {
00315                 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00316                 K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
00317                 int64_t step= total_num/num_threads;
00318 
00319                 int32_t t;
00320 
00321                 num_threads--;
00322                 for (t=0; t<num_threads; t++)
00323                 {
00324                     params[t].kernel = this;
00325                     params[t].result = result;
00326                     params[t].start = compute_row_start(t*step, n, symmetric);
00327                     params[t].end = compute_row_start((t+1)*step, n, symmetric);
00328                     params[t].total_start=t*step;
00329                     params[t].total_end=(t+1)*step;
00330                     params[t].n=n;
00331                     params[t].m=m;
00332                     params[t].symmetric=symmetric;
00333                     params[t].verbose=false;
00334 
00335                     int code=pthread_create(&threads[t], NULL,
00336                             CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);
00337 
00338                     if (code != 0)
00339                     {
00340                         SG_WARNING("Thread creation failed (thread %d of %d) "
00341                                 "with error:'%s'\n",t, num_threads, strerror(code));
00342                         num_threads=t;
00343                         break;
00344                     }
00345                 }
00346 
00347                 params[t].kernel = this;
00348                 params[t].result = result;
00349                 params[t].start = compute_row_start(t*step, n, symmetric);
00350                 params[t].end = m;
00351                 params[t].total_start=t*step;
00352                 params[t].total_end=total_num;
00353                 params[t].n=n;
00354                 params[t].m=m;
00355                 params[t].symmetric=symmetric;
00356                 params[t].verbose=true;
00357                 get_kernel_matrix_helper<T>(&params[t]);
00358 
00359                 for (t=0; t<num_threads; t++)
00360                 {
00361                     if (pthread_join(threads[t], NULL) != 0)
00362                         SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00363                 }
00364 
00365                 SG_FREE(params);
00366                 SG_FREE(threads);
00367             }
00368 
00369             SG_DONE();
00370 
00371             return SGMatrix<T>(result,m,n,true);
00372         }
00373 
00374 
00385         virtual bool init(CFeatures* lhs, CFeatures* rhs);
00386 
00391         virtual bool set_normalizer(CKernelNormalizer* normalizer);
00392 
00397         virtual CKernelNormalizer* get_normalizer();
00398 
00402         virtual bool init_normalizer();
00403 
00410         virtual void cleanup();
00411 
00416         void load(CFile* loader);
00417 
00422         void save(CFile* writer);
00423 
00428         inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00429 
00434         inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00435 
00440         virtual inline int32_t get_num_vec_lhs()
00441         {
00442             return num_lhs;
00443         }
00444 
00449         virtual inline int32_t get_num_vec_rhs()
00450         {
00451             return num_rhs;
00452         }
00453 
00458         virtual inline bool has_features()
00459         {
00460             return lhs && rhs;
00461         }
00462 
00467         inline bool get_lhs_equals_rhs()
00468         {
00469             return lhs_equals_rhs;
00470         }
00471 
00473         virtual void remove_lhs_and_rhs();
00474 
00476         virtual void remove_lhs();
00477 
00479         virtual void remove_rhs();
00480 
00488         virtual EKernelType get_kernel_type()=0 ;
00489 
00496         virtual EFeatureType get_feature_type()=0;
00497 
00504         virtual EFeatureClass get_feature_class()=0;
00505 
00510         inline void set_cache_size(int32_t size)
00511         {
00512             cache_size = size;
00513 #ifdef USE_SVMLIGHT
00514             cache_reset();
00515 #endif //USE_SVMLIGHT
00516         }
00517 
00522         inline int32_t get_cache_size() { return cache_size; }
00523 
00524 #ifdef USE_SVMLIGHT
00525 
00526         inline void cache_reset() { resize_kernel_cache(cache_size); }
00527 
00532         inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
00533 
00538         inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
00539 
00547         void get_kernel_row(
00548             int32_t docnum, int32_t *active2dnum, float64_t *buffer,
00549             bool full_line=false);
00550 
00555         void cache_kernel_row(int32_t x);
00556 
00562         void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
00563 
00565         void kernel_cache_reset_lru();
00566 
00573         void kernel_cache_shrink(
00574             int32_t totdoc, int32_t num_shrink, int32_t *after);
00575 
00581         void resize_kernel_cache(KERNELCACHE_IDX size,
00582             bool regression_hack=false);
00583 
00588         inline void set_time(int32_t t)
00589         {
00590             kernel_cache.time=t;
00591         }
00592 
00598         inline int32_t kernel_cache_touch(int32_t cacheidx)
00599         {
00600             if(kernel_cache.index[cacheidx] != -1)
00601             {
00602                 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
00603                 return(1);
00604             }
00605             return(0);
00606         }
00607 
00613         inline int32_t kernel_cache_check(int32_t cacheidx)
00614         {
00615             return(kernel_cache.index[cacheidx] >= 0);
00616         }
00617 
00622         inline int32_t kernel_cache_space_available()
00623         {
00624             return(kernel_cache.elems < kernel_cache.max_elems);
00625         }
00626 
00632         void kernel_cache_init(int32_t size, bool regression_hack=false);
00633 
00635         void kernel_cache_cleanup();
00636 
00637 #endif //USE_SVMLIGHT
00638 
00640         void list_kernel();
00641 
00647         inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00648 
00652         virtual void clear_normal();
00653 
00659         virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00660 
00665         inline EOptimizationType get_optimization_type() { return opt_type; }
00666 
00671         virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00672 
00677         inline bool get_is_initialized() { return optimization_initialized; }
00678 
00686         virtual bool init_optimization(
00687             int32_t count, int32_t *IDX, float64_t *weights);
00688 
00693         virtual bool delete_optimization();
00694 
00700         bool init_optimization_svm(CSVM * svm) ;
00701 
00707         virtual float64_t compute_optimized(int32_t vector_idx);
00708 
00717         virtual void compute_batch(
00718             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00719             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00720             float64_t factor=1.0);
00721 
00726         inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00727 
00732         inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00733 
00738         virtual int32_t get_num_subkernels();
00739 
00745         virtual void compute_by_subkernel(
00746             int32_t vector_idx, float64_t * subkernel_contrib);
00747 
00753         virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00754 
00759         virtual void set_subkernel_weights(SGVector<float64_t> weights);
00760 
00761     protected:
00766         inline void set_property(EKernelProperty p)
00767         {
00768             properties |= p;
00769         }
00770 
00775         inline void unset_property(EKernelProperty p)
00776         {
00777             properties &= (properties | p) ^ p;
00778         }
00779 
00784         inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00785 
00796         virtual float64_t compute(int32_t x, int32_t y)=0;
00797 
00804         int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00805         {
00806             int32_t i_start;
00807 
00808             if (symmetric)
00809                 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00810             else
00811                 i_start=(int32_t) (offs/int64_t(n));
00812 
00813             return i_start;
00814         }
00815 
00820         template <class T>
00821         static void* get_kernel_matrix_helper(void* p)
00822         {
00823             K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00824             int32_t i_start=params->start;
00825             int32_t i_end=params->end;
00826             CKernel* k=params->kernel;
00827             T* result=params->result;
00828             bool symmetric=params->symmetric;
00829             int32_t n=params->n;
00830             int32_t m=params->m;
00831             bool verbose=params->verbose;
00832             int64_t total_start=params->total_start;
00833             int64_t total_end=params->total_end;
00834             int64_t total=total_start;
00835 
00836             for (int32_t i=i_start; i<i_end; i++)
00837             {
00838                 int32_t j_start=0;
00839 
00840                 if (symmetric)
00841                     j_start=i;
00842 
00843                 for (int32_t j=j_start; j<n; j++)
00844                 {
00845                     float64_t v=k->kernel(i,j);
00846                     result[i+j*m]=v;
00847 
00848                     if (symmetric && i!=j)
00849                         result[j+i*m]=v;
00850 
00851                     if (verbose)
00852                     {
00853                         total++;
00854 
00855                         if (symmetric && i!=j)
00856                             total++;
00857 
00858                         if (total%100 == 0)
00859                             k->SG_PROGRESS(total, total_start, total_end);
00860 
00861                         if (CSignal::cancel_computations())
00862                             break;
00863                     }
00864                 }
00865 
00866             }
00867 
00868             return NULL;
00869         }
00870 
00879         virtual void load_serializable_post() throw (ShogunException);
00880 
00889         virtual void save_serializable_pre() throw (ShogunException);
00890 
00899         virtual void save_serializable_post() throw (ShogunException);
00904         virtual void register_params();
00905 
00906     private:
00909         void init();
00910 
00911 
00912 #ifdef USE_SVMLIGHT
00913 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00914 
00915         struct KERNEL_CACHE {
00917             int32_t   *index;
00919             int32_t   *invindex;
00921             int32_t   *active2totdoc;
00923             int32_t   *totdoc2active;
00925             int32_t   *lru;
00927             int32_t   *occu;
00929             int32_t   elems;
00931             int32_t   max_elems;
00933             int32_t   time;
00935             int32_t   activenum;
00936 
00938             KERNELCACHE_ELEM  *buffer;
00940             KERNELCACHE_IDX   buffsize;
00941         };
00942 
00944         struct S_KTHREAD_PARAM
00945         {
00947             CKernel* kernel;
00949             KERNEL_CACHE* kernel_cache;
00951             KERNELCACHE_ELEM** cache;
00953             int32_t* uncached_rows;
00955             int32_t num_uncached;
00957             uint8_t* needs_computation;
00959             int32_t start;
00961             int32_t end;
00963             int32_t num_vectors;
00964         };
00965 #endif // DOXYGEN_SHOULD_SKIP_THIS
00966 
00968         static void* cache_multiple_kernel_row_helper(void* p);
00969 
00971         void   kernel_cache_free(int32_t cacheidx);
00972         int32_t   kernel_cache_malloc();
00973         int32_t   kernel_cache_free_lru();
00974         KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
00975 #endif //USE_SVMLIGHT
00976 
00977 
00978     protected:
00980         int32_t cache_size;
00981 
00982 #ifdef USE_SVMLIGHT
00983 
00984         KERNEL_CACHE kernel_cache;
00985 #endif //USE_SVMLIGHT
00986 
00989         KERNELCACHE_ELEM* kernel_matrix;
00990 
00992         CFeatures* lhs;
00994         CFeatures* rhs;
00995 
00997         bool lhs_equals_rhs;
00998 
01000         int32_t num_lhs;
01002         int32_t num_rhs;
01003 
01005         float64_t combined_kernel_weight;
01006 
01008         bool optimization_initialized;
01012         EOptimizationType opt_type;
01013 
01015         uint64_t  properties;
01016 
01019         CKernelNormalizer* normalizer;
01020 };
01021 
01022 }
01023 #endif /* _KERNEL_H__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation