00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef _KERNEL_H___
00016 #define _KERNEL_H___
00017
00018 #include <shogun/lib/common.h>
00019 #include <shogun/lib/Signal.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/mathematics/Math.h>
00022 #include <shogun/features/FeatureTypes.h>
00023 #include <shogun/base/SGObject.h>
00024 #include <shogun/features/Features.h>
00025 #include <shogun/kernel/KernelNormalizer.h>
00026
00027 #include <vector>
00028
00029 namespace shogun
00030 {
00031 class CFile;
00032 class CFeatures;
00033 class CKernelNormalizer;
00034
00035 #ifdef USE_SHORTREAL_KERNELCACHE
00036
00037 typedef float32_t KERNELCACHE_ELEM;
00038 #else
00039
00040 typedef float64_t KERNELCACHE_ELEM;
00041 #endif
00042
00044 typedef int64_t KERNELCACHE_IDX;
00045
00046
00048 enum EOptimizationType
00049 {
00050 FASTBUTMEMHUNGRY,
00051 SLOWBUTMEMEFFICIENT
00052 };
00053
00055 enum EKernelType
00056 {
00057 K_UNKNOWN = 0,
00058 K_LINEAR = 10,
00059 K_POLY = 20,
00060 K_GAUSSIAN = 30,
00061 K_GAUSSIANSHIFT = 32,
00062 K_GAUSSIANMATCH = 33,
00063 K_HISTOGRAM = 40,
00064 K_SALZBERG = 41,
00065 K_LOCALITYIMPROVED = 50,
00066 K_SIMPLELOCALITYIMPROVED = 60,
00067 K_FIXEDDEGREE = 70,
00068 K_WEIGHTEDDEGREE = 80,
00069 K_WEIGHTEDDEGREEPOS = 81,
00070 K_WEIGHTEDDEGREERBF = 82,
00071 K_WEIGHTEDCOMMWORDSTRING = 90,
00072 K_POLYMATCH = 100,
00073 K_ALIGNMENT = 110,
00074 K_COMMWORDSTRING = 120,
00075 K_COMMULONGSTRING = 121,
00076 K_SPECTRUMRBF = 122,
00077 K_SPECTRUMMISMATCHRBF = 123,
00078 K_COMBINED = 140,
00079 K_AUC = 150,
00080 K_CUSTOM = 160,
00081 K_SIGMOID = 170,
00082 K_CHI2 = 180,
00083 K_DIAG = 190,
00084 K_CONST = 200,
00085 K_DISTANCE = 220,
00086 K_LOCALALIGNMENT = 230,
00087 K_PYRAMIDCHI2 = 240,
00088 K_OLIGO = 250,
00089 K_MATCHWORD = 260,
00090 K_TPPK = 270,
00091 K_REGULATORYMODULES = 280,
00092 K_SPARSESPATIALSAMPLE = 290,
00093 K_HISTOGRAMINTERSECTION = 300,
00094 K_WAVELET = 310,
00095 K_WAVE = 320,
00096 K_CAUCHY = 330,
00097 K_TSTUDENT = 340,
00098 K_RATIONAL_QUADRATIC = 350,
00099 K_MULTIQUADRIC = 360,
00100 K_EXPONENTIAL = 370,
00101 K_SPHERICAL = 380,
00102 K_SPLINE = 390,
00103 K_ANOVA = 400,
00104 K_POWER = 410,
00105 K_LOG = 420,
00106 K_CIRCULAR = 430,
00107 K_INVERSEMULTIQUADRIC = 440,
00108 K_DISTANTSEGMENTS = 450,
00109 K_BESSEL = 460,
00110 };
00111
00113 enum EKernelProperty
00114 {
00115 KP_NONE = 0,
00116 KP_LINADD = 1,
00117 KP_KERNCOMBINATION = 2,
00118 KP_BATCHEVALUATION = 4
00119 };
00120
00121 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00122
00123 template <class T> struct K_THREAD_PARAM
00124 {
00126 CKernel* kernel;
00128 int32_t start;
00130 int32_t end;
00132 int32_t total_start;
00134 int32_t total_end;
00136 int32_t m;
00138 int32_t n;
00140 T* result;
00142 bool symmetric;
00144 bool verbose;
00145 };
00146 #endif
00147
00148 class CSVM;
00149
00175 class CKernel : public CSGObject
00176 {
00177 friend class CVarianceKernelNormalizer;
00178 friend class CSqrtDiagKernelNormalizer;
00179 friend class CAvgDiagKernelNormalizer;
00180 friend class CRidgeKernelNormalizer;
00181 friend class CFirstElementKernelNormalizer;
00182 friend class CMultitaskKernelNormalizer;
00183 friend class CMultitaskKernelMklNormalizer;
00184 friend class CMultitaskKernelMaskNormalizer;
00185 friend class CMultitaskKernelMaskPairNormalizer;
00186 friend class CTanimotoKernelNormalizer;
00187 friend class CDiceKernelNormalizer;
00188 friend class CZeroMeanCenterKernelNormalizer;
00189
00190 public:
00191
00195 CKernel();
00196
00197
00202 CKernel(int32_t size);
00203
00210 CKernel(CFeatures* l, CFeatures* r, int32_t size);
00211
00212 virtual ~CKernel();
00213
00221 inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00222 {
00223 if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
00224 {
00225 SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00226 idx_a,num_lhs, idx_b,num_rhs);
00227 }
00228
00229 return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00230 }
00231
00236 SGMatrix<float64_t> get_kernel_matrix()
00237 {
00238 return get_kernel_matrix<float64_t>();
00239 }
00240
00246 virtual SGVector<float64_t> get_kernel_col(int32_t j)
00247 {
00248
00249 SGVector<float64_t> col = SGVector<float64_t>(num_rhs);
00250
00251 for (int32_t i=0; i!=num_rhs; i++)
00252 col[i] = kernel(i,j);
00253
00254 return col;
00255 }
00256
00257
00263 virtual SGVector<float64_t> get_kernel_row(int32_t i)
00264 {
00265 SGVector<float64_t> row = SGVector<float64_t>(num_lhs);
00266
00267 for (int32_t j=0; j!=num_lhs; j++)
00268 row[j] = kernel(i,j);
00269
00270 return row;
00271 }
00272
00277 template <class T>
00278 SGMatrix<T> get_kernel_matrix()
00279 {
00280 T* result = NULL;
00281
00282 if (!has_features())
00283 SG_ERROR( "no features assigned to kernel\n");
00284
00285 int32_t m=get_num_vec_lhs();
00286 int32_t n=get_num_vec_rhs();
00287
00288 int64_t total_num = int64_t(m)*n;
00289
00290
00291 bool symmetric= (lhs && lhs==rhs && m==n);
00292
00293 SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00294
00295 result=SG_MALLOC(T, total_num);
00296
00297 int32_t num_threads=parallel->get_num_threads();
00298 if (num_threads < 2)
00299 {
00300 K_THREAD_PARAM<T> params;
00301 params.kernel=this;
00302 params.result=result;
00303 params.start=0;
00304 params.end=m;
00305 params.total_start=0;
00306 params.total_end=total_num;
00307 params.n=n;
00308 params.m=m;
00309 params.symmetric=symmetric;
00310 params.verbose=true;
00311 get_kernel_matrix_helper<T>((void*) ¶ms);
00312 }
00313 else
00314 {
00315 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00316 K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
00317 int64_t step= total_num/num_threads;
00318
00319 int32_t t;
00320
00321 num_threads--;
00322 for (t=0; t<num_threads; t++)
00323 {
00324 params[t].kernel = this;
00325 params[t].result = result;
00326 params[t].start = compute_row_start(t*step, n, symmetric);
00327 params[t].end = compute_row_start((t+1)*step, n, symmetric);
00328 params[t].total_start=t*step;
00329 params[t].total_end=(t+1)*step;
00330 params[t].n=n;
00331 params[t].m=m;
00332 params[t].symmetric=symmetric;
00333 params[t].verbose=false;
00334
00335 int code=pthread_create(&threads[t], NULL,
00336 CKernel::get_kernel_matrix_helper<T>, (void*)¶ms[t]);
00337
00338 if (code != 0)
00339 {
00340 SG_WARNING("Thread creation failed (thread %d of %d) "
00341 "with error:'%s'\n",t, num_threads, strerror(code));
00342 num_threads=t;
00343 break;
00344 }
00345 }
00346
00347 params[t].kernel = this;
00348 params[t].result = result;
00349 params[t].start = compute_row_start(t*step, n, symmetric);
00350 params[t].end = m;
00351 params[t].total_start=t*step;
00352 params[t].total_end=total_num;
00353 params[t].n=n;
00354 params[t].m=m;
00355 params[t].symmetric=symmetric;
00356 params[t].verbose=true;
00357 get_kernel_matrix_helper<T>(¶ms[t]);
00358
00359 for (t=0; t<num_threads; t++)
00360 {
00361 if (pthread_join(threads[t], NULL) != 0)
00362 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00363 }
00364
00365 SG_FREE(params);
00366 SG_FREE(threads);
00367 }
00368
00369 SG_DONE();
00370
00371 return SGMatrix<T>(result,m,n,true);
00372 }
00373
00374
00385 virtual bool init(CFeatures* lhs, CFeatures* rhs);
00386
00391 virtual bool set_normalizer(CKernelNormalizer* normalizer);
00392
00397 virtual CKernelNormalizer* get_normalizer();
00398
00402 virtual bool init_normalizer();
00403
00410 virtual void cleanup();
00411
00416 void load(CFile* loader);
00417
00422 void save(CFile* writer);
00423
00428 inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00429
00434 inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00435
00440 virtual inline int32_t get_num_vec_lhs()
00441 {
00442 return num_lhs;
00443 }
00444
00449 virtual inline int32_t get_num_vec_rhs()
00450 {
00451 return num_rhs;
00452 }
00453
00458 virtual inline bool has_features()
00459 {
00460 return lhs && rhs;
00461 }
00462
00467 inline bool get_lhs_equals_rhs()
00468 {
00469 return lhs_equals_rhs;
00470 }
00471
00473 virtual void remove_lhs_and_rhs();
00474
00476 virtual void remove_lhs();
00477
00479 virtual void remove_rhs();
00480
00488 virtual EKernelType get_kernel_type()=0 ;
00489
00496 virtual EFeatureType get_feature_type()=0;
00497
00504 virtual EFeatureClass get_feature_class()=0;
00505
00510 inline void set_cache_size(int32_t size)
00511 {
00512 cache_size = size;
00513 #ifdef USE_SVMLIGHT
00514 cache_reset();
00515 #endif //USE_SVMLIGHT
00516 }
00517
00522 inline int32_t get_cache_size() { return cache_size; }
00523
00524 #ifdef USE_SVMLIGHT
00525
00526 inline void cache_reset() { resize_kernel_cache(cache_size); }
00527
00532 inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
00533
00538 inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
00539
00547 void get_kernel_row(
00548 int32_t docnum, int32_t *active2dnum, float64_t *buffer,
00549 bool full_line=false);
00550
00555 void cache_kernel_row(int32_t x);
00556
00562 void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
00563
00565 void kernel_cache_reset_lru();
00566
00573 void kernel_cache_shrink(
00574 int32_t totdoc, int32_t num_shrink, int32_t *after);
00575
00581 void resize_kernel_cache(KERNELCACHE_IDX size,
00582 bool regression_hack=false);
00583
00588 inline void set_time(int32_t t)
00589 {
00590 kernel_cache.time=t;
00591 }
00592
00598 inline int32_t kernel_cache_touch(int32_t cacheidx)
00599 {
00600 if(kernel_cache.index[cacheidx] != -1)
00601 {
00602 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
00603 return(1);
00604 }
00605 return(0);
00606 }
00607
00613 inline int32_t kernel_cache_check(int32_t cacheidx)
00614 {
00615 return(kernel_cache.index[cacheidx] >= 0);
00616 }
00617
00622 inline int32_t kernel_cache_space_available()
00623 {
00624 return(kernel_cache.elems < kernel_cache.max_elems);
00625 }
00626
00632 void kernel_cache_init(int32_t size, bool regression_hack=false);
00633
00635 void kernel_cache_cleanup();
00636
00637 #endif //USE_SVMLIGHT
00638
00640 void list_kernel();
00641
00647 inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00648
00652 virtual void clear_normal();
00653
00659 virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00660
00665 inline EOptimizationType get_optimization_type() { return opt_type; }
00666
00671 virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00672
00677 inline bool get_is_initialized() { return optimization_initialized; }
00678
00686 virtual bool init_optimization(
00687 int32_t count, int32_t *IDX, float64_t *weights);
00688
00693 virtual bool delete_optimization();
00694
00700 bool init_optimization_svm(CSVM * svm) ;
00701
00707 virtual float64_t compute_optimized(int32_t vector_idx);
00708
00717 virtual void compute_batch(
00718 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00719 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00720 float64_t factor=1.0);
00721
00726 inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00727
00732 inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00733
00738 virtual int32_t get_num_subkernels();
00739
00745 virtual void compute_by_subkernel(
00746 int32_t vector_idx, float64_t * subkernel_contrib);
00747
00753 virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00754
00759 virtual void set_subkernel_weights(SGVector<float64_t> weights);
00760
00761 protected:
00766 inline void set_property(EKernelProperty p)
00767 {
00768 properties |= p;
00769 }
00770
00775 inline void unset_property(EKernelProperty p)
00776 {
00777 properties &= (properties | p) ^ p;
00778 }
00779
00784 inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00785
00796 virtual float64_t compute(int32_t x, int32_t y)=0;
00797
00804 int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00805 {
00806 int32_t i_start;
00807
00808 if (symmetric)
00809 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00810 else
00811 i_start=(int32_t) (offs/int64_t(n));
00812
00813 return i_start;
00814 }
00815
00820 template <class T>
00821 static void* get_kernel_matrix_helper(void* p)
00822 {
00823 K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00824 int32_t i_start=params->start;
00825 int32_t i_end=params->end;
00826 CKernel* k=params->kernel;
00827 T* result=params->result;
00828 bool symmetric=params->symmetric;
00829 int32_t n=params->n;
00830 int32_t m=params->m;
00831 bool verbose=params->verbose;
00832 int64_t total_start=params->total_start;
00833 int64_t total_end=params->total_end;
00834 int64_t total=total_start;
00835
00836 for (int32_t i=i_start; i<i_end; i++)
00837 {
00838 int32_t j_start=0;
00839
00840 if (symmetric)
00841 j_start=i;
00842
00843 for (int32_t j=j_start; j<n; j++)
00844 {
00845 float64_t v=k->kernel(i,j);
00846 result[i+j*m]=v;
00847
00848 if (symmetric && i!=j)
00849 result[j+i*m]=v;
00850
00851 if (verbose)
00852 {
00853 total++;
00854
00855 if (symmetric && i!=j)
00856 total++;
00857
00858 if (total%100 == 0)
00859 k->SG_PROGRESS(total, total_start, total_end);
00860
00861 if (CSignal::cancel_computations())
00862 break;
00863 }
00864 }
00865
00866 }
00867
00868 return NULL;
00869 }
00870
00879 virtual void load_serializable_post() throw (ShogunException);
00880
00889 virtual void save_serializable_pre() throw (ShogunException);
00890
00899 virtual void save_serializable_post() throw (ShogunException);
00904 virtual void register_params();
00905
00906 private:
00909 void init();
00910
00911
00912 #ifdef USE_SVMLIGHT
00913 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00914
00915 struct KERNEL_CACHE {
00917 int32_t *index;
00919 int32_t *invindex;
00921 int32_t *active2totdoc;
00923 int32_t *totdoc2active;
00925 int32_t *lru;
00927 int32_t *occu;
00929 int32_t elems;
00931 int32_t max_elems;
00933 int32_t time;
00935 int32_t activenum;
00936
00938 KERNELCACHE_ELEM *buffer;
00940 KERNELCACHE_IDX buffsize;
00941 };
00942
00944 struct S_KTHREAD_PARAM
00945 {
00947 CKernel* kernel;
00949 KERNEL_CACHE* kernel_cache;
00951 KERNELCACHE_ELEM** cache;
00953 int32_t* uncached_rows;
00955 int32_t num_uncached;
00957 uint8_t* needs_computation;
00959 int32_t start;
00961 int32_t end;
00963 int32_t num_vectors;
00964 };
00965 #endif // DOXYGEN_SHOULD_SKIP_THIS
00966
00968 static void* cache_multiple_kernel_row_helper(void* p);
00969
00971 void kernel_cache_free(int32_t cacheidx);
00972 int32_t kernel_cache_malloc();
00973 int32_t kernel_cache_free_lru();
00974 KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
00975 #endif //USE_SVMLIGHT
00976
00977
00978 protected:
00980 int32_t cache_size;
00981
00982 #ifdef USE_SVMLIGHT
00983
00984 KERNEL_CACHE kernel_cache;
00985 #endif //USE_SVMLIGHT
00986
00989 KERNELCACHE_ELEM* kernel_matrix;
00990
00992 CFeatures* lhs;
00994 CFeatures* rhs;
00995
00997 bool lhs_equals_rhs;
00998
01000 int32_t num_lhs;
01002 int32_t num_rhs;
01003
01005 float64_t combined_kernel_weight;
01006
01008 bool optimization_initialized;
01012 EOptimizationType opt_type;
01013
01015 uint64_t properties;
01016
01019 CKernelNormalizer* normalizer;
01020 };
01021
01022 }
01023 #endif