00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef _KERNEL_H___
00016 #define _KERNEL_H___
00017
00018 #include <shogun/lib/common.h>
00019 #include <shogun/lib/Signal.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/mathematics/Math.h>
00022 #include <shogun/features/FeatureTypes.h>
00023 #include <shogun/base/SGObject.h>
00024 #include <shogun/features/Features.h>
00025 #include <shogun/kernel/KernelNormalizer.h>
00026
00027 #include <vector>
00028 #include <set>
00029 #include <string>
00030
00031 namespace shogun
00032 {
00033 class CFile;
00034 class CFeatures;
00035 class CKernelNormalizer;
00036 enum EFeatureType;
00037 enum EFeatureClass;
00038
00039 #ifdef USE_SHORTREAL_KERNELCACHE
00040
00041 typedef float32_t KERNELCACHE_ELEM;
00042 #else
00043
00044 typedef float64_t KERNELCACHE_ELEM;
00045 #endif
00046
00048 typedef int64_t KERNELCACHE_IDX;
00049
00050
00052 enum EOptimizationType
00053 {
00054 FASTBUTMEMHUNGRY,
00055 SLOWBUTMEMEFFICIENT
00056 };
00057
00059 enum EKernelType
00060 {
00061 K_UNKNOWN = 0,
00062 K_LINEAR = 10,
00063 K_POLY = 20,
00064 K_GAUSSIAN = 30,
00065 K_GAUSSIANSHIFT = 32,
00066 K_GAUSSIANMATCH = 33,
00067 K_HISTOGRAM = 40,
00068 K_SALZBERG = 41,
00069 K_LOCALITYIMPROVED = 50,
00070 K_SIMPLELOCALITYIMPROVED = 60,
00071 K_FIXEDDEGREE = 70,
00072 K_WEIGHTEDDEGREE = 80,
00073 K_WEIGHTEDDEGREEPOS = 81,
00074 K_WEIGHTEDDEGREERBF = 82,
00075 K_WEIGHTEDCOMMWORDSTRING = 90,
00076 K_POLYMATCH = 100,
00077 K_ALIGNMENT = 110,
00078 K_COMMWORDSTRING = 120,
00079 K_COMMULONGSTRING = 121,
00080 K_SPECTRUMRBF = 122,
00081 K_SPECTRUMMISMATCHRBF = 123,
00082 K_COMBINED = 140,
00083 K_AUC = 150,
00084 K_CUSTOM = 160,
00085 K_SIGMOID = 170,
00086 K_CHI2 = 180,
00087 K_DIAG = 190,
00088 K_CONST = 200,
00089 K_DISTANCE = 220,
00090 K_LOCALALIGNMENT = 230,
00091 K_PYRAMIDCHI2 = 240,
00092 K_OLIGO = 250,
00093 K_MATCHWORD = 260,
00094 K_TPPK = 270,
00095 K_REGULATORYMODULES = 280,
00096 K_SPARSESPATIALSAMPLE = 290,
00097 K_HISTOGRAMINTERSECTION = 300,
00098 K_WAVELET = 310,
00099 K_WAVE = 320,
00100 K_CAUCHY = 330,
00101 K_TSTUDENT = 340,
00102 K_RATIONAL_QUADRATIC = 350,
00103 K_MULTIQUADRIC = 360,
00104 K_EXPONENTIAL = 370,
00105 K_SPHERICAL = 380,
00106 K_SPLINE = 390,
00107 K_ANOVA = 400,
00108 K_POWER = 410,
00109 K_LOG = 420,
00110 K_CIRCULAR = 430,
00111 K_INVERSEMULTIQUADRIC = 440,
00112 K_DISTANTSEGMENTS = 450,
00113 K_BESSEL = 460,
00114 };
00115
00117 enum EKernelProperty
00118 {
00119 KP_NONE = 0,
00120 KP_LINADD = 1,
00121 KP_KERNCOMBINATION = 2,
00122 KP_BATCHEVALUATION = 4
00123 };
00124
00125 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00126
00127 template <class T> struct K_THREAD_PARAM
00128 {
00130 CKernel* kernel;
00132 int32_t start;
00134 int32_t end;
00136 int32_t total_start;
00138 int32_t total_end;
00140 int32_t m;
00142 int32_t n;
00144 T* result;
00146 bool symmetric;
00148 bool verbose;
00149 };
00150 #endif
00151
00152 class CSVM;
00153
00179 class CKernel : public CSGObject
00180 {
00181 friend class CVarianceKernelNormalizer;
00182 friend class CSqrtDiagKernelNormalizer;
00183 friend class CAvgDiagKernelNormalizer;
00184 friend class CRidgeKernelNormalizer;
00185 friend class CFirstElementKernelNormalizer;
00186 friend class CMultitaskKernelNormalizer;
00187 friend class CMultitaskKernelMklNormalizer;
00188 friend class CMultitaskKernelMaskNormalizer;
00189 friend class CMultitaskKernelMaskPairNormalizer;
00190 friend class CTanimotoKernelNormalizer;
00191 friend class CDiceKernelNormalizer;
00192 friend class CZeroMeanCenterKernelNormalizer;
00193
00194 public:
00195
00199 CKernel();
00200
00201
00206 CKernel(int32_t size);
00207
00214 CKernel(CFeatures* l, CFeatures* r, int32_t size);
00215
00216 virtual ~CKernel();
00217
00225 inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00226 {
00227 if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
00228 {
00229 SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00230 idx_a,num_lhs, idx_b,num_rhs);
00231 }
00232
00233 return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00234 }
00235
00240 SGMatrix<float64_t> get_kernel_matrix()
00241 {
00242 return get_kernel_matrix<float64_t>();
00243 }
00244
00250 virtual std::vector<float64_t> get_kernel_col(int32_t j)
00251 {
00252
00253 std::vector<float64_t> col = std::vector<float64_t>(num_rhs);
00254
00255 for (int32_t i=0; i!=num_rhs; i++)
00256 {
00257 col[i] = kernel(i,j);
00258 }
00259
00260 return col;
00261
00262 }
00263
00264
00270 virtual std::vector<float64_t> get_kernel_row(int32_t i)
00271 {
00272
00273 std::vector<float64_t> row = std::vector<float64_t>(num_lhs);
00274
00275 for (int32_t j=0; j!=num_lhs; j++)
00276 {
00277 row[j] = kernel(i,j);
00278 }
00279
00280 return row;
00281
00282 }
00283
00288 template <class T>
00289 SGMatrix<T> get_kernel_matrix()
00290 {
00291 T* result = NULL;
00292
00293 if (!has_features())
00294 SG_ERROR( "no features assigned to kernel\n");
00295
00296 int32_t m=get_num_vec_lhs();
00297 int32_t n=get_num_vec_rhs();
00298
00299 int64_t total_num = int64_t(m)*n;
00300
00301
00302 bool symmetric= (lhs && lhs==rhs && m==n);
00303
00304 SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00305
00306 result=SG_MALLOC(T, total_num);
00307
00308 int32_t num_threads=parallel->get_num_threads();
00309 if (num_threads < 2)
00310 {
00311 K_THREAD_PARAM<T> params;
00312 params.kernel=this;
00313 params.result=result;
00314 params.start=0;
00315 params.end=m;
00316 params.total_start=0;
00317 params.total_end=total_num;
00318 params.n=n;
00319 params.m=m;
00320 params.symmetric=symmetric;
00321 params.verbose=true;
00322 get_kernel_matrix_helper<T>((void*) ¶ms);
00323 }
00324 else
00325 {
00326 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00327 K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
00328 int64_t step= total_num/num_threads;
00329
00330 int32_t t;
00331
00332 num_threads--;
00333 for (t=0; t<num_threads; t++)
00334 {
00335 params[t].kernel = this;
00336 params[t].result = result;
00337 params[t].start = compute_row_start(t*step, n, symmetric);
00338 params[t].end = compute_row_start((t+1)*step, n, symmetric);
00339 params[t].total_start=t*step;
00340 params[t].total_end=(t+1)*step;
00341 params[t].n=n;
00342 params[t].m=m;
00343 params[t].symmetric=symmetric;
00344 params[t].verbose=false;
00345
00346 int code=pthread_create(&threads[t], NULL,
00347 CKernel::get_kernel_matrix_helper<T>, (void*)¶ms[t]);
00348
00349 if (code != 0)
00350 {
00351 SG_WARNING("Thread creation failed (thread %d of %d) "
00352 "with error:'%s'\n",t, num_threads, strerror(code));
00353 num_threads=t;
00354 break;
00355 }
00356 }
00357
00358 params[t].kernel = this;
00359 params[t].result = result;
00360 params[t].start = compute_row_start(t*step, n, symmetric);
00361 params[t].end = m;
00362 params[t].total_start=t*step;
00363 params[t].total_end=total_num;
00364 params[t].n=n;
00365 params[t].m=m;
00366 params[t].symmetric=symmetric;
00367 params[t].verbose=true;
00368 get_kernel_matrix_helper<T>(¶ms[t]);
00369
00370 for (t=0; t<num_threads; t++)
00371 {
00372 if (pthread_join(threads[t], NULL) != 0)
00373 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00374 }
00375
00376 SG_FREE(params);
00377 SG_FREE(threads);
00378 }
00379
00380 SG_DONE();
00381
00382 return SGMatrix<T>(result,m,n);
00383 }
00384
00385
00396 virtual bool init(CFeatures* lhs, CFeatures* rhs);
00397
00402 virtual bool set_normalizer(CKernelNormalizer* normalizer);
00403
00408 virtual CKernelNormalizer* get_normalizer();
00409
00413 virtual bool init_normalizer();
00414
00421 virtual void cleanup();
00422
00427 void load(CFile* loader);
00428
00433 void save(CFile* writer);
00434
00439 inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00440
00445 inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00446
00451 virtual inline int32_t get_num_vec_lhs()
00452 {
00453 return num_lhs;
00454 }
00455
00460 virtual inline int32_t get_num_vec_rhs()
00461 {
00462 return num_rhs;
00463 }
00464
00469 virtual inline bool has_features()
00470 {
00471 return lhs && rhs;
00472 }
00473
00478 inline bool get_lhs_equals_rhs()
00479 {
00480 return lhs_equals_rhs;
00481 }
00482
00484 virtual void remove_lhs_and_rhs();
00485
00487 virtual void remove_lhs();
00488
00490 virtual void remove_rhs();
00491
00499 virtual EKernelType get_kernel_type()=0 ;
00500
00507 virtual EFeatureType get_feature_type()=0;
00508
00515 virtual EFeatureClass get_feature_class()=0;
00516
00521 inline void set_cache_size(int32_t size)
00522 {
00523 cache_size = size;
00524 #ifdef USE_SVMLIGHT
00525 cache_reset();
00526 #endif //USE_SVMLIGHT
00527 }
00528
00533 inline int32_t get_cache_size() { return cache_size; }
00534
00535 #ifdef USE_SVMLIGHT
00536
00537 inline void cache_reset() { resize_kernel_cache(cache_size); }
00538
00543 inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
00544
00549 inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
00550
00558 void get_kernel_row(
00559 int32_t docnum, int32_t *active2dnum, float64_t *buffer,
00560 bool full_line=false);
00561
00566 void cache_kernel_row(int32_t x);
00567
00573 void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
00574
00576 void kernel_cache_reset_lru();
00577
00584 void kernel_cache_shrink(
00585 int32_t totdoc, int32_t num_shrink, int32_t *after);
00586
00592 void resize_kernel_cache(KERNELCACHE_IDX size,
00593 bool regression_hack=false);
00594
00599 inline void set_time(int32_t t)
00600 {
00601 kernel_cache.time=t;
00602 }
00603
00609 inline int32_t kernel_cache_touch(int32_t cacheidx)
00610 {
00611 if(kernel_cache.index[cacheidx] != -1)
00612 {
00613 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
00614 return(1);
00615 }
00616 return(0);
00617 }
00618
00624 inline int32_t kernel_cache_check(int32_t cacheidx)
00625 {
00626 return(kernel_cache.index[cacheidx] >= 0);
00627 }
00628
00633 inline int32_t kernel_cache_space_available()
00634 {
00635 return(kernel_cache.elems < kernel_cache.max_elems);
00636 }
00637
00643 void kernel_cache_init(int32_t size, bool regression_hack=false);
00644
00646 void kernel_cache_cleanup();
00647
00648 #endif //USE_SVMLIGHT
00649
00651 void list_kernel();
00652
00658 inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00659
00663 virtual void clear_normal();
00664
00670 virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00671
00676 inline EOptimizationType get_optimization_type() { return opt_type; }
00677
00682 virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00683
00688 inline bool get_is_initialized() { return optimization_initialized; }
00689
00697 virtual bool init_optimization(
00698 int32_t count, int32_t *IDX, float64_t *weights);
00699
00704 virtual bool delete_optimization();
00705
00711 bool init_optimization_svm(CSVM * svm) ;
00712
00718 virtual float64_t compute_optimized(int32_t vector_idx);
00719
00728 virtual void compute_batch(
00729 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00730 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00731 float64_t factor=1.0);
00732
00737 inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00738
00743 inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00744
00749 virtual int32_t get_num_subkernels();
00750
00756 virtual void compute_by_subkernel(
00757 int32_t vector_idx, float64_t * subkernel_contrib);
00758
00764 virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00765
00771 virtual void set_subkernel_weights(
00772 float64_t* weights, int32_t num_weights);
00773
00774 protected:
00779 inline void set_property(EKernelProperty p)
00780 {
00781 properties |= p;
00782 }
00783
00788 inline void unset_property(EKernelProperty p)
00789 {
00790 properties &= (properties | p) ^ p;
00791 }
00792
00797 inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00798
00809 virtual float64_t compute(int32_t x, int32_t y)=0;
00810
00817 int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00818 {
00819 int32_t i_start;
00820
00821 if (symmetric)
00822 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00823 else
00824 i_start=(int32_t) (offs/int64_t(n));
00825
00826 return i_start;
00827 }
00828
00833 template <class T>
00834 static void* get_kernel_matrix_helper(void* p)
00835 {
00836 K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00837 int32_t i_start=params->start;
00838 int32_t i_end=params->end;
00839 CKernel* k=params->kernel;
00840 T* result=params->result;
00841 bool symmetric=params->symmetric;
00842 int32_t n=params->n;
00843 int32_t m=params->m;
00844 bool verbose=params->verbose;
00845 int64_t total_start=params->total_start;
00846 int64_t total_end=params->total_end;
00847 int64_t total=total_start;
00848
00849 for (int32_t i=i_start; i<i_end; i++)
00850 {
00851 int32_t j_start=0;
00852
00853 if (symmetric)
00854 j_start=i;
00855
00856 for (int32_t j=j_start; j<n; j++)
00857 {
00858 float64_t v=k->kernel(i,j);
00859 result[i+j*m]=v;
00860
00861 if (symmetric && i!=j)
00862 result[j+i*m]=v;
00863
00864 if (verbose)
00865 {
00866 total++;
00867
00868 if (symmetric && i!=j)
00869 total++;
00870
00871 if (total%100 == 0)
00872 k->SG_PROGRESS(total, total_start, total_end);
00873
00874 if (CSignal::cancel_computations())
00875 break;
00876 }
00877 }
00878
00879 }
00880
00881 return NULL;
00882 }
00883
00892 virtual void load_serializable_post() throw (ShogunException);
00893
00902 virtual void save_serializable_pre() throw (ShogunException);
00903
00912 virtual void save_serializable_post() throw (ShogunException);
00917 virtual void register_params();
00918
00919 private:
00922 void init();
00923
00924
00925 #ifdef USE_SVMLIGHT
00926
00927 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00928 struct KERNEL_CACHE {
00930 int32_t *index;
00932 int32_t *invindex;
00934 int32_t *active2totdoc;
00936 int32_t *totdoc2active;
00938 int32_t *lru;
00940 int32_t *occu;
00942 int32_t elems;
00944 int32_t max_elems;
00946 int32_t time;
00948 int32_t activenum;
00949
00951 KERNELCACHE_ELEM *buffer;
00953 KERNELCACHE_IDX buffsize;
00954 };
00955
00957 struct S_KTHREAD_PARAM
00958 {
00960 CKernel* kernel;
00962 KERNEL_CACHE* kernel_cache;
00964 KERNELCACHE_ELEM** cache;
00966 int32_t* uncached_rows;
00968 int32_t num_uncached;
00970 uint8_t* needs_computation;
00972 int32_t start;
00974 int32_t end;
00976 int32_t num_vectors;
00977 };
00978 #endif // DOXYGEN_SHOULD_SKIP_THIS
00979
00981 static void* cache_multiple_kernel_row_helper(void* p);
00982
00984 void kernel_cache_free(int32_t cacheidx);
00985 int32_t kernel_cache_malloc();
00986 int32_t kernel_cache_free_lru();
00987 KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
00988 #endif //USE_SVMLIGHT
00989
00990
00991 protected:
00993 int32_t cache_size;
00994
00995 #ifdef USE_SVMLIGHT
00996
00997 KERNEL_CACHE kernel_cache;
00998 #endif //USE_SVMLIGHT
00999
01002 KERNELCACHE_ELEM* kernel_matrix;
01003
01005 CFeatures* lhs;
01007 CFeatures* rhs;
01008
01010 bool lhs_equals_rhs;
01011
01013 int32_t num_lhs;
01015 int32_t num_rhs;
01016
01018 float64_t combined_kernel_weight;
01019
01021 bool optimization_initialized;
01025 EOptimizationType opt_type;
01026
01028 uint64_t properties;
01029
01032 CKernelNormalizer* normalizer;
01033 };
01034
01035 }
01036 #endif