00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef _KERNEL_H___
00016 #define _KERNEL_H___
00017
00018 #include <shogun/lib/common.h>
00019 #include <shogun/lib/Signal.h>
00020 #include <shogun/io/SGIO.h>
00021 #include <shogun/io/File.h>
00022 #include <shogun/mathematics/Math.h>
00023 #include <shogun/features/FeatureTypes.h>
00024 #include <shogun/base/SGObject.h>
00025 #include <shogun/features/Features.h>
00026 #include <shogun/kernel/normalizer/KernelNormalizer.h>
00027
00028 namespace shogun
00029 {
00030 class CFile;
00031 class CFeatures;
00032 class CKernelNormalizer;
00033
00034 #ifdef USE_SHORTREAL_KERNELCACHE
00035
00036 typedef float32_t KERNELCACHE_ELEM;
00037 #else
00038
00039 typedef float64_t KERNELCACHE_ELEM;
00040 #endif
00041
00043 typedef int64_t KERNELCACHE_IDX;
00044
00045
00047 enum EOptimizationType
00048 {
00049 FASTBUTMEMHUNGRY,
00050 SLOWBUTMEMEFFICIENT
00051 };
00052
00054 enum EKernelType
00055 {
00056 K_UNKNOWN = 0,
00057 K_LINEAR = 10,
00058 K_POLY = 20,
00059 K_GAUSSIAN = 30,
00060 K_GAUSSIANSHIFT = 32,
00061 K_GAUSSIANMATCH = 33,
00062 K_HISTOGRAM = 40,
00063 K_SALZBERG = 41,
00064 K_LOCALITYIMPROVED = 50,
00065 K_SIMPLELOCALITYIMPROVED = 60,
00066 K_FIXEDDEGREE = 70,
00067 K_WEIGHTEDDEGREE = 80,
00068 K_WEIGHTEDDEGREEPOS = 81,
00069 K_WEIGHTEDDEGREERBF = 82,
00070 K_WEIGHTEDCOMMWORDSTRING = 90,
00071 K_POLYMATCH = 100,
00072 K_ALIGNMENT = 110,
00073 K_COMMWORDSTRING = 120,
00074 K_COMMULONGSTRING = 121,
00075 K_SPECTRUMRBF = 122,
00076 K_SPECTRUMMISMATCHRBF = 123,
00077 K_COMBINED = 140,
00078 K_AUC = 150,
00079 K_CUSTOM = 160,
00080 K_SIGMOID = 170,
00081 K_CHI2 = 180,
00082 K_DIAG = 190,
00083 K_CONST = 200,
00084 K_DISTANCE = 220,
00085 K_LOCALALIGNMENT = 230,
00086 K_PYRAMIDCHI2 = 240,
00087 K_OLIGO = 250,
00088 K_MATCHWORD = 260,
00089 K_TPPK = 270,
00090 K_REGULATORYMODULES = 280,
00091 K_SPARSESPATIALSAMPLE = 290,
00092 K_HISTOGRAMINTERSECTION = 300,
00093 K_WAVELET = 310,
00094 K_WAVE = 320,
00095 K_CAUCHY = 330,
00096 K_TSTUDENT = 340,
00097 K_RATIONAL_QUADRATIC = 350,
00098 K_MULTIQUADRIC = 360,
00099 K_EXPONENTIAL = 370,
00100 K_SPHERICAL = 380,
00101 K_SPLINE = 390,
00102 K_ANOVA = 400,
00103 K_POWER = 410,
00104 K_LOG = 420,
00105 K_CIRCULAR = 430,
00106 K_INVERSEMULTIQUADRIC = 440,
00107 K_DISTANTSEGMENTS = 450,
00108 K_BESSEL = 460,
00109 K_JENSENSHANNON = 470,
00110 K_DIRECTOR = 480,
00111 K_PRODUCT = 490,
00112 K_LINEARARD = 500,
00113 K_GAUSSIANARD = 510,
00114 K_STREAMING = 520
00115 };
00116
00118 enum EKernelProperty
00119 {
00120 KP_NONE = 0,
00121 KP_LINADD = 1,
00122 KP_KERNCOMBINATION = 2,
00123 KP_BATCHEVALUATION = 4
00124 };
00125
00126 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00127
00128 template <class T> struct K_THREAD_PARAM
00129 {
00131 CKernel* kernel;
00133 int32_t start;
00135 int32_t end;
00137 int32_t total_start;
00139 int32_t total_end;
00141 int32_t m;
00143 int32_t n;
00145 T* result;
00147 bool symmetric;
00149 bool verbose;
00150 };
00151 #endif
00152
00153 class CSVM;
00154
00180 class CKernel : public CSGObject
00181 {
00182 friend class CVarianceKernelNormalizer;
00183 friend class CSqrtDiagKernelNormalizer;
00184 friend class CAvgDiagKernelNormalizer;
00185 friend class CRidgeKernelNormalizer;
00186 friend class CFirstElementKernelNormalizer;
00187 friend class CMultitaskKernelNormalizer;
00188 friend class CMultitaskKernelMklNormalizer;
00189 friend class CMultitaskKernelMaskNormalizer;
00190 friend class CMultitaskKernelMaskPairNormalizer;
00191 friend class CTanimotoKernelNormalizer;
00192 friend class CDiceKernelNormalizer;
00193 friend class CZeroMeanCenterKernelNormalizer;
00194
00195 friend class CStreamingKernel;
00196
00197 public:
00198
00202 CKernel();
00203
00204
00209 CKernel(int32_t size);
00210
00217 CKernel(CFeatures* l, CFeatures* r, int32_t size);
00218
00219 virtual ~CKernel();
00220
00228 inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00229 {
00230 REQUIRE(idx_a>=0 && idx_b>=0 && idx_a<num_lhs && idx_b<num_rhs,
00231 "%s::kernel(): index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00232 get_name(), idx_a,num_lhs, idx_b,num_rhs);
00233
00234 return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00235 }
00236
00241 SGMatrix<float64_t> get_kernel_matrix()
00242 {
00243 return get_kernel_matrix<float64_t>();
00244 }
00245
00250 SGVector<float64_t> get_kernel_diagonal()
00251 {
00252 REQUIRE(lhs, "CKernel::get_kernel_diagonal(): Left-handside "
00253 "features missing!\n");
00254
00255 REQUIRE(rhs, "CKernel::get_kernel_diagonal(): Right-handside "
00256 "features missing!\n");
00257
00258 REQUIRE(lhs->get_num_vectors()==rhs->get_num_vectors(),
00259 "CKernel::get_kernel_diagonal(): Left- and right-"
00260 "handside features must be equal sized\n");
00261
00262
00263 SGVector<float64_t> diagonal(lhs->get_num_vectors());
00264
00265 for (index_t i=0; i<diagonal.vlen; ++i)
00266 diagonal[i]=kernel(i, i);
00267
00268 return diagonal;
00269 }
00270
00276 virtual SGVector<float64_t> get_kernel_col(int32_t j)
00277 {
00278
00279 SGVector<float64_t> col = SGVector<float64_t>(num_rhs);
00280
00281 for (int32_t i=0; i!=num_rhs; i++)
00282 col[i] = kernel(i,j);
00283
00284 return col;
00285 }
00286
00287
00293 virtual SGVector<float64_t> get_kernel_row(int32_t i)
00294 {
00295 SGVector<float64_t> row = SGVector<float64_t>(num_lhs);
00296
00297 for (int32_t j=0; j!=num_lhs; j++)
00298 row[j] = kernel(i,j);
00299
00300 return row;
00301 }
00302
00307 template <class T>
00308 SGMatrix<T> get_kernel_matrix()
00309 {
00310 T* result = NULL;
00311
00312 REQUIRE(has_features(), "no features assigned to kernel\n");
00313
00314 int32_t m=get_num_vec_lhs();
00315 int32_t n=get_num_vec_rhs();
00316
00317 int64_t total_num = int64_t(m)*n;
00318
00319
00320 bool symmetric= (lhs && lhs==rhs && m==n);
00321
00322 SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00323
00324 result=SG_MALLOC(T, total_num);
00325
00326 int32_t num_threads=parallel->get_num_threads();
00327 if (num_threads < 2)
00328 {
00329 K_THREAD_PARAM<T> params;
00330 params.kernel=this;
00331 params.result=result;
00332 params.start=0;
00333 params.end=m;
00334 params.total_start=0;
00335 params.total_end=total_num;
00336 params.n=n;
00337 params.m=m;
00338 params.symmetric=symmetric;
00339 params.verbose=true;
00340 get_kernel_matrix_helper<T>((void*) ¶ms);
00341 }
00342 else
00343 {
00344 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00345 K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
00346 int64_t step= total_num/num_threads;
00347
00348 int32_t t;
00349
00350 num_threads--;
00351 for (t=0; t<num_threads; t++)
00352 {
00353 params[t].kernel = this;
00354 params[t].result = result;
00355 params[t].start = compute_row_start(t*step, n, symmetric);
00356 params[t].end = compute_row_start((t+1)*step, n, symmetric);
00357 params[t].total_start=t*step;
00358 params[t].total_end=(t+1)*step;
00359 params[t].n=n;
00360 params[t].m=m;
00361 params[t].symmetric=symmetric;
00362 params[t].verbose=false;
00363
00364 int code=pthread_create(&threads[t], NULL,
00365 CKernel::get_kernel_matrix_helper<T>, (void*)¶ms[t]);
00366
00367 if (code != 0)
00368 {
00369 SG_WARNING("Thread creation failed (thread %d of %d) "
00370 "with error:'%s'\n",t, num_threads, strerror(code));
00371 num_threads=t;
00372 break;
00373 }
00374 }
00375
00376 params[t].kernel = this;
00377 params[t].result = result;
00378 params[t].start = compute_row_start(t*step, n, symmetric);
00379 params[t].end = m;
00380 params[t].total_start=t*step;
00381 params[t].total_end=total_num;
00382 params[t].n=n;
00383 params[t].m=m;
00384 params[t].symmetric=symmetric;
00385 params[t].verbose=true;
00386 get_kernel_matrix_helper<T>(¶ms[t]);
00387
00388 for (t=0; t<num_threads; t++)
00389 {
00390 if (pthread_join(threads[t], NULL) != 0)
00391 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00392 }
00393
00394 SG_FREE(params);
00395 SG_FREE(threads);
00396 }
00397
00398 SG_DONE();
00399
00400 return SGMatrix<T>(result,m,n,true);
00401 }
00402
00403
00414 virtual bool init(CFeatures* lhs, CFeatures* rhs);
00415
00420 virtual bool set_normalizer(CKernelNormalizer* normalizer);
00421
00426 virtual CKernelNormalizer* get_normalizer();
00427
00431 virtual bool init_normalizer();
00432
00439 virtual void cleanup();
00440
00445 void load(CFile* loader);
00446
00451 void save(CFile* writer);
00452
00457 inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00458
00463 inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00464
00469 virtual int32_t get_num_vec_lhs()
00470 {
00471 return num_lhs;
00472 }
00473
00478 virtual int32_t get_num_vec_rhs()
00479 {
00480 return num_rhs;
00481 }
00482
00487 virtual bool has_features()
00488 {
00489 return lhs && rhs;
00490 }
00491
00496 inline bool get_lhs_equals_rhs()
00497 {
00498 return lhs_equals_rhs;
00499 }
00500
00502 virtual void remove_lhs_and_rhs();
00503
00505 virtual void remove_lhs();
00506
00508 virtual void remove_rhs();
00509
00517 virtual EKernelType get_kernel_type()=0 ;
00518
00525 virtual EFeatureType get_feature_type()=0;
00526
00533 virtual EFeatureClass get_feature_class()=0;
00534
00539 inline void set_cache_size(int32_t size)
00540 {
00541 cache_size = size;
00542 #ifdef USE_SVMLIGHT
00543 cache_reset();
00544 #endif //USE_SVMLIGHT
00545 }
00546
00551 inline int32_t get_cache_size() { return cache_size; }
00552
00553 #ifdef USE_SVMLIGHT
00554
00555 inline void cache_reset() { resize_kernel_cache(cache_size); }
00556
00561 inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
00562
00567 inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
00568
00576 void get_kernel_row(
00577 int32_t docnum, int32_t *active2dnum, float64_t *buffer,
00578 bool full_line=false);
00579
00584 void cache_kernel_row(int32_t x);
00585
00591 void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
00592
00594 void kernel_cache_reset_lru();
00595
00602 void kernel_cache_shrink(
00603 int32_t totdoc, int32_t num_shrink, int32_t *after);
00604
00610 void resize_kernel_cache(KERNELCACHE_IDX size,
00611 bool regression_hack=false);
00612
00617 inline void set_time(int32_t t)
00618 {
00619 kernel_cache.time=t;
00620 }
00621
00627 inline int32_t kernel_cache_touch(int32_t cacheidx)
00628 {
00629 if(kernel_cache.index[cacheidx] != -1)
00630 {
00631 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
00632 return(1);
00633 }
00634 return(0);
00635 }
00636
00642 inline int32_t kernel_cache_check(int32_t cacheidx)
00643 {
00644 return(kernel_cache.index[cacheidx] >= 0);
00645 }
00646
00651 inline int32_t kernel_cache_space_available()
00652 {
00653 return(kernel_cache.elems < kernel_cache.max_elems);
00654 }
00655
00661 void kernel_cache_init(int32_t size, bool regression_hack=false);
00662
00664 void kernel_cache_cleanup();
00665
00666 #endif //USE_SVMLIGHT
00667
00669 void list_kernel();
00670
00676 inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00677
00681 virtual void clear_normal();
00682
00688 virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00689
00694 inline EOptimizationType get_optimization_type() { return opt_type; }
00695
00700 virtual void set_optimization_type(EOptimizationType t) { opt_type=t;}
00701
00706 inline bool get_is_initialized() { return optimization_initialized; }
00707
00715 virtual bool init_optimization(
00716 int32_t count, int32_t *IDX, float64_t *weights);
00717
00722 virtual bool delete_optimization();
00723
00729 bool init_optimization_svm(CSVM * svm) ;
00730
00736 virtual float64_t compute_optimized(int32_t vector_idx);
00737
00746 virtual void compute_batch(
00747 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00748 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00749 float64_t factor=1.0);
00750
00755 inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00756
00761 inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00762
00767 virtual int32_t get_num_subkernels();
00768
00774 virtual void compute_by_subkernel(
00775 int32_t vector_idx, float64_t * subkernel_contrib);
00776
00782 virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00783
00788 virtual void set_subkernel_weights(SGVector<float64_t> weights);
00789
00798 virtual SGMatrix<float64_t> get_parameter_gradient(TParameter* param,
00799 CSGObject* obj, index_t index = -1);
00800 protected:
00805 inline void set_property(EKernelProperty p)
00806 {
00807 properties |= p;
00808 }
00809
00814 inline void unset_property(EKernelProperty p)
00815 {
00816 properties &= (properties | p) ^ p;
00817 }
00818
00823 inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00824
00835 virtual float64_t compute(int32_t x, int32_t y)=0;
00836
00843 int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00844 {
00845 int32_t i_start;
00846
00847 if (symmetric)
00848 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00849 else
00850 i_start=(int32_t) (offs/int64_t(n));
00851
00852 return i_start;
00853 }
00854
00859 template <class T>
00860 static void* get_kernel_matrix_helper(void* p)
00861 {
00862 K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00863 int32_t i_start=params->start;
00864 int32_t i_end=params->end;
00865 CKernel* k=params->kernel;
00866 T* result=params->result;
00867 bool symmetric=params->symmetric;
00868 int32_t n=params->n;
00869 int32_t m=params->m;
00870 bool verbose=params->verbose;
00871 int64_t total_start=params->total_start;
00872 int64_t total_end=params->total_end;
00873 int64_t total=total_start;
00874
00875 for (int32_t i=i_start; i<i_end; i++)
00876 {
00877 int32_t j_start=0;
00878
00879 if (symmetric)
00880 j_start=i;
00881
00882 for (int32_t j=j_start; j<n; j++)
00883 {
00884 float64_t v=k->kernel(i,j);
00885 result[i+j*m]=v;
00886
00887 if (symmetric && i!=j)
00888 result[j+i*m]=v;
00889
00890 if (verbose)
00891 {
00892 total++;
00893
00894 if (symmetric && i!=j)
00895 total++;
00896
00897 if (total%100 == 0)
00898 k->SG_PROGRESS(total, total_start, total_end);
00899
00900 if (CSignal::cancel_computations())
00901 break;
00902 }
00903 }
00904
00905 }
00906
00907 return NULL;
00908 }
00909
00918 virtual void load_serializable_post() throw (ShogunException);
00919
00928 virtual void save_serializable_pre() throw (ShogunException);
00929
00938 virtual void save_serializable_post() throw (ShogunException);
00943 virtual void register_params();
00944
00945 private:
00948 void init();
00949
00950
00951 #ifdef USE_SVMLIGHT
00952 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00953
00954 struct KERNEL_CACHE {
00956 int32_t *index;
00958 int32_t *invindex;
00960 int32_t *active2totdoc;
00962 int32_t *totdoc2active;
00964 int32_t *lru;
00966 int32_t *occu;
00968 int32_t elems;
00970 int32_t max_elems;
00972 int32_t time;
00974 int32_t activenum;
00975
00977 KERNELCACHE_ELEM *buffer;
00979 KERNELCACHE_IDX buffsize;
00980 };
00981
00983 struct S_KTHREAD_PARAM
00984 {
00986 CKernel* kernel;
00988 KERNEL_CACHE* kernel_cache;
00990 KERNELCACHE_ELEM** cache;
00992 int32_t* uncached_rows;
00994 int32_t num_uncached;
00996 uint8_t* needs_computation;
00998 int32_t start;
01000 int32_t end;
01002 int32_t num_vectors;
01003 };
01004 #endif // DOXYGEN_SHOULD_SKIP_THIS
01005
01007 static void* cache_multiple_kernel_row_helper(void* p);
01008
01010 void kernel_cache_free(int32_t cacheidx);
01011 int32_t kernel_cache_malloc();
01012 int32_t kernel_cache_free_lru();
01013 KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
01014 #endif //USE_SVMLIGHT
01015
01016
01017 protected:
01019 int32_t cache_size;
01020
01021 #ifdef USE_SVMLIGHT
01022
01023 KERNEL_CACHE kernel_cache;
01024 #endif //USE_SVMLIGHT
01025
01028 KERNELCACHE_ELEM* kernel_matrix;
01029
01031 CFeatures* lhs;
01033 CFeatures* rhs;
01034
01036 bool lhs_equals_rhs;
01037
01039 int32_t num_lhs;
01041 int32_t num_rhs;
01042
01044 float64_t combined_kernel_weight;
01045
01047 bool optimization_initialized;
01051 EOptimizationType opt_type;
01052
01054 uint64_t properties;
01055
01058 CKernelNormalizer* normalizer;
01059 };
01060
01061 }
01062 #endif