SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Kernel.h
Go to the documentation of this file.
1 /*
2  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
3  * COPYRIGHT (C) 1999 UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
4  *
5  * this program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Written (W) 1999-2009 Soeren Sonnenburg
11  * Written (W) 1999-2008 Gunnar Raetsch
12  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
13  */
14 
15 #ifndef _KERNEL_H___
16 #define _KERNEL_H___
17 
18 #include <shogun/lib/config.h>
19 
20 #include <shogun/lib/common.h>
21 #include <shogun/lib/Signal.h>
22 #include <shogun/io/SGIO.h>
23 #include <shogun/io/File.h>
26 #include <shogun/base/SGObject.h>
27 #include <shogun/lib/SGMatrix.h>
30 
31 namespace shogun
32 {
33  class CFile;
34  class CFeatures;
36 
37 #ifdef USE_SHORTREAL_KERNELCACHE
38 
40 #else
41 
43 #endif
44 
/** 64-bit signed index type used for kernel-cache sizes, e.g. the size argument of resize_kernel_cache() and KERNEL_CACHE::buffsize. */
46 typedef int64_t KERNELCACHE_IDX;
47 
48 
51 {
54 };
55 
58 {
59  K_UNKNOWN = 0,
60  K_LINEAR = 10,
61  K_POLY = 20,
62  K_GAUSSIAN = 30,
67  K_SALZBERG = 41,
75  K_POLYMATCH = 100,
76  K_ALIGNMENT = 110,
81  K_COMBINED = 140,
82  K_AUC = 150,
83  K_CUSTOM = 160,
84  K_SIGMOID = 170,
85  K_CHI2 = 180,
86  K_DIAG = 190,
87  K_CONST = 200,
88  K_DISTANCE = 220,
91  K_OLIGO = 250,
92  K_MATCHWORD = 260,
93  K_TPPK = 270,
97  K_WAVELET = 310,
98  K_WAVE = 320,
99  K_CAUCHY = 330,
100  K_TSTUDENT = 340,
104  K_SPHERICAL = 380,
105  K_SPLINE = 390,
106  K_ANOVA = 400,
107  K_POWER = 410,
108  K_LOG = 420,
109  K_CIRCULAR = 430,
112  K_BESSEL = 460,
114  K_DIRECTOR = 480,
115  K_PRODUCT = 490,
119  K_STREAMING = 520,
121 };
122 
125 {
126  KP_NONE = 0,
127  KP_LINADD = 1, // Kernels that can be optimized via doing normal updates w + dw
128  KP_KERNCOMBINATION = 2, // Kernels that are in fact a linear combination of subkernels K=\sum_i b_i*K_i
129  KP_BATCHEVALUATION = 4 // Kernels that can generate normals on the fly in linadd and process batches more quickly/memory-efficiently than single examples
130 };
131 
132 class CSVM;
133 
159 class CKernel : public CSGObject
160 {
171  friend class CDiceKernelNormalizer;
173 
174  friend class CStreamingKernel;
175 
176  public:
177 
181  CKernel();
182 
183 
188  CKernel(int32_t size);
189 
196  CKernel(CFeatures* l, CFeatures* r, int32_t size);
197 
198  virtual ~CKernel();
199 
/**
 * Evaluate the kernel for one pair of examples and return the normalized value.
 *
 * Both indices are validated with REQUIRE before anything is computed; the raw
 * value from compute() is then passed through normalizer->normalize() together
 * with the two indices.
 *
 * NOTE(review): normalizer is dereferenced without a null check — assumes a
 * normalizer is always set by the time this is called; confirm.
 *
 * @param idx_a index checked against num_lhs (0 <= idx_a < num_lhs)
 * @param idx_b index checked against num_rhs (0 <= idx_b < num_rhs)
 * @return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b)
 */
207  inline float64_t kernel(int32_t idx_a, int32_t idx_b)
208  {
209  REQUIRE(idx_a>=0 && idx_b>=0 && idx_a<num_lhs && idx_b<num_rhs,
210  "%s::kernel(): index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
211  get_name(), idx_a,num_lhs, idx_b,num_rhs);
212 
213  return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
214  }
215 
221  {
222  return get_kernel_matrix<float64_t>();
223  }
224 
232  preallocated=SGVector<float64_t>())
233  {
234  REQUIRE(lhs, "CKernel::get_kernel_diagonal(): Left-handside "
235  "features missing!\n");
236 
237  REQUIRE(rhs, "CKernel::get_kernel_diagonal(): Right-handside "
238  "features missing!\n");
239 
240  int32_t length=CMath::min(lhs->get_num_vectors(),rhs->get_num_vectors());
241 
242  /* allocate space if necessary */
243  if (!preallocated.vector)
244  preallocated=SGVector<float64_t>(length);
245  else
246  {
247  REQUIRE(preallocated.vlen==length,
248  "%s::get_kernel_diagonal(): Preallocated vector has"
249  " wrong size!\n", get_name());
250  }
251 
252  for (index_t i=0; i<preallocated.vlen; ++i)
253  preallocated[i]=kernel(i, i);
254 
255  return preallocated;
256  }
257 
264  {
265 
267 
268  for (int32_t i=0; i!=num_rhs; i++)
269  col[i] = kernel(i,j);
270 
271  return col;
272  }
273 
274 
281  {
283 
284  for (int32_t j=0; j!=num_lhs; j++)
285  row[j] = kernel(i,j);
286 
287  return row;
288  }
289 
313  virtual float64_t sum_symmetric_block(index_t block_begin,
314  index_t block_size, bool no_diag=true);
315 
344  virtual float64_t sum_block(index_t block_begin_row,
345  index_t block_begin_col, index_t block_size_row,
346  index_t block_size_col, bool no_diag=false);
347 
372  block_begin, index_t block_size, bool no_diag=true);
373 
404  index_t block_begin, index_t block_size, bool no_diag=true);
405 
442  index_t block_begin_row, index_t block_begin_col,
443  index_t block_size_row, index_t block_size_col,
444  bool no_diag=false);
445 
450  template <class T> SGMatrix<T> get_kernel_matrix();
451 
462  virtual bool init(CFeatures* lhs, CFeatures* rhs);
463 
469 
475 
479  virtual bool init_normalizer();
480 
487  virtual void cleanup();
488 
493  void load(CFile* loader);
494 
499  void save(CFile* writer);
500 
/**
 * Return the left-hand side features pointer after applying SG_REF to it.
 *
 * NOTE(review): presumably SG_REF takes a reference on behalf of the caller,
 * who would then have to release it — confirm against SGObject.h.
 *
 * @return lhs (may be null if no features are attached; see has_features())
 */
505  inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
506 
/**
 * Return the right-hand side features pointer after applying SG_REF to it.
 *
 * NOTE(review): presumably SG_REF takes a reference on behalf of the caller,
 * who would then have to release it — confirm against SGObject.h.
 *
 * @return rhs (may be null if no features are attached; see has_features())
 */
511  inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
512 
/**
 * Number of feature vectors on the left-hand side.
 *
 * @return the stored num_lhs member (virtual, so subclasses may override)
 */
517  virtual int32_t get_num_vec_lhs()
518  {
519  return num_lhs;
520  }
521 
/**
 * Number of feature vectors on the right-hand side.
 *
 * @return the stored num_rhs member (virtual, so subclasses may override)
 */
526  virtual int32_t get_num_vec_rhs()
527  {
528  return num_rhs;
529  }
530 
/**
 * Test whether features are attached on both sides.
 *
 * @return true only when both the lhs and rhs pointers are non-null
 */
535  virtual bool has_features()
536  {
537  return lhs && rhs;
538  }
539 
/**
 * Return the stored lhs_equals_rhs flag.
 *
 * NOTE(review): the flag is set elsewhere (not visible in this header);
 * presumably it records that lhs and rhs are the same feature object — confirm.
 *
 * @return current value of lhs_equals_rhs
 */
544  inline bool get_lhs_equals_rhs()
545  {
546  return lhs_equals_rhs;
547  }
548 
550  virtual void remove_lhs_and_rhs();
551 
553  virtual void remove_lhs();
554 
556  virtual void remove_rhs();
557 
565  virtual EKernelType get_kernel_type()=0 ;
566 
573  virtual EFeatureType get_feature_type()=0;
574 
581  virtual EFeatureClass get_feature_class()=0;
582 
/**
 * Store a new cache size.
 *
 * When built with USE_SVMLIGHT, cache_reset() is called right after the
 * assignment so the new size is applied to the kernel cache; without
 * USE_SVMLIGHT only the member is updated.
 *
 * @param size new value for cache_size (the member is documented as being in MB)
 */
587  inline void set_cache_size(int32_t size)
588  {
589  cache_size = size;
590 #ifdef USE_SVMLIGHT
591  cache_reset();
592 #endif //USE_SVMLIGHT
593  }
594 
/**
 * Return the stored cache size.
 *
 * @return cache_size (the member is documented as being in MB)
 */
599  inline int32_t get_cache_size() { return cache_size; }
600 
601 #ifdef USE_SVMLIGHT
602 
604 
/**
 * Return the capacity counter of the kernel cache.
 *
 * @return kernel_cache.max_elems
 */
609  inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
610 
/**
 * Return the activenum counter of the kernel cache.
 *
 * @return kernel_cache.activenum
 */
615  inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
616 
624  void get_kernel_row(
625  int32_t docnum, int32_t *active2dnum, float64_t *buffer,
626  bool full_line=false);
627 
632  void cache_kernel_row(int32_t x);
633 
639  void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
640 
642  void kernel_cache_reset_lru();
643 
650  void kernel_cache_shrink(
651  int32_t totdoc, int32_t num_shrink, int32_t *after);
652 
658  void resize_kernel_cache(KERNELCACHE_IDX size,
659  bool regression_hack=false);
660 
/**
 * Set the kernel cache's current time stamp.
 *
 * The stored value is what kernel_cache_touch() later writes into the lru
 * entry of a touched cache slot.
 *
 * @param t new value for kernel_cache.time
 */
665  inline void set_time(int32_t t)
666  {
667  kernel_cache.time=t;
668  }
669 
/**
 * Mark a cached row as used at the current cache time.
 *
 * If row cacheidx has a cache slot (kernel_cache.index[cacheidx] >= 0), the
 * slot's lru entry is stamped with kernel_cache.time. The test uses ">= 0",
 * the same predicate as kernel_cache_check(), so that any negative index
 * entry is treated as "not cached" and can never be used as a (negative)
 * subscript into lru.
 *
 * cacheidx itself is not range-checked; the caller must pass a valid row.
 *
 * @param cacheidx row whose cache slot should be touched
 * @return 1 if the row was cached and its lru stamp was updated, 0 otherwise
 */
675  inline int32_t kernel_cache_touch(int32_t cacheidx)
676  {
677  if(kernel_cache.index[cacheidx] >= 0)
678  {
679  kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
680  return(1);
681  }
682  return(0);
683  }
684 
/**
 * Test whether a row currently has a cache slot.
 *
 * cacheidx is not range-checked; the caller must pass a valid row.
 *
 * @param cacheidx row to look up in kernel_cache.index
 * @return 1 if kernel_cache.index[cacheidx] is non-negative, 0 otherwise
 */
690  inline int32_t kernel_cache_check(int32_t cacheidx)
691  {
692  return(kernel_cache.index[cacheidx] >= 0);
693  }
694 
700  {
701  return(kernel_cache.elems < kernel_cache.max_elems);
702  }
703 
709  void kernel_cache_init(int32_t size, bool regression_hack=false);
710 
712  void kernel_cache_cleanup();
713 
714 #endif //USE_SVMLIGHT
715 
717  void list_kernel();
718 
/**
 * Test whether a property bit is set in the properties bit mask.
 *
 * @param p property flag(s) to test
 * @return true if any bit of p is set in properties
 */
724  inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
725 
729  virtual void clear_normal();
730 
736  virtual void add_to_normal(int32_t vector_idx, float64_t weight);
737 
743 
749 
755 
763  virtual bool init_optimization(
764  int32_t count, int32_t *IDX, float64_t *weights);
765 
770  virtual bool delete_optimization();
771 
777  bool init_optimization_svm(CSVM * svm) ;
778 
784  virtual float64_t compute_optimized(int32_t vector_idx);
785 
794  virtual void compute_batch(
795  int32_t num_vec, int32_t* vec_idx, float64_t* target,
796  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
797  float64_t factor=1.0);
798 
804 
810 
815  virtual int32_t get_num_subkernels();
816 
822  virtual void compute_by_subkernel(
823  int32_t vector_idx, float64_t * subkernel_contrib);
824 
830  virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
831 
837 
842  virtual void set_subkernel_weights(SGVector<float64_t> weights);
843 
852  const TParameter* param, index_t index=-1)
853  {
854  SG_ERROR("Can't compute derivative wrt %s parameter\n", param->m_name)
855  return SGMatrix<float64_t>();
856  }
857 
866  const TParameter* param, index_t index=-1)
867  {
868  return get_parameter_gradient(param,index).get_diagonal_vector();
869  }
870 
877  protected:
883  {
884  properties |= p;
885  }
886 
892  {
893  properties &= (properties | p) ^ p;
894  }
895 
/**
 * Store the optimization-initialized flag.
 *
 * @param p_init new value for optimization_initialized
 */
900  inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
901 
912  virtual float64_t compute(int32_t x, int32_t y)=0;
913 
/**
 * Convert a linear element offset into a starting row index.
 *
 * - non-symmetric: integer division offs / n
 * - symmetric:     floor(n - sqrt(n*n - offs)), evaluated in floating point
 *
 * The result is narrowed to int32_t in both branches.
 *
 * NOTE(review): presumably used to split kernel-matrix computation into row
 * ranges of similar size — confirm against the callers in Kernel.cpp.
 * NOTE(review): n == 0 divides by zero in the non-symmetric branch, and
 * offs > n*n makes the sqrt argument negative; neither case is guarded here.
 *
 * @param offs linear offset
 * @param n value used as divisor (non-symmetric) or squared under the root (symmetric)
 * @param symmetric selects which of the two formulas is used
 * @return starting row index
 */
920  int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
921  {
922  int32_t i_start;
923 
924  if (symmetric)
925  i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
926  else
927  i_start=(int32_t) (offs/int64_t(n));
928 
929  return i_start;
930  }
931 
936  template <class T> static void* get_kernel_matrix_helper(void* p);
937 
946  virtual void load_serializable_post() throw (ShogunException);
947 
956  virtual void save_serializable_pre() throw (ShogunException);
957 
966  virtual void save_serializable_post() throw (ShogunException);
967 
972  virtual void register_params();
973 
974  private:
977  void init();
978 
979 
980 #ifdef USE_SVMLIGHT
981 #ifndef DOXYGEN_SHOULD_SKIP_THIS
982 
/**
 * Bookkeeping for the kernel row cache (only compiled with USE_SVMLIGHT).
 *
 * Field notes are limited to what the inline accessors of CKernel show;
 * anything marked "presumably" should be confirmed in Kernel.cpp.
 */
983  struct KERNEL_CACHE {
 /** per-row lookup: kernel_cache_check() treats a non-negative entry as "row is cached"; kernel_cache_touch() uses the entry as an index into lru */
985  int32_t *index;
 /** presumably the inverse mapping of index (slot -> row) — TODO confirm */
987  int32_t *invindex;
 /** presumably maps active-set positions to document numbers — TODO confirm */
989  int32_t *active2totdoc;
 /** presumably maps document numbers to active-set positions — TODO confirm */
991  int32_t *totdoc2active;
 /** per-slot time stamp; kernel_cache_touch() writes the current `time` here */
993  int32_t *lru;
 /** presumably a per-slot occupancy marker — TODO confirm */
995  int32_t *occu;
 /** element counter; kernel_cache_space_available() reports space while elems < max_elems */
997  int32_t elems;
 /** upper bound that elems is compared against; returned by get_max_elems_cache() */
999  int32_t max_elems;
 /** current time stamp, set through set_time() */
1001  int32_t time;
 /** returned by get_activenum_cache(); presumably the number of active examples — TODO confirm */
1003  int32_t activenum;
1004 
 /** storage for cached kernel values (element type KERNELCACHE_ELEM) */
1006  KERNELCACHE_ELEM *buffer;
 /** presumably the size of buffer — units not visible here, TODO confirm */
1008  KERNELCACHE_IDX buffsize;
1009  };
1010 
/**
 * Parameter bundle for a kernel-cache worker.
 *
 * NOTE(review): presumably this is what cache_multiple_kernel_row_helper()
 * receives through its void* argument; the field meanings below are inferred
 * from names and types only — confirm against Kernel.cpp.
 */
1012  struct S_KTHREAD_PARAM
1013  {
 /** kernel object the worker operates on */
1015  CKernel* kernel;
 /** kernel cache bookkeeping structure */
1017  KERNEL_CACHE* kernel_cache;
 /** array of pointers to KERNELCACHE_ELEM buffers (presumably one per row — TODO confirm) */
1019  KERNELCACHE_ELEM** cache;
 /** presumably the row numbers that are not yet cached — TODO confirm */
1021  int32_t* uncached_rows;
 /** presumably the length of uncached_rows — TODO confirm */
1023  int32_t num_uncached;
 /** presumably per-entry flags marking what still has to be computed — TODO confirm */
1025  uint8_t* needs_computation;
 /** presumably the first index of this worker's range — TODO confirm */
1027  int32_t start;
 /** presumably the end of this worker's range (inclusive/exclusive not visible) — TODO confirm */
1029  int32_t end;
 /** presumably the total number of vectors — TODO confirm */
1031  int32_t num_vectors;
1032  };
1033 #endif // DOXYGEN_SHOULD_SKIP_THIS
1034 
1036  static void* cache_multiple_kernel_row_helper(void* p);
1037 
1039  void kernel_cache_free(int32_t cacheidx);
1040  int32_t kernel_cache_malloc();
1041  int32_t kernel_cache_free_lru();
1042  KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
1043 #endif //USE_SVMLIGHT
1044 
1045 
1046  protected:
1048  int32_t cache_size;
1049 
1050 #ifdef USE_SVMLIGHT
1051  KERNEL_CACHE kernel_cache;
1053 #endif //USE_SVMLIGHT
1054 
1057  KERNELCACHE_ELEM* kernel_matrix;
1058 
1063 
1066 
1068  int32_t num_lhs;
1070  int32_t num_rhs;
1071 
1074 
1081 
1083  uint64_t properties;
1084 
1088 };
1089 
1090 }
1091 #endif /* _KERNEL_H___ */
virtual void clear_normal()
Definition: Kernel.cpp:859
virtual const char * get_name() const =0
virtual void load_serializable_post()
Definition: Kernel.cpp:929
int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
Definition: Kernel.h:920
The MultitaskKernel allows Multitask Learning via a modified kernel function.
int32_t get_activenum_cache()
Definition: Kernel.h:615
virtual void cleanup()
Definition: Kernel.cpp:173
virtual void compute_by_subkernel(int32_t vector_idx, float64_t *subkernel_contrib)
Definition: Kernel.cpp:869
EKernelType
Definition: Kernel.h:57
void cache_multiple_kernel_rows(int32_t *key, int32_t varnum)
Definition: Kernel.cpp:376
int32_t get_max_elems_cache()
Definition: Kernel.h:609
virtual float64_t compute(int32_t x, int32_t y)=0
int32_t index_t
Definition: common.h:62
DiceKernelNormalizer performs kernel normalization inspired by the Dice coefficient (see http://en...
The MultitaskKernel allows Multitask Learning via a modified kernel function.
int32_t num_rhs
number of feature vectors on right hand side
Definition: Kernel.h:1070
static void * get_kernel_matrix_helper(void *p)
Definition: Kernel.cpp:1293
Class ShogunException defines an exception which is thrown whenever an error inside of shogun occurs...
virtual bool set_normalizer(CKernelNormalizer *normalizer)
Definition: Kernel.cpp:150
virtual float64_t sum_block(index_t block_begin_row, index_t block_begin_col, index_t block_size_row, index_t block_size_col, bool no_diag=false)
Definition: Kernel.cpp:1080
static T sq(T x)
Definition: Math.h:450
bool get_lhs_equals_rhs()
Definition: Kernel.h:544
parameter struct
virtual int32_t get_num_vectors() const =0
CFeatures * get_rhs()
Definition: Kernel.h:511
#define SG_ERROR(...)
Definition: SGIO.h:129
void cache_reset()
Definition: Kernel.h:603
#define REQUIRE(x,...)
Definition: SGIO.h:206
void set_is_initialized(bool p_init)
Definition: Kernel.h:900
virtual bool delete_optimization()
Definition: Kernel.cpp:835
int64_t KERNELCACHE_IDX
Definition: Kernel.h:46
void set_cache_size(int32_t size)
Definition: Kernel.h:587
int32_t kernel_cache_space_available()
Definition: Kernel.h:699
float64_t kernel(int32_t idx_a, int32_t idx_b)
Definition: Kernel.h:207
virtual void set_optimization_type(EOptimizationType t)
Definition: Kernel.h:748
uint64_t properties
Definition: Kernel.h:1083
virtual void remove_rhs()
takes all necessary steps if the rhs is removed from kernel
Definition: Kernel.cpp:693
TanimotoKernelNormalizer performs kernel normalization inspired by the Tanimoto coefficient (see http...
virtual int32_t get_num_vec_lhs()
Definition: Kernel.h:517
SGMatrix< float64_t > get_kernel_matrix()
Definition: Kernel.h:220
#define SG_REF(x)
Definition: SGObject.h:54
static float64_t floor(float64_t d)
Definition: Math.h:407
int32_t cache_size
cache_size in MB
Definition: Kernel.h:1048
int32_t kernel_cache_touch(int32_t cacheidx)
Definition: Kernel.h:675
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
void kernel_cache_shrink(int32_t totdoc, int32_t num_shrink, int32_t *after)
Definition: Kernel.cpp:495
bool get_is_initialized()
Definition: Kernel.h:754
virtual SGMatrix< float64_t > row_wise_sum_squared_sum_symmetric_block(index_t block_begin, index_t block_size, bool no_diag=true)
Definition: Kernel.cpp:1180
float64_t combined_kernel_weight
Definition: Kernel.h:1073
virtual void register_params()
Definition: Kernel.cpp:952
void save(CFile *writer)
Definition: Kernel.cpp:652
virtual SGVector< float64_t > get_kernel_col(int32_t j)
Definition: Kernel.h:263
virtual void remove_lhs_and_rhs()
Definition: Kernel.cpp:660
bool has_property(EKernelProperty p)
Definition: Kernel.h:724
SGVector< T > get_diagonal_vector() const
Definition: SGMatrix.cpp:1095
virtual CKernelNormalizer * get_normalizer()
Definition: Kernel.cpp:162
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
virtual SGVector< float64_t > row_col_wise_sum_block(index_t block_begin_row, index_t block_begin_col, index_t block_size_row, index_t block_size_col, bool no_diag=false)
Definition: Kernel.cpp:1239
void cache_kernel_row(int32_t x)
Definition: Kernel.cpp:302
EKernelProperty
Definition: Kernel.h:124
virtual float64_t sum_symmetric_block(index_t block_begin, index_t block_size, bool no_diag=true)
Definition: Kernel.cpp:1027
virtual SGVector< float64_t > get_subkernel_weights()
Definition: Kernel.cpp:881
double float64_t
Definition: common.h:50
KERNEL_CACHE kernel_cache
kernel cache
Definition: Kernel.h:1052
virtual EFeatureType get_feature_type()=0
void set_combined_kernel_weight(float64_t nw)
Definition: Kernel.h:809
KERNELCACHE_ELEM * kernel_matrix
Definition: Kernel.h:1057
A File access base class.
Definition: File.h:34
virtual void save_serializable_post()
Definition: Kernel.cpp:944
virtual float64_t compute_optimized(int32_t vector_idx)
Definition: Kernel.cpp:841
EOptimizationType get_optimization_type()
Definition: Kernel.h:742
void unset_property(EKernelProperty p)
Definition: Kernel.h:891
void list_kernel()
Definition: Kernel.cpp:708
float64_t get_combined_kernel_weight()
Definition: Kernel.h:803
virtual SGVector< float64_t > row_wise_sum_symmetric_block(index_t block_begin, index_t block_size, bool no_diag=true)
Definition: Kernel.cpp:1126
The MultitaskKernel allows Multitask Learning via a modified kernel function.
Normalize the kernel by a constant obtained from the first element of the kernel matrix, i.e. $c = k(\mathbf{x}_0, \mathbf{x}'_0)$.
Normalize the kernel by adding a constant term to its diagonal. This aids kernels to become positive ...
int32_t num_lhs
number of feature vectors on left hand side
Definition: Kernel.h:1068
The class Kernel Normalizer defines a function to post-process kernel values.
ZeroMeanCenterKernelNormalizer centers the kernel in feature space.
virtual int32_t get_num_vec_rhs()
Definition: Kernel.h:526
virtual void set_subkernel_weights(SGVector< float64_t > weights)
Definition: Kernel.cpp:888
virtual bool init_normalizer()
Definition: Kernel.cpp:168
bool optimization_initialized
Definition: Kernel.h:1076
float float32_t
Definition: common.h:49
EFeatureType
shogun feature type
Definition: FeatureTypes.h:19
EOptimizationType opt_type
Definition: Kernel.h:1080
void load(CFile *loader)
Definition: Kernel.cpp:646
CFeatures * rhs
feature vectors to occur on right hand side
Definition: Kernel.h:1062
static CKernel * obtain_from_generic(CSGObject *kernel)
Definition: Kernel.cpp:897
Base-class for parameterized Kernel Normalizers.
SqrtDiagKernelNormalizer divides by the Square Root of the product of the diagonal elements...
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
virtual void compute_batch(int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0)
Definition: Kernel.cpp:847
EOptimizationType
Definition: Kernel.h:50
bool lhs_equals_rhs
lhs
Definition: Kernel.h:1065
Normalize the kernel by either a constant or the average value of the diagonal elements (depending on...
virtual EKernelType get_kernel_type()=0
virtual bool init_optimization(int32_t count, int32_t *IDX, float64_t *weights)
Definition: Kernel.cpp:828
void set_time(int32_t t)
Definition: Kernel.h:665
CFeatures * lhs
feature vectors to occur on left hand side
Definition: Kernel.h:1060
The class Features is the base class of all feature objects.
Definition: Features.h:68
static T min(T a, T b)
Definition: Math.h:157
virtual void save_serializable_pre()
Definition: Kernel.cpp:936
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
Definition: Kernel.h:851
SGVector< float64_t > get_kernel_diagonal(SGVector< float64_t > preallocated=SGVector< float64_t >())
Definition: Kernel.h:231
void kernel_cache_cleanup()
Definition: Kernel.cpp:567
virtual void remove_lhs()
Definition: Kernel.cpp:679
int32_t kernel_cache_check(int32_t cacheidx)
Definition: Kernel.h:690
virtual int32_t get_num_subkernels()
Definition: Kernel.cpp:864
bool init_optimization_svm(CSVM *svm)
Definition: Kernel.cpp:911
A generic Support Vector Machine Interface.
Definition: SVM.h:49
void kernel_cache_reset_lru()
Definition: Kernel.cpp:554
The Kernel base class.
Definition: Kernel.h:159
int32_t get_cache_size()
Definition: Kernel.h:599
CKernelNormalizer * normalizer
Definition: Kernel.h:1087
virtual SGVector< float64_t > get_kernel_row(int32_t i)
Definition: Kernel.h:280
virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs)=0
static float32_t sqrt(float32_t x)
Definition: Math.h:459
virtual bool has_features()
Definition: Kernel.h:535
void kernel_cache_init(int32_t size, bool regression_hack=false)
Definition: Kernel.cpp:181
virtual ~CKernel()
Definition: Kernel.cpp:73
virtual void add_to_normal(int32_t vector_idx, float64_t weight)
Definition: Kernel.cpp:854
virtual SGVector< float64_t > get_parameter_gradient_diagonal(const TParameter *param, index_t index=-1)
Definition: Kernel.h:865
float64_t KERNELCACHE_ELEM
Definition: Kernel.h:35
friend class CStreamingKernel
Definition: Kernel.h:174
void set_property(EKernelProperty p)
Definition: Kernel.h:882
VarianceKernelNormalizer divides by the ``variance''.
void resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack=false)
Definition: Kernel.cpp:85
virtual EFeatureClass get_feature_class()=0
CFeatures * get_lhs()
Definition: Kernel.h:505

SHOGUN Machine Learning Toolbox - Documentation