SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Kernel.h
Go to the documentation of this file.
1 /*
2  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
3  * COPYRIGHT (C) 1999 UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
4  *
5  * this program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Written (W) 1999-2009 Soeren Sonnenburg
11  * Written (W) 1999-2008 Gunnar Raetsch
12  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
13  */
14 
15 #ifndef _KERNEL_H___
16 #define _KERNEL_H___
17 
18 #include <shogun/lib/common.h>
19 #include <shogun/lib/Signal.h>
20 #include <shogun/io/SGIO.h>
21 #include <shogun/io/File.h>
24 #include <shogun/base/SGObject.h>
27 
28 namespace shogun
29 {
30  class CFile;
31  class CFeatures;
33 
// NOTE(review): doxygen-extracted listing — the typedef lines inside this
// #ifdef were stripped. Presumably each branch typedefs KERNELCACHE_ELEM
// (a smaller float type when USE_SHORTREAL_KERNELCACHE is set, float64_t
// otherwise, judging by the macro name) — TODO confirm against upstream.
34 #ifdef USE_SHORTREAL_KERNELCACHE
35 
37 #else
38 
40 #endif
41 
// Index type for addressing into the (potentially > 2 GiB) kernel cache
// buffer; 64-bit so buffsize arithmetic cannot overflow.
43 typedef int64_t KERNELCACHE_IDX;
44 
45 
// NOTE(review): a declaration whose name and members were entirely stripped
// by the doxygen extraction — only the braces survive. Cannot be identified
// from this view; TODO restore from the original Kernel.h before editing.
48 {
51 };
52 
55 {
56  K_UNKNOWN = 0,
57  K_LINEAR = 10,
58  K_POLY = 20,
59  K_GAUSSIAN = 30,
63  K_SALZBERG = 41,
71  K_POLYMATCH = 100,
72  K_ALIGNMENT = 110,
77  K_COMBINED = 140,
78  K_AUC = 150,
79  K_CUSTOM = 160,
80  K_SIGMOID = 170,
81  K_CHI2 = 180,
82  K_DIAG = 190,
83  K_CONST = 200,
84  K_DISTANCE = 220,
87  K_OLIGO = 250,
88  K_MATCHWORD = 260,
89  K_TPPK = 270,
93  K_WAVELET = 310,
94  K_WAVE = 320,
95  K_CAUCHY = 330,
96  K_TSTUDENT = 340,
100  K_SPHERICAL = 380,
101  K_SPLINE = 390,
102  K_ANOVA = 400,
103  K_POWER = 410,
104  K_LOG = 420,
105  K_CIRCULAR = 430,
108  K_BESSEL = 460,
110  K_DIRECTOR = 480,
111  K_PRODUCT = 490,
112  K_LINEARARD = 500,
115 };
116 
119 {
120  KP_NONE = 0,
121  KP_LINADD = 1, // Kernels that can be optimized via doing normal updates w + dw
122  KP_KERNCOMBINATION = 2, // Kernels that are infact a linear combination of subkernels K=\sum_i b_i*K_i
123  KP_BATCHEVALUATION = 4 // Kernels that can on the fly generate normals in linadd and more quickly/memory efficient process batches instead of single examples
124 };
125 
126 class CSVM;
127 
// Abstract base class for all Shogun kernels: computes similarity values
// k(x_a, x_b) between feature vectors of a left-hand-side (lhs) and a
// right-hand-side (rhs) feature set, with optional normalization and an
// SVM-light style LRU kernel-row cache (when USE_SVMLIGHT is defined).
//
// NOTE(review): this is a doxygen-extracted listing — every /** doc */ line
// and a number of declaration/signature lines were stripped, so several
// method bodies below appear without their signatures. Comments on such
// fragments are best-effort reconstructions and must be confirmed against
// the upstream Kernel.h before being relied upon.
153 class CKernel : public CSGObject
154 {
165  friend class CDiceKernelNormalizer;
167 
168  friend class CStreamingKernel;
169 
170  public:
171 
// Default constructor.
175  CKernel();
176 
177 
// Constructor taking the kernel cache size (MB, presumably — TODO confirm).
182  CKernel(int32_t size);
183 
// Constructor that also initializes the kernel on lhs/rhs features.
190  CKernel(CFeatures* l, CFeatures* r, int32_t size);
191 
192  virtual ~CKernel();
193 
// Evaluate the normalized kernel value for vector idx_a of lhs and idx_b
// of rhs. Bounds-checks both indices, then routes the raw compute()
// result through the attached normalizer.
201  inline float64_t kernel(int32_t idx_a, int32_t idx_b)
202  {
203  REQUIRE(idx_a>=0 && idx_b>=0 && idx_a<num_lhs && idx_b<num_rhs,
204  "%s::kernel(): index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
205  get_name(), idx_a,num_lhs, idx_b,num_rhs);
206 
207  return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
208  }
209 
// NOTE(review): signature stripped — given the body, presumably
// `SGMatrix<float64_t> get_kernel_matrix()` forwarding to the template
// overload below; confirm upstream.
215  {
216  return get_kernel_matrix<float64_t>();
217  }
218 
// NOTE(review): signature partially stripped — this is the tail of a
// get_kernel_diagonal(SGVector<float64_t> preallocated = ...) declaration.
// Computes k(i,i) for all i into `preallocated` (allocating it if empty,
// validating its length otherwise) and returns it. Requires lhs and rhs
// to be set; the stripped line 234 presumably REQUIREd equal sizes.
226  preallocated=SGVector<float64_t>())
227  {
228  REQUIRE(lhs, "CKernel::get_kernel_diagonal(): Left-handside "
229  "features missing!\n");
230 
231  REQUIRE(rhs, "CKernel::get_kernel_diagonal(): Right-handside "
232  "features missing!\n");
233 
235  "CKernel::get_kernel_diagonal(): Left- and right-"
236  "handside features must be equal sized\n");
237 
238  /* allocate space if necessary */
239  if (!preallocated.vector)
240  preallocated=SGVector<float64_t>(lhs->get_num_vectors());
241  else
242  {
243  REQUIRE(preallocated.vlen==lhs->get_num_vectors(),
244  "%s::get_kernel_diagonal(): Preallocated vector has"
245  " wrong size!\n", get_name());
246  }
247 
248  for (index_t i=0; i<preallocated.vlen; ++i)
249  preallocated[i]=kernel(i, i);
250 
251  return preallocated;
252  }
253 
// NOTE(review): signature and `col` declaration stripped — body computes
// kernel(i, j) for all i (one column j of the kernel matrix); presumably
// `SGVector<float64_t> get_kernel_col(int32_t j)`. Confirm upstream.
260  {
261 
263 
264  for (int32_t i=0; i!=num_rhs; i++)
265  col[i] = kernel(i,j);
266 
267  return col;
268  }
269 
270 
// NOTE(review): signature and `row` declaration stripped — body computes
// kernel(i, j) for all j (one row i of the kernel matrix); presumably
// `SGVector<float64_t> get_kernel_row(int32_t i)`. Confirm upstream.
277  {
279 
280  for (int32_t j=0; j!=num_lhs; j++)
281  row[j] = kernel(i,j);
282 
283  return row;
284  }
285 
// Full kernel matrix as an SGMatrix of element type T.
290  template <class T> SGMatrix<T> get_kernel_matrix();
291 
292 
// Attach lhs/rhs feature objects; subclasses typically override.
303  virtual bool init(CFeatures* lhs, CFeatures* rhs);
304 
310 
316 
// (Re-)initialize the attached kernel normalizer.
320  virtual bool init_normalizer();
321 
// Release any state built up by init()/caching.
328  virtual void cleanup();
329 
334  void load(CFile* loader);
335 
340  void save(CFile* writer);
341 
// Getters hand out a new reference (SG_REF) — caller must unref.
346  inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
347 
352  inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
353 
358  virtual int32_t get_num_vec_lhs()
359  {
360  return num_lhs;
361  }
362 
367  virtual int32_t get_num_vec_rhs()
368  {
369  return num_rhs;
370  }
371 
// True once both feature sides are attached.
376  virtual bool has_features()
377  {
378  return lhs && rhs;
379  }
380 
385  inline bool get_lhs_equals_rhs()
386  {
387  return lhs_equals_rhs;
388  }
389 
391  virtual void remove_lhs_and_rhs();
392 
394  virtual void remove_lhs();
395 
397  virtual void remove_rhs();
398 
// Pure virtuals: each concrete kernel reports its type and the feature
// type/class it operates on.
406  virtual EKernelType get_kernel_type()=0 ;
407 
414  virtual EFeatureType get_feature_type()=0;
415 
422  virtual EFeatureClass get_feature_class()=0;
423 
// Set cache size; under SVM-light the cache is reset so the new size
// takes effect.
428  inline void set_cache_size(int32_t size)
429  {
430  cache_size = size;
431 #ifdef USE_SVMLIGHT
432  cache_reset();
433 #endif //USE_SVMLIGHT
434  }
435 
440  inline int32_t get_cache_size() { return cache_size; }
441 
442 #ifdef USE_SVMLIGHT
443 
445 
// --- SVM-light kernel cache API (caching code (W) Thorsten Joachims) ---
450  inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
451 
456  inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
457 
// Fetch (computing or reading from cache) the kernel row for document
// `docnum` restricted to the active set, into `buffer`.
465  void get_kernel_row(
466  int32_t docnum, int32_t *active2dnum, float64_t *buffer,
467  bool full_line=false);
468 
473  void cache_kernel_row(int32_t x);
474 
480  void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
481 
483  void kernel_cache_reset_lru();
484 
491  void kernel_cache_shrink(
492  int32_t totdoc, int32_t num_shrink, int32_t *after);
493 
// NOTE(review): orphaned tail of a declaration whose first line was
// stripped — presumably resize_kernel_cache(..., bool regression_hack);
// confirm upstream.
500  bool regression_hack=false);
501 
// Set the cache's logical clock used for LRU bookkeeping.
506  inline void set_time(int32_t t)
507  {
508  kernel_cache.time=t;
509  }
510 
// Mark row `cacheidx` as recently used (returns 1) if it is cached,
// else return 0.
516  inline int32_t kernel_cache_touch(int32_t cacheidx)
517  {
518  if(kernel_cache.index[cacheidx] != -1)
519  {
520  kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
521  return(1);
522  }
523  return(0);
524  }
525 
// Non-zero iff row `cacheidx` currently resides in the cache.
531  inline int32_t kernel_cache_check(int32_t cacheidx)
532  {
533  return(kernel_cache.index[cacheidx] >= 0);
534  }
535 
// NOTE(review): signature stripped — body tests for free cache slots;
// presumably `inline int32_t kernel_cache_space_available()`. Confirm.
541  {
542  return(kernel_cache.elems < kernel_cache.max_elems);
543  }
544 
550  void kernel_cache_init(int32_t size, bool regression_hack=false);
551 
553  void kernel_cache_cleanup();
554 
555 #endif //USE_SVMLIGHT
556 
558  void list_kernel();
559 
// Bit-test against the KP_* capability flags.
565  inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
566 
// --- linadd optimization interface (KP_LINADD capable kernels) ---
570  virtual void clear_normal();
571 
577  virtual void add_to_normal(int32_t vector_idx, float64_t weight);
578 
584 
590 
596 
// Prepare the optimization given support-vector indices and weights.
604  virtual bool init_optimization(
605  int32_t count, int32_t *IDX, float64_t *weights);
606 
611  virtual bool delete_optimization();
612 
618  bool init_optimization_svm(CSVM * svm) ;
619 
625  virtual float64_t compute_optimized(int32_t vector_idx);
626 
// Batch evaluation (KP_BATCHEVALUATION capable kernels).
635  virtual void compute_batch(
636  int32_t num_vec, int32_t* vec_idx, float64_t* target,
637  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
638  float64_t factor=1.0);
639 
645 
651 
// --- subkernel interface (KP_KERNCOMBINATION capable kernels) ---
656  virtual int32_t get_num_subkernels();
657 
663  virtual void compute_by_subkernel(
664  int32_t vector_idx, float64_t * subkernel_contrib);
665 
671  virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
672 
678 
683  virtual void set_subkernel_weights(SGVector<float64_t> weights);
684 
// NOTE(review): signature partially stripped — tail of a derivative
// method (presumably get_parameter_gradient(const TParameter*, index_t)).
// Base-class behavior: error out, since generic kernels cannot
// differentiate w.r.t. arbitrary parameters.
693  const TParameter* param, index_t index=-1)
694  {
695  SG_ERROR("Can't compute derivative wrt %s parameter\n", param->m_name)
696  return SGMatrix<float64_t>();
697  }
698 
705  protected:
// NOTE(review): signature stripped — body sets a KP_* bit; presumably
// `inline void set_property(EKernelProperty p)`. Confirm upstream.
711  {
712  properties |= p;
713  }
714 
// NOTE(review): signature stripped — presumably unset_property(p).
// `(properties | p) ^ p` is all current bits with those of p forced off,
// so the &= clears exactly the bits in p.
720  {
721  properties &= (properties | p) ^ p;
722  }
723 
728  inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
729 
// Raw (unnormalized) kernel value — the one method every concrete
// kernel must implement; kernel() wraps it with normalization.
740  virtual float64_t compute(int32_t x, int32_t y)=0;
741 
// Map a linear offset `offs` into the matrix to its starting row index,
// for splitting work among threads; the symmetric case inverts the
// triangular-count formula via sqrt.
748  int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
749  {
750  int32_t i_start;
751 
752  if (symmetric)
753  i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
754  else
755  i_start=(int32_t) (offs/int64_t(n));
756 
757  return i_start;
758  }
759 
// Pthread-style worker entry point used by get_kernel_matrix<T>().
764  template <class T> static void* get_kernel_matrix_helper(void* p);
765 
// NOTE: dynamic exception specifications (`throw (...)`) are deprecated
// in C++11 and removed in C++17 — kept here byte-identical since the
// signatures must match CSGObject's; flag for a coordinated cleanup.
774  virtual void load_serializable_post() throw (ShogunException);
775 
784  virtual void save_serializable_pre() throw (ShogunException);
785 
794  virtual void save_serializable_post() throw (ShogunException);
795 
800  virtual void register_params();
801 
802  private:
805  void init();
806 
807 
808 #ifdef USE_SVMLIGHT
809 #ifndef DOXYGEN_SHOULD_SKIP_THIS
810 
// SVM-light LRU row cache. Per-field doc lines were stripped; the names
// follow SVM-light's conventions (index/invindex map documents to cache
// slots and back, lru holds last-touch timestamps, buffer is the row
// storage of buffsize elements) — confirm details against SVM-light.
811  struct KERNEL_CACHE {
813  int32_t *index;
815  int32_t *invindex;
817  int32_t *active2totdoc;
819  int32_t *totdoc2active;
821  int32_t *lru;
823  int32_t *occu;
825  int32_t elems;
827  int32_t max_elems;
829  int32_t time;
831  int32_t activenum;
832 
834  KERNELCACHE_ELEM *buffer;
836  KERNELCACHE_IDX buffsize;
837  };
838 
// Per-thread work descriptor for parallel multi-row cache fills.
840  struct S_KTHREAD_PARAM
841  {
843  CKernel* kernel;
845  KERNEL_CACHE* kernel_cache;
847  KERNELCACHE_ELEM** cache;
849  int32_t* uncached_rows;
851  int32_t num_uncached;
853  uint8_t* needs_computation;
855  int32_t start;
857  int32_t end;
859  int32_t num_vectors;
860  };
861 #endif // DOXYGEN_SHOULD_SKIP_THIS
862 
864  static void* cache_multiple_kernel_row_helper(void* p);
865 
// Cache slot management (SVM-light caching code).
867  void kernel_cache_free(int32_t cacheidx);
868  int32_t kernel_cache_malloc();
869  int32_t kernel_cache_free_lru();
870  KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
871 #endif //USE_SVMLIGHT
872 
873 
874  protected:
876  int32_t cache_size;
877 
878 #ifdef USE_SVMLIGHT
879 
880  KERNEL_CACHE kernel_cache;
881 #endif //USE_SVMLIGHT
882 
886 
891 
894 
// Cached vector counts of the two feature sides (validated by kernel()).
896  int32_t num_lhs;
898  int32_t num_rhs;
899 
902 
909 
// Bit-set of KP_* capability flags (see has_property()).
// NOTE(review): further members (lhs, rhs, lhs_equals_rhs, normalizer,
// optimization_initialized, …) referenced by the methods above sit on
// lines stripped by the extraction.
911  uint64_t properties;
912 
916 };
917 
918 }
919 #endif /* _KERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation