Kernel.h
/*
 * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
 * COPYRIGHT (C) 1999 UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
 *
 * this program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Written (W) 1999-2008 Gunnar Raetsch
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#ifndef _KERNEL_H___
#define _KERNEL_H___

#include <shogun/lib/config.h>

#include <shogun/lib/common.h>
#include <shogun/lib/Signal.h>
#include <shogun/io/SGIO.h>
#include <shogun/io/File.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/FeatureTypes.h>
#include <shogun/base/SGObject.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>

namespace shogun
{

class CFile;
class CFeatures;
class CKernelNormalizer;

#ifdef USE_SHORTREAL_KERNELCACHE
/** kernel cache element */
typedef float32_t KERNELCACHE_ELEM;
#else
/** kernel cache element */
typedef float64_t KERNELCACHE_ELEM;
#endif

/** kernel cache index */
typedef int64_t KERNELCACHE_IDX;

/** optimization type */
enum EOptimizationType
{
	FASTBUTMEMHUNGRY,
	SLOWBUTMEMEFFICIENT
};

/** kernel type */
enum EKernelType
{
	K_UNKNOWN = 0,
	K_LINEAR = 10,
	K_POLY = 20,
	K_GAUSSIAN = 30,
	K_SALZBERG = 41,
	K_POLYMATCH = 100,
	K_ALIGNMENT = 110,
	K_COMBINED = 140,
	K_AUC = 150,
	K_CUSTOM = 160,
	K_SIGMOID = 170,
	K_CHI2 = 180,
	K_DIAG = 190,
	K_CONST = 200,
	K_DISTANCE = 220,
	K_OLIGO = 250,
	K_MATCHWORD = 260,
	K_TPPK = 270,
	K_WAVELET = 310,
	K_WAVE = 320,
	K_CAUCHY = 330,
	K_TSTUDENT = 340,
	K_SPHERICAL = 380,
	K_SPLINE = 390,
	K_ANOVA = 400,
	K_POWER = 410,
	K_LOG = 420,
	K_CIRCULAR = 430,
	K_BESSEL = 460,
	K_DIRECTOR = 480,
	K_PRODUCT = 490,
	K_LINEARARD = 500
};

/** kernel property */
enum EKernelProperty
{
	KP_NONE = 0,
	KP_LINADD = 1,            // kernels that can be optimized via normal-vector updates w + dw
	KP_KERNCOMBINATION = 2,   // kernels that are in fact a linear combination of subkernels K=\sum_i b_i*K_i
	KP_BATCHEVALUATION = 4    // kernels that can generate normals on the fly in linadd and process batches more quickly/memory-efficiently than single examples
};

class CSVM;

/** @brief The Kernel base class.
 *
 * A kernel is a symmetric function k(x,x') that returns a score describing
 * the similarity of two objects; kernel machines such as SVMs access the
 * data exclusively through such kernel evaluations.
 */
class CKernel : public CSGObject
{
	friend class CDiceKernelNormalizer;

	friend class CStreamingKernel;

	public:

		/** default constructor */
		CKernel();

		/** constructor
		 * @param size kernel cache size
		 */
		CKernel(int32_t size);

		/** constructor
		 * @param l features for left-hand side
		 * @param r features for right-hand side
		 * @param size kernel cache size
		 */
		CKernel(CFeatures* l, CFeatures* r, int32_t size);

		virtual ~CKernel();

		/** compute the (normalized) kernel value between lhs vector idx_a and
		 * rhs vector idx_b
		 *
		 * @param idx_a index of feature vector a
		 * @param idx_b index of feature vector b
		 * @return computed kernel value k(a,b)
		 */
		inline float64_t kernel(int32_t idx_a, int32_t idx_b)
		{
			REQUIRE(idx_a>=0 && idx_b>=0 && idx_a<num_lhs && idx_b<num_rhs,
					"%s::kernel(): index out of range: idx_a=%d/%d idx_b=%d/%d\n",
					get_name(), idx_a, num_lhs, idx_b, num_rhs);

			return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
		}

		/** get kernel matrix
		 * @return computed kernel matrix
		 */
		SGMatrix<float64_t> get_kernel_matrix()
		{
			return get_kernel_matrix<float64_t>();
		}

		/** get the kernel diagonal k(i,i) for all i
		 *
		 * @param preallocated optional preallocated result vector
		 * @return vector of diagonal kernel values
		 */
		virtual SGVector<float64_t> get_kernel_diagonal(SGVector<float64_t>
				preallocated=SGVector<float64_t>())
		{
			REQUIRE(lhs, "CKernel::get_kernel_diagonal(): Left-handside "
					"features missing!\n");

			REQUIRE(rhs, "CKernel::get_kernel_diagonal(): Right-handside "
					"features missing!\n");

			REQUIRE(lhs->get_num_vectors()==rhs->get_num_vectors(),
					"CKernel::get_kernel_diagonal(): Left- and right-"
					"handside features must be equal sized\n");

			/* allocate space if necessary */
			if (!preallocated.vector)
				preallocated=SGVector<float64_t>(lhs->get_num_vectors());
			else
			{
				REQUIRE(preallocated.vlen==lhs->get_num_vectors(),
						"%s::get_kernel_diagonal(): Preallocated vector has"
						" wrong size!\n", get_name());
			}

			for (index_t i=0; i<preallocated.vlen; ++i)
				preallocated[i]=kernel(i, i);

			return preallocated;
		}

		/** get column j of the kernel matrix
		 *
		 * @param j column index
		 * @return the column as a vector
		 */
		virtual SGVector<float64_t> get_kernel_col(int32_t j)
		{
			SGVector<float64_t> col = SGVector<float64_t>(num_rhs);

			for (int32_t i=0; i!=num_rhs; i++)
				col[i] = kernel(i,j);

			return col;
		}

		/** get row i of the kernel matrix
		 *
		 * @param i row index
		 * @return the row as a vector
		 */
		virtual SGVector<float64_t> get_kernel_row(int32_t i)
		{
			SGVector<float64_t> row = SGVector<float64_t>(num_lhs);

			for (int32_t j=0; j!=num_lhs; j++)
				row[j] = kernel(i,j);

			return row;
		}

		/** get kernel matrix of the given element type
		 * @return computed kernel matrix
		 */
		template <class T> SGMatrix<T> get_kernel_matrix();

		/** initialize kernel with left- and right-hand side features
		 *
		 * @param lhs features for left-hand side
		 * @param rhs features for right-hand side
		 * @return whether initialization was successful
		 */
		virtual bool init(CFeatures* lhs, CFeatures* rhs);

		/** set the current kernel normalizer
		 * @return whether setting was successful
		 */
		virtual bool set_normalizer(CKernelNormalizer* normalizer);

		/** obtain the current kernel normalizer
		 * @return the kernel normalizer
		 */
		virtual CKernelNormalizer* get_normalizer();

		/** initialize the current kernel normalizer
		 * @return whether initialization was successful
		 */
		virtual bool init_normalizer();

		virtual void cleanup();

		void load(CFile* loader);

		void save(CFile* writer);

		inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }

		inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }

		virtual int32_t get_num_vec_lhs()
		{
			return num_lhs;
		}

		virtual int32_t get_num_vec_rhs()
		{
			return num_rhs;
		}

		virtual bool has_features()
		{
			return lhs && rhs;
		}

		inline bool get_lhs_equals_rhs()
		{
			return lhs_equals_rhs;
		}

		virtual void remove_lhs_and_rhs();

		virtual void remove_lhs();

		virtual void remove_rhs();

		virtual EKernelType get_kernel_type()=0;

		virtual EFeatureType get_feature_type()=0;

		virtual EFeatureClass get_feature_class()=0;

		inline void set_cache_size(int32_t size)
		{
			cache_size = size;
#ifdef USE_SVMLIGHT
			cache_reset();
#endif //USE_SVMLIGHT
		}

		inline int32_t get_cache_size() { return cache_size; }

#ifdef USE_SVMLIGHT
		/** reset the kernel cache */
		inline void cache_reset() { resize_kernel_cache(cache_size); }

		inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }

		inline int32_t get_activenum_cache() { return kernel_cache.activenum; }

		void get_kernel_row(
			int32_t docnum, int32_t *active2dnum, float64_t *buffer,
			bool full_line=false);

		void cache_kernel_row(int32_t x);

		void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);

		void kernel_cache_reset_lru();

		void kernel_cache_shrink(
			int32_t totdoc, int32_t num_shrink, int32_t *after);

		/** resize the kernel cache
		 * @param size new cache size
		 * @param regression_hack hack for regression
		 */
		void resize_kernel_cache(KERNELCACHE_IDX size,
			bool regression_hack=false);

		/** set the lru time stamp */
		inline void set_time(int32_t t)
		{
			kernel_cache.time=t;
		}

		/** update the lru time of a cached row; returns 1 if it was cached */
		inline int32_t kernel_cache_touch(int32_t cacheidx)
		{
			if(kernel_cache.index[cacheidx] != -1)
			{
				kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
				return(1);
			}
			return(0);
		}

		/** check whether the row at cacheidx is cached */
		inline int32_t kernel_cache_check(int32_t cacheidx)
		{
			return(kernel_cache.index[cacheidx] >= 0);
		}

		/** check whether there is room for one more row in the kernel cache */
		inline int32_t kernel_cache_space_available()
		{
			return(kernel_cache.elems < kernel_cache.max_elems);
		}

		void kernel_cache_init(int32_t size, bool regression_hack=false);

		void kernel_cache_cleanup();

#endif //USE_SVMLIGHT

		void list_kernel();

		inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }

		virtual void clear_normal();

		virtual void add_to_normal(int32_t vector_idx, float64_t weight);

		virtual bool init_optimization(
			int32_t count, int32_t *IDX, float64_t *weights);

		virtual bool delete_optimization();

		bool init_optimization_svm(CSVM * svm);

		virtual float64_t compute_optimized(int32_t vector_idx);
629 
638  virtual void compute_batch(
639  int32_t num_vec, int32_t* vec_idx, float64_t* target,
640  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
641  float64_t factor=1.0);
642 
648 
654 
659  virtual int32_t get_num_subkernels();
660 
666  virtual void compute_by_subkernel(
667  int32_t vector_idx, float64_t * subkernel_contrib);
668 
674  virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
675 
681 
686  virtual void set_subkernel_weights(SGVector<float64_t> weights);
687 
696  const TParameter* param, index_t index=-1)
697  {
698  SG_ERROR("Can't compute derivative wrt %s parameter\n", param->m_name)
699  return SGMatrix<float64_t>();
700  }
701 
	protected:
		/** set a kernel property flag
		 * @param p kernel property to set
		 */
		inline void set_property(EKernelProperty p)
		{
			properties |= p;
		}

		/** unset a kernel property flag
		 * @param p kernel property to unset
		 */
		inline void unset_property(EKernelProperty p)
		{
			properties &= (properties | p) ^ p;
		}

		/** mark whether the (linadd) optimization is initialized */
		inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }

		/** compute the raw (unnormalized) kernel value between lhs vector x
		 * and rhs vector y; to be implemented by concrete kernels
		 *
		 * @param x index of feature vector on lhs
		 * @param y index of feature vector on rhs
		 * @return computed kernel value k(x,y)
		 */
		virtual float64_t compute(int32_t x, int32_t y)=0;

		/** map a linear element offset offs to a row index, used to split the
		 * computation of a symmetric or rectangular kernel matrix across threads
		 */
		int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
		{
			int32_t i_start;

			if (symmetric)
				i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
			else
				i_start=(int32_t) (offs/int64_t(n));

			return i_start;
		}

		/** helper for computing the kernel matrix in a parallel way */
		template <class T> static void* get_kernel_matrix_helper(void* p);

		virtual void load_serializable_post() throw (ShogunException);

		virtual void save_serializable_pre() throw (ShogunException);

		virtual void save_serializable_post() throw (ShogunException);

		virtual void register_params();

	private:
		void init();

#ifdef USE_SVMLIGHT
#ifndef DOXYGEN_SHOULD_SKIP_THIS

		/** kernel cache (SVMlight) */
		struct KERNEL_CACHE {
			/** cache slot for each example, -1 if the row is not cached */
			int32_t *index;
			/** inverse index: example number for each cache slot */
			int32_t *invindex;
			/** map from active example to example number */
			int32_t *active2totdoc;
			/** map from example number to active example */
			int32_t *totdoc2active;
			/** least-recently-used time stamps */
			int32_t *lru;
			/** occupancy flags */
			int32_t *occu;
			/** number of cached rows */
			int32_t elems;
			/** maximum number of rows that fit into the cache */
			int32_t max_elems;
			/** current lru time */
			int32_t time;
			/** number of active examples */
			int32_t activenum;

			/** cache buffer */
			KERNELCACHE_ELEM *buffer;
			/** size of the cache buffer */
			KERNELCACHE_IDX buffsize;
		};

		/** parameters passed to the kernel-matrix worker threads */
		struct S_KTHREAD_PARAM
		{
			/** kernel */
			CKernel* kernel;
			/** kernel cache */
			KERNEL_CACHE* kernel_cache;
			/** cached rows */
			KERNELCACHE_ELEM** cache;
			/** indices of uncached rows */
			int32_t* uncached_rows;
			/** number of uncached rows */
			int32_t num_uncached;
			/** flags marking rows that still need computation */
			uint8_t* needs_computation;
			/** start index */
			int32_t start;
			/** end index */
			int32_t end;
			/** number of vectors */
			int32_t num_vectors;
		};
#endif // DOXYGEN_SHOULD_SKIP_THIS

		/** helper that computes multiple kernel cache rows in parallel */
		static void* cache_multiple_kernel_row_helper(void* p);

		/** kernel cache maintenance routines (SVMlight) */
		void kernel_cache_free(int32_t cacheidx);
		int32_t kernel_cache_malloc();
		int32_t kernel_cache_free_lru();
		KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
#endif //USE_SVMLIGHT

	protected:
		/** cache size as set via set_cache_size() */
		int32_t cache_size;

#ifdef USE_SVMLIGHT
		/** kernel cache */
		KERNEL_CACHE kernel_cache;
#endif //USE_SVMLIGHT

		/** feature vectors to occur on the left-hand side */
		CFeatures* lhs;
		/** feature vectors to occur on the right-hand side */
		CFeatures* rhs;

		/** whether lhs and rhs are the same features object */
		bool lhs_equals_rhs;

		/** number of feature vectors on the left-hand side */
		int32_t num_lhs;
		/** number of feature vectors on the right-hand side */
		int32_t num_rhs;

		/** whether the (linadd) optimization is initialized */
		bool optimization_initialized;

		/** kernel properties (bit field of EKernelProperty flags) */
		uint64_t properties;

		/** normalizer applied to each kernel(i,j) evaluation */
		CKernelNormalizer* normalizer;
};

}
#endif /* _KERNEL_H__ */
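
For orientation, a minimal usage sketch of the interface declared above follows. It is an illustration only and assumes the wider Shogun 3.x library: CDenseFeatures and CGaussianKernel (a concrete CKernel subclass) plus the init_shogun_with_defaults()/exit_shogun() entry points from shogun/base/init.h, none of which are defined in this header.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// 2-dimensional toy data: 3 feature vectors stored column-wise
	SGMatrix<float64_t> data(2, 3);
	for (index_t i=0; i<6; i++)
		data.matrix[i]=i;

	// the same features serve as lhs and rhs, so the kernel matrix is symmetric
	CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(data);

	// Gaussian kernel with cache size 10 and width 2.0
	CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
	kernel->init(feats, feats);

	// a single normalized entry k(0,1) and the full kernel matrix
	float64_t k01=kernel->kernel(0, 1);
	SGMatrix<float64_t> K=kernel->get_kernel_matrix();
	SG_SPRINT("k(0,1)=%f, K is %dx%d\n", k01, K.num_rows, K.num_cols);

	SG_UNREF(kernel);
	exit_shogun();
	return 0;
}

A concrete kernel primarily implements compute() together with the pure virtuals get_kernel_type(), get_feature_type() and get_feature_class() (and usually init()); caching, normalization and kernel-matrix assembly are inherited from CKernel.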
