SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Kernel.h
Go to the documentation of this file.
1 /*
2  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
3  * COPYRIGHT (C) 1999 UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
4  *
5  * this program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Written (W) 1999-2009 Soeren Sonnenburg
11  * Written (W) 1999-2008 Gunnar Raetsch
12  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
13  */
14 
15 #ifndef _KERNEL_H___
16 #define _KERNEL_H___
17 
18 #include <shogun/lib/common.h>
19 #include <shogun/lib/Signal.h>
20 #include <shogun/io/SGIO.h>
21 #include <shogun/io/File.h>
24 #include <shogun/base/SGObject.h>
27 
28 namespace shogun
29 {
30  class CFile;
31  class CFeatures;
33 
34 #ifdef USE_SHORTREAL_KERNELCACHE
35 
37 #else
38 
40 #endif
41 
43 typedef int64_t KERNELCACHE_IDX;
44 
45 
48 {
51 };
52 
55 {
56  K_UNKNOWN = 0,
57  K_LINEAR = 10,
58  K_POLY = 20,
59  K_GAUSSIAN = 30,
63  K_SALZBERG = 41,
71  K_POLYMATCH = 100,
72  K_ALIGNMENT = 110,
77  K_COMBINED = 140,
78  K_AUC = 150,
79  K_CUSTOM = 160,
80  K_SIGMOID = 170,
81  K_CHI2 = 180,
82  K_DIAG = 190,
83  K_CONST = 200,
84  K_DISTANCE = 220,
87  K_OLIGO = 250,
88  K_MATCHWORD = 260,
89  K_TPPK = 270,
93  K_WAVELET = 310,
94  K_WAVE = 320,
95  K_CAUCHY = 330,
96  K_TSTUDENT = 340,
100  K_SPHERICAL = 380,
101  K_SPLINE = 390,
102  K_ANOVA = 400,
103  K_POWER = 410,
104  K_LOG = 420,
105  K_CIRCULAR = 430,
108  K_BESSEL = 460,
110  K_DIRECTOR = 480,
111  K_PRODUCT = 490,
112  K_LINEARARD = 500,
114 };
115 
118 {
119  KP_NONE = 0,
120  KP_LINADD = 1, // Kernels that can be optimized via doing normal updates w + dw
121  KP_KERNCOMBINATION = 2, // Kernels that are infact a linear combination of subkernels K=\sum_i b_i*K_i
122  KP_BATCHEVALUATION = 4 // Kernels that can on the fly generate normals in linadd and more quickly/memory efficient process batches instead of single examples
123 };
124 
125 #ifndef DOXYGEN_SHOULD_SKIP_THIS
126 
127 template <class T> struct K_THREAD_PARAM
128 {
130  CKernel* kernel;
132  int32_t start;
134  int32_t end;
136  int32_t total_start;
138  int32_t total_end;
140  int32_t m;
142  int32_t n;
144  T* result;
146  bool symmetric;
148  bool verbose;
149 };
150 #endif
151 
152 class CSVM;
153 
179 class CKernel : public CSGObject
180 {
191  friend class CDiceKernelNormalizer;
193 
194  public:
195 
199  CKernel();
200 
201 
206  CKernel(int32_t size);
207 
214  CKernel(CFeatures* l, CFeatures* r, int32_t size);
215 
216  virtual ~CKernel();
217 
225  inline float64_t kernel(int32_t idx_a, int32_t idx_b)
226  {
227  REQUIRE(idx_a>=0 && idx_b>=0 && idx_a<num_lhs && idx_b<num_rhs,
228  "Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
229  idx_a,num_lhs, idx_b,num_rhs);
230 
231  return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
232  }
233 
239  {
240  return get_kernel_matrix<float64_t>();
241  }
242 
249  {
250 
252 
253  for (int32_t i=0; i!=num_rhs; i++)
254  col[i] = kernel(i,j);
255 
256  return col;
257  }
258 
259 
266  {
268 
269  for (int32_t j=0; j!=num_lhs; j++)
270  row[j] = kernel(i,j);
271 
272  return row;
273  }
274 
279  template <class T>
281  {
282  T* result = NULL;
283 
284  REQUIRE(has_features(), "no features assigned to kernel\n");
285 
286  int32_t m=get_num_vec_lhs();
287  int32_t n=get_num_vec_rhs();
288 
289  int64_t total_num = int64_t(m)*n;
290 
291  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
292  bool symmetric= (lhs && lhs==rhs && m==n);
293 
294  SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
295 
296  result=SG_MALLOC(T, total_num);
297 
298  int32_t num_threads=parallel->get_num_threads();
299  if (num_threads < 2)
300  {
301  K_THREAD_PARAM<T> params;
302  params.kernel=this;
303  params.result=result;
304  params.start=0;
305  params.end=m;
306  params.total_start=0;
307  params.total_end=total_num;
308  params.n=n;
309  params.m=m;
310  params.symmetric=symmetric;
311  params.verbose=true;
312  get_kernel_matrix_helper<T>((void*) &params);
313  }
314  else
315  {
316  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
317  K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
318  int64_t step= total_num/num_threads;
319 
320  int32_t t;
321 
322  num_threads--;
323  for (t=0; t<num_threads; t++)
324  {
325  params[t].kernel = this;
326  params[t].result = result;
327  params[t].start = compute_row_start(t*step, n, symmetric);
328  params[t].end = compute_row_start((t+1)*step, n, symmetric);
329  params[t].total_start=t*step;
330  params[t].total_end=(t+1)*step;
331  params[t].n=n;
332  params[t].m=m;
333  params[t].symmetric=symmetric;
334  params[t].verbose=false;
335 
336  int code=pthread_create(&threads[t], NULL,
337  CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);
338 
339  if (code != 0)
340  {
341  SG_WARNING("Thread creation failed (thread %d of %d) "
342  "with error:'%s'\n",t, num_threads, strerror(code));
343  num_threads=t;
344  break;
345  }
346  }
347 
348  params[t].kernel = this;
349  params[t].result = result;
350  params[t].start = compute_row_start(t*step, n, symmetric);
351  params[t].end = m;
352  params[t].total_start=t*step;
353  params[t].total_end=total_num;
354  params[t].n=n;
355  params[t].m=m;
356  params[t].symmetric=symmetric;
357  params[t].verbose=true;
358  get_kernel_matrix_helper<T>(&params[t]);
359 
360  for (t=0; t<num_threads; t++)
361  {
362  if (pthread_join(threads[t], NULL) != 0)
363  SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
364  }
365 
366  SG_FREE(params);
367  SG_FREE(threads);
368  }
369 
370  SG_DONE();
371 
372  return SGMatrix<T>(result,m,n,true);
373  }
374 
375 
386  virtual bool init(CFeatures* lhs, CFeatures* rhs);
387 
393 
399 
403  virtual bool init_normalizer();
404 
411  virtual void cleanup();
412 
417  void load(CFile* loader);
418 
423  void save(CFile* writer);
424 
429  inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
430 
435  inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
436 
441  virtual int32_t get_num_vec_lhs()
442  {
443  return num_lhs;
444  }
445 
450  virtual int32_t get_num_vec_rhs()
451  {
452  return num_rhs;
453  }
454 
459  virtual bool has_features()
460  {
461  return lhs && rhs;
462  }
463 
468  inline bool get_lhs_equals_rhs()
469  {
470  return lhs_equals_rhs;
471  }
472 
474  virtual void remove_lhs_and_rhs();
475 
477  virtual void remove_lhs();
478 
480  virtual void remove_rhs();
481 
489  virtual EKernelType get_kernel_type()=0 ;
490 
497  virtual EFeatureType get_feature_type()=0;
498 
505  virtual EFeatureClass get_feature_class()=0;
506 
511  inline void set_cache_size(int32_t size)
512  {
513  cache_size = size;
514 #ifdef USE_SVMLIGHT
515  cache_reset();
516 #endif //USE_SVMLIGHT
517  }
518 
523  inline int32_t get_cache_size() { return cache_size; }
524 
525 #ifdef USE_SVMLIGHT
526 
528 
533  inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
534 
539  inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
540 
548  void get_kernel_row(
549  int32_t docnum, int32_t *active2dnum, float64_t *buffer,
550  bool full_line=false);
551 
556  void cache_kernel_row(int32_t x);
557 
563  void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
564 
566  void kernel_cache_reset_lru();
567 
574  void kernel_cache_shrink(
575  int32_t totdoc, int32_t num_shrink, int32_t *after);
576 
583  bool regression_hack=false);
584 
589  inline void set_time(int32_t t)
590  {
591  kernel_cache.time=t;
592  }
593 
599  inline int32_t kernel_cache_touch(int32_t cacheidx)
600  {
601  if(kernel_cache.index[cacheidx] != -1)
602  {
603  kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
604  return(1);
605  }
606  return(0);
607  }
608 
614  inline int32_t kernel_cache_check(int32_t cacheidx)
615  {
616  return(kernel_cache.index[cacheidx] >= 0);
617  }
618 
624  {
625  return(kernel_cache.elems < kernel_cache.max_elems);
626  }
627 
633  void kernel_cache_init(int32_t size, bool regression_hack=false);
634 
636  void kernel_cache_cleanup();
637 
638 #endif //USE_SVMLIGHT
639 
641  void list_kernel();
642 
648  inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
649 
653  virtual void clear_normal();
654 
660  virtual void add_to_normal(int32_t vector_idx, float64_t weight);
661 
667 
672  virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
673 
679 
687  virtual bool init_optimization(
688  int32_t count, int32_t *IDX, float64_t *weights);
689 
694  virtual bool delete_optimization();
695 
701  bool init_optimization_svm(CSVM * svm) ;
702 
708  virtual float64_t compute_optimized(int32_t vector_idx);
709 
718  virtual void compute_batch(
719  int32_t num_vec, int32_t* vec_idx, float64_t* target,
720  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
721  float64_t factor=1.0);
722 
728 
734 
739  virtual int32_t get_num_subkernels();
740 
746  virtual void compute_by_subkernel(
747  int32_t vector_idx, float64_t * subkernel_contrib);
748 
754  virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
755 
760  virtual void set_subkernel_weights(SGVector<float64_t> weights);
761 
771  CSGObject* obj, index_t index = -1);
772  protected:
778  {
779  properties |= p;
780  }
781 
787  {
788  properties &= (properties | p) ^ p;
789  }
790 
795  inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
796 
807  virtual float64_t compute(int32_t x, int32_t y)=0;
808 
815  int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
816  {
817  int32_t i_start;
818 
819  if (symmetric)
820  i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
821  else
822  i_start=(int32_t) (offs/int64_t(n));
823 
824  return i_start;
825  }
826 
831  template <class T>
832  static void* get_kernel_matrix_helper(void* p)
833  {
834  K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
835  int32_t i_start=params->start;
836  int32_t i_end=params->end;
837  CKernel* k=params->kernel;
838  T* result=params->result;
839  bool symmetric=params->symmetric;
840  int32_t n=params->n;
841  int32_t m=params->m;
842  bool verbose=params->verbose;
843  int64_t total_start=params->total_start;
844  int64_t total_end=params->total_end;
845  int64_t total=total_start;
846 
847  for (int32_t i=i_start; i<i_end; i++)
848  {
849  int32_t j_start=0;
850 
851  if (symmetric)
852  j_start=i;
853 
854  for (int32_t j=j_start; j<n; j++)
855  {
856  float64_t v=k->kernel(i,j);
857  result[i+j*m]=v;
858 
859  if (symmetric && i!=j)
860  result[j+i*m]=v;
861 
862  if (verbose)
863  {
864  total++;
865 
866  if (symmetric && i!=j)
867  total++;
868 
869  if (total%100 == 0)
870  k->SG_PROGRESS(total, total_start, total_end);
871 
873  break;
874  }
875  }
876 
877  }
878 
879  return NULL;
880  }
881 
890  virtual void load_serializable_post() throw (ShogunException);
891 
900  virtual void save_serializable_pre() throw (ShogunException);
901 
910  virtual void save_serializable_post() throw (ShogunException);
915  virtual void register_params();
916 
917  private:
920  void init();
921 
922 
923 #ifdef USE_SVMLIGHT
924 #ifndef DOXYGEN_SHOULD_SKIP_THIS
925 
926  struct KERNEL_CACHE {
928  int32_t *index;
930  int32_t *invindex;
932  int32_t *active2totdoc;
934  int32_t *totdoc2active;
936  int32_t *lru;
938  int32_t *occu;
940  int32_t elems;
942  int32_t max_elems;
944  int32_t time;
946  int32_t activenum;
947 
949  KERNELCACHE_ELEM *buffer;
951  KERNELCACHE_IDX buffsize;
952  };
953 
955  struct S_KTHREAD_PARAM
956  {
958  CKernel* kernel;
960  KERNEL_CACHE* kernel_cache;
962  KERNELCACHE_ELEM** cache;
964  int32_t* uncached_rows;
966  int32_t num_uncached;
968  uint8_t* needs_computation;
970  int32_t start;
972  int32_t end;
974  int32_t num_vectors;
975  };
976 #endif // DOXYGEN_SHOULD_SKIP_THIS
977 
979  static void* cache_multiple_kernel_row_helper(void* p);
980 
982  void kernel_cache_free(int32_t cacheidx);
983  int32_t kernel_cache_malloc();
984  int32_t kernel_cache_free_lru();
985  KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
986 #endif //USE_SVMLIGHT
987 
988 
989  protected:
991  int32_t cache_size;
992 
993 #ifdef USE_SVMLIGHT
994 
995  KERNEL_CACHE kernel_cache;
996 #endif //USE_SVMLIGHT
997 
1001 
1006 
1009 
1011  int32_t num_lhs;
1013  int32_t num_rhs;
1014 
1017 
1024 
1026  uint64_t properties;
1027 
1031 };
1032 
1033 }
1034 #endif /* _KERNEL_H__ */

SHOGUN Machine Learning Toolbox - Documentation