SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
KernelMachine.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 2011-2012 Heiko Strathmann
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/lib/Signal.h>
15 #include <shogun/io/SGIO.h>
16 
17 #include <shogun/kernel/Kernel.h>
19 #include <shogun/labels/Labels.h>
20 
21 using namespace shogun;
22 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Work-item descriptor handed to CKernelMachine::apply_helper(), which is
 * run either inline or as a pthread entry point when outputs are computed
 * in parallel. */
struct S_THREAD_PARAM_KERNEL_MACHINE
{
	/** machine whose apply_one() is evaluated for each vector */
	CKernelMachine* kernel_machine;
	/** shared output buffer; slot [vec] receives the score of vector vec */
	float64_t* result;
	/** first vector position (inclusive) this worker processes */
	int32_t start;
	/** one past the last vector position this worker processes */
	int32_t end;

	/* if non-null, start and end correspond to indices in this vector */
	index_t* indices;
	/** number of entries in indices (only meaningful if indices!=NULL) */
	index_t indices_len;
	/** if true, this worker emits progress output */
	bool verbose;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS
37 
39 {
40  init();
41 }
42 
45 {
46  init();
47 
48  int32_t num_sv=svs.vlen;
49  ASSERT(num_sv == alphas.vlen)
50  create_new_model(num_sv);
51  set_alphas(alphas);
54  set_bias(b);
55 }
56 
58 {
59  init();
60 
61  SGVector<float64_t> alphas = machine->get_alphas().clone();
62  SGVector<int32_t> svs = machine->get_support_vectors().clone();
63  float64_t bias = machine->get_bias();
64  CKernel* ker = machine->get_kernel();
65 
66  int32_t num_sv = svs.vlen;
67  create_new_model(num_sv);
68  set_alphas(alphas);
70  set_bias(bias);
71  set_kernel(ker);
72 }
73 
75 {
79 }
80 
82 {
83  SG_REF(k);
85  kernel=k;
86 }
87 
89 {
90  SG_REF(kernel);
91  return kernel;
92 }
93 
95 {
96  use_batch_computation=enable;
97 }
98 
100 {
101  return use_batch_computation;
102 }
103 
105 {
106  use_linadd=enable;
107 }
108 
110 {
111  return use_linadd;
112 }
113 
114 void CKernelMachine::set_bias_enabled(bool enable_bias)
115 {
116  use_bias=enable_bias;
117 }
118 
120 {
121  return use_bias;
122 }
123 
125 {
126  return m_bias;
127 }
128 
130 {
131  m_bias=bias;
132 }
133 
135 {
136  ASSERT(m_svs.vector && idx<m_svs.vlen)
137  return m_svs.vector[idx];
138 }
139 
141 {
142  if (!m_alpha.vector)
143  SG_ERROR("No alphas set\n")
144  if (idx>=m_alpha.vlen)
145  SG_ERROR("Alphas index (%d) out of range (%d)\n", idx, m_svs.vlen)
146  return m_alpha.vector[idx];
147 }
148 
149 bool CKernelMachine::set_support_vector(int32_t idx, int32_t val)
150 {
151  if (m_svs.vector && idx<m_svs.vlen)
152  m_svs.vector[idx]=val;
153  else
154  return false;
155 
156  return true;
157 }
158 
159 bool CKernelMachine::set_alpha(int32_t idx, float64_t val)
160 {
161  if (m_alpha.vector && idx<m_alpha.vlen)
162  m_alpha.vector[idx]=val;
163  else
164  return false;
165 
166  return true;
167 }
168 
170 {
171  return m_svs.vlen;
172 }
173 
175 {
176  m_alpha = alphas;
177 }
178 
180 {
181  m_svs = svs;
182 }
183 
185 {
186  return m_svs;
187 }
188 
190 {
191  return m_alpha;
192 }
193 
195 {
198 
199  m_bias=0;
200 
201  if (num>0)
202  {
204  m_svs= SGVector<int32_t>(num);
205  return (m_alpha.vector!=NULL && m_svs.vector!=NULL);
206  }
207  else
208  return true;
209 }
210 
212 {
213  int32_t num_sv=get_num_support_vectors();
214 
215  if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
216  {
217  int32_t * sv_idx = SG_MALLOC(int32_t, num_sv);
218  float64_t* sv_weight = SG_MALLOC(float64_t, num_sv);
219 
220  for(int32_t i=0; i<num_sv; i++)
221  {
222  sv_idx[i] = get_support_vector(i) ;
223  sv_weight[i] = get_alpha(i) ;
224  }
225 
226  bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight) ;
227 
228  SG_FREE(sv_idx);
229  SG_FREE(sv_weight);
230 
231  if (!ret)
232  SG_ERROR("initialization of kernel optimization failed\n")
233 
234  return ret;
235  }
236  else
237  SG_ERROR("initialization of kernel optimization failed\n")
238 
239  return false;
240 }
241 
243 {
244  SGVector<float64_t> outputs = apply_get_outputs(data);
245  return new CRegressionLabels(outputs);
246 }
247 
249 {
250  SGVector<float64_t> outputs = apply_get_outputs(data);
251  return new CBinaryLabels(outputs);
252 }
253 
255 {
256  SG_DEBUG("entering %s::apply_get_outputs(%s at %p)\n",
257  get_name(), data ? data->get_name() : "NULL", data);
258 
259  REQUIRE(kernel, "%s::apply_get_outputs(): No kernel assigned!\n")
260 
261  if (!kernel->get_num_vec_lhs())
262  {
263  SG_ERROR("%s: No vectors on left hand side (%s). This is probably due to"
264  " an implementation error in %s, where it was forgotten to set "
265  "the data (m_svs) indices\n", get_name(),
266  data->get_name());
267  }
268 
269  if (data)
270  {
271  CFeatures* lhs=kernel->get_lhs();
272  REQUIRE(lhs, "%s::apply_get_outputs(): No left hand side specified\n",
273  get_name());
274  kernel->init(lhs, data);
275  SG_UNREF(lhs);
276  }
277 
278  /* using the features to get num vectors is far safer than using the kernel
279  * since SHOGUNs kernel num_rhs/num_lhs is buggy (CombinedKernel for ex.)
280  * Might be worth investigating why
281  * kernel->get_num_rhs() != rhs->get_num_vectors()
282  * However, the below version works
283  * TODO Heiko Strathmann
284  */
285  CFeatures* rhs=kernel->get_rhs();
286  int32_t num_vectors=rhs ? rhs->get_num_vectors() : kernel->get_num_vec_rhs();
287  SG_UNREF(rhs)
288 
289  SGVector<float64_t> output(num_vectors);
290 
291  if (kernel->get_num_vec_rhs()>0)
292  {
293  SG_DEBUG("computing output on %d test examples\n", num_vectors)
294 
296 
297  if (io->get_show_progress())
298  io->enable_progress();
299  else
300  io->disable_progress();
301 
304  {
305  output.zero();
306  SG_DEBUG("Batch evaluation enabled\n")
307  if (get_num_support_vectors()>0)
308  {
309  int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
310  float64_t* sv_weight=SG_MALLOC(float64_t, get_num_support_vectors());
311  int32_t* idx=SG_MALLOC(int32_t, num_vectors);
312 
313  //compute output for all vectors v[0]...v[num_vectors-1]
314  for (int32_t i=0; i<num_vectors; i++)
315  idx[i]=i;
316 
317  for (int32_t i=0; i<get_num_support_vectors(); i++)
318  {
319  sv_idx[i] = get_support_vector(i) ;
320  sv_weight[i] = get_alpha(i) ;
321  }
322 
323  kernel->compute_batch(num_vectors, idx,
324  output.vector, get_num_support_vectors(), sv_idx, sv_weight);
325  SG_FREE(sv_idx);
326  SG_FREE(sv_weight);
327  SG_FREE(idx);
328  }
329 
330  for (int32_t i=0; i<num_vectors; i++)
331  output[i] = get_bias() + output[i];
332 
333  }
334  else
335  {
336  int32_t num_threads=parallel->get_num_threads();
337  ASSERT(num_threads>0)
338 
339  if (num_threads < 2)
340  {
341  S_THREAD_PARAM_KERNEL_MACHINE params;
342  params.kernel_machine=this;
343  params.result = output.vector;
344  params.start=0;
345  params.end=num_vectors;
346  params.verbose=true;
347  params.indices = NULL;
348  params.indices_len = 0;
349  apply_helper((void*) &params);
350  }
351 #ifdef HAVE_PTHREAD
352  else
353  {
354  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
355  S_THREAD_PARAM_KERNEL_MACHINE* params = SG_MALLOC(S_THREAD_PARAM_KERNEL_MACHINE, num_threads);
356  int32_t step= num_vectors/num_threads;
357 
358  int32_t t;
359 
360  for (t=0; t<num_threads-1; t++)
361  {
362  params[t].kernel_machine = this;
363  params[t].result = output.vector;
364  params[t].start = t*step;
365  params[t].end = (t+1)*step;
366  params[t].verbose = false;
367  params[t].indices = NULL;
368  params[t].indices_len = 0;
369  pthread_create(&threads[t], NULL,
370  CKernelMachine::apply_helper, (void*)&params[t]);
371  }
372 
373  params[t].kernel_machine = this;
374  params[t].result = output.vector;
375  params[t].start = t*step;
376  params[t].end = num_vectors;
377  params[t].verbose = true;
378  params[t].indices = NULL;
379  params[t].indices_len = 0;
380  apply_helper((void*) &params[t]);
381 
382  for (t=0; t<num_threads-1; t++)
383  pthread_join(threads[t], NULL);
384 
385  SG_FREE(params);
386  SG_FREE(threads);
387  }
388 #endif
389  }
390 
391 #ifndef WIN32
393  SG_INFO("prematurely stopped. \n")
394  else
395 #endif
396  SG_DONE()
397  }
398 
399  SG_DEBUG("leaving %s::apply_get_outputs(%s at %p)\n",
400  get_name(), data ? data->get_name() : "NULL", data);
401 
402  return output;
403 }
404 
406 {
407  ASSERT(kernel)
408 
410  {
411  float64_t score = kernel->compute_optimized(num);
412  return score+get_bias();
413  }
414  else
415  {
416  float64_t score=0;
417  for(int32_t i=0; i<get_num_support_vectors(); i++)
418  score+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);
419 
420  return score+get_bias();
421  }
422 }
423 
425 {
426  S_THREAD_PARAM_KERNEL_MACHINE* params = (S_THREAD_PARAM_KERNEL_MACHINE*) p;
427  float64_t* result = params->result;
428  CKernelMachine* kernel_machine = params->kernel_machine;
429 
430 #ifdef WIN32
431  for (int32_t vec=params->start; vec<params->end; vec++)
432 #else
433  for (int32_t vec=params->start; vec<params->end &&
435 #endif
436  {
437  if (params->verbose)
438  {
439  int32_t num_vectors=params->end - params->start;
440  int32_t v=vec-params->start;
441  if ( (v% (num_vectors/100+1))== 0)
442  SG_SPROGRESS(v, 0.0, num_vectors-1)
443  }
444 
445  /* eventually use index mapping if exists */
446  index_t idx=params->indices ? params->indices[vec] : vec;
447  result[vec] = kernel_machine->apply_one(idx);
448  }
449 
450  return NULL;
451 }
452 
454 {
455  if (!kernel)
456  SG_ERROR("kernel is needed to store SV features.\n")
457 
458  CFeatures* lhs=kernel->get_lhs();
459  CFeatures* rhs=kernel->get_rhs();
460 
461  if (!lhs)
462  SG_ERROR("kernel lhs is needed to store SV features.\n")
463 
464  /* copy sv feature data */
465  CFeatures* sv_features=lhs->copy_subset(m_svs);
466  SG_UNREF(lhs);
467 
468  /* set new lhs to kernel */
469  kernel->init(sv_features, rhs);
470 
471  /* unref rhs */
472  SG_UNREF(rhs);
473 
474  /* was SG_REF'ed by copy_subset */
475  SG_UNREF(sv_features);
476 
477  /* now sv indices are just the identity */
478  m_svs.range_fill();
479 
480 }
481 
483 {
484  SG_DEBUG("entering %s::train_locked()\n", get_name())
485  if (!is_data_locked())
486  SG_ERROR("CKernelMachine::train_locked() call data_lock() before!\n")
487 
 488  /* this is assumed here */
490 
 491  /* since it's not easily possible to control the row subsets of the custom
492  * kernel from outside, we enforce that there is only one row subset by
493  * removing all of them. Otherwise, they would add up in the stack until
494  * an error occurs */
496 
497  /* set custom kernel subset of data to train on */
500 
501  /* set corresponding labels subset */
502  m_labels->add_subset(indices);
503 
 504  /* don't do train because model should not be stored (no actual features)
505  * and train does data_unlock */
506  bool result=train_machine();
507 
508  /* remove last col subset of custom kernel */
510 
511  /* remove label subset after training */
513 
514  SG_DEBUG("leaving %s::train_locked()\n", get_name())
515  return result;
516 }
517 
519 {
520  SGVector<float64_t> outputs = apply_locked_get_output(indices);
521  return new CBinaryLabels(outputs);
522 }
523 
525  SGVector<index_t> indices)
526 {
527  SGVector<float64_t> outputs = apply_locked_get_output(indices);
528  return new CRegressionLabels(outputs);
529 }
530 
532  SGVector<index_t> indices)
533 {
534  if (!is_data_locked())
535  SG_ERROR("CKernelMachine::apply_locked() call data_lock() before!\n")
536 
537  /* we are working on a custom kernel here */
539 
540  int32_t num_inds=indices.vlen;
541  SGVector<float64_t> output(num_inds);
542 
544 
545  if (io->get_show_progress())
546  io->enable_progress();
547  else
548  io->disable_progress();
549 
550  /* custom kernel never has batch evaluation property so dont do this here */
551  int32_t num_threads=parallel->get_num_threads();
552  ASSERT(num_threads>0)
553 
554  if (num_threads<2)
555  {
556  S_THREAD_PARAM_KERNEL_MACHINE params;
557  params.kernel_machine=this;
558  params.result=output.vector;
559 
560  /* use the parameter index vector */
561  params.start=0;
562  params.end=num_inds;
563  params.indices=indices.vector;
564  params.indices_len=indices.vlen;
565 
566  params.verbose=true;
567  apply_helper((void*) &params);
568  }
569 #ifdef HAVE_PTHREAD
570  else
571  {
572  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
573  S_THREAD_PARAM_KERNEL_MACHINE* params=SG_MALLOC(S_THREAD_PARAM_KERNEL_MACHINE, num_threads);
574  int32_t step= num_inds/num_threads;
575 
576  int32_t t;
577  for (t=0; t<num_threads-1; t++)
578  {
579  params[t].kernel_machine=this;
580  params[t].result=output.vector;
581 
582  /* use the parameter index vector */
583  params[t].start=t*step;
584  params[t].end=(t+1)*step;
585  params[t].indices=indices.vector;
586  params[t].indices_len=indices.vlen;
587 
588  params[t].verbose=false;
589  pthread_create(&threads[t], NULL, CKernelMachine::apply_helper,
590  (void*)&params[t]);
591  }
592 
593  params[t].kernel_machine=this;
594  params[t].result=output.vector;
595 
596  /* use the parameter index vector */
597  params[t].start=t*step;
598  params[t].end=num_inds;
599  params[t].indices=indices.vector;
600  params[t].indices_len=indices.vlen;
601 
602  params[t].verbose=true;
603  apply_helper((void*) &params[t]);
604 
605  for (t=0; t<num_threads-1; t++)
606  pthread_join(threads[t], NULL);
607 
608  SG_FREE(params);
609  SG_FREE(threads);
610  }
611 #endif
612 
613 #ifndef WIN32
615  SG_INFO("prematurely stopped.\n")
616  else
617 #endif
618  SG_DONE()
619 
620  return output;
621 }
622 
624 {
625  if ( !kernel )
626  SG_ERROR("The kernel is not initialized\n")
628  SG_ERROR("Locking is not supported (yet) with combined kernel. Please disable it in cross validation")
629 
630  /* init kernel with data */
631  kernel->init(features, features);
632 
633  /* backup reference to old kernel */
637 
638  /* unref possible old custom kernel */
640 
641  /* create custom kernel matrix from current kernel */
644 
645  /* replace kernel by custom kernel */
646  SG_UNREF(kernel);
648  SG_REF(kernel);
649 
650  /* dont forget to call superclass method */
651  CMachine::data_lock(labs, features);
652 }
653 
655 {
657  m_custom_kernel=NULL;
658 
659  /* restore original kernel, possibly delete created one */
660  if (m_kernel_backup)
661  {
662  /* check if kernel was created in train_locked */
663  if (kernel!=m_kernel_backup)
664  SG_UNREF(kernel);
665 
667  m_kernel_backup=NULL;
668  }
669 
670  /* dont forget to call superclass method */
672 }
673 
674 void CKernelMachine::init()
675 {
676  m_bias=0.0;
677  kernel=NULL;
678  m_custom_kernel=NULL;
679  m_kernel_backup=NULL;
681  use_linadd=true;
682  use_bias=true;
683 
684  SG_ADD((CSGObject**) &kernel, "kernel", "", MS_AVAILABLE);
685  SG_ADD((CSGObject**) &m_custom_kernel, "custom_kernel", "Custom kernel for"
686  " data lock", MS_NOT_AVAILABLE);
687  SG_ADD((CSGObject**) &m_kernel_backup, "kernel_backup",
688  "Kernel backup for data lock", MS_NOT_AVAILABLE);
689  SG_ADD(&use_batch_computation, "use_batch_computation",
690  "Batch computation is enabled.", MS_NOT_AVAILABLE);
691  SG_ADD(&use_linadd, "use_linadd", "Linadd is enabled.", MS_NOT_AVAILABLE);
692  SG_ADD(&use_bias, "use_bias", "Bias shall be used.", MS_NOT_AVAILABLE);
693  SG_ADD(&m_bias, "m_bias", "Bias term.", MS_NOT_AVAILABLE);
694  SG_ADD(&m_alpha, "m_alpha", "Array of coefficients alpha.",
696  SG_ADD(&m_svs, "m_svs", "Number of ``support vectors''.", MS_NOT_AVAILABLE);
697 }
698 
700 {
701  return true;
702 }
703 
virtual float64_t apply_one(int32_t num)
virtual const char * get_name() const =0
SGVector< float64_t > apply_get_outputs(CFeatures *data)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:98
SGVector< int32_t > m_svs
void set_bias_enabled(bool enable_bias)
void range_fill(T start=0)
Definition: SGVector.cpp:173
#define SG_INFO(...)
Definition: SGIO.h:118
#define SG_DONE()
Definition: SGIO.h:157
virtual CBinaryLabels * apply_locked_binary(SGVector< index_t > indices)
Real Labels are real-valued labels.
int32_t get_num_threads() const
Definition: Parallel.cpp:64
int32_t index_t
Definition: common.h:62
virtual void add_row_subset(SGVector< index_t > subset)
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
The Custom Kernel allows for custom user provided kernel matrices.
Definition: CustomKernel.h:36
virtual const char * get_name() const
Definition: KernelMachine.h:80
virtual CRegressionLabels * apply_regression(CFeatures *data=NULL)
SGVector< int32_t > get_support_vectors()
virtual int32_t get_num_vectors() const =0
CCustomKernel * m_custom_kernel
CLabels * m_labels
Definition: Machine.h:361
CFeatures * get_rhs()
Definition: Kernel.h:510
#define SG_ERROR(...)
Definition: SGIO.h:129
#define REQUIRE(x,...)
Definition: SGIO.h:206
float64_t kernel(int32_t idx_a, int32_t idx_b)
Definition: Kernel.h:206
static void * apply_helper(void *p)
virtual bool train_machine(CFeatures *data=NULL)
Definition: Machine.h:318
A generic KernelMachine interface.
Definition: KernelMachine.h:51
Parallel * parallel
Definition: SGObject.h:372
virtual int32_t get_num_vec_lhs()
Definition: Kernel.h:516
virtual void remove_all_row_subsets()
#define SG_REF(x)
Definition: SGObject.h:51
A generic learning machine interface.
Definition: Machine.h:143
bool get_is_initialized()
Definition: Kernel.h:753
void set_support_vectors(SGVector< int32_t > svs)
virtual bool train_locked(SGVector< index_t > indices)
SGVector< float64_t > m_alpha
virtual void remove_col_subset()
bool has_property(EKernelProperty p)
Definition: Kernel.h:723
virtual void add_col_subset(SGVector< index_t > subset)
index_t vlen
Definition: SGVector.h:494
virtual void store_model_features()
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
void set_bias(float64_t bias)
void set_batch_computation_enabled(bool enable)
static void clear_cancel()
Definition: Signal.cpp:129
virtual SGVector< float64_t > apply_locked_get_output(SGVector< index_t > indices)
void disable_progress()
Definition: SGIO.h:414
double float64_t
Definition: common.h:50
bool set_alpha(int32_t idx, float64_t val)
virtual void data_unlock()
Definition: Machine.cpp:143
virtual void data_unlock()
virtual void data_lock(CLabels *labs, CFeatures *features)
Definition: Machine.cpp:112
virtual void remove_subset()
Definition: Labels.cpp:49
virtual float64_t compute_optimized(int32_t vector_idx)
Definition: Kernel.cpp:840
float64_t get_alpha(int32_t idx)
virtual void add_subset(SGVector< index_t > subset)
Definition: Labels.cpp:39
virtual bool supports_locking() const
bool set_support_vector(int32_t idx, int32_t val)
int32_t get_support_vector(int32_t idx)
static bool cancel_computations()
Definition: Signal.h:86
virtual int32_t get_num_vec_rhs()
Definition: Kernel.h:525
bool get_show_progress() const
Definition: SGIO.h:280
SGVector< float64_t > get_alphas()
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual void compute_batch(int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0)
Definition: Kernel.cpp:846
virtual bool init_optimization(int32_t count, int32_t *IDX, float64_t *weights)
Definition: Kernel.cpp:827
virtual CFeatures * copy_subset(SGVector< index_t > indices)
Definition: Features.cpp:340
virtual CRegressionLabels * apply_locked_regression(SGVector< index_t > indices)
void set_alphas(SGVector< float64_t > alphas)
The class Features is the base class of all feature objects.
Definition: Features.h:68
SGVector< T > clone() const
Definition: SGVector.cpp:209
#define SG_SPROGRESS(...)
Definition: SGIO.h:183
void set_linadd_enabled(bool enable)
The Kernel base class.
Definition: Kernel.h:158
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
void set_kernel(CKernel *k)
#define SG_ADD(...)
Definition: SGObject.h:81
bool is_data_locked() const
Definition: Machine.h:296
virtual CBinaryLabels * apply_binary(CFeatures *data=NULL)
bool create_new_model(int32_t num)
CFeatures * get_lhs()
Definition: Kernel.h:504
virtual void data_lock(CLabels *labs, CFeatures *features=NULL)
void enable_progress()
Definition: SGIO.h:404

SHOGUN Machine Learning Toolbox - Documentation