SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KernelMachine.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 2011-2012 Heiko Strathmann
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/lib/Signal.h>
15 #include <shogun/base/Parameter.h>
17 
18 using namespace shogun;
19 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Per-thread work description used by the parallel apply helpers.
 *  One instance is filled per worker; apply_helper() consumes it. */
struct S_THREAD_PARAM
{
	/** machine whose apply_one() is called for each vector */
	CKernelMachine* kernel_machine;
	/** shared output array; each thread writes only its [start,end) slice */
	float64_t* result;
	/** first position (inclusive) this thread processes */
	int32_t start;
	/** one past the last position this thread processes */
	int32_t end;

	/* if non-null, start and end correspond to indices in this vector */
	index_t* indices;
	/** length of the indices array (0 when indices is NULL) */
	index_t indices_len;
	/** if true, this thread reports progress (only one thread should) */
	bool verbose;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS
34 
/* NOTE(review): Doxygen scrape — the three constructor signatures (and, per
 * the line-number gaps, a set_support_vectors(svs) call in each parameterised
 * constructor) are elided from this view; code kept as scraped. */
{
	/* default construction: only set up members and parameter registration */
	init();
}

{
	init();

	/* number of support vectors must match number of coefficients */
	int32_t num_sv=svs.vlen;
	ASSERT(num_sv == alphas.vlen);
	create_new_model(num_sv);
	set_alphas(alphas);
	/* (a set_support_vectors(svs) call appears to sit on elided lines — confirm) */
	set_bias(b);
}

{
	/* construction from another kernel machine: clone its alphas/SVs so this
	 * instance owns independent model data, then share bias and kernel */
	init();

	SGVector<float64_t> alphas = machine->get_alphas().clone();
	SGVector<int32_t> svs = machine->get_support_vectors().clone();
	float64_t bias = machine->get_bias();
	CKernel* ker = machine->get_kernel();

	int32_t num_sv = svs.vlen;
	create_new_model(num_sv);
	set_alphas(alphas);
	set_bias(bias);
	set_kernel(ker);
	/* NOTE(review): get_kernel() and set_kernel() both SG_REF; whether the
	 * extra reference on `ker` is released lies on an elided line — verify */
}
70 
/* destructor — its body lines are elided from this view (presumably
 * releasing held kernel references); kept as scraped */
{
}

/* install kernel `k`, taking a reference on it.
 * NOTE(review): the unref of the previously held kernel sits on an elided
 * line — confirm against the full source. */
{
	SG_REF(k);
	kernel=k;
}

/* return the current kernel with an added reference; caller must SG_UNREF */
{
	SG_REF(kernel);
	return kernel;
}
90 
/* setter: enable/disable batch evaluation (signature elided in this view) */
{
	use_batch_computation=enable;
}

/* getter: whether batch evaluation is enabled */
{
	return use_batch_computation;
}

/* setter: enable/disable the linadd optimization */
{
	use_linadd=enable;
}

/* getter: whether the linadd optimization is enabled */
{
	return use_linadd;
}
110 
111 void CKernelMachine::set_bias_enabled(bool enable_bias)
112 {
113  use_bias=enable_bias;
114 }
115 
/* getter: whether the bias term is used */
{
	return use_bias;
}

/* getter: current bias value */
{
	return m_bias;
}

/* setter: overwrite the bias value */
{
	m_bias=bias;
}
130 
/* return the feature index of support vector `idx`.
 * NOTE(review): only the upper bound is asserted; a negative idx is not
 * caught here — confirm callers guarantee idx >= 0 */
{
	ASSERT(m_svs.vector && idx<m_svs.vlen);
	return m_svs.vector[idx];
}
136 
138 {
139  if (!m_alpha.vector)
140  SG_ERROR("No alphas set\n");
141  if (idx>=m_alpha.vlen)
142  SG_ERROR("Alphas index (%d) out of range (%d)\n", idx, m_svs.vlen);
143  return m_alpha.vector[idx];
144 }
145 
146 bool CKernelMachine::set_support_vector(int32_t idx, int32_t val)
147 {
148  if (m_svs.vector && idx<m_svs.vlen)
149  m_svs.vector[idx]=val;
150  else
151  return false;
152 
153  return true;
154 }
155 
156 bool CKernelMachine::set_alpha(int32_t idx, float64_t val)
157 {
158  if (m_alpha.vector && idx<m_alpha.vlen)
159  m_alpha.vector[idx]=val;
160  else
161  return false;
162 
163  return true;
164 }
165 
/* number of stored support vectors */
{
	return m_svs.vlen;
}

/* replace the coefficient vector (shallow SGVector assignment) */
{
	m_alpha = alphas;
}

/* replace the support-vector index vector (shallow SGVector assignment) */
{
	m_svs = svs;
}

/* return the support-vector index vector */
{
	return m_svs;
}

/* return the coefficient vector */
{
	return m_alpha;
}
190 
/* create_new_model(num): reset the bias and (re)allocate alpha/SV storage
 * for `num` support vectors; returns whether allocation succeeded.
 * NOTE(review): the m_alpha allocation and the freeing of previous storage
 * sit on lines elided from this view — the return value checks
 * m_alpha.vector, so the allocation must happen nearby; confirm. */
{

	m_bias=0;

	if (num>0)
	{
		m_svs= SGVector<int32_t>(num);
		return (m_alpha.vector!=NULL && m_svs.vector!=NULL);
	}
	else
		return true;
}
207 
209 {
210  int32_t num_sv=get_num_support_vectors();
211 
212  if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
213  {
214  int32_t * sv_idx = SG_MALLOC(int32_t, num_sv);
215  float64_t* sv_weight = SG_MALLOC(float64_t, num_sv);
216 
217  for(int32_t i=0; i<num_sv; i++)
218  {
219  sv_idx[i] = get_support_vector(i) ;
220  sv_weight[i] = get_alpha(i) ;
221  }
222 
223  bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight) ;
224 
225  SG_FREE(sv_idx);
226  SG_FREE(sv_weight);
227 
228  if (!ret)
229  SG_ERROR( "initialization of kernel optimization failed\n");
230 
231  return ret;
232  }
233  else
234  SG_ERROR( "initialization of kernel optimization failed\n");
235 
236  return false;
237 }
238 
/* apply_regression(data): evaluate the machine on `data` (or the current
 * kernel rhs) and wrap the raw scores in regression labels; caller owns
 * the returned object (signature elided in this view) */
{
	SGVector<float64_t> outputs = apply_get_outputs(data);
	return new CRegressionLabels(outputs);
}

/* apply_binary(data): same raw scores wrapped as binary labels */
{
	SGVector<float64_t> outputs = apply_get_outputs(data);
	return new CBinaryLabels(outputs);
}
250 
/* apply_get_outputs(data): compute f(x) = sum_i alpha_i*k(sv_i,x) + b for
 * every vector on the kernel rhs (or `data`, if given) and return the raw
 * scores. Uses the kernel's batch path when available, otherwise a
 * (possibly multithreaded) per-vector loop over apply_one().
 * NOTE(review): Doxygen scrape — several lines are elided from this view
 * (signal reset, the batch-computation condition, the sv_weight allocation,
 * the premature-stop condition); code kept as scraped. */
{
	SG_DEBUG("entering %s::apply_get_outputs(%s at %p)\n",
			get_name(), data ? data->get_name() : "NULL", data);

	/* NOTE(review): format string has a %s conversion but no matching
	 * argument — looks like a missing get_name(); confirm */
	REQUIRE(kernel, "%s::apply_get_outputs(): No kernel assigned!\n");

	if (!kernel->get_num_vec_lhs())
	{
		/* NOTE(review): three %s conversions but only two arguments, and
		 * `data` is dereferenced here before the NULL check below — verify */
		SG_ERROR("%s: No vectors on left hand side (%s). This is probably due to"
				" an implementation error in %s, where it was forgotten to set "
				"the data (m_svs) indices\n", get_name(),
				data->get_name());
	}

	/* when features are passed in, re-init the kernel as (stored lhs, data) */
	if (data)
	{
		CFeatures* lhs=kernel->get_lhs();
		REQUIRE(lhs, "%s::apply_get_outputs(): No left hand side specified\n",
				get_name());
		kernel->init(lhs, data);
		SG_UNREF(lhs);
	}

	/* using the features to get num vectors is far safer than using the kernel
	 * since SHOGUNs kernel num_rhs/num_lhs is buggy (CombinedKernel for ex.)
	 * Might be worth investigating why
	 * kernel->get_num_rhs() != rhs->get_num_vectors()
	 * However, the below version works
	 * TODO Heiko Strathmann
	 */
	CFeatures* rhs=kernel->get_rhs();
	int32_t num_vectors=rhs ? rhs->get_num_vectors() : kernel->get_num_vec_rhs();
	SG_UNREF(rhs)

	SGVector<float64_t> output(num_vectors);

	if (kernel->get_num_vec_rhs()>0)
	{
		SG_DEBUG( "computing output on %d test examples\n", num_vectors);

		if (io->get_show_progress())
			io->enable_progress();
		else
			io->disable_progress();

		/* batch path: one compute_batch() call covers all vectors
		 * (the guarding condition sits on an elided line) */
		{
			output.zero();
			SG_DEBUG("Batch evaluation enabled\n");
			if (get_num_support_vectors()>0)
			{
				int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
				/* (the sv_weight allocation sits on an elided line) */
				int32_t* idx=SG_MALLOC(int32_t, num_vectors);

				//compute output for all vectors v[0]...v[num_vectors-1]
				for (int32_t i=0; i<num_vectors; i++)
					idx[i]=i;

				for (int32_t i=0; i<get_num_support_vectors(); i++)
				{
					sv_idx[i] = get_support_vector(i) ;
					sv_weight[i] = get_alpha(i) ;
				}

				kernel->compute_batch(num_vectors, idx,
						output.vector, get_num_support_vectors(), sv_idx, sv_weight);
				SG_FREE(sv_idx);
				SG_FREE(sv_weight);
				SG_FREE(idx);
			}

			/* add the bias to every raw score */
			for (int32_t i=0; i<num_vectors; i++)
				output[i] = get_bias() + output[i];

		}
		else
		{
			/* non-batch path: per-vector apply_one(), optionally threaded */
			int32_t num_threads=parallel->get_num_threads();
			ASSERT(num_threads>0);

			if (num_threads < 2)
			{
				/* single-threaded: one param struct, run inline */
				S_THREAD_PARAM params;
				params.kernel_machine=this;
				params.result = output.vector;
				params.start=0;
				params.end=num_vectors;
				params.verbose=true;
				params.indices = NULL;
				params.indices_len = 0;
				apply_helper((void*) &params);
			}
#ifdef HAVE_PTHREAD
			else
			{
				/* split [0,num_vectors) into num_threads chunks; the last
				 * chunk runs on this thread and is the one showing progress */
				pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
				S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, num_threads);
				int32_t step= num_vectors/num_threads;

				int32_t t;

				for (t=0; t<num_threads-1; t++)
				{
					params[t].kernel_machine = this;
					params[t].result = output.vector;
					params[t].start = t*step;
					params[t].end = (t+1)*step;
					params[t].verbose = false;
					params[t].indices = NULL;
					params[t].indices_len = 0;
					pthread_create(&threads[t], NULL,
							CKernelMachine::apply_helper, (void*)&params[t]);
				}

				/* last chunk absorbs the division remainder (end=num_vectors) */
				params[t].kernel_machine = this;
				params[t].result = output.vector;
				params[t].start = t*step;
				params[t].end = num_vectors;
				params[t].verbose = true;
				params[t].indices = NULL;
				params[t].indices_len = 0;
				apply_helper((void*) &params[t]);

				for (t=0; t<num_threads-1; t++)
					pthread_join(threads[t], NULL);

				SG_FREE(params);
				SG_FREE(threads);
			}
#endif
		}

#ifndef WIN32
		/* (the cancel-check condition sits on an elided line) */
			SG_INFO( "prematurely stopped. \n");
		else
#endif
			SG_DONE();
	}

	SG_DEBUG("leaving %s::apply_get_outputs(%s at %p)\n",
			get_name(), data ? data->get_name() : "NULL", data);

	return output;
}
401 
/* apply_one(num): score a single example, sum_i alpha_i*k(sv_i,num) + bias.
 * The first branch uses the kernel's precomputed LINADD form; its guarding
 * condition sits on a line elided from this view. */
{
	ASSERT(kernel);

	{
		float64_t score = kernel->compute_optimized(num);
		return score+get_bias();
	}
	else
	{
		/* fallback: plain sum over all support vectors */
		float64_t score=0;
		for(int32_t i=0; i<get_num_support_vectors(); i++)
			score+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);

		return score+get_bias();
	}
}
420 
/* apply_helper(p): pthread entry point. Scores vectors [start,end) described
 * by the S_THREAD_PARAM and writes into the shared result array; returns
 * NULL. (On non-WIN32 the loop condition continues on an elided line — it
 * additionally checks for computation cancellation.) */
{
	S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
	float64_t* result = params->result;
	CKernelMachine* kernel_machine = params->kernel_machine;

#ifdef WIN32
	for (int32_t vec=params->start; vec<params->end; vec++)
#else
	for (int32_t vec=params->start; vec<params->end &&
#endif
	{
		/* only the designated thread prints progress (~100 updates total) */
		if (params->verbose)
		{
			int32_t num_vectors=params->end - params->start;
			int32_t v=vec-params->start;
			if ( (v% (num_vectors/100+1))== 0)
				SG_SPROGRESS(v, 0.0, num_vectors-1);
		}

		/* eventually use index mapping if exists */
		index_t idx=params->indices ? params->indices[vec] : vec;
		result[vec] = kernel_machine->apply_one(idx);
	}

	return NULL;
}
449 
/* store_model_features(): make the model self-contained by copying the SV
 * feature vectors out of the kernel lhs and re-initialising the kernel on
 * that copy; afterwards SV indices are simply the identity 0..num_sv-1
 * (signature elided in this view) */
{
	if (!kernel)
		SG_ERROR("kernel is needed to store SV features.\n");

	CFeatures* lhs=kernel->get_lhs();
	CFeatures* rhs=kernel->get_rhs();

	if (!lhs)
		SG_ERROR("kernel lhs is needed to store SV features.\n");

	/* copy sv feature data */
	CFeatures* sv_features=lhs->copy_subset(m_svs);
	SG_UNREF(lhs);

	/* set new lhs to kernel */
	kernel->init(sv_features, rhs);

	/* unref rhs */
	SG_UNREF(rhs);

	/* was SG_REF'ed by copy_subset */
	SG_UNREF(sv_features);

	/* now sv indices are just the identity */
	m_svs.range_fill();

}
478 
/* train_locked(indices): train on a subset of the locked (precomputed
 * custom-kernel) data, selected by `indices`.
 * NOTE(review): Doxygen scrape — the elided lines appear to hold the
 * custom-kernel assertion, the removal of stacked row subsets, the
 * add-row/col-subset calls, and the matching subset removals; code kept
 * as scraped. */
{
	SG_DEBUG("entering %s::train_locked()\n", get_name());
	if (!is_data_locked())
		SG_ERROR("CKernelMachine::train_locked() call data_lock() before!\n");

	/* this is assumed here */

	/* since it's not easily possible to control the row subsets of the custom
	 * kernel from outside, we enforce that there is only one row subset by
	 * removing all of them. Otherwise, they would add up in the stack until
	 * an error occurs */

	/* set custom kernel subset of data to train on */

	/* set corresponding labels subset */
	m_labels->add_subset(indices);

	/* don't do train because model should not be stored (no actual features)
	 * and train does data_unlock */
	bool result=train_machine();

	/* remove last col subset of custom kernel */

	/* remove label subset after training */

	SG_DEBUG("leaving %s::train_locked()\n", get_name());
	return result;
}
514 
/* apply_locked_binary(indices): locked-data apply, scores wrapped as
 * binary labels; caller owns the result (signature elided in this view) */
{
	SGVector<float64_t> outputs = apply_locked_get_output(indices);
	return new CBinaryLabels(outputs);
}

/* apply_locked_regression(indices): locked-data apply, scores wrapped as
 * regression labels (first line of the signature is elided) */
	SGVector<index_t> indices)
{
	SGVector<float64_t> outputs = apply_locked_get_output(indices);
	return new CRegressionLabels(outputs);
}
527 
/* apply_locked_get_output(indices): score the locked (custom-kernel) data at
 * the given indices, parallelized like apply_get_outputs() but always via
 * the per-vector path (custom kernels have no batch property).
 * NOTE(review): Doxygen scrape — the elided lines appear to hold the first
 * signature line, a custom-kernel assertion, the signal reset, and the
 * premature-stop condition; code kept as scraped. */
	SGVector<index_t> indices)
{
	if (!is_data_locked())
		SG_ERROR("CKernelMachine::apply_locked() call data_lock() before!\n");

	/* we are working on a custom kernel here */

	int32_t num_inds=indices.vlen;
	SGVector<float64_t> output(num_inds);


	if (io->get_show_progress())
		io->enable_progress();
	else
		io->disable_progress();

	/* custom kernel never has batch evaluation property so dont do this here */
	int32_t num_threads=parallel->get_num_threads();
	ASSERT(num_threads>0);

	if (num_threads<2)
	{
		/* single-threaded: run the whole index range inline */
		S_THREAD_PARAM params;
		params.kernel_machine=this;
		params.result=output.vector;

		/* use the parameter index vector */
		params.start=0;
		params.end=num_inds;
		params.indices=indices.vector;
		params.indices_len=indices.vlen;

		params.verbose=true;
		apply_helper((void*) &params);
	}
#ifdef HAVE_PTHREAD
	else
	{
		/* chunk the index range; the last chunk runs on this thread and
		 * absorbs the division remainder */
		pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
		S_THREAD_PARAM* params=SG_MALLOC(S_THREAD_PARAM, num_threads);
		int32_t step= num_inds/num_threads;

		int32_t t;
		for (t=0; t<num_threads-1; t++)
		{
			params[t].kernel_machine=this;
			params[t].result=output.vector;

			/* use the parameter index vector */
			params[t].start=t*step;
			params[t].end=(t+1)*step;
			params[t].indices=indices.vector;
			params[t].indices_len=indices.vlen;

			params[t].verbose=false;
			pthread_create(&threads[t], NULL, CKernelMachine::apply_helper,
					(void*)&params[t]);
		}

		params[t].kernel_machine=this;
		params[t].result=output.vector;

		/* use the parameter index vector */
		params[t].start=t*step;
		params[t].end=num_inds;
		params[t].indices=indices.vector;
		params[t].indices_len=indices.vlen;

		params[t].verbose=true;
		apply_helper((void*) &params[t]);

		for (t=0; t<num_threads-1; t++)
			pthread_join(threads[t], NULL);

		SG_FREE(params);
		SG_FREE(threads);
	}
#endif

#ifndef WIN32
	/* (the cancel-check condition sits on an elided line) */
		SG_INFO("prematurely stopped.\n");
	else
#endif
		SG_DONE();

	return output;
}
619 
/* data_lock(labs, features): precompute the full kernel matrix on `features`
 * and swap a custom kernel in for subsequent locked training/apply.
 * NOTE(review): Doxygen scrape — the elided lines appear to hold the kernel
 * backup assignment, the unref of an old custom kernel, the custom-kernel
 * construction, and the kernel=m_custom_kernel assignment; code kept as
 * scraped. */
{
	if ( !kernel )
		SG_ERROR("The kernel is not initialized\n");

	/* init kernel with data */
	kernel->init(features, features);

	/* backup reference to old kernel */

	/* unref possible old custom kernel */

	/* create custom kernel matrix from current kernel */

	/* replace kernel by custom kernel */
	SG_UNREF(kernel);
	SG_REF(kernel);

	/* dont forget to call superclass method */
	CMachine::data_lock(labs, features);
}
648 
/* data_unlock(): drop the custom kernel and restore the original kernel.
 * NOTE(review): Doxygen scrape — the elided lines appear to hold the unref
 * of the custom kernel, the kernel=m_kernel_backup restore, and the
 * superclass data_unlock() call; code kept as scraped. */
{
	m_custom_kernel=NULL;

	/* restore original kernel, possibly delete created one */
	if (m_kernel_backup)
	{
		/* check if kernel was created in train_locked */
		if (kernel!=m_kernel_backup)
			SG_UNREF(kernel);

		m_kernel_backup=NULL;
	}

	/* dont forget to call superclass method */
}
668 
/* init(): shared constructor body — set member defaults and register the
 * members with the parameter framework for serialization/model selection.
 * NOTE(review): Doxygen scrape — elided lines appear to hold the
 * use_batch_computation default, the closing argument of the m_alpha
 * SG_ADD, and the m_parameter_map->put(...)/finalize_map() calls wrapping
 * the SGParamInfo pairs below; code kept as scraped. */
void CKernelMachine::init()
{
	m_bias=0.0;
	kernel=NULL;
	m_custom_kernel=NULL;
	m_kernel_backup=NULL;
	use_linadd=true;
	use_bias=true;

	SG_ADD((CSGObject**) &kernel, "kernel", "", MS_AVAILABLE);
	SG_ADD((CSGObject**) &m_custom_kernel, "custom_kernel", "Custom kernel for"
			" data lock", MS_NOT_AVAILABLE);
	SG_ADD((CSGObject**) &m_kernel_backup, "kernel_backup",
			"Kernel backup for data lock", MS_NOT_AVAILABLE);
	SG_ADD(&use_batch_computation, "use_batch_computation",
			"Batch computation is enabled.", MS_NOT_AVAILABLE);
	SG_ADD(&use_linadd, "use_linadd", "Linadd is enabled.", MS_NOT_AVAILABLE);
	SG_ADD(&use_bias, "use_bias", "Bias shall be used.", MS_NOT_AVAILABLE);
	SG_ADD(&m_bias, "m_bias", "Bias term.", MS_NOT_AVAILABLE);
	SG_ADD(&m_alpha, "m_alpha", "Array of coefficients alpha.",
	SG_ADD(&m_svs, "m_svs", "Number of ``support vectors''.", MS_NOT_AVAILABLE);

		new SGParamInfo("custom_kernel", CT_SCALAR, ST_NONE, PT_SGOBJECT, 1),
		new SGParamInfo()
	);
		new SGParamInfo("kernel_backup", CT_SCALAR, ST_NONE, PT_SGOBJECT, 1),
		new SGParamInfo()
	);
}

SHOGUN Machine Learning Toolbox - Documentation