SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
implementation/SpecialPurpose.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Khaled Nasr
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #ifndef SPECIAL_PURPOSE_IMPL_H_
32 #define SPECIAL_PURPOSE_IMPL_H_
33 
#include <shogun/lib/config.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

#ifdef HAVE_VIENNACL
#include <shogun/lib/GPUMatrix.h>
#include <shogun/mathematics/linalg/internal/opencl_util.h>
#endif // HAVE_VIENNACL
44 
45 namespace shogun
46 {
47 
48 namespace linalg
49 {
50 
51 namespace implementation
52 {
53 
54 namespace special_purpose
55 {
56 
/** Generic class logistic which provides a static compute method. This class
 * is specialized for different backends.
 */
template <enum Backend, class Matrix>
struct logistic
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Performs the operation result[i] = 1/(1+exp(-A[i])) for each element of A */
	static void compute(Matrix A, Matrix result);
};
69 
70 
72 template <class Matrix>
73 struct logistic<Backend::EIGEN3, Matrix>
74 {
76  typedef typename Matrix::Scalar T;
77 
79  static void compute(SGMatrix<T> A, SGMatrix<T> result)
80  {
81  int32_t len = A.num_rows*A.num_cols;
82  for (int32_t i=0; i<len; i++)
83  result[i] = 1.0/(1+CMath::exp(-1*A[i]));
84  }
85 };
86 
87 #ifdef HAVE_VIENNACL
88 
/** Specialization of logistic for the ViennaCL backend */
template <class Matrix>
struct logistic<Backend::VIENNACL, Matrix>
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Performs the operation result[i] = 1/(1+exp(-A[i])) on the GPU via a
	 * generated element-wise OpenCL kernel.
	 */
	static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
	{
		// OpenCL expression applied to each element of A
		const std::string operation = "return 1.0/(1+exp(-1*element));";

		// kernel name is type-specific so each scalar type gets its own kernel
		std::string kernel_name = "logistic_" + ocl::get_type_string<T>();
		viennacl::ocl::kernel& kernel =
			ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);

		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

		viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
			cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
			result.vcl_matrix(), cl_int(result.offset)));
	}
};
112 
113 #endif // HAVE_VIENNACL
114 
118 template <enum Backend, class Matrix>
120 {
122  typedef typename Matrix::Scalar T;
123 
125  static void compute(Matrix A, Matrix C);
126 };
127 
128 
130 template <class Matrix>
131 struct multiply_by_logistic_derivative<Backend::EIGEN3, Matrix>
132 {
134  typedef typename Matrix::Scalar T;
135 
137  static void compute(SGMatrix<T> A, SGMatrix<T> C)
138  {
139  int32_t len = A.num_rows*A.num_cols;
140  for (int32_t i=0; i<len; i++)
141  C[i] *= A[i] * (1.0-A[i]);
142  }
143 };
144 
145 #ifdef HAVE_VIENNACL
146 
/** Specialization of multiply_by_logistic_derivative for the ViennaCL backend */
template <class Matrix>
struct multiply_by_logistic_derivative<Backend::VIENNACL, Matrix>
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Performs the operation C(i,j) = C(i,j) * A(i,j) * (1.0-A(i,j)) on the GPU.
	 * In the kernel, element1 maps to A and element2 maps to C.
	 */
	static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
	{
		const std::string operation = "return element2 * element1*(1.0-element1);";

		std::string kernel_name = "multiply_by_logistic_derivative_" + ocl::get_type_string<T>();
		viennacl::ocl::kernel& kernel =
			ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);

		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

		// C appears twice: once as the second input, once as the output buffer
		viennacl::ocl::enqueue(kernel(
			A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
			C.vcl_matrix(), cl_int(C.offset),
			C.vcl_matrix(), cl_int(C.offset)));
	}
};
171 
172 #endif // HAVE_VIENNACL
173 
177 template <enum Backend, class Matrix>
179 {
181  typedef typename Matrix::Scalar T;
182 
184  static void compute(Matrix A, Matrix result);
185 };
186 
187 
189 template <class Matrix>
190 struct rectified_linear<Backend::EIGEN3, Matrix>
191 {
193  typedef typename Matrix::Scalar T;
194 
196  static void compute(SGMatrix<T> A, SGMatrix<T> result)
197  {
198  int32_t len = A.num_rows*A.num_cols;
199  for (int32_t i=0; i<len; i++)
200  result[i] = CMath::max((T)0, A[i]);
201  }
202 };
203 
204 #ifdef HAVE_VIENNACL
205 
/** Specialization of rectified_linear for the ViennaCL backend */
template <class Matrix>
struct rectified_linear<Backend::VIENNACL, Matrix>
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Performs the operation result[i] = max(0, A[i]) on the GPU */
	static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
	{
		// DATATYPE is replaced by the kernel generator with the OpenCL type for T
		const std::string operation = "return max((DATATYPE)0,element);";

		std::string kernel_name = "rectified_linear_" + ocl::get_type_string<T>();
		viennacl::ocl::kernel& kernel =
			ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);

		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

		viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
			cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
			result.vcl_matrix(), cl_int(result.offset)));
	}
};
229 
230 #endif // HAVE_VIENNACL
231 
235 template <enum Backend, class Matrix>
237 {
239  typedef typename Matrix::Scalar T;
240 
242  static void compute(Matrix A, Matrix C);
243 };
244 
245 
247 template <class Matrix>
248 struct multiply_by_rectified_linear_derivative<Backend::EIGEN3, Matrix>
249 {
251  typedef typename Matrix::Scalar T;
252 
254  static void compute(SGMatrix<T> A, SGMatrix<T> C)
255  {
256  int32_t len = A.num_rows*A.num_cols;
257  for (int32_t i=0; i<len; i++)
258  if (A[i]==0)
259  C[i] = 0;
260  }
261 };
262 
263 #ifdef HAVE_VIENNACL
264 
/** Specialization of multiply_by_rectified_linear_derivative for the ViennaCL backend */
template <class Matrix>
struct multiply_by_rectified_linear_derivative<Backend::VIENNACL, Matrix>
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Performs the operation C(i,j) = 0 if A(i,j)==0, for all i and j, on the
	 * GPU. In the kernel, element1 maps to A and element2 maps to C.
	 */
	static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
	{
		const std::string operation = "return element1==0 ? 0 : element2;";

		std::string kernel_name = "multiply_by_rectified_linear_derivative_" + ocl::get_type_string<T>();
		viennacl::ocl::kernel& kernel =
			ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);

		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

		// C appears twice: once as the second input, once as the output buffer
		viennacl::ocl::enqueue(kernel(
			A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
			C.vcl_matrix(), cl_int(C.offset),
			C.vcl_matrix(), cl_int(C.offset)));
	}
};
289 
290 #endif // HAVE_VIENNACL
291 
/** Generic class softmax which provides a static compute method. This class
 * is specialized for different backends.
 */
template <enum Backend, class Matrix>
struct softmax
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Applies the softmax function inplace to a matrix. The softmax function is
	 * defined as \f$ f(A[i,j]) = \frac{exp(A[i,j])}{\sum_i exp(A[i,j])} \f$
	 */
	static void compute(Matrix A);
};
306 
307 
309 template <class Matrix>
310 struct softmax<Backend::EIGEN3, Matrix>
311 {
313  typedef typename Matrix::Scalar T;
314 
317 
321  static void compute(SGMatrix<T> A)
322  {
323  Eigen::Map<MatrixXt> A_eig = A;
324 
325  float64_t max = A_eig.maxCoeff();
326 
327  for (int32_t j=0; j<A.num_cols; j++)
328  {
329  float64_t sum = 0;
330  for (int32_t i=0; i<A.num_rows; i++)
331  sum += CMath::exp(A(i,j)-max);
332 
333  float64_t normalizer = CMath::log(sum);
334  for (int32_t k=0; k<A.num_rows; k++)
335  A(k,j) = CMath::exp(A(k,j)-max-normalizer);
336  }
337  }
338 };
339 
340 #ifdef HAVE_VIENNACL
341 
/** Specialization of softmax for the ViennaCL backend */
template <class Matrix>
struct softmax<Backend::VIENNACL, Matrix>
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Generates (or fetches from cache) the OpenCL kernel that applies the
	 * softmax column-wise, with one work-item per column.
	 */
	template <class T>
	static viennacl::ocl::kernel& generate_kernel()
	{
		std::string kernel_name = "softmax_" + ocl::get_type_string<T>();

		// kernels are cached by name; compile only on first use
		if (ocl::kernel_exists(kernel_name))
			return ocl::get_kernel(kernel_name);

		std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

		source.append(
			R"(
				__kernel void KERNEL_NAME(
					__global DATATYPE* A, int nrows, int ncols, int offset)
				{
					int j = get_global_id(0);

					if (j>=ncols)
						return;

					DATATYPE col_max = -INFINITY;
					for (int i=0; i<nrows; i++)
						col_max = max(col_max, A[offset + i+j*nrows]);

					DATATYPE col_sum = 0;
					for (int i=0; i<nrows; i++)
						col_sum += exp(A[offset + i+j*nrows]-col_max);

					DATATYPE normalizer = log(col_sum);
					for (int i=0; i<nrows; i++)
					{
						int index = offset + i+j*nrows;
						A[index] = exp(A[index]-col_max-normalizer);
					}
				}
			)"
		);

		viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

		kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);

		return kernel;
	}

	/** Applies the softmax function inplace to a matrix. The softmax function is
	 * defined as \f$ f(A[i,j]) = \frac{exp(A[i,j])}{\sum_i exp(A[i,j])} \f$
	 */
	static void compute(CGPUMatrix<T> A)
	{
		viennacl::ocl::kernel& kernel = generate_kernel<T>();
		// one work-item per column, rounded up to the work-group size
		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_cols));

		viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
			cl_int(A.num_rows), cl_int(A.num_cols), cl_int(A.offset)));
	}
};
407 
408 #endif // HAVE_VIENNACL
409 
413 template <enum Backend,class Matrix>
415 {
417  typedef typename Matrix::Scalar T;
418 
422  static T compute(Matrix P, Matrix Q);
423 };
424 
426 template <class Matrix>
427 struct cross_entropy<Backend::EIGEN3,Matrix>
428 {
430  typedef typename Matrix::Scalar T;
431 
434 
439  {
440  Eigen::Map<MatrixXt> P_eig = P;
441  Eigen::Map<MatrixXt> Q_eig = Q;
442 
443  return -1*(P_eig.array() * (Q_eig.array()+1e-30).log()).sum();
444  }
445 };
446 
447 #ifdef HAVE_VIENNACL
448 
/** Specialization of cross_entropy for the ViennaCL backend */
template <class Matrix>
struct cross_entropy<Backend::VIENNACL,Matrix>
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Generates (or fetches from cache) the single-work-group reduction
	 * kernel that computes -sum(p*log(q+1e-30)).
	 */
	template <class T>
	static viennacl::ocl::kernel& generate_kernel()
	{
		std::string kernel_name = "cross_entropy_" + ocl::get_type_string<T>();

		// kernels are cached by name; compile only on first use
		if (ocl::kernel_exists(kernel_name))
			return ocl::get_kernel(kernel_name);

		std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

		source.append(
			R"(
				__kernel void KERNEL_NAME(
					__global DATATYPE* p, int size, int p_offset,
					__global DATATYPE* q, int q_offset,
					__global DATATYPE* result)
				{
					__local DATATYPE buffer[WORK_GROUP_SIZE_1D];

					int local_id = get_local_id(0);

					DATATYPE thread_sum = 0;
					for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
						thread_sum += p[i+p_offset]*log(q[i+q_offset]+1e-30);

					buffer[local_id] = thread_sum;

					for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
					{
						barrier(CLK_LOCAL_MEM_FENCE);
						if (local_id < j)
							buffer[local_id] += buffer[local_id + j];
					}

					barrier(CLK_LOCAL_MEM_FENCE);

					if (get_global_id(0)==0)
						*result = -1*buffer[0];
				}
			)"
		);

		viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

		// single work-group: global size equals local size, so the tree
		// reduction above covers the whole local buffer
		kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
		kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);

		return kernel;
	}

	/** Returns the cross entropy between P and Q, defined as
	 * \f$ H(P,Q) = - \sum_{ij} P[i,j]\log Q[i,j] \f$
	 */
	static T compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
	{
		viennacl::ocl::kernel& kernel = generate_kernel<T>();

		// one-element GPU vector to receive the reduced result
		CGPUVector<T> result(1);

		viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
			cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
			Q.vcl_matrix(), cl_int(Q.offset),
			result.vcl_vector()));

		return result[0];
	}
};
523 #endif // HAVE_VIENNACL
524 
528 template <enum Backend,class Matrix>
530 {
532  typedef typename Matrix::Scalar T;
533 
537  static T compute(Matrix P, Matrix Q);
538 };
539 
541 template <class Matrix>
542 struct squared_error<Backend::EIGEN3,Matrix>
543 {
545  typedef typename Matrix::Scalar T;
546 
549 
554  {
555  Eigen::Map<MatrixXt> P_eig = P;
556  Eigen::Map<MatrixXt> Q_eig = Q;
557 
558  return 0.5 * (P_eig - Q_eig).array().square().sum();
559  }
560 };
561 
562 #ifdef HAVE_VIENNACL
563 
/** Specialization of squared_error for the ViennaCL backend */
template <class Matrix>
struct squared_error<Backend::VIENNACL,Matrix>
{
	/** Scalar type */
	typedef typename Matrix::Scalar T;

	/** Generates (or fetches from cache) the single-work-group reduction
	 * kernel that computes 0.5*sum((p-q)^2).
	 */
	template <class T>
	static viennacl::ocl::kernel& generate_kernel()
	{
		std::string kernel_name = "squared_error_" + ocl::get_type_string<T>();

		// kernels are cached by name; compile only on first use
		if (ocl::kernel_exists(kernel_name))
			return ocl::get_kernel(kernel_name);

		std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

		source.append(
			R"(
				__kernel void KERNEL_NAME(
					__global DATATYPE* p, int size, int p_offset,
					__global DATATYPE* q, int q_offset,
					__global DATATYPE* result)
				{
					__local DATATYPE buffer[WORK_GROUP_SIZE_1D];

					int local_id = get_local_id(0);

					DATATYPE thread_sum = 0;
					for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
						thread_sum += pown(p[i+p_offset]-q[i+q_offset], 2);

					buffer[local_id] = thread_sum;

					for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
					{
						barrier(CLK_LOCAL_MEM_FENCE);
						if (local_id < j)
							buffer[local_id] += buffer[local_id + j];
					}

					barrier(CLK_LOCAL_MEM_FENCE);

					if (get_global_id(0)==0)
						*result = 0.5*buffer[0];
				}
			)"
		);

		viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

		// single work-group: global size equals local size, so the tree
		// reduction above covers the whole local buffer
		kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
		kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);

		return kernel;
	}

	/** Returns the squared error between P and Q, defined as
	 * \f$ E(P,Q) = \frac{1}{2} \sum_{ij} (P[i,j]-Q[i,j])^2 \f$
	 */
	static T compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
	{
		viennacl::ocl::kernel& kernel = generate_kernel<T>();

		// one-element GPU vector to receive the reduced result
		CGPUVector<T> result(1);

		viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
			cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
			Q.vcl_matrix(), cl_int(Q.offset),
			result.vcl_vector()));

		return result[0];
	}
};
638 #endif // HAVE_VIENNACL
639 
640 }
641 
642 }
643 
644 }
645 
646 }
647 #endif // SPECIAL_PURPOSE_IMPL_H_
index_t num_cols
Definition: SGMatrix.h:376
index_t num_rows
Definition: SGMatrix.h:374
Generic class sum which provides a static compute method. This class is specialized for different typ...
Definition: Sum.h:69
shogun matrix
Generic class which is specialized for different backends to perform the max operation.
Definition: Max.h:63
double float64_t
Definition: common.h:50
static T max(T a, T b)
Definition: Math.h:168
static void compute(Matrix A, Matrix result)
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
static float64_t exp(float64_t x)
Definition: Math.h:621
static float64_t log(float64_t v)
Definition: Math.h:922

SHOGUN Machine Learning Toolbox - Documentation