SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
implementation/SpecialPurpose.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Khaled Nasr
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #ifndef SPECIAL_PURPOSE_IMPL_H_
32 #define SPECIAL_PURPOSE_IMPL_H_
33 
34 #include <shogun/lib/config.h>
35 #include <shogun/lib/SGMatrix.h>
37 
38 #ifdef HAVE_EIGEN3
40 #endif // HAVE_EIGEN3
41 
42 #ifdef HAVE_VIENNACL
43 #include <shogun/lib/GPUMatrix.h>
45 #endif // HAVE_VIENNACL
46 
47 namespace shogun
48 {
49 
50 namespace linalg
51 {
52 
53 namespace implementation
54 {
55 
56 namespace special_purpose
57 {
58 
62 template <enum Backend, class Matrix>
63 struct logistic
64 {
66  typedef typename Matrix::Scalar T;
67 
69  static void compute(Matrix A, Matrix result);
70 };
71 
72 #ifdef HAVE_EIGEN3
73 
75 template <> template <class Matrix>
76 struct logistic<Backend::EIGEN3, Matrix>
77 {
79  typedef typename Matrix::Scalar T;
80 
82  static void compute(SGMatrix<T> A, SGMatrix<T> result)
83  {
84  int32_t len = A.num_rows*A.num_cols;
85  for (int32_t i=0; i<len; i++)
86  result[i] = 1.0/(1+CMath::exp(-1*A[i]));
87  }
88 };
89 #endif // HAVE_EIGEN3
90 
91 #ifdef HAVE_VIENNACL
92 
94 template <> template <class Matrix>
95 struct logistic<Backend::VIENNACL, Matrix>
96 {
98  typedef typename Matrix::Scalar T;
99 
101  static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
102  {
103  const std::string operation = "return 1.0/(1+exp(-1*element));";
104 
105  std::string kernel_name = "logistic_" + ocl::get_type_string<T>();
106  viennacl::ocl::kernel& kernel =
107  ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
108 
109  kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
110 
111  viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
112  cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
113  result.vcl_matrix(), cl_int(result.offset)));
114  }
115 };
116 
117 #endif // HAVE_VIENNACL
118 
122 template <enum Backend, class Matrix>
124 {
126  typedef typename Matrix::Scalar T;
127 
129  static void compute(Matrix A, Matrix C);
130 };
131 
132 #ifdef HAVE_EIGEN3
133 
135 template <> template <class Matrix>
136 struct multiply_by_logistic_derivative<Backend::EIGEN3, Matrix>
137 {
139  typedef typename Matrix::Scalar T;
140 
142  static void compute(SGMatrix<T> A, SGMatrix<T> C)
143  {
144  int32_t len = A.num_rows*A.num_cols;
145  for (int32_t i=0; i<len; i++)
146  C[i] *= A[i] * (1.0-A[i]);
147  }
148 };
149 #endif // HAVE_EIGEN3
150 
151 #ifdef HAVE_VIENNACL
152 
154 template <> template <class Matrix>
155 struct multiply_by_logistic_derivative<Backend::VIENNACL, Matrix>
156 {
158  typedef typename Matrix::Scalar T;
159 
161  static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
162  {
163  const std::string operation = "return element2 * element1*(1.0-element1);";
164 
165  std::string kernel_name = "multiply_by_logistic_derivative_" + ocl::get_type_string<T>();
166  viennacl::ocl::kernel& kernel =
167  ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
168 
169  kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
170 
171  viennacl::ocl::enqueue(kernel(
172  A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
173  C.vcl_matrix(), cl_int(C.offset),
174  C.vcl_matrix(), cl_int(C.offset)));
175  }
176 };
177 
178 #endif // HAVE_VIENNACL
179 
183 template <enum Backend, class Matrix>
185 {
187  typedef typename Matrix::Scalar T;
188 
190  static void compute(Matrix A, Matrix result);
191 };
192 
193 #ifdef HAVE_EIGEN3
194 
196 template <> template <class Matrix>
197 struct rectified_linear<Backend::EIGEN3, Matrix>
198 {
200  typedef typename Matrix::Scalar T;
201 
203  static void compute(SGMatrix<T> A, SGMatrix<T> result)
204  {
205  int32_t len = A.num_rows*A.num_cols;
206  for (int32_t i=0; i<len; i++)
207  result[i] = CMath::max((T)0, A[i]);
208  }
209 };
210 #endif // HAVE_EIGEN3
211 
212 #ifdef HAVE_VIENNACL
213 
215 template <> template <class Matrix>
216 struct rectified_linear<Backend::VIENNACL, Matrix>
217 {
219  typedef typename Matrix::Scalar T;
220 
222  static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
223  {
224  const std::string operation = "return max((DATATYPE)0,element);";
225 
226  std::string kernel_name = "rectified_linear_" + ocl::get_type_string<T>();
227  viennacl::ocl::kernel& kernel =
228  ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
229 
230  kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
231 
232  viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
233  cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
234  result.vcl_matrix(), cl_int(result.offset)));
235  }
236 };
237 
238 #endif // HAVE_VIENNACL
239 
243 template <enum Backend, class Matrix>
245 {
247  typedef typename Matrix::Scalar T;
248 
250  static void compute(Matrix A, Matrix C);
251 };
252 
253 #ifdef HAVE_EIGEN3
254 
256 template <> template <class Matrix>
257 struct multiply_by_rectified_linear_derivative<Backend::EIGEN3, Matrix>
258 {
260  typedef typename Matrix::Scalar T;
261 
263  static void compute(SGMatrix<T> A, SGMatrix<T> C)
264  {
265  int32_t len = A.num_rows*A.num_cols;
266  for (int32_t i=0; i<len; i++)
267  if (A[i]==0)
268  C[i] = 0;
269  }
270 };
271 #endif // HAVE_EIGEN3
272 
273 #ifdef HAVE_VIENNACL
274 
276 template <> template <class Matrix>
277 struct multiply_by_rectified_linear_derivative<Backend::VIENNACL, Matrix>
278 {
280  typedef typename Matrix::Scalar T;
281 
283  static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
284  {
285  const std::string operation = "return element1==0 ? 0 : element2;";
286 
287  std::string kernel_name = "multiply_by_rectified_linear_derivative_" + ocl::get_type_string<T>();
288  viennacl::ocl::kernel& kernel =
289  ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
290 
291  kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
292 
293  viennacl::ocl::enqueue(kernel(
294  A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
295  C.vcl_matrix(), cl_int(C.offset),
296  C.vcl_matrix(), cl_int(C.offset)));
297  }
298 };
299 
300 #endif // HAVE_VIENNACL
301 
305 template <enum Backend, class Matrix>
306 struct softmax
307 {
309  typedef typename Matrix::Scalar T;
310 
314  static void compute(Matrix A);
315 };
316 
317 #ifdef HAVE_EIGEN3
318 
320 template <> template <class Matrix>
321 struct softmax<Backend::EIGEN3, Matrix>
322 {
324  typedef typename Matrix::Scalar T;
325 
327  typedef Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic> MatrixXt;
328 
332  static void compute(SGMatrix<T> A)
333  {
334  Eigen::Map<MatrixXt> A_eig = A;
335 
336  float64_t max = A_eig.maxCoeff();
337 
338  for (int32_t j=0; j<A.num_cols; j++)
339  {
340  float64_t sum = 0;
341  for (int32_t i=0; i<A.num_rows; i++)
342  sum += CMath::exp(A(i,j)-max);
343 
344  float64_t normalizer = CMath::log(sum);
345  for (int32_t k=0; k<A.num_rows; k++)
346  A(k,j) = CMath::exp(A(k,j)-max-normalizer);
347  }
348  }
349 };
350 #endif // HAVE_EIGEN3
351 
352 #ifdef HAVE_VIENNACL
353 
355 template <> template <class Matrix>
356 struct softmax<Backend::VIENNACL, Matrix>
357 {
359  typedef typename Matrix::Scalar T;
360 
362  template <class T>
363  static viennacl::ocl::kernel& generate_kernel()
364  {
365  std::string kernel_name = "softmax_" + ocl::get_type_string<T>();
366 
367  if (ocl::kernel_exists(kernel_name))
368  return ocl::get_kernel(kernel_name);
369 
370  std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
371 
372  source.append(
373  R"(
374  __kernel void KERNEL_NAME(
375  __global DATATYPE* A, int nrows, int ncols, int offset)
376  {
377  int j = get_global_id(0);
378 
379  if (j>=ncols)
380  return;
381 
382  DATATYPE col_max = -INFINITY;
383  for (int i=0; i<nrows; i++)
384  col_max = max(col_max, A[offset + i+j*nrows]);
385 
386  DATATYPE col_sum = 0;
387  for (int i=0; i<nrows; i++)
388  col_sum += exp(A[offset + i+j*nrows]-col_max);
389 
390  DATATYPE normalizer = log(col_sum);
391  for (int i=0; i<nrows; i++)
392  {
393  int index = offset + i+j*nrows;
394  A[index] = exp(A[index]-col_max-normalizer);
395  }
396  }
397  )"
398  );
399 
400  viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
401 
402  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
403 
404  return kernel;
405  }
406 
410  static void compute(CGPUMatrix<T> A)
411  {
412  viennacl::ocl::kernel& kernel = generate_kernel<T>();
413  kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_cols));
414 
415  viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
416  cl_int(A.num_rows), cl_int(A.num_cols), cl_int(A.offset)));
417  }
418 };
419 
420 #endif // HAVE_VIENNACL
421 
425 template <enum Backend,class Matrix>
427 {
429  typedef typename Matrix::Scalar T;
430 
434  static T compute(Matrix P, Matrix Q);
435 };
436 
437 #ifdef HAVE_EIGEN3
438 
439 template <> template <class Matrix>
440 struct cross_entropy<Backend::EIGEN3,Matrix>
441 {
443  typedef typename Matrix::Scalar T;
444 
446  typedef Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic> MatrixXt;
447 
452  {
453  Eigen::Map<MatrixXt> P_eig = P;
454  Eigen::Map<MatrixXt> Q_eig = Q;
455 
456  return -1*(P_eig.array() * (Q_eig.array()+1e-30).log()).sum();
457  }
458 };
459 #endif // HAVE_EIGEN3
460 
461 #ifdef HAVE_VIENNACL
462 
463 template <> template <class Matrix>
464 struct cross_entropy<Backend::VIENNACL,Matrix>
465 {
467  typedef typename Matrix::Scalar T;
468 
470  template <class T>
471  static viennacl::ocl::kernel& generate_kernel()
472  {
473  std::string kernel_name = "cross_entropy_" + ocl::get_type_string<T>();
474 
475  if (ocl::kernel_exists(kernel_name))
476  return ocl::get_kernel(kernel_name);
477 
478  std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
479 
480  source.append(
481  R"(
482  __kernel void KERNEL_NAME(
483  __global DATATYPE* p, int size, int p_offset,
484  __global DATATYPE* q, int q_offset,
485  __global DATATYPE* result)
486  {
487  __local DATATYPE buffer[WORK_GROUP_SIZE_1D];
488 
489  int local_id = get_local_id(0);
490 
491  DATATYPE thread_sum = 0;
492  for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
493  thread_sum += p[i+p_offset]*log(q[i+q_offset]+1e-30);
494 
495  buffer[local_id] = thread_sum;
496 
497  for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
498  {
499  barrier(CLK_LOCAL_MEM_FENCE);
500  if (local_id < j)
501  buffer[local_id] += buffer[local_id + j];
502  }
503 
504  barrier(CLK_LOCAL_MEM_FENCE);
505 
506  if (get_global_id(0)==0)
507  *result = -1*buffer[0];
508  }
509  )"
510  );
511 
512  viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
513 
514  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
515  kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);
516 
517  return kernel;
518  }
519 
523  static T compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
524  {
525  viennacl::ocl::kernel& kernel = generate_kernel<T>();
526 
527  CGPUVector<T> result(1);
528 
529  viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
530  cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
531  Q.vcl_matrix(), cl_int(Q.offset),
532  result.vcl_vector()));
533 
534  return result[0];
535  }
536 };
537 #endif // HAVE_VIENNACL
538 
542 template <enum Backend,class Matrix>
544 {
546  typedef typename Matrix::Scalar T;
547 
551  static T compute(Matrix P, Matrix Q);
552 };
553 
554 #ifdef HAVE_EIGEN3
555 
556 template <> template <class Matrix>
557 struct squared_error<Backend::EIGEN3,Matrix>
558 {
560  typedef typename Matrix::Scalar T;
561 
563  typedef Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic> MatrixXt;
564 
569  {
570  Eigen::Map<MatrixXt> P_eig = P;
571  Eigen::Map<MatrixXt> Q_eig = Q;
572 
573  return 0.5 * (P_eig - Q_eig).array().square().sum();
574  }
575 };
576 #endif // HAVE_EIGEN3
577 
578 #ifdef HAVE_VIENNACL
579 
580 template <> template <class Matrix>
581 struct squared_error<Backend::VIENNACL,Matrix>
582 {
584  typedef typename Matrix::Scalar T;
585 
587  template <class T>
588  static viennacl::ocl::kernel& generate_kernel()
589  {
590  std::string kernel_name = "squared_error_" + ocl::get_type_string<T>();
591 
592  if (ocl::kernel_exists(kernel_name))
593  return ocl::get_kernel(kernel_name);
594 
595  std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
596 
597  source.append(
598  R"(
599  __kernel void KERNEL_NAME(
600  __global DATATYPE* p, int size, int p_offset,
601  __global DATATYPE* q, int q_offset,
602  __global DATATYPE* result)
603  {
604  __local DATATYPE buffer[WORK_GROUP_SIZE_1D];
605 
606  int local_id = get_local_id(0);
607 
608  DATATYPE thread_sum = 0;
609  for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
610  thread_sum += pown(p[i+p_offset]-q[i+q_offset], 2);
611 
612  buffer[local_id] = thread_sum;
613 
614  for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
615  {
616  barrier(CLK_LOCAL_MEM_FENCE);
617  if (local_id < j)
618  buffer[local_id] += buffer[local_id + j];
619  }
620 
621  barrier(CLK_LOCAL_MEM_FENCE);
622 
623  if (get_global_id(0)==0)
624  *result = 0.5*buffer[0];
625  }
626  )"
627  );
628 
629  viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
630 
631  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
632  kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);
633 
634  return kernel;
635  }
636 
640  static T compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
641  {
642  viennacl::ocl::kernel& kernel = generate_kernel<T>();
643 
644  CGPUVector<T> result(1);
645 
646  viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
647  cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
648  Q.vcl_matrix(), cl_int(Q.offset),
649  result.vcl_vector()));
650 
651  return result[0];
652  }
653 };
654 #endif // HAVE_VIENNACL
655 
656 }
657 
658 }
659 
660 }
661 
662 }
663 #endif // SPECIAL_PURPOSE_IMPL_H_

SHOGUN Machine Learning Toolbox - Documentation