SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
implementation/SpecialPurpose.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Khaled Nasr
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #ifndef SPECIAL_PURPOSE_IMPL_H_
32 #define SPECIAL_PURPOSE_IMPL_H_
33 
#include <shogun/lib/config.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/Math.h>

#ifdef HAVE_EIGEN3
#include <shogun/mathematics/eigen3.h>
#endif // HAVE_EIGEN3

#ifdef HAVE_VIENNACL
#include <shogun/lib/GPUMatrix.h>
#include <shogun/lib/GPUVector.h>
#endif // HAVE_VIENNACL
46 
47 namespace shogun
48 {
49 
50 namespace linalg
51 {
52 
53 namespace implementation
54 {
55 
56 namespace special_purpose
57 {
58 
62 template <enum Backend, class Matrix>
63 struct logistic
64 {
65  typedef typename Matrix::Scalar T;
66 
68  static void compute(Matrix A, Matrix result);
69 };
70 
#ifdef HAVE_EIGEN3

/** Specialization of logistic for the Eigen3 backend */
template <class Matrix>
struct logistic<Backend::EIGEN3, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Computes f(x) = 1/(1+exp(-x)) for each element of A and stores the
	 * outcome in result
	 */
	static void compute(SGMatrix<T> A, SGMatrix<T> result)
	{
		// matrices are column-major and contiguous, so a flat loop suffices
		int32_t len = A.num_rows*A.num_cols;
		for (int32_t i=0; i<len; i++)
			result[i] = 1.0/(1+CMath::exp(-1*A[i]));
	}
};
#endif // HAVE_EIGEN3
88 
#ifdef HAVE_VIENNACL

/** Specialization of logistic for the ViennaCL backend */
template <class Matrix>
struct logistic<Backend::VIENNACL, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Computes f(x) = 1/(1+exp(-x)) for each element of A on the GPU and
	 * stores the outcome in result
	 */
	static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
	{
		const std::string operation = "return 1.0/(1+exp(-1*element));";

		// kernels are cached per scalar type under this name
		std::string kernel_name = "logistic_" + ocl::get_type_string<T>();
		viennacl::ocl::kernel& kernel =
			ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);

		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

		viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
			cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
			result.vcl_matrix(), cl_int(result.offset)));
	}
};

#endif // HAVE_VIENNACL
115 
119 template <enum Backend, class Matrix>
121 {
122  typedef typename Matrix::Scalar T;
123 
125  static void compute(Matrix A, Matrix C);
126 };
127 
#ifdef HAVE_EIGEN3

/** Specialization of multiply_by_logistic_derivative for the Eigen3 backend */
template <class Matrix>
struct multiply_by_logistic_derivative<Backend::EIGEN3, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Performs the operation C(i,j) = C(i,j) * A(i,j) * (1-A(i,j)), where A
	 * is assumed to contain logistic activations
	 */
	static void compute(SGMatrix<T> A, SGMatrix<T> C)
	{
		int32_t len = A.num_rows*A.num_cols;
		for (int32_t i=0; i<len; i++)
			C[i] *= A[i] * (1.0-A[i]);
	}
};
#endif // HAVE_EIGEN3
145 
#ifdef HAVE_VIENNACL

/** Specialization of multiply_by_logistic_derivative for the ViennaCL backend */
template <class Matrix>
struct multiply_by_logistic_derivative<Backend::VIENNACL, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Performs the operation C(i,j) = C(i,j) * A(i,j) * (1-A(i,j)) on the
	 * GPU, where A is assumed to contain logistic activations
	 */
	static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
	{
		// element1 = A, element2 = C; result written back into C
		const std::string operation = "return element2 * element1*(1.0-element1);";

		std::string kernel_name = "multiply_by_logistic_derivative_" + ocl::get_type_string<T>();
		viennacl::ocl::kernel& kernel =
			ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);

		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

		viennacl::ocl::enqueue(kernel(
			A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
			C.vcl_matrix(), cl_int(C.offset),
			C.vcl_matrix(), cl_int(C.offset)));
	}
};

#endif // HAVE_VIENNACL
173 
177 template <enum Backend, class Matrix>
179 {
180  typedef typename Matrix::Scalar T;
181 
183  static void compute(Matrix A, Matrix result);
184 };
185 
#ifdef HAVE_EIGEN3

/** Specialization of rectified_linear for the Eigen3 backend */
template <class Matrix>
struct rectified_linear<Backend::EIGEN3, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Computes f(x) = max(0, x) for each element of A and stores the outcome
	 * in result
	 */
	static void compute(SGMatrix<T> A, SGMatrix<T> result)
	{
		int32_t len = A.num_rows*A.num_cols;
		for (int32_t i=0; i<len; i++)
			result[i] = CMath::max((T)0, A[i]);
	}
};
#endif // HAVE_EIGEN3
203 
#ifdef HAVE_VIENNACL

/** Specialization of rectified_linear for the ViennaCL backend */
template <class Matrix>
struct rectified_linear<Backend::VIENNACL, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Computes f(x) = max(0, x) for each element of A on the GPU and stores
	 * the outcome in result
	 */
	static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
	{
		// cast 0 to DATATYPE so OpenCL picks the right max() overload
		const std::string operation = "return max((DATATYPE)0,element);";

		std::string kernel_name = "rectified_linear_" + ocl::get_type_string<T>();
		viennacl::ocl::kernel& kernel =
			ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);

		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

		viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
			cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
			result.vcl_matrix(), cl_int(result.offset)));
	}
};

#endif // HAVE_VIENNACL
230 
234 template <enum Backend, class Matrix>
236 {
237  typedef typename Matrix::Scalar T;
238 
240  static void compute(Matrix A, Matrix C);
241 };
242 
#ifdef HAVE_EIGEN3

/** Specialization of multiply_by_rectified_linear_derivative for the Eigen3
 * backend
 */
template <class Matrix>
struct multiply_by_rectified_linear_derivative<Backend::EIGEN3, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Sets C(i,j) to zero wherever A(i,j)==0, leaving other entries of C
	 * unchanged
	 */
	static void compute(SGMatrix<T> A, SGMatrix<T> C)
	{
		int32_t len = A.num_rows*A.num_cols;
		for (int32_t i=0; i<len; i++)
			if (A[i]==0)
				C[i] = 0;
	}
};
#endif // HAVE_EIGEN3
261 
#ifdef HAVE_VIENNACL

/** Specialization of multiply_by_rectified_linear_derivative for the ViennaCL
 * backend
 */
template <class Matrix>
struct multiply_by_rectified_linear_derivative<Backend::VIENNACL, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Sets C(i,j) to zero wherever A(i,j)==0 on the GPU, leaving other
	 * entries of C unchanged
	 */
	static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
	{
		// element1 = A, element2 = C; result written back into C
		const std::string operation = "return element1==0 ? 0 : element2;";

		std::string kernel_name = "multiply_by_rectified_linear_derivative_" + ocl::get_type_string<T>();
		viennacl::ocl::kernel& kernel =
			ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);

		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

		viennacl::ocl::enqueue(kernel(
			A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
			C.vcl_matrix(), cl_int(C.offset),
			C.vcl_matrix(), cl_int(C.offset)));
	}
};

#endif // HAVE_VIENNACL
289 
293 template <enum Backend, class Matrix>
294 struct softmax
295 {
296  typedef typename Matrix::Scalar T;
297 
301  static void compute(Matrix A);
302 };
303 
#ifdef HAVE_EIGEN3

/** Specialization of softmax for the Eigen3 backend */
template <class Matrix>
struct softmax<Backend::EIGEN3, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Eigen3 matrix type */
	typedef Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic> MatrixXt;

	/** Applies the softmax function, in place, to each column of A */
	static void compute(SGMatrix<T> A)
	{
		Eigen::Map<MatrixXt> A_eig = A;

		// subtracting the global maximum avoids overflow in exp(); it cancels
		// out in the per-column normalization, so the result is unchanged
		float64_t max = A_eig.maxCoeff();

		for (int32_t j=0; j<A.num_cols; j++)
		{
			float64_t sum = 0;
			for (int32_t i=0; i<A.num_rows; i++)
				sum += CMath::exp(A(i,j)-max);

			// normalize in log-space: exp(x-max-log(sum)) == exp(x-max)/sum
			float64_t normalizer = CMath::log(sum);
			for (int32_t k=0; k<A.num_rows; k++)
				A(k,j) = CMath::exp(A(k,j)-max-normalizer);
		}
	}
};
#endif // HAVE_EIGEN3
335 
#ifdef HAVE_VIENNACL

/** Specialization of softmax for the ViennaCL backend */
template <class Matrix>
struct softmax<Backend::VIENNACL, Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Generates (or retrieves from the cache) the OpenCL kernel that performs
	 * an in-place column-wise softmax; one work item handles one column
	 */
	template <class T>
	static viennacl::ocl::kernel& generate_kernel()
	{
		std::string kernel_name = "softmax_" + ocl::get_type_string<T>();

		if (ocl::kernel_exists(kernel_name))
			return ocl::get_kernel(kernel_name);

		std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

		source.append(
			R"(
			__kernel void KERNEL_NAME(
				__global DATATYPE* A, int nrows, int ncols, int offset)
			{
				int j = get_global_id(0);

				if (j>=ncols)
					return;

				DATATYPE col_max = -INFINITY;
				for (int i=0; i<nrows; i++)
					col_max = max(col_max, A[offset + i+j*nrows]);

				DATATYPE col_sum = 0;
				for (int i=0; i<nrows; i++)
					col_sum += exp(A[offset + i+j*nrows]-col_max);

				DATATYPE normalizer = log(col_sum);
				for (int i=0; i<nrows; i++)
				{
					int index = offset + i+j*nrows;
					A[index] = exp(A[index]-col_max-normalizer);
				}
			}
			)"
		);

		viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

		kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);

		return kernel;
	}

	/** Applies the softmax function, in place, to each column of A */
	static void compute(CGPUMatrix<T> A)
	{
		viennacl::ocl::kernel& kernel = generate_kernel<T>();
		kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_cols));

		viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
			cl_int(A.num_rows), cl_int(A.num_cols), cl_int(A.offset)));
	}
};

#endif // HAVE_VIENNACL
404 
408 template <enum Backend,class Matrix>
410 {
411  typedef typename Matrix::Scalar T;
412 
416  static T compute(Matrix P, Matrix Q);
417 };
418 
#ifdef HAVE_EIGEN3

/** Specialization of cross_entropy for the Eigen3 backend */
template <class Matrix>
struct cross_entropy<Backend::EIGEN3,Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Eigen3 matrix type */
	typedef Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic> MatrixXt;

	/** Returns the cross entropy -sum_ij P(i,j)*log(Q(i,j)+1e-30); the small
	 * constant guards against log(0)
	 */
	static T compute(SGMatrix<T> P, SGMatrix<T> Q)
	{
		Eigen::Map<MatrixXt> P_eig = P;
		Eigen::Map<MatrixXt> Q_eig = Q;

		return -1*(P_eig.array() * (Q_eig.array()+1e-30).log()).sum();
	}
};
#endif // HAVE_EIGEN3
439 
#ifdef HAVE_VIENNACL

/** Specialization of cross_entropy for the ViennaCL backend */
template <class Matrix>
struct cross_entropy<Backend::VIENNACL,Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Generates (or retrieves from the cache) the OpenCL reduction kernel
	 * that computes the cross entropy within a single work group
	 */
	template <class T>
	static viennacl::ocl::kernel& generate_kernel()
	{
		std::string kernel_name = "cross_entropy_" + ocl::get_type_string<T>();

		if (ocl::kernel_exists(kernel_name))
			return ocl::get_kernel(kernel_name);

		std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

		source.append(
			R"(
			__kernel void KERNEL_NAME(
				__global DATATYPE* p, int size, int p_offset,
				__global DATATYPE* q, int q_offset,
				__global DATATYPE* result)
			{
				__local DATATYPE buffer[WORK_GROUP_SIZE_1D];

				int local_id = get_local_id(0);

				DATATYPE thread_sum = 0;
				for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
					thread_sum += p[i+p_offset]*log(q[i+q_offset]+1e-30);

				buffer[local_id] = thread_sum;

				for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
				{
					barrier(CLK_LOCAL_MEM_FENCE);
					if (local_id < j)
						buffer[local_id] += buffer[local_id + j];
				}

				barrier(CLK_LOCAL_MEM_FENCE);

				if (get_global_id(0)==0)
					*result = -1*buffer[0];
			}
			)"
		);

		viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

		// global == local size: the whole reduction runs in one work group
		kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
		kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);

		return kernel;
	}

	/** Returns the cross entropy -sum_ij P(i,j)*log(Q(i,j)+1e-30), computed
	 * on the GPU
	 */
	static T compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
	{
		viennacl::ocl::kernel& kernel = generate_kernel<T>();

		// single-element GPU vector receives the reduced result
		CGPUVector<T> result(1);

		viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
			cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
			Q.vcl_matrix(), cl_int(Q.offset),
			result.vcl_vector()));

		return result[0];
	}
};
#endif // HAVE_VIENNACL
516 
520 template <enum Backend,class Matrix>
522 {
523  typedef typename Matrix::Scalar T;
524 
528  static T compute(Matrix P, Matrix Q);
529 };
530 
#ifdef HAVE_EIGEN3

/** Specialization of squared_error for the Eigen3 backend */
template <class Matrix>
struct squared_error<Backend::EIGEN3,Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Eigen3 matrix type */
	typedef Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic> MatrixXt;

	/** Returns 0.5 * sum_ij (P(i,j)-Q(i,j))^2 */
	static T compute(SGMatrix<T> P, SGMatrix<T> Q)
	{
		Eigen::Map<MatrixXt> P_eig = P;
		Eigen::Map<MatrixXt> Q_eig = Q;

		return 0.5 * (P_eig - Q_eig).array().square().sum();
	}
};
#endif // HAVE_EIGEN3
551 
#ifdef HAVE_VIENNACL

/** Specialization of squared_error for the ViennaCL backend */
template <class Matrix>
struct squared_error<Backend::VIENNACL,Matrix>
{
	/** Scalar type of the matrix */
	typedef typename Matrix::Scalar T;

	/** Generates (or retrieves from the cache) the OpenCL reduction kernel
	 * that computes the squared error within a single work group
	 */
	template <class T>
	static viennacl::ocl::kernel& generate_kernel()
	{
		std::string kernel_name = "squared_error_" + ocl::get_type_string<T>();

		if (ocl::kernel_exists(kernel_name))
			return ocl::get_kernel(kernel_name);

		std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

		source.append(
			R"(
			__kernel void KERNEL_NAME(
				__global DATATYPE* p, int size, int p_offset,
				__global DATATYPE* q, int q_offset,
				__global DATATYPE* result)
			{
				__local DATATYPE buffer[WORK_GROUP_SIZE_1D];

				int local_id = get_local_id(0);

				DATATYPE thread_sum = 0;
				for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
					thread_sum += pown(p[i+p_offset]-q[i+q_offset], 2);

				buffer[local_id] = thread_sum;

				for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
				{
					barrier(CLK_LOCAL_MEM_FENCE);
					if (local_id < j)
						buffer[local_id] += buffer[local_id + j];
				}

				barrier(CLK_LOCAL_MEM_FENCE);

				if (get_global_id(0)==0)
					*result = 0.5*buffer[0];
			}
			)"
		);

		viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

		// global == local size: the whole reduction runs in one work group
		kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
		kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);

		return kernel;
	}

	/** Returns 0.5 * sum_ij (P(i,j)-Q(i,j))^2, computed on the GPU */
	static T compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
	{
		viennacl::ocl::kernel& kernel = generate_kernel<T>();

		// single-element GPU vector receives the reduced result
		CGPUVector<T> result(1);

		viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
			cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
			Q.vcl_matrix(), cl_int(Q.offset),
			result.vcl_vector()));

		return result[0];
	}
};
#endif // HAVE_VIENNACL
628 
629 }
630 
631 }
632 
633 }
634 
635 }
636 #endif // SPECIAL_PURPOSE_IMPL_H_

SHOGUN Machine Learning Toolbox - Documentation