31 #ifndef SPECIAL_PURPOSE_IMPL_H_
32 #define SPECIAL_PURPOSE_IMPL_H_
43 #endif // HAVE_VIENNACL
51 namespace implementation
54 namespace special_purpose
// Generic template: elementwise logistic (sigmoid) activation.
// Specialized per backend below. NOTE(review): the `struct logistic`
// header line and the Backend parameter name appear to have been lost
// in extraction — verify against the original file.
60 template <enum Backend,
class Matrix>
// Scalar element type of the matrix.
64 typedef typename Matrix::Scalar
T;
// Applies the logistic function to each element of A, writing into result
// (see backend specializations below for the concrete formula).
67 static void compute(Matrix A, Matrix result);
// Partial specialization — presumably Backend::NATIVE (the struct header
// line is missing from this view; TODO confirm against the original).
72 template <
class Matrix>
// Scalar element type of the matrix.
76 typedef typename Matrix::Scalar
T;
// Elementwise CPU loop over all `len` entries. NOTE(review): the loop body
// is missing here — by analogy with the GPU kernel below it presumably
// computes result[i] = 1/(1+exp(-A[i])); verify against the original.
82 for (int32_t i=0; i<len; i++)
// ViennaCL/OpenCL specialization: logistic applied elementwise on the GPU.
90 template <
class Matrix>
91 struct logistic<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
94 typedef typename Matrix::Scalar
T;
// Launches a one-argument elementwise kernel computing
// result = 1/(1+exp(-A)) over all num_rows*num_cols entries.
97 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
// OpenCL expression applied per element (runtime string — do not alter).
99 const std::string operation =
"return 1.0/(1+exp(-1*element));";
// Kernel name is made type-specific so float/double variants are cached
// separately by the OpenCL kernel registry.
101 std::string kernel_name =
"logistic_" + ocl::get_type_string<T>();
102 viennacl::ocl::kernel& kernel =
103 ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
// Round the global work size up to a multiple of the work-group size.
105 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Enqueue with: input buffer, total element count, input offset,
// output buffer, output offset.
107 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
108 cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
109 result.vcl_matrix(), cl_int(result.offset)));
113 #endif // HAVE_VIENNACL
// Generic template: multiply C elementwise by the derivative of the
// logistic function evaluated from the activations A. NOTE(review):
// the `struct multiply_by_logistic_derivative` header line appears to
// have been lost in extraction — verify against the original.
118 template <enum Backend,
class Matrix>
// Scalar element type of the matrix.
122 typedef typename Matrix::Scalar
T;
// In-place update of C (see backend specializations below).
125 static void compute(Matrix A, Matrix C);
// Partial specialization — presumably Backend::NATIVE (the struct header
// line is missing from this view; TODO confirm against the original).
130 template <
class Matrix>
// Scalar element type of the matrix.
134 typedef typename Matrix::Scalar
T;
// C[i] *= sigma'(x) expressed via the activation: since A = sigma(x),
// the derivative is A*(1-A). This is the standard backprop step for a
// logistic layer.
140 for (int32_t i=0; i<len; i++)
141 C[i] *= A[i] * (1.0-A[i]);
// ViennaCL/OpenCL specialization: C *= A*(1-A) elementwise on the GPU.
148 template <
class Matrix>
149 struct multiply_by_logistic_derivative<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
152 typedef typename Matrix::Scalar
T;
// Launches a two-argument elementwise kernel; element1 is A, element2 is C.
155 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
// OpenCL expression applied per element (runtime string — do not alter).
157 const std::string operation =
"return element2 * element1*(1.0-element1);";
// Type-specific kernel name for per-type caching.
159 std::string kernel_name =
"multiply_by_logistic_derivative_" + ocl::get_type_string<T>();
160 viennacl::ocl::kernel& kernel =
161 ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
// Round the global work size up to a multiple of the work-group size.
163 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// C is passed both as second input and as output, making the update
// in-place: args are (A, count, A.offset, C-in, C.offset, C-out, C.offset).
165 viennacl::ocl::enqueue(kernel(
166 A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
167 C.vcl_matrix(), cl_int(C.offset),
168 C.vcl_matrix(), cl_int(C.offset)));
172 #endif // HAVE_VIENNACL
// Generic template: elementwise rectified-linear (ReLU) activation.
// NOTE(review): the `struct rectified_linear` header line appears to
// have been lost in extraction — verify against the original.
177 template <enum Backend,
class Matrix>
// Scalar element type of the matrix.
181 typedef typename Matrix::Scalar
T;
// result = max(0, A) elementwise (see backend specializations below).
184 static void compute(Matrix A, Matrix result);
// Partial specialization — presumably Backend::NATIVE (the struct header
// line is missing from this view; TODO confirm against the original).
189 template <
class Matrix>
// Scalar element type of the matrix.
193 typedef typename Matrix::Scalar
T;
// Elementwise CPU loop. NOTE(review): the loop body is missing here — by
// analogy with the GPU kernel below it presumably computes
// result[i] = max(0, A[i]); verify against the original.
199 for (int32_t i=0; i<len; i++)
// ViennaCL/OpenCL specialization: ReLU applied elementwise on the GPU.
207 template <
class Matrix>
208 struct rectified_linear<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
211 typedef typename Matrix::Scalar
T;
// Launches a one-argument elementwise kernel computing result = max(0, A).
214 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
// OpenCL expression applied per element; the (DATATYPE) cast disambiguates
// the overload of max() for float vs double (runtime string — do not alter).
216 const std::string operation =
"return max((DATATYPE)0,element);";
// Type-specific kernel name for per-type caching.
218 std::string kernel_name =
"rectified_linear_" + ocl::get_type_string<T>();
219 viennacl::ocl::kernel& kernel =
220 ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
// Round the global work size up to a multiple of the work-group size.
222 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Args: input buffer, element count, input offset, output buffer, offset.
224 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
225 cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
226 result.vcl_matrix(), cl_int(result.offset)));
230 #endif // HAVE_VIENNACL
// Generic template: multiply C elementwise by the derivative of ReLU
// evaluated from the activations A. NOTE(review): the struct header line
// appears to have been lost in extraction — verify against the original.
235 template <enum Backend,
class Matrix>
// Scalar element type of the matrix.
239 typedef typename Matrix::Scalar
T;
// In-place update of C (see backend specializations below).
242 static void compute(Matrix A, Matrix C);
// Partial specialization — presumably Backend::NATIVE (the struct header
// line is missing from this view; TODO confirm against the original).
247 template <
class Matrix>
// Scalar element type of the matrix.
251 typedef typename Matrix::Scalar
T;
// Elementwise CPU loop. NOTE(review): the loop body is missing here — by
// analogy with the GPU kernel below it presumably zeroes C[i] where
// A[i]==0 and leaves it unchanged otherwise; verify against the original.
257 for (int32_t i=0; i<len; i++)
// ViennaCL/OpenCL specialization: C *= (A==0 ? 0 : 1) elementwise on GPU.
266 template <
class Matrix>
267 struct multiply_by_rectified_linear_derivative<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
270 typedef typename Matrix::Scalar
T;
// Launches a two-argument elementwise kernel; element1 is A, element2 is C.
273 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
// ReLU derivative via the activation: zero where the unit was inactive
// (A==0), pass-through otherwise (runtime string — do not alter).
275 const std::string operation =
"return element1==0 ? 0 : element2;";
// Type-specific kernel name for per-type caching.
277 std::string kernel_name =
"multiply_by_rectified_linear_derivative_" + ocl::get_type_string<T>();
278 viennacl::ocl::kernel& kernel =
279 ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
// Round the global work size up to a multiple of the work-group size.
281 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// C is both the second input and the output, making the update in-place.
283 viennacl::ocl::enqueue(kernel(
284 A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
285 C.vcl_matrix(), cl_int(C.offset),
286 C.vcl_matrix(), cl_int(C.offset)));
290 #endif // HAVE_VIENNACL
// Generic template: in-place column-wise softmax. NOTE(review): the
// `struct softmax` header line and the compute() declaration are missing
// from this view — verify against the original.
295 template <enum Backend,
class Matrix>
// Scalar element type of the matrix.
299 typedef typename Matrix::Scalar
T;
// Partial specialization — presumably Backend::NATIVE (the struct header
// line is missing from this view; TODO confirm against the original).
309 template <
class Matrix>
// Scalar element type of the matrix.
313 typedef typename Matrix::Scalar
T;
// Per-column processing; the loop bodies are missing from this view.
// By analogy with the GPU kernel below the three loops presumably compute
// (1) the column max, (2) the shifted exponential sum, and (3) the
// normalized exponentials — verify against the original.
327 for (int32_t j=0; j<A.
num_cols; j++)
330 for (int32_t i=0; i<A.
num_rows; i++)
334 for (int32_t k=0; k<A.
num_rows; k++)
// ViennaCL/OpenCL specialization: numerically stable, in-place
// column-wise softmax. One work item handles one column.
343 template <
class Matrix>
344 struct softmax<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
347 typedef typename Matrix::Scalar
T;
// Compiles (or fetches from cache) the per-type softmax kernel.
// NOTE(review): called below as generate_kernel<T>() — the `template
// <class T>` header line for this member presumably was lost in
// extraction; verify against the original.
351 static viennacl::ocl::kernel& generate_kernel()
353 std::string kernel_name =
"softmax_" + ocl::get_type_string<T>();
// Return the cached kernel if it was already compiled for this type.
355 if (ocl::kernel_exists(kernel_name))
356 return ocl::get_kernel(kernel_name);
// Kernel source: preamble defines KERNEL_NAME/DATATYPE macros; the raw
// string delimiters around the OpenCL code were lost in extraction.
358 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
362 __kernel void KERNEL_NAME(
363 __global DATATYPE* A, int nrows, int ncols, int offset)
// j indexes the column this work item normalizes.
365 int j = get_global_id(0);
// Pass 1: column maximum, subtracted later for numerical stability.
370 DATATYPE col_max = -INFINITY;
371 for (int i=0; i<nrows; i++)
372 col_max = max(col_max, A[offset + i+j*nrows]);
// Pass 2: sum of shifted exponentials (log-sum-exp trick).
374 DATATYPE col_sum = 0;
375 for (int i=0; i<nrows; i++)
376 col_sum += exp(A[offset + i+j*nrows]-col_max);
// Pass 3: write back exp(a - max - log(sum)), i.e. the softmax value.
378 DATATYPE normalizer = log(col_sum);
379 for (int i=0; i<nrows; i++)
381 int index = offset + i+j*nrows;
382 A[index] = exp(A[index]-col_max-normalizer);
388 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
390 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
// In-place softmax over the columns of A; one work item per column.
398 static void compute(CGPUMatrix<T> A)
400 viennacl::ocl::kernel& kernel = generate_kernel<T>();
// Global size aligned up from num_cols; extra work items presumably
// return early inside the kernel (guard not visible in this view).
401 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_cols));
403 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
404 cl_int(A.num_rows), cl_int(A.num_cols), cl_int(A.offset)));
// Generic template: cross-entropy between a target distribution P and a
// predicted distribution Q. NOTE(review): the `struct cross_entropy`
// header line appears to have been lost in extraction — verify.
413 template <enum Backend,
class Matrix>
// Scalar element type of the matrices.
417 typedef typename Matrix::Scalar
T;
// Returns -sum_ij P_ij * log(Q_ij) (see backend specializations below).
422 static T
compute(Matrix P, Matrix Q);
// Partial specialization — presumably Backend::NATIVE/Eigen (the struct
// header and the Eigen-map setup lines are missing from this view).
426 template <
class Matrix>
// Scalar element type of the matrices.
430 typedef typename Matrix::Scalar
T;
// -sum(P .* log(Q + 1e-30)); the 1e-30 epsilon guards against log(0)
// when a predicted probability is exactly zero.
443 return -1*(P_eig.array() * (Q_eig.array()+1e-30).log()).
sum();
// ViennaCL/OpenCL specialization: cross-entropy via a single-work-group
// parallel tree reduction on the GPU.
449 template <
class Matrix>
450 struct cross_entropy<Backend::VIENNACL,Matrix>
// Scalar element type of the matrices.
453 typedef typename Matrix::Scalar
T;
// Compiles (or fetches from cache) the per-type reduction kernel.
// NOTE(review): called below as generate_kernel<T>() — the `template
// <class T>` header for this member presumably was lost in extraction.
457 static viennacl::ocl::kernel& generate_kernel()
459 std::string kernel_name =
"cross_entropy_" + ocl::get_type_string<T>();
461 if (ocl::kernel_exists(kernel_name))
462 return ocl::get_kernel(kernel_name);
// Kernel source (raw-string delimiters lost in extraction).
464 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
468 __kernel void KERNEL_NAME(
469 __global DATATYPE* p, int size, int p_offset,
470 __global DATATYPE* q, int q_offset,
471 __global DATATYPE* result)
// Shared scratch for the intra-group reduction.
473 __local DATATYPE buffer[WORK_GROUP_SIZE_1D];
475 int local_id = get_local_id(0);
// Grid-stride-style partial sum: each thread accumulates every
// WORK_GROUP_SIZE_1D-th element; epsilon 1e-30 guards log(0).
477 DATATYPE thread_sum = 0;
478 for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
479 thread_sum += p[i+p_offset]*log(q[i+q_offset]+1e-30);
481 buffer[local_id] = thread_sum;
// Tree reduction, halving the active range each step.
// NOTE(review): an `if (local_id < j)` guard around the accumulation
// appears to be missing in this view (without it, upper threads read
// out of range) — presumably lost in extraction; verify.
483 for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
485 barrier(CLK_LOCAL_MEM_FENCE);
487 buffer[local_id] += buffer[local_id + j];
490 barrier(CLK_LOCAL_MEM_FENCE);
// Thread 0 writes the negated total: cross-entropy = -sum.
492 if (get_global_id(0)==0)
493 *result = -1*buffer[0];
498 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
// Exactly one work group: local size == global size.
500 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
501 kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);
// Returns -sum(P .* log(Q + 1e-30)); the scalar comes back through a
// one-element GPU vector (the final return/readback is not visible here).
509 static T
compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
511 viennacl::ocl::kernel& kernel = generate_kernel<T>();
513 CGPUVector<T> result(1);
515 viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
516 cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
517 Q.vcl_matrix(), cl_int(Q.offset),
518 result.vcl_vector()));
523 #endif // HAVE_VIENNACL
// Generic template: one-half sum of squared differences between P and Q.
// NOTE(review): the `struct squared_error` header line appears to have
// been lost in extraction — verify against the original.
528 template <enum Backend,
class Matrix>
// Scalar element type of the matrices.
532 typedef typename Matrix::Scalar
T;
// Returns 0.5 * sum_ij (P_ij - Q_ij)^2 (see specializations below).
537 static T
compute(Matrix P, Matrix Q);
// Partial specialization — presumably Backend::NATIVE/Eigen (the struct
// header and the Eigen-map setup lines are missing from this view).
541 template <
class Matrix>
// Scalar element type of the matrices.
545 typedef typename Matrix::Scalar
T;
// 0.5 * sum((P - Q).^2), the conventional squared-error loss scaling.
558 return 0.5 * (P_eig - Q_eig).array().square().sum();
// ViennaCL/OpenCL specialization: squared error via a single-work-group
// parallel tree reduction on the GPU (mirrors cross_entropy above).
564 template <
class Matrix>
565 struct squared_error<Backend::VIENNACL,Matrix>
// Scalar element type of the matrices.
568 typedef typename Matrix::Scalar
T;
// Compiles (or fetches from cache) the per-type reduction kernel.
// NOTE(review): called below as generate_kernel<T>() — the `template
// <class T>` header for this member presumably was lost in extraction.
572 static viennacl::ocl::kernel& generate_kernel()
574 std::string kernel_name =
"squared_error_" + ocl::get_type_string<T>();
576 if (ocl::kernel_exists(kernel_name))
577 return ocl::get_kernel(kernel_name);
// Kernel source (raw-string delimiters lost in extraction).
579 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
583 __kernel void KERNEL_NAME(
584 __global DATATYPE* p, int size, int p_offset,
585 __global DATATYPE* q, int q_offset,
586 __global DATATYPE* result)
// Shared scratch for the intra-group reduction.
588 __local DATATYPE buffer[WORK_GROUP_SIZE_1D];
590 int local_id = get_local_id(0);
// Each thread accumulates squared differences over a strided slice;
// pown(x, 2) is OpenCL's integer-exponent power.
592 DATATYPE thread_sum = 0;
593 for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
594 thread_sum += pown(p[i+p_offset]-q[i+q_offset], 2);
596 buffer[local_id] = thread_sum;
// Tree reduction, halving the active range each step.
// NOTE(review): an `if (local_id < j)` guard around the accumulation
// appears to be missing in this view — presumably lost in extraction.
598 for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
600 barrier(CLK_LOCAL_MEM_FENCE);
602 buffer[local_id] += buffer[local_id + j];
605 barrier(CLK_LOCAL_MEM_FENCE);
// Thread 0 applies the 0.5 scaling and writes the scalar result.
607 if (get_global_id(0)==0)
608 *result = 0.5*buffer[0];
613 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
// Exactly one work group: local size == global size.
615 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
616 kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);
// Returns 0.5 * sum((P-Q).^2); the scalar comes back through a
// one-element GPU vector (the final return/readback is not visible here).
624 static T
compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
626 viennacl::ocl::kernel& kernel = generate_kernel<T>();
628 CGPUVector<T> result(1);
630 viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
631 cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
632 Q.vcl_matrix(), cl_int(Q.offset),
633 result.vcl_vector()));
638 #endif // HAVE_VIENNACL
647 #endif // SPECIAL_PURPOSE_IMPL_H_
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
static void compute(Matrix A)
static void compute(SGMatrix< T > A, SGMatrix< T > result)
static T compute(Matrix P, Matrix Q)
static void compute(SGMatrix< T > A)
static void compute(Matrix A, Matrix result)
static T compute(Matrix P, Matrix Q)
static void compute(SGMatrix< T > A, SGMatrix< T > result)
Generic class sum which provides a static compute method. This class is specialized for different typ...
static void compute(SGMatrix< T > A, SGMatrix< T > C)
Generic class which is specialized for different backends to perform the max operation.
static T compute(SGMatrix< T > P, SGMatrix< T > Q)
static T compute(SGMatrix< T > P, SGMatrix< T > Q)
static void compute(Matrix A, Matrix result)
static void compute(SGMatrix< T > A, SGMatrix< T > C)
static void compute(Matrix A, Matrix C)
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
all of classes and functions are contained in the shogun namespace
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
static float64_t exp(float64_t x)
static float64_t log(float64_t v)
static void compute(Matrix A, Matrix C)