31 #ifndef SPECIAL_PURPOSE_IMPL_H_
32 #define SPECIAL_PURPOSE_IMPL_H_
45 #endif // HAVE_VIENNACL
53 namespace implementation
56 namespace special_purpose
// Generic primary template for the element-wise logistic (sigmoid) operation.
// Only declares the interface; the actual implementations live in the
// per-backend partial specializations below.
// NOTE(review): extraction fragment — the enclosing "struct logistic" header
// and braces are not visible here; confirm against the original header.
62 template <enum Backend,
class Matrix>
// Scalar element type of the Matrix (e.g. float32_t / float64_t).
66 typedef typename Matrix::Scalar
T;
// Applies logistic(x) = 1/(1+exp(-x)) to each element of A, writing into result.
69 static void compute(Matrix A, Matrix result);
// CPU (Eigen backend) specialization of the element-wise logistic operation.
// NOTE(review): extraction fragment — the specialization header and the loop
// body line are missing; presumably the loop applies the logistic function to
// each of the len elements, mirroring the GPU kernel below — confirm.
75 template <
class Matrix>
// Scalar element type of the Matrix.
79 typedef typename Matrix::Scalar
T;
// Plain element-wise loop over the flattened matrix data.
85 for (int32_t i=0; i<len; i++)
// GPU (ViennaCL/OpenCL backend) specialization of the element-wise logistic
// operation, operating on GPU-resident matrices.
94 template <
class Matrix>
95 struct logistic<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
98 typedef typename Matrix::Scalar
T;
// Computes result[i] = 1/(1+exp(-A[i])) element-wise on the GPU.
101 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
// OpenCL expression applied to each element by the generated kernel.
103 const std::string operation =
"return 1.0/(1+exp(-1*element));";
// Kernel name is type-suffixed so each scalar type gets its own compiled kernel.
105 std::string kernel_name =
"logistic_" + ocl::get_type_string<T>();
// Generates (or fetches a cached) single-input element-wise kernel.
106 viennacl::ocl::kernel& kernel =
107 ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
// One work-item per element, rounded up to the work-group size.
109 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Enqueue with total element count and the buffer offsets of both matrices.
111 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
112 cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
113 result.vcl_matrix(), cl_int(result.offset)));
117 #endif // HAVE_VIENNACL
// Generic primary template: multiplies C element-wise by the derivative of the
// logistic function evaluated from already-activated values A,
// i.e. C[i] *= A[i]*(1-A[i]). Implemented by the backend specializations below.
// NOTE(review): extraction fragment — the enclosing struct header is not
// visible here; confirm against the original header.
122 template <enum Backend,
class Matrix>
// Scalar element type of the Matrix.
126 typedef typename Matrix::Scalar
T;
// A holds logistic outputs; C is updated in place.
129 static void compute(Matrix A, Matrix C);
// CPU (Eigen backend) specialization: in-place element-wise multiply of C by
// the logistic derivative A*(1-A), where A already contains logistic outputs.
// NOTE(review): extraction fragment — specialization header/braces missing.
135 template <
class Matrix>
// Scalar element type of the Matrix.
139 typedef typename Matrix::Scalar
T;
// d/dx logistic(x) = logistic(x)*(1-logistic(x)) = A[i]*(1-A[i]).
145 for (int32_t i=0; i<len; i++)
146 C[i] *= A[i] * (1.0-A[i]);
149 #endif // HAVE_EIGEN3
// GPU (ViennaCL/OpenCL backend) specialization: C[i] *= A[i]*(1-A[i]) on the GPU.
154 template <
class Matrix>
155 struct multiply_by_logistic_derivative<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
158 typedef typename Matrix::Scalar
T;
// A holds logistic outputs; C is both the second input and the output buffer.
161 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
// Two-argument element-wise expression: element1 = A, element2 = C.
163 const std::string operation =
"return element2 * element1*(1.0-element1);";
// Type-suffixed kernel name so each scalar type compiles its own kernel.
165 std::string kernel_name =
"multiply_by_logistic_derivative_" + ocl::get_type_string<T>();
166 viennacl::ocl::kernel& kernel =
167 ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
// One work-item per element, rounded up to the work-group size.
169 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// C is passed twice: once as the second input, once as the result buffer.
171 viennacl::ocl::enqueue(kernel(
172 A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
173 C.vcl_matrix(), cl_int(C.offset),
174 C.vcl_matrix(), cl_int(C.offset)));
178 #endif // HAVE_VIENNACL
// Generic primary template for the element-wise rectified-linear (ReLU)
// operation: result[i] = max(0, A[i]). Backend specializations implement it.
// NOTE(review): extraction fragment — the enclosing struct header is not
// visible here; confirm against the original header.
183 template <enum Backend,
class Matrix>
// Scalar element type of the Matrix.
187 typedef typename Matrix::Scalar
T;
// Writes the clamped values into result; A is unchanged.
190 static void compute(Matrix A, Matrix result);
// CPU (Eigen backend) specialization of rectified-linear.
// NOTE(review): extraction fragment — the loop body line is missing;
// presumably result[i] = max(0, A[i]) per element, matching the GPU kernel
// below — confirm against the original header.
196 template <
class Matrix>
// Scalar element type of the Matrix.
200 typedef typename Matrix::Scalar
T;
// Element-wise loop over the flattened matrix data.
206 for (int32_t i=0; i<len; i++)
210 #endif // HAVE_EIGEN3
// GPU (ViennaCL/OpenCL backend) specialization of rectified-linear (ReLU).
215 template <
class Matrix>
216 struct rectified_linear<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
219 typedef typename Matrix::Scalar
T;
// Computes result[i] = max(0, A[i]) element-wise on the GPU.
222 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
// The cast to DATATYPE keeps OpenCL's max() overload resolution unambiguous.
224 const std::string operation =
"return max((DATATYPE)0,element);";
// Type-suffixed kernel name so each scalar type compiles its own kernel.
226 std::string kernel_name =
"rectified_linear_" + ocl::get_type_string<T>();
227 viennacl::ocl::kernel& kernel =
228 ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
// One work-item per element, rounded up to the work-group size.
230 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
232 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
233 cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
234 result.vcl_matrix(), cl_int(result.offset)));
238 #endif // HAVE_VIENNACL
// Generic primary template: multiplies C element-wise by the derivative of the
// rectified-linear function evaluated from the activations A (zero where
// A[i]==0, identity otherwise). Implemented by the backend specializations.
// NOTE(review): extraction fragment — the enclosing struct header is not
// visible here; confirm against the original header.
243 template <enum Backend,
class Matrix>
// Scalar element type of the Matrix.
247 typedef typename Matrix::Scalar
T;
// A holds ReLU outputs; C is updated in place.
250 static void compute(Matrix A, Matrix C);
// CPU (Eigen backend) specialization of multiply_by_rectified_linear_derivative.
// NOTE(review): extraction fragment — the loop body line is missing;
// presumably C[i] is zeroed where A[i]==0, matching the GPU kernel below —
// confirm against the original header.
256 template <
class Matrix>
// Scalar element type of the Matrix.
260 typedef typename Matrix::Scalar
T;
// Element-wise loop over the flattened matrix data.
266 for (int32_t i=0; i<len; i++)
271 #endif // HAVE_EIGEN3
// GPU (ViennaCL/OpenCL backend) specialization:
// C[i] = (A[i]==0 ? 0 : C[i]) computed on the GPU.
276 template <
class Matrix>
277 struct multiply_by_rectified_linear_derivative<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
280 typedef typename Matrix::Scalar
T;
// A holds ReLU outputs; C is both the second input and the output buffer.
283 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
// ReLU derivative is 0 where the activation was clamped, 1 elsewhere,
// so the product reduces to either 0 or element2 (= C).
285 const std::string operation =
"return element1==0 ? 0 : element2;";
// Type-suffixed kernel name so each scalar type compiles its own kernel.
287 std::string kernel_name =
"multiply_by_rectified_linear_derivative_" + ocl::get_type_string<T>();
288 viennacl::ocl::kernel& kernel =
289 ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
// One work-item per element, rounded up to the work-group size.
291 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// C is passed twice: once as the second input, once as the result buffer.
293 viennacl::ocl::enqueue(kernel(
294 A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
295 C.vcl_matrix(), cl_int(C.offset),
296 C.vcl_matrix(), cl_int(C.offset)));
300 #endif // HAVE_VIENNACL
// Generic primary template for the column-wise softmax operation (in place on A,
// as shown by the single-argument GPU compute() below). Implemented by the
// backend specializations.
// NOTE(review): extraction fragment — the struct header and the compute()
// declaration are not visible here; confirm against the original header.
305 template <enum Backend,
class Matrix>
// Scalar element type of the Matrix.
309 typedef typename Matrix::Scalar
T;
// CPU (Eigen backend) specialization of column-wise softmax.
// NOTE(review): extraction fragment — the loop bodies are missing. The loop
// structure (outer over columns, two inner passes over rows) matches the GPU
// kernel below: per column, find the max, accumulate a normalizer, then
// rewrite each entry — confirm against the original header.
320 template <
class Matrix>
// Scalar element type of the Matrix.
324 typedef typename Matrix::Scalar
T;
// One softmax per column j.
338 for (int32_t j=0; j<A.
num_cols; j++)
// First pass over the rows of column j.
341 for (int32_t i=0; i<A.
num_rows; i++)
// Second pass over the rows of column j.
345 for (int32_t k=0; k<A.
num_rows; k++)
350 #endif // HAVE_EIGEN3
// GPU (ViennaCL/OpenCL backend) specialization of column-wise softmax,
// performed in place on a GPU-resident column-major matrix.
355 template <
class Matrix>
356 struct softmax<Backend::VIENNACL, Matrix>
// Scalar element type of the matrix.
359 typedef typename Matrix::Scalar
T;
// Builds (or retrieves a cached) OpenCL kernel that softmaxes one column
// per work-item.
363 static viennacl::ocl::kernel& generate_kernel()
// Type-suffixed kernel name used as the cache key.
365 std::string kernel_name =
"softmax_" + ocl::get_type_string<T>();
// Reuse the already-compiled kernel if one exists for this scalar type.
367 if (ocl::kernel_exists(kernel_name))
368 return ocl::get_kernel(kernel_name);
// Preamble defines DATATYPE, KERNEL_NAME, etc. for the source below.
370 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
// --- OpenCL kernel source: each work-item j handles column j of A. ---
374 __kernel void KERNEL_NAME(
375 __global DATATYPE* A, int nrows, int ncols, int offset)
377 int j = get_global_id(0);
// Pass 1: column maximum, subtracted below for numerical stability.
382 DATATYPE col_max = -INFINITY;
383 for (int i=0; i<nrows; i++)
384 col_max = max(col_max, A[offset + i+j*nrows]);
// Pass 2: sum of shifted exponentials.
386 DATATYPE col_sum = 0;
387 for (int i=0; i<nrows; i++)
388 col_sum += exp(A[offset + i+j*nrows]-col_max);
// Pass 3: rewrite in place using the log-sum-exp normalizer:
// A[i] <- exp(A[i] - col_max - log(col_sum)).
390 DATATYPE normalizer = log(col_sum);
391 for (int i=0; i<nrows; i++)
393 int index = offset + i+j*nrows;
394 A[index] = exp(A[index]-col_max-normalizer);
// --- end of kernel source ---
400 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
402 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
// Applies softmax to each column of A, in place.
410 static void compute(CGPUMatrix<T> A)
412 viennacl::ocl::kernel& kernel = generate_kernel<T>();
// One work-item per column, rounded up to the work-group size.
413 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_cols));
415 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
416 cl_int(A.num_rows), cl_int(A.num_cols), cl_int(A.offset)));
420 #endif // HAVE_VIENNACL
// Generic primary template for the cross-entropy reduction between a target
// distribution P and a predicted distribution Q:
// -sum_ij P[i,j] * log(Q[i,j]). Implemented by the backend specializations.
// NOTE(review): extraction fragment — the enclosing struct header is not
// visible here; confirm against the original header.
425 template <enum Backend,
class Matrix>
// Scalar element type of the Matrix.
429 typedef typename Matrix::Scalar
T;
// Returns the scalar cross-entropy of Q relative to P.
434 static T
compute(Matrix P, Matrix Q);
// CPU (Eigen backend) specialization of the cross-entropy reduction.
// NOTE(review): extraction fragment — the lines mapping P/Q into the Eigen
// views P_eig/Q_eig are missing.
439 template <
class Matrix>
// Scalar element type of the Matrix.
443 typedef typename Matrix::Scalar
T;
// -sum(P * log(Q + 1e-30)); the 1e-30 epsilon guards log(0) when Q has zeros.
456 return -1*(P_eig.array() * (Q_eig.array()+1e-30).log()).
sum();
459 #endif // HAVE_EIGEN3
// GPU (ViennaCL/OpenCL backend) specialization of the cross-entropy reduction,
// computed with a single work-group doing a strided sum plus a tree reduction
// in local memory.
463 template <
class Matrix>
464 struct cross_entropy<Backend::VIENNACL,Matrix>
// Scalar element type of the matrix.
467 typedef typename Matrix::Scalar
T;
// Builds (or retrieves a cached) single-work-group reduction kernel.
471 static viennacl::ocl::kernel& generate_kernel()
// Type-suffixed kernel name used as the cache key.
473 std::string kernel_name =
"cross_entropy_" + ocl::get_type_string<T>();
475 if (ocl::kernel_exists(kernel_name))
476 return ocl::get_kernel(kernel_name);
// Preamble defines DATATYPE, KERNEL_NAME, WORK_GROUP_SIZE_1D for the source.
478 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
// --- OpenCL kernel source ---
482 __kernel void KERNEL_NAME(
483 __global DATATYPE* p, int size, int p_offset,
484 __global DATATYPE* q, int q_offset,
485 __global DATATYPE* result)
// Local-memory buffer for the per-work-group tree reduction.
487 __local DATATYPE buffer[WORK_GROUP_SIZE_1D];
489 int local_id = get_local_id(0);
// Each work-item accumulates a strided partial sum of p*log(q + eps);
// the 1e-30 epsilon guards log(0).
491 DATATYPE thread_sum = 0;
492 for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
493 thread_sum += p[i+p_offset]*log(q[i+q_offset]+1e-30);
495 buffer[local_id] = thread_sum;
// Tree reduction, halving the active span each iteration.
// NOTE(review): the usual "if (local_id < j)" guard around the accumulation
// is not visible in this extraction fragment — confirm it exists in the
// original source, otherwise the reduction would read out of bounds.
497 for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
499 barrier(CLK_LOCAL_MEM_FENCE);
501 buffer[local_id] += buffer[local_id + j];
504 barrier(CLK_LOCAL_MEM_FENCE);
// Work-item 0 negates the total and writes the scalar result.
506 if (get_global_id(0)==0)
507 *result = -1*buffer[0];
// --- end of kernel source ---
512 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
// One work-group total: local size == global size.
514 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
515 kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);
// Returns -sum(P * log(Q + 1e-30)) for GPU-resident matrices.
523 static T
compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
525 viennacl::ocl::kernel& kernel = generate_kernel<T>();
// Single-element GPU vector receiving the reduced scalar.
527 CGPUVector<T> result(1);
529 viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
530 cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
531 Q.vcl_matrix(), cl_int(Q.offset),
532 result.vcl_vector()));
537 #endif // HAVE_VIENNACL
// Generic primary template for the squared-error reduction between P and Q:
// 0.5 * sum_ij (P[i,j] - Q[i,j])^2. Implemented by the backend specializations.
// NOTE(review): extraction fragment — the enclosing struct header is not
// visible here; confirm against the original header.
542 template <enum Backend,
class Matrix>
// Scalar element type of the Matrix.
546 typedef typename Matrix::Scalar
T;
// Returns the scalar squared error between P and Q.
551 static T
compute(Matrix P, Matrix Q);
// CPU (Eigen backend) specialization of the squared-error reduction.
// NOTE(review): extraction fragment — the lines mapping P/Q into the Eigen
// views P_eig/Q_eig are missing.
556 template <
class Matrix>
// Scalar element type of the Matrix.
560 typedef typename Matrix::Scalar
T;
// 0.5 * sum of element-wise squared differences.
573 return 0.5 * (P_eig - Q_eig).array().square().sum();
576 #endif // HAVE_EIGEN3
// GPU (ViennaCL/OpenCL backend) specialization of the squared-error reduction,
// computed with a single work-group doing a strided sum plus a tree reduction
// in local memory.
580 template <
class Matrix>
581 struct squared_error<Backend::VIENNACL,Matrix>
// Scalar element type of the matrix.
584 typedef typename Matrix::Scalar
T;
// Builds (or retrieves a cached) single-work-group reduction kernel.
588 static viennacl::ocl::kernel& generate_kernel()
// Type-suffixed kernel name used as the cache key.
590 std::string kernel_name =
"squared_error_" + ocl::get_type_string<T>();
592 if (ocl::kernel_exists(kernel_name))
593 return ocl::get_kernel(kernel_name);
// Preamble defines DATATYPE, KERNEL_NAME, WORK_GROUP_SIZE_1D for the source.
595 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
// --- OpenCL kernel source ---
599 __kernel void KERNEL_NAME(
600 __global DATATYPE* p, int size, int p_offset,
601 __global DATATYPE* q, int q_offset,
602 __global DATATYPE* result)
// Local-memory buffer for the per-work-group tree reduction.
604 __local DATATYPE buffer[WORK_GROUP_SIZE_1D];
606 int local_id = get_local_id(0);
// Each work-item accumulates a strided partial sum of (p-q)^2;
// pown(x, 2) is OpenCL's integer-exponent power function.
608 DATATYPE thread_sum = 0;
609 for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
610 thread_sum += pown(p[i+p_offset]-q[i+q_offset], 2);
612 buffer[local_id] = thread_sum;
// Tree reduction, halving the active span each iteration.
// NOTE(review): the usual "if (local_id < j)" guard around the accumulation
// is not visible in this extraction fragment — confirm it exists in the
// original source, otherwise the reduction would read out of bounds.
614 for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
616 barrier(CLK_LOCAL_MEM_FENCE);
618 buffer[local_id] += buffer[local_id + j];
621 barrier(CLK_LOCAL_MEM_FENCE);
// Work-item 0 applies the 0.5 factor and writes the scalar result.
623 if (get_global_id(0)==0)
624 *result = 0.5*buffer[0];
// --- end of kernel source ---
629 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
// One work-group total: local size == global size.
631 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
632 kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);
// Returns 0.5 * sum((P-Q)^2) for GPU-resident matrices.
640 static T
compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
642 viennacl::ocl::kernel& kernel = generate_kernel<T>();
// Single-element GPU vector receiving the reduced scalar.
644 CGPUVector<T> result(1);
646 viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
647 cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
648 Q.vcl_matrix(), cl_int(Q.offset),
649 result.vcl_vector()));
654 #endif // HAVE_VIENNACL
663 #endif // SPECIAL_PURPOSE_IMPL_H_
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
static void compute(Matrix A)
static void compute(SGMatrix< T > A, SGMatrix< T > result)
static T compute(Matrix P, Matrix Q)
static void compute(SGMatrix< T > A)
static void compute(Matrix A, Matrix result)
static T compute(Matrix P, Matrix Q)
static void compute(SGMatrix< T > A, SGMatrix< T > result)
Generic class sum which provides a static compute method. This class is specialized for different typ...
static void compute(SGMatrix< T > A, SGMatrix< T > C)
Generic class which is specialized for different backends to perform the max operation.
static T compute(SGMatrix< T > P, SGMatrix< T > Q)
static T compute(SGMatrix< T > P, SGMatrix< T > Q)
static void compute(Matrix A, Matrix result)
static void compute(SGMatrix< T > A, SGMatrix< T > C)
static void compute(Matrix A, Matrix C)
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
all of classes and functions are contained in the shogun namespace
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
static float64_t exp(float64_t x)
static float64_t log(float64_t v)
static void compute(Matrix A, Matrix C)