#ifndef SPECIAL_PURPOSE_IMPL_H_
#define SPECIAL_PURPOSE_IMPL_H_

#include <shogun/lib/config.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

#ifdef HAVE_VIENNACL
#include <shogun/lib/GPUMatrix.h>
#include <shogun/lib/GPUVector.h>
#include <shogun/mathematics/linalg/internal/opencl_util.h>
#endif // HAVE_VIENNACL

namespace implementation
{

namespace special_purpose
{
/** Generic class which is specialized for different backends to compute the
 * element-wise logistic function of a matrix.
 */
template <enum Backend, class Matrix>
struct logistic
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation result[i,j] = 1/(1+exp(-A[i,j])) for each element of A */
    static void compute(Matrix A, Matrix result);
};
 
/** Specialization of logistic for the EIGEN3 backend */
template <class Matrix>
struct logistic<Backend::EIGEN3, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation result[i,j] = 1/(1+exp(-A[i,j])) for each element of A */
    static void compute(SGMatrix<T> A, SGMatrix<T> result)
    {
        int32_t len = A.num_rows*A.num_cols;
        for (int32_t i=0; i<len; i++)
            result[i] = 1.0/(1+CMath::exp(-1*A[i]));
    }
};
 
#ifdef HAVE_VIENNACL

/** Specialization of logistic for the VIENNACL backend */
template <class Matrix>
struct logistic<Backend::VIENNACL, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation result[i,j] = 1/(1+exp(-A[i,j])) for each element of A */
    static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
    {
        // OpenCL expression applied to each element by the generated kernel
        const std::string operation = "return 1.0/(1+exp(-1*element));";

        std::string kernel_name = "logistic_" + ocl::get_type_string<T>();
        viennacl::ocl::kernel& kernel =
            ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);

        kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

        viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
            cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
            result.vcl_matrix(), cl_int(result.offset)));
    }
};

#endif // HAVE_VIENNACL
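/* Usage sketch (illustrative, not part of the original header): the backend is selected
 * at compile time through the first template argument, so callers can invoke a
 * specialization directly. Matrix sizes and values below are arbitrary.
 *
 *     SGMatrix<float64_t> A(2, 2);
 *     SGMatrix<float64_t> result(2, 2);
 *     A.zero();
 *
 *     // Every entry of result becomes 1/(1+exp(0)) = 0.5
 *     logistic<Backend::EIGEN3, SGMatrix<float64_t> >::compute(A, result);
 */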
/** Generic class which is specialized for different backends to multiply a matrix
 * element-wise with the derivative of the logistic function.
 */
template <enum Backend, class Matrix>
struct multiply_by_logistic_derivative
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation C[i,j] *= A[i,j] * (1.0-A[i,j]) for all i and j */
    static void compute(Matrix A, Matrix C);
};
 
/** Specialization of multiply_by_logistic_derivative for the EIGEN3 backend */
template <class Matrix>
struct multiply_by_logistic_derivative<Backend::EIGEN3, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation C[i,j] *= A[i,j] * (1.0-A[i,j]) for all i and j */
    static void compute(SGMatrix<T> A, SGMatrix<T> C)
    {
        int32_t len = A.num_rows*A.num_cols;
        for (int32_t i=0; i<len; i++)
            C[i] *= A[i] * (1.0-A[i]);
    }
};
 
#ifdef HAVE_VIENNACL

/** Specialization of multiply_by_logistic_derivative for the VIENNACL backend */
template <class Matrix>
struct multiply_by_logistic_derivative<Backend::VIENNACL, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation C[i,j] *= A[i,j] * (1.0-A[i,j]) for all i and j */
    static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
    {
        // element1 is the entry of A, element2 the entry of C
        const std::string operation = "return element2 * element1*(1.0-element1);";

        std::string kernel_name = "multiply_by_logistic_derivative_" + ocl::get_type_string<T>();
        viennacl::ocl::kernel& kernel =
            ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);

        kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

        // C is passed both as the second input and as the output, so the update is in-place
        viennacl::ocl::enqueue(kernel(
            A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
            C.vcl_matrix(), cl_int(C.offset),
            C.vcl_matrix(), cl_int(C.offset)));
    }
};

#endif // HAVE_VIENNACL
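/* Note on the formula: for the logistic function s(x) = 1/(1+exp(-x)) the derivative
 * satisfies s'(x) = s(x)*(1-s(x)). The structs above therefore expect A to already hold
 * logistic activations (the output of logistic::compute), so that
 * C[i,j] *= A[i,j]*(1-A[i,j]) multiplies C by the derivative without recomputing exp().
 */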
/** Generic class which is specialized for different backends to compute the
 * element-wise rectified linear function of a matrix.
 */
template <enum Backend, class Matrix>
struct rectified_linear
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation result[i,j] = max(0, A[i,j]) for each element of A */
    static void compute(Matrix A, Matrix result);
};
 
/** Specialization of rectified_linear for the EIGEN3 backend */
template <class Matrix>
struct rectified_linear<Backend::EIGEN3, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation result[i,j] = max(0, A[i,j]) for each element of A */
    static void compute(SGMatrix<T> A, SGMatrix<T> result)
    {
        int32_t len = A.num_rows*A.num_cols;
        for (int32_t i=0; i<len; i++)
            result[i] = CMath::max((T)0, A[i]);
    }
};
 
#ifdef HAVE_VIENNACL

/** Specialization of rectified_linear for the VIENNACL backend */
template <class Matrix>
struct rectified_linear<Backend::VIENNACL, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation result[i,j] = max(0, A[i,j]) for each element of A */
    static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
    {
        // The cast keeps both arguments of max() at the kernel's DATATYPE
        const std::string operation = "return max((DATATYPE)0,element);";

        std::string kernel_name = "rectified_linear_" + ocl::get_type_string<T>();
        viennacl::ocl::kernel& kernel =
            ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);

        kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

        viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
            cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
            result.vcl_matrix(), cl_int(result.offset)));
    }
};

#endif // HAVE_VIENNACL
/** Generic class which is specialized for different backends to multiply a matrix
 * element-wise with the derivative of the rectified linear function.
 */
template <enum Backend, class Matrix>
struct multiply_by_rectified_linear_derivative
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation C[i,j] = 0 for all i and j where A[i,j] == 0 */
    static void compute(Matrix A, Matrix C);
};
 
/** Specialization of multiply_by_rectified_linear_derivative for the EIGEN3 backend */
template <class Matrix>
struct multiply_by_rectified_linear_derivative<Backend::EIGEN3, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation C[i,j] = 0 for all i and j where A[i,j] == 0 */
    static void compute(SGMatrix<T> A, SGMatrix<T> C)
    {
        int32_t len = A.num_rows*A.num_cols;
        for (int32_t i=0; i<len; i++)
            if (A[i]==0)
                C[i] = 0;
    }
};
 
#ifdef HAVE_VIENNACL

/** Specialization of multiply_by_rectified_linear_derivative for the VIENNACL backend */
template <class Matrix>
struct multiply_by_rectified_linear_derivative<Backend::VIENNACL, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Performs the operation C[i,j] = 0 for all i and j where A[i,j] == 0 */
    static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
    {
        // element1 is the entry of A, element2 the entry of C
        const std::string operation = "return element1==0 ? 0 : element2;";

        std::string kernel_name = "multiply_by_rectified_linear_derivative_" + ocl::get_type_string<T>();
        viennacl::ocl::kernel& kernel =
            ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);

        kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));

        // C is passed both as the second input and as the output, so the update is in-place
        viennacl::ocl::enqueue(kernel(
            A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
            C.vcl_matrix(), cl_int(C.offset),
            C.vcl_matrix(), cl_int(C.offset)));
    }
};

#endif // HAVE_VIENNACL
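/* Note on the formula: the rectified linear function f(x) = max(0, x) has derivative 1
 * for x > 0 and 0 otherwise, so multiplying C by the derivative amounts to zeroing the
 * entries where the activation is zero. As with the logistic case, A is expected to hold
 * the activations produced by rectified_linear::compute rather than the raw inputs.
 */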
/** Generic class which is specialized for different backends to compute the softmax
 * function of a matrix, column-wise.
 */
template <enum Backend, class Matrix>
struct softmax
{
    typedef typename Matrix::Scalar T;

    /** Applies the softmax function in-place to each column of A */
    static void compute(Matrix A);
};
 
/** Specialization of softmax for the EIGEN3 backend */
template <class Matrix>
struct softmax<Backend::EIGEN3, Matrix>
{
    typedef typename Matrix::Scalar T;
    typedef Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> MatrixXt;

    /** Applies the softmax function in-place to each column of A */
    static void compute(SGMatrix<T> A)
    {
        Eigen::Map<MatrixXt> A_eig = A;

        // Subtracting the maximum keeps exp() from overflowing
        float64_t max = A_eig.maxCoeff();

        for (int32_t j=0; j<A.num_cols; j++)
        {
            float64_t sum = 0;
            for (int32_t i=0; i<A.num_rows; i++)
                sum += CMath::exp(A(i,j)-max);

            float64_t normalizer = CMath::log(sum);
            for (int32_t k=0; k<A.num_rows; k++)
                A(k,j) = CMath::exp(A(k,j)-max-normalizer);
        }
    }
};
 
#ifdef HAVE_VIENNACL

/** Specialization of softmax for the VIENNACL backend */
template <class Matrix>
struct softmax<Backend::VIENNACL, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Returns the OpenCL kernel that applies softmax to each column of a matrix,
     * compiling it on first use and fetching it from the kernel cache afterwards.
     */
    static viennacl::ocl::kernel& generate_kernel()
    {
        std::string kernel_name = "softmax_" + ocl::get_type_string<T>();

        if (ocl::kernel_exists(kernel_name))
            return ocl::get_kernel(kernel_name);

        std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

        // One work-item per column: find the column maximum, compute the
        // log-sum-exp normalizer, then overwrite the column with its softmax
        source.append(
            R"(
                __kernel void KERNEL_NAME(
                    __global DATATYPE* A, int nrows, int ncols, int offset)
                {
                    int j = get_global_id(0);

                    if (j>=ncols)
                        return;

                    DATATYPE col_max = -INFINITY;
                    for (int i=0; i<nrows; i++)
                        col_max = max(col_max, A[offset + i+j*nrows]);

                    DATATYPE col_sum = 0;
                    for (int i=0; i<nrows; i++)
                        col_sum += exp(A[offset + i+j*nrows]-col_max);

                    DATATYPE normalizer = log(col_sum);
                    for (int i=0; i<nrows; i++)
                    {
                        int index = offset + i+j*nrows;
                        A[index] = exp(A[index]-col_max-normalizer);
                    }
                }
            )"
        );

        viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

        kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);

        return kernel;
    }

    /** Applies the softmax function in-place to each column of A */
    static void compute(CGPUMatrix<T> A)
    {
        viennacl::ocl::kernel& kernel = generate_kernel();
        kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_cols));

        viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
            cl_int(A.num_rows), cl_int(A.num_cols), cl_int(A.offset)));
    }
};

#endif // HAVE_VIENNACL
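/* The kernel above uses the standard log-sum-exp stabilization: for a column x,
 *
 *     softmax(x)_i = exp(x_i) / sum_k exp(x_k)
 *                  = exp(x_i - m - log(sum_k exp(x_k - m))),   with m = max_k x_k,
 *
 * which avoids overflow in exp() for large entries while producing the same result.
 */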
/** Generic class which is specialized for different backends to compute the cross
 * entropy between two matrices.
 */
template <enum Backend, class Matrix>
struct cross_entropy
{
    typedef typename Matrix::Scalar T;

    /** Returns the cross entropy between P and Q, defined as
     * \f$ H(P,Q) = - \sum_{i,j} P[i,j] \log Q[i,j] \f$
     */
    static T compute(Matrix P, Matrix Q);
};
 
/** Specialization of cross_entropy for the EIGEN3 backend */
template <class Matrix>
struct cross_entropy<Backend::EIGEN3, Matrix>
{
    typedef typename Matrix::Scalar T;
    typedef Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> MatrixXt;

    /** Returns the cross entropy between P and Q */
    static T compute(SGMatrix<T> P, SGMatrix<T> Q)
    {
        Eigen::Map<MatrixXt> P_eig = P;
        Eigen::Map<MatrixXt> Q_eig = Q;

        // 1e-30 guards against log(0) when Q contains exact zeros
        return -1*(P_eig.array() * (Q_eig.array()+1e-30).log()).sum();
    }
};
 
#ifdef HAVE_VIENNACL

/** Specialization of cross_entropy for the VIENNACL backend */
template <class Matrix>
struct cross_entropy<Backend::VIENNACL, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Returns the OpenCL kernel that reduces the cross entropy of two matrices into a
     * single value, compiling it on first use and fetching it from the cache afterwards.
     */
    static viennacl::ocl::kernel& generate_kernel()
    {
        std::string kernel_name = "cross_entropy_" + ocl::get_type_string<T>();

        if (ocl::kernel_exists(kernel_name))
            return ocl::get_kernel(kernel_name);

        std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

        // Single work-group reduction: each work-item accumulates a strided partial
        // sum, then the partial sums are combined pairwise in local memory
        source.append(
            R"(
                __kernel void KERNEL_NAME(
                    __global DATATYPE* p, int size, int p_offset,
                    __global DATATYPE* q, int q_offset,
                    __global DATATYPE* result)
                {
                    __local DATATYPE buffer[WORK_GROUP_SIZE_1D];

                    int local_id = get_local_id(0);

                    DATATYPE thread_sum = 0;
                    for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
                        thread_sum += p[i+p_offset]*log(q[i+q_offset]+1e-30);

                    buffer[local_id] = thread_sum;

                    for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
                    {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        if (local_id < j)
                            buffer[local_id] += buffer[local_id + j];
                    }

                    barrier(CLK_LOCAL_MEM_FENCE);

                    if (get_global_id(0)==0)
                        *result = -1*buffer[0];
                }
            )"
        );

        viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

        kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
        kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);

        return kernel;
    }

    /** Returns the cross entropy between P and Q */
    static T compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
    {
        viennacl::ocl::kernel& kernel = generate_kernel();

        CGPUVector<T> result(1);

        viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
            cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
            Q.vcl_matrix(), cl_int(Q.offset),
            result.vcl_vector()));

        return result[0];
    }
};

#endif // HAVE_VIENNACL
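/* Both reduction kernels in this header follow the same pattern: they are launched with a
 * single work-group of WORK_GROUP_SIZE_1D items, each item sums a strided slice of the
 * flattened matrices, and the per-item partial sums are then folded pairwise in local
 * memory until buffer[0] holds the total. The small 1e-30 constant matches the EIGEN3
 * implementation and keeps log() finite when Q contains exact zeros.
 */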
/** Generic class which is specialized for different backends to compute the squared
 * error between two matrices.
 */
template <enum Backend, class Matrix>
struct squared_error
{
    typedef typename Matrix::Scalar T;

    /** Returns the squared error between P and Q, defined as
     * \f$ E(P,Q) = \frac{1}{2} \sum_{i,j} (P[i,j]-Q[i,j])^2 \f$
     */
    static T compute(Matrix P, Matrix Q);
};
 
/** Specialization of squared_error for the EIGEN3 backend */
template <class Matrix>
struct squared_error<Backend::EIGEN3, Matrix>
{
    typedef typename Matrix::Scalar T;
    typedef Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> MatrixXt;

    /** Returns the squared error between P and Q */
    static T compute(SGMatrix<T> P, SGMatrix<T> Q)
    {
        Eigen::Map<MatrixXt> P_eig = P;
        Eigen::Map<MatrixXt> Q_eig = Q;

        return 0.5 * (P_eig - Q_eig).array().square().sum();
    }
};
 
#ifdef HAVE_VIENNACL

/** Specialization of squared_error for the VIENNACL backend */
template <class Matrix>
struct squared_error<Backend::VIENNACL, Matrix>
{
    typedef typename Matrix::Scalar T;

    /** Returns the OpenCL kernel that reduces the squared error of two matrices into a
     * single value, compiling it on first use and fetching it from the cache afterwards.
     */
    static viennacl::ocl::kernel& generate_kernel()
    {
        std::string kernel_name = "squared_error_" + ocl::get_type_string<T>();

        if (ocl::kernel_exists(kernel_name))
            return ocl::get_kernel(kernel_name);

        std::string source = ocl::generate_kernel_preamble<T>(kernel_name);

        // Same single work-group reduction as the cross_entropy kernel, summing
        // squared differences instead of p*log(q)
        source.append(
            R"(
                __kernel void KERNEL_NAME(
                    __global DATATYPE* p, int size, int p_offset,
                    __global DATATYPE* q, int q_offset,
                    __global DATATYPE* result)
                {
                    __local DATATYPE buffer[WORK_GROUP_SIZE_1D];

                    int local_id = get_local_id(0);

                    DATATYPE thread_sum = 0;
                    for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
                        thread_sum += pown(p[i+p_offset]-q[i+q_offset], 2);

                    buffer[local_id] = thread_sum;

                    for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
                    {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        if (local_id < j)
                            buffer[local_id] += buffer[local_id + j];
                    }

                    barrier(CLK_LOCAL_MEM_FENCE);

                    if (get_global_id(0)==0)
                        *result = 0.5*buffer[0];
                }
            )"
        );

        viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);

        kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
        kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);

        return kernel;
    }

    /** Returns the squared error between P and Q */
    static T compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
    {
        viennacl::ocl::kernel& kernel = generate_kernel();

        CGPUVector<T> result(1);

        viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
            cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
            Q.vcl_matrix(), cl_int(Q.offset),
            result.vcl_vector()));

        return result[0];
    }
};

#endif // HAVE_VIENNACL
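/* Usage sketch (illustrative, not part of the original header): the two error measures
 * share the same calling convention and return a scalar of the matrices' element type.
 *
 *     SGMatrix<float64_t> targets(10, 5);
 *     SGMatrix<float64_t> predictions(10, 5);
 *     // ... fill both matrices ...
 *
 *     float64_t ce = cross_entropy<Backend::EIGEN3, SGMatrix<float64_t> >::compute(
 *         targets, predictions);
 *     float64_t se = squared_error<Backend::EIGEN3, SGMatrix<float64_t> >::compute(
 *         targets, predictions);
 */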
}

}

#endif // SPECIAL_PURPOSE_IMPL_H_