SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
Convolve.h
浏览该文件的文档.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Khaled Nasr
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #ifndef CONVOLVE_H_
32 #define CONVOLVE_H_
33 
34 #include <shogun/lib/config.h>
35 #include <shogun/lib/SGMatrix.h>
37 
38 #include <shogun/io/SGIO.h>
39 
40 #ifdef HAVE_EIGEN3
42 #endif // HAVE_EIGEN3
43 
44 #ifdef HAVE_VIENNACL
45 #include <shogun/lib/GPUMatrix.h>
47 #endif // HAVE_VIENNACL
48 
49 namespace shogun
50 {
51 
52 namespace linalg
53 {
54 
55 namespace implementation
56 {
57 
61 template <enum Backend, class Matrix>
62 struct convolve
63 {
65  typedef typename Matrix::Scalar T;
66 
83  static void compute(Matrix X, Matrix W, Matrix Y, bool flip ,
84  bool overwrite, int32_t stride_x, int32_t stride_y);
85 };
86 
87 #ifdef HAVE_EIGEN3
88 
90 template <class Matrix>
91 struct convolve<Backend::EIGEN3, Matrix>
92 {
94  typedef typename Matrix::Scalar T;
95 
98 
101 
115  static void compute(SGMatrix<T> X, SGMatrix<T> W, SGMatrix<T> Y, bool flip ,
116  bool overwrite, int32_t stride_x, int32_t stride_y)
117  {
118  int32_t width = X.num_cols;
119  int32_t height = X.num_rows;
120 
121  int32_t kx = W.num_cols;
122  int32_t ky = W.num_rows;
123 
124  int32_t rx = (kx-1)/2;
125  int32_t ry = (ky-1)/2;
126 
127  for (int32_t x=0; x<width; x+=stride_x)
128  {
129  int32_t xout = x/stride_x;
130 
131  for (int32_t y=0; y<height; y+=stride_y)
132  {
133  int32_t yout = y/stride_y;
134 
135  T sum = overwrite ? 0 : Y(yout,xout);
136  for (int32_t x1=x-rx; x1<=x+rx; x1++)
137  {
138  int32_t wx = flip ? x1-x+rx : rx-x1+x;
139  for (int32_t y1=y-ry; y1<=y+ry; y1++)
140  {
141  if (x1>=0 && y1>=0 && x1<width && y1<height)
142  {
143  if (flip)
144  sum += W(y1-y+ry,wx)*X(y1,x1);
145  else
146  sum += W(ry-y1+y,wx)*X(y1,x1);
147  }
148  }
149  }
150  Y(yout,xout) = sum;
151  }
152  }
153  }
154 };
155 #endif // HAVE_EIGEN3
156 
157 #ifdef HAVE_VIENNACL
158 
160 template <class Matrix>
161 struct convolve<Backend::VIENNACL, Matrix>
162 {
164  typedef typename Matrix::Scalar T;
165 
167  template <class T>
168  static viennacl::ocl::kernel& generate_kernel_unity_stride(
169  int32_t radius_x, int32_t radius_y, bool flip, bool overwrite)
170  {
171  std::string kernel_name =
172  "convolve_unity_stride_" + ocl::get_type_string<T>() + "_" +
173  std::to_string(radius_x) + "_" + std::to_string(radius_y);
174 
175  if (flip) kernel_name.append("_flip");
176  if (overwrite) kernel_name.append("_overwrite");
177 
178  if (ocl::kernel_exists(kernel_name))
179  return ocl::get_kernel(kernel_name);
180 
181  std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
182 
183  if (flip) source.append("#define FLIP\n");
184  if (overwrite) source.append("#define OVERWRITE\n");
185 
186  source.append("#define RADIUS_X " + std::to_string(radius_x) + "\n");
187  source.append("#define RADIUS_Y " + std::to_string(radius_y) + "\n");
188 
189  source.append(
190  R"(
191  #define W_WIDTH (2*RADIUS_X+1)
192  #define W_HEIGHT (2*RADIUS_Y+1)
193 
194  #define X_LOCAL_WIDTH (WORK_GROUP_SIZE_2D+2*RADIUS_X)
195  #define X_LOCAL_HEIGHT (WORK_GROUP_SIZE_2D+2*RADIUS_Y)
196 
197  inline DATATYPE readX(read_only __global DATATYPE* X, int x, int y,
198  int X_width, int X_height, int X_offset)
199  {
200  if (x>=0 && y>=0 && x<X_width && y<X_height)
201  return X[y + x*X_height + X_offset];
202  else
203  return 0;
204  }
205 
206  __kernel void KERNEL_NAME(
207  read_only __global DATATYPE* X, int X_width, int X_height, int X_offset,
208  __constant DATATYPE* W, int W_offset,
209  __global DATATYPE* Y, int Y_offset)
210  {
211  __local DATATYPE X_local[X_LOCAL_WIDTH][X_LOCAL_HEIGHT];
212 
213  int x = get_global_id(0);
214  int y = get_global_id(1);
215 
216  int xl = get_local_id(0);
217  int yl = get_local_id(1);
218 
219  if (xl==WORK_GROUP_SIZE_2D-1 && yl == WORK_GROUP_SIZE_2D-1)
220  {
221  for (int rx=0; rx<=2*RADIUS_X; rx++)
222  for (int ry=0; ry<=2*RADIUS_Y; ry++)
223  X_local[xl+rx][yl+ry] = readX(X, x-RADIUS_X+rx, y-RADIUS_Y+ry, X_width, X_height, X_offset);
224  }
225  else if (xl==WORK_GROUP_SIZE_2D-1)
226  {
227  for (int rx=0; rx<=2*RADIUS_X; rx++)
228  X_local[xl+rx][yl] = readX(X, x-RADIUS_X+rx, y-RADIUS_Y, X_width, X_height, X_offset);
229  }
230  else if (yl == WORK_GROUP_SIZE_2D-1)
231  {
232  for (int ry=0; ry<=2*RADIUS_Y; ry++)
233  X_local[xl][yl+ry] = readX(X, x-RADIUS_X, y-RADIUS_Y+ry, X_width, X_height, X_offset);
234  }
235  else
236  X_local[xl][yl] = readX(X, x-RADIUS_X, y-RADIUS_Y, X_width, X_height, X_offset);
237 
238  barrier(CLK_LOCAL_MEM_FENCE);
239 
240  if (x>=X_width || y>=X_height)
241  return;
242 
243  DATATYPE sum = 0;
244  for (int x1=0; x1<W_WIDTH; x1++)
245  {
246  #ifdef FLIP
247  int wx = x1*W_HEIGHT+W_offset;
248  #else
249  int wx = (2*RADIUS_X-x1)*W_HEIGHT+W_offset;
250  #endif
251  int inx = x1+xl;
252  for (int y1=0; y1<W_HEIGHT; y1++)
253  {
254  int iny = y1+yl;
255  #ifdef FLIP
256  sum += W[y1+wx]*X_local[inx][iny];
257  #else
258  sum += W[2*RADIUS_Y-y1+wx]*X_local[inx][iny];
259  #endif
260  }
261  }
262  #ifdef OVERWRITE
263  Y[y+X_height*x + Y_offset] = sum;
264  #else
265  Y[y+X_height*x + Y_offset] += sum;
266  #endif
267  }
268  )"
269  );
270 
271  viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
272 
273  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_2D);
274  kernel.local_work_size(1, OCL_WORK_GROUP_SIZE_2D);
275 
276  return kernel;
277  }
278 
280  template <class T>
281  static viennacl::ocl::kernel& generate_kernel_arbitrary_stride(
282  int32_t radius_x, int32_t radius_y, bool flip, bool overwrite)
283  {
284  std::string kernel_name =
285  "convolve_arbitrary_stride_" + ocl::get_type_string<T>() + "_" +
286  std::to_string(radius_x) + "_" + std::to_string(radius_y);
287 
288  if (flip) kernel_name.append("_flip");
289  if (overwrite) kernel_name.append("_overwrite");
290 
291  if (ocl::kernel_exists(kernel_name))
292  return ocl::get_kernel(kernel_name);
293 
294  std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
295 
296  if (flip) source.append("#define FLIP\n");
297  if (overwrite) source.append("#define OVERWRITE\n");
298 
299  source.append("#define RADIUS_X " + std::to_string(radius_x) + "\n");
300  source.append("#define RADIUS_Y " + std::to_string(radius_y) + "\n");
301 
302  source.append(
303  R"(
304  #define W_WIDTH (2*RADIUS_X+1)
305  #define W_HEIGHT (2*RADIUS_Y+1)
306 
307  #define X_LOCAL_WIDTH (WORK_GROUP_SIZE_2D+2*RADIUS_X)
308  #define X_LOCAL_HEIGHT (WORK_GROUP_SIZE_2D+2*RADIUS_Y)
309 
310  __kernel void KERNEL_NAME(
311  read_only __global DATATYPE* X, int X_width, int X_height, int X_offset,
312  __constant DATATYPE* W, int W_offset,
313  __global DATATYPE* Y, int Y_offset,
314  int stride_x, int stride_y)
315  {
316  __local DATATYPE X_local[WORK_GROUP_SIZE_2D][WORK_GROUP_SIZE_2D];
317 
318  int x = get_global_id(0)*stride_x;
319  int y = get_global_id(1)*stride_y;
320 
321  int Y_width = X_width/stride_x;
322  int Y_height = X_height/stride_y;
323 
324  if (get_global_id(0)>=Y_width || get_global_id(1)>=Y_height)
325  return;
326 
327  DATATYPE sum = 0;
328  for (int x1=0; x1<W_WIDTH; x1++)
329  {
330  #ifdef FLIP
331  int wx = x1*W_HEIGHT+W_offset;
332  #else
333  int wx = (2*RADIUS_X-x1)*W_HEIGHT+W_offset;
334  #endif
335  int inx = x1+x-RADIUS_X;
336  for (int y1=0; y1<W_HEIGHT; y1++)
337  {
338  int iny = y1+y-RADIUS_Y;
339  if (inx>=0 && iny>=0 && inx<X_width && iny<X_height)
340  {
341  #ifdef FLIP
342  sum += W[y1+wx]*X[iny+inx*X_height+X_offset];
343  #else
344  sum += W[2*RADIUS_Y-y1+wx]*X[iny+inx*X_height+X_offset];
345  #endif
346  }
347  }
348  }
349  #ifdef OVERWRITE
350  Y[get_global_id(1)+Y_height*get_global_id(0) + Y_offset] = sum;
351  #else
352  Y[get_global_id(1)+Y_height*get_global_id(0) + Y_offset] += sum;
353  #endif
354  }
355  )"
356  );
357 
358  viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source);
359 
360  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_2D);
361  kernel.local_work_size(1, OCL_WORK_GROUP_SIZE_2D);
362 
363  return kernel;
364  }
365 
382  static void compute(CGPUMatrix<T> X, CGPUMatrix<T> W, CGPUMatrix<T> Y, bool flip ,
383  bool overwrite, int32_t stride_x, int32_t stride_y)
384  {
385  if (stride_x==1 && stride_y==1)
386  {
387  viennacl::ocl::kernel& kernel = generate_kernel_unity_stride<T>(
388  (W.num_cols-1)/2, (W.num_rows-1)/2, flip, overwrite);
389 
390  kernel.global_work_size(0, ocl::align_to_multiple_2d(Y.num_cols));
391  kernel.global_work_size(1, ocl::align_to_multiple_2d(Y.num_rows));
392 
393  viennacl::ocl::enqueue(kernel(
394  X.vcl_matrix(), cl_int(X.num_cols), cl_int(X.num_rows), cl_int(X.offset),
395  W.vcl_matrix(), cl_int(W.offset),
396  Y.vcl_matrix(), cl_int(Y.offset)));
397  }
398  else
399  {
400  viennacl::ocl::kernel& kernel = generate_kernel_arbitrary_stride<T>(
401  (W.num_cols-1)/2, (W.num_rows-1)/2, flip, overwrite);
402 
403  kernel.global_work_size(0, ocl::align_to_multiple_2d(Y.num_cols));
404  kernel.global_work_size(1, ocl::align_to_multiple_2d(Y.num_rows));
405 
406  viennacl::ocl::enqueue(kernel(
407  X.vcl_matrix(), cl_int(X.num_cols), cl_int(X.num_rows), cl_int(X.offset),
408  W.vcl_matrix(), cl_int(W.offset),
409  Y.vcl_matrix(), cl_int(Y.offset),
410  cl_int(stride_x), cl_int(stride_y)));
411  }
412  }
413 };
414 
415 #endif // HAVE_VIENNACL
416 
417 }
418 
419 }
420 
421 }
422 #endif // CONVOLVE_H_
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
Definition: Convolve.h:97
static void compute(SGMatrix< T > X, SGMatrix< T > W, SGMatrix< T > Y, bool flip, bool overwrite, int32_t stride_x, int32_t stride_y)
Definition: Convolve.h:115
index_t num_cols
Definition: SGMatrix.h:378
index_t num_rows
Definition: SGMatrix.h:376
Generic class sum which provides a static compute method. This class is specialized for different typ...
Definition: Sum.h:71
shogun matrix
static void compute(Matrix X, Matrix W, Matrix Y, bool flip, bool overwrite, int32_t stride_x, int32_t stride_y)
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18

SHOGUN 机器学习工具包 - 项目文档