Fix ocl::filter2D.
In the current implementation, this function only works when the anchor point is at the kernel center and the kernel size is either 3x3 or 5x5.
This commit is contained in:
		| @@ -689,6 +689,8 @@ namespace cv | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         //! applies non-separable 2D linear filter to the image |         //! applies non-separable 2D linear filter to the image | ||||||
|  |         //  Note, at the moment this function only works when anchor point is in the kernel center | ||||||
|  |         //  and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result | ||||||
|         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, |         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, | ||||||
|                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); |                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -645,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, | |||||||
|     args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); |     args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); | ||||||
|     args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); |     args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); | ||||||
|  |  | ||||||
|     openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth); |     const int buffer_size = 100; | ||||||
|  |     char opt_buffer [buffer_size] = ""; | ||||||
|  |     sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y); | ||||||
|  |  | ||||||
|  |     openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer); | ||||||
| } | } | ||||||
| Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, | Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, | ||||||
|         Point anchor, int borderType) |         Point anchor, int borderType) | ||||||
| @@ -656,7 +660,7 @@ Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const | |||||||
|  |  | ||||||
|     oclMat gpu_krnl; |     oclMat gpu_krnl; | ||||||
|     int nDivisor; |     int nDivisor; | ||||||
|     normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true); |     normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false); | ||||||
|     normalizeAnchor(anchor, ksize); |     normalizeAnchor(anchor, ksize); | ||||||
|  |  | ||||||
|     return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)], |     return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)], | ||||||
| @@ -1172,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel | |||||||
|     args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy)); |     args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy)); | ||||||
|     args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); |     args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); | ||||||
|  |  | ||||||
|     openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH); |     openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option); | ||||||
| } | } | ||||||
|  |  | ||||||
| Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype) | Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype) | ||||||
|   | |||||||
| @@ -82,9 +82,9 @@ | |||||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| /////////////////////////////Macro for define elements number per thread///////////////////////////// | /////////////////////////////Macro for define elements number per thread///////////////////////////// | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| #define ANCHOR                  3 | //#define ANCHOR                  3 | ||||||
| #define ANX                     1 | //#define ANX                     1 | ||||||
| #define ANY                     1 | //#define ANY                     1 | ||||||
|  |  | ||||||
| #define ROWS_PER_GROUP          4 | #define ROWS_PER_GROUP          4 | ||||||
| #define ROWS_PER_GROUP_BITS     2 | #define ROWS_PER_GROUP_BITS     2 | ||||||
| @@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x | |||||||
|  |  | ||||||
|         for(int i = 0; i < ANCHOR; i++) |         for(int i = 0; i < ANCHOR; i++) | ||||||
|         { |         { | ||||||
| #pragma unroll 3 | #pragma unroll | ||||||
|             for(int j = 0; j < ANCHOR; j++) |             for(int j = 0; j < ANCHOR; j++) | ||||||
|             { |             { | ||||||
|                 if(dst_rows_index < dst_rows_end) |                 if(dst_rows_index < dst_rows_end) | ||||||
| @@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x | |||||||
|  |  | ||||||
|         for(int i = 0; i < ANCHOR; i++) |         for(int i = 0; i < ANCHOR; i++) | ||||||
|         { |         { | ||||||
| #pragma unroll 3 | #pragma unroll | ||||||
|             for(int j = 0; j < ANCHOR; j++) |             for(int j = 0; j < ANCHOR; j++) | ||||||
|             { |             { | ||||||
|                 if(dst_rows_index < dst_rows_end) |                 if(dst_rows_index < dst_rows_end) | ||||||
| @@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_ | |||||||
|  |  | ||||||
|         for(int i = 0; i < ANCHOR; i++) |         for(int i = 0; i < ANCHOR; i++) | ||||||
|         { |         { | ||||||
| #pragma unroll 3 | #pragma unroll | ||||||
|             for(int j = 0; j < ANCHOR; j++) |             for(int j = 0; j < ANCHOR; j++) | ||||||
|             { |             { | ||||||
|                 if(dst_rows_index < dst_rows_end) |                 if(dst_rows_index < dst_rows_end) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 peng xiao
					peng xiao