diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 850a2e60e..4d63e3f00 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -575,6 +575,7 @@ protected: CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf); CV_EXPORTS const char* typeToStr(int t); CV_EXPORTS const char* memopTypeToStr(int t); +CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1); CV_EXPORTS void getPlatfomsInfo(std::vector& platform_info); class CV_EXPORTS Image2D diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index d4ed3bd9b..25b8a7b54 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3319,7 +3319,7 @@ public: CV_Assert(u->handle != 0 && u->urefcount == 0); if(u->tempUMat()) { - UMatDataAutoLock lock(u); +// UMatDataAutoLock lock(u); if( u->hostCopyObsolete() && u->refcount > 0 ) { cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); @@ -3832,6 +3832,58 @@ const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf) return buf; } +template +static std::string kerToStr(const Mat & k) +{ + int width = k.cols - 1, depth = k.depth(); + const T * const data = reinterpret_cast(k.data); + + std::ostringstream stream; + stream.precision(10); + + if (depth <= CV_8S) + { + for (int i = 0; i < width; ++i) + stream << "DIG(" << (int)data[i] << ")"; + stream << "DIG(" << (int)data[width] << ")"; + } + else if (depth == CV_32F) + { + stream.setf(std::ios_base::showpoint); + for (int i = 0; i < width; ++i) + stream << "DIG(" << data[i] << "f)"; + stream << "DIG(" << data[width] << "f)"; + } + else + { + for (int i = 0; i < width; ++i) + stream << "DIG(" << data[i] << ")"; + stream << "DIG(" << data[width] << ")"; + } + + return stream.str(); +} + +String kernelToStr(InputArray _kernel, int ddepth) +{ + Mat kernel = _kernel.getMat().reshape(1, 1); + + int depth = kernel.depth(); + if (ddepth < 0) + ddepth = depth; + + if (ddepth != depth) + kernel.convertTo(kernel, ddepth); + + typedef std::string (*func_t)(const Mat &); + static const func_t funcs[] = { kerToStr, kerToStr, kerToStr,kerToStr, + kerToStr, kerToStr, kerToStr, 0 }; + const func_t func = funcs[depth]; + CV_Assert(func != 0); + + return cv::format(" -D COEFF=%s", func(kernel).c_str()); +} + /////////////////////////////////////////////////////////////////////////////////////////////// // deviceVersion has format // OpenCL diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 8c11c62db..09519e74d 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3378,6 +3378,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, btype, extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED"); + build_options += ocl::kernelToStr(kernelX, CV_32F); Size srcWholeSize; Point srcOffset; src.locateROI(srcWholeSize, srcOffset); @@ -3390,7 +3391,8 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, strKernel << "_D" << sdepth; ocl::Kernel kernelRow; - if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options)) + if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, + build_options)) return false; int idxArg = 0; @@ -3409,7 +3411,6 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, idxArg = kernelRow.set(idxArg, buf.cols); idxArg = kernelRow.set(idxArg, buf.rows); idxArg = kernelRow.set(idxArg, radiusY); - idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ))); return kernelRow.run(2, globalsize, localsize, sync); } @@ -3479,6 +3480,8 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b } } + build_options += ocl::kernelToStr(kernelY, CV_32F); + ocl::Kernel kernelCol; if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options)) return false; @@ -3494,7 +3497,6 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize())); idxArg = kernelCol.set(idxArg, dst.cols); idxArg = kernelCol.set(idxArg, dst.rows); - idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ))); return kernelCol.run(2, globalsize, localsize, sync); } @@ -3508,7 +3510,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, int type = _src.type(); if ( !( (CV_8UC1 == type || CV_8UC4 == type || CV_32FC1 == type || CV_32FC4 == type) && - (ddepth == CV_32F || ddepth == CV_8U) ) ) + (ddepth == CV_32F || ddepth == CV_8U || ddepth < 0) ) ) return false; int cn = CV_MAT_CN(type); @@ -3541,12 +3543,12 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, Size srcSize = src.size(); Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1); UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn)); - if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, true)) + if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, false)) return false; _dst.create(srcSize, CV_MAKETYPE(ddepth, cn)); UMat dst = _dst.getUMat(); - return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, true); + return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, false); } #endif diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl index e99fa6ee0..2657ae931 100644 --- a/modules/imgproc/src/opencl/filterSepCol.cl +++ b/modules/imgproc/src/opencl/filterSepCol.cl @@ -60,6 +60,8 @@ Niko The info above maybe obsolete. ***********************************************************************************/ +#define DIG(a) a, +__constant float mat_kernel[] = { COEFF }; __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter (__global const GENTYPE_SRC * restrict src, @@ -70,8 +72,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter const int dst_offset_in_pixel, const int dst_step_in_pixel, const int dst_cols, - const int dst_rows, - __constant float * mat_kernel) + const int dst_rows) { int x = get_global_id(0); int y = get_global_id(1); diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl index dfbf30099..d0623f590 100644 --- a/modules/imgproc/src/opencl/filterSepRow.cl +++ b/modules/imgproc/src/opencl/filterSepRow.cl @@ -144,6 +144,9 @@ Niko The info above maybe obsolete. ***********************************************************************************/ +#define DIG(a) a, +__constant float mat_kernel[] = { COEFF }; + __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0 (__global uchar * restrict src, int src_step_in_pixel, @@ -153,8 +156,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0)<<2; int y = get_global_id(1); @@ -297,8 +299,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float4 * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1); @@ -391,8 +392,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1); @@ -484,8 +484,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float4 * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1);