From 10a52220f0ff40de659c124d7ba0ebef42a25da1 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 31 Mar 2014 16:45:15 +0400 Subject: [PATCH] Added integer arithmetic to sepFilter2D --- modules/imgproc/src/filter.cpp | 72 +++++++++++++++------- modules/imgproc/src/opencl/filterSepCol.cl | 7 ++- modules/imgproc/src/opencl/filterSepRow.cl | 3 +- modules/imgproc/src/smooth.cpp | 12 ++-- modules/imgproc/test/ocl/test_filters.cpp | 13 ++-- 5 files changed, 73 insertions(+), 34 deletions(-) diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 2bc6b8a70..d81f8affb 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3275,6 +3275,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; Size bufSize = buf.size(); + int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type); if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) return false; @@ -3306,11 +3307,11 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX radiusX, (int)localsize[0], (int)localsize[1], cn, btype, extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", - ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)), - ocl::convertTypeStr(sdepth, CV_32F, cn, cvt), - ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F), + ocl::typeToStr(type), ocl::typeToStr(buf_type), + ocl::convertTypeStr(sdepth, bdepth, cn, cvt), + ocl::typeToStr(sdepth), ocl::typeToStr(bdepth), doubleSupport ? " -D DOUBLE_SUPPORT" : ""); - build_options += ocl::kernelToStr(kernelX, CV_32F); + build_options += ocl::kernelToStr(kernelX, bdepth); Size srcWholeSize; Point srcOffset; src.locateROI(srcWholeSize, srcOffset); @@ -3337,7 +3338,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX return k.run(2, globalsize, localsize, false); } -static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor) +static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor, int bits) { bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if (dst.depth() == CV_64F && !doubleSupport) @@ -3352,6 +3353,7 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY int dtype = dst.type(), cn = CV_MAT_CN(dtype), ddepth = CV_MAT_DEPTH(dtype); Size sz = dst.size(); + int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type); globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1]; globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0]; @@ -3359,13 +3361,13 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY char cvt[40]; cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d" " -D srcT=%s -D dstT=%s -D convertToDstT=%s" - " -D srcT1=%s -D dstT1=%s%s", + " -D srcT1=%s -D dstT1=%s -D BITS=%d%s", anchor, (int)localsize[0], (int)localsize[1], cn, - ocl::typeToStr(buf.type()), ocl::typeToStr(dtype), - ocl::convertTypeStr(CV_32F, ddepth, cn, cvt), - ocl::typeToStr(CV_32F), ocl::typeToStr(ddepth), - doubleSupport ? " -D DOUBLE_SUPPORT" : ""); - build_options += ocl::kernelToStr(kernelY, CV_32F); + ocl::typeToStr(buf_type), ocl::typeToStr(dtype), + ocl::convertTypeStr(bdepth, ddepth, cn, cvt), + ocl::typeToStr(bdepth), ocl::typeToStr(ddepth), + bits, doubleSupport ? " -D DOUBLE_SUPPORT" : ""); + build_options += ocl::kernelToStr(kernelY, bdepth); ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options); @@ -3457,13 +3459,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, if (ddepth < 0) ddepth = sdepth; - CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 && - imgSize.width > optimizedSepFilterLocalSize + (kernelX.cols >> 1) && - imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) && - (!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) && - (d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())), - ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta, - borderType & ~BORDER_ISOLATED, ddepth), true) + //CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 && + // imgSize.width > optimizedSepFilterLocalSize + (kernelX.cols >> 1) && + // imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) && + // (!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) && + // (d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())), + // ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta, + // borderType & ~BORDER_ISOLATED, ddepth), true) if (anchor.x < 0) anchor.x = kernelX.cols >> 1; @@ -3474,19 +3476,45 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, Size srcWholeSize; Point srcOffset; src.locateROI(srcWholeSize, srcOffset); - bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 && - src.cols % 4 == 0 && src.step % 4 == 0; + //bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 && + // src.cols % 4 == 0 && src.step % 4 == 0; + bool fast8uc1 = false; + + int rtype = getKernelType(kernelX, + kernelX.rows == 1 ? Point(anchor.x, 0) : Point(0, anchor.x)); + int ctype = getKernelType(kernelY, + kernelY.rows == 1 ? Point(anchor.y, 0) : Point(0, anchor.y)); + + int bdepth = CV_32F; + int bits = 0; + + if( sdepth == CV_8U && + ((rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && + ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && + ddepth == CV_8U))) + { + bdepth = CV_32S; + bits = 8; + _kernelX.getMat().convertTo( kernelX, CV_32S, 1 << bits ); + _kernelY.getMat().convertTo( kernelY, CV_32S, 1 << bits ); + kernelX = kernelX.reshape(1,1); + kernelY = kernelY.reshape(1,1); + bits *= 2; + delta *= (1 << bits); + } Size srcSize = src.size(); Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1); - UMat buf(bufSize, CV_32FC(cn)); + UMat buf(bufSize, CV_MAKETYPE(bdepth, cn)); if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1)) return false; + Mat buffer = buf.getMat(ACCESS_READ); + _dst.create(srcSize, CV_MAKETYPE(ddepth, cn)); UMat dst = _dst.getUMat(); - return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y); + return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y, bits); } #endif diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl index 29514cc21..94730d878 100644 --- a/modules/imgproc/src/opencl/filterSepCol.cl +++ b/modules/imgproc/src/opencl/filterSepCol.cl @@ -60,7 +60,7 @@ #endif #define DIG(a) a, -__constant float mat_kernel[] = { COEFF }; +__constant srcT1 mat_kernel[] = { COEFF }; __kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta) @@ -97,8 +97,13 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x]; temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x]; sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]); + //sum += temp[0]*mat_kernel[RADIUSY - i] + temp[1] * mat_kernel[RADIUSY + i]; } +#if BITS > 0 + sum = sum >> BITS; +#endif + // write the result to dst if (x < dst_cols && y < dst_rows) { diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl index 726de448e..8deec35ae 100644 --- a/modules/imgproc/src/opencl/filterSepRow.cl +++ b/modules/imgproc/src/opencl/filterSepRow.cl @@ -138,7 +138,7 @@ #endif #define DIG(a) a, -__constant float mat_kernel[] = { COEFF }; +__constant dstT1 mat_kernel[] = { COEFF }; __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel, int src_offset_x, int src_offset_y, int src_cols, int src_rows, int src_whole_cols, int src_whole_rows, @@ -356,6 +356,7 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse temp[0] = LDS_DAT[l_y][l_x + RADIUSX - i]; temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i]; sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]); + //sum += convertToDstT(temp[0])*mat_kernel[RADIUSX - i] + convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]; } // write the result to dst diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 864fec797..e2365cd20 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -1196,12 +1196,12 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize, } #endif - if (type == CV_8U) - { - CV_OCL_RUN_(_dst.isUMat() && _src.dims() <= 2 && - (!(borderType & BORDER_ISOLATED) || _src.offset() == 0), - GaussianBlur_8u(_src, _dst, ksize, sigma1, sigma2, borderType)) - } + //if (type == CV_8U) + //{ + // CV_OCL_RUN_(_dst.isUMat() && _src.dims() <= 2 && + // (!(borderType & BORDER_ISOLATED) || _src.offset() == 0), + // GaussianBlur_8u(_src, _dst, ksize, sigma1, sigma2, borderType)) + //} Mat kx, ky; createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2); diff --git a/modules/imgproc/test/ocl/test_filters.cpp b/modules/imgproc/test/ocl/test_filters.cpp index a43a7712f..aee1f08b1 100644 --- a/modules/imgproc/test/ocl/test_filters.cpp +++ b/modules/imgproc/test/ocl/test_filters.cpp @@ -209,7 +209,7 @@ typedef FilterTestBase GaussianBlurTest; OCL_TEST_P(GaussianBlurTest, Mat) { - for (int j = 0; j < test_loop_times; j++) + for (int j = 0; j < test_loop_times + 100; j++) { random_roi(); @@ -222,7 +222,8 @@ OCL_TEST_P(GaussianBlurTest, Mat) if (checkNorm2(dst_roi, udst_roi) > 2 && CV_MAT_DEPTH(type) == CV_8U) { - Mat udst = udst_roi.getMat(ACCESS_READ); + std::cout << "i = " << j << std::endl; + Mat uudst = udst_roi.getMat(ACCESS_READ); Mat diff; absdiff(dst_roi, udst, diff); int nonZero = countNonZero(diff); @@ -231,11 +232,15 @@ OCL_TEST_P(GaussianBlurTest, Mat) minMaxLoc(diff, (double*)0, &max, (Point*) 0, &maxn); uchar a = dst_roi.at(maxn); - uchar b = udst.at(maxn); + uchar b = uudst.at(maxn); + std::cout << "dst_roi" << dst_roi << std::endl; + std::cout << "udst_roi" << uudst << std::endl; } - Near(CV_MAT_DEPTH(type) == CV_8U ? 2 : 5e-5, false); + + + Near(CV_MAT_DEPTH(type) == CV_8U ? 1 : 5e-5, false); } }