From f7d6d3cff59a21c8ba1d732df0888438792a1e1c Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 24 Mar 2014 23:07:00 +0400 Subject: [PATCH] improved cv::filter2D --- modules/core/src/system.cpp | 25 ++++++++++++------------- modules/imgproc/src/filter.cpp | 12 ++++++------ modules/imgproc/src/opencl/filter2D.cl | 5 ++++- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index d8d8ae632..1e6f592d5 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -414,24 +414,23 @@ const String& getBuildInformation() String format( const char* fmt, ... ) { - char buf[1024]; + AutoBuffer<char, 1024> buf; - va_list va; - va_start(va, fmt); - int len = vsnprintf(buf, sizeof(buf), fmt, va); - va_end(va); - - if (len >= (int)sizeof(buf)) + for ( ; ; ) { - String s(len, '\0'); + va_list va; va_start(va, fmt); - len = vsnprintf((char*)s.c_str(), len + 1, fmt, va); - (void)len; + int bsize = static_cast<int>(buf.size()), + len = vsnprintf((char *)buf, bsize, fmt, va); va_end(va); - return s; - } - return String(buf, len); + if (len < 0 || len >= bsize) + { + buf.resize(std::max(bsize << 1, len + 1)); + continue; + } + return String((char *)buf, len); + } } String tempfile( const char* suffix ) diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index ee89b2c49..2bc6b8a70 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -42,7 +42,6 @@ #include "precomp.hpp" #include "opencl_kernels.hpp" -#include /****************************************************************************************\ Base Image Filter @@ -3197,6 +3196,8 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, size_t tryWorkItems = maxWorkItemSizes[0]; char cvt[2][40]; + String kerStr = ocl::kernelToStr(kernelMatDataFloat, CV_32F); + for ( ; ; ) { size_t BLOCK_SIZE = tryWorkItems; @@ -3226,14 +3227,14 @@ static bool ocl_filter2D( InputArray _src, OutputArray 
_dst, int ddepth, String opts = format("-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d " "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " - "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s " + "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s%s " "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " "-D convertToWT=%s -D convertToDstT=%s", (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y, cn, anchor.x, anchor.y, ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType], extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", - doubleSupport ? " -D DOUBLE_SUPPORT" : "", + doubleSupport ? " -D DOUBLE_SUPPORT" : "", kerStr.c_str(), ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), @@ -3255,7 +3256,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, } _dst.create(sz, dtype); - UMat dst = _dst.getUMat(), kernalDataUMat(kernelMatDataFloat, true); + UMat dst = _dst.getUMat(); int srcOffsetX = (int)((src.offset % src.step) / src.elemSize()); int srcOffsetY = (int)(src.offset / src.step); @@ -3263,8 +3264,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, int srcEndY = (isolated ? 
(srcOffsetY + sz.height) : wholeSize.height); k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY, - srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst), - ocl::KernelArg::PtrReadOnly(kernalDataUMat), (float)delta); + srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst), (float)delta); return k.run(2, globalsize, localsize, false); } diff --git a/modules/imgproc/src/opencl/filter2D.cl b/modules/imgproc/src/opencl/filter2D.cl index cfce26a6f..49657181f 100644 --- a/modules/imgproc/src/opencl/filter2D.cl +++ b/modules/imgproc/src/opencl/filter2D.cl @@ -200,8 +200,11 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co } } +#define DIG(a) a, +__constant WT1 kernelData[] = { COEFF }; + __kernel void filter2D(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY, - __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, __constant WT1 * kernelData, float delta) + __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, float delta) { const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY }; // for non-isolated border: offsetX, offsetY, wholeX, wholeY