From 1eedc6c42aaf7b5ac5cd5edca1b0a0367c7eb3f6 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 29 Apr 2013 11:51:47 +0400 Subject: [PATCH] refactored Linear Filter --- .../gpufilters/include/opencv2/gpufilters.hpp | 41 ++- modules/gpufilters/perf/perf_filters.cpp | 81 +++-- modules/gpufilters/src/cuda/filter2d.cu | 189 +++++------ modules/gpufilters/src/filtering.cpp | 319 ++++++++---------- modules/gpufilters/test/test_filters.cpp | 210 ++++++------ samples/gpu/performance/tests.cpp | 5 +- 6 files changed, 416 insertions(+), 429 deletions(-) diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index 5cc2ac49a..32d3403d5 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -96,6 +96,34 @@ inline void blur(InputArray src, OutputArray dst, Size ksize, Point anchor, Stre f->apply(src, dst, stream); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Linear Filter + +//! non-separable linear 2D filter +CV_EXPORTS Ptr createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1), + int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, + Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, Point anchor, int borderType, Stream& stream) +{ + Ptr f = gpu::createLinearFilter(src.type(), ddepth, kernel, anchor, borderType); + f->apply(src, dst, stream); +} + + + + + + + +//! applies Laplacian operator to the image +//! supports only ksize = 1 and ksize = 3 +CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); + + @@ -194,13 +222,7 @@ CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, co CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int iterations = 1); -//! returns 2D filter with the specified kernel -//! supports CV_8U, CV_16U and CV_32F one and four channel image -CV_EXPORTS Ptr getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); -//! returns the non-separable linear filter engine -CV_EXPORTS Ptr createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, - Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT); //! returns the primitive row filter with the specified kernel. //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type. @@ -269,9 +291,6 @@ CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()); -//! applies non-separable 2D linear filter to the image -CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); - //! applies separable 2D linear filter to the image CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); @@ -297,10 +316,6 @@ CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); -//! applies Laplacian operator to the image -//! supports only ksize = 1 and ksize = 3 -CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); - }} // namespace cv { namespace gpu { #undef __OPENCV_GPUFILTERS_DEPR_BEFORE__ diff --git a/modules/gpufilters/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp index 35c4a94fb..3d3f58755 100644 --- a/modules/gpufilters/perf/perf_filters.cpp +++ b/modules/gpufilters/perf/perf_filters.cpp @@ -86,6 +86,51 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur, } } +////////////////////////////////////////////////////////////////////// +// Filter2D + +PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15))) +{ + declare.time(20.0); + + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const int ksize = GET_PARAM(2); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Mat kernel(ksize, ksize, CV_32FC1); + declare.in(kernel, WARMUP_RNG); + + if (PERF_RUN_GPU()) + { + const cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat dst; + + cv::Ptr filter2D = cv::gpu::createLinearFilter(d_src.type(), -1, kernel); + + TEST_CYCLE() filter2D->apply(d_src, dst); + + GPU_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::filter2D(src, dst, -1, kernel); + + CPU_SANITY_CHECK(dst); + } +} + + + + + + + + ////////////////////////////////////////////////////////////////////// // Sobel @@ -330,39 +375,3 @@ PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8 CPU_SANITY_CHECK(dst); } } - -////////////////////////////////////////////////////////////////////// -// Filter2D - -PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15))) -{ - declare.time(20.0); - - const cv::Size size = GET_PARAM(0); - const int type = GET_PARAM(1); - const int ksize = GET_PARAM(2); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - cv::Mat kernel(ksize, ksize, CV_32FC1); - declare.in(kernel, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::filter2D(d_src, dst, -1, kernel); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::filter2D(src, dst, -1, kernel); - - CPU_SANITY_CHECK(dst); - } -} diff --git a/modules/gpufilters/src/cuda/filter2d.cu b/modules/gpufilters/src/cuda/filter2d.cu index 80c93c54e..4e913124d 100644 --- a/modules/gpufilters/src/cuda/filter2d.cu +++ b/modules/gpufilters/src/cuda/filter2d.cu @@ -48,111 +48,104 @@ namespace cv { namespace gpu { namespace cudev { - namespace imgproc + template + __global__ void filter2D(const SrcPtr src, PtrStepSz dst, + const float* __restrict__ kernel, + const int kWidth, const int kHeight, + const int anchorX, const int anchorY) { - #define FILTER2D_MAX_KERNEL_SIZE 16 + typedef typename TypeVec::cn>::vec_type sum_t; - __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE]; + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; - template - __global__ void filter2D(const SrcT src, PtrStepSz dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY) + if (x >= dst.cols || y >= dst.rows) + return; + + sum_t res = VecTraits::all(0); + int kInd = 0; + + for (int i = 0; i < kHeight; ++i) { - typedef typename TypeVec::cn>::vec_type sum_t; - - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= dst.cols || y >= dst.rows) - return; - - sum_t res = VecTraits::all(0); - int kInd = 0; - - for (int i = 0; i < kHeight; ++i) - { - for (int j = 0; j < kWidth; ++j) - res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++]; - } - - dst(y, x) = saturate_cast(res); + for (int j = 0; j < kWidth; ++j) + res = res + src(y - anchorY + i, x - anchorX + j) * kernel[kInd++]; } - template class Brd> struct Filter2DCaller; - - #define IMPLEMENT_FILTER2D_TEX_READER(type) \ - texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \ - struct tex_filter2D_ ## type ## _reader \ - { \ - typedef type elem_type; \ - typedef int index_type; \ - const int xoff; \ - const int yoff; \ - tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \ - __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \ - { \ - return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \ - } \ - }; \ - template class Brd> struct Filter2DCaller< type , D, Brd> \ - { \ - static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz dst, \ - int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \ - { \ - typedef typename TypeVec::cn>::vec_type work_type; \ - dim3 block(16, 16); \ - dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \ - bindTexture(&tex_filter2D_ ## type , srcWhole); \ - tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \ - Brd brd(dst.rows, dst.cols, VecTraits::make(borderValue)); \ - BorderReader< tex_filter2D_ ## type ##_reader, Brd > brdSrc(texSrc, brd); \ - filter2D<<>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \ - cudaSafeCall( cudaGetLastError() ); \ - if (stream == 0) \ - cudaSafeCall( cudaDeviceSynchronize() ); \ - } \ - }; - - IMPLEMENT_FILTER2D_TEX_READER(uchar); - IMPLEMENT_FILTER2D_TEX_READER(uchar4); - - IMPLEMENT_FILTER2D_TEX_READER(ushort); - IMPLEMENT_FILTER2D_TEX_READER(ushort4); - - IMPLEMENT_FILTER2D_TEX_READER(float); - IMPLEMENT_FILTER2D_TEX_READER(float4); - - #undef IMPLEMENT_FILTER2D_TEX_READER - - template - void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, - int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, - int borderMode, const float* borderValue, cudaStream_t stream) - { - typedef void (*func_t)(const PtrStepSz srcWhole, int xoff, int yoff, PtrStepSz dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream); - static const func_t funcs[] = - { - Filter2DCaller::call, - Filter2DCaller::call, - Filter2DCaller::call, - Filter2DCaller::call, - Filter2DCaller::call - }; - - if (stream == 0) - cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); - else - cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - - funcs[borderMode](static_cast< PtrStepSz >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream); - } - - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); + dst(y, x) = saturate_cast(res); } + + template class Brd> struct Filter2DCaller; + + #define IMPLEMENT_FILTER2D_TEX_READER(type) \ + texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \ + struct tex_filter2D_ ## type ## _reader \ + { \ + typedef type elem_type; \ + typedef int index_type; \ + const int xoff; \ + const int yoff; \ + tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \ + __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \ + { \ + return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \ + } \ + }; \ + template class Brd> struct Filter2DCaller< type , D, Brd> \ + { \ + static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz dst, const float* kernel, \ + int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \ + { \ + typedef typename TypeVec::cn>::vec_type work_type; \ + dim3 block(16, 16); \ + dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \ + bindTexture(&tex_filter2D_ ## type , srcWhole); \ + tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \ + Brd brd(dst.rows, dst.cols, VecTraits::make(borderValue)); \ + BorderReader< tex_filter2D_ ## type ##_reader, Brd > brdSrc(texSrc, brd); \ + filter2D<<>>(brdSrc, dst, kernel, kWidth, kHeight, anchorX, anchorY); \ + cudaSafeCall( cudaGetLastError() ); \ + if (stream == 0) \ + cudaSafeCall( cudaDeviceSynchronize() ); \ + } \ + }; + + IMPLEMENT_FILTER2D_TEX_READER(uchar); + IMPLEMENT_FILTER2D_TEX_READER(uchar4); + + IMPLEMENT_FILTER2D_TEX_READER(ushort); + IMPLEMENT_FILTER2D_TEX_READER(ushort4); + + IMPLEMENT_FILTER2D_TEX_READER(float); + IMPLEMENT_FILTER2D_TEX_READER(float4); + + #undef IMPLEMENT_FILTER2D_TEX_READER + + template + void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, + int kWidth, int kHeight, int anchorX, int anchorY, + int borderMode, const float* borderValue, cudaStream_t stream) + { + typedef void (*func_t)(const PtrStepSz srcWhole, int xoff, int yoff, PtrStepSz dst, const float* kernel, + int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream); + static const func_t funcs[] = + { + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call + }; + + funcs[borderMode]((PtrStepSz) srcWhole, ofsX, ofsY, (PtrStepSz) dst, kernel, + kWidth, kHeight, anchorX, anchorY, borderValue, stream); + } + + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); }}} #endif // CUDA_DISABLER diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index 35df05ec6..3135b599a 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -49,6 +49,8 @@ using namespace cv::gpu; Ptr cv::gpu::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } + Ptr cv::gpu::createFilter2D_GPU(const Ptr&, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int, GpuMat&) { throw_no_cuda(); return Ptr(0); } @@ -57,8 +59,6 @@ Ptr cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { Ptr cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_no_cuda(); return Ptr(0); } @@ -76,7 +76,6 @@ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); } void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_no_cuda(); } void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); } -void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point, int, Stream&) { throw_no_cuda(); } void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_no_cuda(); } void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_no_cuda(); } void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_no_cuda(); } @@ -188,6 +187,138 @@ Ptr cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point return new NPPBoxFilter(srcType, dstType, ksize, anchor, borderMode, borderVal); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Linear Filter + +namespace cv { namespace gpu { namespace cudev +{ + template + void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, + int kWidth, int kHeight, int anchorX, int anchorY, + int borderMode, const float* borderValue, cudaStream_t stream); +}}} + +namespace +{ + class LinearFilter : public Filter + { + public: + LinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + typedef void (*filter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, + int kWidth, int kHeight, int anchorX, int anchorY, + int borderMode, const float* borderValue, cudaStream_t stream); + + GpuMat kernel_; + Point anchor_; + int type_; + filter2D_t func_; + int borderMode_; + Scalar_ borderVal_; + }; + + LinearFilter::LinearFilter(int srcType, int dstType, InputArray _kernel, Point anchor, int borderMode, Scalar borderVal) : + anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal) + { + const int sdepth = CV_MAT_DEPTH(srcType); + const int scn = CV_MAT_CN(srcType); + + Mat kernel = _kernel.getMat(); + + CV_Assert( sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F ); + CV_Assert( scn == 1 || scn == 4 ); + CV_Assert( dstType == srcType ); + CV_Assert( kernel.channels() == 1 ); + CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP ); + + Mat kernel32F; + kernel.convertTo(kernel32F, CV_32F); + + kernel_ = gpu::createContinuous(kernel.size(), CV_32FC1); + kernel_.upload(kernel32F); + + normalizeAnchor(anchor_, kernel.size()); + + switch (srcType) + { + case CV_8UC1: + func_ = cudev::filter2D; + break; + case CV_8UC4: + func_ = cudev::filter2D; + break; + case CV_16UC1: + func_ = cudev::filter2D; + break; + case CV_16UC4: + func_ = cudev::filter2D; + break; + case CV_32FC1: + func_ = cudev::filter2D; + break; + case CV_32FC4: + func_ = cudev::filter2D; + break; + } + } + + void LinearFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == type_ ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + Point ofs; + Size wholeSize; + src.locateROI(wholeSize, ofs); + + GpuMat srcWhole(wholeSize, src.type(), src.datastart); + + func_(srcWhole, ofs.x, ofs.y, dst, kernel_.ptr(), + kernel_.cols, kernel_.rows, anchor_.x, anchor_.y, + borderMode_, borderVal_.val, StreamAccessor::getStream(_stream)); + } +} + +Ptr cv::gpu::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal) +{ + if (dstType < 0) + dstType = srcType; + + return new LinearFilter(srcType, dstType, kernel, anchor, borderMode, borderVal); +} + + + + + + + + + + +void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream) +{ + CV_Assert(ksize == 1 || ksize == 3); + + static const int K[2][9] = + { + {0, 1, 0, 1, -4, 1, 0, 1, 0}, + {2, 0, 2, 0, -8, 0, 2, 0, 2} + }; + Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]); + if (scale != 1) + kernel *= scale; + + Ptr f = gpu::createLinearFilter(src.type(), ddepth, kernel, Point(-1,-1), borderType); + f->apply(src, dst, stream); +} + @@ -702,172 +833,6 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke } } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Linear Filter - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - template - void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, - int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, - int borderMode, const float* borderValue, cudaStream_t stream); - } -}}} - -namespace -{ - typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, - const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor); - - struct NPPLinearFilter : public BaseFilter_GPU - { - NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) : - BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - NppiSize oKernelSize; - oKernelSize.height = ksize.height; - oKernelSize.width = ksize.width; - NppiPoint oAnchor; - oAnchor.x = anchor.x; - oAnchor.y = anchor.y; - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, - kernel.ptr(), oKernelSize, oAnchor, nDivisor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - GpuMat kernel; - Npp32s nDivisor; - nppFilter2D_t func; - }; - - typedef void (*gpuFilter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, - int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, - int borderMode, const float* borderValue, cudaStream_t stream); - - struct GpuFilter2D : public BaseFilter_GPU - { - GpuFilter2D(Size ksize_, Point anchor_, gpuFilter2D_t func_, const GpuMat& kernel_, int brd_type_) : - BaseFilter_GPU(ksize_, anchor_), func(func_), kernel(kernel_), brd_type(brd_type_) - { - } - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) - { - using namespace cv::gpu::cudev::imgproc; - - Point ofs; - Size wholeSize; - src.locateROI(wholeSize, ofs); - GpuMat srcWhole(wholeSize, src.type(), src.datastart); - - static const Scalar_ zero = Scalar_::all(0.0f); - func(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr(), brd_type, zero.val, StreamAccessor::getStream(stream)); - } - - gpuFilter2D_t func; - GpuMat kernel; - int brd_type; - }; -} - -Ptr cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int brd_type) -{ - using namespace cv::gpu::cudev::imgproc; - - int sdepth = CV_MAT_DEPTH(srcType); - int scn = CV_MAT_CN(srcType); - - CV_Assert(sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F); - CV_Assert(scn == 1 || scn == 4); - CV_Assert(dstType == srcType); - CV_Assert(brd_type == BORDER_REFLECT101 || brd_type == BORDER_REPLICATE || brd_type == BORDER_CONSTANT || brd_type == BORDER_REFLECT || brd_type == BORDER_WRAP); - - Size ksize = kernel.size(); - -#if 0 - if ((srcType == CV_8UC1 || srcType == CV_8UC4) && brd_type == BORDER_CONSTANT) - { - static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R}; - - GpuMat gpu_krnl; - int nDivisor; - normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true); - - normalizeAnchor(anchor, ksize); - - return Ptr(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)])); - } -#endif - - CV_Assert(ksize.width * ksize.height <= 16 * 16); - - GpuMat gpu_krnl; - normalizeKernel(kernel, gpu_krnl, CV_32F); - - normalizeAnchor(anchor, ksize); - - gpuFilter2D_t func = 0; - - switch (srcType) - { - case CV_8UC1: - func = filter2D_gpu; - break; - case CV_8UC4: - func = filter2D_gpu; - break; - case CV_16UC1: - func = filter2D_gpu; - break; - case CV_16UC4: - func = filter2D_gpu; - break; - case CV_32FC1: - func = filter2D_gpu; - break; - case CV_32FC4: - func = filter2D_gpu; - break; - } - - return Ptr(new GpuFilter2D(ksize, anchor, func, gpu_krnl, brd_type)); -} - -Ptr cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int borderType) -{ - Ptr linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, anchor, borderType); - - return createFilter2D_GPU(linearFilter, srcType, dstType); -} - -void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, int borderType, Stream& stream) -{ - if (ddepth < 0) - ddepth = src.depth(); - - int dst_type = CV_MAKE_TYPE(ddepth, src.channels()); - - Ptr f = createLinearFilter_GPU(src.type(), dst_type, kernel, anchor, borderType); - - dst.create(src.size(), dst_type); - - f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Separable Linear Filter @@ -1208,22 +1173,6 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream); } -void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream) -{ - CV_Assert(ksize == 1 || ksize == 3); - - static const int K[2][9] = - { - {0, 1, 0, 1, -4, 1, 0, 1, 0}, - {2, 0, 2, 0, -8, 0, 2, 0, 2} - }; - Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]); - if (scale != 1) - kernel *= scale; - - filter2D(src, dst, ddepth, kernel, Point(-1,-1), borderType, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Gaussian Filter diff --git a/modules/gpufilters/test/test_filters.cpp b/modules/gpufilters/test/test_filters.cpp index a63d92b3d..6d6da7e4b 100644 --- a/modules/gpufilters/test/test_filters.cpp +++ b/modules/gpufilters/test/test_filters.cpp @@ -118,6 +118,121 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine( testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +///////////////////////////////////////////////////////////////////////////////////////////////// +// Filter2D + +PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int type; + cv::Size ksize; + cv::Point anchor; + int borderType; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + ksize = GET_PARAM(3); + anchor = GET_PARAM(4); + borderType = GET_PARAM(5); + useRoi = GET_PARAM(6); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Filter2D, Accuracy) +{ + cv::Mat src = randomMat(size, type); + cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0); + + cv::Ptr filter2D = cv::gpu::createLinearFilter(src.type(), -1, kernel, anchor, borderType); + + cv::gpu::GpuMat dst = createMat(size, type, useRoi); + filter2D->apply(loadMat(src, useRoi), dst); + + cv::Mat dst_gold; + cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType); + + EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0); +} + +INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)), + testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), + testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); + + + + + + + + + + + + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// Laplacian + +PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int type; + cv::Size ksize; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + ksize = GET_PARAM(3); + useRoi = GET_PARAM(4); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Laplacian, Accuracy) +{ + cv::Mat src = randomMat(size, type); + + cv::gpu::GpuMat dst = createMat(size, type, useRoi); + cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width); + + cv::Mat dst_gold; + cv::Laplacian(src, dst_gold, -1, ksize.width); + + EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3); +} + +INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)), + testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))), + WHOLE_SUBMAT)); + + + + + + + + + ///////////////////////////////////////////////////////////////////////////////////////////////// // Sobel @@ -332,49 +447,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine( BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); -///////////////////////////////////////////////////////////////////////////////////////////////// -// Laplacian - -PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - cv::Size size; - int type; - cv::Size ksize; - bool useRoi; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - size = GET_PARAM(1); - type = GET_PARAM(2); - ksize = GET_PARAM(3); - useRoi = GET_PARAM(4); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -GPU_TEST_P(Laplacian, Accuracy) -{ - cv::Mat src = randomMat(size, type); - - cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width); - - cv::Mat dst_gold; - cv::Laplacian(src, dst_gold, -1, ksize.width); - - EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3); -} - -INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine( - ALL_DEVICES, - DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)), - testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))), - WHOLE_SUBMAT)); - ///////////////////////////////////////////////////////////////////////////////////////////////// // Erode @@ -527,56 +599,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, MorphEx, testing::Combine( testing::Values(Iterations(1), Iterations(2), Iterations(3)), WHOLE_SUBMAT)); -///////////////////////////////////////////////////////////////////////////////////////////////// -// Filter2D - -PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - cv::Size size; - int type; - cv::Size ksize; - cv::Point anchor; - int borderType; - bool useRoi; - - cv::Mat img; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - size = GET_PARAM(1); - type = GET_PARAM(2); - ksize = GET_PARAM(3); - anchor = GET_PARAM(4); - borderType = GET_PARAM(5); - useRoi = GET_PARAM(6); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -GPU_TEST_P(Filter2D, Accuracy) -{ - cv::Mat src = randomMat(size, type); - cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0); - - cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::filter2D(loadMat(src, useRoi), dst, -1, kernel, anchor, borderType); - - cv::Mat dst_gold; - cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType); - - EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0); -} - -INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine( - ALL_DEVICES, - DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)), - testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), - testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), - testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), - WHOLE_SUBMAT)); - #endif // HAVE_CUDA diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 97eb7a82a..f6ace6d77 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -961,10 +961,11 @@ TEST(filter2D) gpu::GpuMat d_src(src); gpu::GpuMat d_dst; - gpu::filter2D(d_src, d_dst, -1, kernel); + Ptr filter2D = gpu::createLinearFilter(d_src.type(), -1, kernel); + filter2D->apply(d_src, d_dst); GPU_ON; - gpu::filter2D(d_src, d_dst, -1, kernel); + filter2D->apply(d_src, d_dst); GPU_OFF; } }