From 77501f3ed0becb50aa1939611340f6ef9e985927 Mon Sep 17 00:00:00 2001 From: peng xiao <hisenxpress@gmail.com> Date: Mon, 8 Apr 2013 15:19:44 +0800 Subject: [PATCH 1/3] ocl: Add dft based convolve implementation. Match template in ocl module now can be utilized with dft based convolve. Note this feature needs OpenCV to be built with clAmdFft library. --- modules/ocl/include/opencv2/ocl.hpp | 24 ++- modules/ocl/src/imgproc.cpp | 159 +++++++++++++++++- modules/ocl/src/match_template.cpp | 30 +++- .../opencl/imgproc_mulAndScaleSpectrums.cl | 96 +++++++++++ modules/ocl/test/test_fft.cpp | 134 +++++++++++++++ 5 files changed, 432 insertions(+), 11 deletions(-) create mode 100644 modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl diff --git a/modules/ocl/include/opencv2/ocl.hpp b/modules/ocl/include/opencv2/ocl.hpp index f79e6b818..3145c6098 100644 --- a/modules/ocl/include/opencv2/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl.hpp @@ -540,9 +540,29 @@ namespace cv CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2); CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2); - //! computes convolution of two images + struct CV_EXPORTS ConvolveBuf + { + Size result_size; + Size block_size; + Size user_block_size; + Size dft_size; + + oclMat image_spect, templ_spect, result_spect; + oclMat image_block, templ_block, result_data; + + void create(Size image_size, Size templ_size); + static Size estimateBlockSize(Size result_size, Size templ_size); + }; + + //! computes convolution of two images, may use discrete Fourier transform //! support only CV_32FC1 type - CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result); + CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false); + CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf); + + //! Performs a per-element multiplication of two Fourier spectrums. 
+ //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. + //! support only CV_32FC2 type + CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false); CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0); diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 47c71477e..e3b3d2522 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -25,6 +25,7 @@ // Xu Pang, pangxu010@163.com // Wu Zailong, bullet@yeah.net // Wenju He, wenju@multicorewareinc.com +// Peng Xiao, pengxiao@outlook.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -79,6 +80,7 @@ namespace cv extern const char *imgproc_calcHarris; extern const char *imgproc_calcMinEigenVal; extern const char *imgproc_convolve; + extern const char *imgproc_mulAndScaleSpectrums; ////////////////////////////////////OpenCL call wrappers//////////////////////////// template <typename T> struct index_and_sizeof; @@ -1585,11 +1587,148 @@ namespace cv } } +//////////////////////////////////mulSpectrums//////////////////////////////////////////////////// +void cv::ocl::mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int /*flags*/, float scale, bool conjB) +{ + CV_Assert(a.type() == CV_32FC2); + CV_Assert(b.type() == CV_32FC2); + + c.create(a.size(), CV_32FC2); + + size_t lt[3] = { 16, 16, 1 }; + size_t gt[3] = { a.cols, a.rows, 1 }; + + String kernelName = conjB ? 
"mulAndScaleSpectrumsKernel_CONJ":"mulAndScaleSpectrumsKernel"; + + std::vector<std::pair<size_t , const void *> > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&a.data )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&b.data )); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&c.data )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.cols )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.step )); + + Context *clCxt = Context::getContext(); + openCLExecuteKernel(clCxt, &imgproc_mulAndScaleSpectrums, kernelName, gt, lt, args, -1, -1); +} //////////////////////////////////convolve//////////////////////////////////////////////////// inline int divUp(int total, int grain) { return (total + grain - 1) / grain; } + +// ported from CUDA module +void cv::ocl::ConvolveBuf::create(Size image_size, Size templ_size) +{ + result_size = Size(image_size.width - templ_size.width + 1, + image_size.height - templ_size.height + 1); + + block_size = user_block_size; + if (user_block_size.width == 0 || user_block_size.height == 0) + block_size = estimateBlockSize(result_size, templ_size); + + dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.))); + dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) 
/ std::log(2.))); + + // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192), + // see CUDA Toolkit 4.1 CUFFT Library Programming Guide + //if (dft_size.width > 8192) + dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1.); + //if (dft_size.height > 8192) + dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1.); + + // To avoid wasting time doing small DFTs + dft_size.width = std::max(dft_size.width, 512); + dft_size.height = std::max(dft_size.height, 512); + + image_block.create(dft_size, CV_32F); + templ_block.create(dft_size, CV_32F); + result_data.create(dft_size, CV_32F); + + //spect_len = dft_size.height * (dft_size.width / 2 + 1); + image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2); + templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2); + result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2); + + // Use maximum result matrix block size for the estimated DFT block size + block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width); + block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); +} + +Size cv::ocl::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size*/) +{ + int width = (result_size.width + 2) / 3; + int height = (result_size.height + 2) / 3; + width = std::min(width, result_size.width); + height = std::min(height, result_size.height); + return Size(width, height); +} + +static void convolve_run_fft(const oclMat &image, const oclMat &templ, oclMat &result, bool ccorr, ConvolveBuf& buf) +{ +#if defined HAVE_CLAMDFFT + CV_Assert(image.type() == CV_32F); + CV_Assert(templ.type() == CV_32F); + + buf.create(image.size(), templ.size()); + result.create(buf.result_size, CV_32F); + + Size& block_size = buf.block_size; + Size& dft_size = buf.dft_size; + + oclMat& image_block = buf.image_block; + oclMat& templ_block = buf.templ_block; + oclMat& result_data = buf.result_data; 
+ + oclMat& image_spect = buf.image_spect; + oclMat& templ_spect = buf.templ_spect; + oclMat& result_spect = buf.result_spect; + + oclMat templ_roi = templ; + copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, + templ_block.cols - templ_roi.cols, 0, Scalar()); + + cv::ocl::dft(templ_block, templ_spect, dft_size); + + // Process all blocks of the result matrix + for (int y = 0; y < result.rows; y += block_size.height) + { + for (int x = 0; x < result.cols; x += block_size.width) + { + Size image_roi_size(std::min(x + dft_size.width, image.cols) - x, + std::min(y + dft_size.height, image.rows) - y); + Rect roi0(x, y, image_roi_size.width, image_roi_size.height); + + oclMat image_roi(image, roi0); + + copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows, + 0, image_block.cols - image_roi.cols, 0, Scalar()); + + cv::ocl::dft(image_block, image_spect, dft_size); + + mulSpectrums(image_spect, templ_spect, result_spect, 0, + 1.f / dft_size.area(), ccorr); + + cv::ocl::dft(result_spect, result_data, dft_size, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT); + + Size result_roi_size(std::min(x + block_size.width, result.cols) - x, + std::min(y + block_size.height, result.rows) - y); + + Rect roi1(x, y, result_roi_size.width, result_roi_size.height); + Rect roi2(0, 0, result_roi_size.width, result_roi_size.height); + + oclMat result_roi(result, roi1); + oclMat result_block(result_data, roi2); + + result_block.copyTo(result_roi); + } + } + +#else + CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented"); +#endif +} static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, String kernelName, const char **kernelString) { CV_Assert(src.depth() == CV_32FC1); @@ -1630,13 +1769,25 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, St openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); } -void cv::ocl::convolve(const oclMat &x, const 
oclMat &t, oclMat &y) +void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y, bool ccorr) { CV_Assert(x.depth() == CV_32F); CV_Assert(t.depth() == CV_32F); - CV_Assert(x.type() == y.type() && x.size() == y.size()); y.create(x.size(), x.type()); String kernelName = "convolve"; - - convolve_run(x, t, y, kernelName, &imgproc_convolve); + if(t.cols > 17 || t.rows > 17) + { + ConvolveBuf buf; + convolve_run_fft(x, t, y, ccorr, buf); + } + else + { + CV_Assert(ccorr == false); + convolve_run(x, t, y, kernelName, &imgproc_convolve); + } +} +void cv::ocl::convolve(const oclMat &image, const oclMat &templ, oclMat &result, bool ccorr, ConvolveBuf& buf) +{ + result.create(image.size(), image.type()); + convolve_run_fft(image, templ, result, ccorr, buf); } diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index 40c544301..9dee1f4ea 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -98,11 +98,22 @@ namespace cv // Evaluates optimal template's area threshold. If // template's area is less than the threshold, we use naive match // template version, otherwise FFT-based (if available) - static bool useNaive(int , int , Size ) + static bool useNaive(int method, int depth, Size size) { - // FIXME! 
- // always use naive until convolve is imported +#ifdef HAVE_CLAMDFFT + if (method == CV_TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(Context::CL_DOUBLE))) + { + return true; + } + else if(method == CV_TM_CCORR || (method == CV_TM_SQDIFF && depth == CV_8U)) + { + return size.height < 18 && size.width < 18; + } + else + return false; +#else return true; +#endif } ////////////////////////////////////////////////////////////////////// @@ -223,9 +234,18 @@ namespace cv ////////////////////////////////////////////////////////////////////// // CCORR void convolve_32F( - const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &) + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) { - CV_Error(-1, "convolve is not fully implemented yet"); + ConvolveBuf convolve_buf; + convolve_buf.user_block_size = buf.user_block_size; + if (image.oclchannels() == 1) + convolve(image, templ, result, true, convolve_buf); + else + { + oclMat result_; + convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf); + extractFirstChannel_32F(result_, result); + } } void matchTemplate_CCORR( diff --git a/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl b/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl new file mode 100644 index 000000000..7d3774d07 --- /dev/null +++ b/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl @@ -0,0 +1,96 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. 
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +typedef float2 cfloat; +inline cfloat cmulf(cfloat a, cfloat b) +{ + return (cfloat)( a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x); +} + +inline cfloat conjf(cfloat a) +{ + return (cfloat)( a.x, - a.y ); +} + +__kernel void + mulAndScaleSpectrumsKernel( + __global const cfloat* a, + __global const cfloat* b, + float scale, + __global cfloat* dst, + uint cols, + uint rows, + uint mstep + ) +{ + const uint x = get_global_id(0); + const uint y = get_global_id(1); + const uint idx = mad24(y, mstep / sizeof(cfloat), x); + if (x < cols && y < rows) + { + cfloat v = cmulf(a[idx], b[idx]); + dst[idx] = (cfloat)( v.x * scale, v.y * scale ); + } +} +__kernel void + mulAndScaleSpectrumsKernel_CONJ( + __global const cfloat* a, + __global const cfloat* b, + float scale, + __global cfloat* dst, + uint cols, + uint rows, + uint mstep + ) +{ + const uint x = get_global_id(0); + const uint y = get_global_id(1); + const uint idx = mad24(y, mstep / sizeof(cfloat), x); + if (x < cols && y < rows) + { + cfloat v = cmulf(a[idx], conjf(b[idx])); + dst[idx] = (cfloat)( v.x * scale, v.y * scale ); + } +} diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp index 0fee8b03d..d19a47132 100644 --- a/modules/ocl/test/test_fft.cpp +++ b/modules/ocl/test/test_fft.cpp @@ -103,4 +103,138 @@ INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine( testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)), testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) )); +//////////////////////////////////////////////////////////////////////////// +// MulSpectrums + +PARAM_TEST_CASE(MulSpectrums, cv::Size, DftFlags, bool) +{ + cv::Size size; + int flag; + bool ccorr; + cv::Mat a, b; + + virtual void SetUp() + { + size = GET_PARAM(0); + flag = GET_PARAM(1); + ccorr = GET_PARAM(2); + + a = randomMat(size, CV_32FC2); + b = randomMat(size, CV_32FC2); + } +}; + +TEST_P(MulSpectrums, Simple) +{ + cv::ocl::oclMat c; + 
cv::ocl::mulSpectrums(cv::ocl::oclMat(a), cv::ocl::oclMat(b), c, flag, 1.0, ccorr); + + cv::Mat c_gold; + cv::mulSpectrums(a, b, c_gold, flag, ccorr); + + EXPECT_MAT_NEAR(c_gold, c, 1e-2, ""); +} + +TEST_P(MulSpectrums, Scaled) +{ + float scale = 1.f / size.area(); + + cv::ocl::oclMat c; + cv::ocl::mulSpectrums(cv::ocl::oclMat(a), cv::ocl::oclMat(b), c, flag, scale, ccorr); + + cv::Mat c_gold; + cv::mulSpectrums(a, b, c_gold, flag, ccorr); + c_gold.convertTo(c_gold, c_gold.type(), scale); + + EXPECT_MAT_NEAR(c_gold, c, 1e-2, ""); +} + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine( + DIFFERENT_SIZES, + testing::Values(DftFlags(0)), + testing::Values(false, true))); + + +//////////////////////////////////////////////////////// +// Convolve + +void static convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false) +{ + // reallocate the output array if needed + C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type()); + cv::Size dftSize; + + // compute the size of DFT transform + dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1); + dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1); + + // allocate temporary buffers and initialize them with 0s + cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0)); + cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0)); + + // copy A and B to the top-left corners of tempA and tempB, respectively + cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows)); + A.copyTo(roiA); + cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows)); + B.copyTo(roiB); + + // now transform the padded A & B in-place; + // use "nonzeroRows" hint for faster processing + cv::dft(tempA, tempA, 0, A.rows); + cv::dft(tempB, tempB, 0, B.rows); + + // multiply the spectrums; + // the function handles packed spectrum representations well + cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr); + + // transform the product back from the frequency domain. 
+ // Even though all the result rows will be non-zero, + // you need only the first C.rows of them, and thus you + // pass nonzeroRows == C.rows + cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows); + + // now copy the result back to C. + tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C); +} + +IMPLEMENT_PARAM_CLASS(KSize, int); +IMPLEMENT_PARAM_CLASS(Ccorr, bool); + +PARAM_TEST_CASE(Convolve_DFT, cv::Size, KSize, Ccorr) +{ + cv::Size size; + int ksize; + bool ccorr; + + cv::Mat src; + cv::Mat kernel; + + cv::Mat dst_gold; + + virtual void SetUp() + { + size = GET_PARAM(0); + ksize = GET_PARAM(1); + ccorr = GET_PARAM(2); + } +}; + +TEST_P(Convolve_DFT, Accuracy) +{ + cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0); + cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0); + + cv::ocl::oclMat dst; + cv::ocl::convolve(cv::ocl::oclMat(src), cv::ocl::oclMat(kernel), dst, ccorr); + + cv::Mat dst_gold; + convolveDFT(src, kernel, dst_gold, ccorr); + + EXPECT_MAT_NEAR(dst, dst_gold, 1e-1, ""); +} +#define DIFFERENT_CONVOLVE_SIZES testing::Values(cv::Size(251, 257), cv::Size(113, 113), cv::Size(200, 480), cv::Size(1300, 1300)) +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Convolve_DFT, testing::Combine( + DIFFERENT_CONVOLVE_SIZES, + testing::Values(KSize(19), KSize(23), KSize(45)), + testing::Values(Ccorr(true)/*, Ccorr(false)*/))); // false ccorr cannot pass for some instances #endif // HAVE_CLAMDFFT From 3fea2620e69e82e7c933551ce89cc5cbb547dc41 Mon Sep 17 00:00:00 2001 From: peng xiao <hisenxpress@gmail.com> Date: Mon, 8 Apr 2013 16:22:20 +0800 Subject: [PATCH 2/3] Fix some compilation errors and warnings. 
--- modules/ocl/src/imgproc.cpp | 7 ++-- modules/ocl/src/match_template.cpp | 3 ++ .../opencl/imgproc_mulAndScaleSpectrums.cl | 36 +++++++++---------- modules/ocl/test/test_fft.cpp | 2 +- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index e3b3d2522..5ffb9dcf8 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -1661,7 +1661,7 @@ Size cv::ocl::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size int width = (result_size.width + 2) / 3; int height = (result_size.height + 2) / 3; width = std::min(width, result_size.width); - height = std::min(height, result_size.height); + height = std::min(height, result_size.height); return Size(width, height); } @@ -1714,7 +1714,7 @@ static void convolve_run_fft(const oclMat &image, const oclMat &templ, oclMat &r Size result_roi_size(std::min(x + block_size.width, result.cols) - x, std::min(y + block_size.height, result.rows) - y); - + Rect roi1(x, y, result_roi_size.width, result_roi_size.height); Rect roi2(0, 0, result_roi_size.width, result_roi_size.height); @@ -1727,6 +1727,9 @@ static void convolve_run_fft(const oclMat &image, const oclMat &templ, oclMat &r #else CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented"); +#define UNUSED(x) (void)(x); + UNUSED(image) UNUSED(templ) UNUSED(result) UNUSED(ccorr) UNUSED(buf) +#undef UNUSED #endif } static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, String kernelName, const char **kernelString) diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index 9dee1f4ea..77f7b1d63 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -112,6 +112,9 @@ namespace cv else return false; #else +#define UNUSED(x) (void)(x); + UNUSED(method) UNUSED(depth) UNUSED(size) +#undef UNUSED return true; #endif } diff --git a/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl 
b/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl index 7d3774d07..86d4e5d52 100644 --- a/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl +++ b/modules/ocl/src/opencl/imgproc_mulAndScaleSpectrums.cl @@ -54,16 +54,16 @@ inline cfloat conjf(cfloat a) return (cfloat)( a.x, - a.y ); } -__kernel void - mulAndScaleSpectrumsKernel( - __global const cfloat* a, - __global const cfloat* b, - float scale, - __global cfloat* dst, - uint cols, - uint rows, +__kernel void +mulAndScaleSpectrumsKernel( + __global const cfloat* a, + __global const cfloat* b, + float scale, + __global cfloat* dst, + uint cols, + uint rows, uint mstep - ) +) { const uint x = get_global_id(0); const uint y = get_global_id(1); @@ -74,16 +74,16 @@ __kernel void dst[idx] = (cfloat)( v.x * scale, v.y * scale ); } } -__kernel void - mulAndScaleSpectrumsKernel_CONJ( - __global const cfloat* a, - __global const cfloat* b, - float scale, - __global cfloat* dst, - uint cols, - uint rows, +__kernel void +mulAndScaleSpectrumsKernel_CONJ( + __global const cfloat* a, + __global const cfloat* b, + float scale, + __global cfloat* dst, + uint cols, + uint rows, uint mstep - ) +) { const uint x = get_global_id(0); const uint y = get_global_id(1); diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp index d19a47132..030ea1ff1 100644 --- a/modules/ocl/test/test_fft.cpp +++ b/modules/ocl/test/test_fft.cpp @@ -226,7 +226,7 @@ TEST_P(Convolve_DFT, Accuracy) cv::ocl::oclMat dst; cv::ocl::convolve(cv::ocl::oclMat(src), cv::ocl::oclMat(kernel), dst, ccorr); - + cv::Mat dst_gold; convolveDFT(src, kernel, dst_gold, ccorr); From 143f8f69d6a49f18555e9a59abfb3066b4eb4bfb Mon Sep 17 00:00:00 2001 From: peng xiao <hisenxpress@gmail.com> Date: Mon, 8 Apr 2013 17:15:52 +0800 Subject: [PATCH 3/3] Add some documentation on ocl::convolve --- modules/ocl/doc/image_filtering.rst | 41 ++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git 
a/modules/ocl/doc/image_filtering.rst b/modules/ocl/doc/image_filtering.rst index ca97d3a93..3da5d3ded 100644 --- a/modules/ocl/doc/image_filtering.rst +++ b/modules/ocl/doc/image_filtering.rst @@ -109,17 +109,52 @@ Returns void The function calculates the Laplacian of the source image by adding up the second x and y derivatives calculated using the Sobel operator. +ocl::ConvolveBuf +---------------- +.. ocv:struct:: ocl::ConvolveBuf + +Class providing a memory buffer for the :ocv:func:`ocl::convolve` function; it also allows adjusting some specific parameters. :: + + struct CV_EXPORTS ConvolveBuf + { + Size result_size; + Size block_size; + Size user_block_size; + Size dft_size; + int spect_len; + + oclMat image_spect, templ_spect, result_spect; + oclMat image_block, templ_block, result_data; + + void create(Size image_size, Size templ_size); + static Size estimateBlockSize(Size result_size, Size templ_size); + }; + +You can use the field ``user_block_size`` to set a specific block size for the :ocv:func:`ocl::convolve` function. If you leave its default value ``Size(0,0)``, automatic estimation of the block size is used (which is optimized for speed). By varying ``user_block_size`` you can reduce memory requirements at the cost of speed. + +ocl::ConvolveBuf::create +------------------------ +.. ocv:function:: ocl::ConvolveBuf::create(Size image_size, Size templ_size) + +Constructs a buffer for the :ocv:func:`ocl::convolve` function with the respective arguments. + ocl::convolve ------------------ Returns void -.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result) +.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr=false) - :param image: The source image +.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf) - :param temp1: Convolution kernel, a single-channel floating point matrix. + :param image: The source image. 
Only ``CV_32FC1`` images are supported for now. + + :param temp1: Convolution kernel, a single-channel floating point matrix. The size is not greater than the ``image`` size. The type is the same as ``image``. :param result: The destination image + + :param ccorr: Flag to evaluate cross-correlation instead of convolution. + + :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`ocl::ConvolveBuf`. Convolves an image with the kernel. Supports only CV_32FC1 data types and do not support ROI.