refactored gpu::convolve function:
* converted it to an Algorithm
* the old API can still be used for source compatibility (marked as deprecated)
This commit is contained in:
parent
26a4be89b1
commit
8461cb3f4b
@ -374,7 +374,23 @@ CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArr
|
|||||||
//! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format.
|
//! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format.
|
||||||
CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
|
CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
struct CV_EXPORTS ConvolveBuf
|
//! computes convolution (or cross-correlation) of two images using discrete Fourier transform
|
||||||
|
//! supports source images of 32FC1 type only
|
||||||
|
//! result matrix will have 32FC1 type
|
||||||
|
class CV_EXPORTS Convolution : public Algorithm
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) = 0;
|
||||||
|
};
|
||||||
|
//! creates a Convolution object; when user_block_size has a zero width or height the block size is estimated from the result size
CV_EXPORTS Ptr<Convolution> createConvolution(Size user_block_size = Size());
|
||||||
|
|
||||||
|
__OPENCV_GPUARITHM_DEPR_BEFORE__ void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) __OPENCV_GPUARITHM_DEPR_AFTER__;
|
||||||
|
inline void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr , Stream& stream)
|
||||||
|
{
|
||||||
|
createConvolution()->convolve(image, templ, result, ccorr, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ConvolveBuf
|
||||||
{
|
{
|
||||||
Size result_size;
|
Size result_size;
|
||||||
Size block_size;
|
Size block_size;
|
||||||
@ -385,15 +401,15 @@ struct CV_EXPORTS ConvolveBuf
|
|||||||
GpuMat image_spect, templ_spect, result_spect;
|
GpuMat image_spect, templ_spect, result_spect;
|
||||||
GpuMat image_block, templ_block, result_data;
|
GpuMat image_block, templ_block, result_data;
|
||||||
|
|
||||||
void create(Size image_size, Size templ_size);
|
void create(Size, Size){}
|
||||||
static Size estimateBlockSize(Size result_size, Size templ_size);
|
static Size estimateBlockSize(Size, Size){ return Size(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
//! computes convolution (or cross-correlation) of two images using discrete Fourier transform
|
__OPENCV_GPUARITHM_DEPR_BEFORE__ void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null()) __OPENCV_GPUARITHM_DEPR_AFTER__;
|
||||||
//! supports source images of 32FC1 type only
|
inline void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr, ConvolveBuf& buf, Stream& stream)
|
||||||
//! result matrix will have 32FC1 type
|
{
|
||||||
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr = false);
|
createConvolution(buf.user_block_size)->convolve(image, templ, result, ccorr, stream);
|
||||||
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null());
|
}
|
||||||
|
|
||||||
}} // namespace cv { namespace gpu {
|
}} // namespace cv { namespace gpu {
|
||||||
|
|
||||||
|
@ -228,10 +228,11 @@ PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
|
|||||||
cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
|
cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
|
||||||
d_templ.upload(templ);
|
d_templ.upload(templ);
|
||||||
|
|
||||||
cv::gpu::GpuMat dst;
|
cv::Ptr<cv::gpu::Convolution> convolution = cv::gpu::createConvolution();
|
||||||
cv::gpu::ConvolveBuf d_buf;
|
|
||||||
|
|
||||||
TEST_CYCLE() cv::gpu::convolve(d_image, d_templ, dst, ccorr, d_buf);
|
cv::gpu::GpuMat dst;
|
||||||
|
|
||||||
|
TEST_CYCLE() convolution->convolve(d_image, d_templ, dst, ccorr);
|
||||||
|
|
||||||
GPU_SANITY_CHECK(dst);
|
GPU_SANITY_CHECK(dst);
|
||||||
}
|
}
|
||||||
|
@ -54,9 +54,7 @@ void cv::gpu::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, flo
|
|||||||
|
|
||||||
void cv::gpu::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); }
|
void cv::gpu::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); }
|
||||||
|
|
||||||
void cv::gpu::ConvolveBuf::create(Size, Size) { throw_no_cuda(); }
|
// Stub for builds without CUDA: reports the missing support through throw_no_cuda().
Ptr<Convolution> cv::gpu::createConvolution(Size)
{
    throw_no_cuda();
    return Ptr<Convolution>();
}
|
||||||
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_no_cuda(); }
|
|
||||||
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream&) { throw_no_cuda(); }
|
|
||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
@ -486,16 +484,41 @@ void cv::gpu::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, S
|
|||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// convolve
|
// Convolution
|
||||||
|
|
||||||
void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
|
#ifdef HAVE_CUFFT
|
||||||
|
|
||||||
|
namespace
|
||||||
{
|
{
|
||||||
|
class ConvolutionImpl : public Convolution
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_) {}
|
||||||
|
|
||||||
|
void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
|
private:
|
||||||
|
void create(Size image_size, Size templ_size);
|
||||||
|
static Size estimateBlockSize(Size result_size);
|
||||||
|
|
||||||
|
Size result_size;
|
||||||
|
Size block_size;
|
||||||
|
Size user_block_size;
|
||||||
|
Size dft_size;
|
||||||
|
int spect_len;
|
||||||
|
|
||||||
|
GpuMat image_spect, templ_spect, result_spect;
|
||||||
|
GpuMat image_block, templ_block, result_data;
|
||||||
|
};
|
||||||
|
|
||||||
|
void ConvolutionImpl::create(Size image_size, Size templ_size)
|
||||||
|
{
|
||||||
result_size = Size(image_size.width - templ_size.width + 1,
|
result_size = Size(image_size.width - templ_size.width + 1,
|
||||||
image_size.height - templ_size.height + 1);
|
image_size.height - templ_size.height + 1);
|
||||||
|
|
||||||
block_size = user_block_size;
|
block_size = user_block_size;
|
||||||
if (user_block_size.width == 0 || user_block_size.height == 0)
|
if (user_block_size.width == 0 || user_block_size.height == 0)
|
||||||
block_size = estimateBlockSize(result_size, templ_size);
|
block_size = estimateBlockSize(result_size);
|
||||||
|
|
||||||
dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.)));
|
dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.)));
|
||||||
dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.)));
|
dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.)));
|
||||||
@ -523,66 +546,44 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
|
|||||||
// Use maximum result matrix block size for the estimated DFT block size
|
// Use maximum result matrix block size for the estimated DFT block size
|
||||||
block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
|
block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
|
||||||
block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
|
block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Size ConvolutionImpl::estimateBlockSize(Size result_size)
|
||||||
Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size*/)
|
{
|
||||||
{
|
|
||||||
int width = (result_size.width + 2) / 3;
|
int width = (result_size.width + 2) / 3;
|
||||||
int height = (result_size.height + 2) / 3;
|
int height = (result_size.height + 2) / 3;
|
||||||
width = std::min(width, result_size.width);
|
width = std::min(width, result_size.width);
|
||||||
height = std::min(height, result_size.height);
|
height = std::min(height, result_size.height);
|
||||||
return Size(width, height);
|
return Size(width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream)
|
||||||
|
{
|
||||||
|
GpuMat image = _image.getGpuMat();
|
||||||
|
GpuMat templ = _templ.getGpuMat();
|
||||||
|
|
||||||
void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr)
|
CV_Assert( image.type() == CV_32FC1 );
|
||||||
{
|
CV_Assert( templ.type() == CV_32FC1 );
|
||||||
ConvolveBuf buf;
|
|
||||||
gpu::convolve(image, templ, result, ccorr, buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
|
create(image.size(), templ.size());
|
||||||
{
|
|
||||||
#ifndef HAVE_CUFFT
|
|
||||||
(void) image;
|
|
||||||
(void) templ;
|
|
||||||
(void) result;
|
|
||||||
(void) ccorr;
|
|
||||||
(void) buf;
|
|
||||||
(void) stream;
|
|
||||||
throw_no_cuda();
|
|
||||||
#else
|
|
||||||
CV_Assert(image.type() == CV_32F);
|
|
||||||
CV_Assert(templ.type() == CV_32F);
|
|
||||||
|
|
||||||
buf.create(image.size(), templ.size());
|
_result.create(result_size, CV_32FC1);
|
||||||
result.create(buf.result_size, CV_32F);
|
GpuMat result = _result.getGpuMat();
|
||||||
|
|
||||||
Size& block_size = buf.block_size;
|
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||||
Size& dft_size = buf.dft_size;
|
|
||||||
|
|
||||||
GpuMat& image_block = buf.image_block;
|
|
||||||
GpuMat& templ_block = buf.templ_block;
|
|
||||||
GpuMat& result_data = buf.result_data;
|
|
||||||
|
|
||||||
GpuMat& image_spect = buf.image_spect;
|
|
||||||
GpuMat& templ_spect = buf.templ_spect;
|
|
||||||
GpuMat& result_spect = buf.result_spect;
|
|
||||||
|
|
||||||
cufftHandle planR2C, planC2R;
|
cufftHandle planR2C, planC2R;
|
||||||
cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));
|
cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) );
|
||||||
cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
|
cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) );
|
||||||
|
|
||||||
cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) );
|
cufftSafeCall( cufftSetStream(planR2C, stream) );
|
||||||
cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );
|
cufftSafeCall( cufftSetStream(planC2R, stream) );
|
||||||
|
|
||||||
GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);
|
GpuMat templ_roi(templ.size(), CV_32FC1, templ.data, templ.step);
|
||||||
gpu::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
|
gpu::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
|
||||||
templ_block.cols - templ_roi.cols, 0, Scalar(), stream);
|
templ_block.cols - templ_roi.cols, 0, Scalar(), _stream);
|
||||||
|
|
||||||
cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(),
|
cufftSafeCall( cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), templ_spect.ptr<cufftComplex>()) );
|
||||||
templ_spect.ptr<cufftComplex>()));
|
|
||||||
|
|
||||||
// Process all blocks of the result matrix
|
// Process all blocks of the result matrix
|
||||||
for (int y = 0; y < result.rows; y += block_size.height)
|
for (int y = 0; y < result.rows; y += block_size.height)
|
||||||
@ -594,12 +595,12 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
|||||||
GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
|
GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
|
||||||
image.step);
|
image.step);
|
||||||
gpu::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
|
gpu::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
|
||||||
0, image_block.cols - image_roi.cols, 0, Scalar(), stream);
|
0, image_block.cols - image_roi.cols, 0, Scalar(), _stream);
|
||||||
|
|
||||||
cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
|
cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
|
||||||
image_spect.ptr<cufftComplex>()));
|
image_spect.ptr<cufftComplex>()));
|
||||||
gpu::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
|
gpu::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
|
||||||
1.f / dft_size.area(), ccorr, stream);
|
1.f / dft_size.area(), ccorr, _stream);
|
||||||
cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
|
cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
|
||||||
result_data.ptr<cufftReal>()));
|
result_data.ptr<cufftReal>()));
|
||||||
|
|
||||||
@ -610,12 +611,25 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
|||||||
GpuMat result_block(result_roi_size, result_data.type(),
|
GpuMat result_block(result_roi_size, result_data.type(),
|
||||||
result_data.ptr(), result_data.step);
|
result_data.ptr(), result_data.step);
|
||||||
|
|
||||||
result_block.copyTo(result_roi, stream);
|
result_block.copyTo(result_roi, _stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cufftSafeCall(cufftDestroy(planR2C));
|
cufftSafeCall( cufftDestroy(planR2C) );
|
||||||
cufftSafeCall(cufftDestroy(planC2R));
|
cufftSafeCall( cufftDestroy(planC2R) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Factory for the Convolution algorithm.
// user_block_size is passed to the implementation unchanged.
// Raises cv::Error::StsNotImplemented when CUFFT support is not compiled in.
Ptr<Convolution> cv::gpu::createConvolution(Size user_block_size)
{
    // ConvolutionImpl is only compiled under HAVE_CUFFT, so this guard has to test the
    // same macro (the previous HAVE_CUBLAS check did not match the class it protects).
#ifndef HAVE_CUFFT
    (void) user_block_size;
    CV_Error(cv::Error::StsNotImplemented, "The library was build without CUFFT");
    // Placeholder return after the error is raised; it must have the function's own return type.
    return Ptr<Convolution>();
#else
    return new ConvolutionImpl(user_block_size);
#endif
}
|
||||||
|
|
||||||
|
@ -419,8 +419,10 @@ GPU_TEST_P(Convolve, Accuracy)
|
|||||||
cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
|
cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
|
||||||
cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
|
cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
|
||||||
|
|
||||||
|
cv::Ptr<cv::gpu::Convolution> conv = cv::gpu::createConvolution();
|
||||||
|
|
||||||
cv::gpu::GpuMat dst;
|
cv::gpu::GpuMat dst;
|
||||||
cv::gpu::convolve(loadMat(src), loadMat(kernel), dst, ccorr);
|
conv->convolve(loadMat(src), loadMat(kernel), dst, ccorr);
|
||||||
|
|
||||||
cv::Mat dst_gold;
|
cv::Mat dst_gold;
|
||||||
convolveDFT(src, kernel, dst_gold, ccorr);
|
convolveDFT(src, kernel, dst_gold, ccorr);
|
||||||
|
@ -172,15 +172,16 @@ namespace
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
gpu::ConvolveBuf convolve_buf;
|
Ptr<gpu::Convolution> conv = gpu::createConvolution(buf.user_block_size);
|
||||||
convolve_buf.user_block_size = buf.user_block_size;
|
|
||||||
|
|
||||||
if (image.channels() == 1)
|
if (image.channels() == 1)
|
||||||
gpu::convolve(image.reshape(1), templ.reshape(1), result, true, convolve_buf, stream);
|
{
|
||||||
|
conv->convolve(image.reshape(1), templ.reshape(1), result, true, stream);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
GpuMat result_;
|
GpuMat result_;
|
||||||
gpu::convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf, stream);
|
conv->convolve(image.reshape(1), templ.reshape(1), result_, true, stream);
|
||||||
extractFirstChannel_32F(result_, result, image.channels(), StreamAccessor::getStream(stream));
|
extractFirstChannel_32F(result_, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user