refactor CUDA part of photo module

This commit is contained in:
Vladislav Vinogradov 2015-01-15 16:45:35 +03:00
parent df57d038b8
commit f48befc6f0
4 changed files with 93 additions and 84 deletions

View File

@ -64,64 +64,66 @@ BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supporte
@sa @sa
fastNlMeansDenoising fastNlMeansDenoising
*/ */
CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null()); CV_EXPORTS void nonLocalMeans(InputArray src, OutputArray dst,
float h,
int search_window = 21,
int block_size = 7,
int borderMode = BORDER_DEFAULT,
Stream& stream = Stream::Null());
/** @brief The class implements fast approximate Non Local Means Denoising algorithm. /** @brief Perform image denoising using Non-local Means Denoising algorithm
*/ <http://www.ipol.im/pub/algo/bcm_non_local_means_denoising> with several computational
class CV_EXPORTS FastNonLocalMeansDenoising optimizations. Noise is expected to be Gaussian white noise.
{
public:
/** @brief Perform image denoising using Non-local Means Denoising algorithm
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising> with several computational
optimizations. Noise is expected to be Gaussian white noise.
@param src Input 8-bit 1-channel, 2-channel or 3-channel image. @param src Input 8-bit 1-channel, 2-channel or 3-channel image.
@param dst Output image with the same size and type as src . @param dst Output image with the same size and type as src .
@param h Parameter regulating filter strength. Big h value perfectly removes noise but also @param h Parameter regulating filter strength. Big h value perfectly removes noise but also
removes image details, smaller h value preserves details but also preserves some noise removes image details, smaller h value preserves details but also preserves some noise
@param search_window Size in pixels of the window that is used to compute weighted average for @param search_window Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affects performance linearly: greater search_window - greater given pixel. Should be odd. Affects performance linearly: greater search_window - greater
denoising time. Recommended value 21 pixels denoising time. Recommended value 21 pixels
@param block_size Size in pixels of the template patch that is used to compute weights. Should be @param block_size Size in pixels of the template patch that is used to compute weights. Should be
odd. Recommended value 7 pixels odd. Recommended value 7 pixels
@param s Stream for the asynchronous invocations. @param stream Stream for the asynchronous invocations.
This function is expected to be applied to grayscale images. For colored images look at This function is expected to be applied to grayscale images. For colored images look at
FastNonLocalMeansDenoising::labMethod. fastNlMeansDenoisingColored.
@sa @sa
fastNlMeansDenoising fastNlMeansDenoising
*/ */
void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); CV_EXPORTS void fastNlMeansDenoising(InputArray src, OutputArray dst,
float h,
int search_window = 21,
int block_size = 7,
Stream& stream = Stream::Null());
/** @brief Modification of FastNonLocalMeansDenoising::simpleMethod for color images /** @brief Modification of fastNlMeansDenoising function for colored images
@param src Input 8-bit 3-channel image. @param src Input 8-bit 3-channel image.
@param dst Output image with the same size and type as src . @param dst Output image with the same size and type as src .
@param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but @param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but
also removes image details, smaller h value preserves details but also preserves some noise also removes image details, smaller h value preserves details but also preserves some noise
@param photo_render The same as h_luminance but for color components. For most images a value of 10 will be @param photo_render The same as h_luminance but for color components. For most images a value of 10 will be
enough to remove colored noise without distorting colors enough to remove colored noise without distorting colors
@param search_window Size in pixels of the window that is used to compute weighted average for @param search_window Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affects performance linearly: greater search_window - greater given pixel. Should be odd. Affects performance linearly: greater search_window - greater
denoising time. Recommended value 21 pixels denoising time. Recommended value 21 pixels
@param block_size Size in pixels of the template patch that is used to compute weights. Should be @param block_size Size in pixels of the template patch that is used to compute weights. Should be
odd. Recommended value 7 pixels odd. Recommended value 7 pixels
@param s Stream for the asynchronous invocations. @param stream Stream for the asynchronous invocations.
The function converts the image to CIELAB colorspace and then separately denoises the L and AB components The function converts the image to CIELAB colorspace and then separately denoises the L and AB components
with the given h parameters using the FastNonLocalMeansDenoising::simpleMethod function. with the given h parameters using the fastNlMeansDenoising function.
@sa @sa
fastNlMeansDenoisingColored fastNlMeansDenoisingColored
*/ */
void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float photo_render, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst,
float h_luminance, float photo_render,
private: int search_window = 21,
int block_size = 7,
GpuMat buffer, extended_src_buffer; Stream& stream = Stream::Null());
GpuMat lab, l, ab;
};
//! @} photo //! @} photo

View File

@ -126,12 +126,10 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, CUDA_FastNonLocalMeans,
if (PERF_RUN_CUDA()) if (PERF_RUN_CUDA())
{ {
cv::cuda::FastNonLocalMeansDenoising fnlmd;
const cv::cuda::GpuMat d_src(src); const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst; cv::cuda::GpuMat dst;
TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size); TEST_CYCLE() cv::cuda::fastNlMeansDenoising(d_src, dst, h, search_widow_size, block_size);
CUDA_SANITY_CHECK(dst); CUDA_SANITY_CHECK(dst);
} }
@ -171,12 +169,10 @@ PERF_TEST_P(Sz_Depth_WinSz_BlockSz, CUDA_FastNonLocalMeansColored,
if (PERF_RUN_CUDA()) if (PERF_RUN_CUDA())
{ {
cv::cuda::FastNonLocalMeansDenoising fnlmd;
const cv::cuda::GpuMat d_src(src); const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst; cv::cuda::GpuMat dst;
TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size); TEST_CYCLE() cv::cuda::fastNlMeansDenoisingColored(d_src, dst, h, h, search_widow_size, block_size);
CUDA_SANITY_CHECK(dst); CUDA_SANITY_CHECK(dst);
} }

View File

@ -60,9 +60,9 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || !defined(HAVE_OPENCV_CUDAARITHM) || !defined(HAVE_OPENCV_CUDAIMGPROC) #if !defined (HAVE_CUDA) || !defined(HAVE_OPENCV_CUDAARITHM) || !defined(HAVE_OPENCV_CUDAIMGPROC)
void cv::cuda::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); } void cv::cuda::nonLocalMeans(InputArray, OutputArray, float, int, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); } void cv::cuda::fastNlMeansDenoising(InputArray, OutputArray, float, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); } void cv::cuda::fastNlMeansDenoisingColored(InputArray, OutputArray, float, float, int, int, Stream&) { throw_no_cuda(); }
#else #else
@ -78,13 +78,15 @@ namespace cv { namespace cuda { namespace device
} }
}}} }}}
void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s) void cv::cuda::nonLocalMeans(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, int borderMode, Stream& stream)
{ {
using cv::cuda::device::imgproc::nlm_bruteforce_gpu; using cv::cuda::device::imgproc::nlm_bruteforce_gpu;
typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream); typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);
static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0/*nlm_bruteforce_gpu<uchar4>,*/ }; static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0/*nlm_bruteforce_gpu<uchar4>,*/ };
const GpuMat src = _src.getGpuMat();
CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3); CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3);
const func_t func = funcs[src.channels() - 1]; const func_t func = funcs[src.channels() - 1];
@ -93,8 +95,10 @@ void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search
int b = borderMode; int b = borderMode;
CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP); CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP);
dst.create(src.size(), src.type()); _dst.create(src.size(), src.type());
func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(s)); GpuMat dst = _dst.getGpuMat();
func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(stream));
} }
namespace cv { namespace cuda { namespace device namespace cv { namespace cuda { namespace device
@ -112,47 +116,55 @@ namespace cv { namespace cuda { namespace device
} }
}}} }}}
void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s) void cv::cuda::fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, Stream& stream)
{ {
const GpuMat src = _src.getGpuMat();
CV_Assert(src.depth() == CV_8U && src.channels() < 4); CV_Assert(src.depth() == CV_8U && src.channels() < 4);
int border_size = search_window/2 + block_window/2; int border_size = search_window/2 + block_window/2;
Size esize = src.size() + Size(border_size, border_size) * 2; Size esize = src.size() + Size(border_size, border_size) * 2;
cv::cuda::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer); BufferPool pool(stream);
GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);
cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s); GpuMat extended_src = pool.getBuffer(esize, src.type());
cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size())); GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));
int bcols, brows; int bcols, brows;
device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows); device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
buffer.create(brows, bcols, CV_32S); GpuMat buffer = pool.getBuffer(brows, bcols, CV_32S);
using namespace cv::cuda::device::imgproc; using namespace cv::cuda::device::imgproc;
typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t); typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0}; static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0};
dst.create(src.size(), src.type()); _dst.create(src.size(), src.type());
funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s)); GpuMat dst = _dst.getGpuMat();
funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(stream));
} }
void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s) void cv::cuda::fastNlMeansDenoisingColored(InputArray _src, OutputArray _dst, float h_luminance, float h_color, int search_window, int block_window, Stream& stream)
{ {
const GpuMat src = _src.getGpuMat();
CV_Assert(src.type() == CV_8UC3); CV_Assert(src.type() == CV_8UC3);
lab.create(src.size(), src.type()); BufferPool pool(stream);
cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, s);
l.create(src.size(), CV_8U); GpuMat lab = pool.getBuffer(src.size(), src.type());
ab.create(src.size(), CV_8UC2); cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, stream);
device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(s));
simpleMethod(l, l, h_luminance, search_window, block_window, s); GpuMat l = pool.getBuffer(src.size(), CV_8U);
simpleMethod(ab, ab, h_color, search_window, block_window, s); GpuMat ab = pool.getBuffer(src.size(), CV_8UC2);
device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(stream));
device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(s)); fastNlMeansDenoising(l, l, h_luminance, search_window, block_window, stream);
cv::cuda::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s); fastNlMeansDenoising(ab, ab, h_color, search_window, block_window, stream);
device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(stream));
cv::cuda::cvtColor(lab, _dst, cv::COLOR_Lab2BGR, 0, stream);
} }
#endif #endif

View File

@ -99,10 +99,9 @@ TEST(CUDA_FastNonLocalMeans, Regression)
cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY); cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
GpuMat dbgr, dgray; GpuMat dbgr, dgray;
cv::cuda::FastNonLocalMeansDenoising fnlmd;
fnlmd.simpleMethod(GpuMat(gray), dgray, 20); cv::cuda::fastNlMeansDenoising(GpuMat(gray), dgray, 20);
fnlmd.labMethod(GpuMat(bgr), dbgr, 20, 10); cv::cuda::fastNlMeansDenoisingColored(GpuMat(bgr), dbgr, 20, 10);
#if 0 #if 0
dumpImage("../gpu/denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr)); dumpImage("../gpu/denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr));