From fc8476544c7404bbc9fea06df6252e4ccf8d0366 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 30 Apr 2013 10:59:13 +0400 Subject: [PATCH] switched to Input/Output Array in Histogram Processing --- .../gpuimgproc/include/opencv2/gpuimgproc.hpp | 97 ++- modules/gpuimgproc/perf/perf_histogram.cpp | 3 +- modules/gpuimgproc/src/histogram.cpp | 651 +++++++++--------- samples/gpu/performance/tests.cpp | 5 +- 4 files changed, 389 insertions(+), 367 deletions(-) diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp index 9f3dd96f9..f880b6cab 100644 --- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp +++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp @@ -98,46 +98,18 @@ CV_EXPORTS void alphaComp(InputArray img1, InputArray img2, OutputArray dst, int ////////////////////////////// Histogram /////////////////////////////// -//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type. -CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel); - -//! Calculates histogram with evenly distributed bins for signle channel source. -//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types. -//! Output hist will have one row and histSize cols and CV_32SC1 type. -CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); -CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); - -//! Calculates histogram with evenly distributed bins for four-channel source. -//! All channels of source are processed separately. -//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types. -//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type. -CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()); -CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()); - -//! Calculates histogram with bins determined by levels array. -//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise. -//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types. -//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type. -CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null()); -CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null()); - -//! Calculates histogram with bins determined by levels array. -//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise. -//! All channels of source are processed separately. -//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types. -//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type. -CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null()); -CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream = Stream::Null()); - //! Calculates histogram for 8u one channel image //! Output hist will have one row, 256 cols and CV32SC1 type. -CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null()); -CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null()); +CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stream::Null()); //! normalizes the grayscale image brightness and contrast by normalizing its histogram -CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); -CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null()); -CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null()); +CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null()); + +static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) +{ + GpuMat buf; + gpu::equalizeHist(src, dst, buf, stream); +} class CV_EXPORTS CLAHE : public cv::CLAHE { @@ -145,7 +117,58 @@ public: using cv::CLAHE::apply; virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0; }; -CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); +CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); + +//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type. +CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel); + +//! Calculates histogram with evenly distributed bins for signle channel source. +//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types. +//! Output hist will have one row and histSize cols and CV_32SC1 type. +CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); + +static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()) +{ + GpuMat buf; + gpu::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream); +} + +//! Calculates histogram with evenly distributed bins for four-channel source. +//! All channels of source are processed separately. +//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types. +//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type. +CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()); + +static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()) +{ + GpuMat buf; + gpu::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream); +} + +//! Calculates histogram with bins determined by levels array. +//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise. +//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types. +//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type. +CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null()); + +static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null()) +{ + GpuMat buf; + gpu::histRange(src, hist, levels, buf, stream); +} + +//! Calculates histogram with bins determined by levels array. +//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise. +//! All channels of source are processed separately. +//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types. +//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type. +CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null()); + +static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null()) +{ + GpuMat buf; + gpu::histRange(src, hist, levels, buf, stream); +} //////////////////////////////// Canny //////////////////////////////// diff --git a/modules/gpuimgproc/perf/perf_histogram.cpp b/modules/gpuimgproc/perf/perf_histogram.cpp index 51f7416f9..d8def54ff 100644 --- a/modules/gpuimgproc/perf/perf_histogram.cpp +++ b/modules/gpuimgproc/perf/perf_histogram.cpp @@ -167,10 +167,9 @@ PERF_TEST_P(Sz, EqualizeHist, { const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_hist; cv::gpu::GpuMat d_buf; - TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_hist, d_buf); + TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_buf); GPU_SANITY_CHECK(dst); } diff --git a/modules/gpuimgproc/src/histogram.cpp b/modules/gpuimgproc/src/histogram.cpp index 3227dac6c..e54b33727 100644 --- a/modules/gpuimgproc/src/histogram.cpp +++ b/modules/gpuimgproc/src/histogram.cpp @@ -47,319 +47,22 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_no_cuda(); } +void cv::gpu::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*, Stream&) { throw_no_cuda(); } -void cv::gpu::histEven(const GpuMat&, GpuMat*, GpuMat&, int*, int*, int*, Stream&) { throw_no_cuda(); } - -void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_no_cuda(); } -void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, GpuMat&, Stream&) { throw_no_cuda(); } - -void cv::gpu::calcHist(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } - -void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); } cv::Ptr cv::gpu::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr(); } +void cv::gpu::evenLevels(OutputArray, int, int, int) { throw_no_cuda(); } + +void cv::gpu::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); } +void cv::gpu::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); } + +void cv::gpu::histRange(InputArray, OutputArray, InputArray, InputOutputArray, Stream&) { throw_no_cuda(); } +void cv::gpu::histRange(InputArray, GpuMat*, const GpuMat*, InputOutputArray, Stream&) { throw_no_cuda(); } + #else /* !defined (HAVE_CUDA) */ -//////////////////////////////////////////////////////////////////////// -// NPP Histogram - -namespace -{ - typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize); - typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize); - - template struct NppHistogramEvenFuncC1 - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist, - int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer); - }; - template struct NppHistogramEvenFuncC4 - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, - Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer); - }; - - template::func_ptr func, get_buf_size_c1_t get_buf_size> - struct NppHistogramEvenC1 - { - typedef typename NppHistogramEvenFuncC1::src_t src_t; - - static void hist(const GpuMat& src, GpuMat& hist, GpuMat& buffer, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream) - { - int levels = histSize + 1; - hist.create(1, histSize, CV_32S); - - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - int buf_size; - get_buf_size(sz, levels, &buf_size); - - ensureSizeIsEnough(1, buf_size, CV_8U, buffer); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), sz, hist.ptr(), levels, - lowerLevel, upperLevel, buffer.ptr()) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func, get_buf_size_c4_t get_buf_size> - struct NppHistogramEvenC4 - { - typedef typename NppHistogramEvenFuncC4::src_t src_t; - - static void hist(const GpuMat& src, GpuMat hist[4], GpuMat& buffer, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream) - { - int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1}; - hist[0].create(1, histSize[0], CV_32S); - hist[1].create(1, histSize[1], CV_32S); - hist[2].create(1, histSize[2], CV_32S); - hist[3].create(1, histSize[3], CV_32S); - - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Npp32s* pHist[] = {hist[0].ptr(), hist[1].ptr(), hist[2].ptr(), hist[3].ptr()}; - - int buf_size; - get_buf_size(sz, levels, &buf_size); - - ensureSizeIsEnough(1, buf_size, CV_8U, buffer); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr()) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - template struct NppHistogramRangeFuncC1 - { - typedef typename NPPTypeTraits::npp_type src_t; - typedef Npp32s level_t; - enum {LEVEL_TYPE_CODE=CV_32SC1}; - - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist, - const Npp32s* pLevels, int nLevels, Npp8u* pBuffer); - }; - template<> struct NppHistogramRangeFuncC1 - { - typedef Npp32f src_t; - typedef Npp32f level_t; - enum {LEVEL_TYPE_CODE=CV_32FC1}; - - typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist, - const Npp32f* pLevels, int nLevels, Npp8u* pBuffer); - }; - template struct NppHistogramRangeFuncC4 - { - typedef typename NPPTypeTraits::npp_type src_t; - typedef Npp32s level_t; - enum {LEVEL_TYPE_CODE=CV_32SC1}; - - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4], - const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer); - }; - template<> struct NppHistogramRangeFuncC4 - { - typedef Npp32f src_t; - typedef Npp32f level_t; - enum {LEVEL_TYPE_CODE=CV_32FC1}; - - typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4], - const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer); - }; - - template::func_ptr func, get_buf_size_c1_t get_buf_size> - struct NppHistogramRangeC1 - { - typedef typename NppHistogramRangeFuncC1::src_t src_t; - typedef typename NppHistogramRangeFuncC1::level_t level_t; - enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1::LEVEL_TYPE_CODE}; - - static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buffer, cudaStream_t stream) - { - CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1); - - hist.create(1, levels.cols - 1, CV_32S); - - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - int buf_size; - get_buf_size(sz, levels.cols, &buf_size); - - ensureSizeIsEnough(1, buf_size, CV_8U, buffer); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), sz, hist.ptr(), levels.ptr(), levels.cols, buffer.ptr()) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func, get_buf_size_c4_t get_buf_size> - struct NppHistogramRangeC4 - { - typedef typename NppHistogramRangeFuncC4::src_t src_t; - typedef typename NppHistogramRangeFuncC1::level_t level_t; - enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1::LEVEL_TYPE_CODE}; - - static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buffer, cudaStream_t stream) - { - CV_Assert(levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1); - CV_Assert(levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1); - CV_Assert(levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1); - CV_Assert(levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1); - - hist[0].create(1, levels[0].cols - 1, CV_32S); - hist[1].create(1, levels[1].cols - 1, CV_32S); - hist[2].create(1, levels[2].cols - 1, CV_32S); - hist[3].create(1, levels[3].cols - 1, CV_32S); - - Npp32s* pHist[] = {hist[0].ptr(), hist[1].ptr(), hist[2].ptr(), hist[3].ptr()}; - int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols}; - const level_t* pLevels[] = {levels[0].ptr(), levels[1].ptr(), levels[2].ptr(), levels[3].ptr()}; - - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - int buf_size; - get_buf_size(sz, nLevels, &buf_size); - - ensureSizeIsEnough(1, buf_size, CV_8U, buffer); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), sz, pHist, pLevels, nLevels, buffer.ptr()) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; -} - -void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel) -{ - Mat host_levels(1, nLevels, CV_32SC1); - nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr(), nLevels, lowerLevel, upperLevel) ); - levels.upload(host_levels); -} - -void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream) -{ - GpuMat buf; - histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream); -} - -void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream) -{ - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 ); - - typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, GpuMat& buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream); - static const hist_t hist_callers[] = - { - NppHistogramEvenC1::hist, - 0, - NppHistogramEvenC1::hist, - NppHistogramEvenC1::hist - }; - - hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream)); -} - -void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream) -{ - GpuMat buf; - histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream); -} - -void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream) -{ - CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 ); - - typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream); - static const hist_t hist_callers[] = - { - NppHistogramEvenC4::hist, - 0, - NppHistogramEvenC4::hist, - NppHistogramEvenC4::hist - }; - - hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream)); -} - -void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream) -{ - GpuMat buf; - histRange(src, hist, levels, buf, stream); -} - -void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream) -{ - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1); - - typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, cudaStream_t stream); - static const hist_t hist_callers[] = - { - NppHistogramRangeC1::hist, - 0, - NppHistogramRangeC1::hist, - NppHistogramRangeC1::hist, - 0, - NppHistogramRangeC1::hist - }; - - hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream)); -} - -void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream) -{ - GpuMat buf; - histRange(src, hist, levels, buf, stream); -} - -void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream) -{ - CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4); - - typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, cudaStream_t stream); - static const hist_t hist_callers[] = - { - NppHistogramRangeC4::hist, - 0, - NppHistogramRangeC4::hist, - NppHistogramRangeC4::hist, - 0, - NppHistogramRangeC4::hist - }; - - hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream)); -} - //////////////////////////////////////////////////////////////////////// // calcHist @@ -368,12 +71,16 @@ namespace hist void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream); } -void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream) +void cv::gpu::calcHist(InputArray _src, OutputArray _hist, Stream& stream) { - CV_Assert(src.type() == CV_8UC1); + GpuMat src = _src.getGpuMat(); - hist.create(1, 256, CV_32SC1); - hist.setTo(Scalar::all(0)); + CV_Assert( src.type() == CV_8UC1 ); + + _hist.create(1, 256, CV_32SC1); + GpuMat hist = _hist.getGpuMat(); + + hist.setTo(Scalar::all(0), stream); hist::histogram256(src, hist.ptr(), StreamAccessor::getStream(stream)); } @@ -386,31 +93,30 @@ namespace hist void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream); } -void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream) +void cv::gpu::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream) { - GpuMat hist; - GpuMat buf; - equalizeHist(src, dst, hist, buf, stream); -} + GpuMat src = _src.getGpuMat(); -void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s) -{ - CV_Assert(src.type() == CV_8UC1); + CV_Assert( src.type() == CV_8UC1 ); - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); int intBufSize; nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) ); - ensureSizeIsEnough(1, intBufSize + 256 * sizeof(int), CV_8UC1, buf); + size_t bufSize = intBufSize + 2 * 256 * sizeof(int); - GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr()); - GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize); + ensureSizeIsEnough(1, static_cast(bufSize), CV_8UC1, _buf); + GpuMat buf = _buf.getGpuMat(); - calcHist(src, hist, s); + GpuMat hist(1, 256, CV_32SC1, buf.data); + GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int)); + GpuMat intBuf(1, intBufSize, CV_8UC1, buf.data + 2 * 256 * sizeof(int)); - cudaStream_t stream = StreamAccessor::getStream(s); + gpu::calcHist(src, hist, _stream); + cudaStream_t stream = StreamAccessor::getStream(_stream); NppStreamHandler h(stream); nppSafeCall( nppsIntegral_32s(hist.ptr(), lut.ptr(), 256, intBuf.ptr()) ); @@ -554,4 +260,299 @@ cv::Ptr cv::gpu::createCLAHE(double clipLimit, cv::Size tileGrid return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); } +//////////////////////////////////////////////////////////////////////// +// NPP Histogram + +namespace +{ + typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize); + typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize); + + template struct NppHistogramEvenFuncC1 + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist, + int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer); + }; + template struct NppHistogramEvenFuncC4 + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, + Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer); + }; + + template::func_ptr func, get_buf_size_c1_t get_buf_size> + struct NppHistogramEvenC1 + { + typedef typename NppHistogramEvenFuncC1::src_t src_t; + + static void hist(const GpuMat& src, OutputArray _hist, InputOutputArray _buf, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream) + { + const int levels = histSize + 1; + + _hist.create(1, histSize, CV_32S); + GpuMat hist = _hist.getGpuMat(); + + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + int buf_size; + get_buf_size(sz, levels, &buf_size); + + ensureSizeIsEnough(1, buf_size, CV_8UC1, _buf); + GpuMat buf = _buf.getGpuMat(); + + NppStreamHandler h(stream); + + nppSafeCall( func(src.ptr(), static_cast(src.step), sz, hist.ptr(), levels, + lowerLevel, upperLevel, buf.ptr()) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template::func_ptr func, get_buf_size_c4_t get_buf_size> + struct NppHistogramEvenC4 + { + typedef typename NppHistogramEvenFuncC4::src_t src_t; + + static void hist(const GpuMat& src, GpuMat hist[4],InputOutputArray _buf, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream) + { + int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1}; + hist[0].create(1, histSize[0], CV_32S); + hist[1].create(1, histSize[1], CV_32S); + hist[2].create(1, histSize[2], CV_32S); + hist[3].create(1, histSize[3], CV_32S); + + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + Npp32s* pHist[] = {hist[0].ptr(), hist[1].ptr(), hist[2].ptr(), hist[3].ptr()}; + + int buf_size; + get_buf_size(sz, levels, &buf_size); + + ensureSizeIsEnough(1, buf_size, CV_8U, _buf); + GpuMat buf = _buf.getGpuMat(); + + NppStreamHandler h(stream); + + nppSafeCall( func(src.ptr(), static_cast(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr()) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + + template struct NppHistogramRangeFuncC1 + { + typedef typename NPPTypeTraits::npp_type src_t; + typedef Npp32s level_t; + enum {LEVEL_TYPE_CODE=CV_32SC1}; + + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist, + const Npp32s* pLevels, int nLevels, Npp8u* pBuffer); + }; + template<> struct NppHistogramRangeFuncC1 + { + typedef Npp32f src_t; + typedef Npp32f level_t; + enum {LEVEL_TYPE_CODE=CV_32FC1}; + + typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist, + const Npp32f* pLevels, int nLevels, Npp8u* pBuffer); + }; + template struct NppHistogramRangeFuncC4 + { + typedef typename NPPTypeTraits::npp_type src_t; + typedef Npp32s level_t; + enum {LEVEL_TYPE_CODE=CV_32SC1}; + + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4], + const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer); + }; + template<> struct NppHistogramRangeFuncC4 + { + typedef Npp32f src_t; + typedef Npp32f level_t; + enum {LEVEL_TYPE_CODE=CV_32FC1}; + + typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4], + const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer); + }; + + template::func_ptr func, get_buf_size_c1_t get_buf_size> + struct NppHistogramRangeC1 + { + typedef typename NppHistogramRangeFuncC1::src_t src_t; + typedef typename NppHistogramRangeFuncC1::level_t level_t; + enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1::LEVEL_TYPE_CODE}; + + static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, InputOutputArray _buf, cudaStream_t stream) + { + CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 ); + + _hist.create(1, levels.cols - 1, CV_32S); + GpuMat hist = _hist.getGpuMat(); + + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + int buf_size; + get_buf_size(sz, levels.cols, &buf_size); + + ensureSizeIsEnough(1, buf_size, CV_8U, _buf); + GpuMat buf = _buf.getGpuMat(); + + NppStreamHandler h(stream); + + nppSafeCall( func(src.ptr(), static_cast(src.step), sz, hist.ptr(), levels.ptr(), levels.cols, buf.ptr()) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template::func_ptr func, get_buf_size_c4_t get_buf_size> + struct NppHistogramRangeC4 + { + typedef typename NppHistogramRangeFuncC4::src_t src_t; + typedef typename NppHistogramRangeFuncC1::level_t level_t; + enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1::LEVEL_TYPE_CODE}; + + static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4],InputOutputArray _buf, cudaStream_t stream) + { + CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 ); + CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 ); + CV_Assert( levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1 ); + CV_Assert( levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1 ); + + hist[0].create(1, levels[0].cols - 1, CV_32S); + hist[1].create(1, levels[1].cols - 1, CV_32S); + hist[2].create(1, levels[2].cols - 1, CV_32S); + hist[3].create(1, levels[3].cols - 1, CV_32S); + + Npp32s* pHist[] = {hist[0].ptr(), hist[1].ptr(), hist[2].ptr(), hist[3].ptr()}; + int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols}; + const level_t* pLevels[] = {levels[0].ptr(), levels[1].ptr(), levels[2].ptr(), levels[3].ptr()}; + + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + int buf_size; + get_buf_size(sz, nLevels, &buf_size); + + ensureSizeIsEnough(1, buf_size, CV_8U, _buf); + GpuMat buf = _buf.getGpuMat(); + + NppStreamHandler h(stream); + + nppSafeCall( func(src.ptr(), static_cast(src.step), sz, pHist, pLevels, nLevels, buf.ptr()) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; +} + +void cv::gpu::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel) +{ + const int kind = _levels.kind(); + + _levels.create(1, nLevels, CV_32SC1); + + Mat host_levels; + if (kind == _InputArray::GPU_MAT) + host_levels.create(1, nLevels, CV_32SC1); + else + host_levels = _levels.getMat(); + + nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr(), nLevels, lowerLevel, upperLevel) ); + + if (kind == _InputArray::GPU_MAT) + _levels.getGpuMatRef().upload(host_levels); +} + +void cv::gpu::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream) +{ + typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream); + static const hist_t hist_callers[] = + { + NppHistogramEvenC1::hist, + 0, + NppHistogramEvenC1::hist, + NppHistogramEvenC1::hist + }; + + GpuMat src = _src.getGpuMat(); + + CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 ); + + hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream)); +} + +void cv::gpu::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream) +{ + typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream); + static const hist_t hist_callers[] = + { + NppHistogramEvenC4::hist, + 0, + NppHistogramEvenC4::hist, + NppHistogramEvenC4::hist + }; + + GpuMat src = _src.getGpuMat(); + + CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 ); + + hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream)); +} + +void cv::gpu::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream) +{ + typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream); + static const hist_t hist_callers[] = + { + NppHistogramRangeC1::hist, + 0, + NppHistogramRangeC1::hist, + NppHistogramRangeC1::hist, + 0, + NppHistogramRangeC1::hist + }; + + GpuMat src = _src.getGpuMat(); + GpuMat levels = _levels.getGpuMat(); + + CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 ); + + hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream)); +} + +void cv::gpu::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream) +{ + typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, cudaStream_t stream); + static const hist_t hist_callers[] = + { + NppHistogramRangeC4::hist, + 0, + NppHistogramRangeC4::hist, + NppHistogramRangeC4::hist, + 0, + NppHistogramRangeC4::hist + }; + + GpuMat src = _src.getGpuMat(); + + CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 ); + + hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream)); +} + #endif /* !defined (HAVE_CUDA) */ diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 4333b7625..7193ee93a 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -1047,13 +1047,12 @@ TEST(equalizeHist) gpu::GpuMat d_src(src); gpu::GpuMat d_dst; - gpu::GpuMat d_hist; gpu::GpuMat d_buf; - gpu::equalizeHist(d_src, d_dst, d_hist, d_buf); + gpu::equalizeHist(d_src, d_dst, d_buf); GPU_ON; - gpu::equalizeHist(d_src, d_dst, d_hist, d_buf); + gpu::equalizeHist(d_src, d_dst, d_buf); GPU_OFF; } }