From f826bd8bce08010786a1a6be66a353ba10974126 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 22 Aug 2013 11:46:09 +0400 Subject: [PATCH 1/5] removed NPP implementation --- modules/gpu/src/resize.cpp | 108 +++++++++---------------------- modules/gpu/test/test_resize.cpp | 47 +------------- 2 files changed, 33 insertions(+), 122 deletions(-) diff --git a/modules/gpu/src/resize.cpp b/modules/gpu/src/resize.cpp index 685013542..0e8c865fa 100644 --- a/modules/gpu/src/resize.cpp +++ b/modules/gpu/src/resize.cpp @@ -44,18 +44,7 @@ #if !defined HAVE_CUDA || defined(CUDA_DISABLER) -void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s) -{ - (void)src; - (void)dst; - (void)dsize; - (void)fx; - (void)fy; - (void)interpolation; - (void)s; - - throw_nogpu(); -} +void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); } #else // HAVE_CUDA @@ -69,94 +58,57 @@ namespace cv { namespace gpu { namespace device } }}} -void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s) +void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& stream) { - CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); - CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR - || interpolation == INTER_CUBIC || interpolation == INTER_AREA); - CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0)); + using namespace ::cv::gpu::device::imgproc; + + typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream); + static const func_t funcs[6][4] = + { + {resize_gpu , 0 /*resize_gpu*/ , resize_gpu , resize_gpu }, + {0 /*resize_gpu*/, 0 /*resize_gpu*/ , 0 /*resize_gpu*/, 0 /*resize_gpu*/}, + {resize_gpu , 0 /*resize_gpu*/, resize_gpu , resize_gpu }, + {resize_gpu , 0 /*resize_gpu*/ , resize_gpu , resize_gpu }, + {0 /*resize_gpu*/ , 0 /*resize_gpu*/ , 0 /*resize_gpu*/ , 0 /*resize_gpu*/ }, + {resize_gpu , 0 /*resize_gpu*/ , resize_gpu , resize_gpu } + }; + + CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 ); + CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA ); + CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) ); if (dsize == Size()) + { dsize = Size(saturate_cast(src.cols * fx), saturate_cast(src.rows * fy)); + } else { fx = static_cast(dsize.width) / src.cols; fy = static_cast(dsize.height) / src.rows; } - if (dsize != dst.size()) - dst.create(dsize, src.type()); + + dst.create(dsize, src.type()); if (dsize == src.size()) { - if (s) - s.enqueueCopy(src, dst); + if (stream) + stream.enqueueCopy(src, dst); else src.copyTo(dst); return; } - cudaStream_t stream = StreamAccessor::getStream(s); + const func_t func = funcs[src.depth()][src.channels() - 1]; + + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); Size wholeSize; Point ofs; src.locateROI(wholeSize, ofs); + PtrStepSzb wholeSrc(wholeSize.height, wholeSize.width, src.datastart, src.step); - bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4); - useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR); - - if (useNpp) - { - typedef NppStatus (*func_t)(const Npp8u * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u * pDst, int nDstStep, NppiSize dstROISize, - double xFactor, double yFactor, int eInterpolation); - - const func_t funcs[4] = { nppiResize_8u_C1R, 0, 0, nppiResize_8u_C4R }; - - static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS}; - - NppiSize srcsz; - srcsz.width = wholeSize.width; - srcsz.height = wholeSize.height; - - NppiRect srcrect; - srcrect.x = ofs.x; - srcrect.y = ofs.y; - srcrect.width = src.cols; - srcrect.height = src.rows; - - NppiSize dstsz; - dstsz.width = dst.cols; - dstsz.height = dst.rows; - - NppStreamHandler h(stream); - - nppSafeCall( funcs[src.channels() - 1](src.datastart, srcsz, static_cast(src.step), srcrect, - dst.ptr(), static_cast(dst.step), dstsz, fx, fy, npp_inter[interpolation]) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - else - { - using namespace ::cv::gpu::device::imgproc; - - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream); - - static const func_t funcs[6][4] = - { - {resize_gpu , 0 /*resize_gpu*/ , resize_gpu , resize_gpu }, - {0 /*resize_gpu*/, 0 /*resize_gpu*/ , 0 /*resize_gpu*/, 0 /*resize_gpu*/}, - {resize_gpu , 0 /*resize_gpu*/, resize_gpu , resize_gpu }, - {resize_gpu , 0 /*resize_gpu*/ , resize_gpu , resize_gpu }, - {0 /*resize_gpu*/ , 0 /*resize_gpu*/ , 0 /*resize_gpu*/ , 0 /*resize_gpu*/ }, - {resize_gpu , 0 /*resize_gpu*/ , resize_gpu , resize_gpu } - }; - - const func_t func = funcs[src.depth()][src.channels() - 1]; - CV_Assert(func != 0); - - func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, - static_cast(1.0 / fx), static_cast(1.0 / fy), dst, interpolation, stream); - } + func(src, wholeSrc, ofs.x, ofs.y, static_cast(1.0 / fx), static_cast(1.0 / fy), dst, interpolation, StreamAccessor::getStream(stream)); } #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp index 593c891e6..88e6b1cab 100644 --- a/modules/gpu/test/test_resize.cpp +++ b/modules/gpu/test/test_resize.cpp @@ -155,7 +155,7 @@ GPU_TEST_P(Resize, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(0.3, 0.5, 1.5, 2.0), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), WHOLE_SUBMAT)); @@ -201,50 +201,9 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(0.3, 0.5), - testing::Values(Interpolation(cv::INTER_AREA), Interpolation(cv::INTER_NEAREST)), //, Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC) + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_AREA)), WHOLE_SUBMAT)); -/////////////////////////////////////////////////////////////////// -// Test NPP - -PARAM_TEST_CASE(ResizeNPP, cv::gpu::DeviceInfo, MatType, double, Interpolation) -{ - cv::gpu::DeviceInfo devInfo; - double coeff; - int interpolation; - int type; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - type = GET_PARAM(1); - coeff = GET_PARAM(2); - interpolation = GET_PARAM(3); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -GPU_TEST_P(ResizeNPP, Accuracy) -{ - cv::Mat src = readImageType("stereobp/aloe-L.png", type); - ASSERT_FALSE(src.empty()); - - cv::gpu::GpuMat dst; - cv::gpu::resize(loadMat(src), dst, cv::Size(), coeff, coeff, interpolation); - - cv::Mat dst_gold; - resizeGold(src, dst_gold, coeff, coeff, interpolation); - - EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-1); -} - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeNPP, testing::Combine( - ALL_DEVICES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)), - testing::Values(0.3, 0.5, 1.5, 2.0), - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)))); - #endif // HAVE_CUDA From 3b05acf9362a4e554a8590639a1fda7a2658d4e9 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 22 Aug 2013 12:03:17 +0400 Subject: [PATCH 2/5] reorganize code for further modifiction --- modules/gpu/src/cuda/resize.cu | 136 +++++++++++++-------------------- 1 file changed, 52 insertions(+), 84 deletions(-) diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu index e72029767..c244ca7d6 100644 --- a/modules/gpu/src/cuda/resize.cu +++ b/modules/gpu/src/cuda/resize.cu @@ -42,20 +42,19 @@ #if !defined CUDA_DISABLER -#include "internal_shared.hpp" +#include +#include "opencv2/gpu/device/common.hpp" #include "opencv2/gpu/device/border_interpolate.hpp" #include "opencv2/gpu/device/vec_traits.hpp" #include "opencv2/gpu/device/vec_math.hpp" #include "opencv2/gpu/device/saturate_cast.hpp" #include "opencv2/gpu/device/filters.hpp" -#include -#include namespace cv { namespace gpu { namespace device { namespace imgproc { - template __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz dst) + template __global__ void resize(const Ptr2D src, const float fx, const float fy, PtrStepSz dst) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; @@ -69,23 +68,12 @@ namespace cv { namespace gpu { namespace device } } - template __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz dst) - { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < dst.cols && y < dst.rows) - { - dst(y, x) = saturate_cast(src(y, x)); - } - } - template