From f2c30cd90d46caf28d1933ff741a06cc7a252d5e Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Sat, 9 Jun 2012 15:24:01 +0000 Subject: [PATCH] resize area for big downscaling integration --- modules/gpu/include/opencv2/gpu/gpu.hpp | 6 ++- modules/gpu/perf/perf_imgproc.cpp | 34 +++++++++++++ modules/gpu/perf_cpu/perf_imgproc.cpp | 31 ++++++++++++ modules/gpu/src/cuda/resize.cu | 48 +++++++++++++++++- modules/gpu/src/resize.cpp | 67 ++++++++++++++++++++++++- 5 files changed, 183 insertions(+), 3 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 2d1902245..0e01d9cfc 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -626,9 +626,13 @@ CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& strea CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); //! resizes the image -//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC +//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); +//! resizes the image +//! Supports INTER_AREA +CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, GpuMat& buffer, double fx=0, double fy=0, int interpolation = INTER_AREA, Stream& stream = Stream::Null()); + //! warps the image using affine transformation //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR, diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index b261ad086..78ca5bf0e 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -90,6 +90,40 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine( Interpolation(cv::INTER_CUBIC), Interpolation(cv::INTER_AREA)), testing::Values(Scale(0.5), Scale(0.3), Scale(2.0)))); +GPU_PERF_TEST(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, Scale) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + int interpolation = cv::INTER_AREA; + double f = GET_PARAM(3); + + cv::Mat src_host(size, type); + fill(src_host, 0, 255); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::resize(src, dst, cv::Size(), f, f, interpolation); + + declare.time(1.0); + + TEST_CYCLE() + { + cv::gpu::resize(src, dst, cv::Size(), f, f, interpolation); + } +} + +INSTANTIATE_TEST_CASE_P(ImgProc, ResizeArea, testing::Combine( + ALL_DEVICES, + testing::Values(perf::sz1080p, cv::Size(4096, 2048)), + testing::Values(MatType(CV_8UC1)/*, MatType(CV_8UC3), MatType(CV_8UC4), + MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), + MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)*/), + testing::Values(Scale(0.2),Scale(0.1),Scale(0.05)))); + ////////////////////////////////////////////////////////////////////// // WarpAffine diff --git a/modules/gpu/perf_cpu/perf_imgproc.cpp b/modules/gpu/perf_cpu/perf_imgproc.cpp index 9fe9fd07a..9a1adde81 100644 --- a/modules/gpu/perf_cpu/perf_imgproc.cpp +++ b/modules/gpu/perf_cpu/perf_imgproc.cpp @@ -80,6 +80,37 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine( Interpolation(cv::INTER_CUBIC), Interpolation(cv::INTER_AREA)), testing::Values(Scale(0.5), Scale(0.3), Scale(2.0)))); +GPU_PERF_TEST(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, Scale) +{ + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + int interpolation = cv::INTER_AREA; + double f = GET_PARAM(3); + + cv::Mat src_host(size, type); + fill(src_host, 0, 255); + + cv::Mat src(src_host); + cv::Mat dst; + + cv::resize(src, dst, cv::Size(), f, f, interpolation); + + declare.time(1.0); + + TEST_CYCLE() + { + cv::resize(src, dst, cv::Size(), f, f, interpolation); + } +} + +INSTANTIATE_TEST_CASE_P(ImgProc, ResizeArea, testing::Combine( + ALL_DEVICES, + testing::Values(perf::sz1080p, cv::Size(4096, 2048)), + testing::Values(MatType(CV_8UC1)/*, MatType(CV_8UC3), MatType(CV_8UC4), + MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), + MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)*/), + testing::Values(Scale(0.2),Scale(0.1),Scale(0.05)))); + ////////////////////////////////////////////////////////////////////// // WarpAffine diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu index a51c04306..a0686d0d5 100644 --- a/modules/gpu/src/cuda/resize.cu +++ b/modules/gpu/src/cuda/resize.cu @@ -116,7 +116,6 @@ namespace cv { namespace gpu { namespace device { dim3 block(32, 8); dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); - BrdConstant brd(src.rows, src.cols); BorderReader< PtrStep, BrdConstant > brdSrc(src, brd); IntegerAreaFilter< BorderReader< PtrStep, BrdConstant > > filteredSrc(brdSrc, fx, fy); @@ -278,5 +277,52 @@ namespace cv { namespace gpu { namespace device //template void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream); template void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream); template void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream); + + template struct scan_traits{}; + + template<> struct scan_traits + { + typedef int scan_line_type; + }; + + template + __global__ void resize_area_scan(const Ptr2D src, int fx, int fy, DevMem2D_ dst, DevMem2D_ buffer) + { + typedef typename scan_traits::scan_line_type W; + extern __shared__ W line[]; + + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + } + + template struct InterAreaDispatcherStream + { + static void call(DevMem2D_ src, int fx, int fy, DevMem2D_ dst, DevMem2D_ buffer, cudaStream_t stream) + { + dim3 block(256, 1); + dim3 grid(divUp(dst.cols, block.x), 1); + + resize_area_scan<<::scan_line_type) >>>(src, fx, fy, dst, buffer); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + + template + void resize_area_gpu(DevMem2Db src, DevMem2Db dst,float fx, float fy, + int interpolation, DevMem2Db buffer, cudaStream_t stream) + { + (void)interpolation; + + int iscale_x = round(fx); + int iscale_y = round(fy); + + InterAreaDispatcherStream::call(src, iscale_x, iscale_y, dst, buffer, stream); + } + + template void resize_area_gpu(DevMem2Db src, DevMem2Db dst, float fx, float fy, int interpolation, DevMem2Db buffer, cudaStream_t stream); + } // namespace imgproc }}} // namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/resize.cpp b/modules/gpu/src/resize.cpp index 672dbc288..4f8ea0517 100644 --- a/modules/gpu/src/resize.cpp +++ b/modules/gpu/src/resize.cpp @@ -44,7 +44,32 @@ #ifndef HAVE_CUDA -void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); } +void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s) +{ + (void)src; + (void)dst; + (void)dsize; + (void)fx; + (void)fy; + (void)interpolation; + (void)s; + + throw_nogpu(); +} +void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, + int interpolation, const GpuMat& buffer, Stream& s) +{ + (void)src; + (void)dst; + (void)dsize; + (void)fx; + (void)fy; + (void)interpolation; + (void)buffer; + (void)s; + + throw_nogpu(); +} #else // HAVE_CUDA @@ -55,9 +80,49 @@ namespace cv { namespace gpu { namespace device template void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream); + + template + void resize_area_gpu(DevMem2Db src, DevMem2Db dst,float fx, float fy, + int interpolation, DevMem2Db buffer, cudaStream_t stream); } }}} +void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, GpuMat& buffer, double fx, double fy, + int interpolation, Stream& s) +{ + CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); + CV_Assert(interpolation == INTER_AREA); + CV_Assert( (fx < 1.0) && (fy < 1.0)); + CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0)); + + if (dsize == Size()) + dsize = Size(saturate_cast(src.cols * fx), saturate_cast(src.rows * fy)); + else + { + fx = static_cast(dsize.width) / src.cols; + fy = static_cast(dsize.height) / src.rows; + } + + fx = static_cast(1.0 / fx); + fy = static_cast(1.0 / fy); + + dst.create(dsize, src.type()); + buffer.create(cv::Size(dsize.width, src.rows), src.type()); + + if (dsize == src.size()) + { + if (s) + s.enqueueCopy(src, dst); + else + src.copyTo(dst); + return; + } + + cudaStream_t stream = StreamAccessor::getStream(s); + + cv::gpu::device::imgproc::resize_area_gpu(src, dst, fx, fy, interpolation, buffer, stream); +} + void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s) { CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);