From 17d9014373598b988499e818b8361fdf9660ee1a Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Mon, 6 Dec 2010 08:10:11 +0000
Subject: [PATCH] added per-element min/max to gpu module. fixed compile error
 in transform.

---
 modules/gpu/include/opencv2/gpu/gpu.hpp |  20 ++++
 modules/gpu/src/arithm.cpp              | 147 ++++++++++++++++++++++++
 modules/gpu/src/cuda/mathfunc.cu        | 124 ++++++++++++++++++++
 modules/gpu/src/cuda/transform.hpp      |   6 +-
 tests/gpu/src/arithm.cpp                |  25 ++++
 5 files changed, 319 insertions(+), 3 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 6f941f33b..a8d35944b 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -546,6 +546,26 @@ namespace cv
         CV_EXPORTS GpuMat operator & (const GpuMat& src1, const GpuMat& src2);
         CV_EXPORTS GpuMat operator ^ (const GpuMat& src1, const GpuMat& src2);
 
+        //! computes per-element minimum of two arrays (dst = min(src1, src2))
+        CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
+        //! Async version
+        CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream);
+
+        //! computes per-element minimum of array and scalar (dst = min(src1, src2))
+        CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst);
+        //! Async version
+        CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);
+
+        //! computes per-element maximum of two arrays (dst = max(src1, src2))
+        CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
+        //! Async version
+        CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream);
+
+        //! computes per-element maximum of array and scalar (dst = max(src1, src2))
+        CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst);
+        //! Async version
+        CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);
+
 
         ////////////////////////////// Image processing //////////////////////////////
 
diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp
index e1ac3d2fa..0d9131416 100644
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -98,6 +98,14 @@ cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat&) { throw_nogpu(); return GpuM
 cv::gpu::GpuMat cv::gpu::operator | (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
 cv::gpu::GpuMat cv::gpu::operator & (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
 cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
+void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
+void cv::gpu::min(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
+void cv::gpu::min(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
+void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
+void cv::gpu::max(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
+void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -1119,5 +1127,144 @@ cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat& src1, const GpuMat& src2)
     return dst;
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// min/max
+
+namespace cv { namespace gpu { namespace mathfunc
+{
+    template <typename T>
+    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
+
+    template <typename T>
+    void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
+
+    template <typename T>
+    void min_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream);
+
+    template <typename T>
+    void max_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream);
+}}}
+
+namespace
+{
+    template <typename T>
+    void min_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
+    {
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        dst.create(src1.size(), src1.type());
+        mathfunc::min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
+    }
+
+    template <typename T>
+    void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
+    {
+        dst.create(src1.size(), src1.type());
+        mathfunc::min_gpu<T>(src1.reshape(1), src2, dst.reshape(1), stream);
+    }
+
+    template <typename T>
+    void max_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
+    {
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        dst.create(src1.size(), src1.type());
+        mathfunc::max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
+    }
+
+    template <typename T>
+    void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
+    {
+        dst.create(src1.size(), src1.type());
+        mathfunc::max_gpu<T>(src1.reshape(1), src2, dst.reshape(1), stream);
+    }
+}
+
+void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
+        min_caller<float>, min_caller<double>
+    };
+    funcs[src1.depth()](src1, src2, dst, 0);
+}
+
+void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
+        min_caller<float>, min_caller<double>
+    };
+    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst)
+{
+    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
+        min_caller<float>, min_caller<double>
+    };
+    funcs[src1.depth()](src1, src2, dst, 0);
+}
+
+void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
+        min_caller<float>, min_caller<double>
+    };
+    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
+        max_caller<float>, max_caller<double>
+    };
+    funcs[src1.depth()](src1, src2, dst, 0);
+}
+
+void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
+        max_caller<float>, max_caller<double>
+    };
+    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst)
+{
+    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
+        max_caller<float>, max_caller<double>
+    };
+    funcs[src1.depth()](src1, src2, dst, 0);
+}
+
+void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
+        max_caller<float>, max_caller<double>
+    };
+    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu
index 1bf811a59..c088058ba 100644
--- a/modules/gpu/src/cuda/mathfunc.cu
+++ b/modules/gpu/src/cuda/mathfunc.cu
@@ -43,6 +43,7 @@
 #include "cuda_shared.hpp"
 #include "transform.hpp"
 #include "limits_gpu.hpp"
+#include "saturate_cast.hpp"
 
 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -1295,4 +1296,127 @@ namespace cv { namespace gpu { namespace mathfunc
     template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst);
     template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst);
     template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst);
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+// min/max
+
+    struct MinOp
+    {
+        template <typename T>
+        __device__ T operator()(T a, T b)
+        {
+            return min(a, b);
+        }
+        __device__ float operator()(float a, float b)
+        {
+            return fmin(a, b);
+        }
+        __device__ double operator()(double a, double b)
+        {
+            return fmin(a, b);
+        }
+    };
+
+    struct MaxOp
+    {
+        template <typename T>
+        __device__ T operator()(T a, T b)
+        {
+            return max(a, b);
+        }
+        __device__ float operator()(float a, float b)
+        {
+            return fmax(a, b);
+        }
+        __device__ double operator()(double a, double b)
+        {
+            return fmax(a, b);
+        }
+    };
+
+    struct ScalarMinOp
+    {
+        double s;
+
+        explicit ScalarMinOp(double s_) : s(s_) {}
+
+        template <typename T>
+        __device__ T operator()(T a)
+        {
+            return saturate_cast<T>(fmin((double)a, s));
+        }
+    };
+
+    struct ScalarMaxOp
+    {
+        double s;
+
+        explicit ScalarMaxOp(double s_) : s(s_) {}
+
+        template <typename T>
+        __device__ T operator()(T a)
+        {
+            return saturate_cast<T>(fmax((double)a, s));
+        }
+    };
+
+    template <typename T>
+    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    {
+        MinOp op;
+        transform(src1, src2, dst, op, stream);
+    }
+
+    template void min_gpu<uchar>(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
+    template void min_gpu<char>(const DevMem2D_<char>& src1, const DevMem2D_<char>& src2, const DevMem2D_<char>& dst, cudaStream_t stream);
+    template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
+    template void min_gpu<short>(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
+    template void min_gpu<int>(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
+    template void min_gpu<float>(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
+    template void min_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+
+    template <typename T>
+    void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    {
+        MaxOp op;
+        transform(src1, src2, dst, op, stream);
+    }
+
+    template void max_gpu<uchar>(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
+    template void max_gpu<char>(const DevMem2D_<char>& src1, const DevMem2D_<char>& src2, const DevMem2D_<char>& dst, cudaStream_t stream);
+    template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
+    template void max_gpu<short>(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
+    template void max_gpu<int>(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
+    template void max_gpu<float>(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
+    template void max_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+
+    template <typename T>
+    void min_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    {
+        ScalarMinOp op(src2);
+        transform(src1, dst, op, stream);
+    }
+
+    template void min_gpu<uchar>(const DevMem2D& src1, double src2, const DevMem2D& dst, cudaStream_t stream);
+    template void min_gpu<char>(const DevMem2D_<char>& src1, double src2, const DevMem2D_<char>& dst, cudaStream_t stream);
+    template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, double src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
+    template void min_gpu<short>(const DevMem2D_<short>& src1, double src2, const DevMem2D_<short>& dst, cudaStream_t stream);
+    template void min_gpu<int>(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
+    template void min_gpu<float>(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
+    template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+
+    template <typename T>
+    void max_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    {
+        ScalarMaxOp op(src2);
+        transform(src1, dst, op, stream);
+    }
+
+    template void max_gpu<uchar>(const DevMem2D& src1, double src2, const DevMem2D& dst, cudaStream_t stream);
+    template void max_gpu<char>(const DevMem2D_<char>& src1, double src2, const DevMem2D_<char>& dst, cudaStream_t stream);
+    template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, double src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
+    template void max_gpu<short>(const DevMem2D_<short>& src1, double src2, const DevMem2D_<short>& dst, cudaStream_t stream);
+    template void max_gpu<int>(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
+    template void max_gpu<float>(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
+    template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
 }}}
diff --git a/modules/gpu/src/cuda/transform.hpp b/modules/gpu/src/cuda/transform.hpp
index af516b3d9..c50d8d151 100644
--- a/modules/gpu/src/cuda/transform.hpp
+++ b/modules/gpu/src/cuda/transform.hpp
@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
 
     //! Transform kernels
 
-    template
+    template
     static __global__ void transform(const DevMem2D_ src, PtrStep_ dst, const Mask mask, UnOp op)
     {
         const int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -77,7 +77,7 @@
         }
     }
 
-    template
+    template
     static __global__ void transform(const DevMem2D_ src1, const PtrStep_ src2, PtrStep_ dst, const Mask mask, BinOp op)
     {
         const int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -105,7 +105,7 @@ namespace cv
             grid.x = divUp(src.cols, threads.x);
             grid.y = divUp(src.rows, threads.y);
 
-            device::transform<<>>(src, dst, device::NoMask(), op);
+            device::transform<<>>(src, dst, device::NoMask(), op);
 
             if (stream == 0)
                 cudaSafeCall( cudaThreadSynchronize() );
diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp
index 29943c781..5b7d5d600 100644
--- a/tests/gpu/src/arithm.cpp
+++ b/tests/gpu/src/arithm.cpp
@@ -949,6 +949,30 @@ struct CV_GpuCountNonZeroTest: CvTest
     }
 };
 
+////////////////////////////////////////////////////////////////////////////////
+// min/max
+
+struct CV_GpuImageMinMaxTest : public CV_GpuArithmTest
+{
+    CV_GpuImageMinMaxTest() : CV_GpuArithmTest( "GPU-ImageMinMax", "min/max" ) {}
+
+    int test( const Mat& mat1, const Mat& mat2 )
+    {
+        cv::Mat cpuMinRes, cpuMaxRes;
+        cv::min(mat1, mat2, cpuMinRes);
+        cv::max(mat1, mat2, cpuMaxRes);
+
+        GpuMat gpu1(mat1);
+        GpuMat gpu2(mat2);
+        GpuMat gpuMinRes, gpuMaxRes;
+        cv::gpu::min(gpu1, gpu2, gpuMinRes);
+        cv::gpu::max(gpu1, gpu2, gpuMaxRes);
+
+        return CheckNorm(cpuMinRes, gpuMinRes) == CvTS::OK && CheckNorm(cpuMaxRes, gpuMaxRes) == CvTS::OK ?
+            CvTS::OK : CvTS::FAIL_GENERIC;
+    }
+};
+
 /////////////////////////////////////////////////////////////////////////////
 /////////////////// tests registration  /////////////////////////////////////
 
@@ -979,3 +1003,4 @@ CV_GpuNppImagePolarToCartTest CV_GpuNppImagePolarToCart_test;
 CV_GpuMinMaxTest CV_GpuMinMaxTest_test;
 CV_GpuMinMaxLocTest CV_GpuMinMaxLocTest_test;
 CV_GpuCountNonZeroTest CV_CountNonZero_test;
+CV_GpuImageMinMaxTest CV_GpuImageMinMax_test;
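
Note (editor): a minimal host-side usage sketch of the API this patch adds, not part of the patch itself. It assumes an OpenCV build with HAVE_CUDA and a CUDA device; the image sizes, types, and scalar values are arbitrary examples, and the blocking/async split mirrors the overloads declared in gpu.hpp above.

// usage_min_max_sketch.cpp -- illustration only, not from the patch
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    // Two single-channel 8-bit images filled with constant values (example data).
    cv::Mat a(480, 640, CV_8UC1, cv::Scalar(10));
    cv::Mat b(480, 640, CV_8UC1, cv::Scalar(200));

    // Upload to the GPU via the GpuMat(Mat) constructor.
    cv::gpu::GpuMat ga(a), gb(b), gmin, gmax, gclamped;

    // Blocking overloads: per-element minimum/maximum of two arrays.
    cv::gpu::min(ga, gb, gmin);
    cv::gpu::max(ga, gb, gmax);

    // Array-vs-scalar overload, e.g. clamp every pixel to at most 128.
    cv::gpu::min(ga, 128.0, gclamped);

    // Async overloads enqueue the kernel on a Stream and return immediately.
    cv::gpu::Stream stream;
    cv::gpu::min(ga, gb, gmin, stream);
    cv::gpu::max(ga, 32.0, gmax, stream);
    stream.waitForCompletion();

    // Copy a result back to the host.
    cv::Mat result;
    gmin.download(result);
    return 0;
}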