From af59a75ffc58e04c64cf9941f54f21ef030e2ee0 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 10 Jan 2012 11:11:58 +0000 Subject: [PATCH] fixed bug with submatrix in some gpu functions update gpu tests --- modules/core/src/cuda/matrix_operations.cu | 59 +- modules/core/src/gpumat.cpp | 12 +- modules/gpu/src/cuda/calib3d.cu | 4 +- modules/gpu/src/cuda/color.cu | 2 +- modules/gpu/src/cuda/element_operations.cu | 60 +- modules/gpu/src/cuda/matrix_reductions.cu | 1 - modules/gpu/src/cuda/remap.cu | 102 +- modules/gpu/src/cuda/resize.cu | 84 +- modules/gpu/src/imgproc.cpp | 59 +- .../gpu/device/detail/transform_detail.hpp | 14 - .../gpu/src/opencv2/gpu/device/transform.hpp | 26 +- .../gpu/src/opencv2/gpu/device/utility.hpp | 15 +- modules/gpu/test/test_arithm.cpp | 1066 ++++------- modules/gpu/test/test_calib3d.cpp | 41 +- modules/gpu/test/test_features2d.cpp | 203 +- modules/gpu/test/test_filters.cpp | 359 ++-- modules/gpu/test/test_gpu_base.cpp | 101 +- modules/gpu/test/test_gpu_base.hpp | 92 +- modules/gpu/test/test_hog.cpp | 19 +- modules/gpu/test/test_imgproc.cpp | 1635 ++++++----------- modules/gpu/test/test_main.cpp | 24 +- modules/gpu/test/test_matop.cpp | 221 +-- modules/gpu/test/test_nvidia.cpp | 42 +- modules/gpu/test/test_precomp.hpp | 1 + modules/gpu/test/test_video.cpp | 21 +- 25 files changed, 1777 insertions(+), 2486 deletions(-) diff --git a/modules/core/src/cuda/matrix_operations.cu b/modules/core/src/cuda/matrix_operations.cu index 46dc85929..38c7b2826 100644 --- a/modules/core/src/cuda/matrix_operations.cu +++ b/modules/core/src/cuda/matrix_operations.cu @@ -59,56 +59,27 @@ namespace cv { namespace gpu { namespace device ////////////////////////////////// CopyTo ///////////////////////////////// /////////////////////////////////////////////////////////////////////////// - template - __global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels) - { - size_t x = blockIdx.x * blockDim.x + threadIdx.x; - size_t y = blockIdx.y * blockDim.y + threadIdx.y; - - if ((x < cols * channels ) && (y < rows)) - if (mask[y * step_mask + x / channels] != 0) - { - size_t idx = y * ( step_mat >> shift_and_sizeof::shift ) + x; - mat_dst[idx] = mat_src[idx]; - } + template void copyToWithMask(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream) + { + cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, identity(), SingleMaskChannels(mask, channels), stream); } - template - void copy_to_with_mask_run(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream) + void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream) { - dim3 threadsPerBlock(16,16, 1); - dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1); + typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream); - copy_to_with_mask<<>> - ((T*)mat_src.data, (T*)mat_dst.data, (unsigned char*)mask.data, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall ( cudaDeviceSynchronize() ); - } - - void copy_to_with_mask(DevMem2Db mat_src, DevMem2Db mat_dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream) - { - typedef void (*CopyToFunc)(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream); - - static CopyToFunc tab[8] = + static func_t tab[] = { - copy_to_with_mask_run, - copy_to_with_mask_run, - copy_to_with_mask_run, - copy_to_with_mask_run, - copy_to_with_mask_run, - copy_to_with_mask_run, - copy_to_with_mask_run, - 0 + copyToWithMask, + copyToWithMask, + copyToWithMask, + copyToWithMask, + copyToWithMask, + copyToWithMask, + copyToWithMask }; - CopyToFunc func = tab[depth]; - - if (func == 0) - cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__, "copy_to_with_mask"); - - func(mat_src, mat_dst, mask, channels, stream); + tab[depth](src, dst, mask, channels, stream); } /////////////////////////////////////////////////////////////////////////// @@ -303,7 +274,7 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaSetDoubleForDevice(&alpha) ); cudaSafeCall( cudaSetDoubleForDevice(&beta) ); Convertor op(alpha, beta); - ::cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, op, WithOutMask(), stream); } void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream) diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp index c5910761f..756daa9d8 100644 --- a/modules/core/src/gpumat.cpp +++ b/modules/core/src/gpumat.cpp @@ -348,7 +348,7 @@ namespace namespace cv { namespace gpu { namespace device { - void copy_to_with_mask(DevMem2Db src, DevMem2Db dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream); + void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream); template void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream); @@ -391,13 +391,13 @@ namespace template void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream) { Scalar_ sf = s; - ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream); + cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream); } template void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream) { Scalar_ sf = s; - ::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream); + cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream); } } @@ -405,17 +405,17 @@ namespace cv { namespace gpu { CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0) { - ::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels(), stream); + cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.depth(), src.channels(), mask, stream); } CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst) { - ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0); + cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0); } CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0) { - ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream); + cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream); } CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream) diff --git a/modules/gpu/src/cuda/calib3d.cu b/modules/gpu/src/cuda/calib3d.cu index 27c2afb34..e296aeb56 100644 --- a/modules/gpu/src/cuda/calib3d.cu +++ b/modules/gpu/src/cuda/calib3d.cu @@ -74,7 +74,7 @@ namespace cv { namespace gpu { namespace device cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3)); cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3)); cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3)); - ::cv::gpu::device::transform(src, dst, TransformOp(), stream); + cv::gpu::device::transform(src, dst, TransformOp(), WithOutMask(), stream); } } // namespace transform_points @@ -113,7 +113,7 @@ namespace cv { namespace gpu { namespace device cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3)); cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3)); cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3)); - ::cv::gpu::device::transform(src, dst, ProjectOp(), stream); + cv::gpu::device::transform(src, dst, ProjectOp(), WithOutMask(), stream); } } // namespace project_points diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu index 9384ea668..1dc03c46a 100644 --- a/modules/gpu/src/cuda/color.cu +++ b/modules/gpu/src/cuda/color.cu @@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace device traits::functor_type functor = traits::create_functor(); \ typedef typename traits::functor_type::argument_type src_t; \ typedef typename traits::functor_type::result_type dst_t; \ - ::cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, functor, stream); \ + cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, functor, WithOutMask(), stream); \ } #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \ diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index b4d72f0ce..8d1995514 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -84,9 +84,9 @@ namespace cv { namespace gpu { namespace device template void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream) { if (mask.data) - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, mask, Add(), stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, Add(), SingleMask(mask), stream); else - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, Add(), stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, Add(), WithOutMask(), stream); } template void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); @@ -181,9 +181,9 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaSetDoubleForDevice(&val) ); AddScalar op(val); if (mask.data) - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, mask, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, SingleMask(mask), stream); else - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, WithOutMask(), stream); } template void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); @@ -277,9 +277,9 @@ namespace cv { namespace gpu { namespace device template void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream) { if (mask.data) - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, mask, Subtract(), stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, Subtract(), SingleMask(mask), stream); else - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, Subtract(), stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, Subtract(), WithOutMask(), stream); } template void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); @@ -374,9 +374,9 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaSetDoubleForDevice(&val) ); SubtractScalar op(val); if (mask.data) - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, mask, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, SingleMask(mask), stream); else - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, WithOutMask(), stream); } template void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); @@ -462,7 +462,7 @@ namespace cv { namespace gpu { namespace device void multiply_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream) { - ::cv::gpu::device::transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), multiply_8uc4_32f(), stream); + cv::gpu::device::transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), multiply_8uc4_32f(), WithOutMask(), stream); } struct multiply_16sc4_32f : binary_function @@ -483,7 +483,7 @@ namespace cv { namespace gpu { namespace device void multiply_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream) { - ::cv::gpu::device::transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), multiply_16sc4_32f(), stream); + cv::gpu::device::transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), multiply_16sc4_32f(), WithOutMask(), stream); } template struct Multiply : binary_function @@ -521,7 +521,7 @@ namespace cv { namespace gpu { namespace device { cudaSafeCall( cudaSetDoubleForDevice(&scale) ); Multiply op(scale); - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, op, WithOutMask(), stream); } template void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream); @@ -617,7 +617,7 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaSetDoubleForDevice(&val) ); cudaSafeCall( cudaSetDoubleForDevice(&scale) ); MultiplyScalar op(val, scale); - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, WithOutMask(), stream); } template void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream); @@ -698,7 +698,7 @@ namespace cv { namespace gpu { namespace device void divide_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream) { - transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), divide_8uc4_32f(), stream); + cv::gpu::device::transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), divide_8uc4_32f(), WithOutMask(), stream); } @@ -721,7 +721,7 @@ namespace cv { namespace gpu { namespace device void divide_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream) { - transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), divide_16sc4_32f(), stream); + cv::gpu::device::transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), divide_16sc4_32f(), WithOutMask(), stream); } template struct Divide : binary_function @@ -759,7 +759,7 @@ namespace cv { namespace gpu { namespace device { cudaSafeCall( cudaSetDoubleForDevice(&scale) ); Divide op(scale); - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, op, WithOutMask(), stream); } template void divide_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream); @@ -855,7 +855,7 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaSetDoubleForDevice(&val) ); cudaSafeCall( cudaSetDoubleForDevice(&scale) ); DivideScalar op(val, scale); - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, WithOutMask(), stream); } template void divide_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream); @@ -949,7 +949,7 @@ namespace cv { namespace gpu { namespace device { cudaSafeCall( cudaSetDoubleForDevice(&scalar) ); Reciprocal op(scalar); - ::cv::gpu::device::transform((DevMem2D_)src2, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src2, (DevMem2D_)dst, op, WithOutMask(), stream); } template void divide_gpu(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream); @@ -1055,7 +1055,7 @@ namespace cv { namespace gpu { namespace device template void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream) { - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, Absdiff(), stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, Absdiff(), WithOutMask(), stream); } template void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream); @@ -1101,7 +1101,7 @@ namespace cv { namespace gpu { namespace device { cudaSafeCall( cudaSetDoubleForDevice(&val) ); AbsdiffScalar op(val); - ::cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, stream); + cv::gpu::device::transform((DevMem2D_)src1, (DevMem2D_)dst, op, WithOutMask(), stream); } template void absdiff_gpu(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream); @@ -1188,7 +1188,7 @@ namespace cv { namespace gpu { namespace device template