diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 2b367fa1b..2fc6b48de 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -60,12 +60,14 @@ CV_EXPORTS void subtract(InputArray src1, InputArray src2, OutputArray dst, Inpu //! computes element-wise weighted product of the two arrays (dst = scale * src1 * src2) CV_EXPORTS void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted quotient of the two arrays (c = a / b) -CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted quotient of matrix and scalar (c = a / s) -CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); +//! computes element-wise weighted quotient of the two arrays (dst = scale * (src1 / src2)) +CV_EXPORTS void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); + //! computes element-wise weighted reciprocal of an array (dst = scale/src2) -CV_EXPORTS void divide(double scale, const GpuMat& b, GpuMat& c, int dtype = -1, Stream& stream = Stream::Null()); +static inline void divide(double src1, InputArray src2, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null()) +{ + divide(src1, src2, dst, 1.0, dtype, stream); +} //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, diff --git a/modules/gpuarithm/src/cuda/div_inv.cu b/modules/gpuarithm/src/cuda/div_inv.cu deleted file mode 100644 index 9cfda933c..000000000 --- a/modules/gpuarithm/src/cuda/div_inv.cu +++ /dev/null @@ -1,144 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/simd_functions.hpp" - -#include "arithm_func_traits.hpp" - -using namespace cv::gpu; -using namespace cv::gpu::cudev; - -namespace arithm -{ - template struct DivInv : unary_function - { - S val; - - __host__ explicit DivInv(S val_) : val(val_) {} - - __device__ __forceinline__ D operator ()(T a) const - { - return a != 0 ? saturate_cast(val / a) : 0; - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::DivInv > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - DivInv op(static_cast(val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -#endif // CUDA_DISABLER diff --git a/modules/gpuarithm/src/cuda/div_scalar.cu b/modules/gpuarithm/src/cuda/div_scalar.cu index 42ba90cb0..464c4adf8 100644 --- a/modules/gpuarithm/src/cuda/div_scalar.cu +++ b/modules/gpuarithm/src/cuda/div_scalar.cu @@ -66,6 +66,18 @@ namespace arithm return saturate_cast(a / val); } }; + + template struct DivScalarInv : unary_function + { + S val; + + explicit DivScalarInv(S val_) : val(val_) {} + + __device__ __forceinline__ D operator ()(T a) const + { + return a != 0 ? saturate_cast(val / a) : 0; + } + }; } namespace cv { namespace gpu { namespace cudev @@ -73,72 +85,84 @@ namespace cv { namespace gpu { namespace cudev template struct TransformFunctorTraits< arithm::DivScalar > : arithm::ArithmFuncTraits { }; + + template struct TransformFunctorTraits< arithm::DivScalarInv > : arithm::ArithmFuncTraits + { + }; }}} namespace arithm { template - void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) + void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream) { - DivScalar op(static_cast(val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); + if (inv) + { + DivScalarInv op(static_cast(val)); + cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); + } + else + { + DivScalar op(static_cast(val)); + cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); + } } - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); } #endif // CUDA_DISABLER diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 76bfb9ba3..24f67b7db 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -53,9 +53,7 @@ void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Str void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::divide(double, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); } @@ -120,7 +118,7 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, namespace { typedef void (*mat_mat_func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream); - typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream); + typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream); void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, double scale, int dtype, Stream& stream, mat_mat_func_t mat_mat_func, mat_scalar_func_t mat_scalar_func) @@ -151,10 +149,6 @@ namespace { CV_Assert( scalar.total() <= 4 ); scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); - val[0] *= scale; - val[1] *= scale; - val[2] *= scale; - val[3] *= scale; } GpuMat mask = _mask.getGpuMat(); @@ -182,9 +176,9 @@ namespace GpuMat dst = _dst.getGpuMat(); if (isScalar1) - mat_scalar_func(src2, val, true, dst, mask, stream); + mat_scalar_func(src2, val, true, dst, mask, scale, stream); else if (isScalar2) - mat_scalar_func(src1, val, false, dst, mask, stream); + mat_scalar_func(src1, val, false, dst, mask, scale, stream); else mat_mat_func(src1, src2, dst, mask, scale, stream); } @@ -505,7 +499,7 @@ namespace arithm void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, Stream& _stream) +static void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -756,7 +750,7 @@ namespace arithm void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& _stream) +static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -973,7 +967,7 @@ namespace arithm void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } -static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, Stream& _stream) +static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[7][7] = @@ -1061,6 +1055,11 @@ static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const Gp cudaStream_t stream = StreamAccessor::getStream(_stream); + val[0] *= scale; + val[1] *= scale; + val[2] *= scale; + val[3] *= scale; + const npp_func_t npp_func = npp_funcs[sdepth][cn - 1]; if (ddepth == sdepth && cn > 1 && npp_func != 0) { @@ -1123,204 +1122,167 @@ namespace arithm void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); } -void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s) +static void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) { - using namespace arithm; - - cudaStream_t stream = StreamAccessor::getStream(s); - - if (src1.type() == CV_8UC4 && src2.type() == CV_32FC1) + typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + static const func_t funcs[7][7] = { - CV_Assert( src1.size() == src2.size() ); - - dst.create(src1.size(), src1.type()); - - divMat_8uc4_32f(src1, src2, dst, stream); - } - else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1) - { - CV_Assert( src1.size() == src2.size() ); - - dst.create(src1.size(), src1.type()); - - divMat_16sc4_32f(src1, src2, dst, stream); - } - else - { - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - static const func_t funcs[7][7] = { - { - divMat, - divMat, - divMat, - divMat, - divMat, - divMat, - divMat - }, - { - divMat, - divMat, - divMat, - divMat, - divMat, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - divMat, - divMat, - divMat, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - divMat, - divMat, - divMat, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - divMat, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - divMat - } - }; - - if (dtype < 0) - dtype = src1.depth(); - - const int sdepth = src1.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); - const int cn = src1.channels(); - - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - - if (sdepth == CV_64F || ddepth == CV_64F) + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat } + }; - dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); + const int sdepth = src1.depth(); + const int ddepth = dst.depth(); + const int cn = src1.channels(); - PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); - PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); - PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); + cudaStream_t stream = StreamAccessor::getStream(_stream); - const func_t func = funcs[sdepth][ddepth]; + PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); + PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); + PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); - if (!func) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + const func_t func = funcs[sdepth][ddepth]; - func(src1_, src2_, dst_, scale, stream); - } + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_, src2_, dst_, scale, stream); } namespace arithm { template - void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s) +static void divScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) { - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[7][7] = { { - divScalar, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - divScalar, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar, - divScalar, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar } }; @@ -1336,32 +1298,31 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc {0 , 0, 0 , 0 } }; - if (dtype < 0) - dtype = src.depth(); - const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); + const int ddepth = dst.depth(); const int cn = src.channels(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( cn <= 4 ); + cudaStream_t stream = StreamAccessor::getStream(_stream); - if (sdepth == CV_64F || ddepth == CV_64F) + if (inv) { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + val[0] *= scale; + val[1] *= scale; + val[2] *= scale; + val[3] *= scale; + } + else + { + val[0] /= scale; + val[1] /= scale; + val[2] /= scale; + val[3] /= scale; } - dst.create(src.size(), CV_MAKE_TYPE(ddepth, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); - - const Scalar nsc(sc.val[0] / scale, sc.val[1] / scale, sc.val[2] / scale, sc.val[3] / scale); - const npp_func_t npp_func = npp_funcs[sdepth][cn - 1]; - if (ddepth == sdepth && cn > 1 && npp_func != 0) + if (ddepth == sdepth && cn > 1 && npp_func != 0 && !inv) { - npp_func(src, nsc, dst, stream); + npp_func(src, val, dst, stream); return; } @@ -1372,113 +1333,39 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src, nsc.val[0], dst, stream); + func(src, val[0], inv, dst, stream); } -namespace arithm +void cv::gpu::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream) { - template - void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[7][7] = + if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1) { - { - divInv, - divInv, - divInv, - divInv, - divInv, - divInv, - divInv - }, - { - divInv, - divInv, - divInv, - divInv, - divInv, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - divInv, - divInv, - divInv, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - divInv, - divInv, - divInv, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - divInv, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - divInv - } - }; + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); - if (dtype < 0) - dtype = src.depth(); + CV_Assert( src1.size() == src2.size() ); - const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); - const int cn = src.channels(); + _dst.create(src1.size(), src1.type()); + GpuMat dst = _dst.getGpuMat(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( cn == 1 ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + arithm::divMat_8uc4_32f(src1, src2, dst, StreamAccessor::getStream(stream)); } + else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1) + { + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); - dst.create(src.size(), CV_MAKE_TYPE(ddepth, cn)); + CV_Assert( src1.size() == src2.size() ); - cudaStream_t stream = StreamAccessor::getStream(s); + _dst.create(src1.size(), src1.type()); + GpuMat dst = _dst.getGpuMat(); - const func_t func = funcs[sdepth][ddepth]; - - if (!func) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - - func(src, scale, dst, stream); + arithm::divMat_16sc4_32f(src1, src2, dst, StreamAccessor::getStream(stream)); + } + else + { + arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, divMat, divScalar); + } } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp index ece38311e..345c96015 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -1299,9 +1299,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar, testing::Combine( WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// -// Divide_Scalar_Inv +// Divide_Scalar_First -PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair, UseRoi) +PARAM_TEST_CASE(Divide_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair, UseRoi) { cv::gpu::DeviceInfo devInfo; cv::Size size; @@ -1319,7 +1319,7 @@ PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair