diff --git a/modules/cudaarithm/src/cuda/cmp_mat.cu b/modules/cudaarithm/src/cuda/cmp_mat.cu index cdbb9639b..3693fc2b7 100644 --- a/modules/cudaarithm/src/cuda/cmp_mat.cu +++ b/modules/cudaarithm/src/cuda/cmp_mat.cu @@ -40,64 +40,23 @@ // //M*/ -#if !defined CUDA_DISABLER +#include "opencv2/opencv_modules.hpp" -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/simd_functions.hpp" +#ifndef HAVE_OPENCV_CUDEV -#include "arithm_func_traits.hpp" +#error "opencv_cudev is required" -using namespace cv::cuda; -using namespace cv::cuda::device; +#else -namespace arithm +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop); + +namespace { - struct VCmpEq4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmpeq4(a, b); - } - - __host__ __device__ __forceinline__ VCmpEq4() {} - __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {} - }; - struct VCmpNe4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmpne4(a, b); - } - - __host__ __device__ __forceinline__ VCmpNe4() {} - __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {} - }; - struct VCmpLt4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmplt4(a, b); - } - - __host__ __device__ __forceinline__ VCmpLt4() {} - __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {} - }; - struct VCmpLe4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmple4(a, b); - } - - __host__ __device__ __forceinline__ VCmpLe4() {} - __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {} - }; - - template - struct Cmp : binary_function + template struct CmpOp : binary_function { __device__ __forceinline__ uchar operator()(T a, T b) const { @@ -105,102 +64,156 @@ namespace arithm return -op(a, b); } }; -} -namespace cv { namespace cuda { namespace device -{ - template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits + template struct TransformPolicy : DefaultTransformPolicy { }; - template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits + template <> struct TransformPolicy : DefaultTransformPolicy { + enum { + shift = 1 + }; }; - template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits - { - }; - template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::Cmp > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void cmpMatEq_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - device::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream); - } - void cmpMatNe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - device::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream); - } - void cmpMatLt_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - device::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream); - } - void cmpMatLe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - device::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream); - } template