used new device layer for cv::gpu::compare
This commit is contained in:
parent
43e811118b
commit
ef9917ecf1
@ -40,64 +40,23 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
#ifndef HAVE_OPENCV_CUDEV
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
#error "opencv_cudev is required"
|
||||
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cuda::device;
|
||||
#else
|
||||
|
||||
namespace arithm
|
||||
#include "opencv2/cudev.hpp"
|
||||
|
||||
using namespace cv::cudev;
|
||||
|
||||
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
|
||||
|
||||
namespace
|
||||
{
|
||||
struct VCmpEq4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmpeq4(a, b);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ VCmpEq4() {}
|
||||
__host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
|
||||
};
|
||||
struct VCmpNe4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmpne4(a, b);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ VCmpNe4() {}
|
||||
__host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
|
||||
};
|
||||
struct VCmpLt4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmplt4(a, b);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ VCmpLt4() {}
|
||||
__host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
|
||||
};
|
||||
struct VCmpLe4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmple4(a, b);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ VCmpLe4() {}
|
||||
__host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
|
||||
};
|
||||
|
||||
template <class Op, typename T>
|
||||
struct Cmp : binary_function<T, T, uchar>
|
||||
template <class Op, typename T> struct CmpOp : binary_function<T, T, uchar>
|
||||
{
|
||||
__device__ __forceinline__ uchar operator()(T a, T b) const
|
||||
{
|
||||
@ -105,102 +64,156 @@ namespace arithm
|
||||
return -op(a, b);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
|
||||
{
|
||||
};
|
||||
template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
template <> struct TransformPolicy<double> : DefaultTransformPolicy
|
||||
{
|
||||
enum {
|
||||
shift = 1
|
||||
};
|
||||
};
|
||||
template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <class Op, typename T> struct TransformFunctorTraits< arithm::Cmp<Op, T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
device::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
|
||||
}
|
||||
void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
device::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
|
||||
}
|
||||
void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
device::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
|
||||
}
|
||||
void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
device::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <template <typename> class Op, typename T>
|
||||
void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
void cmpMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
Cmp<Op<T>, T> op;
|
||||
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
|
||||
CmpOp<Op<T>, T> op;
|
||||
gridTransformBinary_< TransformPolicy<T> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<uchar>(dst), op, stream);
|
||||
}
|
||||
|
||||
template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
struct VCmpEq4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
cmpMat<equal_to, T>(src1, src2, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmpeq4(a, b);
|
||||
}
|
||||
};
|
||||
struct VCmpNe4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
cmpMat<not_equal_to, T>(src1, src2, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmpne4(a, b);
|
||||
}
|
||||
};
|
||||
struct VCmpLt4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
cmpMat<less, T>(src1, src2, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmplt4(a, b);
|
||||
}
|
||||
};
|
||||
struct VCmpLe4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
cmpMat<less_equal, T>(src1, src2, dst, stream);
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmple4(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
void cmpMatEq_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
const int vcols = src1.cols >> 2;
|
||||
|
||||
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
|
||||
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
|
||||
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
|
||||
|
||||
gridTransformBinary(src1_, src2_, dst_, VCmpEq4(), stream);
|
||||
}
|
||||
void cmpMatNe_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
const int vcols = src1.cols >> 2;
|
||||
|
||||
template void cmpMatEq<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
|
||||
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
|
||||
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
|
||||
|
||||
template void cmpMatNe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
gridTransformBinary(src1_, src2_, dst_, VCmpNe4(), stream);
|
||||
}
|
||||
void cmpMatLt_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
const int vcols = src1.cols >> 2;
|
||||
|
||||
template void cmpMatLt<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
|
||||
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
|
||||
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
|
||||
|
||||
template void cmpMatLe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
gridTransformBinary(src1_, src2_, dst_, VCmpLt4(), stream);
|
||||
}
|
||||
void cmpMatLe_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
const int vcols = src1.cols >> 2;
|
||||
|
||||
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
|
||||
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
|
||||
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
|
||||
|
||||
gridTransformBinary(src1_, src2_, dst_, VCmpLe4(), stream);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
||||
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
|
||||
static const func_t funcs[7][4] =
|
||||
{
|
||||
{cmpMat_v1<equal_to, uchar> , cmpMat_v1<not_equal_to, uchar> , cmpMat_v1<less, uchar> , cmpMat_v1<less_equal, uchar> },
|
||||
{cmpMat_v1<equal_to, schar> , cmpMat_v1<not_equal_to, schar> , cmpMat_v1<less, schar> , cmpMat_v1<less_equal, schar> },
|
||||
{cmpMat_v1<equal_to, ushort>, cmpMat_v1<not_equal_to, ushort>, cmpMat_v1<less, ushort>, cmpMat_v1<less_equal, ushort>},
|
||||
{cmpMat_v1<equal_to, short> , cmpMat_v1<not_equal_to, short> , cmpMat_v1<less, short> , cmpMat_v1<less_equal, short> },
|
||||
{cmpMat_v1<equal_to, int> , cmpMat_v1<not_equal_to, int> , cmpMat_v1<less, int> , cmpMat_v1<less_equal, int> },
|
||||
{cmpMat_v1<equal_to, float> , cmpMat_v1<not_equal_to, float> , cmpMat_v1<less, float> , cmpMat_v1<less_equal, float> },
|
||||
{cmpMat_v1<equal_to, double>, cmpMat_v1<not_equal_to, double>, cmpMat_v1<less, double>, cmpMat_v1<less_equal, double>}
|
||||
};
|
||||
|
||||
typedef void (*func_v4_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
|
||||
static const func_v4_t funcs_v4[] =
|
||||
{
|
||||
cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4
|
||||
};
|
||||
|
||||
const int depth = src1.depth();
|
||||
|
||||
CV_DbgAssert( depth <= CV_64F );
|
||||
|
||||
static const int codes[] =
|
||||
{
|
||||
0, 2, 3, 2, 3, 1
|
||||
};
|
||||
const GpuMat* psrc1[] =
|
||||
{
|
||||
&src1, &src2, &src2, &src1, &src1, &src1
|
||||
};
|
||||
const GpuMat* psrc2[] =
|
||||
{
|
||||
&src2, &src1, &src1, &src2, &src2, &src2
|
||||
};
|
||||
|
||||
const int code = codes[cmpop];
|
||||
|
||||
GpuMat src1_ = psrc1[cmpop]->reshape(1);
|
||||
GpuMat src2_ = psrc2[cmpop]->reshape(1);
|
||||
GpuMat dst_ = dst.reshape(1);
|
||||
|
||||
if (depth == CV_8U && (src1_.cols & 3) == 0)
|
||||
{
|
||||
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
|
||||
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
|
||||
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
|
||||
|
||||
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
|
||||
|
||||
if (isAllAligned)
|
||||
{
|
||||
funcs_v4[code](src1_, src2_, dst_, stream);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const func_t func = funcs[depth][code];
|
||||
|
||||
func(src1_, src2_, dst_, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -40,24 +40,23 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#ifndef HAVE_OPENCV_CUDEV
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
#error "opencv_cudev is required"
|
||||
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cuda::device;
|
||||
#else
|
||||
|
||||
namespace arithm
|
||||
#include "opencv2/cudev.hpp"
|
||||
|
||||
using namespace cv::cudev;
|
||||
|
||||
void cmpScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
|
||||
|
||||
namespace
|
||||
{
|
||||
template <class Op, typename T>
|
||||
struct Cmp : binary_function<T, T, uchar>
|
||||
template <class Op, typename T> struct CmpOp : binary_function<T, T, uchar>
|
||||
{
|
||||
__device__ __forceinline__ uchar operator()(T a, T b) const
|
||||
{
|
||||
@ -66,219 +65,161 @@ namespace arithm
|
||||
}
|
||||
};
|
||||
|
||||
#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type
|
||||
#define MAKE_VEC(_type, _cn) typename MakeVec<_type, _cn>::type
|
||||
|
||||
template <class Op, typename T, int cn> struct CmpScalarOp;
|
||||
|
||||
template <class Op, typename T, int cn> struct CmpScalar;
|
||||
template <class Op, typename T>
|
||||
struct CmpScalar<Op, T, 1> : unary_function<T, uchar>
|
||||
struct CmpScalarOp<Op, T, 1> : unary_function<T, uchar>
|
||||
{
|
||||
T val;
|
||||
|
||||
__host__ explicit CmpScalar(T val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ uchar operator()(T src) const
|
||||
{
|
||||
Cmp<Op, T> op;
|
||||
CmpOp<Op, T> op;
|
||||
return op(src, val);
|
||||
}
|
||||
};
|
||||
|
||||
template <class Op, typename T>
|
||||
struct CmpScalar<Op, T, 2> : unary_function<TYPE_VEC(T, 2), TYPE_VEC(uchar, 2)>
|
||||
struct CmpScalarOp<Op, T, 2> : unary_function<MAKE_VEC(T, 2), MAKE_VEC(uchar, 2)>
|
||||
{
|
||||
TYPE_VEC(T, 2) val;
|
||||
MAKE_VEC(T, 2) val;
|
||||
|
||||
__host__ explicit CmpScalar(TYPE_VEC(T, 2) val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ TYPE_VEC(uchar, 2) operator()(const TYPE_VEC(T, 2) & src) const
|
||||
__device__ __forceinline__ MAKE_VEC(uchar, 2) operator()(const MAKE_VEC(T, 2) & src) const
|
||||
{
|
||||
Cmp<Op, T> op;
|
||||
return VecTraits<TYPE_VEC(uchar, 2)>::make(op(src.x, val.x), op(src.y, val.y));
|
||||
CmpOp<Op, T> op;
|
||||
return VecTraits<MAKE_VEC(uchar, 2)>::make(op(src.x, val.x), op(src.y, val.y));
|
||||
}
|
||||
};
|
||||
|
||||
template <class Op, typename T>
|
||||
struct CmpScalar<Op, T, 3> : unary_function<TYPE_VEC(T, 3), TYPE_VEC(uchar, 3)>
|
||||
struct CmpScalarOp<Op, T, 3> : unary_function<MAKE_VEC(T, 3), MAKE_VEC(uchar, 3)>
|
||||
{
|
||||
TYPE_VEC(T, 3) val;
|
||||
MAKE_VEC(T, 3) val;
|
||||
|
||||
__host__ explicit CmpScalar(TYPE_VEC(T, 3) val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ TYPE_VEC(uchar, 3) operator()(const TYPE_VEC(T, 3) & src) const
|
||||
__device__ __forceinline__ MAKE_VEC(uchar, 3) operator()(const MAKE_VEC(T, 3) & src) const
|
||||
{
|
||||
Cmp<Op, T> op;
|
||||
return VecTraits<TYPE_VEC(uchar, 3)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z));
|
||||
CmpOp<Op, T> op;
|
||||
return VecTraits<MAKE_VEC(uchar, 3)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z));
|
||||
}
|
||||
};
|
||||
|
||||
template <class Op, typename T>
|
||||
struct CmpScalar<Op, T, 4> : unary_function<TYPE_VEC(T, 4), TYPE_VEC(uchar, 4)>
|
||||
struct CmpScalarOp<Op, T, 4> : unary_function<MAKE_VEC(T, 4), MAKE_VEC(uchar, 4)>
|
||||
{
|
||||
TYPE_VEC(T, 4) val;
|
||||
MAKE_VEC(T, 4) val;
|
||||
|
||||
__host__ explicit CmpScalar(TYPE_VEC(T, 4) val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ TYPE_VEC(uchar, 4) operator()(const TYPE_VEC(T, 4) & src) const
|
||||
__device__ __forceinline__ MAKE_VEC(uchar, 4) operator()(const MAKE_VEC(T, 4) & src) const
|
||||
{
|
||||
Cmp<Op, T> op;
|
||||
return VecTraits<TYPE_VEC(uchar, 4)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w));
|
||||
CmpOp<Op, T> op;
|
||||
return VecTraits<MAKE_VEC(uchar, 4)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w));
|
||||
}
|
||||
};
|
||||
|
||||
#undef TYPE_VEC
|
||||
}
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
|
||||
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
|
||||
{
|
||||
};
|
||||
}}}
|
||||
template <> struct TransformPolicy<double> : DefaultTransformPolicy
|
||||
{
|
||||
enum {
|
||||
shift = 1
|
||||
};
|
||||
};
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <template <typename> class Op, typename T, int cn>
|
||||
void cmpScalar(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
void cmpScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
typedef typename TypeVec<T, cn>::vec_type src_t;
|
||||
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
|
||||
typedef typename MakeVec<T, cn>::type src_type;
|
||||
typedef typename MakeVec<uchar, cn>::type dst_type;
|
||||
|
||||
T sval[] = {static_cast<T>(val[0]), static_cast<T>(val[1]), static_cast<T>(val[2]), static_cast<T>(val[3])};
|
||||
src_t val1 = VecTraits<src_t>::make(sval);
|
||||
cv::Scalar_<T> value_ = value;
|
||||
|
||||
CmpScalar<Op<T>, T, cn> op(val1);
|
||||
device::transform((PtrStepSz<src_t>) src, (PtrStepSz<dst_t>) dst, op, WithOutMask(), stream);
|
||||
CmpScalarOp<Op<T>, T, cn> op;
|
||||
op.val = VecTraits<src_type>::make(value_.val);
|
||||
|
||||
gridTransformUnary_< TransformPolicy<T> >(globPtr<src_type>(src), globPtr<dst_type>(dst), op, stream);
|
||||
}
|
||||
|
||||
template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<equal_to, T, 1>,
|
||||
cmpScalar<equal_to, T, 2>,
|
||||
cmpScalar<equal_to, T, 3>,
|
||||
cmpScalar<equal_to, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<not_equal_to, T, 1>,
|
||||
cmpScalar<not_equal_to, T, 2>,
|
||||
cmpScalar<not_equal_to, T, 3>,
|
||||
cmpScalar<not_equal_to, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<less, T, 1>,
|
||||
cmpScalar<less, T, 2>,
|
||||
cmpScalar<less, T, 3>,
|
||||
cmpScalar<less, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<less_equal, T, 1>,
|
||||
cmpScalar<less_equal, T, 2>,
|
||||
cmpScalar<less_equal, T, 3>,
|
||||
cmpScalar<less_equal, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<greater, T, 1>,
|
||||
cmpScalar<greater, T, 2>,
|
||||
cmpScalar<greater, T, 3>,
|
||||
cmpScalar<greater, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<greater_equal, T, 1>,
|
||||
cmpScalar<greater_equal, T, 2>,
|
||||
cmpScalar<greater_equal, T, 3>,
|
||||
cmpScalar<greater_equal, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
|
||||
template void cmpScalarEq<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarNe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarLt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarLe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarGt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarGe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
||||
void cmpScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream);
|
||||
static const func_t funcs[7][6][4] =
|
||||
{
|
||||
{
|
||||
{cmpScalarImpl<equal_to, uchar, 1>, cmpScalarImpl<equal_to, uchar, 2>, cmpScalarImpl<equal_to, uchar, 3>, cmpScalarImpl<equal_to, uchar, 4>},
|
||||
{cmpScalarImpl<greater, uchar, 1>, cmpScalarImpl<greater, uchar, 2>, cmpScalarImpl<greater, uchar, 3>, cmpScalarImpl<greater, uchar, 4>},
|
||||
{cmpScalarImpl<greater_equal, uchar, 1>, cmpScalarImpl<greater_equal, uchar, 2>, cmpScalarImpl<greater_equal, uchar, 3>, cmpScalarImpl<greater_equal, uchar, 4>},
|
||||
{cmpScalarImpl<less, uchar, 1>, cmpScalarImpl<less, uchar, 2>, cmpScalarImpl<less, uchar, 3>, cmpScalarImpl<less, uchar, 4>},
|
||||
{cmpScalarImpl<less_equal, uchar, 1>, cmpScalarImpl<less_equal, uchar, 2>, cmpScalarImpl<less_equal, uchar, 3>, cmpScalarImpl<less_equal, uchar, 4>},
|
||||
{cmpScalarImpl<not_equal_to, uchar, 1>, cmpScalarImpl<not_equal_to, uchar, 2>, cmpScalarImpl<not_equal_to, uchar, 3>, cmpScalarImpl<not_equal_to, uchar, 4>}
|
||||
},
|
||||
{
|
||||
{cmpScalarImpl<equal_to, schar, 1>, cmpScalarImpl<equal_to, schar, 2>, cmpScalarImpl<equal_to, schar, 3>, cmpScalarImpl<equal_to, schar, 4>},
|
||||
{cmpScalarImpl<greater, schar, 1>, cmpScalarImpl<greater, schar, 2>, cmpScalarImpl<greater, schar, 3>, cmpScalarImpl<greater, schar, 4>},
|
||||
{cmpScalarImpl<greater_equal, schar, 1>, cmpScalarImpl<greater_equal, schar, 2>, cmpScalarImpl<greater_equal, schar, 3>, cmpScalarImpl<greater_equal, schar, 4>},
|
||||
{cmpScalarImpl<less, schar, 1>, cmpScalarImpl<less, schar, 2>, cmpScalarImpl<less, schar, 3>, cmpScalarImpl<less, schar, 4>},
|
||||
{cmpScalarImpl<less_equal, schar, 1>, cmpScalarImpl<less_equal, schar, 2>, cmpScalarImpl<less_equal, schar, 3>, cmpScalarImpl<less_equal, schar, 4>},
|
||||
{cmpScalarImpl<not_equal_to, schar, 1>, cmpScalarImpl<not_equal_to, schar, 2>, cmpScalarImpl<not_equal_to, schar, 3>, cmpScalarImpl<not_equal_to, schar, 4>}
|
||||
},
|
||||
{
|
||||
{cmpScalarImpl<equal_to, ushort, 1>, cmpScalarImpl<equal_to, ushort, 2>, cmpScalarImpl<equal_to, ushort, 3>, cmpScalarImpl<equal_to, ushort, 4>},
|
||||
{cmpScalarImpl<greater, ushort, 1>, cmpScalarImpl<greater, ushort, 2>, cmpScalarImpl<greater, ushort, 3>, cmpScalarImpl<greater, ushort, 4>},
|
||||
{cmpScalarImpl<greater_equal, ushort, 1>, cmpScalarImpl<greater_equal, ushort, 2>, cmpScalarImpl<greater_equal, ushort, 3>, cmpScalarImpl<greater_equal, ushort, 4>},
|
||||
{cmpScalarImpl<less, ushort, 1>, cmpScalarImpl<less, ushort, 2>, cmpScalarImpl<less, ushort, 3>, cmpScalarImpl<less, ushort, 4>},
|
||||
{cmpScalarImpl<less_equal, ushort, 1>, cmpScalarImpl<less_equal, ushort, 2>, cmpScalarImpl<less_equal, ushort, 3>, cmpScalarImpl<less_equal, ushort, 4>},
|
||||
{cmpScalarImpl<not_equal_to, ushort, 1>, cmpScalarImpl<not_equal_to, ushort, 2>, cmpScalarImpl<not_equal_to, ushort, 3>, cmpScalarImpl<not_equal_to, ushort, 4>}
|
||||
},
|
||||
{
|
||||
{cmpScalarImpl<equal_to, short, 1>, cmpScalarImpl<equal_to, short, 2>, cmpScalarImpl<equal_to, short, 3>, cmpScalarImpl<equal_to, short, 4>},
|
||||
{cmpScalarImpl<greater, short, 1>, cmpScalarImpl<greater, short, 2>, cmpScalarImpl<greater, short, 3>, cmpScalarImpl<greater, short, 4>},
|
||||
{cmpScalarImpl<greater_equal, short, 1>, cmpScalarImpl<greater_equal, short, 2>, cmpScalarImpl<greater_equal, short, 3>, cmpScalarImpl<greater_equal, short, 4>},
|
||||
{cmpScalarImpl<less, short, 1>, cmpScalarImpl<less, short, 2>, cmpScalarImpl<less, short, 3>, cmpScalarImpl<less, short, 4>},
|
||||
{cmpScalarImpl<less_equal, short, 1>, cmpScalarImpl<less_equal, short, 2>, cmpScalarImpl<less_equal, short, 3>, cmpScalarImpl<less_equal, short, 4>},
|
||||
{cmpScalarImpl<not_equal_to, short, 1>, cmpScalarImpl<not_equal_to, short, 2>, cmpScalarImpl<not_equal_to, short, 3>, cmpScalarImpl<not_equal_to, short, 4>}
|
||||
},
|
||||
{
|
||||
{cmpScalarImpl<equal_to, int, 1>, cmpScalarImpl<equal_to, int, 2>, cmpScalarImpl<equal_to, int, 3>, cmpScalarImpl<equal_to, int, 4>},
|
||||
{cmpScalarImpl<greater, int, 1>, cmpScalarImpl<greater, int, 2>, cmpScalarImpl<greater, int, 3>, cmpScalarImpl<greater, int, 4>},
|
||||
{cmpScalarImpl<greater_equal, int, 1>, cmpScalarImpl<greater_equal, int, 2>, cmpScalarImpl<greater_equal, int, 3>, cmpScalarImpl<greater_equal, int, 4>},
|
||||
{cmpScalarImpl<less, int, 1>, cmpScalarImpl<less, int, 2>, cmpScalarImpl<less, int, 3>, cmpScalarImpl<less, int, 4>},
|
||||
{cmpScalarImpl<less_equal, int, 1>, cmpScalarImpl<less_equal, int, 2>, cmpScalarImpl<less_equal, int, 3>, cmpScalarImpl<less_equal, int, 4>},
|
||||
{cmpScalarImpl<not_equal_to, int, 1>, cmpScalarImpl<not_equal_to, int, 2>, cmpScalarImpl<not_equal_to, int, 3>, cmpScalarImpl<not_equal_to, int, 4>}
|
||||
},
|
||||
{
|
||||
{cmpScalarImpl<equal_to, float, 1>, cmpScalarImpl<equal_to, float, 2>, cmpScalarImpl<equal_to, float, 3>, cmpScalarImpl<equal_to, float, 4>},
|
||||
{cmpScalarImpl<greater, float, 1>, cmpScalarImpl<greater, float, 2>, cmpScalarImpl<greater, float, 3>, cmpScalarImpl<greater, float, 4>},
|
||||
{cmpScalarImpl<greater_equal, float, 1>, cmpScalarImpl<greater_equal, float, 2>, cmpScalarImpl<greater_equal, float, 3>, cmpScalarImpl<greater_equal, float, 4>},
|
||||
{cmpScalarImpl<less, float, 1>, cmpScalarImpl<less, float, 2>, cmpScalarImpl<less, float, 3>, cmpScalarImpl<less, float, 4>},
|
||||
{cmpScalarImpl<less_equal, float, 1>, cmpScalarImpl<less_equal, float, 2>, cmpScalarImpl<less_equal, float, 3>, cmpScalarImpl<less_equal, float, 4>},
|
||||
{cmpScalarImpl<not_equal_to, float, 1>, cmpScalarImpl<not_equal_to, float, 2>, cmpScalarImpl<not_equal_to, float, 3>, cmpScalarImpl<not_equal_to, float, 4>}
|
||||
},
|
||||
{
|
||||
{cmpScalarImpl<equal_to, double, 1>, cmpScalarImpl<equal_to, double, 2>, cmpScalarImpl<equal_to, double, 3>, cmpScalarImpl<equal_to, double, 4>},
|
||||
{cmpScalarImpl<greater, double, 1>, cmpScalarImpl<greater, double, 2>, cmpScalarImpl<greater, double, 3>, cmpScalarImpl<greater, double, 4>},
|
||||
{cmpScalarImpl<greater_equal, double, 1>, cmpScalarImpl<greater_equal, double, 2>, cmpScalarImpl<greater_equal, double, 3>, cmpScalarImpl<greater_equal, double, 4>},
|
||||
{cmpScalarImpl<less, double, 1>, cmpScalarImpl<less, double, 2>, cmpScalarImpl<less, double, 3>, cmpScalarImpl<less, double, 4>},
|
||||
{cmpScalarImpl<less_equal, double, 1>, cmpScalarImpl<less_equal, double, 2>, cmpScalarImpl<less_equal, double, 3>, cmpScalarImpl<less_equal, double, 4>},
|
||||
{cmpScalarImpl<not_equal_to, double, 1>, cmpScalarImpl<not_equal_to, double, 2>, cmpScalarImpl<not_equal_to, double, 3>, cmpScalarImpl<not_equal_to, double, 4>}
|
||||
}
|
||||
};
|
||||
|
||||
if (inv)
|
||||
{
|
||||
// src1 is a scalar; swap it with src2
|
||||
cmpop = cmpop == cv::CMP_LT ? cv::CMP_GT : cmpop == cv::CMP_LE ? cv::CMP_GE :
|
||||
cmpop == cv::CMP_GE ? cv::CMP_LE : cmpop == cv::CMP_GT ? cv::CMP_LT : cmpop;
|
||||
}
|
||||
|
||||
const int depth = src.depth();
|
||||
const int cn = src.channels();
|
||||
|
||||
CV_DbgAssert( depth <= CV_64F && cn <= 4 );
|
||||
|
||||
funcs[depth][cmpop][cn - 1](src, val, dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -454,147 +454,9 @@ void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// compare
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
|
||||
|
||||
template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
static void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int cmpop)
|
||||
{
|
||||
using namespace arithm;
|
||||
|
||||
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[7][4] =
|
||||
{
|
||||
{cmpMatEq<unsigned char> , cmpMatNe<unsigned char> , cmpMatLt<unsigned char> , cmpMatLe<unsigned char> },
|
||||
{cmpMatEq<signed char> , cmpMatNe<signed char> , cmpMatLt<signed char> , cmpMatLe<signed char> },
|
||||
{cmpMatEq<unsigned short>, cmpMatNe<unsigned short>, cmpMatLt<unsigned short>, cmpMatLe<unsigned short>},
|
||||
{cmpMatEq<short> , cmpMatNe<short> , cmpMatLt<short> , cmpMatLe<short> },
|
||||
{cmpMatEq<int> , cmpMatNe<int> , cmpMatLt<int> , cmpMatLe<int> },
|
||||
{cmpMatEq<float> , cmpMatNe<float> , cmpMatLt<float> , cmpMatLe<float> },
|
||||
{cmpMatEq<double> , cmpMatNe<double> , cmpMatLt<double> , cmpMatLe<double> }
|
||||
};
|
||||
|
||||
typedef void (*func_v4_t)(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
static const func_v4_t funcs_v4[] =
|
||||
{
|
||||
cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4
|
||||
};
|
||||
|
||||
const int depth = src1.depth();
|
||||
const int cn = src1.channels();
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
static const int codes[] =
|
||||
{
|
||||
0, 2, 3, 2, 3, 1
|
||||
};
|
||||
const GpuMat* psrc1[] =
|
||||
{
|
||||
&src1, &src2, &src2, &src1, &src1, &src1
|
||||
};
|
||||
const GpuMat* psrc2[] =
|
||||
{
|
||||
&src2, &src1, &src1, &src2, &src2, &src2
|
||||
};
|
||||
|
||||
const int code = codes[cmpop];
|
||||
PtrStepSzb src1_(src1.rows, src1.cols * cn, psrc1[cmpop]->data, psrc1[cmpop]->step);
|
||||
PtrStepSzb src2_(src1.rows, src1.cols * cn, psrc2[cmpop]->data, psrc2[cmpop]->step);
|
||||
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
|
||||
|
||||
if (depth == CV_8U && (src1_.cols & 3) == 0)
|
||||
{
|
||||
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
|
||||
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
|
||||
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
|
||||
|
||||
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
|
||||
|
||||
if (isAllAligned)
|
||||
{
|
||||
const int vcols = src1_.cols >> 2;
|
||||
|
||||
funcs_v4[code](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
|
||||
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
|
||||
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
|
||||
stream);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const func_t func = funcs[depth][code];
|
||||
|
||||
func(src1_, src2_, dst_, stream);
|
||||
}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T> void castScalar(Scalar& sc)
|
||||
{
|
||||
sc.val[0] = saturate_cast<T>(sc.val[0]);
|
||||
sc.val[1] = saturate_cast<T>(sc.val[1]);
|
||||
sc.val[2] = saturate_cast<T>(sc.val[2]);
|
||||
sc.val[3] = saturate_cast<T>(sc.val[3]);
|
||||
}
|
||||
}
|
||||
|
||||
static void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop)
|
||||
{
|
||||
using namespace arithm;
|
||||
|
||||
typedef void (*func_t)(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[7][6] =
|
||||
{
|
||||
{cmpScalarEq<unsigned char> , cmpScalarGt<unsigned char> , cmpScalarGe<unsigned char> , cmpScalarLt<unsigned char> , cmpScalarLe<unsigned char> , cmpScalarNe<unsigned char> },
|
||||
{cmpScalarEq<signed char> , cmpScalarGt<signed char> , cmpScalarGe<signed char> , cmpScalarLt<signed char> , cmpScalarLe<signed char> , cmpScalarNe<signed char> },
|
||||
{cmpScalarEq<unsigned short>, cmpScalarGt<unsigned short>, cmpScalarGe<unsigned short>, cmpScalarLt<unsigned short>, cmpScalarLe<unsigned short>, cmpScalarNe<unsigned short>},
|
||||
{cmpScalarEq<short> , cmpScalarGt<short> , cmpScalarGe<short> , cmpScalarLt<short> , cmpScalarLe<short> , cmpScalarNe<short> },
|
||||
{cmpScalarEq<int> , cmpScalarGt<int> , cmpScalarGe<int> , cmpScalarLt<int> , cmpScalarLe<int> , cmpScalarNe<int> },
|
||||
{cmpScalarEq<float> , cmpScalarGt<float> , cmpScalarGe<float> , cmpScalarLt<float> , cmpScalarLe<float> , cmpScalarNe<float> },
|
||||
{cmpScalarEq<double> , cmpScalarGt<double> , cmpScalarGe<double> , cmpScalarLt<double> , cmpScalarLe<double> , cmpScalarNe<double> }
|
||||
};
|
||||
|
||||
typedef void (*cast_func_t)(Scalar& sc);
|
||||
static const cast_func_t cast_func[] =
|
||||
{
|
||||
castScalar<unsigned char>, castScalar<signed char>, castScalar<unsigned short>, castScalar<short>, castScalar<int>, castScalar<float>, castScalar<double>
|
||||
};
|
||||
|
||||
if (inv)
|
||||
{
|
||||
// src1 is a scalar; swap it with src2
|
||||
cmpop = cmpop == CMP_LT ? CMP_GT : cmpop == CMP_LE ? CMP_GE :
|
||||
cmpop == CMP_GE ? CMP_LE : cmpop == CMP_GT ? CMP_LT : cmpop;
|
||||
}
|
||||
|
||||
const int depth = src.depth();
|
||||
const int cn = src.channels();
|
||||
|
||||
cast_func[depth](val);
|
||||
|
||||
funcs[depth][cmpop](src, cn, val.val, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
|
||||
|
||||
void cv::cuda::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream)
|
||||
{
|
||||
|
@ -70,7 +70,7 @@ CV_CUDEV_MAKE_VEC_INST(double)
|
||||
|
||||
#undef CV_CUDEV_MAKE_VEC_INST
|
||||
|
||||
template<> struct MakeVec<schar, 1> { typedef char type; };
|
||||
template<> struct MakeVec<schar, 1> { typedef schar type; };
|
||||
template<> struct MakeVec<schar, 2> { typedef char2 type; };
|
||||
template<> struct MakeVec<schar, 3> { typedef char3 type; };
|
||||
template<> struct MakeVec<schar, 4> { typedef char4 type; };
|
||||
|
Loading…
x
Reference in New Issue
Block a user