used new device layer for cv::gpu::compare
This commit is contained in:
@@ -454,147 +454,9 @@ void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// compare
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
|
||||
|
||||
template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
static void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int cmpop)
|
||||
{
|
||||
using namespace arithm;
|
||||
|
||||
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[7][4] =
|
||||
{
|
||||
{cmpMatEq<unsigned char> , cmpMatNe<unsigned char> , cmpMatLt<unsigned char> , cmpMatLe<unsigned char> },
|
||||
{cmpMatEq<signed char> , cmpMatNe<signed char> , cmpMatLt<signed char> , cmpMatLe<signed char> },
|
||||
{cmpMatEq<unsigned short>, cmpMatNe<unsigned short>, cmpMatLt<unsigned short>, cmpMatLe<unsigned short>},
|
||||
{cmpMatEq<short> , cmpMatNe<short> , cmpMatLt<short> , cmpMatLe<short> },
|
||||
{cmpMatEq<int> , cmpMatNe<int> , cmpMatLt<int> , cmpMatLe<int> },
|
||||
{cmpMatEq<float> , cmpMatNe<float> , cmpMatLt<float> , cmpMatLe<float> },
|
||||
{cmpMatEq<double> , cmpMatNe<double> , cmpMatLt<double> , cmpMatLe<double> }
|
||||
};
|
||||
|
||||
typedef void (*func_v4_t)(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
|
||||
static const func_v4_t funcs_v4[] =
|
||||
{
|
||||
cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4
|
||||
};
|
||||
|
||||
const int depth = src1.depth();
|
||||
const int cn = src1.channels();
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
static const int codes[] =
|
||||
{
|
||||
0, 2, 3, 2, 3, 1
|
||||
};
|
||||
const GpuMat* psrc1[] =
|
||||
{
|
||||
&src1, &src2, &src2, &src1, &src1, &src1
|
||||
};
|
||||
const GpuMat* psrc2[] =
|
||||
{
|
||||
&src2, &src1, &src1, &src2, &src2, &src2
|
||||
};
|
||||
|
||||
const int code = codes[cmpop];
|
||||
PtrStepSzb src1_(src1.rows, src1.cols * cn, psrc1[cmpop]->data, psrc1[cmpop]->step);
|
||||
PtrStepSzb src2_(src1.rows, src1.cols * cn, psrc2[cmpop]->data, psrc2[cmpop]->step);
|
||||
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
|
||||
|
||||
if (depth == CV_8U && (src1_.cols & 3) == 0)
|
||||
{
|
||||
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
|
||||
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
|
||||
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
|
||||
|
||||
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
|
||||
|
||||
if (isAllAligned)
|
||||
{
|
||||
const int vcols = src1_.cols >> 2;
|
||||
|
||||
funcs_v4[code](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
|
||||
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
|
||||
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
|
||||
stream);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const func_t func = funcs[depth][code];
|
||||
|
||||
func(src1_, src2_, dst_, stream);
|
||||
}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template <typename T> void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T> void castScalar(Scalar& sc)
|
||||
{
|
||||
sc.val[0] = saturate_cast<T>(sc.val[0]);
|
||||
sc.val[1] = saturate_cast<T>(sc.val[1]);
|
||||
sc.val[2] = saturate_cast<T>(sc.val[2]);
|
||||
sc.val[3] = saturate_cast<T>(sc.val[3]);
|
||||
}
|
||||
}
|
||||
|
||||
static void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop)
|
||||
{
|
||||
using namespace arithm;
|
||||
|
||||
typedef void (*func_t)(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[7][6] =
|
||||
{
|
||||
{cmpScalarEq<unsigned char> , cmpScalarGt<unsigned char> , cmpScalarGe<unsigned char> , cmpScalarLt<unsigned char> , cmpScalarLe<unsigned char> , cmpScalarNe<unsigned char> },
|
||||
{cmpScalarEq<signed char> , cmpScalarGt<signed char> , cmpScalarGe<signed char> , cmpScalarLt<signed char> , cmpScalarLe<signed char> , cmpScalarNe<signed char> },
|
||||
{cmpScalarEq<unsigned short>, cmpScalarGt<unsigned short>, cmpScalarGe<unsigned short>, cmpScalarLt<unsigned short>, cmpScalarLe<unsigned short>, cmpScalarNe<unsigned short>},
|
||||
{cmpScalarEq<short> , cmpScalarGt<short> , cmpScalarGe<short> , cmpScalarLt<short> , cmpScalarLe<short> , cmpScalarNe<short> },
|
||||
{cmpScalarEq<int> , cmpScalarGt<int> , cmpScalarGe<int> , cmpScalarLt<int> , cmpScalarLe<int> , cmpScalarNe<int> },
|
||||
{cmpScalarEq<float> , cmpScalarGt<float> , cmpScalarGe<float> , cmpScalarLt<float> , cmpScalarLe<float> , cmpScalarNe<float> },
|
||||
{cmpScalarEq<double> , cmpScalarGt<double> , cmpScalarGe<double> , cmpScalarLt<double> , cmpScalarLe<double> , cmpScalarNe<double> }
|
||||
};
|
||||
|
||||
typedef void (*cast_func_t)(Scalar& sc);
|
||||
static const cast_func_t cast_func[] =
|
||||
{
|
||||
castScalar<unsigned char>, castScalar<signed char>, castScalar<unsigned short>, castScalar<short>, castScalar<int>, castScalar<float>, castScalar<double>
|
||||
};
|
||||
|
||||
if (inv)
|
||||
{
|
||||
// src1 is a scalar; swap it with src2
|
||||
cmpop = cmpop == CMP_LT ? CMP_GT : cmpop == CMP_LE ? CMP_GE :
|
||||
cmpop == CMP_GE ? CMP_LE : cmpop == CMP_GT ? CMP_LT : cmpop;
|
||||
}
|
||||
|
||||
const int depth = src.depth();
|
||||
const int cn = src.channels();
|
||||
|
||||
cast_func[depth](val);
|
||||
|
||||
funcs[depth][cmpop](src, cn, val.val, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
// Declaration of the matrix-vs-scalar comparison routine. The GpuMat and double
// parameters in positions five and six are unnamed in this prototype.
void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
|
||||
|
||||
void cv::cuda::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream)
|
||||
{
|
||||
|
Reference in New Issue
Block a user