used new device layer for cv::gpu::absdiff

This commit is contained in:
Vladislav Vinogradov
2013-07-29 16:43:37 +04:00
parent 574ff47146
commit 7628e57fc6
3 changed files with 158 additions and 201 deletions

View File

@@ -442,105 +442,9 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
//////////////////////////////////////////////////////////////////////////////
// absdiff
// Forward declarations of the absdiff implementations used by the host code in this file.
// Their definitions are not part of this section.
namespace arithm
{
// Called by the host dispatcher for CV_8U input: every row is passed as (cols * channels) / 4 unsigned int words.
void absDiffMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
// Called by the host dispatcher for CV_16U input: every row is passed as (cols * channels) / 2 unsigned int words.
void absDiffMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
// NOTE(review): this GpuMat-based overload has the same parameter list as the file-level static absDiffMat
// defined right after this namespace, but it is declared inside arithm here - confirm the intended scope.
void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int);
// Generic entry point parameterised on the element type T; it receives byte-typed views (PtrStepSzb)
// of both sources and the destination plus the CUDA stream to use.
template <typename T>
void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
}
static void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int)
{
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
{
arithm::absDiffMat<unsigned char>,
arithm::absDiffMat<signed char>,
arithm::absDiffMat<unsigned short>,
arithm::absDiffMat<short>,
arithm::absDiffMat<int>,
arithm::absDiffMat<float>,
arithm::absDiffMat<double>
};
const int depth = src1.depth();
const int cn = src1.channels();
cudaStream_t stream = StreamAccessor::getStream(_stream);
PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
if (depth == CV_8U || depth == CV_16U)
{
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (isAllAligned)
{
if (depth == CV_8U && (src1_.cols & 3) == 0)
{
const int vcols = src1_.cols >> 2;
arithm::absDiffMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
else if (depth == CV_16U && (src1_.cols & 1) == 0)
{
const int vcols = src1_.cols >> 1;
arithm::absDiffMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
}
}
const func_t func = funcs[depth];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, src2_, dst_, stream);
}
// Forward declaration of the matrix/scalar absdiff implementation; its definition is not part of this section.
namespace arithm
{
// T and S are chosen per source depth by the host dispatcher in this file (S is float for every
// instantiation listed there except <double, double>). The scalar operand is passed as a double.
template <typename T, typename S>
void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
}
static void absDiffScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int)
{
typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
{
arithm::absDiffScalar<unsigned char, float>,
arithm::absDiffScalar<signed char, float>,
arithm::absDiffScalar<unsigned short, float>,
arithm::absDiffScalar<short, float>,
arithm::absDiffScalar<int, float>,
arithm::absDiffScalar<float, float>,
arithm::absDiffScalar<double, double>
};
const int depth = src.depth();
funcs[depth](src, val[0], dst, StreamAccessor::getStream(stream));
}
// NOTE(review): this declaration repeats the parameter list of the static absDiffScalar defined just above,
// but without `static` - confirm whether both are meant to coexist or one of them is a leftover.
void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int);
void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{