Bug #4315 : fix CUDA bitwise operations with mask
(cherry picked from commit d87c30dc8470e39048cc01f75f26eb7bd3d6ce9f)
This commit is contained in:
parent
2598392295
commit
1d40946959
@ -1896,53 +1896,53 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
namespace arithm
|
namespace arithm
|
||||||
{
|
{
|
||||||
template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream);
|
transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), SingleMaskChannels(mask, num_channels), stream);
|
||||||
else
|
else
|
||||||
transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
|
transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
|
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), SingleMaskChannels(mask, num_channels), stream);
|
||||||
else
|
else
|
||||||
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
|
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
|
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), SingleMaskChannels(mask, num_channels), stream);
|
||||||
else
|
else
|
||||||
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
|
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream);
|
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), SingleMaskChannels(mask, num_channels), stream);
|
||||||
else
|
else
|
||||||
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
|
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
|
|
||||||
template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
|
|
||||||
template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
|
|
||||||
template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -1955,7 +1955,7 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre
|
|||||||
|
|
||||||
namespace arithm
|
namespace arithm
|
||||||
{
|
{
|
||||||
template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& s)
|
void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& s)
|
||||||
@ -1964,39 +1964,73 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
|
|||||||
|
|
||||||
const int depth = src.depth();
|
const int depth = src.depth();
|
||||||
|
|
||||||
CV_Assert( depth <= CV_64F );
|
CV_Assert( depth < CV_32F );
|
||||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
||||||
|
|
||||||
dst.create(src.size(), src.type());
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
|
if (mask.empty())
|
||||||
|
{
|
||||||
const int bcols = (int) (src.cols * src.elemSize());
|
const int bcols = (int) (src.cols * src.elemSize());
|
||||||
|
bool aligned =
|
||||||
|
isAligned(src.data, sizeof(unsigned int)) &&
|
||||||
|
isAligned(dst.data, sizeof(unsigned int));
|
||||||
|
|
||||||
if ((bcols & 3) == 0)
|
if (aligned && (bcols & 3) == 0)
|
||||||
{
|
{
|
||||||
const int vcols = bcols >> 2;
|
const int vcols = bcols >> 2;
|
||||||
|
|
||||||
bitMatNot<unsigned int>(
|
bitMatNot<unsigned int>(
|
||||||
PtrStepSzb(src.rows, vcols, src.data, src.step),
|
PtrStepSzb(src.rows, vcols, src.data, src.step),
|
||||||
PtrStepSzb(src.rows, vcols, dst.data, dst.step),
|
PtrStepSzb(src.rows, vcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
}
|
}
|
||||||
else if ((bcols & 1) == 0)
|
else if (aligned && (bcols & 1) == 0)
|
||||||
{
|
{
|
||||||
const int vcols = bcols >> 1;
|
const int vcols = bcols >> 1;
|
||||||
|
|
||||||
bitMatNot<unsigned short>(
|
bitMatNot<unsigned short>(
|
||||||
PtrStepSzb(src.rows, vcols, src.data, src.step),
|
PtrStepSzb(src.rows, vcols, src.data, src.step),
|
||||||
PtrStepSzb(src.rows, vcols, dst.data, dst.step),
|
PtrStepSzb(src.rows, vcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bitMatNot<unsigned char>(
|
bitMatNot<unsigned char>(
|
||||||
PtrStepSzb(src.rows, bcols, src.data, src.step),
|
PtrStepSzb(src.rows, bcols, src.data, src.step),
|
||||||
PtrStepSzb(src.rows, bcols, dst.data, dst.step),
|
PtrStepSzb(src.rows, bcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int elem_size = src.elemSize1();
|
||||||
|
const int num_channels = src.channels();
|
||||||
|
const int bcols = src.cols * num_channels;
|
||||||
|
|
||||||
|
if (elem_size == 1)
|
||||||
|
{
|
||||||
|
bitMatNot<unsigned char>(
|
||||||
|
PtrStepSzb(src.rows, bcols, src.data, src.step),
|
||||||
|
PtrStepSzb(src.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
|
else if (elem_size == 2)
|
||||||
|
{
|
||||||
|
bitMatNot<unsigned short>(
|
||||||
|
PtrStepSzb(src.rows, bcols, src.data, src.step),
|
||||||
|
PtrStepSzb(src.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
|
else if (elem_size == 4)
|
||||||
|
{
|
||||||
|
bitMatNot<unsigned int>(
|
||||||
|
PtrStepSzb(src.rows, bcols, src.data, src.step),
|
||||||
|
PtrStepSzb(src.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2005,9 +2039,9 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
|
|||||||
|
|
||||||
namespace arithm
|
namespace arithm
|
||||||
{
|
{
|
||||||
template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s)
|
void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s)
|
||||||
@ -2016,7 +2050,7 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
|
|||||||
|
|
||||||
const int depth = src1.depth();
|
const int depth = src1.depth();
|
||||||
|
|
||||||
CV_Assert( depth <= CV_64F );
|
CV_Assert( depth < CV_32F );
|
||||||
CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
|
CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
|
||||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
|
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
|
||||||
|
|
||||||
@ -2024,9 +2058,15 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
|
|||||||
|
|
||||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
|
if (mask.empty())
|
||||||
|
{
|
||||||
const int bcols = (int) (src1.cols * src1.elemSize());
|
const int bcols = (int) (src1.cols * src1.elemSize());
|
||||||
|
bool aligned =
|
||||||
|
isAligned(src1.data, sizeof(unsigned int)) &&
|
||||||
|
isAligned(src2.data, sizeof(unsigned int)) &&
|
||||||
|
isAligned(dst.data, sizeof(unsigned int));
|
||||||
|
|
||||||
if ((bcols & 3) == 0)
|
if (aligned && (bcols & 3) == 0)
|
||||||
{
|
{
|
||||||
const int vcols = bcols >> 2;
|
const int vcols = bcols >> 2;
|
||||||
|
|
||||||
@ -2034,9 +2074,9 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
|
|||||||
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
}
|
}
|
||||||
else if ((bcols & 1) == 0)
|
else if (aligned && (bcols & 1) == 0)
|
||||||
{
|
{
|
||||||
const int vcols = bcols >> 1;
|
const int vcols = bcols >> 1;
|
||||||
|
|
||||||
@ -2044,16 +2084,47 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
|
|||||||
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
||||||
bitMatAnd<unsigned char>(
|
bitMatAnd<unsigned char>(
|
||||||
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int elem_size = src1.elemSize1();
|
||||||
|
const int num_channels = src1.channels();
|
||||||
|
const int bcols = src1.cols * num_channels;
|
||||||
|
|
||||||
|
if (elem_size == 1)
|
||||||
|
{
|
||||||
|
bitMatAnd<unsigned char>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
|
else if (elem_size == 2)
|
||||||
|
{
|
||||||
|
bitMatAnd<unsigned short>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
|
else if (elem_size == 4)
|
||||||
|
{
|
||||||
|
bitMatAnd<unsigned int>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2063,7 +2134,7 @@ void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, co
|
|||||||
|
|
||||||
const int depth = src1.depth();
|
const int depth = src1.depth();
|
||||||
|
|
||||||
CV_Assert( depth <= CV_64F );
|
CV_Assert( depth < CV_32F );
|
||||||
CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
|
CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
|
||||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
|
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
|
||||||
|
|
||||||
@ -2071,9 +2142,15 @@ void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, co
|
|||||||
|
|
||||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
|
if (mask.empty())
|
||||||
|
{
|
||||||
const int bcols = (int) (src1.cols * src1.elemSize());
|
const int bcols = (int) (src1.cols * src1.elemSize());
|
||||||
|
bool aligned =
|
||||||
|
isAligned(src1.data, sizeof(unsigned int)) &&
|
||||||
|
isAligned(src2.data, sizeof(unsigned int)) &&
|
||||||
|
isAligned(dst.data, sizeof(unsigned int));
|
||||||
|
|
||||||
if ((bcols & 3) == 0)
|
if (aligned && (bcols & 3) == 0)
|
||||||
{
|
{
|
||||||
const int vcols = bcols >> 2;
|
const int vcols = bcols >> 2;
|
||||||
|
|
||||||
@ -2081,9 +2158,9 @@ void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, co
|
|||||||
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
}
|
}
|
||||||
else if ((bcols & 1) == 0)
|
else if (aligned && (bcols & 1) == 0)
|
||||||
{
|
{
|
||||||
const int vcols = bcols >> 1;
|
const int vcols = bcols >> 1;
|
||||||
|
|
||||||
@ -2091,16 +2168,47 @@ void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, co
|
|||||||
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
||||||
bitMatOr<unsigned char>(
|
bitMatOr<unsigned char>(
|
||||||
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int elem_size = src1.elemSize1();
|
||||||
|
const int num_channels = src1.channels();
|
||||||
|
const int bcols = src1.cols * num_channels;
|
||||||
|
|
||||||
|
if (elem_size == 1)
|
||||||
|
{
|
||||||
|
bitMatOr<unsigned char>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
|
else if (elem_size == 2)
|
||||||
|
{
|
||||||
|
bitMatOr<unsigned short>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
|
else if (elem_size == 4)
|
||||||
|
{
|
||||||
|
bitMatOr<unsigned int>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2110,7 +2218,7 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
|
|||||||
|
|
||||||
const int depth = src1.depth();
|
const int depth = src1.depth();
|
||||||
|
|
||||||
CV_Assert( depth <= CV_64F );
|
CV_Assert( depth < CV_32F );
|
||||||
CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
|
CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
|
||||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
|
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
|
||||||
|
|
||||||
@ -2118,9 +2226,15 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
|
|||||||
|
|
||||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
|
if (mask.empty())
|
||||||
|
{
|
||||||
const int bcols = (int) (src1.cols * src1.elemSize());
|
const int bcols = (int) (src1.cols * src1.elemSize());
|
||||||
|
bool aligned =
|
||||||
|
isAligned(src1.data, sizeof(unsigned int)) &&
|
||||||
|
isAligned(src2.data, sizeof(unsigned int)) &&
|
||||||
|
isAligned(dst.data, sizeof(unsigned int));
|
||||||
|
|
||||||
if ((bcols & 3) == 0)
|
if (aligned && (bcols & 3) == 0)
|
||||||
{
|
{
|
||||||
const int vcols = bcols >> 2;
|
const int vcols = bcols >> 2;
|
||||||
|
|
||||||
@ -2128,9 +2242,9 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
|
|||||||
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
}
|
}
|
||||||
else if ((bcols & 1) == 0)
|
else if (aligned && (bcols & 1) == 0)
|
||||||
{
|
{
|
||||||
const int vcols = bcols >> 1;
|
const int vcols = bcols >> 1;
|
||||||
|
|
||||||
@ -2138,16 +2252,47 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
|
|||||||
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
||||||
bitMatXor<unsigned char>(
|
bitMatXor<unsigned char>(
|
||||||
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
mask, stream);
|
PtrStepb(), 1, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int elem_size = src1.elemSize1();
|
||||||
|
const int num_channels = src1.channels();
|
||||||
|
const int bcols = src1.cols * num_channels;
|
||||||
|
|
||||||
|
if (elem_size == 1)
|
||||||
|
{
|
||||||
|
bitMatXor<unsigned char>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
|
else if (elem_size == 2)
|
||||||
|
{
|
||||||
|
bitMatXor<unsigned short>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
|
else if (elem_size == 4)
|
||||||
|
{
|
||||||
|
bitMatXor<unsigned int>(
|
||||||
|
PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
|
||||||
|
PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
|
||||||
|
mask, num_channels, stream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1785,72 +1785,95 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Scalar, testing::Combine(
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Bitwise_Array
|
// Bitwise_Array
|
||||||
|
|
||||||
PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType)
|
PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
|
||||||
{
|
{
|
||||||
cv::gpu::DeviceInfo devInfo;
|
cv::gpu::DeviceInfo devInfo;
|
||||||
cv::Size size;
|
cv::Size size;
|
||||||
int type;
|
int type;
|
||||||
|
bool useRoi;
|
||||||
|
|
||||||
cv::Mat src1;
|
cv::Mat src1;
|
||||||
cv::Mat src2;
|
cv::Mat src2;
|
||||||
|
|
||||||
|
cv::Mat mask;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
devInfo = GET_PARAM(0);
|
devInfo = GET_PARAM(0);
|
||||||
size = GET_PARAM(1);
|
size = GET_PARAM(1);
|
||||||
type = GET_PARAM(2);
|
type = GET_PARAM(2);
|
||||||
|
useRoi = GET_PARAM(3);
|
||||||
|
|
||||||
cv::gpu::setDevice(devInfo.deviceID());
|
cv::gpu::setDevice(devInfo.deviceID());
|
||||||
|
|
||||||
src1 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
|
src1 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
|
||||||
src2 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
|
src2 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
|
||||||
|
|
||||||
|
mask = randomMat(size, CV_8UC1, 0.0, 2.0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
GPU_TEST_P(Bitwise_Array, Not)
|
GPU_TEST_P(Bitwise_Array, Not)
|
||||||
{
|
{
|
||||||
cv::gpu::GpuMat dst;
|
cv::gpu::GpuMat dst_nomask, dst_mask(src1.size(), src1.type(), cv::Scalar::all(0));
|
||||||
cv::gpu::bitwise_not(loadMat(src1), dst);
|
cv::gpu::bitwise_not(loadMat(src1, useRoi), dst_nomask);
|
||||||
|
cv::gpu::bitwise_not(loadMat(src1, useRoi), dst_mask, loadMat(mask, useRoi));
|
||||||
|
|
||||||
cv::Mat dst_gold = ~src1;
|
cv::Mat dst_gold_nomask, dst_gold_mask(src1.size(), src1.type(), cv::Scalar::all(0));
|
||||||
|
cv::bitwise_not(src1, dst_gold_nomask);
|
||||||
|
cv::bitwise_not(src1, dst_gold_mask, mask);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
EXPECT_MAT_NEAR(dst_gold_nomask, dst_nomask, 0.0);
|
||||||
|
EXPECT_MAT_NEAR(dst_gold_mask, dst_mask, 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU_TEST_P(Bitwise_Array, Or)
|
GPU_TEST_P(Bitwise_Array, Or)
|
||||||
{
|
{
|
||||||
cv::gpu::GpuMat dst;
|
cv::gpu::GpuMat dst_nomask, dst_mask(src1.size(), src1.type(), cv::Scalar::all(0));
|
||||||
cv::gpu::bitwise_or(loadMat(src1), loadMat(src2), dst);
|
cv::gpu::bitwise_or(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_nomask);
|
||||||
|
cv::gpu::bitwise_or(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_mask, loadMat(mask, useRoi));
|
||||||
|
|
||||||
cv::Mat dst_gold = src1 | src2;
|
cv::Mat dst_gold_nomask, dst_gold_mask(src1.size(), src1.type(), cv::Scalar::all(0));
|
||||||
|
cv::bitwise_or(src1, src2, dst_gold_nomask);
|
||||||
|
cv::bitwise_or(src1, src2, dst_gold_mask, mask);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
EXPECT_MAT_NEAR(dst_gold_nomask, dst_nomask, 0.0);
|
||||||
|
EXPECT_MAT_NEAR(dst_gold_mask, dst_mask, 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU_TEST_P(Bitwise_Array, And)
|
GPU_TEST_P(Bitwise_Array, And)
|
||||||
{
|
{
|
||||||
cv::gpu::GpuMat dst;
|
cv::gpu::GpuMat dst_nomask, dst_mask(src1.size(), src1.type(), cv::Scalar::all(0));
|
||||||
cv::gpu::bitwise_and(loadMat(src1), loadMat(src2), dst);
|
cv::gpu::bitwise_and(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_nomask);
|
||||||
|
cv::gpu::bitwise_and(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_mask, loadMat(mask, useRoi));
|
||||||
|
|
||||||
cv::Mat dst_gold = src1 & src2;
|
cv::Mat dst_gold_nomask, dst_gold_mask(src1.size(), src1.type(), cv::Scalar::all(0));
|
||||||
|
cv::bitwise_and(src1, src2, dst_gold_nomask);
|
||||||
|
cv::bitwise_and(src1, src2, dst_gold_mask, mask);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
EXPECT_MAT_NEAR(dst_gold_nomask, dst_nomask, 0.0);
|
||||||
|
EXPECT_MAT_NEAR(dst_gold_mask, dst_mask, 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU_TEST_P(Bitwise_Array, Xor)
|
GPU_TEST_P(Bitwise_Array, Xor)
|
||||||
{
|
{
|
||||||
cv::gpu::GpuMat dst;
|
cv::gpu::GpuMat dst_nomask, dst_mask(src1.size(), src1.type(), cv::Scalar::all(0));
|
||||||
cv::gpu::bitwise_xor(loadMat(src1), loadMat(src2), dst);
|
cv::gpu::bitwise_xor(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_nomask);
|
||||||
|
cv::gpu::bitwise_xor(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_mask, loadMat(mask, useRoi));
|
||||||
|
|
||||||
cv::Mat dst_gold = src1 ^ src2;
|
cv::Mat dst_gold_nomask, dst_gold_mask(src1.size(), src1.type(), cv::Scalar::all(0));
|
||||||
|
cv::bitwise_xor(src1, src2, dst_gold_nomask);
|
||||||
|
cv::bitwise_xor(src1, src2, dst_gold_mask, mask);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
EXPECT_MAT_NEAR(dst_gold_nomask, dst_nomask, 0.0);
|
||||||
|
EXPECT_MAT_NEAR(dst_gold_mask, dst_mask, 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Array, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Array, testing::Combine(
|
||||||
ALL_DEVICES,
|
ALL_DEVICES,
|
||||||
DIFFERENT_SIZES,
|
DIFFERENT_SIZES,
|
||||||
TYPES(CV_8U, CV_32S, 1, 4)));
|
TYPES(CV_8U, CV_32S, 1, 4),
|
||||||
|
WHOLE_SUBMAT));
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Bitwise_Scalar
|
// Bitwise_Scalar
|
||||||
|
Loading…
x
Reference in New Issue
Block a user