optimized UMat::copyTo with mask

This commit is contained in:
Ilya Lavrenov 2014-05-30 18:01:49 +04:00
parent 6126788535
commit 7f818e9bc3
2 changed files with 31 additions and 13 deletions

View File

@@ -44,14 +44,14 @@
#ifdef COPY_TO_MASK
#define DEFINE_DATA \
int src_index = mad24(y, src_step, mad24(x, (int)sizeof(T) * scn, src_offset)); \
int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(T) * scn, dst_offset)); \
int src_index = mad24(y, src_step, mad24(x, (int)sizeof(T1) * scn, src_offset)); \
int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(T1) * scn, dst_offset)); \
\
__global const T * src = (__global const T *)(srcptr + src_index); \
__global T * dst = (__global T *)(dstptr + dst_index)
__global const T1 * src = (__global const T1 *)(srcptr + src_index); \
__global T1 * dst = (__global T1 *)(dstptr + dst_index)
__kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_offset,
__global const uchar * maskptr, int mask_step, int mask_offset,
__global const uchar * mask, int mask_step, int mask_offset,
__global uchar * dstptr, int dst_step, int dst_offset,
int dst_rows, int dst_cols)
{
@@ -60,8 +60,7 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
if (x < dst_cols && y < dst_rows)
{
int mask_index = mad24(y, mask_step, mad24(x, mcn, mask_offset));
__global const uchar * mask = (__global const uchar *)(maskptr + mask_index);
mask += mad24(y, mask_step, mad24(x, mcn, mask_offset));
#if mcn == 1
if (mask[0])
@@ -72,6 +71,16 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
for (int c = 0; c < scn; ++c)
dst[c] = src[c];
}
#ifdef HAVE_DST_UNINIT
else
{
DEFINE_DATA;
#pragma unroll
for (int c = 0; c < scn; ++c)
dst[c] = (T1)(0);
}
#endif
#elif scn == mcn
DEFINE_DATA;
@@ -79,6 +88,10 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
for (int c = 0; c < scn; ++c)
if (mask[c])
dst[c] = src[c];
#ifdef HAVE_DST_UNINIT
else
dst[c] = (T1)(0);
#endif
#else
#error "(mcn == 1 || mcn == scn) should be true"
#endif

View File

@@ -678,16 +678,21 @@ void UMat::copyTo(OutputArray _dst, InputArray _mask) const
UMat dst = _dst.getUMat();
bool haveDstUninit = false;
if( prevu != dst.u ) // do not leave dst uninitialized
dst = Scalar(0);
haveDstUninit = true;
ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc,
format("-D COPY_TO_MASK -D T=%s -D scn=%d -D mcn=%d",
ocl::memopTypeToStr(depth()), cn, mcn));
String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
ocl::memopTypeToStr(depth()), cn, mcn,
haveDstUninit ? " -D HAVE_DST_UNINIT" : "");
ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
if (!k.empty())
{
k.args(ocl::KernelArg::ReadOnlyNoSize(*this), ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
ocl::KernelArg::WriteOnly(dst));
k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
ocl::KernelArg::ReadWrite(dst));
size_t globalsize[2] = { cols, rows };
if (k.run(2, globalsize, NULL, false))