3-channel support in OpenCL kernels for setTo, resize, warpAffine and warpPerspective

This commit is contained in:
Vadim Pisarevsky
2014-02-12 19:29:18 +04:00
parent f7620dc7d1
commit 290fbc0121
6 changed files with 166 additions and 99 deletions

View File

@@ -87,9 +87,21 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
#else
#ifndef dstST
#define dstST dstT
#endif
// Choose, by the compile-time channel count `cn`, how the fill value is formed
// from the kernel argument `value_` and how it is written to the destination.
#if cn != 3
// cn != 3: `value` is just an alias for `value_`, and the store is a single
// write through a dstT pointer at byte offset dst_index.
#define value value_
#define storedst(val) *(__global dstT*)(dstptr + dst_index) = val
#else
// cn == 3: `value` is a dstT built from the .x/.y/.z components of `value_`,
// and the store uses vstore3 through a dstT1 pointer at byte offset dst_index.
#define value (dstT)(value_.x, value_.y, value_.z)
#define storedst(val) vstore3(val, 0, (__global dstT1*)(dstptr + dst_index))
#endif
// Kernel `setMask`: stores the fill value into destination element (x, y)
// when the mask byte at (x, y) is non-zero.
// NOTE(review): this text appears to be a diff rendered without +/- markers;
// in each adjacent pair of near-duplicate lines below, the first is presumably
// the pre-commit line and the second its replacement — confirm against the commit.
__kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
__global uchar* dstptr, int dststep, int dstoffset,
int rows, int cols, dstT value )
int rows, int cols, dstST value_ )
{
// One work-item per element: dimension 0 indexes x, dimension 1 indexes y.
int x = get_global_id(0);
int y = get_global_id(1);
// NOTE(review): the hunk header below marks lines not shown in this view;
// the unmatched closing brace at the end suggests an enclosing block
// (presumably a bounds check on x and y) opens there — verify in the full file.
@@ -99,22 +111,22 @@ __kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
// Byte offset of the mask entry for (x, y): y*maskstep + x + maskoffset.
int mask_index = mad24(y, maskstep, x + maskoffset);
// Only elements whose mask byte is non-zero are written.
if( mask[mask_index] )
{
// Byte offset of destination element (x, y): y*dststep + x*<element size> + dstoffset.
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset);
*(__global dstT*)(dstptr + dst_index) = value;
// Same offset with the element size written as sizeof(dstT1)*cn; the write goes through the storedst macro.
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT1)*cn + dstoffset);
storedst(value);
}
}
}
// Kernel `set`: stores the fill value into each destination element (x, y)
// that lies inside the cols x rows region.
// NOTE(review): this text appears to be a diff rendered without +/- markers;
// in each adjacent pair of near-duplicate lines below, the first is presumably
// the pre-commit line and the second its replacement — confirm against the commit.
__kernel void set(__global uchar* dstptr, int dststep, int dstoffset,
int rows, int cols, dstT value )
int rows, int cols, dstST value_ )
{
// One work-item per element: dimension 0 indexes x, dimension 1 indexes y.
int x = get_global_id(0);
int y = get_global_id(1);
// Skip work-items that fall outside the cols x rows region.
if (x < cols && y < rows)
{
// Byte offset of element (x, y): y*dststep + x*<element size> + dstoffset.
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset);
*(__global dstT*)(dstptr + dst_index) = value;
// Same offset with the element size written as sizeof(dstT1)*cn; the write goes through the storedst macro.
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT1)*cn + dstoffset);
storedst(value);
}
}

View File

@@ -744,20 +744,23 @@ UMat& UMat::setTo(InputArray _value, InputArray _mask)
{
bool haveMask = !_mask.empty();
int tp = type(), cn = CV_MAT_CN(tp);
if( dims <= 2 && cn <= 4 && cn != 3 && ocl::useOpenCL() )
if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
{
Mat value = _value.getMat();
CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
double buf[4];
double buf[4]={0,0,0,0};
convertAndUnrollScalar(value, tp, (uchar*)buf, 1);
int scalarcn = cn == 3 ? 4 : cn;
char opts[1024];
sprintf(opts, "-D dstT=%s", ocl::memopTypeToStr(tp));
sprintf(opts, "-D dstT=%s -D dstST=%s -D dstT1=%s -D cn=%d", ocl::memopTypeToStr(tp),
ocl::memopTypeToStr(CV_MAKETYPE(tp,scalarcn)),
ocl::memopTypeToStr(CV_MAT_DEPTH(tp)), cn);
ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
if( !setK.empty() )
{
ocl::KernelArg scalararg(0, 0, 0, buf, CV_ELEM_SIZE(tp));
ocl::KernelArg scalararg(0, 0, 0, buf, CV_ELEM_SIZE1(tp)*scalarcn);
UMat mask;
if( haveMask )