Merge pull request #2494 from ilya-lavrenov:tapi_merge
This commit is contained in:
@@ -1299,7 +1299,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
|
||||
bool haveMask = !_mask.empty();
|
||||
|
||||
if( ((haveMask || haveScalar) && cn > 4) )
|
||||
if ( (haveMask || haveScalar) && cn > 4 )
|
||||
return false;
|
||||
|
||||
int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype));
|
||||
@@ -1320,14 +1320,11 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d",
|
||||
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
|
||||
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
|
||||
ocl::typeToStr(CV_MAKETYPE(depth1, 1)),
|
||||
ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
|
||||
ocl::typeToStr(CV_MAKETYPE(depth2, 1)),
|
||||
ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
|
||||
ocl::typeToStr(CV_MAKETYPE(ddepth, 1)),
|
||||
ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
|
||||
ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
|
||||
ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
|
||||
ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
|
||||
ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)),
|
||||
ocl::typeToStr(CV_MAKETYPE(wdepth, 1)), wdepth,
|
||||
ocl::typeToStr(wdepth), wdepth,
|
||||
ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
|
||||
ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
|
||||
ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
|
||||
@@ -1347,7 +1344,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
}
|
||||
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
|
||||
if( k.empty() )
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
UMat src1 = _src1.getUMat(), src2;
|
||||
@@ -1388,12 +1385,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
|
||||
if( !haveMask )
|
||||
{
|
||||
if(n == 0)
|
||||
if (n == 0)
|
||||
k.args(src1arg, src2arg, dstarg);
|
||||
else if(n == 1)
|
||||
else if (n == 1)
|
||||
k.args(src1arg, src2arg, dstarg,
|
||||
ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
|
||||
else if(n == 3)
|
||||
else if (n == 3)
|
||||
k.args(src1arg, src2arg, dstarg,
|
||||
ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz),
|
||||
ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
|
||||
|
@@ -415,42 +415,54 @@ namespace cv {
|
||||
|
||||
static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst )
|
||||
{
|
||||
std::vector<UMat> src;
|
||||
std::vector<UMat> src, ksrc;
|
||||
_mv.getUMatVector(src);
|
||||
CV_Assert(!src.empty());
|
||||
|
||||
int type = src[0].type(), depth = CV_MAT_DEPTH(type);
|
||||
Size size = src[0].size();
|
||||
|
||||
size_t srcsize = src.size();
|
||||
for (size_t i = 0; i < srcsize; ++i)
|
||||
for (size_t i = 0, srcsize = src.size(); i < srcsize; ++i)
|
||||
{
|
||||
int itype = src[i].type(), icn = CV_MAT_CN(itype), idepth = CV_MAT_DEPTH(itype);
|
||||
if (src[i].dims > 2 || icn != 1)
|
||||
int itype = src[i].type(), icn = CV_MAT_CN(itype), idepth = CV_MAT_DEPTH(itype),
|
||||
esz1 = CV_ELEM_SIZE1(idepth);
|
||||
if (src[i].dims > 2)
|
||||
return false;
|
||||
CV_Assert(size == src[i].size() && depth == idepth);
|
||||
}
|
||||
|
||||
String srcargs, srcdecl, processelem;
|
||||
for (size_t i = 0; i < srcsize; ++i)
|
||||
CV_Assert(size == src[i].size() && depth == idepth);
|
||||
|
||||
for (int cn = 0; cn < icn; ++cn)
|
||||
{
|
||||
UMat tsrc = src[i];
|
||||
tsrc.offset += cn * esz1;
|
||||
ksrc.push_back(tsrc);
|
||||
}
|
||||
}
|
||||
int dcn = (int)ksrc.size();
|
||||
|
||||
String srcargs, srcdecl, processelem, cndecl;
|
||||
for (int i = 0; i < dcn; ++i)
|
||||
{
|
||||
srcargs += format("DECLARE_SRC_PARAM(%d)", i);
|
||||
srcdecl += format("DECLARE_DATA(%d)", i);
|
||||
processelem += format("PROCESS_ELEM(%d)", i);
|
||||
cndecl += format(" -D scn%d=%d", i, ksrc[i].channels());
|
||||
}
|
||||
|
||||
ocl::Kernel k("merge", ocl::core::split_merge_oclsrc,
|
||||
format("-D OP_MERGE -D cn=%d -D T=%s -D DECLARE_SRC_PARAMS_N=%s -D DECLARE_DATA_N=%s -D PROCESS_ELEMS_N=%s",
|
||||
(int)srcsize, ocl::memopTypeToStr(depth), srcargs.c_str(), srcdecl.c_str(), processelem.c_str()));
|
||||
format("-D OP_MERGE -D cn=%d -D T=%s -D DECLARE_SRC_PARAMS_N=%s"
|
||||
" -D DECLARE_DATA_N=%s -D PROCESS_ELEMS_N=%s%s",
|
||||
dcn, ocl::memopTypeToStr(depth), srcargs.c_str(),
|
||||
srcdecl.c_str(), processelem.c_str(), cndecl.c_str()));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
_dst.create(size, CV_MAKE_TYPE(depth, (int)srcsize));
|
||||
_dst.create(size, CV_MAKE_TYPE(depth, dcn));
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
int argidx = 0;
|
||||
for (size_t i = 0; i < srcsize; ++i)
|
||||
argidx = k.set(argidx, ocl::KernelArg::ReadOnlyNoSize(src[i]));
|
||||
for (int i = 0; i < dcn; ++i)
|
||||
argidx = k.set(argidx, ocl::KernelArg::ReadOnlyNoSize(ksrc[i]));
|
||||
k.set(argidx, ocl::KernelArg::WriteOnly(dst));
|
||||
|
||||
size_t globalsize[2] = { dst.cols, dst.rows };
|
||||
|
@@ -2041,7 +2041,7 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
|
||||
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
|
||||
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
||||
format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
|
||||
format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(depth),
|
||||
op, doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
if (k.empty())
|
||||
return false;
|
||||
@@ -2081,7 +2081,7 @@ void pow( InputArray _src, double power, OutputArray _dst )
|
||||
{
|
||||
if( ipower < 0 )
|
||||
{
|
||||
divide( 1., _src, _dst );
|
||||
divide( Scalar::all(1), _src, _dst );
|
||||
if( ipower == -1 )
|
||||
return;
|
||||
ipower = -ipower;
|
||||
@@ -2115,10 +2115,7 @@ void pow( InputArray _src, double power, OutputArray _dst )
|
||||
|
||||
Mat src, dst;
|
||||
if (same)
|
||||
{
|
||||
dst = _dst.getMat();
|
||||
src = dst;
|
||||
}
|
||||
src = dst = _dst.getMat();
|
||||
else
|
||||
{
|
||||
src = _src.getMat();
|
||||
|
@@ -4348,7 +4348,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
|
||||
InputArray src4, InputArray src5, InputArray src6,
|
||||
InputArray src7, InputArray src8, InputArray src9)
|
||||
{
|
||||
int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(depth);
|
||||
Size ssize = src1.size();
|
||||
const ocl::Device & d = ocl::Device::getDefault();
|
||||
|
||||
@@ -4372,7 +4372,8 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
|
||||
PROCESS_SRC(src9);
|
||||
|
||||
size_t size = offsets.size();
|
||||
std::vector<int> dividers(size, width);
|
||||
int wsz = width * esz;
|
||||
std::vector<int> dividers(size, wsz);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
|
||||
@@ -4380,7 +4381,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
|
||||
|
||||
// default strategy
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
if (dividers[i] != width)
|
||||
if (dividers[i] != wsz)
|
||||
{
|
||||
width = 1;
|
||||
break;
|
||||
|
@@ -45,7 +45,7 @@
|
||||
|
||||
#define DECLARE_SRC_PARAM(index) __global const uchar * src##index##ptr, int src##index##_step, int src##index##_offset,
|
||||
#define DECLARE_DATA(index) __global const T * src##index = \
|
||||
(__global T *)(src##index##ptr + mad24(src##index##_step, y, mad24(x, (int)sizeof(T), src##index##_offset)));
|
||||
(__global T *)(src##index##ptr + mad24(src##index##_step, y, mad24(x, (int)sizeof(T) * scn##index, src##index##_offset)));
|
||||
#define PROCESS_ELEM(index) dst[index] = src##index[0];
|
||||
|
||||
__kernel void merge(DECLARE_SRC_PARAMS_N
|
||||
|
Reference in New Issue
Block a user