multiple rows in KF kernel

This commit is contained in:
Ilya Lavrenov
2014-05-14 15:42:30 +04:00
parent 5ee398bfd6
commit 15738bf7ef
6 changed files with 124 additions and 88 deletions

View File

@@ -1008,7 +1008,8 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int srcdepth = CV_MAT_DEPTH(srctype);
int cn = CV_MAT_CN(srctype);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
const ocl::Device d = ocl::Device::getDefault();
bool doubleSupport = d.doubleFPConfig() > 0;
if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) ||
(!doubleSupport && srcdepth == CV_64F && !bitwise))
return false;
@@ -1016,8 +1017,9 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
char opts[1024];
int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
int scalarcn = kercn == 3 ? 4 : kercn;
int rowsPerWI = d.isIntel() ? 4 : 1;
sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d",
sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d",
haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop],
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "",
@@ -1025,7 +1027,7 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)),
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) :
ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)),
kercn);
kercn, rowsPerWI);
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
if (k.empty())
@@ -1068,7 +1070,7 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
k.args(src1arg, src2arg, maskarg, dstarg);
}
size_t globalsize[] = { src1.cols * cn / kercn, src1.rows };
size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, 0, false);
}
@@ -1371,7 +1373,8 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
void* usrdata, int oclop,
bool haveScalar )
{
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
const ocl::Device d = ocl::Device::getDefault();
bool doubleSupport = d.doubleFPConfig() > 0;
int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
bool haveMask = !_mask.empty();
@@ -1388,12 +1391,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
return false;
int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
int scalarcn = kercn == 3 ? 4 : kercn;
int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = d.isIntel() ? 4 : 1;
char cvtstr[4][32], opts[1024];
sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
"-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d",
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
@@ -1404,7 +1407,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn);
doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn, rowsPerWI);
size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
const uchar* usrdata_p = (const uchar*)usrdata;
@@ -1478,7 +1481,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
k.args(src1arg, src2arg, maskarg, dstarg);
}
size_t globalsize[] = { src1.cols * cn / kercn, src1.rows };
size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, NULL, false);
}
@@ -2764,7 +2767,7 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
if (!haveScalar && (!_src1.sameSize(_src2) || type1 != type2))
return false;
int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;
// Workaround for bug with "?:" operator in AMD OpenCL compiler
if (depth1 >= CV_16U)
kercn = 1;
@@ -2775,14 +2778,14 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
" -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
" -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s%s",
" -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s",
haveScalar ? "UNARY_OP" : "BINARY_OP",
ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)),
ocl::typeToStr(CV_8UC(kercn)), kercn,
ocl::convertTypeStr(depth1, CV_8U, kercn, cvt),
operationMap[op], ocl::typeToStr(depth1),
ocl::typeToStr(depth1), ocl::typeToStr(CV_8U),
ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)),
ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)), rowsPerWI,
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
@@ -2839,7 +2842,7 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
ocl::KernelArg::WriteOnly(dst, cn, kercn));
}
size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
size_t globalsize[2] = { dst.cols * cn / kercn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, NULL, false);
}