Merge pull request #2918 from vbystricky:oclopt_reduce2
This commit is contained in:
@@ -3450,8 +3450,11 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
|
||||
const int min_opt_cols = 128, buf_cols = 32;
|
||||
int sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
|
||||
ddepth = CV_MAT_DEPTH(dtype), ddepth0 = ddepth;
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
|
||||
useOptimized = 1 == dim && _src.cols() > min_opt_cols;
|
||||
const ocl::Device &defDev = ocl::Device::getDefault();
|
||||
bool doubleSupport = defDev.doubleFPConfig() > 0;
|
||||
|
||||
size_t wgs = defDev.maxWorkGroupSize();
|
||||
bool useOptimized = 1 == dim && _src.cols() > min_opt_cols && (wgs >= buf_cols);
|
||||
|
||||
if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
|
||||
return false;
|
||||
@@ -3464,78 +3467,80 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
|
||||
|
||||
const char * const ops[4] = { "OCL_CV_REDUCE_SUM", "OCL_CV_REDUCE_AVG",
|
||||
"OCL_CV_REDUCE_MAX", "OCL_CV_REDUCE_MIN" };
|
||||
char cvt[2][40];
|
||||
|
||||
int wdepth = std::max(ddepth, CV_32F);
|
||||
cv::String build_opt = format("-D %s -D dim=%d -D cn=%d -D ddepth=%d"
|
||||
" -D srcT=%s -D dstT=%s -D dstT0=%s -D convertToWT=%s"
|
||||
" -D convertToDT=%s -D convertToDT0=%s%s",
|
||||
ops[op], dim, cn, ddepth, ocl::typeToStr(useOptimized ? ddepth : sdepth),
|
||||
ocl::typeToStr(ddepth), ocl::typeToStr(ddepth0),
|
||||
ocl::convertTypeStr(ddepth, wdepth, 1, cvt[0]),
|
||||
ocl::convertTypeStr(sdepth, ddepth, 1, cvt[0]),
|
||||
ocl::convertTypeStr(wdepth, ddepth0, 1, cvt[1]),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
|
||||
|
||||
if (useOptimized)
|
||||
{
|
||||
cv::String build_opt_pre = format("-D OP_REDUCE_PRE -D BUF_COLS=%d -D %s -D dim=1"
|
||||
" -D cn=%d -D ddepth=%d -D srcT=%s -D dstT=%s -D convertToDT=%s%s",
|
||||
buf_cols, ops[op], cn, ddepth, ocl::typeToStr(sdepth), ocl::typeToStr(ddepth),
|
||||
ocl::convertTypeStr(sdepth, ddepth, 1, cvt[0]),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
|
||||
ocl::Kernel kpre("reduce_horz_pre", ocl::core::reduce2_oclsrc, build_opt_pre);
|
||||
if (kpre.empty())
|
||||
size_t tileHeight = (size_t)(wgs / buf_cols);
|
||||
if (defDev.isIntel())
|
||||
{
|
||||
static const size_t maxItemInGroupCount = 16;
|
||||
tileHeight = min(tileHeight, defDev.localMemSize() / buf_cols / CV_ELEM_SIZE(CV_MAKETYPE(wdepth, cn)) / maxItemInGroupCount);
|
||||
}
|
||||
char cvt[3][40];
|
||||
cv::String build_opt = format("-D OP_REDUCE_PRE -D BUF_COLS=%d -D TILE_HEIGHT=%d -D %s -D dim=1"
|
||||
" -D cn=%d -D ddepth=%d"
|
||||
" -D srcT=%s -D bufT=%s -D dstT=%s"
|
||||
" -D convertToWT=%s -D convertToBufT=%s -D convertToDT=%s%s",
|
||||
buf_cols, tileHeight, ops[op], cn, ddepth,
|
||||
ocl::typeToStr(sdepth),
|
||||
ocl::typeToStr(ddepth),
|
||||
ocl::typeToStr(ddepth0),
|
||||
ocl::convertTypeStr(ddepth, wdepth, 1, cvt[0]),
|
||||
ocl::convertTypeStr(sdepth, ddepth, 1, cvt[1]),
|
||||
ocl::convertTypeStr(wdepth, ddepth0, 1, cvt[2]),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
|
||||
ocl::Kernel k("reduce_horz_opt", ocl::core::reduce2_oclsrc, build_opt);
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
ocl::Kernel kmain("reduce", ocl::core::reduce2_oclsrc, build_opt);
|
||||
if (kmain.empty())
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
Size dsize(1, src.rows);
|
||||
_dst.create(dsize, dtype);
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
UMat buf(src.rows, buf_cols, dst.type());
|
||||
|
||||
kpre.args(ocl::KernelArg::ReadOnly(src),
|
||||
ocl::KernelArg::WriteOnlyNoSize(buf));
|
||||
if (op0 == CV_REDUCE_AVG)
|
||||
k.args(ocl::KernelArg::ReadOnly(src),
|
||||
ocl::KernelArg::WriteOnlyNoSize(dst), 1.0f / src.cols);
|
||||
else
|
||||
k.args(ocl::KernelArg::ReadOnly(src),
|
||||
ocl::KernelArg::WriteOnlyNoSize(dst));
|
||||
|
||||
size_t localSize[2] = { buf_cols, tileHeight};
|
||||
size_t globalSize[2] = { buf_cols, src.rows };
|
||||
if (!kpre.run(2, globalSize, NULL, false))
|
||||
return k.run(2, globalSize, localSize, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
char cvt[2][40];
|
||||
cv::String build_opt = format("-D %s -D dim=%d -D cn=%d -D ddepth=%d"
|
||||
" -D srcT=%s -D dstT=%s -D dstT0=%s -D convertToWT=%s"
|
||||
" -D convertToDT=%s -D convertToDT0=%s%s",
|
||||
ops[op], dim, cn, ddepth, ocl::typeToStr(useOptimized ? ddepth : sdepth),
|
||||
ocl::typeToStr(ddepth), ocl::typeToStr(ddepth0),
|
||||
ocl::convertTypeStr(ddepth, wdepth, 1, cvt[0]),
|
||||
ocl::convertTypeStr(sdepth, ddepth, 1, cvt[0]),
|
||||
ocl::convertTypeStr(wdepth, ddepth0, 1, cvt[1]),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
|
||||
|
||||
ocl::Kernel k("reduce", ocl::core::reduce2_oclsrc, build_opt);
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
Size dsize(dim == 0 ? src.cols : 1, dim == 0 ? 1 : src.rows);
|
||||
_dst.create(dsize, dtype);
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src),
|
||||
temparg = ocl::KernelArg::WriteOnlyNoSize(dst);
|
||||
|
||||
if (op0 == CV_REDUCE_AVG)
|
||||
kmain.args(ocl::KernelArg::ReadOnly(buf),
|
||||
ocl::KernelArg::WriteOnlyNoSize(dst), 1.0f / src.cols);
|
||||
k.args(srcarg, temparg, 1.0f / (dim == 0 ? src.rows : src.cols));
|
||||
else
|
||||
kmain.args(ocl::KernelArg::ReadOnly(buf),
|
||||
ocl::KernelArg::WriteOnlyNoSize(dst));
|
||||
k.args(srcarg, temparg);
|
||||
|
||||
globalSize[0] = src.rows;
|
||||
return kmain.run(1, globalSize, NULL, false);
|
||||
size_t globalsize = std::max(dsize.width, dsize.height);
|
||||
return k.run(1, &globalsize, NULL, false);
|
||||
}
|
||||
|
||||
ocl::Kernel k("reduce", ocl::core::reduce2_oclsrc, build_opt);
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
Size dsize(dim == 0 ? src.cols : 1, dim == 0 ? 1 : src.rows);
|
||||
_dst.create(dsize, dtype);
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src),
|
||||
temparg = ocl::KernelArg::WriteOnlyNoSize(dst);
|
||||
|
||||
if (op0 == CV_REDUCE_AVG)
|
||||
k.args(srcarg, temparg, 1.0f / (dim == 0 ? src.rows : src.cols));
|
||||
else
|
||||
k.args(srcarg, temparg);
|
||||
|
||||
size_t globalsize = std::max(dsize.width, dsize.height);
|
||||
return k.run(1, &globalsize, NULL, false);
|
||||
}
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user