Process multiple rows per work-item in the OpenCL "KF" kernel (rowsPerWI = 4 on Intel devices, 1 otherwise)
This commit is contained in:
@@ -65,13 +65,15 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in
|
||||
int kercn = oclop == OCL_OP_PHASE_DEGREES ||
|
||||
oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
|
||||
|
||||
bool double_support = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
const ocl::Device d = ocl::Device::getDefault();
|
||||
bool double_support = d.doubleFPConfig() > 0;
|
||||
if (!double_support && depth == CV_64F)
|
||||
return false;
|
||||
int rowsPerWI = d.isIntel() ? 4 : 1;
|
||||
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
||||
format("-D %s -D %s -D dstT=%s%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
|
||||
oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
|
||||
format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
|
||||
oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI,
|
||||
double_support ? " -D DOUBLE_SUPPORT" : ""));
|
||||
if (k.empty())
|
||||
return false;
|
||||
@@ -89,7 +91,7 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in
|
||||
else
|
||||
k.args(src1arg, src2arg, dstarg);
|
||||
|
||||
size_t globalsize[] = { src1.cols * cn / kercn, src1.rows };
|
||||
size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
|
||||
return k.run(2, globalsize, 0, false);
|
||||
}
|
||||
|
||||
@@ -524,8 +526,10 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
|
||||
static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
|
||||
OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
|
||||
{
|
||||
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
const ocl::Device & d = ocl::Device::getDefault();
|
||||
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
||||
rowsPerWI = d.isIntel() ? 4 : 1;
|
||||
bool doubleSupport = d.doubleFPConfig() > 0;
|
||||
|
||||
if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 &&
|
||||
(depth == CV_32F || depth == CV_64F) && type == _src2.type()) ||
|
||||
@@ -533,9 +537,9 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
|
||||
return false;
|
||||
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
||||
format("-D BINARY_OP -D dstT=%s -D depth=%d -D OP_CTP_%s%s",
|
||||
format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s",
|
||||
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
|
||||
depth, angleInDegrees ? "AD" : "AR",
|
||||
depth, rowsPerWI, angleInDegrees ? "AD" : "AR",
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
if (k.empty())
|
||||
return false;
|
||||
@@ -553,7 +557,7 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
|
||||
ocl::KernelArg::WriteOnly(dst1, cn),
|
||||
ocl::KernelArg::WriteOnlyNoSize(dst2));
|
||||
|
||||
size_t globalsize[2] = { dst1.cols * cn, dst1.rows };
|
||||
size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
|
||||
return k.run(2, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
@@ -713,16 +717,18 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
|
||||
static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
|
||||
OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
|
||||
{
|
||||
int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
const ocl::Device & d = ocl::Device::getDefault();
|
||||
int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
||||
rowsPerWI = d.isIntel() ? 4 : 1;
|
||||
bool doubleSupport = d.doubleFPConfig() > 0;
|
||||
|
||||
if ( !doubleSupport && depth == CV_64F )
|
||||
return false;
|
||||
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
||||
format("-D dstT=%s -D depth=%d -D BINARY_OP -D OP_PTC_%s%s",
|
||||
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth,
|
||||
angleInDegrees ? "AD" : "AR",
|
||||
format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s",
|
||||
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI,
|
||||
depth, angleInDegrees ? "AD" : "AR",
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
if (k.empty())
|
||||
return false;
|
||||
@@ -738,7 +744,7 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle),
|
||||
ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2));
|
||||
|
||||
size_t globalsize[2] = { dst1.cols * cn, dst1.rows };
|
||||
size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
|
||||
return k.run(2, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
@@ -2103,8 +2109,10 @@ static IPowFunc ipowTab[] =
|
||||
static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
|
||||
bool is_ipower, int ipower)
|
||||
{
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
const ocl::Device & d = ocl::Device::getDefault();
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
||||
rowsPerWI = d.isIntel() ? 4 : 1;
|
||||
bool doubleSupport = d.doubleFPConfig() > 0;
|
||||
|
||||
if (depth == CV_64F && !doubleSupport)
|
||||
return false;
|
||||
@@ -2113,8 +2121,8 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
|
||||
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
|
||||
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
||||
format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(depth),
|
||||
op, doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
format("-D dstT=%s -D rowsPerWI=%d -D %s -D UNARY_OP%s", ocl::typeToStr(depth),
|
||||
rowsPerWI, op, doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
@@ -2137,7 +2145,7 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
|
||||
k.args(srcarg, dstarg, power);
|
||||
}
|
||||
|
||||
size_t globalsize[2] = { dst.cols * cn, dst.rows };
|
||||
size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
|
||||
return k.run(2, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
@@ -2491,8 +2499,10 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
|
||||
|
||||
static bool ocl_patchNaNs( InputOutputArray _a, float value )
|
||||
{
|
||||
int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
|
||||
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
|
||||
format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=int"));
|
||||
format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=int -D rowsPerWI=%d",
|
||||
rowsPerWI));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
@@ -2502,7 +2512,7 @@ static bool ocl_patchNaNs( InputOutputArray _a, float value )
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(a),
|
||||
ocl::KernelArg::WriteOnly(a, cn), (float)value);
|
||||
|
||||
size_t globalsize[2] = { a.cols * cn, a.rows };
|
||||
size_t globalsize[2] = { a.cols * cn, (a.rows + rowsPerWI - 1) / rowsPerWI };
|
||||
return k.run(2, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user