Merge pull request #3369 from vbystricky:fix_scaleAdd

This commit is contained in:
Alexander Alekhin 2014-10-27 10:03:29 +00:00
commit dee56598e9

View File

@@ -2173,14 +2173,18 @@ typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, i
static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type ) static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
{ {
const ocl::Device & d = ocl::Device::getDefault(); const ocl::Device & d = ocl::Device::getDefault();
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F),
kercn = ocl::predictOptimalVectorWidth(_src1, _src2, _dst), rowsPerWI = d.isIntel() ? 4 : 1;
bool doubleSupport = d.doubleFPConfig() > 0; bool doubleSupport = d.doubleFPConfig() > 0;
Size size = _src1.size(); Size size = _src1.size();
int depth = CV_MAT_DEPTH(type);
if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() ) if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() )
return false; return false;
_dst.create(size, type);
int cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F);
int kercn = ocl::predictOptimalVectorWidthMax(_src1, _src2, _dst),
rowsPerWI = d.isIntel() ? 4 : 1;
char cvt[2][50]; char cvt[2][50];
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s" format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
@@ -2195,9 +2199,7 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp
if (k.empty()) if (k.empty())
return false; return false;
UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat();
_dst.create(size, type);
UMat dst = _dst.getUMat();
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),