From c0d61964d62e21cb5d527800a4c1e14044f926a7 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 3 Jul 2015 20:05:00 +0300 Subject: [PATCH] ocl: fix unaligned memory access http://code.opencv.org/issues/4462 --- modules/core/src/opencl/minmaxloc.cl | 9 +++++++++ modules/core/src/stat.cpp | 17 +++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/modules/core/src/opencl/minmaxloc.cl b/modules/core/src/opencl/minmaxloc.cl index bd026c5c8..cabc72059 100644 --- a/modules/core/src/opencl/minmaxloc.cl +++ b/modules/core/src/opencl/minmaxloc.cl @@ -13,6 +13,11 @@ #endif #endif +static inline int align(int pos) +{ + return (pos + (MINMAX_STRUCT_ALIGNMENT - 1)) & (~(MINMAX_STRUCT_ALIGNMENT - 1)); +} + #ifdef DEPTH_0 #define MIN_VAL 0 #define MAX_VAL UCHAR_MAX @@ -366,19 +371,23 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off #ifdef NEED_MINVAL *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0]; pos = mad24(groupnum, (int)sizeof(dstT1), pos); + pos = align(pos); #endif #ifdef NEED_MAXVAL *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0]; pos = mad24(groupnum, (int)sizeof(dstT1), pos); + pos = align(pos); #endif #ifdef NEED_MINLOC *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0]; pos = mad24(groupnum, (int)sizeof(uint), pos); + pos = align(pos); #endif #ifdef NEED_MAXLOC *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0]; #ifdef OP_CALC2 pos = mad24(groupnum, (int)sizeof(uint), pos); + pos = align(pos); #endif #endif #ifdef OP_CALC2 diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index d47688ba1..0a4c0415c 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1970,6 +1970,8 @@ static void ofs2idx(const Mat& a, size_t ofs, int* idx) #ifdef HAVE_OPENCL +#define MINMAX_STRUCT_ALIGNMENT 8 // sizeof double + template void getMinMaxRes(const Mat & db, double * minVal, double * maxVal, int* minLoc, int* maxLoc, @@ -1980,28 +1982,32 @@ void getMinMaxRes(const Mat & db, double * minVal, double * maxVal, T maxval = std::numeric_limits::min() > 0 ? -std::numeric_limits::max() : std::numeric_limits::min(), maxval2 = maxval; uint minloc = index_max, maxloc = index_max; - int index = 0; + size_t index = 0; const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL; const uint * minlocptr = NULL, * maxlocptr = NULL; if (minVal || minLoc) { minptr = db.ptr(); index += sizeof(T) * groupnum; + index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); } if (maxVal || maxLoc) { maxptr = (const T *)(db.ptr() + index); index += sizeof(T) * groupnum; + index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); } if (minLoc) { minlocptr = (const uint *)(db.ptr() + index); index += sizeof(uint) * groupnum; + index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); } if (maxLoc) { maxlocptr = (const uint *)(db.ptr() + index); index += sizeof(uint) * groupnum; + index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); } if (maxVal2) maxptr2 = (const T *)(db.ptr() + index); @@ -2121,7 +2127,8 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* char cvt[2][40]; String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s" " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s" - " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s", + " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s" + " -D MINMAX_STRUCT_ALIGNMENT=%d", depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs, ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", @@ -2134,7 +2141,8 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* absValues ? " -D OP_ABS" : "", haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "", haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth, - depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert"); + depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert", + MINMAX_STRUCT_ALIGNMENT); ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); if (k.empty()) @@ -2143,7 +2151,8 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S), dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) + (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) + - (maxVal2 ? esz : 0)); + (maxVal2 ? esz : 0)) + + 5 * MINMAX_STRUCT_ALIGNMENT; UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat(); if (cn > 1 && !haveMask)