Merge pull request #4172 from alalek:ocl_minmaxloc_unaligned
This commit is contained in:
commit
8bede85f60
@ -13,6 +13,11 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static inline int align(int pos)
|
||||||
|
{
|
||||||
|
return (pos + (MINMAX_STRUCT_ALIGNMENT - 1)) & (~(MINMAX_STRUCT_ALIGNMENT - 1));
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef DEPTH_0
|
#ifdef DEPTH_0
|
||||||
#define MIN_VAL 0
|
#define MIN_VAL 0
|
||||||
#define MAX_VAL UCHAR_MAX
|
#define MAX_VAL UCHAR_MAX
|
||||||
@ -366,19 +371,23 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
#ifdef NEED_MINVAL
|
#ifdef NEED_MINVAL
|
||||||
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0];
|
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0];
|
||||||
pos = mad24(groupnum, (int)sizeof(dstT1), pos);
|
pos = mad24(groupnum, (int)sizeof(dstT1), pos);
|
||||||
|
pos = align(pos);
|
||||||
#endif
|
#endif
|
||||||
#ifdef NEED_MAXVAL
|
#ifdef NEED_MAXVAL
|
||||||
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0];
|
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0];
|
||||||
pos = mad24(groupnum, (int)sizeof(dstT1), pos);
|
pos = mad24(groupnum, (int)sizeof(dstT1), pos);
|
||||||
|
pos = align(pos);
|
||||||
#endif
|
#endif
|
||||||
#ifdef NEED_MINLOC
|
#ifdef NEED_MINLOC
|
||||||
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0];
|
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0];
|
||||||
pos = mad24(groupnum, (int)sizeof(uint), pos);
|
pos = mad24(groupnum, (int)sizeof(uint), pos);
|
||||||
|
pos = align(pos);
|
||||||
#endif
|
#endif
|
||||||
#ifdef NEED_MAXLOC
|
#ifdef NEED_MAXLOC
|
||||||
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
|
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
|
||||||
#ifdef OP_CALC2
|
#ifdef OP_CALC2
|
||||||
pos = mad24(groupnum, (int)sizeof(uint), pos);
|
pos = mad24(groupnum, (int)sizeof(uint), pos);
|
||||||
|
pos = align(pos);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#ifdef OP_CALC2
|
#ifdef OP_CALC2
|
||||||
|
@ -1970,6 +1970,8 @@ static void ofs2idx(const Mat& a, size_t ofs, int* idx)
|
|||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
#define MINMAX_STRUCT_ALIGNMENT 8 // sizeof double
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void getMinMaxRes(const Mat & db, double * minVal, double * maxVal,
|
void getMinMaxRes(const Mat & db, double * minVal, double * maxVal,
|
||||||
int* minLoc, int* maxLoc,
|
int* minLoc, int* maxLoc,
|
||||||
@ -1980,28 +1982,32 @@ void getMinMaxRes(const Mat & db, double * minVal, double * maxVal,
|
|||||||
T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval;
|
T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval;
|
||||||
uint minloc = index_max, maxloc = index_max;
|
uint minloc = index_max, maxloc = index_max;
|
||||||
|
|
||||||
int index = 0;
|
size_t index = 0;
|
||||||
const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL;
|
const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL;
|
||||||
const uint * minlocptr = NULL, * maxlocptr = NULL;
|
const uint * minlocptr = NULL, * maxlocptr = NULL;
|
||||||
if (minVal || minLoc)
|
if (minVal || minLoc)
|
||||||
{
|
{
|
||||||
minptr = db.ptr<T>();
|
minptr = db.ptr<T>();
|
||||||
index += sizeof(T) * groupnum;
|
index += sizeof(T) * groupnum;
|
||||||
|
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT);
|
||||||
}
|
}
|
||||||
if (maxVal || maxLoc)
|
if (maxVal || maxLoc)
|
||||||
{
|
{
|
||||||
maxptr = (const T *)(db.ptr() + index);
|
maxptr = (const T *)(db.ptr() + index);
|
||||||
index += sizeof(T) * groupnum;
|
index += sizeof(T) * groupnum;
|
||||||
|
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT);
|
||||||
}
|
}
|
||||||
if (minLoc)
|
if (minLoc)
|
||||||
{
|
{
|
||||||
minlocptr = (const uint *)(db.ptr() + index);
|
minlocptr = (const uint *)(db.ptr() + index);
|
||||||
index += sizeof(uint) * groupnum;
|
index += sizeof(uint) * groupnum;
|
||||||
|
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT);
|
||||||
}
|
}
|
||||||
if (maxLoc)
|
if (maxLoc)
|
||||||
{
|
{
|
||||||
maxlocptr = (const uint *)(db.ptr() + index);
|
maxlocptr = (const uint *)(db.ptr() + index);
|
||||||
index += sizeof(uint) * groupnum;
|
index += sizeof(uint) * groupnum;
|
||||||
|
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT);
|
||||||
}
|
}
|
||||||
if (maxVal2)
|
if (maxVal2)
|
||||||
maxptr2 = (const T *)(db.ptr() + index);
|
maxptr2 = (const T *)(db.ptr() + index);
|
||||||
@ -2121,7 +2127,8 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
|
|||||||
char cvt[2][40];
|
char cvt[2][40];
|
||||||
String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
|
String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
|
||||||
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
|
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
|
||||||
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s",
|
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s"
|
||||||
|
" -D MINMAX_STRUCT_ALIGNMENT=%d",
|
||||||
depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
|
depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
|
||||||
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
|
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
|
||||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
||||||
@ -2134,7 +2141,8 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
|
|||||||
absValues ? " -D OP_ABS" : "",
|
absValues ? " -D OP_ABS" : "",
|
||||||
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
|
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
|
||||||
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth,
|
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth,
|
||||||
depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert");
|
depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert",
|
||||||
|
MINMAX_STRUCT_ALIGNMENT);
|
||||||
|
|
||||||
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
|
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
@ -2143,7 +2151,8 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
|
|||||||
int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S),
|
int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S),
|
||||||
dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
|
dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
|
||||||
(needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) +
|
(needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) +
|
||||||
(maxVal2 ? esz : 0));
|
(maxVal2 ? esz : 0))
|
||||||
|
+ 5 * MINMAX_STRUCT_ALIGNMENT;
|
||||||
UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
|
UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
|
||||||
|
|
||||||
if (cn > 1 && !haveMask)
|
if (cn > 1 && !haveMask)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user