minmaxloc

This commit is contained in:
Ilya Lavrenov 2014-06-04 18:22:55 +04:00
parent f30301d171
commit fd5a8b3e97
3 changed files with 399 additions and 153 deletions

View File

@ -0,0 +1,280 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Itseez, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#ifdef DEPTH_0
#define MIN_VAL 0
#define MAX_VAL 255
#elif defined DEPTH_1
#define MIN_VAL -128
#define MAX_VAL 127
#elif defined DEPTH_2
#define MIN_VAL 0
#define MAX_VAL 65535
#elif defined DEPTH_3
#define MIN_VAL -32768
#define MAX_VAL 32767
#elif defined DEPTH_4
#define MIN_VAL INT_MIN
#define MAX_VAL INT_MAX
#elif defined DEPTH_5
#define MIN_VAL (-FLT_MAX)
#define MAX_VAL FLT_MAX
#elif defined DEPTH_6
#define MIN_VAL (-DBL_MAX)
#define MAX_VAL DBL_MAX
#endif
#define INDEX_MAX UINT_MAX
#ifdef NEED_MINLOC
#define CALC_MINLOC(inc) minloc = id + inc
#else
#define CALC_MINLOC(inc)
#endif
#ifdef NEED_MAXLOC
#define CALC_MAXLOC(inc) maxloc = id + inc
#else
#define CALC_MAXLOC(inc)
#endif
#ifdef NEED_MINVAL
#define CALC_MIN(p, inc) \
if (minval > temp.p) \
{ \
minval = temp.p; \
CALC_MINLOC(inc); \
}
#else
#define CALC_MIN(p, inc)
#endif
#ifdef NEED_MAXVAL
#define CALC_MAX(p, inc) \
if (maxval < temp.p) \
{ \
maxval = temp.p; \
CALC_MAXLOC(inc); \
}
#else
#define CALC_MAX(p, inc)
#endif
#define CALC_P(p, inc) \
CALC_MIN(p, inc) \
CALC_MAX(p, inc)
__kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_offset, int cols,
int total, int groupnum, __global uchar * dstptr
#ifdef HAVE_MASK
, __global const uchar * mask, int mask_step, int mask_offset
#endif
)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
int id = get_global_id(0) * kercn;
srcptr += src_offset;
#ifdef HAVE_MASK
mask += mask_offset;
#endif
#ifdef NEED_MINVAL
__local srcT1 localmem_min[WGS2_ALIGNED];
#ifdef NEED_MINLOC
__local uint localmem_minloc[WGS2_ALIGNED];
#endif
#endif
#ifdef NEED_MAXVAL
__local srcT1 localmem_max[WGS2_ALIGNED];
#ifdef NEED_MAXLOC
__local uint localmem_maxloc[WGS2_ALIGNED];
#endif
#endif
srcT1 minval = MAX_VAL, maxval = MIN_VAL;
srcT temp;
uint minloc = INDEX_MAX, maxloc = INDEX_MAX;
int src_index;
#ifdef HAVE_MASK
int mask_index;
#endif
for (int grain = groupnum * WGS * kercn; id < total; id += grain)
{
#ifdef HAVE_SRC_CONT
src_index = mul24(id, (int)sizeof(srcT1));
#else
src_index = mad24(id / cols, src_step, mul24(id % cols, (int)sizeof(srcT1)));
#endif
#ifdef HAVE_MASK
#ifdef HAVE_MASK_CONT
mask_index = id;
#else
mask_index = mad24(id / cols, mask_step, id % cols);
#endif
if (mask[mask_index])
#endif
{
temp = *(__global const srcT *)(srcptr + src_index);
#if kercn == 1
#ifdef NEED_MINVAL
if (minval > temp)
{
minval = temp;
#ifdef NEED_MINLOC
minloc = id;
#endif
}
#endif
#ifdef NEED_MAXVAL
if (maxval < temp)
{
maxval = temp;
#ifdef NEED_MAXLOC
maxloc = id;
#endif
}
#endif
#elif kercn >= 2
CALC_P(s0, 0)
CALC_P(s1, 1)
#if kercn >= 4
CALC_P(s2, 2)
CALC_P(s3, 3)
#if kercn >= 8
CALC_P(s4, 4)
CALC_P(s5, 5)
CALC_P(s6, 6)
CALC_P(s7, 7)
#if kercn == 16
CALC_P(s8, 8)
CALC_P(s9, 9)
CALC_P(sA, 10)
CALC_P(sB, 11)
CALC_P(sC, 12)
CALC_P(sD, 13)
CALC_P(sE, 14)
CALC_P(sF, 15)
#endif
#endif
#endif
#endif
}
}
if (lid < WGS2_ALIGNED)
{
#ifdef NEED_MINVAL
localmem_min[lid] = minval;
#endif
#ifdef NEED_MAXVAL
localmem_max[lid] = maxval;
#endif
#ifdef NEED_MINLOC
localmem_minloc[lid] = minloc;
#endif
#ifdef NEED_MAXLOC
localmem_maxloc[lid] = maxloc;
#endif
}
barrier(CLK_LOCAL_MEM_FENCE);
if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED)
{
int lid3 = lid - WGS2_ALIGNED;
#ifdef NEED_MINVAL
if (localmem_min[lid3] >= minval)
{
#ifdef NEED_MINLOC
if (localmem_min[lid3] == minval)
localmem_minloc[lid3] = min(localmem_minloc[lid3], minloc);
else
localmem_minloc[lid3] = minloc,
#endif
localmem_min[lid3] = minval;
}
#endif
#ifdef NEED_MAXVAL
if (localmem_max[lid3] <= maxval)
{
#ifdef NEED_MAXLOC
if (localmem_max[lid3] == maxval)
localmem_maxloc[lid3] = min(localmem_maxloc[lid3], maxloc);
else
localmem_maxloc[lid3] = maxloc,
#endif
localmem_max[lid3] = maxval;
}
#endif
}
barrier(CLK_LOCAL_MEM_FENCE);
for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1)
{
if (lid < lsize)
{
int lid2 = lsize + lid;
#ifdef NEED_MINVAL
if (localmem_min[lid] >= localmem_min[lid2])
{
#ifdef NEED_MINLOC
if (localmem_min[lid] == localmem_min[lid2])
localmem_minloc[lid] = min(localmem_minloc[lid2], localmem_minloc[lid]);
else
localmem_minloc[lid] = localmem_minloc[lid2],
#endif
localmem_min[lid] = localmem_min[lid2];
}
#endif
#ifdef NEED_MAXVAL
if (localmem_max[lid] <= localmem_max[lid2])
{
#ifdef NEED_MAXLOC
if (localmem_max[lid] == localmem_max[lid2])
localmem_maxloc[lid] = min(localmem_maxloc[lid2], localmem_maxloc[lid]);
else
localmem_maxloc[lid] = localmem_maxloc[lid2],
#endif
localmem_max[lid] = localmem_max[lid2];
}
#endif
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (lid == 0)
{
int pos = 0;
#ifdef NEED_MINVAL
*(__global srcT1 *)(dstptr + mad24(gid, (int)sizeof(srcT1), pos)) = localmem_min[0];
pos = mad24(groupnum, (int)sizeof(srcT1), pos);
#endif
#ifdef NEED_MAXVAL
*(__global srcT1 *)(dstptr + mad24(gid, (int)sizeof(srcT1), pos)) = localmem_max[0];
pos = mad24(groupnum, (int)sizeof(srcT1), pos);
#endif
#ifdef NEED_MINLOC
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0];
pos = mad24(groupnum, (int)sizeof(uint), pos);
#endif
#ifdef NEED_MAXLOC
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
#endif
}
}

View File

@ -75,6 +75,8 @@
#define MAX_VAL DBL_MAX #define MAX_VAL DBL_MAX
#endif #endif
#define INDEX_MAX UINT_MAX
#define dstT srcT #define dstT srcT
#define dstT1 srcT1 #define dstT1 srcT1
@ -357,102 +359,11 @@
#define CALC_RESULT \ #define CALC_RESULT \
storepix(localmem_max[0], dstptr + dstTSIZE * gid) storepix(localmem_max[0], dstptr + dstTSIZE * gid)
// minMaxLoc stuff
#elif defined OP_MIN_MAX_LOC || defined OP_MIN_MAX_LOC_MASK
#define DECLARE_LOCAL_MEM \
__local srcT localmem_min[WGS2_ALIGNED]; \
__local srcT localmem_max[WGS2_ALIGNED]; \
__local int localmem_minloc[WGS2_ALIGNED]; \
__local int localmem_maxloc[WGS2_ALIGNED]
#define DEFINE_ACCUMULATOR \
srcT minval = MAX_VAL; \
srcT maxval = MIN_VAL; \
int negative = -1; \
int minloc = negative; \
int maxloc = negative; \
srcT temp; \
int temploc
#define REDUCE_GLOBAL \
temp = loadpix(srcptr + src_index); \
temploc = id; \
srcT temp_minval = minval, temp_maxval = maxval; \
minval = min(minval, temp); \
maxval = max(maxval, temp); \
minloc = (minval == temp_minval) ? (temp_minval == MAX_VAL) ? temploc : minloc : temploc; \
maxloc = (maxval == temp_maxval) ? (temp_maxval == MIN_VAL) ? temploc : maxloc : temploc
#define SET_LOCAL_1 \
localmem_min[lid] = minval; \
localmem_max[lid] = maxval; \
localmem_minloc[lid] = minloc; \
localmem_maxloc[lid] = maxloc
#define REDUCE_LOCAL_1 \
srcT oldmin = localmem_min[lid-WGS2_ALIGNED]; \
srcT oldmax = localmem_max[lid-WGS2_ALIGNED]; \
localmem_min[lid - WGS2_ALIGNED] = min(minval, localmem_min[lid-WGS2_ALIGNED]); \
localmem_max[lid - WGS2_ALIGNED] = max(maxval, localmem_max[lid-WGS2_ALIGNED]); \
srcT minv = localmem_min[lid - WGS2_ALIGNED], maxv = localmem_max[lid - WGS2_ALIGNED]; \
localmem_minloc[lid - WGS2_ALIGNED] = (minv == minval) ? (minv == oldmin) ? \
min(minloc, localmem_minloc[lid-WGS2_ALIGNED]) : minloc : localmem_minloc[lid-WGS2_ALIGNED]; \
localmem_maxloc[lid - WGS2_ALIGNED] = (maxv == maxval) ? (maxv == oldmax) ? \
min(maxloc, localmem_maxloc[lid-WGS2_ALIGNED]) : maxloc : localmem_maxloc[lid-WGS2_ALIGNED]
#define REDUCE_LOCAL_2 \
srcT oldmin = localmem_min[lid]; \
srcT oldmax = localmem_max[lid]; \
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]); \
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]); \
srcT min1 = localmem_min[lid], min2 = localmem_min[lid2]; \
localmem_minloc[lid] = (localmem_minloc[lid] == negative) ? localmem_minloc[lid2] : (localmem_minloc[lid2] == negative) ? \
localmem_minloc[lid] : (min1 == min2) ? (min1 == oldmin) ? min(localmem_minloc[lid2],localmem_minloc[lid]) : \
localmem_minloc[lid2] : localmem_minloc[lid]; \
srcT max1 = localmem_max[lid], max2 = localmem_max[lid2]; \
localmem_maxloc[lid] = (localmem_maxloc[lid] == negative) ? localmem_maxloc[lid2] : (localmem_maxloc[lid2] == negative) ? \
localmem_maxloc[lid] : (max1 == max2) ? (max1 == oldmax) ? min(localmem_maxloc[lid2],localmem_maxloc[lid]) : \
localmem_maxloc[lid2] : localmem_maxloc[lid]
#define CALC_RESULT \
storepix(localmem_min[0], dstptr + dstTSIZE * gid); \
storepix(localmem_max[0], dstptr2 + dstTSIZE * gid); \
dstlocptr[gid] = localmem_minloc[0]; \
dstlocptr2[gid] = localmem_maxloc[0]
#if defined OP_MIN_MAX_LOC_MASK
#undef DEFINE_ACCUMULATOR
#define DEFINE_ACCUMULATOR \
srcT minval = MAX_VAL; \
srcT maxval = MIN_VAL; \
int negative = -1; \
int minloc = negative; \
int maxloc = negative; \
srcT temp, temp_mask, zeroVal = (srcT)(0); \
int temploc
#undef REDUCE_GLOBAL
#define REDUCE_GLOBAL \
temp = loadpix(srcptr + src_index); \
temploc = id; \
MASK_INDEX; \
__global const uchar * mask = (__global const uchar *)(maskptr + mask_index); \
temp_mask = mask[0]; \
srcT temp_minval = minval, temp_maxval = maxval; \
minval = (temp_mask == zeroVal) ? minval : min(minval, temp); \
maxval = (temp_mask == zeroVal) ? maxval : max(maxval, temp); \
minloc = (temp_mask == zeroVal) ? minloc : (minval == temp_minval) ? (temp_minval == MAX_VAL) ? temploc : minloc : temploc; \
maxloc = (temp_mask == zeroVal) ? maxloc : (maxval == temp_maxval) ? (temp_maxval == MIN_VAL) ? temploc : maxloc : temploc
#endif
#else #else
#error "No operation" #error "No operation"
#endif // end of minMaxLoc stuff #endif // end of norm (NORM_INF) with cn > 1 and mask
#ifdef OP_MIN_MAX_LOC #ifdef OP_DOT
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar * dstptr2, __global int * dstlocptr, __global int * dstlocptr2
#elif defined OP_MIN_MAX_LOC_MASK
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar * dstptr2, __global int * dstlocptr, __global int * dstlocptr2, \
__global const uchar * maskptr, int mask_step, int mask_offset
#elif defined OP_DOT
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar * src2ptr, int src2_step, int src2_offset #define EXTRA_PARAMS , __global uchar * src2ptr, int src2_step, int src2_offset
#endif #endif

View File

@ -1311,104 +1311,157 @@ static void ofs2idx(const Mat& a, size_t ofs, int* idx)
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
template <typename T> template <typename T>
void getMinMaxRes(const Mat &minv, const Mat &maxv, const Mat &minl, const Mat &maxl, double* minVal, void getMinMaxRes(const Mat & db, double* minVal, double* maxVal,
double* maxVal, int* minLoc, int* maxLoc, const int groupnum, const int cn, const int cols) int* minLoc, int* maxLoc,
int groupnum, int cn, int cols)
{ {
T min = std::numeric_limits<T>::max(); uint index_max = std::numeric_limits<uint>::max();
T max = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); T minval = std::numeric_limits<T>::max();
int minloc = INT_MAX, maxloc = INT_MAX; T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
for (int i = 0; i < groupnum; i++) uint minloc = index_max, maxloc = index_max;
int index = 0;
const T * minptr = NULL, * maxptr = NULL;
const uint * minlocptr = NULL, * maxlocptr = NULL;
if (minVal || minLoc)
{ {
T current_min = minv.at<T>(0,i); minptr = (const T *)db.data;
T current_max = maxv.at<T>(0,i); index += sizeof(T) * groupnum;
T oldmin = min, oldmax = max; }
min = std::min(min, current_min); if (maxVal || maxLoc)
max = std::max(max, current_max); {
if (cn == 1) maxptr = (const T *)(db.data + index);
{ index += sizeof(T) * groupnum;
int current_minloc = minl.at<int>(0,i);
int current_maxloc = maxl.at<int>(0,i);
if(current_minloc < 0 || current_maxloc < 0) continue;
minloc = (oldmin == current_min) ? std::min(minloc, current_minloc) : (oldmin < current_min) ? minloc : current_minloc;
maxloc = (oldmax == current_max) ? std::min(maxloc, current_maxloc) : (oldmax > current_max) ? maxloc : current_maxloc;
}
} }
bool zero_mask = (maxloc == INT_MAX) || (minloc == INT_MAX);
if (minVal)
*minVal = zero_mask ? 0 : (double)min;
if (maxVal)
*maxVal = zero_mask ? 0 : (double)max;
if (minLoc) if (minLoc)
{ {
minLoc[0] = zero_mask ? -1 : minloc/cols; minlocptr = (uint *)(db.data + index);
minLoc[1] = zero_mask ? -1 : minloc%cols; index += sizeof(uint) * groupnum;
}
if (maxLoc)
maxlocptr = (uint *)(db.data + index);
for (int i = 0; i < groupnum; i++)
{
if (minptr && minptr[i] <= minval)
{
if (minptr[i] == minval)
{
if (minlocptr)
minloc = std::min(minlocptr[i], minloc);
}
else
{
if (minlocptr)
minloc = minlocptr[i];
minval = minptr[i];
}
}
if (maxptr && maxptr[i] >= maxval)
{
if (maxptr[i] == maxval)
{
if (maxlocptr)
maxloc = std::min(maxlocptr[i], maxloc);
}
else
{
if (maxlocptr)
maxloc = maxlocptr[i];
maxval = maxptr[i];
}
}
}
bool zero_mask = (minLoc && minloc == index_max) ||
(maxLoc && maxloc == index_max);
if (minVal)
*minVal = zero_mask ? 0 : (double)minval;
if (maxVal)
*maxVal = zero_mask ? 0 : (double)maxval;
if (minLoc)
{
minLoc[0] = zero_mask ? -1 : minloc / cols;
minLoc[1] = zero_mask ? -1 : minloc % cols;
} }
if (maxLoc) if (maxLoc)
{ {
maxLoc[0] = zero_mask ? -1 : maxloc/cols; maxLoc[0] = zero_mask ? -1 : maxloc / cols;
maxLoc[1] = zero_mask ? -1 : maxloc%cols; maxLoc[1] = zero_mask ? -1 : maxloc % cols;
} }
} }
typedef void (*getMinMaxResFunc)(const Mat &minv, const Mat &maxv, const Mat &minl, const Mat &maxl, double *minVal, typedef void (*getMinMaxResFunc)(const Mat & db, double *minVal, double *maxVal,
double *maxVal, int *minLoc, int *maxLoc, const int gropunum, const int cn, const int cols); int *minLoc, int *maxLoc,
int gropunum, int cn, int cols);
static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask) static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask)
{ {
CV_Assert( (_src.channels() == 1 && (_mask.empty() || _mask.type() == CV_8U)) || CV_Assert( (_src.channels() == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
(_src.channels() >= 1 && _mask.empty() && !minLoc && !maxLoc) ); (_src.channels() >= 1 && _mask.empty() && !minLoc && !maxLoc) );
int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = 1; const ocl::Device & dev = ocl::Device::getDefault();
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty();
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
kercn = haveMask ? 1 : std::min(4, ocl::predictOptimalVectorWidth(_src));
if (depth == CV_64F && !doubleSupport) if (depth == CV_64F && !doubleSupport)
return false; return false;
int groupnum = ocl::Device::getDefault().maxComputeUnits(); int groupnum = dev.maxComputeUnits();
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); size_t wgs = dev.maxWorkGroupSize();
int wgs2_aligned = 1; int wgs2_aligned = 1;
while (wgs2_aligned < (int)wgs) while (wgs2_aligned < (int)wgs)
wgs2_aligned <<= 1; wgs2_aligned <<= 1;
wgs2_aligned >>= 1; wgs2_aligned >>= 1;
String opts = format("-D DEPTH_%d -D srcT=%s -D OP_MIN_MAX_LOC%s -D WGS=%d" bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL,
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d", needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL;
depth, ocl::typeToStr(depth), _mask.empty() ? "" : "_MASK", (int)wgs,
wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
_src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
_mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn);
ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts); // in case of mask we must know whether mask is filled with zeros or not
// so let's calculate min or max location, if it's undefined, so mask is zeros
if (!(needMaxLoc || needMinLoc) && haveMask)
if (needMinVal)
needMinLoc = true;
else
needMaxVal = true;
String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s",
depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
_src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
_mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "",
needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "");
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
if (k.empty()) if (k.empty())
return false; return false;
UMat src = _src.getUMat(), minval(1, groupnum, src.type()), int esz = CV_ELEM_SIZE(depth), esz32s = CV_ELEM_SIZE1(CV_32S),
maxval(1, groupnum, src.type()), minloc( 1, groupnum, CV_32SC1), dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
maxloc( 1, groupnum, CV_32SC1), mask; (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0));
if (!_mask.empty()) UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
mask = _mask.getUMat();
if (src.channels() > 1) if (cn > 1)
src = src.reshape(1); src = src.reshape(1);
if (mask.empty()) if (!haveMask)
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
groupnum, ocl::KernelArg::PtrWriteOnly(minval), ocl::KernelArg::PtrWriteOnly(maxval), groupnum, ocl::KernelArg::PtrWriteOnly(db));
ocl::KernelArg::PtrWriteOnly(minloc), ocl::KernelArg::PtrWriteOnly(maxloc));
else else
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), groupnum, k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
ocl::KernelArg::PtrWriteOnly(minval), ocl::KernelArg::PtrWriteOnly(maxval), groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask));
ocl::KernelArg::PtrWriteOnly(minloc), ocl::KernelArg::PtrWriteOnly(maxloc), ocl::KernelArg::ReadOnlyNoSize(mask));
size_t globalsize = groupnum * wgs; size_t globalsize = groupnum * wgs;
if (!k.run(1, &globalsize, &wgs, false)) if (!k.run(1, &globalsize, &wgs, false))
return false; return false;
Mat minv = minval.getMat(ACCESS_READ), maxv = maxval.getMat(ACCESS_READ), static const getMinMaxResFunc functab[7] =
minl = minloc.getMat(ACCESS_READ), maxl = maxloc.getMat(ACCESS_READ);
static getMinMaxResFunc functab[7] =
{ {
getMinMaxRes<uchar>, getMinMaxRes<uchar>,
getMinMaxRes<char>, getMinMaxRes<char>,
@ -1419,10 +1472,12 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
getMinMaxRes<double> getMinMaxRes<double>
}; };
getMinMaxResFunc func; getMinMaxResFunc func = functab[depth];
func = functab[depth]; int locTemp[2];
func(minv, maxv, minl, maxl, minVal, maxVal, minLoc, maxLoc, groupnum, src.channels(), src.cols); func(db.getMat(ACCESS_READ), minVal, maxVal,
needMinLoc ? minLoc ? minLoc : locTemp : minLoc,
needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, groupnum, cn, src.cols);
return true; return true;
} }