fixes
This commit is contained in:
parent
5403bdd228
commit
7f2662b310
@ -5,7 +5,7 @@
|
|||||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
//#define CV_OPENCL_RUN_ASSERT
|
#define CV_OPENCL_RUN_ASSERT
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
@ -15,16 +15,16 @@
|
|||||||
|
|
||||||
#ifdef DEPTH_0
|
#ifdef DEPTH_0
|
||||||
#define MIN_VAL 0
|
#define MIN_VAL 0
|
||||||
#define MAX_VAL 255
|
#define MAX_VAL UCHAR_MAX
|
||||||
#elif defined DEPTH_1
|
#elif defined DEPTH_1
|
||||||
#define MIN_VAL -128
|
#define MIN_VAL SCHAR_MIN
|
||||||
#define MAX_VAL 127
|
#define MAX_VAL SCHAR_MAX
|
||||||
#elif defined DEPTH_2
|
#elif defined DEPTH_2
|
||||||
#define MIN_VAL 0
|
#define MIN_VAL 0
|
||||||
#define MAX_VAL 65535
|
#define MAX_VAL USHRT_MAX
|
||||||
#elif defined DEPTH_3
|
#elif defined DEPTH_3
|
||||||
#define MIN_VAL -32768
|
#define MIN_VAL SHRT_MIN
|
||||||
#define MAX_VAL 32767
|
#define MAX_VAL SHRT_MAX
|
||||||
#elif defined DEPTH_4
|
#elif defined DEPTH_4
|
||||||
#define MIN_VAL INT_MIN
|
#define MIN_VAL INT_MIN
|
||||||
#define MAX_VAL INT_MAX
|
#define MAX_VAL INT_MAX
|
||||||
@ -39,6 +39,14 @@
|
|||||||
#define noconvert
|
#define noconvert
|
||||||
#define INDEX_MAX UINT_MAX
|
#define INDEX_MAX UINT_MAX
|
||||||
|
|
||||||
|
#if kercn != 3
|
||||||
|
#define loadpix(addr) *(__global const srcT *)(addr)
|
||||||
|
#define srcTSIZE (int)sizeof(srcT1)
|
||||||
|
#else
|
||||||
|
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
|
||||||
|
#define srcTSIZE ((int)sizeof(srcT1))
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef NEED_MINLOC
|
#ifdef NEED_MINLOC
|
||||||
#define CALC_MINLOC(inc) minloc = id + inc
|
#define CALC_MINLOC(inc) minloc = id + inc
|
||||||
#else
|
#else
|
||||||
@ -154,22 +162,22 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#ifdef HAVE_SRC_CONT
|
#ifdef HAVE_SRC_CONT
|
||||||
src_index = mul24(id, (int)sizeof(srcT1));
|
src_index = mul24(id, srcTSIZE);
|
||||||
#else
|
#else
|
||||||
src_index = mad24(id / cols, src_step, mul24(id % cols, (int)sizeof(srcT1)));
|
src_index = mad24(id / cols, src_step, mul24(id % cols, srcTSIZE));
|
||||||
#endif
|
#endif
|
||||||
temp = convertToDT(*(__global const srcT *)(srcptr + src_index));
|
temp = convertToDT(loadpix(srcptr + src_index));
|
||||||
#ifdef OP_ABS
|
#ifdef OP_ABS
|
||||||
temp = temp >= (dstT)(0) ? temp : -temp;
|
temp = temp >= (dstT)(0) ? temp : -temp;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_SRC2
|
#ifdef HAVE_SRC2
|
||||||
#ifdef HAVE_SRC2_CONT
|
#ifdef HAVE_SRC2_CONT
|
||||||
src2_index = mul24(id, (int)sizeof(srcT1));
|
src2_index = mul24(id, srcTSIZE);
|
||||||
#else
|
#else
|
||||||
src2_index = mad24(id / cols, src2_step, mul24(id % cols, (int)sizeof(srcT1)));
|
src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE));
|
||||||
#endif
|
#endif
|
||||||
temp2 = convertToDT(*(__global const srcT *)(src2ptr + src2_index));
|
temp2 = convertToDT(loadpix(src2ptr + src2_index));
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp);
|
temp = temp > temp2 ? temp - temp2 : (temp2 - temp);
|
||||||
#ifdef OP_CALC2
|
#ifdef OP_CALC2
|
||||||
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2;
|
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2;
|
||||||
@ -202,8 +210,9 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
#elif kercn >= 2
|
#elif kercn >= 2
|
||||||
CALC_P(s0, 0)
|
CALC_P(s0, 0)
|
||||||
CALC_P(s1, 1)
|
CALC_P(s1, 1)
|
||||||
#if kercn >= 4
|
#if kercn >= 3
|
||||||
CALC_P(s2, 2)
|
CALC_P(s2, 2)
|
||||||
|
#if kercn >= 4
|
||||||
CALC_P(s3, 3)
|
CALC_P(s3, 3)
|
||||||
#if kercn >= 8
|
#if kercn >= 8
|
||||||
CALC_P(s4, 4)
|
CALC_P(s4, 4)
|
||||||
@ -222,6 +231,7 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -335,9 +345,11 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
#endif
|
#endif
|
||||||
#ifdef NEED_MAXLOC
|
#ifdef NEED_MAXLOC
|
||||||
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
|
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
|
||||||
#endif
|
|
||||||
#ifdef OP_CALC2
|
#ifdef OP_CALC2
|
||||||
pos = mad24(groupnum, (int)sizeof(uint), pos);
|
pos = mad24(groupnum, (int)sizeof(uint), pos);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#ifdef OP_CALC2
|
||||||
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max2[0];
|
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max2[0];
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -148,11 +148,9 @@
|
|||||||
|
|
||||||
#ifdef OP_CALC2
|
#ifdef OP_CALC2
|
||||||
#define DECLARE_LOCAL_MEM \
|
#define DECLARE_LOCAL_MEM \
|
||||||
__local dstT localmem[WGS2_ALIGNED]; \
|
__local dstT localmem[WGS2_ALIGNED], localmem2[WGS2_ALIGNED]
|
||||||
__local dstT localmem2[WGS2_ALIGNED]
|
|
||||||
#define DEFINE_ACCUMULATOR \
|
#define DEFINE_ACCUMULATOR \
|
||||||
dstT accumulator = (dstT)(0); \
|
dstT accumulator = (dstT)(0), accumulator2 = (dstT)(0)
|
||||||
dstT accumulator2 = (dstT)(0)
|
|
||||||
#else
|
#else
|
||||||
#define DECLARE_LOCAL_MEM \
|
#define DECLARE_LOCAL_MEM \
|
||||||
__local dstT localmem[WGS2_ALIGNED]
|
__local dstT localmem[WGS2_ALIGNED]
|
||||||
@ -163,10 +161,10 @@
|
|||||||
#ifdef HAVE_SRC2
|
#ifdef HAVE_SRC2
|
||||||
#ifdef OP_CALC2
|
#ifdef OP_CALC2
|
||||||
#define PROCESS_ELEMS \
|
#define PROCESS_ELEMS \
|
||||||
dstT temp = convertToDT(loadpix(srcptr + src_index)) - convertToDT(loadpix(src2ptr + src2_index)); \
|
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp -= temp2; \
|
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
||||||
temp = temp > (dstT)(0) ? temp : -temp; \
|
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
||||||
FUNC(accumulator2, temp2); \
|
FUNC(accumulator2, temp2); \
|
||||||
FUNC(accumulator, temp)
|
FUNC(accumulator, temp)
|
||||||
#else
|
#else
|
||||||
@ -258,6 +256,7 @@
|
|||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
||||||
|
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
||||||
FUNC(accumulator, temp); \
|
FUNC(accumulator, temp); \
|
||||||
FUNC(accumulator2, temp2)
|
FUNC(accumulator2, temp2)
|
||||||
#elif kercn == 2
|
#elif kercn == 2
|
||||||
@ -265,6 +264,7 @@
|
|||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
||||||
|
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator2, temp2.s0); \
|
FUNC(accumulator2, temp2.s0); \
|
||||||
@ -274,6 +274,7 @@
|
|||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
||||||
|
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
@ -287,6 +288,7 @@
|
|||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
||||||
|
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
@ -308,6 +310,7 @@
|
|||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
||||||
|
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
@ -452,6 +455,20 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef OP_CALC2
|
||||||
|
#define SET_LOCAL_1 \
|
||||||
|
localmem[lid] = accumulator; \
|
||||||
|
localmem2[lid] = accumulator2
|
||||||
|
#define REDUCE_LOCAL_1 \
|
||||||
|
localmem[lid - WGS2_ALIGNED] += accumulator; \
|
||||||
|
localmem2[lid - WGS2_ALIGNED] += accumulator2
|
||||||
|
#define REDUCE_LOCAL_2 \
|
||||||
|
localmem[lid] += localmem[lid2]; \
|
||||||
|
localmem2[lid] += localmem2[lid2]
|
||||||
|
#define CALC_RESULT \
|
||||||
|
storepix(localmem[0], dstptr + dstTSIZE * gid); \
|
||||||
|
storepix(localmem2[0], dstptr + mad24(groupnum, dstTSIZE, dstTSIZE * gid))
|
||||||
|
#else
|
||||||
#define SET_LOCAL_1 \
|
#define SET_LOCAL_1 \
|
||||||
localmem[lid] = accumulator
|
localmem[lid] = accumulator
|
||||||
#define REDUCE_LOCAL_1 \
|
#define REDUCE_LOCAL_1 \
|
||||||
@ -460,6 +477,7 @@
|
|||||||
localmem[lid] += localmem[lid2]
|
localmem[lid] += localmem[lid2]
|
||||||
#define CALC_RESULT \
|
#define CALC_RESULT \
|
||||||
storepix(localmem[0], dstptr + dstTSIZE * gid)
|
storepix(localmem[0], dstptr + dstTSIZE * gid)
|
||||||
|
#endif
|
||||||
|
|
||||||
// countNonZero stuff
|
// countNonZero stuff
|
||||||
#elif defined OP_COUNT_NON_ZERO
|
#elif defined OP_COUNT_NON_ZERO
|
||||||
@ -516,20 +534,6 @@
|
|||||||
accumulator += value.sF == zero ? zero : one
|
accumulator += value.sF == zero ? zero : one
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef OP_CALC2
|
|
||||||
#define SET_LOCAL_1 \
|
|
||||||
localmem[lid] = accumulator; \
|
|
||||||
localmem2[lid] = accumulator2; \
|
|
||||||
#define REDUCE_LOCAL_1 \
|
|
||||||
localmem[lid - WGS2_ALIGNED] += accumulator; \
|
|
||||||
localmem2[lid - WGS2_ALIGNED] += accumulator2
|
|
||||||
#define REDUCE_LOCAL_2 \
|
|
||||||
localmem[lid] += localmem[lid2]; \
|
|
||||||
localmem2[lid] += localmem2[lid2]
|
|
||||||
#define CALC_RESULT \
|
|
||||||
storepix(localmem[0], dstptr + dstTSIZE * gid); \
|
|
||||||
storepix(localmem2[0], dstptr + mad24(groupnum, srcTSIZE, dstTSIZE * gid))
|
|
||||||
#else
|
|
||||||
#define SET_LOCAL_1 \
|
#define SET_LOCAL_1 \
|
||||||
localmem[lid] = accumulator
|
localmem[lid] = accumulator
|
||||||
#define REDUCE_LOCAL_1 \
|
#define REDUCE_LOCAL_1 \
|
||||||
@ -538,7 +542,6 @@
|
|||||||
localmem[lid] += localmem[lid2]
|
localmem[lid] += localmem[lid2]
|
||||||
#define CALC_RESULT \
|
#define CALC_RESULT \
|
||||||
storepix(localmem[0], dstptr + dstTSIZE * gid)
|
storepix(localmem[0], dstptr + dstTSIZE * gid)
|
||||||
#endif
|
|
||||||
|
|
||||||
// norm (NORM_INF) with cn > 1 and mask
|
// norm (NORM_INF) with cn > 1 and mask
|
||||||
#elif defined OP_NORM_INF_MASK
|
#elif defined OP_NORM_INF_MASK
|
||||||
|
@ -550,9 +550,9 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
|
|||||||
|
|
||||||
Mat mres = db.getMat(ACCESS_READ);
|
Mat mres = db.getMat(ACCESS_READ);
|
||||||
if (calc2)
|
if (calc2)
|
||||||
const_cast<Scalar &>(res2) = func(mres.colRange(dbsize, dbsize));
|
const_cast<Scalar &>(res2) = func(mres.colRange(ngroups, dbsize));
|
||||||
|
|
||||||
res = func(mres.colRange(0, dbsize));
|
res = func(mres.colRange(0, ngroups));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -1434,10 +1434,10 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
|
|||||||
bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
|
bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
|
||||||
haveSrc2 = _src2.kind() != _InputArray::NONE;
|
haveSrc2 = _src2.kind() != _InputArray::NONE;
|
||||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
||||||
kercn = haveMask ? 1 : std::min(4, ocl::predictOptimalVectorWidth(_src));
|
kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src));
|
||||||
|
|
||||||
CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
|
CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) ||
|
||||||
(cn >= 1 && _mask.empty() && !minLoc && !maxLoc) );
|
(cn >= 1 && (!haveMask || haveSrc2) && !minLoc && !maxLoc) );
|
||||||
|
|
||||||
if (ddepth < 0)
|
if (ddepth < 0)
|
||||||
ddepth = depth;
|
ddepth = depth;
|
||||||
@ -1484,6 +1484,8 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
|
|||||||
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
|
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
|
||||||
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "");
|
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "");
|
||||||
|
|
||||||
|
printf("%s\n", opts.c_str());
|
||||||
|
|
||||||
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
|
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
return false;
|
return false;
|
||||||
@ -2556,9 +2558,9 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
|
|||||||
{
|
{
|
||||||
Scalar sc1, sc2;
|
Scalar sc1, sc2;
|
||||||
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
bool relative = (normType & NORM_RELATIVE) != 0,
|
bool relative = (normType & NORM_RELATIVE) != 0;
|
||||||
normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
|
|
||||||
normType &= ~NORM_RELATIVE;
|
normType &= ~NORM_RELATIVE;
|
||||||
|
bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
|
||||||
|
|
||||||
if ( !(normType == NORM_INF || normsum) )
|
if ( !(normType == NORM_INF || normsum) )
|
||||||
return false;
|
return false;
|
||||||
@ -2608,8 +2610,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
|||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
double _result = 0;
|
double _result = 0;
|
||||||
CV_OCL_RUN_(_src1.isUMat() && _src2.isUMat() &&
|
CV_OCL_RUN_(_src1.isUMat(),
|
||||||
_src1.dims() <= 2 && _src2.dims() <= 2,
|
|
||||||
ocl_norm(_src1, _src2, normType, _mask, _result),
|
ocl_norm(_src1, _src2, normType, _mask, _result),
|
||||||
_result)
|
_result)
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user