used abs in reduction operations
This commit is contained in:
parent
f1e24381d1
commit
316c044e06
@ -39,6 +39,14 @@
|
|||||||
#define noconvert
|
#define noconvert
|
||||||
#define INDEX_MAX UINT_MAX
|
#define INDEX_MAX UINT_MAX
|
||||||
|
|
||||||
|
#if wdepth <= 4
|
||||||
|
#define MIN_ABS(a) convertToDT(abs(a))
|
||||||
|
#define MIN_ABS2(a, b) convertToDT(abs_diff(a, b))
|
||||||
|
#else
|
||||||
|
#define MIN_ABS(a) fabs(a)
|
||||||
|
#define MIN_ABS2(a, b) fabs(a - b)
|
||||||
|
#endif
|
||||||
|
|
||||||
#if kercn != 3
|
#if kercn != 3
|
||||||
#define loadpix(addr) *(__global const srcT *)(addr)
|
#define loadpix(addr) *(__global const srcT *)(addr)
|
||||||
#define srcTSIZE (int)sizeof(srcT)
|
#define srcTSIZE (int)sizeof(srcT)
|
||||||
@ -182,7 +190,7 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
#endif
|
#endif
|
||||||
temp = convertToDT(loadpix(srcptr + src_index));
|
temp = convertToDT(loadpix(srcptr + src_index));
|
||||||
#ifdef OP_ABS
|
#ifdef OP_ABS
|
||||||
temp = temp >= (dstT)(0) ? temp : -temp;
|
temp = MIN_ABS(temp);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_SRC2
|
#ifdef HAVE_SRC2
|
||||||
@ -192,9 +200,9 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE));
|
src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE));
|
||||||
#endif
|
#endif
|
||||||
temp2 = convertToDT(loadpix(src2ptr + src2_index));
|
temp2 = convertToDT(loadpix(src2ptr + src2_index));
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp);
|
temp = MIN_ABS2(temp, temp2);
|
||||||
#ifdef OP_CALC2
|
#ifdef OP_CALC2
|
||||||
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2;
|
temp2 = MIN_ABS(temp2);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -108,6 +108,14 @@
|
|||||||
#define dstTSIZE ((int)sizeof(dstT1)*3)
|
#define dstTSIZE ((int)sizeof(dstT1)*3)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ddepth <= 4
|
||||||
|
#define SUM_ABS(a) convertToDT(abs(a))
|
||||||
|
#define SUM_ABS2(a, b) convertToDT(abs_diff(a, b))
|
||||||
|
#else
|
||||||
|
#define SUM_ABS(a) fabs(a)
|
||||||
|
#define SUM_ABS2(a, b) fabs(a - b)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_MASK
|
#ifdef HAVE_MASK
|
||||||
#ifdef HAVE_SRC2
|
#ifdef HAVE_SRC2
|
||||||
#define EXTRA_PARAMS , __global const uchar * mask, int mask_step, int mask_offset, __global const uchar * src2ptr, int src2_step, int src2_offset
|
#define EXTRA_PARAMS , __global const uchar * mask, int mask_step, int mask_offset, __global const uchar * src2ptr, int src2_step, int src2_offset
|
||||||
@ -136,7 +144,7 @@
|
|||||||
#define FUNC(a, b) a += b
|
#define FUNC(a, b) a += b
|
||||||
|
|
||||||
#elif defined OP_SUM_ABS
|
#elif defined OP_SUM_ABS
|
||||||
#define FUNC(a, b) a += b >= (dstT)(0) ? b : -b
|
#define FUNC(a, b) a += SUM_ABS(b)
|
||||||
|
|
||||||
#elif defined OP_SUM_SQR
|
#elif defined OP_SUM_SQR
|
||||||
#if ddepth <= 4
|
#if ddepth <= 4
|
||||||
@ -163,15 +171,15 @@
|
|||||||
#define PROCESS_ELEMS \
|
#define PROCESS_ELEMS \
|
||||||
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
temp2 = SUM_ABS(temp2); \
|
||||||
FUNC(accumulator2, temp2); \
|
FUNC(accumulator2, temp2); \
|
||||||
FUNC(accumulator, temp)
|
FUNC(accumulator, temp)
|
||||||
#else
|
#else
|
||||||
#define PROCESS_ELEMS \
|
#define PROCESS_ELEMS \
|
||||||
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
FUNC(accumulator, temp)
|
FUNC(accumulator, temp)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
@ -255,16 +263,16 @@
|
|||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
temp2 = SUM_ABS(temp2); \
|
||||||
FUNC(accumulator, temp); \
|
FUNC(accumulator, temp); \
|
||||||
FUNC(accumulator2, temp2)
|
FUNC(accumulator2, temp2)
|
||||||
#elif kercn == 2
|
#elif kercn == 2
|
||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
temp2 = SUM_ABS(temp2); \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator2, temp2.s0); \
|
FUNC(accumulator2, temp2.s0); \
|
||||||
@ -273,8 +281,8 @@
|
|||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
temp2 = SUM_ABS(temp2); \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
@ -287,8 +295,8 @@
|
|||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
temp2 = SUM_ABS(temp2); \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
@ -309,8 +317,8 @@
|
|||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
|
temp2 = SUM_ABS(temp2); \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
@ -349,20 +357,20 @@
|
|||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
FUNC(accumulator, temp)
|
FUNC(accumulator, temp)
|
||||||
#elif kercn == 2
|
#elif kercn == 2
|
||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1)
|
FUNC(accumulator, temp.s1)
|
||||||
#elif kercn == 4
|
#elif kercn == 4
|
||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
@ -371,7 +379,7 @@
|
|||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
@ -384,7 +392,7 @@
|
|||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||||
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||||
temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
|
temp = SUM_ABS2(temp, temp2); \
|
||||||
FUNC(accumulator, temp.s0); \
|
FUNC(accumulator, temp.s0); \
|
||||||
FUNC(accumulator, temp.s1); \
|
FUNC(accumulator, temp.s1); \
|
||||||
FUNC(accumulator, temp.s2); \
|
FUNC(accumulator, temp.s2); \
|
||||||
|
@ -1471,7 +1471,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
|
|||||||
char cvt[40];
|
char cvt[40];
|
||||||
String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
|
String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
|
||||||
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
|
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
|
||||||
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s",
|
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d",
|
||||||
depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
|
depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
|
||||||
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
|
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
|
||||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
||||||
@ -1482,7 +1482,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
|
|||||||
ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
|
ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
|
||||||
ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? " -D OP_ABS" : "",
|
ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? " -D OP_ABS" : "",
|
||||||
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
|
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
|
||||||
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "");
|
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth);
|
||||||
|
|
||||||
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
|
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user