Fixed buffer initialization in the reduce kernels. Enabled the OpenCL (OCL) version of reduce for the SUM, MAX, and MIN modes.
This commit is contained in:
parent
5119564579
commit
c3100eeb19
@@ -3462,9 +3462,6 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
     if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
         return false;
 
-    if ((op == CV_REDUCE_SUM && sdepth == CV_32F) || op == CV_REDUCE_MIN || op == CV_REDUCE_MAX)
-        return false;
-
     if (op == CV_REDUCE_AVG)
     {
         if (sdepth < CV_32S && ddepth < CV_32S)
@@ -108,7 +108,10 @@ __kernel void reduce_horz_opt(__global const uchar * srcptr, int src_step, int s
     int src_index = mad24(y, src_step, mad24(x, (int)sizeof(srcT) * cn, src_offset));
 
     __global const srcT * src = (__global const srcT *)(srcptr + src_index);
-    bufT tmp[cn] = { INIT_VALUE };
+    bufT tmp[cn];
+    #pragma unroll
+    for (int c = 0; c < cn; ++c)
+        tmp[c] = INIT_VALUE;
 
     int src_step_mul = BUF_COLS * cn;
     for (int idx = x; idx < cols; idx += BUF_COLS, src += src_step_mul)
@@ -140,7 +143,10 @@ __kernel void reduce_horz_opt(__global const uchar * srcptr, int src_step, int s
     int dst_index = mad24(y, dst_step, dst_offset);
 
     __global dstT * dst = (__global dstT *)(dstptr + dst_index);
-    bufT tmp[cn] = { INIT_VALUE };
+    bufT tmp[cn];
+    #pragma unroll
+    for (int c = 0; c < cn; ++c)
+        tmp[c] = INIT_VALUE;
 
     #pragma unroll
     for (int xin = 0; xin < BUF_COLS / 2; xin ++)
@@ -179,7 +185,10 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
     int dst_index = mad24(x, (int)sizeof(dstT0) * cn, dst_offset);
 
     __global dstT0 * dst = (__global dstT0 *)(dstptr + dst_index);
-    dstT tmp[cn] = { INIT_VALUE };
+    dstT tmp[cn];
+    #pragma unroll
+    for (int c = 0; c < cn; ++c)
+        tmp[c] = INIT_VALUE;
 
     for (int y = 0; y < rows; ++y, src_index += src_step)
     {
@@ -209,7 +218,10 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
 
     __global const srcT * src = (__global const srcT *)(srcptr + src_index);
     __global dstT * dst = (__global dstT *)(dstptr + dst_index);
-    dstT tmp[cn] = { INIT_VALUE };
+    dstT tmp[cn];
+    #pragma unroll
+    for (int c = 0; c < cn; ++c)
+        tmp[c] = INIT_VALUE;
 
     for (int x = 0; x < cols; ++x, src += cn)
     {
@@ -1704,7 +1704,7 @@ OCL_TEST_P(ReduceSum, Mat)
         OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_SUM, dtype));
         OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_SUM, dtype));
 
-        double eps = ddepth <= CV_32S ? 1 : 1e-4;
+        double eps = ddepth <= CV_32S ? 1 : 7e-4;
         OCL_EXPECT_MATS_NEAR(dst, eps);
     }
 }
Loading…
Reference in New Issue
Block a user