optimized cv::countNonZero
This commit is contained in:
parent
7804d57f8b
commit
002a79bfc4
@ -82,6 +82,10 @@
|
|||||||
|
|
||||||
#define noconvert
|
#define noconvert
|
||||||
|
|
||||||
|
#ifndef kercn
|
||||||
|
#define kercn 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_MASK_CONT
|
#ifdef HAVE_MASK_CONT
|
||||||
#define MASK_INDEX int mask_index = id + mask_offset;
|
#define MASK_INDEX int mask_index = id + mask_offset;
|
||||||
#else
|
#else
|
||||||
@ -176,9 +180,39 @@
|
|||||||
__local dstT localmem[WGS2_ALIGNED]
|
__local dstT localmem[WGS2_ALIGNED]
|
||||||
#define DEFINE_ACCUMULATOR \
|
#define DEFINE_ACCUMULATOR \
|
||||||
dstT accumulator = (dstT)(0); \
|
dstT accumulator = (dstT)(0); \
|
||||||
srcT zero = (srcT)(0), one = (srcT)(1)
|
srcT1 zero = (srcT1)(0), one = (srcT1)(1)
|
||||||
|
#if kercn == 1
|
||||||
#define REDUCE_GLOBAL \
|
#define REDUCE_GLOBAL \
|
||||||
accumulator += loadpix(srcptr + src_index) == zero ? zero : one
|
accumulator += loadpix(srcptr + src_index) == zero ? zero : one
|
||||||
|
#elif kercn == 4
|
||||||
|
#define REDUCE_GLOBAL \
|
||||||
|
srcT value = loadpix(srcptr + src_index); \
|
||||||
|
accumulator += value.s0 == zero ? zero : one; \
|
||||||
|
accumulator += value.s1 == zero ? zero : one; \
|
||||||
|
accumulator += value.s2 == zero ? zero : one; \
|
||||||
|
accumulator += value.s3 == zero ? zero : one
|
||||||
|
#elif kercn == 16
|
||||||
|
#define REDUCE_GLOBAL \
|
||||||
|
srcT value = loadpix(srcptr + src_index); \
|
||||||
|
accumulator += value.s0 == zero ? zero : one; \
|
||||||
|
accumulator += value.s1 == zero ? zero : one; \
|
||||||
|
accumulator += value.s2 == zero ? zero : one; \
|
||||||
|
accumulator += value.s3 == zero ? zero : one; \
|
||||||
|
accumulator += value.s4 == zero ? zero : one; \
|
||||||
|
accumulator += value.s5 == zero ? zero : one; \
|
||||||
|
accumulator += value.s6 == zero ? zero : one; \
|
||||||
|
accumulator += value.s7 == zero ? zero : one; \
|
||||||
|
accumulator += value.s8 == zero ? zero : one; \
|
||||||
|
accumulator += value.s9 == zero ? zero : one; \
|
||||||
|
accumulator += value.sA == zero ? zero : one; \
|
||||||
|
accumulator += value.sB == zero ? zero : one; \
|
||||||
|
accumulator += value.sC == zero ? zero : one; \
|
||||||
|
accumulator += value.sD == zero ? zero : one; \
|
||||||
|
accumulator += value.sE == zero ? zero : one; \
|
||||||
|
accumulator += value.sF == zero ? zero : one
|
||||||
|
#else
|
||||||
|
#error "kercn should be either 1, 4 or 16"
|
||||||
|
#endif
|
||||||
#define SET_LOCAL_1 \
|
#define SET_LOCAL_1 \
|
||||||
localmem[lid] = accumulator
|
localmem[lid] = accumulator
|
||||||
#define REDUCE_LOCAL_1 \
|
#define REDUCE_LOCAL_1 \
|
||||||
@ -316,14 +350,14 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
|
|||||||
{
|
{
|
||||||
int lid = get_local_id(0);
|
int lid = get_local_id(0);
|
||||||
int gid = get_group_id(0);
|
int gid = get_group_id(0);
|
||||||
int id = get_global_id(0);
|
int id = get_global_id(0) * kercn;
|
||||||
|
|
||||||
srcptr += src_offset;
|
srcptr += src_offset;
|
||||||
|
|
||||||
DECLARE_LOCAL_MEM;
|
DECLARE_LOCAL_MEM;
|
||||||
DEFINE_ACCUMULATOR;
|
DEFINE_ACCUMULATOR;
|
||||||
|
|
||||||
for (int grain = groupnum * WGS; id < total; id += grain)
|
for (int grain = groupnum * WGS * kercn; id < total; id += grain)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_SRC_CONT
|
#ifdef HAVE_SRC_CONT
|
||||||
int src_index = mul24(id, srcTSIZE);
|
int src_index = mul24(id, srcTSIZE);
|
||||||
|
@ -645,7 +645,7 @@ namespace cv {
|
|||||||
|
|
||||||
static bool ocl_countNonZero( InputArray _src, int & res )
|
static bool ocl_countNonZero( InputArray _src, int & res )
|
||||||
{
|
{
|
||||||
int type = _src.type(), depth = CV_MAT_DEPTH(type);
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
|
||||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||||
|
|
||||||
if (depth == CV_64F && !doubleSupport)
|
if (depth == CV_64F && !doubleSupport)
|
||||||
@ -660,9 +660,10 @@ static bool ocl_countNonZero( InputArray _src, int & res )
|
|||||||
wgs2_aligned >>= 1;
|
wgs2_aligned >>= 1;
|
||||||
|
|
||||||
ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
|
ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
|
||||||
format("-D srcT=%s -D OP_COUNT_NON_ZERO -D WGS=%d "
|
format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO -D WGS=%d "
|
||||||
"-D WGS2_ALIGNED=%d%s%s",
|
"-D kercn=%d -D WGS2_ALIGNED=%d%s%s",
|
||||||
ocl::typeToStr(type), (int)wgs,
|
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
|
||||||
|
ocl::typeToStr(depth), (int)wgs, kercn,
|
||||||
wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
||||||
_src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
|
_src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user