Merge pull request #1724 from ilya-lavrenov:ocl_thresh
This commit is contained in:
commit
bf58049d11
modules/ocl
@ -366,21 +366,23 @@ PERF_TEST_P(resizeFixture, resize,
|
|||||||
|
|
||||||
///////////// threshold////////////////////////
|
///////////// threshold////////////////////////
|
||||||
|
|
||||||
CV_ENUM(ThreshType, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
|
CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
|
||||||
|
|
||||||
typedef tuple<Size, ThreshType> ThreshParams;
|
typedef tuple<Size, MatType, ThreshType> ThreshParams;
|
||||||
typedef TestBaseWithParam<ThreshParams> ThreshFixture;
|
typedef TestBaseWithParam<ThreshParams> ThreshFixture;
|
||||||
|
|
||||||
PERF_TEST_P(ThreshFixture, threshold,
|
PERF_TEST_P(ThreshFixture, threshold,
|
||||||
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
||||||
|
OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC4, CV_32FC1),
|
||||||
ThreshType::all()))
|
ThreshType::all()))
|
||||||
{
|
{
|
||||||
const ThreshParams params = GetParam();
|
const ThreshParams params = GetParam();
|
||||||
const Size srcSize = get<0>(params);
|
const Size srcSize = get<0>(params);
|
||||||
const int threshType = get<1>(params);
|
const int srcType = get<1>(params);
|
||||||
|
const int threshType = get<2>(params);
|
||||||
const double maxValue = 220.0, threshold = 50;
|
const double maxValue = 220.0, threshold = 50;
|
||||||
|
|
||||||
Mat src(srcSize, CV_8U), dst(srcSize, CV_8U);
|
Mat src(srcSize, srcType), dst(srcSize, srcType);
|
||||||
randu(src, 0, 100);
|
randu(src, 0, 100);
|
||||||
declare.in(src).out(dst);
|
declare.in(src).out(dst);
|
||||||
|
|
||||||
|
@ -118,22 +118,20 @@ namespace cv
|
|||||||
static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
|
static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
|
||||||
{
|
{
|
||||||
bool ival = src.depth() < CV_32F;
|
bool ival = src.depth() < CV_32F;
|
||||||
|
int cn = src.channels(), vecSize = 4, depth = src.depth();
|
||||||
std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(),
|
std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(),
|
||||||
dst.oclchannels(), dst.channels());
|
dst.oclchannels(), dst.channels());
|
||||||
std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
|
std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
|
||||||
|
|
||||||
size_t localThreads[3] = { 16, 16, 1 };
|
|
||||||
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
|
||||||
|
|
||||||
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
|
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
|
||||||
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
|
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
|
||||||
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
||||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||||
std::string buildOptions = format("-D T=%s%s -D %s", typeMap[src.depth()], channelMap[src.channels()],
|
std::string buildOptions = format("-D T=%s%s -D %s", typeMap[depth], channelMap[cn], thresholdMap[thresholdType]);
|
||||||
thresholdMap[thresholdType]);
|
|
||||||
|
|
||||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
int elemSize = src.elemSize();
|
||||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
|
||||||
|
int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
|
||||||
|
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||||
@ -142,11 +140,32 @@ namespace cv
|
|||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
|
|
||||||
args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
|
args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
|
||||||
args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0]));
|
args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0]));
|
||||||
|
|
||||||
|
int max_index = dst.cols, cols = dst.cols;
|
||||||
|
if (cn == 1 && vecSize > 1)
|
||||||
|
{
|
||||||
|
CV_Assert(((vecSize - 1) & vecSize) == 0 && vecSize <= 16);
|
||||||
|
cols = divUp(cols, vecSize);
|
||||||
|
buildOptions += format(" -D VECTORIZED -D VT=%s%d -D VLOADN=vload%d -D VECSIZE=%d -D VSTOREN=vstore%d",
|
||||||
|
typeMap[depth], vecSize, vecSize, vecSize, vecSize);
|
||||||
|
|
||||||
|
int vecSizeBytes = vecSize * dst.elemSize1();
|
||||||
|
if ((dst.offset % dst.step) % vecSizeBytes == 0 && dst.step % vecSizeBytes == 0)
|
||||||
|
buildOptions += " -D DST_ALIGNED";
|
||||||
|
if ((src.offset % src.step) % vecSizeBytes == 0 && src.step % vecSizeBytes == 0)
|
||||||
|
buildOptions += " -D SRC_ALIGNED";
|
||||||
|
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&max_index));
|
||||||
|
}
|
||||||
|
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||||
|
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
|
||||||
|
|
||||||
|
size_t localThreads[3] = { 16, 16, 1 };
|
||||||
|
size_t globalThreads[3] = { cols, dst.rows, 1 };
|
||||||
|
|
||||||
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args,
|
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args,
|
||||||
-1, -1, buildOptions.c_str());
|
-1, -1, buildOptions.c_str());
|
||||||
}
|
}
|
||||||
|
@ -51,9 +51,63 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef VECTORIZED
|
||||||
|
|
||||||
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
|
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
|
||||||
__global T * dst, int dst_offset, int dst_step,
|
__global T * dst, int dst_offset, int dst_step,
|
||||||
int rows, int cols, T thresh, T max_val)
|
T thresh, T max_val, int max_index, int rows, int cols)
|
||||||
|
{
|
||||||
|
int gx = get_global_id(0);
|
||||||
|
int gy = get_global_id(1);
|
||||||
|
|
||||||
|
if (gx < cols && gy < rows)
|
||||||
|
{
|
||||||
|
gx *= VECSIZE;
|
||||||
|
int src_index = mad24(gy, src_step, src_offset + gx);
|
||||||
|
int dst_index = mad24(gy, dst_step, dst_offset + gx);
|
||||||
|
|
||||||
|
#ifdef SRC_ALIGNED
|
||||||
|
VT sdata = *((__global VT *)(src + src_index));
|
||||||
|
#else
|
||||||
|
VT sdata = VLOADN(0, src + src_index);
|
||||||
|
#endif
|
||||||
|
VT vthresh = (VT)(thresh), zero = (VT)(0);
|
||||||
|
|
||||||
|
#ifdef THRESH_BINARY
|
||||||
|
VT vecValue = sdata > vthresh ? max_val : zero;
|
||||||
|
#elif defined THRESH_BINARY_INV
|
||||||
|
VT vecValue = sdata > vthresh ? zero : max_val;
|
||||||
|
#elif defined THRESH_TRUNC
|
||||||
|
VT vecValue = sdata > vthresh ? thresh : sdata;
|
||||||
|
#elif defined THRESH_TOZERO
|
||||||
|
VT vecValue = sdata > vthresh ? sdata : zero;
|
||||||
|
#elif defined THRESH_TOZERO_INV
|
||||||
|
VT vecValue = sdata > vthresh ? zero : sdata;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (gx + VECSIZE <= max_index)
|
||||||
|
#ifdef DST_ALIGNED
|
||||||
|
*(__global VT*)(dst + dst_index) = vecValue;
|
||||||
|
#else
|
||||||
|
VSTOREN(vecValue, 0, dst + dst_index);
|
||||||
|
#endif
|
||||||
|
else
|
||||||
|
{
|
||||||
|
T array[VECSIZE];
|
||||||
|
VSTOREN(vecValue, 0, array);
|
||||||
|
#pragma unroll
|
||||||
|
for (int i = 0; i < VECSIZE; ++i)
|
||||||
|
if (gx + i < max_index)
|
||||||
|
dst[dst_index + i] = array[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
|
||||||
|
__global T * dst, int dst_offset, int dst_step,
|
||||||
|
T thresh, T max_val, int rows, int cols)
|
||||||
{
|
{
|
||||||
int gx = get_global_id(0);
|
int gx = get_global_id(0);
|
||||||
int gy = get_global_id(1);
|
int gy = get_global_id(1);
|
||||||
@ -78,3 +132,5 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user