rewrote and generalized ocl::threshold
This commit is contained in:
parent
d3bcf609f1
commit
1f7f9c9682
@ -98,80 +98,66 @@ namespace cv
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// threshold
|
||||
|
||||
typedef void (*gpuThresh_t)(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type);
|
||||
|
||||
static void threshold_8u(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type)
|
||||
static std::vector<uchar> scalarToVector(const cv::Scalar & sc, int depth, int ocn, int cn)
|
||||
{
|
||||
uchar thresh_uchar = cvFloor(thresh);
|
||||
uchar max_val = cvRound(maxVal);
|
||||
CV_Assert(ocn == cn || (ocn == 4 && cn == 3));
|
||||
|
||||
size_t cols = (dst.cols + (dst.offset % 16) + 15) / 16;
|
||||
size_t bSizeX = 16, bSizeY = 16;
|
||||
size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX;
|
||||
size_t gSizeY = dst.rows;
|
||||
size_t globalThreads[3] = {gSizeX, gSizeY, 1};
|
||||
size_t localThreads[3] = {bSizeX, bSizeY, 1};
|
||||
static const int sizeMap[] = { sizeof(uchar), sizeof(char), sizeof(ushort),
|
||||
sizeof(short), sizeof(int), sizeof(float), sizeof(double) };
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair(sizeof(cl_mem), &src.data));
|
||||
args.push_back( make_pair(sizeof(cl_mem), &dst.data));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
|
||||
args.push_back( make_pair(sizeof(cl_uchar), (void *)&thresh_uchar));
|
||||
args.push_back( make_pair(sizeof(cl_uchar), (void *)&max_val));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&type));
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth());
|
||||
int elemSize1 = sizeMap[depth];
|
||||
int bufSize = elemSize1 * ocn;
|
||||
std::vector<uchar> _buf(bufSize);
|
||||
uchar * buf = &_buf[0];
|
||||
scalarToRawData(sc, buf, CV_MAKE_TYPE(depth, cn));
|
||||
memset(buf + elemSize1 * cn, 0, (ocn - cn) * elemSize1);
|
||||
|
||||
return _buf;
|
||||
}
|
||||
|
||||
static void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type)
|
||||
static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
|
||||
{
|
||||
float thresh_f = thresh;
|
||||
float max_val = maxVal;
|
||||
int dst_offset = (dst.offset >> 2);
|
||||
int dst_step = (dst.step >> 2);
|
||||
int src_offset = (src.offset >> 2);
|
||||
int src_step = (src.step >> 2);
|
||||
bool ival = src.depth() < CV_32F;
|
||||
std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(),
|
||||
dst.oclchannels(), dst.channels());
|
||||
std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
|
||||
|
||||
size_t cols = (dst.cols + (dst_offset & 3) + 3) / 4;
|
||||
size_t bSizeX = 16, bSizeY = 16;
|
||||
size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX;
|
||||
size_t gSizeY = dst.rows;
|
||||
size_t globalThreads[3] = {gSizeX, gSizeY, 1};
|
||||
size_t localThreads[3] = {bSizeX, bSizeY, 1};
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
||||
|
||||
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
|
||||
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
|
||||
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||
std::string buildOptions = format("-D T=%s%s -D %s", typeMap[src.depth()], channelMap[src.channels()],
|
||||
thresholdMap[thresholdType]);
|
||||
|
||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair(sizeof(cl_mem), &src.data));
|
||||
args.push_back( make_pair(sizeof(cl_mem), &dst.data));
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
|
||||
args.push_back( make_pair(sizeof(cl_float), (void *)&thresh_f));
|
||||
args.push_back( make_pair(sizeof(cl_float), (void *)&max_val));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&type));
|
||||
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth());
|
||||
args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
|
||||
args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0]));
|
||||
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args,
|
||||
-1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
||||
// threshold: support 8UC1 and 32FC1 data type and five threshold type
|
||||
double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type)
|
||||
double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
|
||||
{
|
||||
//TODO: These limitations shall be removed later.
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_32FC1);
|
||||
CV_Assert(type == THRESH_BINARY || type == THRESH_BINARY_INV || type == THRESH_TRUNC
|
||||
|| type == THRESH_TOZERO || type == THRESH_TOZERO_INV );
|
||||
CV_Assert(thresholdType == THRESH_BINARY || thresholdType == THRESH_BINARY_INV || thresholdType == THRESH_TRUNC
|
||||
|| thresholdType == THRESH_TOZERO || thresholdType == THRESH_TOZERO_INV);
|
||||
|
||||
static const gpuThresh_t gpuThresh_callers[2] = {threshold_8u, threshold_32f};
|
||||
|
||||
dst.create( src.size(), src.type() );
|
||||
gpuThresh_callers[(src.type() == CV_32FC1)](src, dst, thresh, maxVal, type);
|
||||
dst.create(src.size(), src.type());
|
||||
threshold_runner(src, dst, thresh, maxVal, thresholdType);
|
||||
|
||||
return thresh;
|
||||
}
|
||||
|
@ -44,109 +44,37 @@
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// threshold type:
|
||||
// enum { THRESH_BINARY=0, THRESH_BINARY_INV=1, THRESH_TRUNC=2, THRESH_TOZERO=3,
|
||||
// THRESH_TOZERO_INV=4, THRESH_MASK=7, THRESH_OTSU=8 };
|
||||
|
||||
__kernel void threshold_C1_D0(__global const uchar * restrict src, __global uchar *dst,
|
||||
int src_offset, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step,
|
||||
uchar thresh, uchar max_val, int thresh_type
|
||||
)
|
||||
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
|
||||
__global T * dst, int dst_offset, int dst_step,
|
||||
int rows, int cols, T thresh, T max_val)
|
||||
{
|
||||
int gx = get_global_id(0);
|
||||
const int gy = get_global_id(1);
|
||||
int gy = get_global_id(1);
|
||||
|
||||
int offset = (dst_offset & 15);
|
||||
src_offset -= offset;
|
||||
|
||||
int dstart = (gx << 4) - offset;
|
||||
if(dstart < dst_cols && gy < dst_rows)
|
||||
if (gx < cols && gy < rows)
|
||||
{
|
||||
uchar16 sdata = vload16(gx, src+src_offset+gy*src_step);
|
||||
uchar16 ddata;
|
||||
uchar16 zero = 0;
|
||||
switch (thresh_type)
|
||||
{
|
||||
case 0:
|
||||
ddata = ((sdata > thresh) ) ? (uchar16)(max_val) : (uchar16)(0);
|
||||
break;
|
||||
case 1:
|
||||
ddata = ((sdata > thresh)) ? zero : (uchar16)(max_val);
|
||||
break;
|
||||
case 2:
|
||||
ddata = ((sdata > thresh)) ? (uchar16)(thresh) : sdata;
|
||||
break;
|
||||
case 3:
|
||||
ddata = ((sdata > thresh)) ? sdata : zero;
|
||||
break;
|
||||
case 4:
|
||||
ddata = ((sdata > thresh)) ? zero : sdata;
|
||||
break;
|
||||
default:
|
||||
ddata = sdata;
|
||||
}
|
||||
int16 dpos = (int16)(dstart, dstart+1, dstart+2, dstart+3, dstart+4, dstart+5, dstart+6, dstart+7, dstart+8,
|
||||
dstart+9, dstart+10, dstart+11, dstart+12, dstart+13, dstart+14, dstart+15);
|
||||
uchar16 dVal = *(__global uchar16*)(dst+dst_offset+gy*dst_step+dstart);
|
||||
int16 con = dpos >= 0 && dpos < dst_cols;
|
||||
ddata = convert_uchar16(con != 0) ? ddata : dVal;
|
||||
if(dstart < dst_cols)
|
||||
{
|
||||
*(__global uchar16*)(dst+dst_offset+gy*dst_step+dstart) = ddata;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void threshold_C1_D5(__global const float * restrict src, __global float *dst,
|
||||
int src_offset, int src_step,
|
||||
int dst_offset, int dst_rows, int dst_cols, int dst_step,
|
||||
float thresh, float max_val, int thresh_type
|
||||
)
|
||||
{
|
||||
const int gx = get_global_id(0);
|
||||
const int gy = get_global_id(1);
|
||||
|
||||
int offset = (dst_offset & 3);
|
||||
src_offset -= offset;
|
||||
|
||||
int dstart = (gx << 2) - offset;
|
||||
if(dstart < dst_cols && gy < dst_rows)
|
||||
{
|
||||
float4 sdata = vload4(gx, src+src_offset+gy*src_step);
|
||||
float4 ddata;
|
||||
float4 zero = 0;
|
||||
switch (thresh_type)
|
||||
{
|
||||
case 0:
|
||||
ddata = sdata > thresh ? (float4)(max_val) : (float4)(0.f);
|
||||
break;
|
||||
case 1:
|
||||
ddata = sdata > thresh ? zero : (float4)max_val;
|
||||
break;
|
||||
case 2:
|
||||
ddata = sdata > thresh ? (float4)thresh : sdata;
|
||||
break;
|
||||
case 3:
|
||||
ddata = sdata > thresh ? sdata : (float4)(0.f);
|
||||
break;
|
||||
case 4:
|
||||
ddata = sdata > thresh ? (float4)(0.f) : sdata;
|
||||
break;
|
||||
default:
|
||||
ddata = sdata;
|
||||
}
|
||||
int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3);
|
||||
float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart);
|
||||
int4 con = dpos >= 0 && dpos < dst_cols;
|
||||
ddata = convert_float4(con) != (float4)(0) ? ddata : dVal;
|
||||
if(dstart < dst_cols)
|
||||
{
|
||||
*(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata;
|
||||
}
|
||||
int src_index = mad24(gy, src_step, src_offset + gx);
|
||||
int dst_index = mad24(gy, dst_step, dst_offset + gx);
|
||||
|
||||
T sdata = src[src_index], zero = (T)(0);
|
||||
|
||||
#ifdef THRESH_BINARY
|
||||
dst[dst_index] = sdata > thresh ? max_val : zero;
|
||||
#elif defined THRESH_BINARY_INV
|
||||
dst[dst_index] = sdata > thresh ? zero : max_val;
|
||||
#elif defined THRESH_TRUNC
|
||||
dst[dst_index] = sdata > thresh ? thresh : sdata;
|
||||
#elif defined THRESH_TOZERO
|
||||
dst[dst_index] = sdata > thresh ? sdata : zero;
|
||||
#elif defined THRESH_TOZERO_INV
|
||||
dst[dst_index] = sdata > thresh ? zero : sdata;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -502,7 +502,9 @@ INSTANTIATE_TEST_CASE_P(Imgproc, Integral, Combine(
|
||||
Bool()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(
|
||||
Values(CV_8UC1, CV_32FC1),
|
||||
Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4,
|
||||
CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
|
||||
CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4),
|
||||
Values(0),
|
||||
Values(ThreshOp(THRESH_BINARY),
|
||||
ThreshOp(THRESH_BINARY_INV), ThreshOp(THRESH_TRUNC),
|
||||
|
Loading…
Reference in New Issue
Block a user