From 1f7f9c96821267559da29a04a2ef23fd79faf072 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 28 Oct 2013 18:04:34 +0400 Subject: [PATCH] rewrote and generalized ocl::threshold --- modules/ocl/src/imgproc.cpp | 96 +++++++-------- modules/ocl/src/opencl/imgproc_threshold.cl | 122 ++++---------------- modules/ocl/test/test_imgproc.cpp | 4 +- 3 files changed, 69 insertions(+), 153 deletions(-) diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index a2c685496..8ae9c643d 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -98,80 +98,66 @@ namespace cv ///////////////////////////////////////////////////////////////////////////////////// // threshold - typedef void (*gpuThresh_t)(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type); - - static void threshold_8u(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) + static std::vector scalarToVector(const cv::Scalar & sc, int depth, int ocn, int cn) { - uchar thresh_uchar = cvFloor(thresh); - uchar max_val = cvRound(maxVal); + CV_Assert(ocn == cn || (ocn == 4 && cn == 3)); - size_t cols = (dst.cols + (dst.offset % 16) + 15) / 16; - size_t bSizeX = 16, bSizeY = 16; - size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX; - size_t gSizeY = dst.rows; - size_t globalThreads[3] = {gSizeX, gSizeY, 1}; - size_t localThreads[3] = {bSizeX, bSizeY, 1}; + static const int sizeMap[] = { sizeof(uchar), sizeof(char), sizeof(ushort), + sizeof(short), sizeof(int), sizeof(float), sizeof(double) }; - vector< pair > args; - args.push_back( make_pair(sizeof(cl_mem), &src.data)); - args.push_back( make_pair(sizeof(cl_mem), &dst.data)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back( make_pair(sizeof(cl_uchar), (void *)&thresh_uchar)); - args.push_back( make_pair(sizeof(cl_uchar), (void *)&max_val)); - args.push_back( make_pair(sizeof(cl_int), (void *)&type)); - openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth()); + int elemSize1 = sizeMap[depth]; + int bufSize = elemSize1 * ocn; + std::vector _buf(bufSize); + uchar * buf = &_buf[0]; + scalarToRawData(sc, buf, CV_MAKE_TYPE(depth, cn)); + memset(buf + elemSize1 * cn, 0, (ocn - cn) * elemSize1); + + return _buf; } - static void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) + static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType) { - float thresh_f = thresh; - float max_val = maxVal; - int dst_offset = (dst.offset >> 2); - int dst_step = (dst.step >> 2); - int src_offset = (src.offset >> 2); - int src_step = (src.step >> 2); + bool ival = src.depth() < CV_32F; + std::vector thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(), + dst.oclchannels(), dst.channels()); + std::vector maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels()); - size_t cols = (dst.cols + (dst_offset & 3) + 3) / 4; - size_t bSizeX = 16, bSizeY = 16; - size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX; - size_t gSizeY = dst.rows; - size_t globalThreads[3] = {gSizeX, gSizeY, 1}; - size_t localThreads[3] = {bSizeX, bSizeY, 1}; + size_t localThreads[3] = { 16, 16, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + + const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", + "THRESH_TOZERO", "THRESH_TOZERO_INV" }; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + std::string buildOptions = format("-D T=%s%s -D %s", typeMap[src.depth()], channelMap[src.channels()], + thresholdMap[thresholdType]); + + int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); vector< pair > args; - args.push_back( make_pair(sizeof(cl_mem), &src.data)); - args.push_back( make_pair(sizeof(cl_mem), &dst.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset)); args.push_back( make_pair(sizeof(cl_int), (void *)&src_step)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); - args.push_back( make_pair(sizeof(cl_float), (void *)&thresh_f)); - args.push_back( make_pair(sizeof(cl_float), (void *)&max_val)); - args.push_back( make_pair(sizeof(cl_int), (void *)&type)); - - openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth()); + args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0])); + args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0])); + openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, + -1, -1, buildOptions.c_str()); } - // threshold: support 8UC1 and 32FC1 data type and five threshold type - double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) + double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType) { - //TODO: These limitations shall be removed later. - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_32FC1); - CV_Assert(type == THRESH_BINARY || type == THRESH_BINARY_INV || type == THRESH_TRUNC - || type == THRESH_TOZERO || type == THRESH_TOZERO_INV ); + CV_Assert(thresholdType == THRESH_BINARY || thresholdType == THRESH_BINARY_INV || thresholdType == THRESH_TRUNC + || thresholdType == THRESH_TOZERO || thresholdType == THRESH_TOZERO_INV); - static const gpuThresh_t gpuThresh_callers[2] = {threshold_8u, threshold_32f}; - - dst.create( src.size(), src.type() ); - gpuThresh_callers[(src.type() == CV_32FC1)](src, dst, thresh, maxVal, type); + dst.create(src.size(), src.type()); + threshold_runner(src, dst, thresh, maxVal, thresholdType); return thresh; } diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl index 8d7c77e1f..81f2a7400 100644 --- a/modules/ocl/src/opencl/imgproc_threshold.cl +++ b/modules/ocl/src/opencl/imgproc_threshold.cl @@ -44,109 +44,37 @@ //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#endif -// threshold type: -// enum { THRESH_BINARY=0, THRESH_BINARY_INV=1, THRESH_TRUNC=2, THRESH_TOZERO=3, -// THRESH_TOZERO_INV=4, THRESH_MASK=7, THRESH_OTSU=8 }; - -__kernel void threshold_C1_D0(__global const uchar * restrict src, __global uchar *dst, - int src_offset, int src_step, - int dst_offset, int dst_rows, int dst_cols, int dst_step, - uchar thresh, uchar max_val, int thresh_type - ) +__kernel void threshold(__global const T * restrict src, int src_offset, int src_step, + __global T * dst, int dst_offset, int dst_step, + int rows, int cols, T thresh, T max_val) { int gx = get_global_id(0); - const int gy = get_global_id(1); + int gy = get_global_id(1); - int offset = (dst_offset & 15); - src_offset -= offset; - - int dstart = (gx << 4) - offset; - if(dstart < dst_cols && gy < dst_rows) + if (gx < cols && gy < rows) { - uchar16 sdata = vload16(gx, src+src_offset+gy*src_step); - uchar16 ddata; - uchar16 zero = 0; - switch (thresh_type) - { - case 0: - ddata = ((sdata > thresh) ) ? (uchar16)(max_val) : (uchar16)(0); - break; - case 1: - ddata = ((sdata > thresh)) ? zero : (uchar16)(max_val); - break; - case 2: - ddata = ((sdata > thresh)) ? (uchar16)(thresh) : sdata; - break; - case 3: - ddata = ((sdata > thresh)) ? sdata : zero; - break; - case 4: - ddata = ((sdata > thresh)) ? zero : sdata; - break; - default: - ddata = sdata; - } - int16 dpos = (int16)(dstart, dstart+1, dstart+2, dstart+3, dstart+4, dstart+5, dstart+6, dstart+7, dstart+8, - dstart+9, dstart+10, dstart+11, dstart+12, dstart+13, dstart+14, dstart+15); - uchar16 dVal = *(__global uchar16*)(dst+dst_offset+gy*dst_step+dstart); - int16 con = dpos >= 0 && dpos < dst_cols; - ddata = convert_uchar16(con != 0) ? ddata : dVal; - if(dstart < dst_cols) - { - *(__global uchar16*)(dst+dst_offset+gy*dst_step+dstart) = ddata; - } - } -} - - -__kernel void threshold_C1_D5(__global const float * restrict src, __global float *dst, - int src_offset, int src_step, - int dst_offset, int dst_rows, int dst_cols, int dst_step, - float thresh, float max_val, int thresh_type - ) -{ - const int gx = get_global_id(0); - const int gy = get_global_id(1); - - int offset = (dst_offset & 3); - src_offset -= offset; - - int dstart = (gx << 2) - offset; - if(dstart < dst_cols && gy < dst_rows) - { - float4 sdata = vload4(gx, src+src_offset+gy*src_step); - float4 ddata; - float4 zero = 0; - switch (thresh_type) - { - case 0: - ddata = sdata > thresh ? (float4)(max_val) : (float4)(0.f); - break; - case 1: - ddata = sdata > thresh ? zero : (float4)max_val; - break; - case 2: - ddata = sdata > thresh ? (float4)thresh : sdata; - break; - case 3: - ddata = sdata > thresh ? sdata : (float4)(0.f); - break; - case 4: - ddata = sdata > thresh ? (float4)(0.f) : sdata; - break; - default: - ddata = sdata; - } - int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3); - float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart); - int4 con = dpos >= 0 && dpos < dst_cols; - ddata = convert_float4(con) != (float4)(0) ? ddata : dVal; - if(dstart < dst_cols) - { - *(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata; - } + int src_index = mad24(gy, src_step, src_offset + gx); + int dst_index = mad24(gy, dst_step, dst_offset + gx); + + T sdata = src[src_index], zero = (T)(0); + +#ifdef THRESH_BINARY + dst[dst_index] = sdata > thresh ? max_val : zero; +#elif defined THRESH_BINARY_INV + dst[dst_index] = sdata > thresh ? zero : max_val; +#elif defined THRESH_TRUNC + dst[dst_index] = sdata > thresh ? thresh : sdata; +#elif defined THRESH_TOZERO + dst[dst_index] = sdata > thresh ? sdata : zero; +#elif defined THRESH_TOZERO_INV + dst[dst_index] = sdata > thresh ? zero : sdata; +#endif } } diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index eb983fb17..c37f0377a 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -502,7 +502,9 @@ INSTANTIATE_TEST_CASE_P(Imgproc, Integral, Combine( Bool())); INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( - Values(CV_8UC1, CV_32FC1), + Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, + CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4, + CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4), Values(0), Values(ThreshOp(THRESH_BINARY), ThreshOp(THRESH_BINARY_INV), ThreshOp(THRESH_TRUNC),