diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp index 46ff73d22..f0beed43d 100644 --- a/modules/ocl/src/columnsum.cpp +++ b/modules/ocl/src/columnsum.cpp @@ -52,25 +52,24 @@ using namespace cv::ocl; void cv::ocl::columnSum(const oclMat &src, oclMat &dst) { CV_Assert(src.type() == CV_32FC1); - dst.create(src.size(), src.type()); - Context *clCxt = src.clCxt; - - const std::string kernelName = "columnSum"; + int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); std::vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset)); size_t globalThreads[3] = {dst.cols, 1, 1}; size_t localThreads[3] = {256, 1, 1}; - openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(src.clCxt, &imgproc_columnsum, "columnSum", globalThreads, localThreads, args, src.oclchannels(), src.depth()); } diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 916d25ca9..10b680486 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -183,111 +183,89 @@ namespace cv void remap( const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int borderType, const Scalar &borderValue ) { Context *clCxt = 
src.clCxt; + bool supportsDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE); + if (!supportsDouble && src.depth() == CV_64F) + { + CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double"); + return; + } + CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4); - CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)); + CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) || + (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)); CV_Assert(!map2.data || map2.size() == map1.size()); - CV_Assert(dst.size() == map1.size()); + CV_Assert(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_WRAP + || borderType == BORDER_REFLECT_101 || borderType == BORDER_REFLECT); dst.create(map1.size(), src.type()); - string kernelName; + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" }; + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", + "BORDER_REFLECT_101", "BORDER_TRANSPARENT" }; + string kernelName = "remap"; if ( map1.type() == CV_32FC2 && !map2.data ) - { - if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) - kernelName = "remapLNFConstant"; - else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT) - kernelName = "remapNNFConstant"; - } + kernelName += "_32FC2"; else if (map1.type() == CV_16SC2 && !map2.data) - { - if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) - kernelName = "remapLNSConstant"; - else if (interpolation == INTER_NEAREST && borderType == 
BORDER_CONSTANT) - kernelName = "remapNNSConstant"; - - } + kernelName += "_16SC2"; else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) - { - if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) - kernelName = "remapLNF1Constant"; - else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT) - kernelName = "remapNNF1Constant"; - } - - size_t blkSizeX = 16, blkSizeY = 16; - size_t glbSizeX; - int cols = dst.cols; - if (src.type() == CV_8UC1) - { - cols = (dst.cols + dst.offset % 4 + 3) / 4; - glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; - - } - else if (src.type() == CV_32FC1 && interpolation == INTER_LINEAR) - { - cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4; - glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; - } + kernelName += "_2_32FC1"; else - glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; + CV_Error(CV_StsBadArg, "Unsupported map types"); - size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; - size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; - size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; + int ocn = dst.oclchannels(); + size_t localThreads[3] = { 16, 16, 1}; + size_t globalThreads[3] = { dst.cols, dst.rows, 1}; + + Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue); + std::string buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation], + borderMap[borderType], typeMap[src.depth()], channelMap[ocn]); + + if (interpolation != INTER_NEAREST) + { + int wdepth = std::max(CV_32F, dst.depth()); + if (!supportsDouble) + wdepth = std::min(CV_32F, wdepth); + + buildOptions += format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s" + " -D convertToWT2=convert_%s2 -D WT2=%s2", + typeMap[wdepth], channelMap[ocn], + typeMap[src.depth()], channelMap[ocn], src.depth() < CV_32F ? 
"_sat_rte" : "", + typeMap[wdepth], channelMap[ocn], + typeMap[wdepth], typeMap[wdepth]); + } + + int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); + int map1_step = map1.step / map1.elemSize(), map1_offset = map1.offset / map1.elemSize(); + int map2_step = map2.step / map2.elemSize(), map2_offset = map2.offset / map2.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); - float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; vector< pair > args; - if (map1.channels() == 2) - { - args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); - - if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) - args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); - else - args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat)); - } - if (map1.channels() == 1) - { - args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back( 
make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data)); + if (!map2.empty()) args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); - if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) - args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); - else - args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat)); - } - openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); + args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&map1_offset)); + if (!map2.empty()) + args.push_back( make_pair(sizeof(cl_int), (void *)&map2_offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src_step)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); + 
args.push_back( make_pair(sizeof(cl_int), (void *)&map1_step)); + if (!map2.empty()) + args.push_back( make_pair(sizeof(cl_int), (void *)&map2_step)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); + args.push_back( make_pair(scalar.elemSize(), (void *)scalar.data)); + + openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } //////////////////////////////////////////////////////////////////////////////////////////// @@ -448,31 +426,47 @@ namespace cv void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar) { - CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0); - if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - if (((bordertype & cv::BORDER_ISOLATED) == 0) && - (bordertype != cv::BORDER_CONSTANT) && - (bordertype != cv::BORDER_REPLICATE)) - { - CV_Error(CV_StsBadArg, "Unsupported border type"); - } + CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double"); + return; } + oclMat _src = src; + + CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0); + + if( _src.offset != 0 && (bordertype & BORDER_ISOLATED) == 0 ) + { + Size wholeSize; + Point ofs; + _src.locateROI(wholeSize, ofs); + int dtop = std::min(ofs.y, top); + int dbottom = std::min(wholeSize.height - _src.rows - ofs.y, bottom); + int dleft = std::min(ofs.x, left); + int dright = std::min(wholeSize.width - _src.cols - ofs.x, right); + _src.adjustROI(dtop, dbottom, dleft, dright); + top -= dtop; + left -= dleft; + bottom -= dbottom; + right -= dright; + } bordertype &= ~cv::BORDER_ISOLATED; + + // TODO need to 
remove this conditions and fix the code if (bordertype == cv::BORDER_REFLECT || bordertype == cv::BORDER_WRAP) { - CV_Assert((src.cols >= left) && (src.cols >= right) && (src.rows >= top) && (src.rows >= bottom)); + CV_Assert((_src.cols >= left) && (_src.cols >= right) && (_src.rows >= top) && (_src.rows >= bottom)); } else if (bordertype == cv::BORDER_REFLECT_101) { - CV_Assert((src.cols > left) && (src.cols > right) && (src.rows > top) && (src.rows > bottom)); + CV_Assert((_src.cols > left) && (_src.cols > right) && (_src.rows > top) && (_src.rows > bottom)); } - dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); - int srcStep = src.step1() / src.oclchannels(), dstStep = dst.step1() / dst.oclchannels(); - int srcOffset = src.offset / src.elemSize(), dstOffset = dst.offset / dst.elemSize(); - int depth = src.depth(), ochannels = src.oclchannels(); + dst.create(_src.rows + top + bottom, _src.cols + left + right, _src.type()); + int srcStep = _src.step1() / _src.oclchannels(), dstStep = dst.step1() / dst.oclchannels(); + int srcOffset = _src.offset / _src.elemSize(), dstOffset = dst.offset / dst.elemSize(); + int depth = _src.depth(), ochannels = _src.oclchannels(); int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101}; const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"}; @@ -483,19 +477,19 @@ namespace cv break; if (bordertype_index == sizeof(__bordertype) / sizeof(int)) - CV_Error(CV_StsBadArg, "unsupported border type"); + CV_Error(CV_StsBadArg, "Unsupported border type"); string kernelName = "copymakeborder"; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&_src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); 
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&_src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&_src.rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); @@ -1314,6 +1308,8 @@ namespace cv args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit )); args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset )); String kernelName = "calcLut"; size_t localThreads[3] = { 32, 8, 1 }; @@ -1333,7 +1329,7 @@ namespace cv } static void transform(const oclMat &src, oclMat &dst, const oclMat &lut, - const int tilesX, const int tilesY, const cv::Size tileSize) + const int tilesX, const int tilesY, const Size & tileSize) { cl_int2 tile_size; tile_size.s[0] = tileSize.width; @@ -1351,6 +1347,9 @@ namespace cv args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.offset )); size_t localThreads[3] = { 32, 8, 1 }; size_t globalThreads[3] = { src.cols, src.rows, 1 }; @@ -1419,9 +1418,10 @@ namespace cv } else { - cv::ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - 
(src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar()); + ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, + tilesX_ - (src.cols % tilesX_), BORDER_REFLECT_101, Scalar::all(0)); - tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); + tileSize = Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); srcForLut = srcExt_; } @@ -1579,30 +1579,31 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st { dst.create(src.size(), src.type()); - int channels = dst.oclchannels(), depth = dst.depth(); - - size_t vector_length = 1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols * channels + offset_cols, vector_length); - int rows = dst.rows; - size_t localThreads[3] = { 16, 16, 1 }; - size_t globalThreads[3] = { cols, rows, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + + int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); + int temp1_step = temp1.step / temp1.elemSize(), temp1_offset = temp1.offset / temp1.elemSize(); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&temp1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); + args.push_back( 
make_pair( sizeof(cl_int), (void *)&temp1_step )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_offset )); - openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, dst.depth()); } + void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y) { CV_Assert(x.depth() == CV_32F && t.depth() == CV_32F); diff --git a/modules/ocl/src/opencl/imgproc_clahe.cl b/modules/ocl/src/opencl/imgproc_clahe.cl index 49c709692..55692ae3b 100644 --- a/modules/ocl/src/opencl/imgproc_clahe.cl +++ b/modules/ocl/src/opencl/imgproc_clahe.cl @@ -53,12 +53,8 @@ int calc_lut(__local int* smem, int val, int tid) barrier(CLK_LOCAL_MEM_FENCE); if (tid == 0) - { for (int i = 1; i < 256; ++i) - { smem[i] += smem[i - 1]; - } - } barrier(CLK_LOCAL_MEM_FENCE); return smem[tid]; @@ -71,69 +67,51 @@ void reduce(volatile __local int* smem, int val, int tid) barrier(CLK_LOCAL_MEM_FENCE); if (tid < 128) - { smem[tid] = val += smem[tid + 128]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 64) - { smem[tid] = val += smem[tid + 64]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 32) - { smem[tid] += smem[tid + 32]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 16) - { smem[tid] += smem[tid + 16]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) - { smem[tid] += smem[tid + 8]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 4) - { smem[tid] += smem[tid + 4]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 2) - { smem[tid] += smem[tid + 2]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 1) - { smem[256] = smem[tid] + smem[tid + 1]; - } barrier(CLK_LOCAL_MEM_FENCE); } + #else + void reduce(__local volatile int* 
smem, int val, int tid) { smem[tid] = val; barrier(CLK_LOCAL_MEM_FENCE); if (tid < 128) - { smem[tid] = val += smem[tid + 128]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 64) - { smem[tid] = val += smem[tid + 64]; - } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 32) @@ -141,12 +119,17 @@ void reduce(__local volatile int* smem, int val, int tid) smem[tid] += smem[tid + 32]; #if WAVE_SIZE < 32 } barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 16) { + + if (tid < 16) + { #endif smem[tid] += smem[tid + 16]; #if WAVE_SIZE < 16 - } barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 8) { + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 8) + { #endif smem[tid] += smem[tid + 8]; smem[tid] += smem[tid + 4]; @@ -159,7 +142,8 @@ void reduce(__local volatile int* smem, int val, int tid) __kernel void calcLut(__global __const uchar * src, __global uchar * lut, const int srcStep, const int dstStep, const int2 tileSize, const int tilesX, - const int clipLimit, const float lutScale) + const int clipLimit, const float lutScale, + const int src_offset, const int dst_offset) { __local int smem[512]; @@ -173,25 +157,21 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut, for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1)) { - __global const uchar* srcPtr = src + mad24( ty * tileSize.y + i, - srcStep, tx * tileSize.x ); + __global const uchar* srcPtr = src + mad24(ty * tileSize.y + i, srcStep, tx * tileSize.x + src_offset); for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0)) { const int data = srcPtr[j]; atomic_inc(&smem[data]); } } - barrier(CLK_LOCAL_MEM_FENCE); int tHistVal = smem[tid]; - barrier(CLK_LOCAL_MEM_FENCE); if (clipLimit > 0) { // clip histogram bar - int clipped = 0; if (tHistVal > clipLimit) { @@ -200,7 +180,6 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut, } // find number of overall clipped samples - reduce(smem, clipped, tid); barrier(CLK_LOCAL_MEM_FENCE); #ifdef CPU @@ -229,7 +208,7 @@ __kernel 
void calcLut(__global __const uchar * src, __global uchar * lut, const int lutVal = calc_lut(smem, tHistVal, tid); uint ires = (uint)convert_int_rte(lutScale * lutVal); - lut[(ty * tilesX + tx) * dstStep + tid] = + lut[(ty * tilesX + tx) * dstStep + tid + dst_offset] = convert_uchar(clamp(ires, (uint)0, (uint)255)); } @@ -239,7 +218,8 @@ __kernel void transform(__global __const uchar * src, const int srcStep, const int dstStep, const int lutStep, const int cols, const int rows, const int2 tileSize, - const int tilesX, const int tilesY) + const int tilesX, const int tilesY, + const int src_offset, const int dst_offset, int lut_offset) { const int x = get_global_id(0); const int y = get_global_id(1); @@ -261,15 +241,15 @@ __kernel void transform(__global __const uchar * src, tx1 = max(tx1, 0); tx2 = min(tx2, tilesX - 1); - const int srcVal = src[mad24(y, srcStep, x)]; + const int srcVal = src[mad24(y, srcStep, x + src_offset)]; float res = 0; - res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (1.0f - ya)); - res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (1.0f - ya)); - res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (ya)); - res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (ya)); + res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (1.0f - ya)); + res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (1.0f - ya)); + res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (ya)); + res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (ya)); uint ires = (uint)convert_int_rte(res); - dst[mad24(y, dstStep, x)] = convert_uchar(clamp(ires, (uint)0, (uint)255)); + dst[mad24(y, dstStep, x + dst_offset)] = convert_uchar(clamp(ires, (uint)0, (uint)255)); } diff --git a/modules/ocl/src/opencl/imgproc_columnsum.cl b/modules/ocl/src/opencl/imgproc_columnsum.cl index c693919f8..1609d7c55 
100644 --- a/modules/ocl/src/opencl/imgproc_columnsum.cl +++ b/modules/ocl/src/opencl/imgproc_columnsum.cl @@ -43,38 +43,28 @@ // //M*/ -#pragma OPENCL EXTENSION cl_amd_printf : enable -#if defined (__ATI__) -#pragma OPENCL EXTENSION cl_amd_fp64:enable - -#elif defined (__NVIDIA__) -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#endif - //////////////////////////////////////////////////////////////////// ///////////////////////// columnSum //////////////////////////////// //////////////////////////////////////////////////////////////////// -/// CV_32FC1 -__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep) + +__kernel void columnSum_C1_D5(__global float * src, __global float * dst, + int cols, int rows, int src_step, int dst_step, int src_offset, int dst_offset) { const int x = get_global_id(0); - srcStep >>= 2; - dstStep >>= 2; - - if (x < srcCols) + if (x < cols) { - int srcIdx = x ; - int dstIdx = x ; + int srcIdx = x + src_offset; + int dstIdx = x + dst_offset; float sum = 0; - for (int y = 0; y < srcRows; ++y) + for (int y = 0; y < rows; ++y) { sum += src[srcIdx]; dst[dstIdx] = sum; - srcIdx += srcStep; - dstIdx += dstStep; + srcIdx += src_step; + dstIdx += dst_step; } } } diff --git a/modules/ocl/src/opencl/imgproc_convolve.cl b/modules/ocl/src/opencl/imgproc_convolve.cl index 76e7cfc55..db7a7dfc3 100644 --- a/modules/ocl/src/opencl/imgproc_convolve.cl +++ b/modules/ocl/src/opencl/imgproc_convolve.cl @@ -48,9 +48,12 @@ #elif defined (__NVIDIA__) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif + /************************************** convolve **************************************/ -__kernel void convolve_D5 (__global float *src, __global float *temp1, __global float *dst, - int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight) + +__kernel void convolve_D5(__global float *src, __global float *temp1, __global float *dst, + int rows, int cols, int src_step, int 
dst_step,int k_step, int kWidth, int kHeight, + int src_offset, int dst_offset, int koffset) { __local float smem[16 + 2 * 8][16 + 2 * 8]; @@ -65,7 +68,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global // 0 | 0 0 | 0 // ----------- // 0 | 0 0 | 0 - smem[y][x] = src[min(max(gy - 8, 0), rows - 1)*(src_step >> 2) + min(max(gx - 8, 0), cols - 1)]; + smem[y][x] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset]; // 0 | 0 x | x // ----------- @@ -73,7 +76,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global // 0 | 0 0 | 0 // ----------- // 0 | 0 0 | 0 - smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1)*(src_step >> 2) + min(gx + 8, cols - 1)]; + smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset]; // 0 | 0 0 | 0 // ----------- @@ -81,7 +84,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global // x | x 0 | 0 // ----------- // x | x 0 | 0 - smem[y + 16][x] = src[min(gy + 8, rows - 1)*(src_step >> 2) + min(max(gx - 8, 0), cols - 1)]; + smem[y + 16][x] = src[min(gy + 8, rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset]; // 0 | 0 0 | 0 // ----------- @@ -89,21 +92,18 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global // 0 | 0 x | x // ----------- // 0 | 0 x | x - smem[y + 16][x + 16] = src[min(gy + 8, rows - 1)*(src_step >> 2) + min(gx + 8, cols - 1)]; + smem[y + 16][x + 16] = src[min(gy + 8, rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset]; barrier(CLK_LOCAL_MEM_FENCE); if (gx < cols && gy < rows) { - float res = 0; + float res = 0; for (int i = 0; i < kHeight; ++i) - { for (int j = 0; j < kWidth; ++j) - { - res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * (k_step>>2) + j]; - } - } - dst[gy*(dst_step >> 2)+gx] = res; - } + res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * k_step + j 
+ koffset]; + + dst[gy * dst_step + gx + dst_offset] = res; + } } diff --git a/modules/ocl/src/opencl/imgproc_copymakeboder.cl b/modules/ocl/src/opencl/imgproc_copymakeboder.cl index 8c1889d37..ff7509ffd 100644 --- a/modules/ocl/src/opencl/imgproc_copymakeboder.cl +++ b/modules/ocl/src/opencl/imgproc_copymakeboder.cl @@ -34,6 +34,13 @@ // // +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif #ifdef BORDER_CONSTANT //BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl index ee40e935c..23899bdbb 100644 --- a/modules/ocl/src/opencl/imgproc_remap.cl +++ b/modules/ocl/src/opencl/imgproc_remap.cl @@ -1,4 +1,3 @@ - /*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. @@ -43,940 +42,282 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ -//#pragma OPENCL EXTENSION cl_amd_printf : enable -#if defined DOUBLE_SUPPORT +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable -typedef double4 F4 ; -#else -typedef float4 F4; +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif +#ifdef INTER_NEAREST +#define convertToWT +#endif -///////////////////////////////////////////////////////// -///////////////////////using buffer////////////////////// -///////////////////////////////////////////////////////// -__kernel void remapNNSConstant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global short * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - x = x << 2; - int gx = x - (dst_offset&3); - int4 Gx = (int4)(gx, gx+1, gx+2, gx+3); - - uchar4 nval =convert_uchar4(nVal); - uchar4 val = (uchar4)(nval.s0); - - int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3); - - int map1Start = y * map1_step + (x << 2) + map1_offset - ((dst_offset & 3) << 2); - short8 map1_data; - - map1_data = *((__global short8 *)((__global char*)map1 + map1Start)); - int4 srcIdx = convert_int4(map1_data.odd) * src_step + convert_int4(map1_data.even) + src_offset; - - uchar4 con = convert_uchar4(convert_int4(map1_data.even) >= (int4)(src_cols) || convert_int4(map1_data.odd) >= (int4)(src_rows) || convert_int4(map1_data.even) < (int4)(0) || convert_int4(map1_data.odd) < (int4)(0)); - uchar4 src_data = val; - - if (con.s0 == 0) - src_data.s0 = *(src + srcIdx.s0); - if (con.s1 == 0) - src_data.s1 = *(src + srcIdx.s1); - if (con.s2 == 0) - src_data.s2 = *(src + srcIdx.s2); - if (con.s3 == 0) - src_data.s3 = *(src + srcIdx.s3); - - uchar4 dst_data; - - 
__global uchar4* d = (__global uchar4 *)(dst + dstStart); - - uchar4 dVal = *d; - - int4 dcon = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows); - dst_data = (convert_uchar4(dcon) != convert_uchar4((int4)(0))) ? src_data : dVal; - - *d = dst_data; - +#ifdef BORDER_CONSTANT +#define EXTRAPOLATE(v2, v) v = scalar; +#elif defined BORDER_REPLICATE +#define EXTRAPOLATE(v2, v) \ + { \ + v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), zero); \ + v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \ } - -} - -__kernel void remapNNFConstant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - x = x << 2; - int gx = x - (dst_offset&3); - int4 Gx = (int4)(gx, gx+1, gx+2, gx+3); - - uchar4 nval =convert_uchar4(nVal); - uchar val = nval.s0; - - int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3); - - int map1Start = y * map1_step + (x << 3) + map1_offset - ((dst_offset & 3) << 3); - float8 map1_data; - - map1_data = *((__global float8 *)((__global char*)map1 + map1Start)); - int8 map1_dataZ = convert_int8_sat_rte(map1_data); - int4 srcIdx = map1_dataZ.odd * src_step + map1_dataZ.even + src_offset; - - uchar4 src_data = val; - uchar4 con = convert_uchar4(map1_dataZ.even >= (int4)(src_cols) || map1_dataZ.odd >= (int4)(src_rows) || map1_dataZ.even < (int4)(0) || map1_dataZ.odd < (int4)(0)); - - if (con.s0 == 0) - src_data.s0 = *(src + srcIdx.s0); - if (con.s1 == 0) - src_data.s1 = *(src + srcIdx.s1); - if (con.s2 == 0) - src_data.s2 = *(src + srcIdx.s2); - if (con.s3 == 0) - src_data.s3 = *(src + srcIdx.s3); - uchar4 dst_data; - // dst_data = convert_uchar4(map1_dataZ.even >= (int4)(src_cols) 
|| map1_dataZ.odd >= (int4)(src_rows)) ? (uchar4)(val) : src_data; - __global uchar4* d = (__global uchar4 *)(dst + dstStart); - - uchar4 dVal = *d; - - int4 dcon = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows); - - dst_data = (convert_uchar4(dcon) != convert_uchar4((int4)(0))) ? src_data : dVal; - *d = dst_data; +#elif defined BORDER_WRAP +#define EXTRAPOLATE(v2, v) \ + { \ + if (v2.x < 0) \ + v2.x -= ((v2.x - src_cols + 1) / src_cols) * src_cols; \ + if (v2.x >= src_cols) \ + v2.x %= src_cols; \ + \ + if (v2.y < 0) \ + v2.y -= ((v2.y - src_rows + 1) / src_rows) * src_rows; \ + if( v2.y >= src_rows ) \ + v2.y %= src_rows; \ + v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \ } -} - -__kernel void remapNNF1Constant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - x = x << 2; - int gx = x - (dst_offset&3); - int4 Gx = (int4)(gx, gx+1, gx+2, gx+3); - - uchar4 nval =convert_uchar4(nVal); - uchar4 val = (uchar4)(nval.s0); - - int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3); - - int map1Start = y * map1_step + (x << 2) + map1_offset - ((dst_offset & 3) << 2); - float4 map1_data; - float4 map2_data; - - map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); - map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); - float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); - int8 map_dataZ = convert_int8_sat_rte(map_data); - int4 srcIdx = map_dataZ.odd * src_step + map_dataZ.even + src_offset; - - uchar4 src_data = val; - uchar4 con = 
convert_uchar4(map_dataZ.even >= (int4)(src_cols) || map_dataZ.odd >= (int4)(src_rows)|| map_dataZ.even < (int4)(0) || map_dataZ.odd < (int4)(0)); - - if (con.s0 == 0) - src_data.s0 = *(src + srcIdx.s0); - if (con.s1 == 0) - src_data.s1 = *(src + srcIdx.s1); - if (con.s2 == 0) - src_data.s2 = *(src + srcIdx.s2); - if (con.s3 == 0) - src_data.s3 = *(src + srcIdx.s3); - uchar4 dst_data; - - // dst_data = convert_uchar4(map_dataZ.even >= (int4)(src_cols) || map_dataZ.odd >= (int4)(src_rows)) ? (uchar4)(val) : src_data; - __global uchar4* d = (__global uchar4 *)(dst + dstStart); - - uchar4 dVal = *d; - - int4 dcon = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows); - - dst_data = (convert_uchar4(dcon) != convert_uchar4((int4)(0))) ? src_data : dVal; - *d = dst_data; +#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101) +#ifdef BORDER_REFLECT +#define DELTA int delta = 0 +#else +#define DELTA int delta = 1 +#endif +#define EXTRAPOLATE(v2, v) \ + { \ + DELTA; \ + if (src_cols == 1) \ + v2.x = 0; \ + else \ + do \ + { \ + if( v2.x < 0 ) \ + v2.x = -v2.x - 1 + delta; \ + else \ + v2.x = src_cols - 1 - (v2.x - src_cols) - delta; \ + } \ + while (v2.x >= src_cols || v2.x < 0); \ + \ + if (src_rows == 1) \ + v2.y = 0; \ + else \ + do \ + { \ + if( v2.y < 0 ) \ + v2.y = -v2.y - 1 + delta; \ + else \ + v2.y = src_rows - 1 - (v2.y - src_rows) - delta; \ + } \ + while (v2.y >= src_rows || v2.y < 0); \ + v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \ } -} +#else +#error No extrapolation method +#endif +#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0) -__kernel void remapNNSConstant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global short * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) +#ifdef INTER_NEAREST + 
+__kernel void remap_2_32FC1(__global const T * restrict src, __global T * dst, + __global float * map1, __global float * map2, + int src_offset, int dst_offset, int map1_offset, int map2_offset, + int src_step, int dst_step, int map1_step, int map2_step, + int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar) { int x = get_global_id(0); int y = get_global_id(1); - if(x < threadCols && y < dst_rows) + if (x < dst_cols && y < dst_rows) { - int dstIdx = y * dst_step + (x << 2) + dst_offset; - int mapIdx = y * map1_step + (x << 2) + map1_offset; - short2 map1_data = *((__global short2 *)((__global char*)map1 + mapIdx)); - int srcIdx = map1_data.y * src_step + (map1_data.x << 2) + src_offset; - uchar4 nval = convert_uchar4(nVal); - uchar4 src_data; - if(map1_data.x >= src_cols || map1_data.y >= src_rows || map1_data.x <0 || map1_data.y < 0 ) - src_data = nval; - else - src_data = *((__global uchar4 *)((__global uchar *)src + srcIdx)); - *((__global uchar4 *)((__global uchar*)dst + dstIdx)) = src_data; + int dstIdx = mad24(y, dst_step, x + dst_offset); + int map1Idx = mad24(y, map1_step, x + map1_offset); + int map2Idx = mad24(y, map2_step, x + map2_offset); + int gx = convert_int_sat_rte(map1[map1Idx]); + int gy = convert_int_sat_rte(map2[map2Idx]); - } - - -} - -__kernel void remapNNFConstant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - int dstIdx = y * dst_step + (x << 2) + dst_offset; - int mapIdx = y * map1_step + (x << 3) + map1_offset; - float2 map1_data = *((__global float2 *)((__global char*)map1 + mapIdx)); - int2 map1_dataZ = convert_int2_sat_rte(map1_data); - int srcIdx = map1_dataZ.y 
* src_step + (map1_dataZ.x << 2) + src_offset; - uchar4 nval = convert_uchar4(nVal); - uchar4 src_data; - if(map1_dataZ.x >= src_cols || map1_dataZ.y >= src_rows || map1_dataZ.x < 0 || map1_dataZ.y < 0) - src_data = nval; - else - src_data = *((__global uchar4 *)((__global uchar *)src + srcIdx)); - *((__global uchar4 *)((__global uchar*)dst + dstIdx)) = src_data; - - - } - -} - -__kernel void remapNNF1Constant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - int dstIdx = y * dst_step + (x << 2) + dst_offset; - int mapIdx = y * map1_step + (x << 2) + map1_offset; - float map1_data = *((__global float *)((__global char*)map1 + mapIdx)); - float map2_data = *((__global float *)((__global char*)map2 + mapIdx)); - int srcIdx = convert_int_sat_rte(map2_data) * src_step + (convert_int_sat_rte(map1_data) << 2) + src_offset; - uchar4 nval = convert_uchar4(nVal); - uchar4 src_data; - if(convert_int_sat_rte(map1_data) >= src_cols || convert_int_sat_rte(map2_data) >= src_rows || convert_int_sat_rte(map1_data) < 0 || convert_int_sat_rte(map2_data) < 0) - src_data = nval; + if (NEED_EXTRAPOLATION(gx, gy)) + { + int2 gxy = (int2)(gx, gy), zero = (int2)(0); + EXTRAPOLATE(gxy, dst[dstIdx]); + } else - src_data = *((__global uchar4 *)((__global uchar *)src + srcIdx)); - *((__global uchar4 *)((__global uchar*)dst + dstIdx)) = src_data; + { + int srcIdx = mad24(gy, src_step, gx + src_offset); + dst[dstIdx] = src[srcIdx]; + } } } -__kernel void remapNNSConstant_C1_D5(__global float* dst, __global float const * restrict src, - __global short * map1, int dst_offset, int src_offset, int map1_offset, int 
dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows ,int threadCols, F4 nVal) +__kernel void remap_32FC2(__global const T * restrict src, __global T * dst, __global float2 * map1, + int src_offset, int dst_offset, int map1_offset, + int src_step, int dst_step, int map1_step, + int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar) { int x = get_global_id(0); int y = get_global_id(1); - if(x < threadCols && y < dst_rows) + if (x < dst_cols && y < dst_rows) { - int dstIdx = y * dst_step + (x << 2) + dst_offset; - int mapIdx = y * map1_step + (x << 2) + map1_offset; - short2 map1_data = *((__global short2 *)((__global char*)map1 + mapIdx)); - int srcIdx = map1_data.y * src_step + (map1_data.x << 2) + src_offset; - float nval = convert_float(nVal.x); - float src_data; - if(map1_data.x >= src_cols || map1_data.y >= src_rows|| map1_data.x < 0 || map1_data.y < 0) - src_data = nval; + int dstIdx = mad24(y, dst_step, x + dst_offset); + int map1Idx = mad24(y, map1_step, x + map1_offset); + + int2 gxy = convert_int2_sat_rte(map1[map1Idx]); + int gx = gxy.x, gy = gxy.y; + + if (NEED_EXTRAPOLATION(gx, gy)) + { + int2 zero = (int2)(0); + EXTRAPOLATE(gxy, dst[dstIdx]); + } else - src_data = *((__global float *)((__global uchar *)src + srcIdx)); - *((__global float *)((__global uchar*)dst + dstIdx)) = src_data; - - + { + int srcIdx = mad24(gy, src_step, gx + src_offset); + dst[dstIdx] = src[srcIdx]; + } } - - } -__kernel void remapNNFConstant_C1_D5(__global float* dst, __global float const * restrict src, - __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows ,int threadCols, F4 nVal) +__kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __global short2 * map1, + int src_offset, int dst_offset, int map1_offset, + int src_step, 
int dst_step, int map1_step, + int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar) { int x = get_global_id(0); int y = get_global_id(1); - if(x < threadCols && y < dst_rows) + if (x < dst_cols && y < dst_rows) { - int dstIdx = y * dst_step + (x << 2) + dst_offset; - int mapIdx = y * map1_step + (x << 3) + map1_offset; - float2 map1_data = *((__global float2 *)((__global char*)map1 + mapIdx)); - int2 map1_dataZ = convert_int2_sat_rte(map1_data); - int srcIdx = map1_dataZ.y * src_step + (map1_dataZ.x << 2) + src_offset; - float nval = convert_float(nVal.x); - float src_data; - if(map1_dataZ.x >= src_cols || map1_dataZ.y >= src_rows || map1_dataZ.x < 0 || map1_dataZ.y < 0) - src_data = nval; + int dstIdx = mad24(y, dst_step, x + dst_offset); + int map1Idx = mad24(y, map1_step, x + map1_offset); + + int2 gxy = convert_int2(map1[map1Idx]); + int gx = gxy.x, gy = gxy.y; + + if (NEED_EXTRAPOLATION(gx, gy)) + { + int2 zero = (int2)(0); + EXTRAPOLATE(gxy, dst[dstIdx]); + } else - src_data = *((__global float *)((__global uchar *)src + srcIdx)); - *((__global float *)((__global uchar*)dst + dstIdx)) = src_data; - - + { + int srcIdx = mad24(gy, src_step, gx + src_offset); + dst[dstIdx] = src[srcIdx]; + } } - } -__kernel void remapNNF1Constant_C1_D5(__global float* dst, __global float const * restrict src, - __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows ,int threadCols, F4 nVal) +#elif INTER_LINEAR + +__kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst, + __global float * map1, __global float * map2, + int src_offset, int dst_offset, int map1_offset, int map2_offset, + int src_step, int dst_step, int map1_step, int map2_step, + int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal) { int x = get_global_id(0); int y = get_global_id(1); - if(x < 
threadCols && y < dst_rows) + if (x < dst_cols && y < dst_rows) { - int dstIdx = y * dst_step + (x << 2) + dst_offset; - int mapIdx = y * map1_step + (x << 2) + map1_offset; - float map1_data = *((__global float *)((__global char*)map1 + mapIdx)); - float map2_data = *((__global float *)((__global char*)map2 + mapIdx)); - float2 map_data = (float2)(map1_data, map2_data); - int2 map1_dataZ = convert_int2_sat_rte(map_data); - int srcIdx = map1_dataZ.y * src_step + (map1_dataZ.x << 2) + src_offset; - float nval = convert_float(nVal.x); - float src_data; + int dstIdx = mad24(y, dst_step, x + dst_offset); + int map1Idx = mad24(y, map1_step, x + map1_offset); + int map2Idx = mad24(y, map2_step, x + map2_offset); - if(map1_dataZ.x >= src_cols || map1_dataZ.y >= src_rows || map1_dataZ.x < 0 || map1_dataZ.y < 0) - src_data = nval; - else - src_data = *((__global float *)((__global uchar *)src + srcIdx)); - *((__global float *)((__global uchar*)dst + dstIdx)) = src_data; + float2 map_data = (float2)(map1[map1Idx], map2[map2Idx]); - - } - -} - -__kernel void remapNNSConstant_C4_D5(__global float * dst, __global float const * restrict src, - __global short * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - int dstIdx = y * dst_step + (x << 4) + dst_offset ; - int mapIdx = y * map1_step + (x << 2) + map1_offset ; - short2 map1_data = *((__global short2 *)((__global char*)map1 + mapIdx)); - int srcIdx = map1_data.y * src_step + (map1_data.x << 4) + src_offset; - float4 nval = convert_float4(nVal); - float4 src_data; - if (map1_data.x <0 || map1_data.x >= src_cols || map1_data.y <0 || map1_data.y >= src_rows) - src_data = nval; - else - src_data = *((__global float4 *)((__global uchar *)src + srcIdx)); - *((__global 
float4 *)((__global uchar*)dst + dstIdx)) = src_data; - - - } -} - -__kernel void remapNNFConstant_C4_D5(__global float * dst, __global float const * restrict src, - __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - int dstIdx = y * dst_step + (x << 4) + dst_offset ; - int mapIdx = y * map1_step + (x << 3) + map1_offset ; - float2 map1_data = *((__global float2 *)((__global char*)map1 + mapIdx)); - int2 map1_dataZ = convert_int2_sat_rte(map1_data); - int srcIdx = map1_dataZ.y * src_step + (map1_dataZ.x << 4) + src_offset; - float4 nval = convert_float4(nVal); - float4 src_data = nval; - if(map1_dataZ.x >= 0 && map1_dataZ.x < src_cols && map1_dataZ.y >=0 && map1_dataZ.y < src_rows) - src_data = *((__global float4 *)((__global uchar *)src + srcIdx)); - *((__global float4 *)((__global uchar*)dst + dstIdx)) = src_data; - } -} - -__kernel void remapNNF1Constant_C4_D5(__global float * dst, __global float const * restrict src, - __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - int dstIdx = y * dst_step + (x << 4) + dst_offset ; - int mapIdx = y * map1_step + (x << 2) + map1_offset ; - float map1_data = *((__global float *)((__global char*)map1 + mapIdx)); - float map2_data = *((__global float *)((__global char*)map2 + mapIdx)); - float2 map_data = (float2)(map1_data, map2_data); - int2 map1_dataZ = convert_int2_sat_rte(map_data); - int srcIdx = map1_dataZ.y * src_step + (map1_dataZ.x << 4) + 
src_offset; - float4 nval = convert_float4(nVal); - float4 src_data = nval; - if(map1_dataZ.x >= 0 && map1_dataZ.x < src_cols && map1_dataZ.y >= 0 && map1_dataZ.y < src_rows) - src_data = *((__global float4 *)((__global uchar *)src + srcIdx)); - *((__global float4 *)((__global uchar*)dst + dstIdx)) = src_data; - } -} - - - -__kernel void remapLNFConstant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - - int x = get_global_id(0); - int y = get_global_id(1); - if(x < threadCols && y < dst_rows) - { - x = x << 2; - int gx = x - (dst_offset&3); - int4 Gx = (int4)(gx, gx+1, gx+2, gx+3); - - uchar4 nval =convert_uchar4(nVal); - uchar4 val = (uchar4)(nval.s0); - - - int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3); - - int map1Start = y * map1_step + (x << 3) + map1_offset - ((dst_offset & 3) << 3); - float8 map1_data; - - map1_data = *((__global float8 *)((__global char*)map1 + map1Start)); - int8 map1_dataD = convert_int8(map1_data); - float8 temp = map1_data - convert_float8(map1_dataD); - - float4 u = temp.even; - float4 v = temp.odd; - float4 ud = (float4)(1.0) - u; - float4 vd = (float4)(1.0) - v; - //float8 map1_dataU = map1_dataD + 1; - - int4 map1_dataDx = map1_dataD.even; - int4 map1_dataDy = map1_dataD.odd; - int4 map1_dataDx1 = map1_dataDx + (int4)(1); - int4 map1_dataDy1 = map1_dataDy + (int4)(1); - uchar4 a = val, b = val, c = val, d =val; - - if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0) - a.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s0 * src_step + map1_dataDx.s0 + src_offset)); - if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0) - a.s1 = 
*((__global uchar*)((__global uchar *)src + map1_dataDy.s1 * src_step + map1_dataDx.s1 + src_offset)); - if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0) - a.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s2 * src_step + map1_dataDx.s2 + src_offset)); - if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0) - a.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s3 * src_step + map1_dataDx.s3 + src_offset)); - - if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0) - b.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s0 * src_step + map1_dataDx1.s0 + src_offset)); - if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0) - b.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s1 * src_step + map1_dataDx1.s1 + src_offset)); - if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0) - b.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s2 * src_step + map1_dataDx1.s2 + src_offset)); - if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0) - b.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s3 * src_step + map1_dataDx1.s3 + src_offset)); - - if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0) - c.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s0 * src_step + map1_dataDx.s0 + src_offset)); - if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0) - c.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s1 * src_step + map1_dataDx.s1 + src_offset)); - if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0) - 
c.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s2 * src_step + map1_dataDx.s2 + src_offset)); - if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0) - c.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s3 * src_step + map1_dataDx.s3 + src_offset)); - - if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0) - d.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s0 * src_step + map1_dataDx1.s0 + src_offset)); - if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0) - d.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s1 * src_step + map1_dataDx1.s1 + src_offset)); - if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0) - d.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s2 * src_step + map1_dataDx1.s2 + src_offset)); - if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0) - d.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s3 * src_step + map1_dataDx1.s3 + src_offset)); - - uchar4 dst_data = convert_uchar4_sat_rte((convert_float4(a))* ud * vd +(convert_float4(b))* u * vd + (convert_float4(c))* ud * v + (convert_float4(d)) * u * v ); - - __global uchar4* D = (__global uchar4 *)(dst + dstStart); - - uchar4 dVal = *D; - int4 con = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows); - dst_data = (convert_uchar4(con) != (uchar4)(0)) ? 
dst_data : dVal; - - *D = dst_data; - } -} - -__kernel void remapLNF1Constant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - - int x = get_global_id(0); - int y = get_global_id(1); - if(x < threadCols && y < dst_rows) - { - x = x << 2; - int gx = x - (dst_offset&3); - int4 Gx = (int4)(gx, gx+1, gx+2, gx+3); - - uchar4 nval =convert_uchar4(nVal); - uchar4 val = (uchar4)(nval.s0); - - - int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3); - - int map1Start = y * map1_step + (x << 2) + map1_offset - ((dst_offset & 3) << 2); - float4 map1_data; - float4 map2_data; - - map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); - map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); - float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); - int8 map1_dataD = convert_int8(map_data); - float8 temp = map_data - convert_float8(map1_dataD); - - float4 u = temp.even; - float4 v = temp.odd; - float4 ud = (float4)(1.0) - u; - float4 vd = (float4)(1.0) - v; - //float8 map1_dataU = map1_dataD + 1; - - int4 map1_dataDx = map1_dataD.even; - int4 map1_dataDy = map1_dataD.odd; - int4 map1_dataDx1 = map1_dataDx + (int4)(1); - int4 map1_dataDy1 = map1_dataDy + (int4)(1); - - uchar4 a = val, b = val, c = val, d =val; - if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0) - a.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s0 * src_step + map1_dataDx.s0 + src_offset)); - if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0) - a.s1 = *((__global uchar*)((__global 
uchar *)src + map1_dataDy.s1 * src_step + map1_dataDx.s1 + src_offset)); - if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0) - a.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s2 * src_step + map1_dataDx.s2 + src_offset)); - if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0) - a.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s3 * src_step + map1_dataDx.s3 + src_offset)); - - if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0) - b.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s0 * src_step + map1_dataDx1.s0 + src_offset)); - if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0) - b.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s1 * src_step + map1_dataDx1.s1 + src_offset)); - if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0) - b.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s2 * src_step + map1_dataDx1.s2 + src_offset)); - if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0) - b.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s3 * src_step + map1_dataDx1.s3 + src_offset)); - - if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0) - c.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s0 * src_step + map1_dataDx.s0 + src_offset)); - if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0) - c.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s1 * src_step + map1_dataDx.s1 + src_offset)); - if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0) - c.s2 = *((__global 
uchar*)((__global uchar *)src + map1_dataDy1.s2 * src_step + map1_dataDx.s2 + src_offset)); - if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0) - c.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s3 * src_step + map1_dataDx.s3 + src_offset)); - - if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0) - d.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s0 * src_step + map1_dataDx1.s0 + src_offset)); - if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0) - d.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s1 * src_step + map1_dataDx1.s1 + src_offset)); - if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0) - d.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s2 * src_step + map1_dataDx1.s2 + src_offset)); - if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0) - d.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s3 * src_step + map1_dataDx1.s3 + src_offset)); - - - uchar4 dst_data = convert_uchar4_sat_rte((convert_float4(a))* ud * vd +(convert_float4(b))* u * vd + (convert_float4(c))* ud * v + (convert_float4(d)) * u * v ); - - __global uchar4* D = (__global uchar4 *)(dst + dstStart); - - uchar4 dVal = *D; - int4 con = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows); - dst_data = (convert_uchar4(con) != (uchar4)(0)) ? 
dst_data : dVal; - - *D = dst_data; - } -} - - - -__kernel void remapLNFConstant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - - int x = get_global_id(0); - int y = get_global_id(1); - if(x < threadCols && y < dst_rows) - { - int dstIdx = y * dst_step + (x << 2) + dst_offset; - int mapIdx = y * map1_step + (x << 3) + map1_offset; - float2 map_data = *((__global float2 *)((__global char*)map1 + mapIdx)); - int2 map_dataA = convert_int2(map_data); - float2 u = map_data - convert_float2(map_dataA); + int2 map_dataA = convert_int2_sat_rtn(map_data); int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y); int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1); int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1); - uchar4 nval = convert_uchar4(nVal); - uchar4 a, b, c , d; - if(map_dataA.x < 0 || map_dataA.x >= src_cols || map_dataA.y >= src_rows || map_dataA.y < 0) - a = nval; - else - a = *((__global uchar4 *)((__global uchar *)src + map_dataA.y * src_step + (map_dataA.x<<2) + src_offset )); - if(map_dataB.x < 0 || map_dataB.x >= src_cols || map_dataB.y >= src_rows || map_dataB.y < 0) - b = nval; - else - b = *((__global uchar4 *)((__global uchar *)src + map_dataB.y * src_step + (map_dataB.x<<2) + src_offset )); + int2 zero = (int2)(0); - if(map_dataC.x < 0 || map_dataC.x >= src_cols || map_dataC.y >= src_rows || map_dataC.y < 0) - c = nval; + float2 _u = map_data - convert_float2(map_dataA); + WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32; + WT scalar = convertToWT(nVal); + WT a = scalar, b = scalar, c = scalar, d = scalar; + + if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y)) + a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]); else - c = 
*((__global uchar4 *)((__global uchar *)src + map_dataC.y * src_step + (map_dataC.x<<2) + src_offset )); + EXTRAPOLATE(map_dataA, a); - if(map_dataD.x < 0 || map_dataD.x >= src_cols || map_dataD.y >= src_rows || map_dataD.y < 0) - d = nval; + if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y)) + b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]); else - d = *((__global uchar4 *)((__global uchar *)src + map_dataD.y * src_step + (map_dataD.x<<2) + src_offset )); - float4 dst_data = convert_float4(a)*((float4)(1.0-u.x)*((float4)(1.0-u.y))) + convert_float4(b)*((float4)(u.x))*((float4)(1.0-u.y)) + convert_float4(c)*((float4)(1.0-u.x))*((float4)(u.y)) + convert_float4(d)*((float4)(u.x))*((float4)(u.y)); - *((__global uchar4 *)((__global uchar*)dst + dstIdx)) = convert_uchar4_sat_rte(dst_data); + EXTRAPOLATE(map_dataB, b); + if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y)) + c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]); + else + EXTRAPOLATE(map_dataC, c); + if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y)) + d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]); + else + EXTRAPOLATE(map_dataD, d); + + WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) + + b * (WT)(u.x) * (WT)(1 - u.y) + + c * (WT)(1 - u.x) * (WT)(u.y) + + d * (WT)(u.x) * (WT)(u.y); + dst[dstIdx] = convertToT(dst_data); } - } -__kernel void remapLNF1Constant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, - __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ +__kernel void remap_32FC2(__global T const * restrict src, __global T * dst, + __global float2 * map1, + int src_offset, int dst_offset, int map1_offset, + int src_step, int dst_step, int map1_step, + int src_cols, int src_rows, int 
dst_cols, int dst_rows, T nVal) +{ int x = get_global_id(0); int y = get_global_id(1); - if(x < threadCols && y < dst_rows) + + if (x < dst_cols && y < dst_rows) { - int dstIdx = y * dst_step + (x << 2) + dst_offset; - int mapIdx = y * map1_step + (x << 2) + map1_offset; - float map1_data = *((__global float *)((__global char*)map1 + mapIdx)); - float map2_data = *((__global float *)((__global char*)map2 + mapIdx)); - float2 map_data = (float2)(map1_data, map2_data); - int2 map_dataA = convert_int2(map_data); - float2 u = map_data - convert_float2(map_dataA); + int dstIdx = mad24(y, dst_step, x + dst_offset); + int map1Idx = mad24(y, map1_step, x + map1_offset); + + float2 map_data = map1[map1Idx]; + int2 map_dataA = convert_int2_sat_rtn(map_data); int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y); int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1); - int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1); - uchar4 nval = convert_uchar4(nVal); - uchar4 a, b, c , d; - if(map_dataA.x < 0 || map_dataA.x >= src_cols || map_dataA.y >= src_rows || map_dataA.y < 0) - a = nval; + int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1); + int2 zero = (int2)(0); + + float2 _u = map_data - convert_float2(map_dataA); + WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32; + WT scalar = convertToWT(nVal); + WT a = scalar, b = scalar, c = scalar, d = scalar; + + if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y)) + a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]); else - a = *((__global uchar4 *)((__global uchar *)src + map_dataA.y * src_step + (map_dataA.x<<2) + src_offset )); - if(map_dataB.x < 0 || map_dataB.x >= src_cols || map_dataB.y >= src_rows || map_dataB.y < 0) - b = nval; + EXTRAPOLATE(map_dataA, a); + + if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y)) + b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]); else - b = *((__global uchar4 *)((__global uchar *)src + map_dataB.y * 
src_step + (map_dataB.x<<2) + src_offset )); + EXTRAPOLATE(map_dataB, b); - if(map_dataC.x < 0 || map_dataC.x >= src_cols || map_dataC.y >= src_rows || map_dataC.y < 0) - c = nval; + if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y)) + c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]); else - c = *((__global uchar4 *)((__global uchar *)src + map_dataC.y * src_step + (map_dataC.x<<2) + src_offset )); + EXTRAPOLATE(map_dataC, c); - if(map_dataD.x < 0 || map_dataD.x >= src_cols || map_dataD.y >= src_rows || map_dataD.y < 0) - d = nval; + if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y)) + d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]); else - d = *((__global uchar4 *)((__global uchar *)src + map_dataD.y * src_step + (map_dataD.x<<2) + src_offset )); - float4 dst_data = convert_float4(a)*((float4)(1.0-u.x)*((float4)(1.0-u.y))) + convert_float4(b)*((float4)(u.x))*((float4)(1.0-u.y)) + convert_float4(c)*((float4)(1.0-u.x))*((float4)(u.y)) + convert_float4(d)*((float4)(u.x))*((float4)(u.y)); - *((__global uchar4 *)((__global uchar*)dst + dstIdx)) = convert_uchar4_sat_rte(dst_data); - - + EXTRAPOLATE(map_dataD, d); + WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) + + b * (WT)(u.x) * (WT)(1 - u.y) + + c * (WT)(1 - u.x) * (WT)(u.y) + + d * (WT)(u.x) * (WT)(u.y); + dst[dstIdx] = convertToT(dst_data); } } - - -__kernel void remapLNFConstant_C1_D5(__global float* dst, __global float const * restrict src, - __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - - int x = get_global_id(0); - int y = get_global_id(1); - if(x < threadCols && y < dst_rows) - { - x = x << 4; - int gx = x - (dst_offset&15); - int4 Gx = (int4)(gx, gx+4, gx+8, gx+12); - - float4 nval =convert_float4(nVal); - float4 val = (float4)(nval.s0); - - int dstStart = (y * 
dst_step + x + dst_offset) - (dst_offset&15); - int map1Start = y * map1_step + (x << 1) + map1_offset - ((dst_offset & 15) << 1); - float8 map1_data; - - map1_data = *((__global float8 *)((__global char*)map1 + map1Start)); - int8 map1_dataD = convert_int8(map1_data); - float8 temp = map1_data - convert_float8(map1_dataD); - - float4 u = temp.even; - float4 v = temp.odd; - float4 ud = (float4)(1.0) - u; - float4 vd = (float4)(1.0) - v; - //float8 map1_dataU = map1_dataD + 1; - - int4 map1_dataDx = map1_dataD.even; - int4 map1_dataDy = map1_dataD.odd; - int4 map1_dataDx1 = map1_dataDx + (int4)(1); - int4 map1_dataDy1 = map1_dataDy + (int4)(1); - - float4 a = val, b = val, c = val, d = val; - if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0) - a.s0 = *((__global float*)((__global uchar *)src + map1_dataDy.s0 * src_step + (map1_dataDx.s0 << 2) + src_offset)); - if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0) - a.s1 = *((__global float*)((__global uchar *)src + map1_dataDy.s1 * src_step + (map1_dataDx.s1 << 2) + src_offset)); - if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0) - a.s2 = *((__global float*)((__global uchar *)src + map1_dataDy.s2 * src_step + (map1_dataDx.s2 << 2) + src_offset)); - if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0) - a.s3 = *((__global float*)((__global uchar *)src + map1_dataDy.s3 * src_step + (map1_dataDx.s3 << 2) + src_offset)); - - if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0) - b.s0 = *((__global float*)((__global uchar *)src + map1_dataDy.s0 * src_step + (map1_dataDx1.s0 << 2) + src_offset)); - if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0) - b.s1 = *((__global float*)((__global uchar *)src 
+ map1_dataDy.s1 * src_step + (map1_dataDx1.s1 << 2) + src_offset)); - if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0) - b.s2 = *((__global float*)((__global uchar *)src + map1_dataDy.s2 * src_step + (map1_dataDx1.s2 << 2) + src_offset)); - if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0) - b.s3 = *((__global float*)((__global uchar *)src + map1_dataDy.s3 * src_step + (map1_dataDx1.s3 << 2) + src_offset)); - - if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0) - c.s0 = *((__global float*)((__global uchar *)src + map1_dataDy1.s0 * src_step + (map1_dataDx.s0 << 2) + src_offset)); - if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0) - c.s1 = *((__global float*)((__global uchar *)src + map1_dataDy1.s1 * src_step + (map1_dataDx.s1 << 2) + src_offset)); - if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0) - c.s2 = *((__global float*)((__global uchar *)src + map1_dataDy1.s2 * src_step + (map1_dataDx.s2 << 2) + src_offset)); - if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0) - c.s3 = *((__global float*)((__global uchar *)src + map1_dataDy1.s3 * src_step + (map1_dataDx.s3 << 2) + src_offset)); - - if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0) - d.s0 = *((__global float*)((__global uchar *)src + map1_dataDy1.s0 * src_step + (map1_dataDx1.s0 << 2) + src_offset)); - if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0) - d.s1 = *((__global float*)((__global uchar *)src + map1_dataDy1.s1 * src_step + (map1_dataDx1.s1 << 2) + src_offset)); - if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy1.s2 < 
src_rows && map1_dataDy1.s2 >= 0) - d.s2 = *((__global float*)((__global uchar *)src + map1_dataDy1.s2 * src_step + (map1_dataDx1.s2 << 2) + src_offset)); - if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0) - d.s3 = *((__global float*)((__global uchar *)src + map1_dataDy1.s3 * src_step + (map1_dataDx1.s3 << 2) + src_offset)); - - float4 dst_data = a * ud * vd + b * u * vd + c * ud * v + d * u * v ; - - __global float4* D = (__global float4 *)((__global char*)dst + dstStart); - - float4 dVal = *D; - int4 con = (Gx >= 0 && Gx < (dst_cols << 2) && y >= 0 && y < dst_rows); - dst_data = (convert_float4(con) != (float4)(0)) ? dst_data : dVal; - - *D = dst_data; - } -} - -__kernel void remapLNF1Constant_C1_D5(__global float* dst, __global float const * restrict src, - __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - - int x = get_global_id(0); - int y = get_global_id(1); - if(x < threadCols && y < dst_rows) - { - x = x << 4; - int gx = x - (dst_offset&15); - int4 Gx = (int4)(gx, gx+4, gx+8, gx+12); - - float4 nval =convert_float4(nVal); - float4 val = (float4)(nval.s0); - - int dstStart = y * dst_step + x + dst_offset - (dst_offset & 15); - int map1Start = y * map1_step + x + map1_offset - (dst_offset & 15); - float4 map1_data; - float4 map2_data; - - map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); - map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); - float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); - int8 map1_dataD = convert_int8(map_data); - float8 temp = map_data - convert_float8(map1_dataD); - - float4 u = temp.even; - float4 v = temp.odd; - float4 ud = (float4)(1.0) - u; - 
float4 vd = (float4)(1.0) - v; - //float8 map1_dataU = map1_dataD + 1; - - int4 map1_dataDx = map1_dataD.even; - int4 map1_dataDy = map1_dataD.odd; - int4 map1_dataDx1 = map1_dataDx + (int4)(1); - int4 map1_dataDy1 = map1_dataDy + (int4)(1); - - float4 a = val, b = val, c = val, d = val; - if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0) - a.s0 = *((__global float*)((__global uchar *)src + map1_dataDy.s0 * src_step + (map1_dataDx.s0 << 2) + src_offset)); - if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0) - a.s1 = *((__global float*)((__global uchar *)src + map1_dataDy.s1 * src_step + (map1_dataDx.s1 << 2) + src_offset)); - if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0) - a.s2 = *((__global float*)((__global uchar *)src + map1_dataDy.s2 * src_step + (map1_dataDx.s2 << 2) + src_offset)); - if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0) - a.s3 = *((__global float*)((__global uchar *)src + map1_dataDy.s3 * src_step + (map1_dataDx.s3 << 2) + src_offset)); - - if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0) - b.s0 = *((__global float*)((__global uchar *)src + map1_dataDy.s0 * src_step + (map1_dataDx1.s0 << 2) + src_offset)); - if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0) - b.s1 = *((__global float*)((__global uchar *)src + map1_dataDy.s1 * src_step + (map1_dataDx1.s1 << 2) + src_offset)); - if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0) - b.s2 = *((__global float*)((__global uchar *)src + map1_dataDy.s2 * src_step + (map1_dataDx1.s2 << 2) + src_offset)); - if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0) 
- b.s3 = *((__global float*)((__global uchar *)src + map1_dataDy.s3 * src_step + (map1_dataDx1.s3 << 2) + src_offset)); - - if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0) - c.s0 = *((__global float*)((__global uchar *)src + map1_dataDy1.s0 * src_step + (map1_dataDx.s0 << 2) + src_offset)); - if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0) - c.s1 = *((__global float*)((__global uchar *)src + map1_dataDy1.s1 * src_step + (map1_dataDx.s1 << 2) + src_offset)); - if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0) - c.s2 = *((__global float*)((__global uchar *)src + map1_dataDy1.s2 * src_step + (map1_dataDx.s2 << 2) + src_offset)); - if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0) - c.s3 = *((__global float*)((__global uchar *)src + map1_dataDy1.s3 * src_step + (map1_dataDx.s3 << 2) + src_offset)); - - if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0) - d.s0 = *((__global float*)((__global uchar *)src + map1_dataDy1.s0 * src_step + (map1_dataDx1.s0 << 2) + src_offset)); - if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0) - d.s1 = *((__global float*)((__global uchar *)src + map1_dataDy1.s1 * src_step + (map1_dataDx1.s1 << 2) + src_offset)); - if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0) - d.s2 = *((__global float*)((__global uchar *)src + map1_dataDy1.s2 * src_step + (map1_dataDx1.s2 << 2) + src_offset)); - if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0) - d.s3 = *((__global float*)((__global uchar *)src + map1_dataDy1.s3 * src_step + (map1_dataDx1.s3 << 2) + src_offset)); - - - float4 dst_data = 
a * ud * vd + b * u * vd + c * ud * v + d * u * v ; - - __global float4* D = (__global float4 *)((__global char*)dst + dstStart); - - float4 dVal = *D; - int4 con = (Gx >= 0 && Gx < (dst_cols << 2) && y >= 0 && y < dst_rows); - dst_data = (convert_float4(con) != (float4)(0)) ? dst_data : dVal; - - *D = dst_data; - } -} - - - -__kernel void remapLNFConstant_C4_D5(__global float * dst, __global float const * restrict src, - __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - int dstIdx = y * dst_step + (x << 4) + dst_offset ; - int mapIdx = y * map1_step + (x << 3) + map1_offset ; - float2 map_data = *((__global float2 *)((__global char*)map1 + mapIdx)); - int2 map_dataA = convert_int2(map_data); - float2 u = map_data - convert_float2(map_dataA); - int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y); - int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1); - int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1); - float4 nval = convert_float4(nVal); - float4 a, b, c , d; - if(map_dataA.x < 0 || map_dataA.x >= src_cols || map_dataA.y >= src_rows || map_dataA.y < 0) - a = nval; - else - a = *((__global float4 *)((__global uchar *)src + map_dataA.y * src_step + (map_dataA.x<<4) + src_offset )); - if(map_dataB.x < 0 || map_dataB.x >= src_cols || map_dataB.y >= src_rows || map_dataB.y < 0) - b = nval; - else - b = *((__global float4 *)((__global uchar *)src + map_dataB.y * src_step + (map_dataB.x<<4) + src_offset )); - - if(map_dataC.x < 0 || map_dataC.x >= src_cols || map_dataC.y >= src_rows || map_dataC.y < 0) - c = nval; - else - c = *((__global float4 *)((__global uchar *)src + map_dataC.y * src_step + (map_dataC.x<<4) + src_offset )); - - if(map_dataD.x < 0 || map_dataD.x >= src_cols || 
map_dataD.y >= src_rows || map_dataD.y < 0) - d = nval; - else - d = *((__global float4 *)((__global uchar *)src + map_dataD.y * src_step + (map_dataD.x<<4) + src_offset )); - - float4 dst_data = a * ((float4)(1.0-u.x)) * ((float4)(1.0-u.y)) + b *((float4)(u.x)) * ((float4)(1.0-u.y)) + c * ((float4)(1.0-u.x)) *((float4)(u.y)) + d *((float4)(u.x)) *((float4)(u.y)); - *((__global float4 *)((__global uchar*)dst + dstIdx)) = dst_data ; - - } -} - -__kernel void remapLNF1Constant_C4_D5(__global float * dst, __global float const * restrict src, - __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, - int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < threadCols && y < dst_rows) - { - int dstIdx = y * dst_step + (x << 4) + dst_offset ; - int mapIdx = y * map1_step + (x << 2) + map1_offset ; - float map1_data = *((__global float *)((__global char*)map1 + mapIdx)); - float map2_data = *((__global float *)((__global char*)map2 + mapIdx)); - float2 map_data = (float2)(map1_data, map2_data); - int2 map_dataA = convert_int2(map_data); - float2 u = map_data - convert_float2(map_dataA); - int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y); - int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1); - int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1); - float4 nval = convert_float4(nVal); - float4 a, b, c , d; - if(map_dataA.x < 0 || map_dataA.x >= src_cols || map_dataA.y >= src_rows || map_dataA.y < 0) - a = nval; - else - a = *((__global float4 *)((__global uchar *)src + map_dataA.y * src_step + (map_dataA.x<<4) + src_offset )); - if(map_dataB.x < 0 || map_dataB.x >= src_cols || map_dataB.y >= src_rows || map_dataB.y < 0) - b = nval; - else - b = *((__global float4 *)((__global uchar *)src + map_dataB.y * src_step + (map_dataB.x<<4) + src_offset )); - - 
if(map_dataC.x < 0 || map_dataC.x >= src_cols || map_dataC.y >= src_rows || map_dataC.y < 0) - c = nval; - else - c = *((__global float4 *)((__global uchar *)src + map_dataC.y * src_step + (map_dataC.x<<4) + src_offset )); - - if(map_dataD.x < 0 || map_dataD.x >= src_cols || map_dataD.y >= src_rows || map_dataD.y < 0) - d = nval; - else - d = *((__global float4 *)((__global uchar *)src + map_dataD.y * src_step + (map_dataD.x<<4) + src_offset )); - - float4 dst_data = a * ((float4)(1.0-u.x)) * ((float4)(1.0-u.y)) + b *((float4)(u.x)) * ((float4)(1.0-u.y)) + c * ((float4)(1.0-u.x)) *((float4)(u.y)) + d *((float4)(u.x)) *((float4)(u.y)); - *((__global float4 *)((__global uchar*)dst + dstIdx)) = dst_data ; - - - } -} +#endif diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp index 1c7dd21fb..cf1857479 100644 --- a/modules/ocl/test/test_filters.cpp +++ b/modules/ocl/test/test_filters.cpp @@ -62,8 +62,7 @@ PARAM_TEST_CASE(FilterTestBase, MatType, int, // border type, or iteration bool) // roi or not { - int type, borderType; - int ksize; + int type, borderType, ksize; bool useRoi; Mat src, dst_whole, src_roi, dst_roi; @@ -92,8 +91,12 @@ PARAM_TEST_CASE(FilterTestBase, MatType, void Near(double threshold = 0.0) { - EXPECT_MAT_NEAR(dst_whole, Mat(gdst_whole), threshold); - EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), threshold); + Mat roi, whole; + gdst_whole.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst_whole, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); } }; diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index bbd98464f..fa7a70f4d 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -55,1603 +55,492 @@ #ifdef HAVE_OPENCL -using namespace cv; -using namespace std; using namespace testing; +using namespace std; +using namespace cv; -MatType nulltype = -1; +/////////////////////////////////////////////////////////////////////////////// 
-#define ONE_TYPE(type) testing::ValuesIn(typeVector(type)) -#define NULL_TYPE testing::ValuesIn(typeVector(nulltype)) - -vector typeVector(MatType type) +PARAM_TEST_CASE(ImgprocTestBase, MatType, + int, // blockSize + int, // border type + bool) // roi or not { - vector v; - v.push_back(type); - return v; -} + int type, borderType, blockSize; + bool useRoi; -typedef struct -{ - short x; - short y; -} COOR; - -COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, Size size, int sp, int sr, int maxIter, float eps, int *tab) -{ - - int isr2 = sr * sr; - int c0, c1, c2, c3; - int iter; - uchar *ptr = NULL; - uchar *pstart = NULL; - int revx = 0, revy = 0; - c0 = sptr[0]; - c1 = sptr[1]; - c2 = sptr[2]; - c3 = sptr[3]; - // iterate meanshift procedure - for(iter = 0; iter < maxIter; iter++ ) - { - int count = 0; - int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; - - //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) - int minx = x0 - sp; - int miny = y0 - sp; - int maxx = x0 + sp; - int maxy = y0 + sp; - - //deal with the image boundary - if(minx < 0) minx = 0; - if(miny < 0) miny = 0; - if(maxx >= size.width) maxx = size.width - 1; - if(maxy >= size.height) maxy = size.height - 1; - if(iter == 0) - { - pstart = sptr; - } - else - { - pstart = pstart + revy * sstep + (revx << 2); //point to the new position - } - ptr = pstart; - ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row - - for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) - { - int rowCount = 0; - int x = minx; -#if CV_ENABLE_UNROLLED - for( ; x + 4 <= maxx; x += 4, ptr += 16) - { - int t0, t1, t2; - t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x; - rowCount++; - } - t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; - if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 
+= t2; - sx += x + 1; - rowCount++; - } - t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; - if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 2; - rowCount++; - } - t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; - if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 3; - rowCount++; - } - } -#endif - for(; x <= maxx; x++, ptr += 4) - { - int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x; - rowCount++; - } - } - if(rowCount == 0) - continue; - count += rowCount; - sy += y * rowCount; - } - - if( count == 0 ) - break; - - int x1 = sx / count; - int y1 = sy / count; - s0 = s0 / count; - s1 = s1 / count; - s2 = s2 / count; - - bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + - tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); - - //revise the pointer corresponding to the new (y0,x0) - revx = x1 - x0; - revy = y1 - y0; - - x0 = x1; - y0 = y1; - c0 = s0; - c1 = s1; - c2 = s2; - - if( stopFlag ) - break; - } //for iter - - dptr[0] = (uchar)c0; - dptr[1] = (uchar)c1; - dptr[2] = (uchar)c2; - dptr[3] = (uchar)c3; - - COOR coor; - coor.x = (short)x0; - coor.y = (short)y0; - return coor; -} - -void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, TermCriteria crit) -{ - if( src_roi.empty() ) - CV_Error( CV_StsBadArg, "The input image is empty" ); - - if( src_roi.depth() != CV_8U || src_roi.channels() != 4 ) - CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); - - CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) ); - CV_Assert( !(dst_roi.step & 0x3) ); - - if( !(crit.type & TermCriteria::MAX_ITER) ) - crit.maxCount = 5; - int maxIter = std::min(std::max(crit.maxCount, 1), 100); - float eps; - if( !(crit.type 
& TermCriteria::EPS) ) - eps = 1.f; - eps = (float)std::max(crit.epsilon, 0.0); - - int tab[512]; - for(int i = 0; i < 512; i++) - tab[i] = (i - 255) * (i - 255); - uchar *sptr = src_roi.data; - uchar *dptr = dst_roi.data; - int sstep = (int)src_roi.step; - int dstep = (int)dst_roi.step; - Size size = src_roi.size(); - - for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), - dptr += dstep - (size.width << 2)) - { - for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4) - { - do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); - } - } -} - -void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, TermCriteria crit) -{ - - if( src_roi.empty() ) - CV_Error( CV_StsBadArg, "The input image is empty" ); - if( src_roi.depth() != CV_8U || src_roi.channels() != 4 ) - CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); - CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && - (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); - CV_Assert( !(dstCoor_roi.step & 0x3) ); - - if( !(crit.type & TermCriteria::MAX_ITER) ) - crit.maxCount = 5; - int maxIter = std::min(std::max(crit.maxCount, 1), 100); - float eps; - if( !(crit.type & TermCriteria::EPS) ) - eps = 1.f; - eps = (float)std::max(crit.epsilon, 0.0); - - int tab[512]; - for(int i = 0; i < 512; i++) - tab[i] = (i - 255) * (i - 255); - uchar *sptr = src_roi.data; - uchar *dptr = dst_roi.data; - short *dCoorptr = (short *)dstCoor_roi.data; - int sstep = (int)src_roi.step; - int dstep = (int)dst_roi.step; - int dCoorstep = (int)dstCoor_roi.step >> 1; - Size size = src_roi.size(); - - for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), - dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1)) - { - for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2) - { - *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, 
size, sp, sr, maxIter, eps, tab); - } - } - -} - -PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bool) -{ - int type1, type2, type3, type4, type5; - Scalar val; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int dst1x; - int dst1y; - int maskx; - int masky; - - //mat - Mat mat1; - Mat mat2; - Mat mask; - Mat dst; - Mat dst1; //bak, for two outputs - - //mat with roi - Mat mat1_roi; - Mat mat2_roi; - Mat mask_roi; - Mat dst_roi; - Mat dst1_roi; //bak - - //ocl mat - ocl::oclMat clmat1; - ocl::oclMat clmat2; - ocl::oclMat clmask; - ocl::oclMat cldst; - ocl::oclMat cldst1; //bak - - //ocl mat with roi - ocl::oclMat clmat1_roi; - ocl::oclMat clmat2_roi; - ocl::oclMat clmask_roi; - ocl::oclMat cldst_roi; - ocl::oclMat cldst1_roi; + Mat src, dst_whole, src_roi, dst_roi; + ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi; virtual void SetUp() { - type1 = GET_PARAM(0); - type2 = GET_PARAM(1); - type3 = GET_PARAM(2); - type4 = GET_PARAM(3); - type5 = GET_PARAM(4); - Size size(MWIDTH, MHEIGHT); - double min = 1, max = 20; - - if(type1 != nulltype) - { - mat1 = randomMat(size, type1, min, max, false); - clmat1 = mat1; - } - if(type2 != nulltype) - { - mat2 = randomMat(size, type2, min, max, false); - clmat2 = mat2; - } - if(type3 != nulltype) - { - dst = randomMat(size, type3, min, max, false); - cldst = dst; - } - if(type4 != nulltype) - { - dst1 = randomMat(size, type4, min, max, false); - cldst1 = dst1; - } - if(type5 != nulltype) - { - mask = randomMat(size, CV_8UC1, 0, 2, false); - threshold(mask, mask, 0.5, 255., type5); - clmask = mask; - } - val = Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + type = GET_PARAM(0); + blockSize = GET_PARAM(1); + borderType = GET_PARAM(2); + useRoi = GET_PARAM(3); } void random_roi() { -#ifdef RANDOMROI - //randomize ROI - roicols = rng.uniform(1, mat1.cols); 
- roirows = rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); - dst1x = rng.uniform(0, dst1.cols - roicols); - dst1y = rng.uniform(0, dst1.rows - roirows); - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); -#else - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - src2x = 0; - src2y = 0; - dstx = 0; - dsty = 0; - dst1x = 0; - dst1y = 0; - maskx = 0; - masky = 0; -#endif + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256); + Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, 5, 16); - if(type1 != nulltype) - { - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - if(type2 != nulltype) - { - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - clmat2_roi = clmat2(Rect(src2x, src2y, roicols, roirows)); - } - if(type3 != nulltype) - { - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - cldst_roi = cldst(Rect(dstx, dsty, roicols, roirows)); - } - if(type4 != nulltype) - { - dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); - cldst1_roi = cldst1(Rect(dst1x, dst1y, roicols, roirows)); - } - if(type5 != nulltype) - { - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - clmask_roi = clmask(Rect(maskx, masky, roicols, roirows)); - } + generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder); + generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder); } - void Near(double threshold) + void Near(double threshold = 0.0) { - Mat cpu_cldst; - cldst.download(cpu_cldst); - 
EXPECT_MAT_NEAR(dst, cpu_cldst, threshold); + Mat whole, roi; + gdst_whole.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst_whole, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); } }; -////////////////////////////////equalizeHist////////////////////////////////////////// + +////////////////////////////////copyMakeBorder//////////////////////////////////////////// + +PARAM_TEST_CASE(CopyMakeBorder, MatDepth, // depth + Channels, // channels + bool, // isolated or not + Border, // border type + bool) // roi or not +{ + int type, borderType; + bool useRoi; + + Border border; + Scalar val; + + Mat src, dst_whole, src_roi, dst_roi; + ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi; + + virtual void SetUp() + { + type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1)); + borderType = GET_PARAM(3); + + if (GET_PARAM(2)) + borderType |= BORDER_ISOLATED; + + useRoi = GET_PARAM(4); + } + + void random_roi() + { + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256); + + Border dstBorder = randomBorder(0, useRoi ? 
MAX_VALUE : 0); + randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, 5, 16); + + generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder); + generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder); + + border = randomBorder(0, 10); + val = randomScalar(-MAX_VALUE, MAX_VALUE); + } + + void Near(double threshold = 0.0) + { + Mat whole, roi; + gdst_whole.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst_whole, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); + } +}; + +OCL_TEST_P(CopyMakeBorder, Mat) +{ + for (int i = 0; i < LOOP_TIMES; ++i) + { + random_roi(); + + cv::copyMakeBorder(src_roi, dst_roi, border.top, border.bot, border.lef, border.rig, borderType, val); + ocl::copyMakeBorder(gsrc_roi, gdst_roi, border.top, border.bot, border.lef, border.rig, borderType, val); + + Near(); + } +} + +////////////////////////////////equalizeHist////////////////////////////////////////////// typedef ImgprocTestBase EqualizeHist; OCL_TEST_P(EqualizeHist, Mat) { - if (mat1.type() != CV_8UC1 || mat1.type() != dst.type()) + for (int j = 0; j < LOOP_TIMES; j++) { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else - { - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - equalizeHist(mat1_roi, dst_roi); - ocl::equalizeHist(clmat1_roi, cldst_roi); - Near(1.1); - } + random_roi(); + + equalizeHist(src_roi, dst_roi); + ocl::equalizeHist(gsrc_roi, gdst_roi); + + Near(1.1); } } - -////////////////////////////////copyMakeBorder//////////////////////////////////////////// - -typedef ImgprocTestBase CopyMakeBorder; - -OCL_TEST_P(CopyMakeBorder, Mat) -{ - int bordertype[] = {BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101}; - int top = rng.uniform(0, 10); - int bottom = rng.uniform(0, 10); - int left = rng.uniform(0, 10); - int right = rng.uniform(0, 10); - if (mat1.type() != dst.type()) - { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else 
- { - for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); -#ifdef RANDOMROI - if(((bordertype[i] != BORDER_CONSTANT) && (bordertype[i] != BORDER_REPLICATE)) && (mat1_roi.cols <= left) || (mat1_roi.cols <= right) || (mat1_roi.rows <= top) || (mat1_roi.rows <= bottom)) - { - continue; - } - if((dstx >= left) && (dsty >= top) && (dstx + cldst_roi.cols + right <= cldst_roi.wholecols) && (dsty + cldst_roi.rows + bottom <= cldst_roi.wholerows)) - { - dst_roi.adjustROI(top, bottom, left, right); - cldst_roi.adjustROI(top, bottom, left, right); - } - else - { - continue; - } -#endif - cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i] | BORDER_ISOLATED, Scalar(1.0)); - ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | BORDER_ISOLATED, Scalar(1.0)); - - Mat cpu_cldst; -#ifndef RANDOMROI - cldst_roi.download(cpu_cldst); - EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0); -#else - cldst.download(cpu_cldst); - EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0); -#endif - - } - } -} - - - ////////////////////////////////cornerMinEigenVal////////////////////////////////////////// -struct CornerMinEigenVal : ImgprocTestBase {}; +typedef ImgprocTestBase CornerMinEigenVal; OCL_TEST_P(CornerMinEigenVal, Mat) { - for(int j = 0; j < LOOP_TIMES; j++) + for (int j = 0; j < LOOP_TIMES; j++) { - random_roi(); - int blockSize = 3, apertureSize = 3;//1 + 2 * (rand() % 4); - //int borderType = BORDER_CONSTANT; - //int borderType = BORDER_REPLICATE; - int borderType = BORDER_REFLECT; - cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType); - ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType); - Near(1.); + + int apertureSize = 3; + + cornerMinEigenVal(src_roi, dst_roi, blockSize, apertureSize, borderType); + ocl::cornerMinEigenVal(gsrc_roi, gdst_roi, blockSize, apertureSize, borderType); + + Near(1.0); } } - - 
////////////////////////////////cornerHarris////////////////////////////////////////// typedef ImgprocTestBase CornerHarris; OCL_TEST_P(CornerHarris, Mat) { - for(int j = 0; j < LOOP_TIMES; j++) + for (int j = 0; j < LOOP_TIMES; j++) { - random_roi(); - int blockSize = 3, apertureSize = 3; //1 + 2 * (rand() % 4); - double k = 2; - //int borderType = BORDER_CONSTANT; - //int borderType = BORDER_REPLICATE; - int borderType = BORDER_REFLECT; - cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, k, borderType); - ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, k, borderType); - Near(1.); + + int apertureSize = 3; + double k = 2.0; + + cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType); + ocl::cornerHarris(gsrc_roi, gdst_roi, blockSize, apertureSize, k, borderType); + + Near(1.0); } } - -////////////////////////////////integral///////////////////////////////////////////////// +//////////////////////////////////integral///////////////////////////////////////////////// typedef ImgprocTestBase Integral; OCL_TEST_P(Integral, Mat1) { - for(int j = 0; j < LOOP_TIMES; j++) + for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); - ocl::integral(clmat1_roi, cldst_roi); - integral(mat1_roi, dst_roi); - Near(0); + ocl::integral(gsrc_roi, gdst_roi); + integral(src_roi, dst_roi); + + Near(); } } -OCL_TEST_P(Integral, Mat2) +// TODO wrong output type +OCL_TEST_P(Integral, DISABLED_Mat2) { - for(int j = 0; j < LOOP_TIMES; j++) + Mat dst1; + ocl::oclMat gdst1; + + for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); - ocl::integral(clmat1_roi, cldst_roi, cldst1_roi); - integral(mat1_roi, dst_roi, dst1_roi); - Near(0); + integral(src_roi, dst1, dst_roi); + ocl::integral(gsrc_roi, gdst1, gdst_roi); - Mat cpu_cldst1; - cldst1.download(cpu_cldst1); - EXPECT_MAT_NEAR(dst1, cpu_cldst1, 0.0); + Near(); } } +/////////////////////////////////////////////////////////////////////////////////////////////////// +//// threshold 
-///////////////////////////////////////////////////////////////////////////////////////////////// -// warpAffine & warpPerspective - -PARAM_TEST_CASE(WarpTestBase, MatType, int) +struct Threshold : + public ImgprocTestBase { - int type; - Size size; - int interpolation; - - //src mat - Mat mat1; - Mat dst; - - // set up roi - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - - //src mat with roi - Mat mat1_roi; - Mat dst_roi; - - //ocl dst mat for testing - ocl::oclMat gdst_whole; - - //ocl mat with roi - ocl::oclMat gmat1; - ocl::oclMat gdst; + int thresholdType; virtual void SetUp() { type = GET_PARAM(0); - interpolation = GET_PARAM(1); - size = Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(size, type, 5, 16, false); - dst = randomMat(size, type, 5, 16, false); + blockSize = GET_PARAM(1); + thresholdType = GET_PARAM(2); + useRoi = GET_PARAM(3); } - - void random_roi() - { -#ifdef RANDOMROI - //randomize ROI - src_roicols = rng.uniform(1, mat1.cols); - src_roirows = rng.uniform(1, mat1.rows); - dst_roicols = rng.uniform(1, dst.cols); - dst_roirows = rng.uniform(1, dst.rows); - src1x = rng.uniform(0, mat1.cols - src_roicols); - src1y = rng.uniform(0, mat1.rows - src_roirows); - dstx = rng.uniform(0, dst.cols - dst_roicols); - dsty = rng.uniform(0, dst.rows - dst_roirows); -#else - src_roicols = mat1.cols; - src_roirows = mat1.rows; - dst_roicols = dst.cols; - dst_roirows = dst.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; -#endif - - - mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows)); - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - - gmat1 = mat1_roi; - } - -}; - -/////warpAffine - -typedef WarpTestBase WarpAffine; - -OCL_TEST_P(WarpAffine, Mat) -{ - static const double coeffs[2][3] = - { - {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0}, - {sin(CV_PI / 6), cos(CV_PI / 
6), -100.0} - }; - Mat M(2, 3, CV_64F, (void *)coeffs); - - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - - warpAffine(mat1_roi, dst_roi, M, size, interpolation); - ocl::warpAffine(gmat1, gdst, M, size, interpolation); - - Mat cpu_dst; - gdst_whole.download(cpu_dst); - EXPECT_MAT_NEAR(dst, cpu_dst, 1.0); - } - -} - - -// warpPerspective - -typedef WarpTestBase WarpPerspective; - -OCL_TEST_P(WarpPerspective, Mat) -{ - static const double coeffs[3][3] = - { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0}, - {0.0, 0.0, 1.0} - }; - Mat M(3, 3, CV_64F, (void *)coeffs); - - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - - warpPerspective(mat1_roi, dst_roi, M, size, interpolation); - ocl::warpPerspective(gmat1, gdst, M, size, interpolation); - - Mat cpu_dst; - gdst_whole.download(cpu_dst); - EXPECT_MAT_NEAR(dst, cpu_dst, 1.0); - } - -} - -///////////////////////////////////////////////////////////////////////////////////////////////// -// remap -////////////////////////////////////////////////////////////////////////////////////////////////// - -PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) -{ - int srcType; - int map1Type; - int map2Type; - Scalar val; - - int interpolation; - int bordertype; - - Mat src; - Mat dst; - Mat map1; - Mat map2; - - //std::vector oclinfo; - - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int map1_roicols; - int map1_roirows; - int map2_roicols; - int map2_roirows; - int srcx; - int srcy; - int dstx; - int dsty; - int map1x; - int map1y; - int map2x; - int map2y; - - Mat src_roi; - Mat dst_roi; - Mat map1_roi; - Mat map2_roi; - - //ocl mat for testing - ocl::oclMat gdst; - - //ocl mat with roi - ocl::oclMat gsrc_roi; - ocl::oclMat gdst_roi; - ocl::oclMat gmap1_roi; - ocl::oclMat gmap2_roi; - - virtual void SetUp() - { - srcType = GET_PARAM(0); - map1Type = GET_PARAM(1); - map2Type = GET_PARAM(2); - interpolation = GET_PARAM(3); - bordertype = 
GET_PARAM(4); - - Size srcSize = Size(MWIDTH, MHEIGHT); - Size map1Size = Size(MWIDTH, MHEIGHT); - double min = 5, max = 16; - - if(srcType != nulltype) - { - src = randomMat(srcSize, srcType, min, max, false); - } - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) - { - map1 = randomMat(map1Size, map1Type, min, max, false); - } - else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) - { - map1 = randomMat(map1Size, map1Type, min, max, false); - map2 = randomMat(map1Size, map1Type, min, max, false); - } - - else - { - cout << "The wrong input type" << endl; - return; - } - - dst = randomMat(map1Size, srcType, min, max, false); - switch (src.channels()) - { - case 1: - val = Scalar(rng.uniform(0.0, 10.0), 0, 0, 0); - break; - case 2: - val = Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0); - break; - case 3: - val = Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0); - break; - case 4: - val = Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0)); - break; - } - - } - void random_roi() - { - dst_roicols = rng.uniform(1, dst.cols); - dst_roirows = rng.uniform(1, dst.rows); - - src_roicols = rng.uniform(1, src.cols); - src_roirows = rng.uniform(1, src.rows); - - - srcx = rng.uniform(0, src.cols - src_roicols); - srcy = rng.uniform(0, src.rows - src_roirows); - dstx = rng.uniform(0, dst.cols - dst_roicols); - dsty = rng.uniform(0, dst.rows - dst_roirows); - map1_roicols = dst_roicols; - map1_roirows = dst_roirows; - map2_roicols = dst_roicols; - map2_roirows = dst_roirows; - map1x = dstx; - map1y = dsty; - map2x = dstx; - map2y = dsty; - - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) - { - map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); - gmap1_roi = map1_roi; - } - - else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) - { - map1_roi = 
map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); - gmap1_roi = map1_roi; - map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows)); - gmap2_roi = map2_roi; - } - src_roi = src(Rect(srcx, srcy, src_roicols, src_roirows)); - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gsrc_roi = src_roi; - gdst = dst; - gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - } -}; - -OCL_TEST_P(Remap, Mat) -{ - if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1)) - { - cout << "Don't support the dataType" << endl; - return; - } - int bordertype[] = {BORDER_CONSTANT, BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/}; - - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val); - ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); - Mat cpu_dst; - gdst.download(cpu_dst); - - if(interpolation == 0) - EXPECT_MAT_NEAR(dst, cpu_dst, 1.0); - EXPECT_MAT_NEAR(dst, cpu_dst, 2.0); - } -} - - - -///////////////////////////////////////////////////////////////////////////////////////////////// -// resize - -PARAM_TEST_CASE(Resize, MatType, Size, double, double, int) -{ - int type; - Size dsize; - double fx, fy; - int interpolation; - - //src mat - Mat mat1; - Mat dst; - - // set up roi - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - Mat mat1_roi; - Mat dst_roi; - - //ocl dst mat for testing - ocl::oclMat gdst_whole; - - //ocl mat with roi - ocl::oclMat gmat1; - ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - dsize = GET_PARAM(1); - fx = GET_PARAM(2); - fy = GET_PARAM(3); - interpolation = GET_PARAM(4); - - Size size(MWIDTH, MHEIGHT); - - if(dsize == Size() && 
!(fx > 0 && fy > 0)) - { - cout << "invalid dsize and fx fy" << endl; - return; - } - - if(dsize == Size()) - { - dsize.width = (int)(size.width * fx); - dsize.height = (int)(size.height * fy); - } - - mat1 = randomMat(size, type, 5, 16, false); - dst = randomMat(dsize, type, 5, 16, false); - - } - - void random_roi() - { -#ifdef RANDOMROI - //randomize ROI - src_roicols = rng.uniform(1, mat1.cols); - src_roirows = rng.uniform(1, mat1.rows); - dst_roicols = (int)(src_roicols * fx); - dst_roirows = (int)(src_roirows * fy); - src1x = rng.uniform(0, mat1.cols - src_roicols); - src1y = rng.uniform(0, mat1.rows - src_roirows); - dstx = rng.uniform(0, dst.cols - dst_roicols); - dsty = rng.uniform(0, dst.rows - dst_roirows); -#else - src_roicols = mat1.cols; - src_roirows = mat1.rows; - dst_roicols = dst.cols; - dst_roirows = dst.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; -#endif - dsize.width = dst_roicols; - dsize.height = dst_roirows; - mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows)); - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - dsize.width = (int)(mat1_roi.size().width * fx); - dsize.height = (int)(mat1_roi.size().height * fy); - - gmat1 = mat1_roi; - } - -}; - -OCL_TEST_P(Resize, Mat) -{ - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - - // resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); - // ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); - if(dst_roicols < 1 || dst_roirows < 1) continue; - resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); - ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); - - Mat cpu_dst; - gdst_whole.download(cpu_dst); - EXPECT_MAT_NEAR(dst, cpu_dst, 1.0); - } - -} - - -///////////////////////////////////////////////////////////////////////////////////////////////// -//threshold - -PARAM_TEST_CASE(Threshold, MatType, ThreshOp) -{ - int type; - int threshOp; - - //src mat - 
Mat mat1; - Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - Mat mat1_roi; - Mat dst_roi; - - //ocl dst mat for testing - ocl::oclMat gdst_whole; - - //ocl mat with roi - ocl::oclMat gmat1; - ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - threshOp = GET_PARAM(1); - - Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(size, type, 5, 16, false); - dst = randomMat(size, type, 5, 16, false); - } - - void random_roi() - { -#ifdef RANDOMROI - //randomize ROI - roicols = rng.uniform(1, mat1.cols); - roirows = rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); -#else - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; -#endif - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - - gmat1 = mat1_roi; - } - }; OCL_TEST_P(Threshold, Mat) { - for(int j = 0; j < LOOP_TIMES; j++) + for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); + double maxVal = randomDouble(20.0, 127.0); double thresh = randomDouble(0.0, maxVal); - threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp); - ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); + threshold(src_roi, dst_roi, thresh, maxVal, thresholdType); + ocl::threshold(gsrc_roi, gdst_roi, thresh, maxVal, thresholdType); - Mat cpu_dst; - gdst_whole.download(cpu_dst); - EXPECT_MAT_NEAR(dst, cpu_dst, 1); - } - -} - -PARAM_TEST_CASE(MeanShiftTestBase, MatType, MatType, int, int, TermCriteria) -{ - int type, typeCoor; - int sp, sr; - TermCriteria crit; - //src mat - Mat src; - Mat dst; - Mat dstCoor; - - //set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - 
//src mat with roi - Mat src_roi; - Mat dst_roi; - Mat dstCoor_roi; - - //ocl dst mat - ocl::oclMat gdst; - ocl::oclMat gdstCoor; - - //ocl mat with roi - ocl::oclMat gsrc_roi; - ocl::oclMat gdst_roi; - ocl::oclMat gdstCoor_roi; - - virtual void SetUp() - { - type = GET_PARAM(0); - typeCoor = GET_PARAM(1); - sp = GET_PARAM(2); - sr = GET_PARAM(3); - crit = GET_PARAM(4); - - // MWIDTH=256, MHEIGHT=256. defined in utility.hpp - Size size = Size(MWIDTH, MHEIGHT); - - src = randomMat(size, type, 5, 16, false); - dst = randomMat(size, type, 5, 16, false); - dstCoor = randomMat(size, typeCoor, 5, 16, false); - - } - - void random_roi() - { -#ifdef RANDOMROI - //randomize ROI - roicols = rng.uniform(1, src.cols); - roirows = rng.uniform(1, src.rows); - srcx = rng.uniform(0, src.cols - roicols); - srcy = rng.uniform(0, src.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); -#else - roicols = src.cols; - roirows = src.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; -#endif - src_roi = src(Rect(srcx, srcy, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows)); - - gdst = dst; - gdstCoor = dstCoor; - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); - } -}; - -/////////////////////////meanShiftFiltering///////////////////////////// - -typedef MeanShiftTestBase MeanShiftFiltering; - -OCL_TEST_P(MeanShiftFiltering, Mat) -{ - - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - - Mat cpu_gdst; - gdst.download(cpu_gdst); - - ::meanShiftFiltering_(src_roi, dst_roi, sp, sr, crit); - ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); - - gdst.download(cpu_gdst); - EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0); + Near(1); } } -///////////////////////////meanShiftProc////////////////////////////////// 
+///////////////////////////////////////////////////////////////////////////////////////// +// calcHist -typedef MeanShiftTestBase MeanShiftProc; - -OCL_TEST_P(MeanShiftProc, Mat) +static void calcHistGold(const Mat &src, Mat &hist) { + hist = Mat(1, 256, CV_32SC1, Scalar::all(0)); - for(int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - - Mat cpu_gdst; - Mat cpu_gdstCoor; - - meanShiftProc_(src_roi, dst_roi, dstCoor_roi, sp, sr, crit); - ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); - - gdst.download(cpu_gdst); - gdstCoor.download(cpu_gdstCoor); - EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0); - EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0); - } -} - -/////////////////////////////////////////////////////////////////////////////////////// -//hist - -void calcHistGold(const Mat &src, Mat &hist) -{ - hist.create(1, 256, CV_32SC1); - hist.setTo(Scalar::all(0)); - - int *hist_row = hist.ptr(); + int * const hist_row = hist.ptr(); for (int y = 0; y < src.rows; ++y) { - const uchar *src_row = src.ptr(y); + const uchar * const src_row = src.ptr(y); for (int x = 0; x < src.cols; ++x) ++hist_row[src_row[x]]; } } -PARAM_TEST_CASE(HistTestBase, MatType, MatType) -{ - int type_src; - - //src mat - Mat src; - Mat dst_hist; - //set up roi - int roicols; - int roirows; - int srcx; - int srcy; - //src mat with roi - Mat src_roi; - //ocl dst mat, dst_hist and gdst_hist don't have roi - ocl::oclMat gdst_hist; - //ocl mat with roi - ocl::oclMat gsrc_roi; - - virtual void SetUp() - { - type_src = GET_PARAM(0); - - Size size = Size(MWIDTH, MHEIGHT); - - src = randomMat(size, type_src, 0, 256, false); - - } - - void random_roi() - { -#ifdef RANDOMROI - //randomize ROI - roicols = rng.uniform(1, src.cols); - roirows = rng.uniform(1, src.rows); - srcx = rng.uniform(0, src.cols - roicols); - srcy = rng.uniform(0, src.rows - roirows); -#else - roicols = src.cols; - roirows = src.rows; - srcx = 0; - srcy = 0; -#endif - src_roi = src(Rect(srcx, srcy, roicols, roirows)); - - 
gsrc_roi = src_roi; - } -}; - -///////////////////////////calcHist/////////////////////////////////////// - -typedef HistTestBase CalcHist; +typedef ImgprocTestBase CalcHist; OCL_TEST_P(CalcHist, Mat) { - for(int j = 0; j < LOOP_TIMES; j++) + for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); - Mat cpu_hist; + calcHistGold(src_roi, dst_roi); + ocl::calcHist(gsrc_roi, gdst_roi); - calcHistGold(src_roi, dst_hist); - ocl::calcHist(gsrc_roi, gdst_hist); - - gdst_hist.download(cpu_hist); - EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0); + Near(); } } -/////////////////////////////////////////////////////////////////////////////////////////////////////// -// CLAHE +///////////////////////////////////////////////////////////////////////////////////////////////////////// +//// CLAHE -PARAM_TEST_CASE(CLAHE_Test, Size, double) +PARAM_TEST_CASE(CLAHETest, Size, double, bool) { Size gridSize; double clipLimit; + bool useRoi; - Mat src; - Mat dst_gold; - - ocl::oclMat g_src; - ocl::oclMat g_dst; + Mat src, dst_whole, src_roi, dst_roi; + ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi; virtual void SetUp() { gridSize = GET_PARAM(0); clipLimit = GET_PARAM(1); - - src = randomMat(Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false); - g_src.upload(src); + useRoi = GET_PARAM(2); } -}; -OCL_TEST_P(CLAHE_Test, Accuracy) -{ - Ptr clahe = ocl::createCLAHE(clipLimit, gridSize); - clahe->apply(g_src, g_dst); - Mat dst(g_dst); - - Ptr clahe_gold = createCLAHE(clipLimit, gridSize); - clahe_gold->apply(src, dst_gold); - - EXPECT_MAT_NEAR(dst_gold, dst, 1.0); -} - -///////////////////////////Convolve////////////////////////////////// - -PARAM_TEST_CASE(ConvolveTestBase, MatType, bool) -{ - int type; - //src mat - Mat mat1; - Mat mat2; - Mat dst; - Mat dst1; //bak, for two outputs - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - //src mat with roi - Mat mat1_roi; - Mat mat2_roi; - Mat dst_roi; - Mat dst1_roi; //bak - 
//ocl dst mat for testing - ocl::oclMat gdst_whole; - ocl::oclMat gdst1_whole; //bak - //ocl mat with roi - ocl::oclMat gmat1; - ocl::oclMat gmat2; - ocl::oclMat gdst; - ocl::oclMat gdst1; //bak - virtual void SetUp() - { - type = GET_PARAM(0); - - Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(size, type, 5, 16, false); - mat2 = randomMat(size, type, 5, 16, false); - dst = randomMat(size, type, 5, 16, false); - dst1 = randomMat(size, type, 5, 16, false); - } void random_roi() { -#ifdef RANDOMROI - //randomize ROI - roicols = rng.uniform(1, mat1.cols); - roirows = rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); -#else - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; -#endif - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); + Size roiSize = randomSize(std::max(gridSize.height, gridSize.width), MAX_VALUE); + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, CV_8UC1, 5, 256); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + Border dstBorder = randomBorder(0, useRoi ? 
MAX_VALUE : 0); + randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, CV_8UC1, 5, 16); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - //end + generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder); + generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder); } + void Near(double threshold = 0.0) + { + Mat whole, roi; + gdst_whole.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst_whole, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); + } }; -typedef ConvolveTestBase Convolve; - -void conv2( Mat x, Mat y, Mat z) +OCL_TEST_P(CLAHETest, Accuracy) { - int N1 = x.rows; - int M1 = x.cols; - int N2 = y.rows; - int M2 = y.cols; + for (int i = 0; i < LOOP_TIMES; ++i) + { + random_roi(); - int i, j; - int m, n; + Ptr clahe = ocl::createCLAHE(clipLimit, gridSize); + clahe->apply(gsrc_roi, gdst_roi); + Ptr clahe_gold = createCLAHE(clipLimit, gridSize); + clahe_gold->apply(src_roi, dst_roi); - float *kerneldata = (float *)(x.data); - float *srcdata = (float *)(y.data); - float *dstdata = (float *)(z.data); + Near(1.0); + } +} - for(i = 0; i < N2; i++) - for(j = 0; j < M2; j++) +/////////////////////////////Convolve////////////////////////////////// + +static void convolve_gold(const Mat & src, const Mat & kernel, Mat & dst) +{ + for (int i = 0; i < src.rows; i++) + { + float * const dstptr = dst.ptr(i); + + for (int j = 0; j < src.cols; j++) { float temp = 0; - for(m = 0; m < N1; m++) - for(n = 0; n < M1; n++) + + for (int m = 0; m < kernel.rows; m++) + { + const float * const kptr = kernel.ptr(m); + for (int n = 0; n < kernel.cols; n++) { - int r, c; - r = min(max((i - N1 / 2 + m), 0), N2 - 1); - c = min(max((j - M1 / 2 + n), 0), M2 - 1); - temp += kerneldata[m * (x.step >> 2) + n] * srcdata[r * (y.step >> 2) + c]; + int r = clipInt(i - kernel.rows / 2 + m, 0, src.rows - 1); + int c = clipInt(j - kernel.cols / 2 + n, 0, src.cols - 1); + + temp += 
src.ptr(r)[c] * kptr[n]; } - dstdata[i * (z.step >> 2) + j] = temp; + } + + dstptr[j] = temp; } + } } +typedef ImgprocTestBase Convolve; + OCL_TEST_P(Convolve, Mat) { - if(mat1.type() != CV_32FC1) - { - cout << "\tUnsupported type\t\n"; - } - for(int j = 0; j < LOOP_TIMES; j++) + Mat kernel, kernel_roi; + ocl::oclMat gkernel, gkernel_roi; + const Size roiSize(7, 7); + + for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); - ocl::oclMat temp1; - Mat kernel_cpu = mat2(Rect(0, 0, 7, 7)); - temp1 = kernel_cpu; - conv2(kernel_cpu, mat1_roi, dst_roi); - ocl::convolve(gmat1, temp1, gdst); + Border kernelBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(kernel, kernel_roi, roiSize, kernelBorder, type, 5, 16); + generateOclMat(gkernel, gkernel_roi, kernel, roiSize, kernelBorder); - Mat cpu_dst; - gdst_whole.download(cpu_dst); - EXPECT_MAT_NEAR(dst, cpu_dst, .1); + convolve_gold(src_roi, kernel_roi, dst_roi); + ocl::convolve(gsrc_roi, gkernel_roi, gdst_roi); + Near(1); } } -//////////////////////////////// ColumnSum ////////////////////////////////////// +////////////////////////////////// ColumnSum ////////////////////////////////////// -PARAM_TEST_CASE(ColumnSum, Size) +static void columnSum_gold(const Mat & src, Mat & dst) { - Size size; - Mat src; + float * prevdptr = dst.ptr(0); + const float * sptr = src.ptr(0); - virtual void SetUp() + for (int x = 0; x < src.cols; ++x) + prevdptr[x] = sptr[x]; + + for (int y = 1; y < src.rows; ++y) { - size = GET_PARAM(0); + sptr = src.ptr(y); + float * const dptr = dst.ptr(y); + + for (int x = 0; x < src.cols; ++x) + dptr[x] = prevdptr[x] + sptr[x]; + + prevdptr = dptr; } -}; +} + +typedef ImgprocTestBase ColumnSum; OCL_TEST_P(ColumnSum, Accuracy) { - Mat src = randomMat(size, CV_32FC1, 0, 255); - ocl::oclMat d_dst; - ocl::oclMat d_src(src); - - ocl::columnSum(d_src, d_dst); - - Mat dst(d_dst); - - for (int j = 0; j < src.cols; ++j) + for (int i = 0; i < LOOP_TIMES; ++i) { - float gold = src.at(0, j); - float res = 
dst.at(0, j); - ASSERT_NEAR(res, gold, 1e-5); - } + random_roi(); - for (int i = 1; i < src.rows; ++i) - { - for (int j = 0; j < src.cols; ++j) - { - float gold = src.at(i, j) += src.at(i - 1, j); - float res = dst.at(i, j); - ASSERT_NEAR(res, gold, 1e-5); - } + columnSum_gold(src_roi, dst_roi); + ocl::columnSum(gsrc_roi, gdst_roi); + + Near(1e-5); } } ///////////////////////////////////////////////////////////////////////////////////// -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, EqualizeHist, Combine( - ONE_TYPE(CV_8UC1), - NULL_TYPE, - ONE_TYPE(CV_8UC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter +INSTANTIATE_TEST_CASE_P(Imgproc, EqualizeHist, Combine( + Values((MatType)CV_8UC1), + Values(0), // not used + Values(0), // not used + Bool())); - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), - NULL_TYPE, - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CornerMinEigenVal, Combine( +INSTANTIATE_TEST_CASE_P(Imgproc, CornerMinEigenVal, Combine( Values(CV_8UC1, CV_32FC1), - NULL_TYPE, - ONE_TYPE(CV_32FC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter + Values(3), // TODO some fails when blockSize != 3 (for example 5) + Values((int)BORDER_REFLECT, (int)BORDER_CONSTANT, (int)BORDER_REPLICATE), // TODO does not work with (int)BORDER_REFLECT101 + Bool())); -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CornerHarris, Combine( - Values(CV_8UC1, CV_32FC1), - NULL_TYPE, - ONE_TYPE(CV_32FC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, Integral, Combine( - ONE_TYPE(CV_8UC1), - NULL_TYPE, - ONE_TYPE(CV_32SC1), - 
ONE_TYPE(CV_32FC1), - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), - Values((MatType)INTER_NEAREST, (MatType)INTER_LINEAR, - (MatType)INTER_CUBIC, (MatType)(INTER_NEAREST | WARP_INVERSE_MAP), - (MatType)(INTER_LINEAR | WARP_INVERSE_MAP), (MatType)(INTER_CUBIC | WARP_INVERSE_MAP)))); - - -INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine - (Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), - Values((MatType)INTER_NEAREST, (MatType)INTER_LINEAR, - (MatType)INTER_CUBIC, (MatType)(INTER_NEAREST | WARP_INVERSE_MAP), - (MatType)(INTER_LINEAR | WARP_INVERSE_MAP), (MatType)(INTER_CUBIC | WARP_INVERSE_MAP)))); - - -INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(Size()), - Values(0.5, 1.5, 2), Values(0.5, 1.5, 2), Values((MatType)INTER_NEAREST, (MatType)INTER_LINEAR))); +INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine( + Values((MatType)CV_8UC1), // TODO does not work properly with CV_32FC1 + Values(3, 5), + Values((int)BORDER_REFLECT101, (int)BORDER_REFLECT, (int)BORDER_CONSTANT, (int)BORDER_REPLICATE), + Bool())); +INSTANTIATE_TEST_CASE_P(Imgproc, Integral, Combine( + Values((MatType)CV_8UC1), // TODO does work with CV_32F, CV_64F + Values(0), // not used + Values(0), // not used + Bool())); INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( - Values(CV_8UC1, CV_32FC1), Values(ThreshOp(THRESH_BINARY), - ThreshOp(THRESH_BINARY_INV), ThreshOp(THRESH_TRUNC), - ThreshOp(THRESH_TOZERO), ThreshOp(THRESH_TOZERO_INV)))); + Values(CV_8UC1, CV_32FC1), + Values(0), + Values(ThreshOp(THRESH_BINARY), + ThreshOp(THRESH_BINARY_INV), ThreshOp(THRESH_TRUNC), + ThreshOp(THRESH_TOZERO), ThreshOp(THRESH_TOZERO_INV)), + Bool())); +INSTANTIATE_TEST_CASE_P(Imgproc, CalcHist, Combine( + Values((MatType)CV_8UC1), + Values(0), // not used + 
Values(0), // not used + Bool())); -INSTANTIATE_TEST_CASE_P(Imgproc, MeanShiftFiltering, Combine( - ONE_TYPE(CV_8UC4), - ONE_TYPE(CV_16SC2), - Values(5), - Values(6), - Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)) - )); +INSTANTIATE_TEST_CASE_P(Imgproc, CLAHETest, Combine( + Values(Size(4, 4), Size(32, 8), Size(8, 64)), + Values(0.0, 10.0, 62.0, 300.0), + Bool())); +INSTANTIATE_TEST_CASE_P(Imgproc, Convolve, Combine( + Values((MatType)CV_32FC1), + Values(0), // not used + Values(0), // not used + Bool())); -INSTANTIATE_TEST_CASE_P(Imgproc, MeanShiftProc, Combine( - ONE_TYPE(CV_8UC4), - ONE_TYPE(CV_16SC2), - Values(5), - Values(6), - Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)) - )); +INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, Combine( + Values(MatType(CV_32FC1)), + Values(0), // not used + Values(0), // not used + Bool())); -INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), - Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1), - Values((int)INTER_NEAREST, (int)INTER_LINEAR), - Values((int)BORDER_CONSTANT))); - - -INSTANTIATE_TEST_CASE_P(histTestBase, CalcHist, Combine( - ONE_TYPE(CV_8UC1), - ONE_TYPE(CV_32SC1) //no use - )); - -INSTANTIATE_TEST_CASE_P(Imgproc, CLAHE_Test, Combine( - Values(Size(4, 4), Size(32, 8), Size(8, 64)), - Values(0.0, 10.0, 62.0, 300.0))); - -INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, DIFFERENT_SIZES); +INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( + testing::Range((MatDepth)CV_8U, (MatDepth)CV_USRTYPE1), + testing::Values((Channels)1, (Channels)4), + Bool(), // border isolated or not + Values((Border)BORDER_CONSTANT, + (Border)BORDER_REPLICATE, + (Border)BORDER_REFLECT, + (Border)BORDER_WRAP, + (Border)BORDER_REFLECT_101), + Bool())); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_mean_shift.cpp b/modules/ocl/test/test_mean_shift.cpp new file mode 100644 index 000000000..684a2a937 --- 
/dev/null +++ b/modules/ocl/test/test_mean_shift.cpp @@ -0,0 +1,408 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Jia Haipeng, jiahaipeng95@gmail.com +// Shengen Yan, yanshengen@gmail.com +// Jiang Liyuan, lyuan001.good@163.com +// Rock Li, Rock.Li@amd.com +// Wu Zailong, bullet@yeah.net +// Xu Pang, pangxu010@163.com +// Sen Liu, swjtuls1987@126.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission.
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace testing; +using namespace std; +using namespace cv; + +typedef struct +{ + short x; + short y; +} COOR; + +COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, Size size, int sp, int sr, int maxIter, float eps, int *tab) +{ + + int isr2 = sr * sr; + int c0, c1, c2, c3; + int iter; + uchar *ptr = NULL; + uchar *pstart = NULL; + int revx = 0, revy = 0; + c0 = sptr[0]; + c1 = sptr[1]; + c2 = sptr[2]; + c3 = sptr[3]; + // iterate meanshift procedure + for(iter = 0; iter < maxIter; iter++ ) + { + int count = 0; + int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; + + //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) + int minx = x0 - sp; + int miny = y0 - sp; + int maxx = x0 + sp; + int maxy = y0 + sp; + + //deal with the image boundary + if(minx < 0) minx = 0; + if(miny < 0) miny = 0; + if(maxx >= size.width) maxx = size.width - 1; + if(maxy >= size.height) maxy = size.height - 1; + if(iter == 0) + { + pstart = sptr; + } + else + { + pstart = pstart + revy * sstep + (revx << 2); //point to the new position + } + ptr = pstart; + ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 
2); //point to the start in the row + + for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) + { + int rowCount = 0; + int x = minx; +#if CV_ENABLE_UNROLLED + for( ; x + 4 <= maxx; x += 4, ptr += 16) + { + int t0, t1, t2; + t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x; + rowCount++; + } + t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 1; + rowCount++; + } + t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 2; + rowCount++; + } + t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 3; + rowCount++; + } + } +#endif + for(; x <= maxx; x++, ptr += 4) + { + int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x; + rowCount++; + } + } + if(rowCount == 0) + continue; + count += rowCount; + sy += y * rowCount; + } + + if( count == 0 ) + break; + + int x1 = sx / count; + int y1 = sy / count; + s0 = s0 / count; + s1 = s1 / count; + s2 = s2 / count; + + bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + + tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); + + //revise the pointer corresponding to the new (y0,x0) + revx = x1 - x0; + revy = y1 - y0; + + x0 = x1; + y0 = y1; + c0 = s0; + c1 = s1; + c2 = s2; + + if( stopFlag ) + break; + } //for iter + + dptr[0] = (uchar)c0; + dptr[1] = (uchar)c1; + dptr[2] = (uchar)c2; + dptr[3] = (uchar)c3; + + COOR coor; + coor.x = (short)x0; + coor.y = (short)y0; + return coor; +} + +void 
meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, TermCriteria crit) +{ + if( src_roi.empty() ) + CV_Error( CV_StsBadArg, "The input image is empty" ); + + if( src_roi.depth() != CV_8U || src_roi.channels() != 4 ) + CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); + + CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) ); + CV_Assert( !(dst_roi.step & 0x3) ); + + if( !(crit.type & TermCriteria::MAX_ITER) ) + crit.maxCount = 5; + int maxIter = std::min(std::max(crit.maxCount, 1), 100); + float eps; + if( !(crit.type & TermCriteria::EPS) ) + eps = 1.f; + eps = (float)std::max(crit.epsilon, 0.0); + + int tab[512]; + for(int i = 0; i < 512; i++) + tab[i] = (i - 255) * (i - 255); + uchar *sptr = src_roi.data; + uchar *dptr = dst_roi.data; + int sstep = (int)src_roi.step; + int dstep = (int)dst_roi.step; + Size size = src_roi.size(); + + for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), + dptr += dstep - (size.width << 2)) + { + for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4) + { + do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); + } + } +} + +void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, TermCriteria crit) +{ + if( src_roi.empty() ) + CV_Error( CV_StsBadArg, "The input image is empty" ); + if( src_roi.depth() != CV_8U || src_roi.channels() != 4 ) + CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); + CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && + (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); + CV_Assert( !(dstCoor_roi.step & 0x3) ); + + if( !(crit.type & TermCriteria::MAX_ITER) ) + crit.maxCount = 5; + int maxIter = std::min(std::max(crit.maxCount, 1), 100); + float eps; + if( !(crit.type & TermCriteria::EPS) ) + eps = 1.f; + eps = (float)std::max(crit.epsilon, 0.0); + + int tab[512]; + for(int i = 0; i < 
512; i++) + tab[i] = (i - 255) * (i - 255); + uchar *sptr = src_roi.data; + uchar *dptr = dst_roi.data; + short *dCoorptr = (short *)dstCoor_roi.data; + int sstep = (int)src_roi.step; + int dstep = (int)dst_roi.step; + int dCoorstep = (int)dstCoor_roi.step >> 1; + Size size = src_roi.size(); + + for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), + dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1)) + { + for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2) + { + *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); + } + } + +} + +//////////////////////////////// meanShift ////////////////////////////////////////// + +PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, TermCriteria, bool) +{ + int type, typeCoor; + int sp, sr; + TermCriteria crit; + bool useRoi; + + // src mat + Mat src, src_roi; + Mat dst, dst_roi; + Mat dstCoor, dstCoor_roi; + + // ocl dst mat + ocl::oclMat gsrc, gsrc_roi; + ocl::oclMat gdst, gdst_roi; + ocl::oclMat gdstCoor, gdstCoor_roi; + + virtual void SetUp() + { + type = GET_PARAM(0); + typeCoor = GET_PARAM(1); + sp = GET_PARAM(2); + sr = GET_PARAM(3); + crit = GET_PARAM(4); + useRoi = GET_PARAM(5); + } + + void random_roi() + { + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256); + generateOclMat(gsrc, gsrc_roi, src, roiSize, srcBorder); + + Border dstBorder = randomBorder(0, useRoi ? 
MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 256); + generateOclMat(gdst, gdst_roi, dst, roiSize, dstBorder); + + randomSubMat(dstCoor, dstCoor_roi, roiSize, dstBorder, typeCoor, 5, 256); + generateOclMat(gdstCoor, gdstCoor_roi, dstCoor, roiSize, dstBorder); + } + + void Near(double threshold = 0.0) + { + Mat whole, roi; + gdst.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); + } + + void Near1(double threshold = 0.0) + { + Mat whole, roi; + gdstCoor.download(whole); + gdstCoor_roi.download(roi); + + EXPECT_MAT_NEAR(dstCoor, whole, threshold); + EXPECT_MAT_NEAR(dstCoor_roi, roi, threshold); + } +}; + +/////////////////////////meanShiftFiltering///////////////////////////// + +typedef meanShiftTestBase meanShiftFiltering; + +OCL_TEST_P(meanShiftFiltering, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + meanShiftFiltering_(src_roi, dst_roi, sp, sr, crit); + ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); + + Near(); + } +} + +///////////////////////////meanShiftProc////////////////////////////////// + +typedef meanShiftTestBase meanShiftProc; + +OCL_TEST_P(meanShiftProc, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + meanShiftProc_(src_roi, dst_roi, dstCoor_roi, sp, sr, crit); + ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); + + Near(); + Near1(); + } +} + +///////////////////////////////////////////////////////////////////////////////////// + +INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine( + Values((MatType)CV_8UC4), + Values((MatType)CV_16SC2), + Values(5), + Values(6), + Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)), + Bool() + )); + +INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine( + Values((MatType)CV_8UC4), + Values((MatType)CV_16SC2), + Values(5), + Values(6), + Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)), 
+ Bool() + )); + +#endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp new file mode 100644 index 000000000..717bbc7a2 --- /dev/null +++ b/modules/ocl/test/test_warp.cpp @@ -0,0 +1,371 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Jia Haipeng, jiahaipeng95@gmail.com +// Shengen Yan, yanshengen@gmail.com +// Jiang Liyuan, lyuan001.good@163.com +// Rock Li, Rock.Li@amd.com +// Wu Zailong, bullet@yeah.net +// Xu Pang, pangxu010@163.com +// Sen Liu, swjtuls1987@126.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission.
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cv; +using namespace testing; +using namespace std; + +static MatType noType = -1; + +///////////////////////////////////////////////////////////////////////////////////////////////// +// warpAffine & warpPerspective + +PARAM_TEST_CASE(WarpTestBase, MatType, Interpolation, bool, bool) +{ + int type, interpolation; + Size dsize; + bool useRoi, mapInverse; + + Mat src, dst_whole, src_roi, dst_roi; + ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi; + + virtual void SetUp() + { + type = GET_PARAM(0); + interpolation = GET_PARAM(1); + mapInverse = GET_PARAM(2); + useRoi = GET_PARAM(3); + + if (mapInverse) + interpolation |= WARP_INVERSE_MAP; + } + + void random_roi() + { + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); + + Border dstBorder = randomBorder(0, useRoi ? 
MAX_VALUE : 0); + randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE); + + generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder); + generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder); + + dsize = randomSize(1, MAX_VALUE); + } + + void Near(double threshold = 0.0) + { + Mat whole, roi; + gdst_whole.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst_whole, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); + } +}; + +/////warpAffine + +typedef WarpTestBase WarpAffine; + +OCL_TEST_P(WarpAffine, Mat) +{ + static const double coeffs[2][3] = + { + { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 }, + { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 } + }; + + static Mat M(2, 3, CV_64FC1, (void *)coeffs); + + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + warpAffine(src_roi, dst_roi, M, dsize, interpolation); + ocl::warpAffine(gsrc_roi, gdst_roi, M, dsize, interpolation); + + Near(1.0); + } +} + +// warpPerspective + +typedef WarpTestBase WarpPerspective; + +OCL_TEST_P(WarpPerspective, Mat) +{ + static const double coeffs[3][3] = + { + { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 }, + { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }, + { 0.0, 0.0, 1.0 } + }; + + static Mat M(3, 3, CV_64FC1, (void *)coeffs); + + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + warpPerspective(src_roi, dst_roi, M, dsize, interpolation); + ocl::warpPerspective(gsrc_roi, gdst_roi, M, dsize, interpolation); + + Near(1.0); + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// remap + +PARAM_TEST_CASE(Remap, MatDepth, Channels, pair, Border, bool) +{ + int srcType, map1Type, map2Type; + int borderType; + bool useRoi; + + Scalar val; + + Mat src, src_roi; + Mat dst, dst_roi; + Mat map1, map1_roi; + Mat map2, map2_roi; + + // ocl mat with roi + ocl::oclMat gsrc, gsrc_roi; + ocl::oclMat gdst, gdst_roi; + ocl::oclMat gmap1, gmap1_roi; + ocl::oclMat gmap2, 
gmap2_roi; + + virtual void SetUp() + { + srcType = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1)); + map1Type = GET_PARAM(2).first; + map2Type = GET_PARAM(2).second; + borderType = GET_PARAM(3); + useRoi = GET_PARAM(4); + } + + void random_roi() + { + val = randomScalar(-MAX_VALUE, MAX_VALUE); + Size srcROISize = randomSize(1, MAX_VALUE); + Size dstROISize = randomSize(1, MAX_VALUE); + + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, srcROISize, srcBorder, srcType, 5, 256); + + Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, dstROISize, dstBorder, srcType, -MAX_VALUE, MAX_VALUE); + + int mapMaxValue = MAX_VALUE << 2; + Border map1Border = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(map1, map1_roi, dstROISize, map1Border, map1Type, -mapMaxValue, mapMaxValue); + + Border map2Border = randomBorder(0, useRoi ? MAX_VALUE : 0); + if (map2Type != noType) + randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, -mapMaxValue, mapMaxValue); + + generateOclMat(gsrc, gsrc_roi, src, srcROISize, srcBorder); + generateOclMat(gdst, gdst_roi, dst, dstROISize, dstBorder); + generateOclMat(gmap1, gmap1_roi, map1, dstROISize, map1Border); + if (noType != map2Type) + generateOclMat(gmap2, gmap2_roi, map2, dstROISize, map2Border); + } + + void Near(double threshold = 0.0) + { + Mat whole, roi; + gdst.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); + } +}; + +typedef Remap Remap_INTER_NEAREST; + +OCL_TEST_P(Remap_INTER_NEAREST, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_NEAREST, borderType, val); + ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, INTER_NEAREST, borderType, val); + + Near(1.0); + } +} + +typedef Remap Remap_INTER_LINEAR; + +OCL_TEST_P(Remap_INTER_LINEAR, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + 
random_roi(); + + cv::remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_LINEAR, borderType, val); + ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, INTER_LINEAR, borderType, val); + + Near(2.0); + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// resize + +PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool) +{ + int type, interpolation; + double fx, fy; + bool useRoi; + + Mat src, dst_whole, src_roi, dst_roi; + ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi; + + virtual void SetUp() + { + type = GET_PARAM(0); + fx = GET_PARAM(1); + fy = GET_PARAM(2); + interpolation = GET_PARAM(3); + useRoi = GET_PARAM(4); + } + + void random_roi() + { + Size srcRoiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); + + Size dstRoiSize; + dstRoiSize.width = cvRound(srcRoiSize.width * fx); + dstRoiSize.height = cvRound(srcRoiSize.height * fy); + + Border dstBorder = randomBorder(0, useRoi ? 
MAX_VALUE : 0); + randomSubMat(dst_whole, dst_roi, dstRoiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE); + + generateOclMat(gsrc_whole, gsrc_roi, src, srcRoiSize, srcBorder); + generateOclMat(gdst_whole, gdst_roi, dst_whole, dstRoiSize, dstBorder); + } + + void Near(double threshold = 0.0) + { + Mat whole, roi; + gdst_whole.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst_whole, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); + } +}; + +OCL_TEST_P(Resize, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + resize(src_roi, dst_roi, Size(), fx, fy, interpolation); + ocl::resize(gsrc_roi, gdst_roi, Size(), fx, fy, interpolation); + + Near(1.0); + } +} + +///////////////////////////////////////////////////////////////////////////////////// + +INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpAffine, Combine( + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), + Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC), + Bool(), + Bool())); + +INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine( + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), + Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC), + Bool(), + Bool())); + +INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine( + Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), + Values(1, 2, 3, 4), + Values(pair((MatType)CV_32FC1, (MatType)CV_32FC1), + pair((MatType)CV_32FC2, noType)), + Values((Border)BORDER_CONSTANT, + (Border)BORDER_REPLICATE, + (Border)BORDER_WRAP, + (Border)BORDER_REFLECT, + (Border)BORDER_REFLECT_101), + Bool())); + +INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine( + Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), + Values(1, 2, 3, 4), + Values(pair((MatType)CV_32FC1, (MatType)CV_32FC1), + pair((MatType)CV_32FC2, noType), + pair((MatType)CV_16SC2, noType)), + Values((Border)BORDER_CONSTANT, + 
(Border)BORDER_REPLICATE, + (Border)BORDER_WRAP, + (Border)BORDER_REFLECT, + (Border)BORDER_REFLECT_101), + Bool())); + +INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine( + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), + Values(0.5, 1.5, 2.0), + Values(0.5, 1.5, 2.0), + Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR), + Bool())); + +#endif // HAVE_OPENCL