From 474fc887a68765337ec4b6f34e67edc7911b48d2 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 2 Dec 2013 23:45:16 +0400 Subject: [PATCH] added cv::copyMakeBorder to T-API --- modules/core/src/copy.cpp | 64 +++++++++- modules/core/src/ocl.cpp | 2 +- modules/core/src/opencl/copymakeborder.cl | 132 +++++++++++++++++++++ modules/core/src/umatrix.cpp | 1 + modules/imgproc/src/opencl/threshold.cl | 136 ++++++++++++++++++++++ modules/imgproc/test/ocl/test_imgproc.cpp | 2 +- 6 files changed, 334 insertions(+), 3 deletions(-) create mode 100644 modules/core/src/opencl/copymakeborder.cl create mode 100644 modules/imgproc/src/opencl/threshold.cl diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index ee34ef451..fa6fa69f1 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -47,6 +47,7 @@ // */ #include "precomp.hpp" +#include "opencl_kernels.hpp" namespace cv { @@ -701,12 +702,73 @@ void copyMakeConstBorder_8u( const uchar* src, size_t srcstep, cv::Size srcroi, } +namespace cv { + +static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, + int left, int right, int borderType, const Scalar& value ) +{ + int type = _src.type(), cn = CV_MAT_CN(type); + bool isolated = (borderType & BORDER_ISOLATED) != 0; + borderType &= ~cv::BORDER_ISOLATED; + + if ( !(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_REFLECT || + borderType == BORDER_WRAP || borderType == BORDER_REFLECT_101) || + cn == 3 || cn > 4) + return false; + + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }; + ocl::Kernel k("copyMakeBorder", ocl::core::copymakeborder_oclsrc, + format("-D T=%s -D %s", ocl::memopTypeToStr(type), borderMap[borderType])); + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + if( src.isSubmatrix() && !isolated ) + { + Size wholeSize; + Point ofs; + src.locateROI(wholeSize, ofs); + int dtop = std::min(ofs.y, top); + int dbottom = std::min(wholeSize.height - src.rows - ofs.y, bottom); + int dleft = std::min(ofs.x, left); + int dright = std::min(wholeSize.width - src.cols - ofs.x, right); + src.adjustROI(dtop, dbottom, dleft, dright); + top -= dtop; + left -= dleft; + bottom -= dbottom; + right -= dright; + } + + _dst.create(src.rows + top + bottom, src.cols + left + right, type); + UMat dst = _dst.getUMat(); + + if (top == 0 && left == 0 && bottom == 0 && right == 0) + { + if(src.u != dst.u || src.step != dst.step) + src.copyTo(dst); + return true; + } + + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + top, left, ocl::KernelArg::Constant(Mat(1, 1, type, value))); + + size_t globalsize[2] = { dst.cols, dst.rows }; + return k.run(2, globalsize, NULL, false); +} + +} + void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, const Scalar& value ) { - Mat src = _src.getMat(); CV_Assert( top >= 0 && bottom >= 0 && left >= 0 && right >= 0 ); + if (ocl::useOpenCL() && _dst.isUMat() && _src.dims() <= 2 && + ocl_copyMakeBorder(_src, _dst, top, bottom, left, right, borderType, value)) + return; + + Mat src = _src.getMat(); + if( src.isSubmatrix() && (borderType & BORDER_ISOLATED) == 0 ) { Size wholeSize; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index f733dd11f..8b54876f8 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -1893,7 +1893,7 @@ Context2& Context2::getDefault() // First, try to retrieve existing context of the same type. // In its turn, Platform::getContext() may call Context2::create() // if there is no such context. - ctx.create(Device::TYPE_ACCELERATOR); + ctx.create(Device::TYPE_CPU); if(!ctx.p) ctx.create(Device::TYPE_DGPU); if(!ctx.p) diff --git a/modules/core/src/opencl/copymakeborder.cl b/modules/core/src/opencl/copymakeborder.cl new file mode 100644 index 000000000..bb264421f --- /dev/null +++ b/modules/core/src/opencl/copymakeborder.cl @@ -0,0 +1,132 @@ +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Zero Lin zero.lin@amd.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +// + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + +#ifdef BORDER_CONSTANT +#define EXTRAPOLATE(x, y, v) v = scalar; +#elif defined BORDER_REPLICATE +#define EXTRAPOLATE(x, y, v) \ + { \ + x = max(min(x, src_cols - 1), 0); \ + y = max(min(y, src_rows - 1), 0); \ + v = *(__global const T *)(srcptr + mad24(y, src_step, x * (int)sizeof(T) + src_offset)); \ + } +#elif defined BORDER_WRAP +#define EXTRAPOLATE(x, y, v) \ + { \ + if (x < 0) \ + x -= ((x - src_cols + 1) / src_cols) * src_cols; \ + if (x >= src_cols) \ + x %= src_cols; \ + \ + if (y < 0) \ + y -= ((y - src_rows + 1) / src_rows) * src_rows; \ + if( y >= src_rows ) \ + y %= src_rows; \ + v = *(__global const T *)(srcptr + mad24(y, src_step, x * (int)sizeof(T) + src_offset)); \ + } +#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101) +#ifdef BORDER_REFLECT +#define DELTA int delta = 0 +#else +#define DELTA int delta = 1 +#endif +#define EXTRAPOLATE(x, y, v) \ + { \ + DELTA; \ + if (src_cols == 1) \ + x = 0; \ + else \ + do \ + { \ + if( x < 0 ) \ + x = -x - 1 + delta; \ + else \ + x = src_cols - 1 - (x - src_cols) - delta; \ + } \ + while (x >= src_cols || x < 0); \ + \ + if (src_rows == 1) \ + y = 0; \ + else \ + do \ + { \ + if( y < 0 ) \ + y = -y - 1 + delta; \ + else \ + y = src_rows - 1 - (y - src_rows) - delta; \ + } \ + while (y >= src_rows || y < 0); \ + v = *(__global const T *)(srcptr + mad24(y, src_step, x * (int)sizeof(T) + src_offset)); \ + } +#else +#error No extrapolation method +#endif + +#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0) + +__kernel void copyMakeBorder(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + int top, int left, T scalar) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < dst_cols && y < dst_rows) + { + int src_x = x - left; + int src_y = y - top; + + int dst_index = mad24(y, dst_step, x * (int)sizeof(T) + dst_offset); + __global T * dst = (__global T *)(dstptr + dst_index); + + if (NEED_EXTRAPOLATION(src_x, src_y)) + EXTRAPOLATE(src_x, src_y, dst[0]) + else + { + int src_index = mad24(src_y, src_step, src_x * (int)sizeof(T) + src_offset); + __global const T * src = (__global const T *)(srcptr + src_index); + dst[0] = src[0]; + } + } +} diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index c35d4ccfa..e6d9c9e38 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -580,6 +580,7 @@ Mat UMat::getMat(int accessFlags) const Mat hdr(dims, size.p, type(), u->data + offset, step.p); hdr.flags = flags; hdr.u = u; + hdr.flags = flags; hdr.datastart = u->data; hdr.data = hdr.datastart + offset; hdr.datalimit = hdr.dataend = u->data + u->size; diff --git a/modules/imgproc/src/opencl/threshold.cl b/modules/imgproc/src/opencl/threshold.cl new file mode 100644 index 000000000..63e410297 --- /dev/null +++ b/modules/imgproc/src/opencl/threshold.cl @@ -0,0 +1,136 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Zhang Ying, zhangying913@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + +#ifdef VECTORIZED + +__kernel void threshold(__global const T * restrict src, int src_offset, int src_step, + __global T * dst, int dst_offset, int dst_step, + T thresh, T max_val, int max_index, int rows, int cols) +{ + int gx = get_global_id(0); + int gy = get_global_id(1); + + if (gx < cols && gy < rows) + { + gx *= VECSIZE; + int src_index = mad24(gy, src_step, src_offset + gx); + int dst_index = mad24(gy, dst_step, dst_offset + gx); + +#ifdef SRC_ALIGNED + VT sdata = *((__global VT *)(src + src_index)); +#else + VT sdata = VLOADN(0, src + src_index); +#endif + VT vthresh = (VT)(thresh); + +#ifdef THRESH_BINARY + VT vecValue = sdata > vthresh ? max_val : (VT)(0); +#elif defined THRESH_BINARY_INV + VT vecValue = sdata > vthresh ? (VT)(0) : max_val; +#elif defined THRESH_TRUNC + VT vecValue = sdata > vthresh ? thresh : sdata; +#elif defined THRESH_TOZERO + VT vecValue = sdata > vthresh ? sdata : (VT)(0); +#elif defined THRESH_TOZERO_INV + VT vecValue = sdata > vthresh ? (VT)(0) : sdata; +#endif + + if (gx + VECSIZE <= max_index) +#ifdef DST_ALIGNED + *(__global VT*)(dst + dst_index) = vecValue; +#else + VSTOREN(vecValue, 0, dst + dst_index); +#endif + else + { + __attribute__(( aligned(sizeof(VT)) )) T array[VECSIZE]; + *((VT*)array) = vecValue; + #pragma unroll + for (int i = 0; i < VECSIZE; ++i) + if (gx + i < max_index) + dst[dst_index + i] = array[i]; + } + } +} + +#else + +__kernel void threshold(__global const T * restrict src, int src_offset, int src_step, + __global T * dst, int dst_offset, int dst_step, + T thresh, T max_val, int rows, int cols) +{ + int gx = get_global_id(0); + int gy = get_global_id(1); + + if (gx < cols && gy < rows) + { + int src_index = mad24(gy, src_step, src_offset + gx); + int dst_index = mad24(gy, dst_step, dst_offset + gx); + + T sdata = src[src_index]; + +#ifdef THRESH_BINARY + dst[dst_index] = sdata > thresh ? max_val : (T)(0); +#elif defined THRESH_BINARY_INV + dst[dst_index] = sdata > thresh ? (T)(0) : max_val; +#elif defined THRESH_TRUNC + dst[dst_index] = sdata > thresh ? thresh : sdata; +#elif defined THRESH_TOZERO + dst[dst_index] = sdata > thresh ? sdata : (T)(0); +#elif defined THRESH_TOZERO_INV + dst[dst_index] = sdata > thresh ? (T)(0) : sdata; +#endif + } +} + +#endif diff --git a/modules/imgproc/test/ocl/test_imgproc.cpp b/modules/imgproc/test/ocl/test_imgproc.cpp index d92486330..ae833406f 100644 --- a/modules/imgproc/test/ocl/test_imgproc.cpp +++ b/modules/imgproc/test/ocl/test_imgproc.cpp @@ -446,7 +446,7 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( testing::Values((MatDepth)CV_8U, (MatDepth)CV_16S, (MatDepth)CV_32S, (MatDepth)CV_32F), testing::Values(Channels(1), Channels(3), (Channels)4), Bool(), // border isolated or not - Values((BorderType)BORDER_REPLICATE, (BorderType)BORDER_REFLECT, + Values((BorderType)BORDER_CONSTANT, (BorderType)BORDER_REPLICATE, (BorderType)BORDER_REFLECT, (BorderType)BORDER_WRAP, (BorderType)BORDER_REFLECT_101), Bool()));