From 1291bd1c4a7407d2d07f6a11176718cf8052e0ac Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 6 Mar 2014 15:40:27 +0400 Subject: [PATCH] ported fast calculation of covar data --- modules/imgproc/src/corner.cpp | 91 +++++-- modules/imgproc/src/opencl/covardata.cl | 315 ++++++++++++++++++++++++ 2 files changed, 386 insertions(+), 20 deletions(-) create mode 100644 modules/imgproc/src/opencl/covardata.cl diff --git a/modules/imgproc/src/corner.cpp b/modules/imgproc/src/corner.cpp index 172a531a3..1a58391f8 100644 --- a/modules/imgproc/src/corner.cpp +++ b/modules/imgproc/src/corner.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#define CV_OPENCL_RUN_ASSERT #include "opencl_kernels.hpp" namespace cv @@ -304,6 +305,70 @@ cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size, #ifdef HAVE_OPENCL +static bool extractCovData(InputArray _src, UMat & Dx, UMat & Dy, int depth, + int block_size, int aperture_size, int borderType, + const char * const borderTypeStr) +{ + float scale = (float)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size; + if (aperture_size < 0) + scale *= 2.0f; + if (depth == CV_8U) + scale *= 255.0f; + scale = 1.0f / scale; + + UMat src = _src.getUMat(); + + Size wholeSize; + Point ofs; + src.locateROI(wholeSize, ofs); + + const int sobel_lsz = 16; + if ((aperture_size == 3 || aperture_size == 5 || aperture_size == 7 || aperture_size == -1) && + wholeSize.height > sobel_lsz + (aperture_size >> 1) && + wholeSize.width > sobel_lsz + (aperture_size >> 1)) + { + CV_Assert(depth == CV_8U || depth == CV_32F); + + Dx.create(src.size(), CV_32FC1); + Dy.create(src.size(), CV_32FC1); + + size_t localsize[2] = { sobel_lsz, sobel_lsz }; + size_t globalsize[2] = { localsize[0]*(1 + (src.cols - 1) / localsize[0]), + localsize[1]*(1 + (src.rows - 1) / localsize[1]) }; + + int src_offset_x = (src.offset % src.step) / src.elemSize(); + int src_offset_y = src.offset / src.step; + + ocl::Kernel k(format("sobel%d", aperture_size).c_str(), ocl::imgproc::covardata_oclsrc, + cv::format("-D BLK_X=%d -D BLK_Y=%d -D %s -D SRCTYPE=%s%s", + (int)localsize[0], (int)localsize[1], borderTypeStr, ocl::typeToStr(depth), + aperture_size < 0 ? " -D SCHARR" : "")); + if (k.empty()) + return false; + + k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, src_offset_x, src_offset_y, + ocl::KernelArg::WriteOnlyNoSize(Dx), ocl::KernelArg::WriteOnly(Dy), + wholeSize.height, wholeSize.width, scale); + + return k.run(2, globalsize, localsize, NULL); + } + else + { + if (aperture_size > 0) + { + Sobel(_src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType); + Sobel(_src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType); + } + else + { + Scharr(_src, Dx, CV_32F, 1, 0, scale, 0, borderType); + Scharr(_src, Dy, CV_32F, 0, 1, scale, 0, borderType); + } + } + + return true; +} + static bool ocl_cornerMinEigenValVecs(InputArray _src, OutputArray _dst, int block_size, int aperture_size, double k, int borderType, int op_type) { @@ -314,32 +379,18 @@ static bool ocl_cornerMinEigenValVecs(InputArray _src, OutputArray _dst, int blo return false; int type = _src.type(), depth = CV_MAT_DEPTH(type); - double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size; - if( aperture_size < 0 ) - scale *= 2.0; - if( depth == CV_8U ) - scale *= 255.0; - scale = 1.0 / scale; - if ( !(type == CV_8UC1 || type == CV_32FC1) ) return false; - UMat Dx, Dy; - if (aperture_size > 0) - { - Sobel(_src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType); - Sobel(_src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType); - } - else - { - Scharr(_src, Dx, CV_32F, 1, 0, scale, 0, borderType); - Scharr(_src, Dy, CV_32F, 0, 1, scale, 0, borderType); - } - const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", - 0, "BORDER_REFLECT101" }; + "BORDER_WRAP", "BORDER_REFLECT101" }; const char * const cornerType[] = { "CORNER_MINEIGENVAL", "CORNER_HARRIS", 0 }; + UMat Dx, Dy; + if (!extractCovData(_src, Dx, Dy, depth, block_size, aperture_size, + borderType, borderTypes[borderType])) + return false; + ocl::Kernel cornelKernel("corner", ocl::imgproc::corner_oclsrc, format("-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s -D %s", block_size / 2, block_size / 2, block_size, block_size, diff --git a/modules/imgproc/src/opencl/covardata.cl b/modules/imgproc/src/opencl/covardata.cl new file mode 100644 index 000000000..8aee637da --- /dev/null +++ b/modules/imgproc/src/opencl/covardata.cl @@ -0,0 +1,315 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +/////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////Macro for border type//////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef BORDER_CONSTANT +//CCCCCC|abcdefgh|CCCCCCC +#define EXTRAPOLATE(x, maxV) +#elif defined BORDER_REPLICATE +//aaaaaa|abcdefgh|hhhhhhh +#define EXTRAPOLATE(x, maxV) \ + { \ + (x) = max(min((x), (maxV) - 1), 0); \ + } +#elif defined BORDER_WRAP +//cdefgh|abcdefgh|abcdefg +#define EXTRAPOLATE(x, maxV) \ + { \ + (x) = ( (x) + (maxV) ) % (maxV); \ + } +#elif defined BORDER_REFLECT +//fedcba|abcdefgh|hgfedcb +#define EXTRAPOLATE(x, maxV) \ + { \ + (x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \ + } +#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101 +//gfedcb|abcdefgh|gfedcba +#define EXTRAPOLATE(x, maxV) \ + { \ + (x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \ + } +#else +#error No extrapolation method +#endif + +#define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*src_step))[_x]) + +#ifdef BORDER_CONSTANT +//CCCCCC|abcdefgh|CCCCCCC +#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y)) +#else +#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y)) +#endif + +#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x]) +#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x]) + +#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \ + int srcX = x + srcOffsetX - (kernel_border); \ + int srcY = y + srcOffsetY - (kernel_border); \ + int xb = srcX; \ + int yb = srcY; \ + \ + EXTRAPOLATE(xb, (width)); \ + EXTRAPOLATE(yb, (height)); \ + lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \ + \ + if(lix < ((kernel_border)*2)) \ + { \ + int xb = srcX+BLK_X; \ + EXTRAPOLATE(xb,(width)); \ + lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \ + } \ + if(liy< ((kernel_border)*2)) \ + { \ + int yb = srcY+BLK_Y; \ + EXTRAPOLATE(yb, (height)); \ + lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \ + } \ + if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \ + { \ + int xb = srcX+BLK_X; \ + int yb = srcY+BLK_Y; \ + EXTRAPOLATE(xb,(width)); \ + EXTRAPOLATE(yb,(height)); \ + lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \ + } + +__kernel void sobel3(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY, + __global uchar * DstX, int DstXPitch, int DstXOffset, + __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth, + int height, int width, float scale) +{ + __local float lsmem[BLK_Y+2][BLK_X+2]; + + int lix = get_local_id(0); + int liy = get_local_id(1); + + int x = (int)get_global_id(0); + int y = (int)get_global_id(1); + + INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1) + barrier(CLK_LOCAL_MEM_FENCE); + + if( x >= dstWidth || y >=dstHeight ) return; + + float u1 = lsmem[liy][lix]; + float u2 = lsmem[liy][lix+1]; + float u3 = lsmem[liy][lix+2]; + + float m1 = lsmem[liy+1][lix]; + float m3 = lsmem[liy+1][lix+2]; + + float b1 = lsmem[liy+2][lix]; + float b2 = lsmem[liy+2][lix+1]; + float b3 = lsmem[liy+2][lix+2]; + + //calc and store dx and dy;// +#ifdef SCHARR + DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale; + DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale; +#else + DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale; + DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale; +#endif +} + +__kernel void sobel5(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY, + __global uchar * DstX, int DstXPitch, int DstXOffset, + __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth, + int height, int width, float scale) +{ + __local float lsmem[BLK_Y+4][BLK_X+4]; + + int lix = get_local_id(0); + int liy = get_local_id(1); + + int x = (int)get_global_id(0); + int y = (int)get_global_id(1); + + INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2) + barrier(CLK_LOCAL_MEM_FENCE); + + if( x >= dstWidth || y >=dstHeight ) return; + + float t1 = lsmem[liy][lix]; + float t2 = lsmem[liy][lix+1]; + float t3 = lsmem[liy][lix+2]; + float t4 = lsmem[liy][lix+3]; + float t5 = lsmem[liy][lix+4]; + + float u1 = lsmem[liy+1][lix]; + float u2 = lsmem[liy+1][lix+1]; + float u3 = lsmem[liy+1][lix+2]; + float u4 = lsmem[liy+1][lix+3]; + float u5 = lsmem[liy+1][lix+4]; + + float m1 = lsmem[liy+2][lix]; + float m2 = lsmem[liy+2][lix+1]; + float m4 = lsmem[liy+2][lix+3]; + float m5 = lsmem[liy+2][lix+4]; + + float l1 = lsmem[liy+3][lix]; + float l2 = lsmem[liy+3][lix+1]; + float l3 = lsmem[liy+3][lix+2]; + float l4 = lsmem[liy+3][lix+3]; + float l5 = lsmem[liy+3][lix+4]; + + float b1 = lsmem[liy+4][lix]; + float b2 = lsmem[liy+4][lix+1]; + float b3 = lsmem[liy+4][lix+2]; + float b4 = lsmem[liy+4][lix+3]; + float b5 = lsmem[liy+4][lix+4]; + + //calc and store dx and dy;// + DSTX(x,y) = scale * + mad(12.0f, m4 - m2, + mad(6.0f, m5 - m1, + mad(8.0f, u4 - u2 + l4 - l2, + mad(4.0f, u5 - u1 + l5 - l1, + mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 ) + ) + ) + ) + ); + + DSTY(x,y) = scale * + mad(12.0f, l3 - u3, + mad(6.0f, b3 - t3, + mad(8.0f, l2 - u2 + l4 - u4, + mad(4.0f, b2 - t2 + b4 - t4, + mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 ) + ) + ) + ) + ); +} + +__kernel void sobel7(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY, + __global uchar * DstX, int DstXPitch, int DstXOffset, + __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth, + int height, int width, float scale) +{ + __local float lsmem[BLK_Y+6][BLK_X+6]; + + int lix = get_local_id(0); + int liy = get_local_id(1); + + int x = (int)get_global_id(0); + int y = (int)get_global_id(1); + + INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3) + barrier(CLK_LOCAL_MEM_FENCE); + + if( x >= dstWidth || y >=dstHeight ) return; + + float tt1 = lsmem[liy][lix]; + float tt2 = lsmem[liy][lix+1]; + float tt3 = lsmem[liy][lix+2]; + float tt4 = lsmem[liy][lix+3]; + float tt5 = lsmem[liy][lix+4]; + float tt6 = lsmem[liy][lix+5]; + float tt7 = lsmem[liy][lix+6]; + + float t1 = lsmem[liy+1][lix]; + float t2 = lsmem[liy+1][lix+1]; + float t3 = lsmem[liy+1][lix+2]; + float t4 = lsmem[liy+1][lix+3]; + float t5 = lsmem[liy+1][lix+4]; + float t6 = lsmem[liy+1][lix+5]; + float t7 = lsmem[liy+1][lix+6]; + + float u1 = lsmem[liy+2][lix]; + float u2 = lsmem[liy+2][lix+1]; + float u3 = lsmem[liy+2][lix+2]; + float u4 = lsmem[liy+2][lix+3]; + float u5 = lsmem[liy+2][lix+4]; + float u6 = lsmem[liy+2][lix+5]; + float u7 = lsmem[liy+2][lix+6]; + + float m1 = lsmem[liy+3][lix]; + float m2 = lsmem[liy+3][lix+1]; + float m3 = lsmem[liy+3][lix+2]; + float m5 = lsmem[liy+3][lix+4]; + float m6 = lsmem[liy+3][lix+5]; + float m7 = lsmem[liy+3][lix+6]; + + float l1 = lsmem[liy+4][lix]; + float l2 = lsmem[liy+4][lix+1]; + float l3 = lsmem[liy+4][lix+2]; + float l4 = lsmem[liy+4][lix+3]; + float l5 = lsmem[liy+4][lix+4]; + float l6 = lsmem[liy+4][lix+5]; + float l7 = lsmem[liy+4][lix+6]; + + float b1 = lsmem[liy+5][lix]; + float b2 = lsmem[liy+5][lix+1]; + float b3 = lsmem[liy+5][lix+2]; + float b4 = lsmem[liy+5][lix+3]; + float b5 = lsmem[liy+5][lix+4]; + float b6 = lsmem[liy+5][lix+5]; + float b7 = lsmem[liy+5][lix+6]; + + float bb1 = lsmem[liy+6][lix]; + float bb2 = lsmem[liy+6][lix+1]; + float bb3 = lsmem[liy+6][lix+2]; + float bb4 = lsmem[liy+6][lix+3]; + float bb5 = lsmem[liy+6][lix+4]; + float bb6 = lsmem[liy+6][lix+5]; + float bb7 = lsmem[liy+6][lix+6]; + + //calc and store dx and dy + DSTX(x,y) = scale * + mad(100.0f, m5 - m3, + mad(80.0f, m6 - m2, + mad(20.0f, m7 - m1, + mad(75.0f, u5 - u3 + l5 - l3, + mad(60.0f, u6 - u2 + l6 - l2, + mad(15.0f, u7 - u1 + l7 - l1, + mad(30.0f, t5 - t3 + b5 - b3, + mad(24.0f, t6 - t2 + b6 - b2, + mad(6.0f, t7 - t1 + b7 - b1, + mad(5.0f, tt5 - tt3 + bb5 - bb3, + mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ); + + DSTY(x,y) = scale * + mad(100.0f, l4 - u4, + mad(80.0f, b4 - t4, + mad(20.0f, bb4 - tt4, + mad(75.0f, l5 - u5 + l3 - u3, + mad(60.0f, b5 - t5 + b3 - t3, + mad(15.0f, bb5 - tt5 + bb3 - tt3, + mad(30.0f, l6 - u6 + l2 - u2, + mad(24.0f, b6 - t6 + b2 - t2, + mad(6.0f, bb6 - tt6 + bb2 - tt2, + mad(5.0f, l7 - u7 + l1 - u1, + mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ); +}