diff --git a/modules/imgproc/perf/opencl/perf_imgproc.cpp b/modules/imgproc/perf/opencl/perf_imgproc.cpp index fa82b7aa4..6f0e46365 100644 --- a/modules/imgproc/perf/opencl/perf_imgproc.cpp +++ b/modules/imgproc/perf/opencl/perf_imgproc.cpp @@ -108,12 +108,9 @@ OCL_PERF_TEST_P(CornerMinEigenValFixture, CornerMinEigenVal, UMat src(srcSize, type), dst(srcSize, CV_32FC1); declare.in(src, WARMUP_RNG).out(dst); - const int depth = CV_MAT_DEPTH(type); - const ERROR_TYPE errorType = depth == CV_8U ? ERROR_ABSOLUTE : ERROR_RELATIVE; - OCL_TEST_CYCLE() cv::cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); - SANITY_CHECK(dst, 1e-6, errorType); + SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); } ///////////// CornerHarris //////////////////////// @@ -132,7 +129,7 @@ OCL_PERF_TEST_P(CornerHarrisFixture, CornerHarris, OCL_TEST_CYCLE() cv::cornerHarris(src, dst, 5, 7, 0.1, borderType); - SANITY_CHECK(dst, 5e-5); + SANITY_CHECK(dst, 5e-6, ERROR_RELATIVE); } ///////////// Integral //////////////////////// diff --git a/modules/imgproc/src/corner.cpp b/modules/imgproc/src/corner.cpp index 8f8c77006..e5d54c682 100644 --- a/modules/imgproc/src/corner.cpp +++ b/modules/imgproc/src/corner.cpp @@ -41,14 +41,12 @@ //M*/ #include "precomp.hpp" -#include - +#include "opencl_kernels.hpp" namespace cv { -static void -calcMinEigenVal( const Mat& _cov, Mat& _dst ) +static void calcMinEigenVal( const Mat& _cov, Mat& _dst ) { int i, j; Size size = _cov.size(); @@ -104,8 +102,7 @@ calcMinEigenVal( const Mat& _cov, Mat& _dst ) } -static void -calcHarris( const Mat& _cov, Mat& _dst, double k ) +static void calcHarris( const Mat& _cov, Mat& _dst, double k ) { int i, j; Size size = _cov.size(); @@ -219,8 +216,7 @@ static void eigen2x2( const float* cov, float* dst, int n ) } } -static void -calcEigenValsVecs( const Mat& _cov, Mat& _dst ) +static void calcEigenValsVecs( const Mat& _cov, Mat& _dst ) { Size size = _cov.size(); if( _cov.isContinuous() && _dst.isContinuous() ) @@ -306,12 +302,77 @@ cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size, calcEigenValsVecs( cov, eigenv ); } +static bool ocl_cornerMinEigenValVecs(InputArray _src, OutputArray _dst, int block_size, + int aperture_size, double k, int borderType, int op_type) +{ + CV_Assert(op_type == HARRIS || op_type == MINEIGENVAL); + + if ( !(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || + borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101) ) + return false; + + int type = _src.type(), depth = CV_MAT_DEPTH(type); + double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size; + if( aperture_size < 0 ) + scale *= 2.0; + if( depth == CV_8U ) + scale *= 255.0; + scale = 1.0 / scale; + + if ( !(type == CV_8UC1 || type == CV_32FC1) ) + return false; + + UMat Dx, Dy; + if (aperture_size > 0) + { + Sobel(_src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType); + Sobel(_src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType); + } + else + { + Scharr(_src, Dx, CV_32F, 1, 0, scale, 0, borderType); + Scharr(_src, Dy, CV_32F, 0, 1, scale, 0, borderType); + } + + const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", + 0, "BORDER_REFLECT101" }; + const char * const cornerType[] = { "CORNER_MINEIGENVAL", "CORNER_HARRIS", 0 }; + + ocl::Kernel cornelKernel("corner", ocl::imgproc::corner_oclsrc, + format("-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s -D %s", + block_size / 2, block_size / 2, block_size, block_size, + borderTypes[borderType], cornerType[op_type])); + if (cornelKernel.empty()) + return false; + + _dst.createSameSize(_src, CV_32FC1); + UMat dst = _dst.getUMat(); + + cornelKernel.args(ocl::KernelArg::ReadOnly(Dx), ocl::KernelArg::ReadOnly(Dy), + ocl::KernelArg::WriteOnly(dst), (float)k); + + size_t blockSizeX = 256, blockSizeY = 1; + size_t gSize = blockSizeX - block_size / 2 * 2; + size_t globalSizeX = (Dx.cols) % gSize == 0 ? Dx.cols / gSize * blockSizeX : (Dx.cols / gSize + 1) * blockSizeX; + size_t rows_per_thread = 2; + size_t globalSizeY = ((Dx.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ? + ((Dx.rows + rows_per_thread - 1) / rows_per_thread) : + (((Dx.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY; + + size_t globalsize[2] = { globalSizeX, globalSizeY }, localsize[2] = { blockSizeX, blockSizeY }; + return cornelKernel.run(2, globalsize, localsize, false); +} + } void cv::cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType ) { + if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat() && + ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, 0.0, borderType, MINEIGENVAL)) + return; + Mat src = _src.getMat(); - _dst.create( src.size(), CV_32F ); + _dst.create( src.size(), CV_32FC1 ); Mat dst = _dst.getMat(); cornerEigenValsVecs( src, dst, blockSize, ksize, MINEIGENVAL, 0, borderType ); } @@ -319,8 +380,12 @@ void cv::cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, in void cv::cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksize, double k, int borderType ) { + if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat() && + ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, k, borderType, HARRIS)) + return; + Mat src = _src.getMat(); - _dst.create( src.size(), CV_32F ); + _dst.create( src.size(), CV_32FC1 ); Mat dst = _dst.getMat(); cornerEigenValsVecs( src, dst, blockSize, ksize, HARRIS, k, borderType ); } diff --git a/modules/imgproc/src/opencl/corner.cl b/modules/imgproc/src/opencl/corner.cl new file mode 100644 index 000000000..563cb9808 --- /dev/null +++ b/modules/imgproc/src/opencl/corner.cl @@ -0,0 +1,227 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Shengen Yan,yanshengen@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////Macro for border type//////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef BORDER_CONSTANT +#elif defined BORDER_REPLICATE +#define EXTRAPOLATE(x, maxV) \ + { \ + x = max(min(x, maxV - 1), 0); \ + } +#elif defined BORDER_WRAP +#define EXTRAPOLATE(x, maxV) \ + { \ + if (x < 0) \ + x -= ((x - maxV + 1) / maxV) * maxV; \ + if (x >= maxV) \ + x %= maxV; \ + } +#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT101) +#define EXTRAPOLATE_(x, maxV, delta) \ + { \ + if (maxV == 1) \ + x = 0; \ + else \ + do \ + { \ + if ( x < 0 ) \ + x = -x - 1 + delta; \ + else \ + x = maxV - 1 - (x - maxV) - delta; \ + } \ + while (x >= maxV || x < 0); \ + } +#ifdef BORDER_REFLECT +#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0) +#else +#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1) +#endif +#else +#error No extrapolation method +#endif + +#define THREADS 256 + +/////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////calcHarris//////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +__kernel void corner(__global const float * Dx, int dx_step, int dx_offset, int dx_whole_rows, int dx_whole_cols, + __global const float * Dy, int dy_step, int dy_offset, int dy_whole_rows, int dy_whole_cols, + __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float k) +{ + int col = get_local_id(0); + int gX = get_group_id(0); + int gY = get_group_id(1); + int gly = get_global_id(1); + + int dx_x_off = (dx_offset % dx_step) >> 2; + int dx_y_off = dx_offset / dx_step; + int dy_x_off = (dy_offset % dy_step) >> 2; + int dy_y_off = dy_offset / dy_step; + int dst_x_off = (dst_offset % dst_step) >> 2; + int dst_y_off = dst_offset / dst_step; + + int dx_startX = gX * (THREADS-ksX+1) - anX + dx_x_off; + int dx_startY = (gY << 1) - anY + dx_y_off; + int dy_startX = gX * (THREADS-ksX+1) - anX + dy_x_off; + int dy_startY = (gY << 1) - anY + dy_y_off; + int dst_startX = gX * (THREADS-ksX+1) + dst_x_off; + int dst_startY = (gY << 1) + dst_y_off; + + float dx_data[ksY+1],dy_data[ksY+1], data[3][ksY+1]; + __local float temp[6][THREADS]; + +#ifdef BORDER_CONSTANT + for (int i=0; i < ksY+1; i++) + { + bool dx_con = dx_startX+col >= 0 && dx_startX+col < dx_whole_cols && dx_startY+i >= 0 && dx_startY+i < dx_whole_rows; + int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col); + float dx_s = dx_con ? Dx[indexDx] : 0.0f; + dx_data[i] = dx_s; + + bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows; + int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col); + float dy_s = dy_con ? Dy[indexDy] : 0.0f; + dy_data[i] = dy_s; + + data[0][i] = dx_data[i] * dx_data[i]; + data[1][i] = dx_data[i] * dy_data[i]; + data[2][i] = dy_data[i] * dy_data[i]; + } +#else + int clamped_col = min(2*dst_cols, col); + for (int i=0; i < ksY+1; i++) + { + int dx_selected_row = dx_startY+i, dx_selected_col = dx_startX+clamped_col; + EXTRAPOLATE(dx_selected_row, dx_whole_rows) + EXTRAPOLATE(dx_selected_col, dx_whole_cols) + dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col]; + + int dy_selected_row = dy_startY+i, dy_selected_col = dy_startX+clamped_col; + EXTRAPOLATE(dy_selected_row, dy_whole_rows) + EXTRAPOLATE(dy_selected_col, dy_whole_cols) + dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col]; + + data[0][i] = dx_data[i] * dx_data[i]; + data[1][i] = dx_data[i] * dy_data[i]; + data[2][i] = dy_data[i] * dy_data[i]; + } +#endif + float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f; + for (int i=1; i < ksY; i++) + { + sum0 += data[0][i]; + sum1 += data[1][i]; + sum2 += data[2][i]; + } + + float sum01 = sum0 + data[0][0]; + float sum02 = sum0 + data[0][ksY]; + temp[0][col] = sum01; + temp[1][col] = sum02; + float sum11 = sum1 + data[1][0]; + float sum12 = sum1 + data[1][ksY]; + temp[2][col] = sum11; + temp[3][col] = sum12; + float sum21 = sum2 + data[2][0]; + float sum22 = sum2 + data[2][ksY]; + temp[4][col] = sum21; + temp[5][col] = sum22; + barrier(CLK_LOCAL_MEM_FENCE); + + if (col < (THREADS - (ksX - 1))) + { + col += anX; + int posX = dst_startX - dst_x_off + col - anX; + int posY = (gly << 1); + int till = (ksX + 1)%2; + float tmp_sum[6] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; + for (int k=0; k<6; k++) + { + float temp_sum = 0; + for (int i=-anX; i<=anX - till; i++) + temp_sum += temp[k][col+i]; + tmp_sum[k] = temp_sum; + } + +#ifdef CORNER_HARRIS + if (posX < dst_cols && (posY) < dst_rows) + { + int dst_index = mad24(dst_step, dst_startY, (int)sizeof(float) * (dst_startX + col - anX)); + *(__global float *)(dst + dst_index) = + tmp_sum[0] * tmp_sum[4] - tmp_sum[2] * tmp_sum[2] - k * (tmp_sum[0] + tmp_sum[4]) * (tmp_sum[0] + tmp_sum[4]); + } + if (posX < dst_cols && (posY + 1) < dst_rows) + { + int dst_index = mad24(dst_step, dst_startY + 1, (int)sizeof(float) * (dst_startX + col - anX)); + *(__global float *)(dst + dst_index) = + tmp_sum[1] * tmp_sum[5] - tmp_sum[3] * tmp_sum[3] - k * (tmp_sum[1] + tmp_sum[5]) * (tmp_sum[1] + tmp_sum[5]); + } +#elif defined CORNER_MINEIGENVAL + if (posX < dst_cols && (posY) < dst_rows) + { + int dst_index = mad24(dst_step, dst_startY, (int)sizeof(float) * (dst_startX + col - anX)); + float a = tmp_sum[0] * 0.5f; + float b = tmp_sum[2]; + float c = tmp_sum[4] * 0.5f; + *(__global float *)(dst + dst_index) = (float)((a+c) - sqrt((a-c)*(a-c) + b*b)); + } + if (posX < dst_cols && (posY + 1) < dst_rows) + { + int dst_index = mad24(dst_step, dst_startY + 1, (int)sizeof(float) * (dst_startX + col - anX)); + float a = tmp_sum[1] * 0.5f; + float b = tmp_sum[3]; + float c = tmp_sum[5] * 0.5f; + *(__global float *)(dst + dst_index) = (float)((a+c) - sqrt((a-c)*(a-c) + b*b)); + } +#else +#error "No such corners type" +#endif + } +} diff --git a/modules/imgproc/test/ocl/test_imgproc.cpp b/modules/imgproc/test/ocl/test_imgproc.cpp index bf6f8e64a..2d34d7154 100644 --- a/modules/imgproc/test/ocl/test_imgproc.cpp +++ b/modules/imgproc/test/ocl/test_imgproc.cpp @@ -103,7 +103,7 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, } }; -////////////////////////////////copyMakeBorder//////////////////////////////////////////// +//////////////////////////////// copyMakeBorder //////////////////////////////////////////// PARAM_TEST_CASE(CopyMakeBorder, MatDepth, // depth Channels, // channels @@ -171,7 +171,7 @@ OCL_TEST_P(CopyMakeBorder, Mat) } } -////////////////////////////////equalizeHist////////////////////////////////////////////// +//////////////////////////////// equalizeHist ////////////////////////////////////////////// typedef ImgprocTestBase EqualizeHist; @@ -188,14 +188,14 @@ OCL_TEST_P(EqualizeHist, Mat) } } -////////////////////////////////cornerMinEigenVal////////////////////////////////////////// +//////////////////////////////// Corners test ////////////////////////////////////////// struct CornerTestBase : public ImgprocTestBase { virtual void random_roi() { - Mat image = readImageType("gpu/stereobm/aloe-L.png", type); + Mat image = readImageType("../gpu/stereobm/aloe-L.png", type); ASSERT_FALSE(image.empty()); bool isFP = CV_MAT_DEPTH(type) >= CV_32F; @@ -224,7 +224,7 @@ struct CornerTestBase : typedef CornerTestBase CornerMinEigenVal; -OCL_TEST_P(CornerMinEigenVal, DISABLED_Mat) +OCL_TEST_P(CornerMinEigenVal, Mat) { for (int j = 0; j < test_loop_times; j++) { @@ -239,11 +239,11 @@ OCL_TEST_P(CornerMinEigenVal, DISABLED_Mat) } } -////////////////////////////////cornerHarris////////////////////////////////////////// +//////////////////////////////// cornerHarris ////////////////////////////////////////// typedef CornerTestBase CornerHarris; -OCL_TEST_P(CornerHarris, DISABLED_Mat) +OCL_TEST_P(CornerHarris, Mat) { for (int j = 0; j < test_loop_times; j++) { @@ -255,11 +255,11 @@ OCL_TEST_P(CornerHarris, DISABLED_Mat) OCL_OFF(cv::cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType)); OCL_ON(cv::cornerHarris(usrc_roi, udst_roi, blockSize, apertureSize, k, borderType)); - Near(1e-5, true); + Near(1e-6, true); } } -//////////////////////////////////integral///////////////////////////////////////////////// +////////////////////////////////// integral ///////////////////////////////////////////////// struct Integral : public ImgprocTestBase @@ -331,8 +331,7 @@ OCL_TEST_P(Integral, Mat2) } } -/////////////////////////////////////////////////////////////////////////////////////////////////// -//// threshold +//////////////////////////////////////// threshold ////////////////////////////////////////////// struct Threshold : public ImgprocTestBase @@ -364,9 +363,7 @@ OCL_TEST_P(Threshold, Mat) } } - -///////////////////////////////////////////////////////////////////////////////////////////////////////// -//// CLAHE +/////////////////////////////////////////// CLAHE ////////////////////////////////////////////////// PARAM_TEST_CASE(CLAHETest, Size, double, bool) {