From b5f717b6b38846449206b53fd56628804d328be0 Mon Sep 17 00:00:00 2001 From: Konstantin Matskevich Date: Thu, 16 Jan 2014 14:10:17 +0400 Subject: [PATCH] stereoBM --- modules/calib3d/perf/opencl/perf_stereobm.cpp | 76 +++++++++ modules/calib3d/src/opencl/stereobm.cl | 159 ++++++++++++++++++ modules/calib3d/src/precomp.hpp | 2 + modules/calib3d/src/stereobm.cpp | 120 +++++++++++-- modules/calib3d/test/opencl/test_stereobm.cpp | 96 +++++++++++ 5 files changed, 443 insertions(+), 10 deletions(-) create mode 100644 modules/calib3d/perf/opencl/perf_stereobm.cpp create mode 100644 modules/calib3d/src/opencl/stereobm.cl create mode 100644 modules/calib3d/test/opencl/test_stereobm.cpp diff --git a/modules/calib3d/perf/opencl/perf_stereobm.cpp b/modules/calib3d/perf/opencl/perf_stereobm.cpp new file mode 100644 index 000000000..3352e6b1a --- /dev/null +++ b/modules/calib3d/perf/opencl/perf_stereobm.cpp @@ -0,0 +1,76 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "perf_precomp.hpp" +#include "opencv2/ts/ocl_perf.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +typedef std::tr1::tuple StereoBMFixture_t; +typedef TestBaseWithParam StereoBMFixture; + +OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32, 64, 128), OCL_PERF_ENUM(11,21) ) ) +{ + const int n_disp = get<0>(GetParam()), winSize = get<1>(GetParam()); + UMat left, right, disp; + + imread(getDataPath("gpu/stereobm/aloe-L.png"), IMREAD_GRAYSCALE).copyTo(left); + imread(getDataPath("gpu/stereobm/aloe-R.png"), IMREAD_GRAYSCALE).copyTo(right); + ASSERT_FALSE(left.empty()); + ASSERT_FALSE(right.empty()); + + declare.in(left, right); + + Ptr bm = createStereoBM( n_disp, winSize ); + bm->setPreFilterType(bm->PREFILTER_NORMALIZED_RESPONSE); + + OCL_TEST_CYCLE() bm->compute(left, right, disp); + + SANITY_CHECK(disp, 1e-2, ERROR_RELATIVE); +} + +}//ocl +}//cvtest +#endif diff --git a/modules/calib3d/src/opencl/stereobm.cl b/modules/calib3d/src/opencl/stereobm.cl new file mode 100644 index 000000000..d8f238b89 --- /dev/null +++ b/modules/calib3d/src/opencl/stereobm.cl @@ -0,0 +1,159 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + + +////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////// stereoBM ////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef SIZE + +__kernel void stereoBM(__global const uchar * left, __global const uchar * right, __global uchar * dispptr, + int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp, + int preFilterCap, int winsize, int textureTreshold, int uniquenessRatio) +{ + int x = get_global_id(0); + int y = get_global_id(1); + int wsz2 = winsize/2; + short FILTERED = (mindisp - 1)<<4; + + if(x < cols && y < rows ) + { + int dispIdx = mad24(y, disp_step, disp_offset + x*(int)sizeof(short) ); + __global short * disp = (__global short*)(dispptr + dispIdx); + disp[0] = FILTERED; + if( (x > mindisp+ndisp+wsz2-2) && (y > wsz2-1) && (x < cols-wsz2-mindisp) && (y < rows - wsz2)) + { + int cost[SIZE]; + int textsum = 0; + + for(int d = mindisp; d < ndisp+mindisp; d++) + { + cost[d-mindisp] = 0; + for(int i = -wsz2; i < wsz2+1; i++) + for(int j = -wsz2; j < wsz2+1; j++) + { + textsum += abs( left[min( y+i, rows-1 ) * cols + min( x+j, cols-1 )] - preFilterCap ); + cost[d-mindisp] += abs( left[min( y+i, rows-1 ) * cols + min( x+j, cols-1 )] + - right[min( y+i, rows-1 ) * cols + min( x+j-d, cols-1 )] ); + } + } + + int best_disp = mindisp, best_cost = cost[0]; + for(int d = mindisp; d < ndisp+mindisp; d++) + { + best_cost = (cost[d-mindisp] < best_cost) ? cost[d-mindisp] : best_cost; + best_disp = (best_cost == cost[d-mindisp]) ? d : best_disp; + } + + int thresh = best_cost + (best_cost * uniquenessRatio/100); + for(int d = mindisp; (d < ndisp + mindisp) && (uniquenessRatio > 0); d++) + { + best_disp = ( (cost[d-mindisp] <= thresh) && (d < best_disp-1 || d > best_disp + 1) ) ? FILTERED : best_disp; + } + + disp[0] = textsum < textureTreshold ? 
(FILTERED) : (best_disp == FILTERED) ? (short)(best_disp) : (short)(best_disp); + + if( best_disp != FILTERED ) + { + int y1 = (best_disp > mindisp) ? cost[best_disp-mindisp-1] : cost[best_disp-mindisp+1], + y2 = cost[best_disp-mindisp], + y3 = (best_disp < mindisp+ndisp-1) ? cost[best_disp-mindisp+1] : cost[best_disp-mindisp-1]; + float a = (y3 - ((best_disp+1)*(y2-y1) + best_disp*y1 - (best_disp-1)*y2)/(best_disp - (best_disp-1)) )/ + ((best_disp+1)*((best_disp+1) - (best_disp-1) - best_disp) + (best_disp-1)*best_disp); + float b = (y2 - y1)/(best_disp - (best_disp-1)) - a*((best_disp-1)+best_disp); + disp[0] = (y1 == y2 || y2 == y3) ? (short)(best_disp*16) : (short)(-b/(2*a)*16); + } + } + } +} + +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////// Norm Prefilter /////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////// + +__kernel void prefilter_norm(__global unsigned char *input, __global unsigned char *output, + int rows, int cols, int prefilterCap, int winsize, int scale_g, int scale_s) +{ + int x = get_global_id(0); + int y = get_global_id(1); + int wsz2 = winsize/2; + + if(x < cols && y < rows) + { + int cov1 = input[ max(y-1, 0) * cols + x] * 1 + + input[y * cols + max(x-1,0)] * 1 + input[ y * cols + x] * 4 + input[y * cols + min(x+1, cols-1)] * 1 + + input[min(y+1, rows-1) * cols + x] * 1; + int cov2 = 0; + for(int i = -wsz2; i < wsz2+1; i++) + for(int j = -wsz2; j < wsz2+1; j++) + cov2 += input[min( max( (y+i),0 ),rows-1 ) * cols + min( max( (x+j),0 ),cols-1 )]; + + int res = (cov1*scale_g - cov2*scale_s)>>10; + res = min(min(max(-prefilterCap, res), prefilterCap) + prefilterCap, 255); + output[y * cols + x] = res & 0xFF; + } +} + + +////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// 
Sobel Prefilter /////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////// + +__kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned char *output, + int rows, int cols, int prefilterCap) +{ + int x = get_global_id(0); + int y = get_global_id(1); + output[y * cols + x] = min(prefilterCap, 255) & 0xFF; + if(x < cols && y < rows-1 && x > 0) + { + int cov = input[((y > 0) ? y-1 : y+1) * cols + (x-1)] * (-1) + input[((y > 0) ? y-1 : y+1) * cols + ((x #include +#include "opencl_kernels.hpp" namespace cv { @@ -85,6 +86,26 @@ struct StereoBMParams int dispType; }; +static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsize, int prefilterCap) +{ + ocl::Kernel k("prefilter_norm", ocl::calib3d::stereobm_oclsrc); + if(k.empty()) + return false; + + int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2); + scale_g *= scale_s; + + UMat input = _input.getUMat(), output; + _output.create(input.size(), input.type()); + output = _output.getUMat(); + + size_t globalThreads[3] = { input.cols, input.rows, 1 }; + + k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, + prefilterCap, winsize, scale_g, scale_s); + + return k.run(2, globalThreads, NULL, false); +} static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf ) { @@ -149,6 +170,24 @@ static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uc } } +static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int prefilterCap) +{ + ocl::Kernel k("prefilter_xsobel", ocl::calib3d::stereobm_oclsrc); + if(k.empty()) + return false; + + UMat input = _input.getUMat(), output; + _output.create(input.size(), input.type()); + output = _output.getUMat(); + + size_t blockSize = 1; + size_t globalThreads[3] = { input.cols, input.rows, 1 }; + size_t localThreads[3] = { blockSize, 
blockSize, 1 }; + + k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, prefilterCap); + + return k.run(2, globalThreads, localThreads, false); +} static void prefilterXSobel( const Mat& src, Mat& dst, int ftzero ) @@ -534,7 +573,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp; lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep; rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-1) - dy0*sstep; - for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep ) { int lval = lptr[0]; @@ -651,6 +689,25 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, } } +static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state) +{ + if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE ) + { + if(!ocl_prefilter_norm( left0, left, state->preFilterSize, state->preFilterCap)) + return false; + if(!ocl_prefilter_norm( right0, right, state->preFilterSize, state->preFilterCap)) + return false; + } + else + { + if(!ocl_prefilter_xsobel( left0, left, state->preFilterCap )) + return false; + if(!ocl_prefilter_xsobel( right0, right, state->preFilterCap)) + return false; + } + return true; +} + struct PrefilterInvoker : public ParallelLoopBody { PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right, @@ -679,6 +736,32 @@ struct PrefilterInvoker : public ParallelLoopBody StereoBMParams* state; }; +static bool ocl_stereo( InputArray _left, InputArray _right, + OutputArray _disp, StereoBMParams* state) +{ + ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D SIZE=%d", state->numDisparities ) ); + if(k.empty()) + return false; + + UMat left = _left.getUMat(), right = _right.getUMat(); + _disp.create(_left.size(), CV_16S); + UMat disp = _disp.getUMat(); + + 
size_t globalThreads[3] = { left.cols, left.rows, 1 }; + + int idx = 0; + idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left)); + idx = k.set(idx, ocl::KernelArg::PtrReadOnly(right)); + idx = k.set(idx, ocl::KernelArg::WriteOnly(disp)); + idx = k.set(idx, state->minDisparity); + idx = k.set(idx, state->numDisparities); + idx = k.set(idx, state->preFilterCap); + idx = k.set(idx, state->SADWindowSize); + idx = k.set(idx, state->textureThreshold); + idx = k.set(idx, state->uniquenessRatio); + + return k.run(2, globalThreads, NULL, false); +} struct FindStereoCorrespInvoker : public ParallelLoopBody { @@ -776,21 +859,18 @@ public: void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr ) { - Mat left0 = leftarr.getMat(), right0 = rightarr.getMat(); int dtype = disparr.fixedType() ? disparr.type() : params.dispType; + Size leftsize = leftarr.size(); - if (left0.size() != right0.size()) + if (leftarr.size() != rightarr.size()) CV_Error( Error::StsUnmatchedSizes, "All the images must have the same size" ); - if (left0.type() != CV_8UC1 || right0.type() != CV_8UC1) + if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1) CV_Error( Error::StsUnsupportedFormat, "Both input images must have CV_8UC1" ); if (dtype != CV_16SC1 && dtype != CV_32FC1) CV_Error( Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format" ); - disparr.create(left0.size(), dtype); - Mat disp0 = disparr.getMat(); - if( params.preFilterType != PREFILTER_NORMALIZED_RESPONSE && params.preFilterType != PREFILTER_XSOBEL ) CV_Error( Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE" ); @@ -802,7 +882,7 @@ public: CV_Error( Error::StsOutOfRange, "preFilterCap must be within 1..63" ); if( params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 || - params.SADWindowSize >= std::min(left0.cols, left0.rows) ) + params.SADWindowSize >= std::min(leftsize.width, leftsize.height) ) CV_Error( 
Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height" ); if( params.numDisparities <= 0 || params.numDisparities % 16 != 0 ) @@ -814,6 +894,26 @@ public: if( params.uniquenessRatio < 0 ) CV_Error( Error::StsOutOfRange, "uniqueness ratio must be non-negative" ); + int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT; + + if(ocl::useOpenCL() && disparr.isUMat()) + { + UMat left, right; + CV_Assert(ocl_prefiltering(leftarr, rightarr, left, right, &params)); + CV_Assert(ocl_stereo(left, right, disparr, &params)); + + if( params.speckleRange >= 0 && params.speckleWindowSize > 0 ) + filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf); + + if (dtype == CV_32F) + disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << DISPARITY_SHIFT), 0); + return; + } + + Mat left0 = leftarr.getMat(), right0 = rightarr.getMat(); + disparr.create(left0.size(), dtype); + Mat disp0 = disparr.getMat(); + preFilteredImg0.create( left0.size(), CV_8U ); preFilteredImg1.create( left0.size(), CV_8U ); cost.create( left0.size(), CV_16S ); @@ -828,7 +928,6 @@ public: int lofs = std::max(ndisp - 1 + mindisp, 0); int rofs = -std::min(ndisp - 1 + mindisp, 0); int width1 = width - rofs - ndisp + 1; - int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT; if( lofs >= width || rofs >= width || width1 < 1 ) { @@ -855,7 +954,7 @@ public: bufSize2 = width*height*(sizeof(Point_) + sizeof(int) + sizeof(uchar)); #if CV_SSE2 - bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2); + bool useShorts = false;//params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2); #else const bool useShorts = false; #endif @@ -870,6 +969,7 @@ public: slidingSumBuf.create( 1, bufSize, CV_8U ); uchar *_buf = slidingSumBuf.data; + parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, 
&params), 1); Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2; diff --git a/modules/calib3d/test/opencl/test_stereobm.cpp b/modules/calib3d/test/opencl/test_stereobm.cpp new file mode 100644 index 000000000..c3903f6a8 --- /dev/null +++ b/modules/calib3d/test/opencl/test_stereobm.cpp @@ -0,0 +1,96 @@ +/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include "cvconfig.h" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +PARAM_TEST_CASE(StereoBMFixture, int, int) +{ + int n_disp; + int winSize; + Mat left, right, disp; + UMat uleft, uright, udisp; + + virtual void SetUp() + { + n_disp = GET_PARAM(0); + winSize = GET_PARAM(1); + + left = readImage("gpu/stereobm/aloe-L.png", IMREAD_GRAYSCALE); + right = readImage("gpu/stereobm/aloe-R.png", IMREAD_GRAYSCALE); + + ASSERT_FALSE(left.empty()); + ASSERT_FALSE(right.empty()); + + left.copyTo(uleft); + right.copyTo(uright); + } + + void Near(double eps = 0.0) + { + EXPECT_MAT_NEAR_RELATIVE(disp, udisp, eps); + } +}; + +OCL_TEST_P(StereoBMFixture, StereoBM) +{ + Ptr bm = createStereoBM( n_disp, winSize); + bm->setPreFilterType(bm->PREFILTER_XSOBEL); + + OCL_OFF(bm->compute(left, right, disp)); + OCL_ON(bm->compute(uleft, uright, udisp)); + + Near(1e-2); +} + +OCL_INSTANTIATE_TEST_CASE_P(StereoMatcher, StereoBMFixture, testing::Combine(testing::Values(128), + testing::Values(15))); +}//ocl +}//cvtest + +#endif //HAVE_OPENCL