From f31cf6d88d0d1d23273a271c29aa214a6c2cb5e6 Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Tue, 20 Jul 2010 13:00:07 +0000 Subject: [PATCH] prefilter_xsobel option added to stereobm_gpu --- modules/gpu/include/opencv2/gpu/gpu.hpp | 15 ++-- modules/gpu/src/cuda/cuda_shared.hpp | 3 +- modules/gpu/src/cuda/stereobm.cu | 102 ++++++++++++++++++------ modules/gpu/src/stereobm_gpu.cpp | 34 +++++--- 4 files changed, 112 insertions(+), 42 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 7ce5e79d4..d36404355 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -326,13 +326,15 @@ namespace cv class CV_EXPORTS StereoBM_GPU { public: - enum { BASIC_PRESET=0, PREFILTER_XSOBEL = 1 }; + enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 }; + + enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 }; //! the default constructor StereoBM_GPU(); //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size //! ndisparities should be multiple of 8. SSD WindowsSize is fixed to 19 now - StereoBM_GPU(int preset, int ndisparities=0); + StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ); //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair //! Output disparity has CV_8U type. void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity); @@ -344,10 +346,13 @@ namespace cv // if current GPU will be faster then CPU in this algorithm. // It queries current active device. static bool checkIfGpuCallReasonable(); - private: - GpuMat minSSD; - int preset; + int ndisp; + int winSize; + int preset; + private: + GpuMat minSSD, leBuf, riBuf; + }; } } diff --git a/modules/gpu/src/cuda/cuda_shared.hpp b/modules/gpu/src/cuda/cuda_shared.hpp index 23c4c0021..272e4f56c 100644 --- a/modules/gpu/src/cuda/cuda_shared.hpp +++ b/modules/gpu/src/cuda/cuda_shared.hpp @@ -59,8 +59,7 @@ namespace cv namespace impl { static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; } - - extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_& minSSD_buf); + extern "C" void set_to_without_mask (const DevMem2D& mat, const double * scalar, int depth, int channels); extern "C" void set_to_with_mask (const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels); diff --git a/modules/gpu/src/cuda/stereobm.cu b/modules/gpu/src/cuda/stereobm.cu index 39d72af85..e2427da36 100644 --- a/modules/gpu/src/cuda/stereobm.cu +++ b/modules/gpu/src/cuda/stereobm.cu @@ -55,14 +55,14 @@ #define COL_SSD_SIZE (BLOCK_W + N_DIRTY_PIXELS) #define SHARED_MEM_SIZE (COL_SSD_SIZE) // amount of shared memory used +namespace stereobm_gpu +{ + __constant__ unsigned int* cminSSDImage; __constant__ size_t cminSSD_step; __constant__ int cwidth; __constant__ int cheight; -namespace device_code -{ - __device__ int SQ(int a) { return a * a; @@ -290,29 +290,79 @@ extern "C" __global__ void stereoKernel(unsigned char *left, unsigned char *righ } -extern "C" void cv::gpu::impl::stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_& minSSD_buf) -{ - //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) ); - //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) ); - - size_t smem_size = (BLOCK_W + N_DISPARITIES * SHARED_MEM_SIZE) * sizeof(unsigned int); +namespace cv { namespace gpu { namespace impl +{ + extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_& minSSD_buf) + { + //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) ); + //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) ); - cudaSafeCall( cudaMemset2D(disp.ptr, disp.step, 0, disp.cols, disp. rows) ); - cudaSafeCall( cudaMemset2D(minSSD_buf.ptr, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp. rows) ); + size_t smem_size = (BLOCK_W + N_DISPARITIES * SHARED_MEM_SIZE) * sizeof(unsigned int); - dim3 grid(1,1,1); - dim3 threads(BLOCK_W, 1, 1); - - grid.x = divUp(left.cols - maxdisp - 2 * RADIUS, BLOCK_W); - grid.y = divUp(left.rows - 2 * RADIUS, ROWSperTHREAD); - - cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof (left.cols) ) ); - cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof (left.rows) ) ); - cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.ptr, sizeof (minSSD_buf.ptr) ) ); + cudaSafeCall( cudaMemset2D(disp.ptr, disp.step, 0, disp.cols, disp. rows) ); + cudaSafeCall( cudaMemset2D(minSSD_buf.ptr, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp. rows) ); - size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize(); - cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof (minssd_step) ) ); - - device_code::stereoKernel<<>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp); - cudaSafeCall( cudaThreadSynchronize() ); -} \ No newline at end of file + dim3 grid(1,1,1); + dim3 threads(BLOCK_W, 1, 1); + + grid.x = divUp(left.cols - maxdisp - 2 * RADIUS, BLOCK_W); + grid.y = divUp(left.rows - 2 * RADIUS, ROWSperTHREAD); + + cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cwidth, &left.cols, sizeof(left.cols) ) ); + cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cheight, &left.rows, sizeof(left.rows) ) ); + cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cminSSDImage, &minSSD_buf.ptr, sizeof(minSSD_buf.ptr) ) ); + + size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize(); + cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cminSSD_step, &minssd_step, sizeof(minssd_step) ) ); + + stereobm_gpu::stereoKernel<<>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp); + cudaSafeCall( cudaThreadSynchronize() ); + } +}}} + +////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////// Sobel Prefiler /////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace stereobm_gpu +{ + +texture tex; + +extern "C" __global__ void prefilert_kernel(unsigned char *output, size_t step, int width, int height, int prefilterCap) +{ + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < width && y < height) + { + int conv = (int)tex2D(tex, x - 1, y - 1) * (-1) + (int)tex2D(tex, x + 1, y - 1) * (1) + + (int)tex2D(tex, x - 1, y ) * (-2) + (int)tex2D(tex, x + 1, y ) * (2) + + (int)tex2D(tex, x - 1, y + 1) * (-1) + (int)tex2D(tex, x + 1, y + 1) * (1); + + + conv = min(min(max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255); + output[y * step + x] = conv & 0xFF; + } +} + +} + +namespace cv { namespace gpu { namespace impl +{ + extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap) + { + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaSafeCall( cudaBindTexture2D( 0, stereobm_gpu::tex, input.ptr, desc, input.cols, input.rows, input.step ) ); + + dim3 threads(16, 16, 1); + dim3 grid(1, 1, 1); + + grid.x = divUp(input.cols, threads.x); + grid.y = divUp(input.rows, threads.y); + + stereobm_gpu::prefilert_kernel<<>>(output.ptr, output.step, output.cols, output.rows, prefilterCap); + cudaSafeCall( cudaThreadSynchronize() ); + } + +}}} \ No newline at end of file diff --git a/modules/gpu/src/stereobm_gpu.cpp b/modules/gpu/src/stereobm_gpu.cpp index ae9670072..c557d4e82 100644 --- a/modules/gpu/src/stereobm_gpu.cpp +++ b/modules/gpu/src/stereobm_gpu.cpp @@ -56,12 +56,21 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right #else /* !defined (HAVE_CUDA) */ + +namespace cv { namespace gpu +{ + namespace impl + { + extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_& minSSD_buf); + extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap = 31); + } +}} -cv::gpu::StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {} -cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_) +cv::gpu::StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ) {} +cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_, int winSize_) : preset(preset_), ndisp(ndisparities_), winSize(winSize_) { - const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8); - CV_Assert(ndisp <= max_supported_ndisp); + const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8); + CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp); CV_Assert(ndisp % 8 == 0); } @@ -91,14 +100,21 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right disparity.create(left.size(), CV_8U); minSSD.create(left.size(), CV_32S); + GpuMat le_for_bm = left; + GpuMat ri_for_bm = right; + if (preset == PREFILTER_XSOBEL) { - CV_Assert(!"Not implemented"); - } + leBuf.create( left.size(), left.type()); + riBuf.create(right.size(), right.type()); + + impl::prefilter_xsobel( left, leBuf); + impl::prefilter_xsobel(right, riBuf); - DevMem2D disp = disparity; - DevMem2D_ mssd = minSSD; - impl::stereoBM_GPU(left, right, disp, ndisp, mssd); + le_for_bm = leBuf; + ri_for_bm = riBuf; + } + impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD); } void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)