From 1febf345bf3066d421e10cff55e51dbe81aa8de5 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 25 Aug 2010 06:30:11 +0000 Subject: [PATCH] renamed colorizeDisp to drawColorDisp, added acync version of drawColorDisp and reprojectImageTo3D_GPU. --- modules/gpu/include/opencv2/gpu/gpu.hpp | 14 ++++- modules/gpu/src/cuda/imgproc.cu | 48 +++++++++------- modules/gpu/src/imgproc_gpu.cpp | 74 +++++++++++++++---------- 3 files changed, 86 insertions(+), 50 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index ffe45af67..78a016d96 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -349,9 +349,21 @@ namespace cv // Does mean shift filtering on GPU. CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); - CV_EXPORTS void colorizeDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp); + // Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV. + // Supported types of input disparity: CV_8U, CV_16S. + // Output disparity has CV_8UC4 type in BGRA format (alpha = 255). + CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp); + // Acync version + CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, const Stream& stream); + // Reprojects disparity image to 3D space. + // Supports CV_8U and CV_16S types of input disparity. + // The output is a 4-channel floating-point (CV_32FC4) matrix. + // Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map. + // Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify. CV_EXPORTS void reprojectImageTo3D_GPU(const GpuMat& disp, GpuMat& xyzw, const Mat& Q); + // Acync version + CV_EXPORTS void reprojectImageTo3D_GPU(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const Stream& stream); //////////////////////////////// StereoBM_GPU //////////////////////////////// diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index 5a87c054f..a08e34ab9 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -182,7 +182,7 @@ namespace cv { namespace gpu { namespace impl } }}} -/////////////////////////////////// colorizeDisp /////////////////////////////////////////////// +/////////////////////////////////// drawColorDisp /////////////////////////////////////////////// namespace imgproc { @@ -240,14 +240,15 @@ namespace imgproc res.y = p; res.z = V; } - unsigned int b = (unsigned int)(max(0.f, min (res.x, 1.f)) * 255.f); - unsigned int g = (unsigned int)(max(0.f, min (res.y, 1.f)) * 255.f); - unsigned int r = (unsigned int)(max(0.f, min (res.z, 1.f)) * 255.f); + const unsigned int b = (unsigned int)(max(0.f, min (res.x, 1.f)) * 255.f); + const unsigned int g = (unsigned int)(max(0.f, min (res.y, 1.f)) * 255.f); + const unsigned int r = (unsigned int)(max(0.f, min (res.z, 1.f)) * 255.f); + const unsigned int a = 255U; - return (r << 16) + (g << 8) + b; + return (a << 24) + (r << 16) + (g << 8) + b; } - __global__ void colorizeDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) + __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) { const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -267,7 +268,7 @@ namespace imgproc } } - __global__ void colorizeDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) + __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) { const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -288,30 +289,34 @@ namespace imgproc namespace cv { namespace gpu { namespace impl { - void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp) + void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream) { dim3 threads(16, 16, 1); dim3 grid(1, 1, 1); grid.x = divUp(src.cols, threads.x << 2); grid.y = divUp(src.rows, threads.y); - imgproc::colorizeDisp<<>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp); - cudaSafeCall( cudaThreadSynchronize() ); + imgproc::drawColorDisp<<>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp); + + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); } - void colorizeDisp_gpu(const DevMem2D_& src, const DevMem2D& dst, int ndisp) + void drawColorDisp_gpu(const DevMem2D_& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); grid.x = divUp(src.cols, threads.x << 1); grid.y = divUp(src.rows, threads.y); - imgproc::colorizeDisp<<>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp); - cudaSafeCall( cudaThreadSynchronize() ); + imgproc::drawColorDisp<<>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp); + + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); } }}} -/////////////////////////////////// colorizeDisp /////////////////////////////////////////////// +/////////////////////////////////// reprojectImageTo3D /////////////////////////////////////////////// namespace imgproc { @@ -351,7 +356,7 @@ namespace imgproc namespace cv { namespace gpu { namespace impl { template - inline void reprojectImageTo3D_caller(const DevMem2D_& disp, const DevMem2Df& xyzw, const float* q) + inline void reprojectImageTo3D_caller(const DevMem2D_& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -360,18 +365,19 @@ namespace cv { namespace gpu { namespace impl cudaSafeCall( cudaMemcpyToSymbol(imgproc::cq, q, 16 * sizeof(float)) ); - imgproc::reprojectImageTo3D<<>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols); + imgproc::reprojectImageTo3D<<>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols); - cudaSafeCall( cudaThreadSynchronize() ); + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); } - void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q) + void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream) { - reprojectImageTo3D_caller(disp, xyzw, q); + reprojectImageTo3D_caller(disp, xyzw, q, stream); } - void reprojectImageTo3D_gpu(const DevMem2D_& disp, const DevMem2Df& xyzw, const float* q) + void reprojectImageTo3D_gpu(const DevMem2D_& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream) { - reprojectImageTo3D_caller(disp, xyzw, q); + reprojectImageTo3D_caller(disp, xyzw, q, stream); } }}} diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 080d6eb40..ee06fe593 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -49,8 +49,10 @@ using namespace cv::gpu; void cv::gpu::remap(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::meanShiftFiltering_GPU(const GpuMat&, GpuMat&, int, int, TermCriteria ) { throw_nogpu(); } -void cv::gpu::colorizeDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); } +void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); } +void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, const Stream&) { throw_nogpu(); } void cv::gpu::reprojectImageTo3D_GPU(const GpuMat&, GpuMat&, const Mat&) { throw_nogpu(); } +void cv::gpu::reprojectImageTo3D_GPU(const GpuMat&, GpuMat&, const Mat&, const Stream&) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -62,11 +64,11 @@ namespace cv { namespace gpu extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps); - void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp); - void colorizeDisp_gpu(const DevMem2D_& src, const DevMem2D& dst, int ndisp); + void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream); + void drawColorDisp_gpu(const DevMem2D_& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream); - void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q); - void reprojectImageTo3D_gpu(const DevMem2D_& disp, const DevMem2Df& xyzw, const float* q); + void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream); + void reprojectImageTo3D_gpu(const DevMem2D_& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream); } }} @@ -109,47 +111,63 @@ void cv::gpu::meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int namespace { template - void colorizeDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp) - { - impl::colorizeDisp_gpu((DevMem2D_)src, dst, ndisp); + void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream) + { + GpuMat out; + if (&dst != &src) + out = dst; + out.create(src.size(), CV_8UC4); + + impl::drawColorDisp_gpu((DevMem2D_)src, out, ndisp, stream); + + dst = out; } + + typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream); + + const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller, 0, 0, drawColorDisp_caller, 0, 0, 0, 0}; } -void cv::gpu::colorizeDisp(const GpuMat& src, GpuMat& dst, int ndisp) +void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp) { - typedef void (*colorizeDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp); - - static const colorizeDisp_caller_t callers[] = {colorizeDisp_caller, 0, 0, colorizeDisp_caller, 0, 0, 0, 0}; CV_Assert(src.type() == CV_8U || src.type() == CV_16S); - - GpuMat out; - if (&dst != &src) - out = dst; - out.create(src.size(), CV_8UC4); - callers[src.type()](src, out, ndisp); - dst = out; + drawColorDisp_callers[src.type()](src, dst, ndisp, 0); +} + +void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, const Stream& stream) +{ + CV_Assert(src.type() == CV_8U || src.type() == CV_16S); + + drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream)); } namespace { template - void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q) - { - impl::reprojectImageTo3D_gpu((DevMem2D_)disp, xyzw, Q.ptr()); + void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream) + { + xyzw.create(disp.rows, disp.cols, CV_32FC4); + impl::reprojectImageTo3D_gpu((DevMem2D_)disp, xyzw, Q.ptr(), stream); } + + typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream); + + const reprojectImageTo3D_caller_t reprojectImageTo3D_callers[] = {reprojectImageTo3D_caller, 0, 0, reprojectImageTo3D_caller, 0, 0, 0, 0}; } void cv::gpu::reprojectImageTo3D_GPU(const GpuMat& disp, GpuMat& xyzw, const Mat& Q) { - typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q); - - static const reprojectImageTo3D_caller_t callers[] = {reprojectImageTo3D_caller, 0, 0, reprojectImageTo3D_caller, 0, 0, 0, 0}; CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4); - - xyzw.create(disp.rows, disp.cols, CV_32FC4); - callers[disp.type()](disp, xyzw, Q); + reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, 0); +} + +void cv::gpu::reprojectImageTo3D_GPU(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const Stream& stream) +{ + CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4); + + reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream)); } #endif /* !defined (HAVE_CUDA) */ \ No newline at end of file