From e2a9df408f712f694206974a1f24a864881371c3 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 8 Sep 2015 16:04:28 +0300 Subject: [PATCH 1/3] fix for gpu::StereoBeliefPropogation: use continuous memory for internal buffers --- modules/gpu/src/stereobp.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/gpu/src/stereobp.cpp b/modules/gpu/src/stereobp.cpp index 3b827a312..dde69d6d4 100644 --- a/modules/gpu/src/stereobp.cpp +++ b/modules/gpu/src/stereobp.cpp @@ -158,7 +158,7 @@ namespace init(stream); - datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, datas[0]); comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], StreamAccessor::getStream(stream)); @@ -187,10 +187,10 @@ namespace private: void init(Stream& stream) { - u.create(rows * rthis.ndisp, cols, rthis.msg_type); - d.create(rows * rthis.ndisp, cols, rthis.msg_type); - l.create(rows * rthis.ndisp, cols, rthis.msg_type); - r.create(rows * rthis.ndisp, cols, rthis.msg_type); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, u); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, d); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, l); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, r); if (rthis.levels & 1) { @@ -216,10 +216,10 @@ namespace int less_rows = rows / 2; int less_cols = cols / 2; - u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, u2); + createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, d2); + createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, l2); + createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, r2); if ((rthis.levels & 1) == 0) { @@ -286,7 +286,7 @@ namespace cols_all[i] = cols_all[i-1] / 2; rows_all[i] = rows_all[i-1] / 2; - datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type); + createContinuous(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type, datas[i]); data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream); } From f903192c17b1c70182c0c25bb156900ea82b731b Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 10 Sep 2015 10:05:04 +0300 Subject: [PATCH 2/3] revert previous change in gpu::StereoBeliefPropogation --- modules/gpu/src/stereobp.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/gpu/src/stereobp.cpp b/modules/gpu/src/stereobp.cpp index dde69d6d4..0864fbcad 100644 --- a/modules/gpu/src/stereobp.cpp +++ b/modules/gpu/src/stereobp.cpp @@ -213,8 +213,8 @@ namespace if (rthis.levels > 1) { - int less_rows = rows / 2; - int less_cols = cols / 2; + int less_rows = (rows + 1) / 2; + int less_cols = (cols + 1) / 2; createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, u2); createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, d2); @@ -283,8 +283,8 @@ namespace for (int i = 1; i < rthis.levels; ++i) { - cols_all[i] = cols_all[i-1] / 2; - rows_all[i] = rows_all[i-1] / 2; + cols_all[i] = (cols_all[i-1] + 1) / 2; + rows_all[i] = (rows_all[i-1] + 1) / 2; createContinuous(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type, datas[i]); From 3ef067cc65cd548a968588f72d3b9ecc92a0e9bb Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 10 Sep 2015 10:05:25 +0300 Subject: [PATCH 3/3] add extra checks to data_step_down to prevent out-of-border access --- modules/gpu/src/cuda/stereobp.cu | 18 +++++++++--------- modules/gpu/src/stereobp.cpp | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/gpu/src/cuda/stereobp.cu b/modules/gpu/src/cuda/stereobp.cu index 05a19b419..d011c7f4f 100644 --- a/modules/gpu/src/cuda/stereobp.cu +++ b/modules/gpu/src/cuda/stereobp.cu @@ -255,7 +255,7 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////// template - __global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep src, PtrStep dst) + __global__ void data_step_down(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStep src, PtrStep dst) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -264,10 +264,10 @@ namespace cv { namespace gpu { namespace device { for (int d = 0; d < cndisp; ++d) { - float dst_reg = src.ptr(d * src_rows + (2*y+0))[(2*x+0)]; - dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+0)]; - dst_reg += src.ptr(d * src_rows + (2*y+0))[(2*x+1)]; - dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+1)]; + float dst_reg = src.ptr(d * src_rows + ::min(2*y+0, src_rows-1))[::min(2*x+0, src_cols-1)]; + dst_reg += src.ptr(d * src_rows + ::min(2*y+1, src_rows-1))[::min(2*x+0, src_cols-1)]; + dst_reg += src.ptr(d * src_rows + ::min(2*y+0, src_rows-1))[::min(2*x+1, src_cols-1)]; + dst_reg += src.ptr(d * src_rows + ::min(2*y+1, src_rows-1))[::min(2*x+1, src_cols-1)]; dst.ptr(d * dst_rows + y)[x] = saturate_cast(dst_reg); } @@ -275,7 +275,7 @@ namespace cv { namespace gpu { namespace device } template - void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) + void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -283,15 +283,15 @@ namespace cv { namespace gpu { namespace device grid.x = divUp(dst_cols, threads.x); grid.y = divUp(dst_rows, threads.y); - data_step_down<<>>(dst_cols, dst_rows, src_rows, (PtrStepSz)src, (PtrStepSz)dst); + data_step_down<<>>(dst_cols, dst_rows, src_cols, src_rows, (PtrStepSz)src, (PtrStepSz)dst); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - template void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); - template void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + template void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + template void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); /////////////////////////////////////////////////////////////// /////////////////// level up messages //////////////////////// diff --git a/modules/gpu/src/stereobp.cpp b/modules/gpu/src/stereobp.cpp index 0864fbcad..2bcefe377 100644 --- a/modules/gpu/src/stereobp.cpp +++ b/modules/gpu/src/stereobp.cpp @@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace device template void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream); template - void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); template void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream); template @@ -253,7 +253,7 @@ namespace void calcBP(GpuMat& disp, Stream& stream) { - typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); static const data_step_down_t data_step_down_callers[2] = { data_step_down_gpu, data_step_down_gpu @@ -288,7 +288,7 @@ namespace createContinuous(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type, datas[i]); - data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream); + data_step_down_callers[funcIdx](cols_all[i], rows_all[i], cols_all[i-1], rows_all[i-1], datas[i-1], datas[i], cudaStream); } PtrStepSzb mus[] = {u, u2};