added get_first_k_initial_global_init_global_cost in gpu::SCBP

This commit is contained in:
Andrey Morozov 2010-08-17 15:53:00 +00:00
parent 9a669b1ceb
commit 28030952fa
3 changed files with 185 additions and 146 deletions

View File

@ -486,6 +486,8 @@ namespace cv
int min_disp_th; int min_disp_th;
int msg_type; int msg_type;
bool use_local_init_data_cost;
private: private:
GpuMat u[2], d[2], l[2], r[2]; GpuMat u[2], d[2], l[2], r[2];
GpuMat disp_selected_pyr[2]; GpuMat disp_selected_pyr[2];

View File

@ -62,10 +62,10 @@ namespace cv { namespace gpu { namespace csbp
const DevMem2D& left, const DevMem2D& right, const DevMem2D& temp); const DevMem2D& left, const DevMem2D& right, const DevMem2D& temp);
void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected,
size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, cudaStream_t stream); size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);
void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected,
size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, cudaStream_t stream); size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);
void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step1, size_t msg_step2, void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step1, size_t msg_step2,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
@ -111,7 +111,7 @@ cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, in
: ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_), : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT), max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), min_disp_th(0), max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), min_disp_th(0),
msg_type(msg_type_) msg_type(msg_type_), use_local_init_data_cost(true)
{ {
CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S); CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
} }
@ -122,7 +122,7 @@ cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, in
: ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_), : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
max_data_term(max_data_term_), data_weight(data_weight_), max_data_term(max_data_term_), data_weight(data_weight_),
max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), min_disp_th(min_disp_th_), max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), min_disp_th(min_disp_th_),
msg_type(msg_type_) msg_type(msg_type_), use_local_init_data_cost(true)
{ {
CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S); CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
} }
@ -131,7 +131,7 @@ template<class T>
static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2], GpuMat l[2], GpuMat r[2], static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2], GpuMat l[2], GpuMat r[2],
GpuMat disp_selected_pyr[2], GpuMat& data_cost, GpuMat& data_cost_selected, GpuMat disp_selected_pyr[2], GpuMat& data_cost, GpuMat& data_cost_selected,
GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp,
cudaStream_t stream) bool use_local_init_data_cost, cudaStream_t stream)
{ {
CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
&& left.rows == right.rows && left.cols == right.cols && left.type() == right.type()); && left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
@ -225,7 +225,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
if (i == levels - 1) if (i == levels - 1)
{ {
csbp::init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(), csbp::init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(),
step_pyr[i], rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), stream); step_pyr[i], rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), use_local_init_data_cost, stream);
} }
else else
{ {
@ -265,20 +265,20 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
// Function-pointer type matching the csbp_operator<T> instantiations, followed by a
// dispatch table that the operator() overloads index with msg_type.
// Only slots 3 (csbp_operator<short>) and 5 (csbp_operator<float>) are populated; every
// other slot is a null pointer.
// NOTE(review): slots 3 and 5 presumably correspond to CV_16S and CV_32F - confirm against
// the OpenCV depth constants.
typedef void (*csbp_operator_t)(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2], GpuMat l[2], GpuMat r[2], typedef void (*csbp_operator_t)(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2], GpuMat l[2], GpuMat r[2],
GpuMat disp_selected_pyr[2], GpuMat& data_cost, GpuMat& data_cost_selected, GpuMat disp_selected_pyr[2], GpuMat& data_cost, GpuMat& data_cost_selected,
GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp,
cudaStream_t stream); bool use_local_init_data_cost, cudaStream_t stream);
const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator<short>, 0, csbp_operator<float>, 0, 0}; const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator<short>, 0, csbp_operator<float>, 0, 0};
// Overload without a Stream argument: asserts that msg_type is CV_32F or CV_16S, then
// calls the operators[] entry selected by msg_type, handing over this object's work
// buffers and passing 0 as the CUDA stream.
void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp) void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp)
{ {
CV_Assert(msg_type == CV_32F || msg_type == CV_16S); CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp, 0); operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp, use_local_init_data_cost, 0);
} }
// Overload taking a Stream: performs the same msg_type assertion and operators[] dispatch,
// but passes the stream handle returned by StreamAccessor::getStream(stream) instead of 0.
void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream) void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
{ {
CV_Assert(msg_type == CV_32F || msg_type == CV_16S); CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp, StreamAccessor::getStream(stream)); operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp, use_local_init_data_cost, StreamAccessor::getStream(stream));
} }
#endif /* !defined (HAVE_CUDA) */ #endif /* !defined (HAVE_CUDA) */

View File

@ -146,6 +146,40 @@ namespace csbp_krnls
} }
}; };
// For every pixel, picks the nr_plane cheapest candidates out of the per-pixel cost
// column held in ctemp and stores them in ascending cost order.
//
// Planes of all three buffers are cdisp_step1 elements apart and rows are cmsg_step1
// elements apart. Output plane i receives the next-smallest remaining cost
// (data_cost_selected_) and the index of the cost plane it was taken from
// (selected_disp_pyr). Every chosen entry of ctemp is overwritten with
// TypeLimits<T>::max() so it is not picked again, i.e. the kernel modifies ctemp.
//
// Launch layout: one thread per pixel on a 2D grid; threads with x >= w or y >= h
// do nothing.
//
// Parameters:
//   data_cost_selected_ - output: selected costs.
//   selected_disp_pyr   - output: plane index of each selected cost.
//   h, w                - extent of the processed region.
//   nr_plane            - number of candidates to keep per pixel.
//   ndisp               - number of cost planes to search. With the default (0), or any
//                         value below nr_plane, only the first nr_plane planes are
//                         searched, which means a plane at index >= nr_plane can never
//                         be selected. Pass the full plane count to search all of them.
//                         NOTE(review): the value passed must not exceed the number of
//                         planes actually stored in ctemp - confirm at the launch site.
template <typename T>
__global__ void get_first_k_initial_global(T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane, int ndisp = 0)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (y < h && x < w)
    {
        T* selected_disparity = selected_disp_pyr + y * cmsg_step1 + x;
        T* data_cost_selected = data_cost_selected_ + y * cmsg_step1 + x;
        T* data_cost = (T*)ctemp + y * cmsg_step1 + x;

        // Never search fewer than nr_plane planes: the seed below reads plane i.
        const int search_planes = ndisp > nr_plane ? ndisp : nr_plane;

        for (int i = 0; i < nr_plane; i++)
        {
            // Seed the minimum with plane i, then look for anything strictly cheaper.
            T fmin_ = data_cost[i * cdisp_step1];
            int id = i;

            for (int j = 0; j < search_planes; j++)
            {
                T cur = data_cost[j * cdisp_step1];
                if (cur < fmin_)
                {
                    fmin_ = cur;
                    id = j;
                }
            }

            data_cost_selected[i * cdisp_step1] = fmin_;
            selected_disparity[i * cdisp_step1] = id;

            // Knock the winner out of the running for the remaining iterations.
            data_cost[id * cdisp_step1] = TypeLimits<T>::max();
        }
    }
}
template <typename T> template <typename T>
__global__ void get_first_k_initial_local(T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane) __global__ void get_first_k_initial_local(T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane)
{ {
@ -341,8 +375,8 @@ namespace cv { namespace gpu { namespace csbp
} }
template<class T> template<class T>
void init_data_cost_tmpl(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, void init_data_cost_tmpl(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,
size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, cudaStream_t stream) int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream)
{ {
typedef void (*InitDataCostCaller)(int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream); typedef void (*InitDataCostCaller)(int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream);
@ -368,21 +402,24 @@ namespace cv { namespace gpu { namespace csbp
grid.x = divUp(w, threads.x); grid.x = divUp(w, threads.x);
grid.y = divUp(h, threads.y); grid.y = divUp(h, threads.y);
csbp_krnls::get_first_k_initial_local<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane); if (use_local_init_data_cost == true)
csbp_krnls::get_first_k_initial_local<<<grid, threads, 0, stream>>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane);
else
csbp_krnls::get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
// Overload for short buffers: forwards every argument unchanged to init_data_cost_tmpl.
void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected,
size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, cudaStream_t stream) size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream)
{ {
init_data_cost_tmpl(rows, cols, disp_selected_pyr, data_cost_selected, msg_step, h, w, level, nr_plane, ndisp, channels, stream); init_data_cost_tmpl(rows, cols, disp_selected_pyr, data_cost_selected, msg_step, h, w, level, nr_plane, ndisp, channels, use_local_init_data_cost, stream);
} }
// Overload for float buffers: forwards every argument unchanged to init_data_cost_tmpl.
void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected,
size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, cudaStream_t stream) size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream)
{ {
init_data_cost_tmpl(rows, cols, disp_selected_pyr, data_cost_selected, msg_step, h, w, level, nr_plane, ndisp, channels, stream); init_data_cost_tmpl(rows, cols, disp_selected_pyr, data_cost_selected, msg_step, h, w, level, nr_plane, ndisp, channels, use_local_init_data_cost, stream);
} }
}}} }}}