Added PtrStep and PtrElemStep structures. Refactored namespaces.
This commit is contained in:
@@ -50,55 +50,78 @@ namespace cv
|
|||||||
// Simple lightweight structures that encapsulate information about an image on the device.
|
// Simple lightweight structures that encapsulate information about an image on the device.
|
||||||
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile.
|
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile.
|
||||||
|
|
||||||
template<typename T> struct PtrStep_
|
|
||||||
{
|
|
||||||
T* ptr;
|
|
||||||
size_t step;
|
|
||||||
|
|
||||||
typedef T elem_type;
|
|
||||||
enum { elem_size = sizeof(elem_type) };
|
|
||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
__host__ __device__
|
#define __CV_GPU_HOST_DEVICE__ __host__ __device__
|
||||||
|
#else
|
||||||
|
#define __CV_GPU_HOST_DEVICE__
|
||||||
#endif
|
#endif
|
||||||
size_t elemSize() const { return elem_size; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T> struct DevMem2D_
|
template <typename T> struct DevMem2D_
|
||||||
{
|
{
|
||||||
int cols;
|
int cols;
|
||||||
int rows;
|
int rows;
|
||||||
T* ptr;
|
T* data;
|
||||||
size_t step;
|
size_t step;
|
||||||
size_t elem_step;
|
|
||||||
|
|
||||||
/*__host__*/
|
DevMem2D_() : cols(0), rows(0), data(0), step(0) {}
|
||||||
DevMem2D_() : cols(0), rows(0), ptr(0), step(0), elem_step(0) {}
|
|
||||||
|
|
||||||
/*__host__*/
|
DevMem2D_(int rows_, int cols_, T *data_, size_t step_)
|
||||||
DevMem2D_(int rows_, int cols_, T *ptr_, size_t step_)
|
: cols(cols_), rows(rows_), data(data_), step(step_) {}
|
||||||
: cols(cols_), rows(rows_), ptr(ptr_), step(step_), elem_step(step_ / sizeof(T)) {}
|
|
||||||
|
|
||||||
template <typename U>
|
template <typename U>
|
||||||
/*__host__*/
|
|
||||||
explicit DevMem2D_(const DevMem2D_<U>& d)
|
explicit DevMem2D_(const DevMem2D_<U>& d)
|
||||||
: cols(d.cols), rows(d.rows), ptr((T*)d.ptr), step(d.step), elem_step(d.step / sizeof(T)) {}
|
: cols(d.cols), rows(d.rows), data((T*)d.data), step(d.step) {}
|
||||||
|
|
||||||
template <typename U>
|
typedef T elem_type;
|
||||||
/*__host__*/
|
enum { elem_size = sizeof(elem_type) };
|
||||||
operator PtrStep_<U>() const { PtrStep_<U> dt; dt.ptr = ptr; dt.step = step; return dt; }
|
|
||||||
|
|
||||||
typedef typename PtrStep_<T>::elem_type elem_type;
|
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
|
||||||
enum { elem_size = PtrStep_<T>::elem_size };
|
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return (T*)( (char*)data + y * step ); }
|
||||||
#if defined(__CUDACC__)
|
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)data + y * step ); }
|
||||||
__host__ __device__
|
};
|
||||||
#endif
|
|
||||||
size_t elemSize() const { return elem_size; }
|
template<typename T> struct PtrStep_
|
||||||
|
{
|
||||||
|
T* data;
|
||||||
|
size_t step;
|
||||||
|
|
||||||
|
PtrStep_() : data(0), step(0) {}
|
||||||
|
PtrStep_(const DevMem2D_<T>& mem) : data(mem.data), step(mem.step) {}
|
||||||
|
|
||||||
|
typedef T elem_type;
|
||||||
|
enum { elem_size = sizeof(elem_type) };
|
||||||
|
|
||||||
|
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
|
||||||
|
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return (T*)( (char*)data + y * step); }
|
||||||
|
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)data + y * step); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T> struct PtrElemStep_ : public PtrStep_<T>
|
||||||
|
{
|
||||||
|
PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep_<T>(mem)
|
||||||
|
{
|
||||||
|
step /= elem_size;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
template <bool> struct StaticCheck;
|
||||||
|
template <> struct StaticCheck<true>{};
|
||||||
|
|
||||||
|
StaticCheck<256 % sizeof(T) == 0> ElemStepTypeCheck;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef DevMem2D_<unsigned char> DevMem2D;
|
typedef DevMem2D_<unsigned char> DevMem2D;
|
||||||
typedef DevMem2D_<float> DevMem2Df;
|
typedef DevMem2D_<float> DevMem2Df;
|
||||||
typedef DevMem2D_<int> DevMem2Di;
|
typedef DevMem2D_<int> DevMem2Di;
|
||||||
|
|
||||||
|
typedef PtrStep_<unsigned char> PtrStep;
|
||||||
|
typedef PtrStep_<float> PtrStepf;
|
||||||
|
typedef PtrStep_<int> PtrStepi;
|
||||||
|
|
||||||
|
typedef PtrElemStep_<unsigned char> PtrElemStep;
|
||||||
|
typedef PtrElemStep_<float> PtrElemStepf;
|
||||||
|
typedef PtrElemStep_<int> PtrElemStepi;
|
||||||
|
|
||||||
|
#undef __CV_GPU_HOST_DEVICE__
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -109,6 +109,7 @@ namespace cv
|
|||||||
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
|
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
|
||||||
// Contains just image size, data ptr and step.
|
// Contains just image size, data ptr and step.
|
||||||
template <class T> operator DevMem2D_<T>() const;
|
template <class T> operator DevMem2D_<T>() const;
|
||||||
|
template <class T> operator PtrStep_<T>() const;
|
||||||
|
|
||||||
//! performs a blocking upload of data to GpuMat.
|
//! performs a blocking upload of data to GpuMat.
|
||||||
void upload(const cv::Mat& m);
|
void upload(const cv::Mat& m);
|
||||||
|
@@ -207,6 +207,7 @@ inline GpuMat& GpuMat::operator = (const GpuMat& m)
|
|||||||
inline GpuMat& GpuMat::operator = (const Mat& m) { upload(m); return *this; }
|
inline GpuMat& GpuMat::operator = (const Mat& m) { upload(m); return *this; }
|
||||||
|
|
||||||
template <class T> inline GpuMat::operator DevMem2D_<T>() const { return DevMem2D_<T>(rows, cols, (T*)data, step); }
|
template <class T> inline GpuMat::operator DevMem2D_<T>() const { return DevMem2D_<T>(rows, cols, (T*)data, step); }
|
||||||
|
template <class T> inline GpuMat::operator PtrStep_<T>() const { return PtrStep_<T>(*this); }
|
||||||
|
|
||||||
//CPP: void GpuMat::upload(const Mat& m);
|
//CPP: void GpuMat::upload(const Mat& m);
|
||||||
|
|
||||||
|
@@ -50,36 +50,32 @@ using namespace cv::gpu;
|
|||||||
#define FLT_MAX 3.402823466e+38F
|
#define FLT_MAX 3.402823466e+38F
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
namespace cv { namespace gpu { namespace bp {
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////////// load constants ////////////////////////
|
/////////////////////// load constants ////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace bp_kernels
|
|
||||||
{
|
|
||||||
__constant__ int cndisp;
|
__constant__ int cndisp;
|
||||||
__constant__ float cmax_data_term;
|
__constant__ float cmax_data_term;
|
||||||
__constant__ float cdata_weight;
|
__constant__ float cdata_weight;
|
||||||
__constant__ float cmax_disc_term;
|
__constant__ float cmax_disc_term;
|
||||||
__constant__ float cdisc_single_jump;
|
__constant__ float cdisc_single_jump;
|
||||||
};
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bp {
|
|
||||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
|
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bp_kernels::cndisp, &ndisp, sizeof(int )) );
|
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int )) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bp_kernels::cmax_data_term, &max_data_term, sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bp_kernels::cdata_weight, &data_weight, sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bp_kernels::cmax_disc_term, &max_disc_term, sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bp_kernels::cdisc_single_jump, &disc_single_jump, sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
////////////////////////// comp data //////////////////////////
|
////////////////////////// comp data //////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace bp_kernels
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void comp_data_gray(const uchar* l, const uchar* r, size_t step, T* data, size_t data_step, int cols, int rows)
|
__global__ void comp_data_gray(const uchar* l, const uchar* r, size_t step, T* data, size_t data_step, int cols, int rows)
|
||||||
{
|
{
|
||||||
@@ -145,9 +141,7 @@ namespace bp_kernels
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bp {
|
|
||||||
typedef void (*CompDataFunc)(const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream);
|
typedef void (*CompDataFunc)(const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@@ -160,9 +154,9 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
grid.y = divUp(l.rows, threads.y);
|
grid.y = divUp(l.rows, threads.y);
|
||||||
|
|
||||||
if (channels == 1)
|
if (channels == 1)
|
||||||
bp_kernels::comp_data_gray<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows);
|
comp_data_gray<T><<<grid, threads, 0, stream>>>(l.data, r.data, l.step, (T*)mdata.data, mdata.step/sizeof(T), l.cols, l.rows);
|
||||||
else
|
else
|
||||||
bp_kernels::comp_data_bgr<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows);
|
comp_data_bgr<T><<<grid, threads, 0, stream>>>(l.data, r.data, l.step, (T*)mdata.data, mdata.step/sizeof(T), l.cols, l.rows);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -187,14 +181,11 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
||||||
func(l, r, channels, mdata, stream);
|
func(l, r, channels, mdata, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
//////////////////////// data step down ///////////////////////
|
//////////////////////// data step down ///////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace bp_kernels
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const T* src, size_t src_step, T* dst, size_t dst_step)
|
__global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const T* src, size_t src_step, T* dst, size_t dst_step)
|
||||||
{
|
{
|
||||||
@@ -217,9 +208,7 @@ namespace bp_kernels
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bp {
|
|
||||||
typedef void (*DataStepDownFunc)(int dst_cols, int dst_rows, int src_rows, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream);
|
typedef void (*DataStepDownFunc)(int dst_cols, int dst_rows, int src_rows, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@@ -231,7 +220,7 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
grid.x = divUp(dst_cols, threads.x);
|
grid.x = divUp(dst_cols, threads.x);
|
||||||
grid.y = divUp(dst_rows, threads.y);
|
grid.y = divUp(dst_rows, threads.y);
|
||||||
|
|
||||||
bp_kernels::data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)src.ptr, src.step/sizeof(T), (T*)dst.ptr, dst.step/sizeof(T));
|
data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)src.data, src.step/sizeof(T), (T*)dst.data, dst.step/sizeof(T));
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -256,14 +245,11 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
||||||
func(dst_cols, dst_rows, src_rows, src, dst, stream);
|
func(dst_cols, dst_rows, src_rows, src, dst, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////// level up messages ////////////////////////
|
/////////////////// level up messages ////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace bp_kernels
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const T* src, size_t src_step, T* dst, size_t dst_step)
|
__global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const T* src, size_t src_step, T* dst, size_t dst_step)
|
||||||
{
|
{
|
||||||
@@ -282,9 +268,7 @@ namespace bp_kernels
|
|||||||
dstr[d * dst_disp_step] = srcr[d * src_disp_step];
|
dstr[d * dst_disp_step] = srcr[d * src_disp_step];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bp {
|
|
||||||
typedef void (*LevelUpMessagesFunc)(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream);
|
typedef void (*LevelUpMessagesFunc)(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@@ -298,10 +282,10 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
|
|
||||||
int src_idx = (dst_idx + 1) & 1;
|
int src_idx = (dst_idx + 1) & 1;
|
||||||
|
|
||||||
bp_kernels::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mus[src_idx].ptr, mus[src_idx].step/sizeof(T), (T*)mus[dst_idx].ptr, mus[dst_idx].step/sizeof(T));
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mus[src_idx].data, mus[src_idx].step/sizeof(T), (T*)mus[dst_idx].data, mus[dst_idx].step/sizeof(T));
|
||||||
bp_kernels::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mds[src_idx].ptr, mds[src_idx].step/sizeof(T), (T*)mds[dst_idx].ptr, mds[dst_idx].step/sizeof(T));
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mds[src_idx].data, mds[src_idx].step/sizeof(T), (T*)mds[dst_idx].data, mds[dst_idx].step/sizeof(T));
|
||||||
bp_kernels::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mls[src_idx].ptr, mls[src_idx].step/sizeof(T), (T*)mls[dst_idx].ptr, mls[dst_idx].step/sizeof(T));
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mls[src_idx].data, mls[src_idx].step/sizeof(T), (T*)mls[dst_idx].data, mls[dst_idx].step/sizeof(T));
|
||||||
bp_kernels::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mrs[src_idx].ptr, mrs[src_idx].step/sizeof(T), (T*)mrs[dst_idx].ptr, mrs[dst_idx].step/sizeof(T));
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mrs[src_idx].data, mrs[src_idx].step/sizeof(T), (T*)mrs[dst_idx].data, mrs[dst_idx].step/sizeof(T));
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -326,14 +310,11 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
||||||
func(dst_idx, dst_cols, dst_rows, src_rows, mus, mds, mls, mrs, stream);
|
func(dst_idx, dst_cols, dst_rows, src_rows, mus, mds, mls, mrs, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
//////////////////// calc all iterations /////////////////////
|
//////////////////// calc all iterations /////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace bp_kernels
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ void calc_min_linear_penalty(T* dst, size_t step)
|
__device__ void calc_min_linear_penalty(T* dst, size_t step)
|
||||||
{
|
{
|
||||||
@@ -427,9 +408,7 @@ namespace bp_kernels
|
|||||||
message(us + msg_step, ds - msg_step, ls + 1, dt, ls, msg_disp_step, data_disp_step);
|
message(us + msg_step, ds - msg_step, ls + 1, dt, ls, msg_disp_step, data_disp_step);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bp {
|
|
||||||
typedef void (*CalcAllIterationFunc)(int cols, int rows, int iters, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream);
|
typedef void (*CalcAllIterationFunc)(int cols, int rows, int iters, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@@ -443,7 +422,7 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
|
|
||||||
for(int t = 0; t < iters; ++t)
|
for(int t = 0; t < iters; ++t)
|
||||||
{
|
{
|
||||||
bp_kernels::one_iteration<T><<<grid, threads, 0, stream>>>(t, (T*)u.ptr, (T*)d.ptr, (T*)l.ptr, (T*)r.ptr, u.step/sizeof(T), (const T*)data.ptr, data.step/sizeof(T), cols, rows);
|
one_iteration<T><<<grid, threads, 0, stream>>>(t, (T*)u.data, (T*)d.data, (T*)l.data, (T*)r.data, u.step/sizeof(T), (const T*)data.data, data.step/sizeof(T), cols, rows);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -469,14 +448,11 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
||||||
func(cols, rows, iters, u, d, l, r, data, stream);
|
func(cols, rows, iters, u, d, l, r, data, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////////////// output ////////////////////////////
|
/////////////////////////// output ////////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace bp_kernels
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void output(int cols, int rows, const T* u, const T* d, const T* l, const T* r, const T* data, size_t step, short* disp, size_t res_step)
|
__global__ void output(int cols, int rows, const T* u, const T* d, const T* l, const T* r, const T* data, size_t step, short* disp, size_t res_step)
|
||||||
{
|
{
|
||||||
@@ -513,9 +489,7 @@ namespace bp_kernels
|
|||||||
disp[res_step * y + x] = saturate_cast<short>(best);
|
disp[res_step * y + x] = saturate_cast<short>(best);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bp {
|
|
||||||
typedef void (*OutputFunc)(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream);
|
typedef void (*OutputFunc)(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@@ -527,7 +501,7 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
grid.x = divUp(disp.cols, threads.x);
|
grid.x = divUp(disp.cols, threads.x);
|
||||||
grid.y = divUp(disp.rows, threads.y);
|
grid.y = divUp(disp.rows, threads.y);
|
||||||
|
|
||||||
bp_kernels::output<T><<<grid, threads, 0, stream>>>(disp.cols, disp.rows, (const T*)u.ptr, (const T*)d.ptr, (const T*)l.ptr, (const T*)r.ptr, (const T*)data.ptr, u.step/sizeof(T), (short*)disp.ptr, disp.step/sizeof(short));
|
output<T><<<grid, threads, 0, stream>>>(disp.cols, disp.rows, (const T*)u.data, (const T*)d.data, (const T*)l.data, (const T*)r.data, (const T*)data.data, u.step/sizeof(T), (short*)disp.data, disp.step/sizeof(short));
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -552,4 +526,5 @@ namespace cv { namespace gpu { namespace bp {
|
|||||||
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
|
||||||
func(u, d, l, r, data, disp, stream);
|
func(u, d, l, r, data, disp, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
}}}
|
@@ -54,7 +54,7 @@ using namespace cv::gpu;
|
|||||||
#define FLT_EPSILON 1.192092896e-07F
|
#define FLT_EPSILON 1.192092896e-07F
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace color_krnls
|
namespace cv { namespace gpu { namespace color
|
||||||
{
|
{
|
||||||
template<typename T> struct ColorChannel {};
|
template<typename T> struct ColorChannel {};
|
||||||
template<> struct ColorChannel<uchar>
|
template<> struct ColorChannel<uchar>
|
||||||
@@ -95,12 +95,9 @@ namespace color_krnls
|
|||||||
{
|
{
|
||||||
return vec.w;
|
return vec.w;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
|
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
|
||||||
|
|
||||||
namespace color_krnls
|
|
||||||
{
|
|
||||||
template <int SRCCN, int DSTCN, typename T>
|
template <int SRCCN, int DSTCN, typename T>
|
||||||
__global__ void RGB2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
|
__global__ void RGB2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
|
||||||
{
|
{
|
||||||
@@ -123,10 +120,7 @@ namespace color_krnls
|
|||||||
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace color
|
|
||||||
{
|
|
||||||
template <typename T, int SRCCN, int DSTCN>
|
template <typename T, int SRCCN, int DSTCN>
|
||||||
void RGB2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
|
void RGB2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -136,8 +130,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -178,12 +172,9 @@ namespace cv { namespace gpu { namespace color
|
|||||||
|
|
||||||
RGB2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
RGB2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
|
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
|
||||||
|
|
||||||
namespace color_krnls
|
|
||||||
{
|
|
||||||
template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};
|
template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};
|
||||||
template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>
|
template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>
|
||||||
{
|
{
|
||||||
@@ -272,10 +263,7 @@ namespace color_krnls
|
|||||||
*(ushort*)(dst_ + y * dst_step + (x << 1)) = RGB2RGB5x5Converter<SRCCN, GREEN_BITS>::cvt(&src.x, bidx);
|
*(ushort*)(dst_ + y * dst_step + (x << 1)) = RGB2RGB5x5Converter<SRCCN, GREEN_BITS>::cvt(&src.x, bidx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace color
|
|
||||||
{
|
|
||||||
template <int GREEN_BITS, int DSTCN>
|
template <int GREEN_BITS, int DSTCN>
|
||||||
void RGB5x52RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
|
void RGB5x52RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -285,8 +273,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -313,8 +301,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -331,12 +319,9 @@ namespace cv { namespace gpu { namespace color
|
|||||||
|
|
||||||
RGB2RGB5x5_callers[srccn - 3][green_bits - 5](src, dst, bidx, stream);
|
RGB2RGB5x5_callers[srccn - 3][green_bits - 5](src, dst, bidx, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////// Grayscale to Color ////////////////////////////////
|
///////////////////////////////// Grayscale to Color ////////////////////////////////
|
||||||
|
|
||||||
namespace color_krnls
|
|
||||||
{
|
|
||||||
template <int DSTCN, typename T>
|
template <int DSTCN, typename T>
|
||||||
__global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
|
__global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
|
||||||
{
|
{
|
||||||
@@ -387,10 +372,7 @@ namespace color_krnls
|
|||||||
*(ushort*)(dst_ + y * dst_step + (x << 1)) = Gray2RGB5x5Converter<GREEN_BITS>::cvt(src);
|
*(ushort*)(dst_ + y * dst_step + (x << 1)) = Gray2RGB5x5Converter<GREEN_BITS>::cvt(src);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace color
|
|
||||||
{
|
|
||||||
template <typename T, int DSTCN>
|
template <typename T, int DSTCN>
|
||||||
void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
|
void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -400,8 +382,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols);
|
dst.data, dst.step, src.rows, src.cols);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -440,8 +422,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols);
|
dst.data, dst.step, src.rows, src.cols);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -457,12 +439,9 @@ namespace cv { namespace gpu { namespace color
|
|||||||
|
|
||||||
Gray2RGB5x5_callers[green_bits - 5](src, dst, stream);
|
Gray2RGB5x5_callers[green_bits - 5](src, dst, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////// Color to Grayscale ////////////////////////////////
|
///////////////////////////////// Color to Grayscale ////////////////////////////////
|
||||||
|
|
||||||
namespace color_krnls
|
|
||||||
{
|
|
||||||
#undef R2Y
|
#undef R2Y
|
||||||
#undef G2Y
|
#undef G2Y
|
||||||
#undef B2Y
|
#undef B2Y
|
||||||
@@ -541,10 +520,7 @@ namespace color_krnls
|
|||||||
*(T*)(dst_ + y * dst_step + x * sizeof(T)) = RGB2GrayConvertor<T>::cvt(&src.x, bidx);
|
*(T*)(dst_ + y * dst_step + x * sizeof(T)) = RGB2GrayConvertor<T>::cvt(&src.x, bidx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace color
|
|
||||||
{
|
|
||||||
template <typename T, int SRCCN>
|
template <typename T, int SRCCN>
|
||||||
void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
|
void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -554,8 +530,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -594,8 +570,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols);
|
dst.data, dst.step, src.rows, src.cols);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -611,12 +587,9 @@ namespace cv { namespace gpu { namespace color
|
|||||||
|
|
||||||
RGB5x52Gray_callers[green_bits - 5](src, dst, stream);
|
RGB5x52Gray_callers[green_bits - 5](src, dst, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
|
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
|
||||||
|
|
||||||
namespace color_krnls
|
|
||||||
{
|
|
||||||
__constant__ float cYCrCbCoeffs_f[5];
|
__constant__ float cYCrCbCoeffs_f[5];
|
||||||
__constant__ int cYCrCbCoeffs_i[5];
|
__constant__ int cYCrCbCoeffs_i[5];
|
||||||
|
|
||||||
@@ -712,10 +685,7 @@ namespace color_krnls
|
|||||||
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace color
|
|
||||||
{
|
|
||||||
template <typename T, int SRCCN, int DSTCN>
|
template <typename T, int SRCCN, int DSTCN>
|
||||||
void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
|
void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -725,8 +695,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -741,7 +711,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}
|
{RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
|
||||||
|
|
||||||
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
||||||
}
|
}
|
||||||
@@ -755,7 +725,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{RGB2YCrCb_caller<ushort, 4, 3>, RGB2YCrCb_caller<ushort, 4, 4>}
|
{RGB2YCrCb_caller<ushort, 4, 3>, RGB2YCrCb_caller<ushort, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
|
||||||
|
|
||||||
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
||||||
}
|
}
|
||||||
@@ -769,7 +739,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}
|
{RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );
|
||||||
|
|
||||||
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
||||||
}
|
}
|
||||||
@@ -783,8 +753,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -799,7 +769,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}
|
{YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
|
||||||
|
|
||||||
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
||||||
}
|
}
|
||||||
@@ -813,7 +783,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{YCrCb2RGB_caller<ushort, 4, 3>, YCrCb2RGB_caller<ushort, 4, 4>}
|
{YCrCb2RGB_caller<ushort, 4, 3>, YCrCb2RGB_caller<ushort, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
|
||||||
|
|
||||||
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
||||||
}
|
}
|
||||||
@@ -827,16 +797,13 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}
|
{YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );
|
||||||
|
|
||||||
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
|
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
|
||||||
|
|
||||||
namespace color_krnls
|
|
||||||
{
|
|
||||||
__constant__ float cXYZ_D65f[9];
|
__constant__ float cXYZ_D65f[9];
|
||||||
__constant__ int cXYZ_D65i[9];
|
__constant__ int cXYZ_D65i[9];
|
||||||
|
|
||||||
@@ -922,10 +889,7 @@ namespace color_krnls
|
|||||||
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace color
|
|
||||||
{
|
|
||||||
template <typename T, int SRCCN, int DSTCN>
|
template <typename T, int SRCCN, int DSTCN>
|
||||||
void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
|
void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -935,8 +899,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols);
|
dst.data, dst.step, src.rows, src.cols);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -951,7 +915,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}
|
{RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cXYZ_D65i, coeffs, 9 * sizeof(int)) );
|
||||||
|
|
||||||
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
|
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
|
||||||
}
|
}
|
||||||
@@ -965,7 +929,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{RGB2XYZ_caller<ushort, 4, 3>, RGB2XYZ_caller<ushort, 4, 4>}
|
{RGB2XYZ_caller<ushort, 4, 3>, RGB2XYZ_caller<ushort, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cXYZ_D65i, coeffs, 9 * sizeof(int)) );
|
||||||
|
|
||||||
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
|
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
|
||||||
}
|
}
|
||||||
@@ -979,7 +943,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}
|
{RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cXYZ_D65f, coeffs, 9 * sizeof(float)) );
|
||||||
|
|
||||||
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
|
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
|
||||||
}
|
}
|
||||||
@@ -993,8 +957,8 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
color_krnls::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols);
|
dst.data, dst.step, src.rows, src.cols);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -1009,7 +973,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}
|
{XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cXYZ_D65i, coeffs, 9 * sizeof(int)) );
|
||||||
|
|
||||||
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
|
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
|
||||||
}
|
}
|
||||||
@@ -1023,7 +987,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{XYZ2RGB_caller<ushort, 4, 3>, XYZ2RGB_caller<ushort, 4, 4>}
|
{XYZ2RGB_caller<ushort, 4, 3>, XYZ2RGB_caller<ushort, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cXYZ_D65i, coeffs, 9 * sizeof(int)) );
|
||||||
|
|
||||||
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
|
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
|
||||||
}
|
}
|
||||||
@@ -1037,16 +1001,13 @@ namespace cv { namespace gpu { namespace color
|
|||||||
{XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}
|
{XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}
|
||||||
};
|
};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cXYZ_D65f, coeffs, 9 * sizeof(float)) );
|
||||||
|
|
||||||
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
|
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
|
////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
|
||||||
|
|
||||||
namespace color_krnls
|
|
||||||
{
|
|
||||||
__constant__ int cHsvDivTable[256];
|
__constant__ int cHsvDivTable[256];
|
||||||
|
|
||||||
template<typename T, int HR> struct RGB2HSVConvertor;
|
template<typename T, int HR> struct RGB2HSVConvertor;
|
||||||
@@ -1220,10 +1181,7 @@ namespace color_krnls
|
|||||||
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace color
|
|
||||||
{
|
|
||||||
template <typename T, int SRCCN, int DSTCN>
|
template <typename T, int SRCCN, int DSTCN>
|
||||||
void RGB2HSV_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
|
void RGB2HSV_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -1234,11 +1192,11 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
if (hrange == 180)
|
if (hrange == 180)
|
||||||
color_krnls::RGB2HSV<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2HSV<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
else
|
else
|
||||||
color_krnls::RGB2HSV<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2HSV<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -1288,7 +1246,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229,
|
4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229,
|
||||||
4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096
|
4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096
|
||||||
};
|
};
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cHsvDivTable, div_table, sizeof(div_table)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cHsvDivTable, div_table, sizeof(div_table)) );
|
||||||
|
|
||||||
RGB2HSV_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
RGB2HSV_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
||||||
}
|
}
|
||||||
@@ -1316,11 +1274,11 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
if (hrange == 180)
|
if (hrange == 180)
|
||||||
color_krnls::HSV2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
HSV2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
else
|
else
|
||||||
color_krnls::HSV2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
HSV2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -1338,7 +1296,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
static const int sector_data[][3] =
|
static const int sector_data[][3] =
|
||||||
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
|
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cHsvSectorData, sector_data, sizeof(sector_data)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cHsvSectorData, sector_data, sizeof(sector_data)) );
|
||||||
|
|
||||||
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
||||||
}
|
}
|
||||||
@@ -1355,16 +1313,13 @@ namespace cv { namespace gpu { namespace color
|
|||||||
static const int sector_data[][3] =
|
static const int sector_data[][3] =
|
||||||
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
|
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cHsvSectorData, sector_data, sizeof(sector_data)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cHsvSectorData, sector_data, sizeof(sector_data)) );
|
||||||
|
|
||||||
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
/////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
|
/////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
|
||||||
|
|
||||||
namespace color_krnls
|
|
||||||
{
|
|
||||||
template<typename T, int HR> struct RGB2HLSConvertor;
|
template<typename T, int HR> struct RGB2HLSConvertor;
|
||||||
template<int HR> struct RGB2HLSConvertor<float, HR>
|
template<int HR> struct RGB2HLSConvertor<float, HR>
|
||||||
{
|
{
|
||||||
@@ -1532,10 +1487,7 @@ namespace color_krnls
|
|||||||
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace color
|
|
||||||
{
|
|
||||||
template <typename T, int SRCCN, int DSTCN>
|
template <typename T, int SRCCN, int DSTCN>
|
||||||
void RGB2HLS_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
|
void RGB2HLS_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -1546,11 +1498,11 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
if (hrange == 180)
|
if (hrange == 180)
|
||||||
color_krnls::RGB2HLS<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2HLS<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
else
|
else
|
||||||
color_krnls::RGB2HLS<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
RGB2HLS<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -1591,11 +1543,11 @@ namespace cv { namespace gpu { namespace color
|
|||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
if (hrange == 180)
|
if (hrange == 180)
|
||||||
color_krnls::HLS2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
HLS2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
else
|
else
|
||||||
color_krnls::HLS2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
|
HLS2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.data, src.step,
|
||||||
dst.ptr, dst.step, src.rows, src.cols, bidx);
|
dst.data, dst.step, src.rows, src.cols, bidx);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -1613,7 +1565,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
static const int sector_data[][3]=
|
static const int sector_data[][3]=
|
||||||
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
|
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cHlsSectorData, sector_data, sizeof(sector_data)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cHlsSectorData, sector_data, sizeof(sector_data)) );
|
||||||
|
|
||||||
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
||||||
}
|
}
|
||||||
@@ -1630,7 +1582,7 @@ namespace cv { namespace gpu { namespace color
|
|||||||
static const int sector_data[][3]=
|
static const int sector_data[][3]=
|
||||||
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
|
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(color_krnls::cHlsSectorData, sector_data, sizeof(sector_data)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cHlsSectorData, sector_data, sizeof(sector_data)) );
|
||||||
|
|
||||||
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
|
||||||
}
|
}
|
||||||
|
@@ -54,8 +54,9 @@ using namespace cv::gpu;
|
|||||||
#define SHRT_MAX 32767
|
#define SHRT_MAX 32767
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace csbp_krnls
|
namespace cv { namespace gpu { namespace csbp
|
||||||
{
|
{
|
||||||
|
|
||||||
template <typename T> struct TypeLimits;
|
template <typename T> struct TypeLimits;
|
||||||
template <> struct TypeLimits<short>
|
template <> struct TypeLimits<short>
|
||||||
{
|
{
|
||||||
@@ -65,14 +66,11 @@ namespace csbp_krnls
|
|||||||
{
|
{
|
||||||
static __device__ float max() {return FLT_MAX;}
|
static __device__ float max() {return FLT_MAX;}
|
||||||
};
|
};
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////////// load constants ////////////////////////
|
/////////////////////// load constants ////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace csbp_krnls
|
|
||||||
{
|
|
||||||
__constant__ int cndisp;
|
__constant__ int cndisp;
|
||||||
|
|
||||||
__constant__ float cmax_data_term;
|
__constant__ float cmax_data_term;
|
||||||
@@ -91,36 +89,30 @@ namespace csbp_krnls
|
|||||||
__constant__ uchar* cleft;
|
__constant__ uchar* cleft;
|
||||||
__constant__ uchar* cright;
|
__constant__ uchar* cright;
|
||||||
__constant__ uchar* ctemp;
|
__constant__ uchar* ctemp;
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace csbp
|
|
||||||
{
|
|
||||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
||||||
const DevMem2D& left, const DevMem2D& right, const DevMem2D& temp)
|
const DevMem2D& left, const DevMem2D& right, const DevMem2D& temp)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cndisp, &ndisp, sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmax_data_term, &max_data_term, sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdata_weight, &data_weight, sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmax_disc_term, &max_disc_term, sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdisc_single_jump, &disc_single_jump, sizeof(float)) );
|
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cth, &min_disp_th, sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cimg_step, &left.step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cimg_step, &left.step, sizeof(size_t)) );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cleft, &left.ptr, sizeof(left.ptr)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cright, &right.ptr, sizeof(right.ptr)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::ctemp, &temp.ptr, sizeof(temp.ptr)) );
|
cudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) );
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////////// init data cost ////////////////////////
|
/////////////////////// init data cost ////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace csbp_krnls
|
|
||||||
{
|
|
||||||
template <int channels>
|
template <int channels>
|
||||||
struct DataCostPerPixel
|
struct DataCostPerPixel
|
||||||
{
|
{
|
||||||
@@ -334,10 +326,8 @@ namespace csbp_krnls
|
|||||||
data_cost[cdisp_step1 * d] = saturate_cast<T>(dline[0]);
|
data_cost[cdisp_step1 * d] = saturate_cast<T>(dline[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace csbp
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void init_data_cost_caller_(int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream)
|
void init_data_cost_caller_(int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -349,8 +339,8 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
switch (channels)
|
switch (channels)
|
||||||
{
|
{
|
||||||
case 1: csbp_krnls::init_data_cost<T, 1><<<grid, threads, 0, stream>>>(h, w, level); break;
|
case 1: init_data_cost<T, 1><<<grid, threads, 0, stream>>>(h, w, level); break;
|
||||||
case 3: csbp_krnls::init_data_cost<T, 3><<<grid, threads, 0, stream>>>(h, w, level); break;
|
case 3: init_data_cost<T, 3><<<grid, threads, 0, stream>>>(h, w, level); break;
|
||||||
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -367,8 +357,8 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
switch (channels)
|
switch (channels)
|
||||||
{
|
{
|
||||||
case 1: csbp_krnls::init_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
case 1: init_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
||||||
case 3: csbp_krnls::init_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
case 3: init_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
||||||
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -388,8 +378,8 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
};
|
};
|
||||||
|
|
||||||
size_t disp_step = msg_step * h;
|
size_t disp_step = msg_step * h;
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdisp_step1, &disp_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmsg_step1, &msg_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step1, &msg_step, sizeof(size_t)) );
|
||||||
|
|
||||||
init_data_cost_callers[level](rows, cols, h, w, level, ndisp, channels, stream);
|
init_data_cost_callers[level](rows, cols, h, w, level, ndisp, channels, stream);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
@@ -402,9 +392,9 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
grid.y = divUp(h, threads.y);
|
grid.y = divUp(h, threads.y);
|
||||||
|
|
||||||
if (use_local_init_data_cost == true)
|
if (use_local_init_data_cost == true)
|
||||||
csbp_krnls::get_first_k_initial_local<<<grid, threads, 0, stream>>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane);
|
get_first_k_initial_local<<<grid, threads, 0, stream>>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane);
|
||||||
else
|
else
|
||||||
csbp_krnls::get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
|
get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
@@ -421,14 +411,10 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
init_data_cost_tmpl(rows, cols, disp_selected_pyr, data_cost_selected, msg_step, h, w, level, nr_plane, ndisp, channels, use_local_init_data_cost, stream);
|
init_data_cost_tmpl(rows, cols, disp_selected_pyr, data_cost_selected, msg_step, h, w, level, nr_plane, ndisp, channels, use_local_init_data_cost, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
////////////////////// compute data cost //////////////////////
|
////////////////////// compute data cost //////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace csbp_krnls
|
|
||||||
{
|
|
||||||
template <typename T, int channels>
|
template <typename T, int channels>
|
||||||
__global__ void compute_data_cost(const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane)
|
__global__ void compute_data_cost(const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane)
|
||||||
{
|
{
|
||||||
@@ -536,10 +522,7 @@ namespace csbp_krnls
|
|||||||
data_cost[cdisp_step1 * d] = saturate_cast<T>(dline[0]);
|
data_cost[cdisp_step1 * d] = saturate_cast<T>(dline[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace csbp
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void compute_data_cost_caller_(const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/,
|
void compute_data_cost_caller_(const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/,
|
||||||
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream)
|
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream)
|
||||||
@@ -552,8 +535,8 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
switch(channels)
|
switch(channels)
|
||||||
{
|
{
|
||||||
case 1: csbp_krnls::compute_data_cost<T, 1><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
case 1: compute_data_cost<T, 1><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
||||||
case 3: csbp_krnls::compute_data_cost<T, 3><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
case 3: compute_data_cost<T, 3><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
||||||
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -571,13 +554,12 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
switch (channels)
|
switch (channels)
|
||||||
{
|
{
|
||||||
case 1: csbp_krnls::compute_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
case 1: compute_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
||||||
case 3: csbp_krnls::compute_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
case 3: compute_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
||||||
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void compute_data_cost_tmpl(const T* disp_selected_pyr, T* data_cost, size_t msg_step1, size_t msg_step2,
|
void compute_data_cost_tmpl(const T* disp_selected_pyr, T* data_cost, size_t msg_step1, size_t msg_step2,
|
||||||
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream)
|
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream)
|
||||||
@@ -594,10 +576,10 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
size_t disp_step1 = msg_step1 * h;
|
size_t disp_step1 = msg_step1 * h;
|
||||||
size_t disp_step2 = msg_step2 * h2;
|
size_t disp_step2 = msg_step2 * h2;
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdisp_step1, &disp_step1, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdisp_step2, &disp_step2, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmsg_step1, &msg_step1, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step1, &msg_step1, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmsg_step2, &msg_step2, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step2, &msg_step2, sizeof(size_t)) );
|
||||||
|
|
||||||
callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
|
callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
|
||||||
|
|
||||||
@@ -616,14 +598,11 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
compute_data_cost_tmpl(disp_selected_pyr, data_cost, msg_step1, msg_step2, rows, cols, h, w, h2, level, nr_plane, channels, stream);
|
compute_data_cost_tmpl(disp_selected_pyr, data_cost, msg_step1, msg_step2, rows, cols, h, w, h2, level, nr_plane, channels, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
//////////////////////// init message /////////////////////////
|
//////////////////////// init message /////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace csbp_krnls
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
|
__device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
|
||||||
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
|
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
|
||||||
@@ -705,10 +684,8 @@ namespace csbp_krnls
|
|||||||
data_cost, disparity_selected_cur, nr_plane, nr_plane2);
|
data_cost, disparity_selected_cur, nr_plane, nr_plane2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace csbp
|
|
||||||
{
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void init_message_tmpl(T* u_new, T* d_new, T* l_new, T* r_new,
|
void init_message_tmpl(T* u_new, T* d_new, T* l_new, T* r_new,
|
||||||
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
|
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
|
||||||
@@ -719,10 +696,10 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
size_t disp_step1 = msg_step1 * h;
|
size_t disp_step1 = msg_step1 * h;
|
||||||
size_t disp_step2 = msg_step2 * h2;
|
size_t disp_step2 = msg_step2 * h2;
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdisp_step1, &disp_step1, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdisp_step2, &disp_step2, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmsg_step1, &msg_step1, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step1, &msg_step1, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmsg_step2, &msg_step2, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step2, &msg_step2, sizeof(size_t)) );
|
||||||
|
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@@ -730,7 +707,7 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
grid.x = divUp(w, threads.x);
|
grid.x = divUp(w, threads.x);
|
||||||
grid.y = divUp(h, threads.y);
|
grid.y = divUp(h, threads.y);
|
||||||
|
|
||||||
csbp_krnls::init_message<<<grid, threads, 0, stream>>>(u_new, d_new, l_new, r_new,
|
init_message<<<grid, threads, 0, stream>>>(u_new, d_new, l_new, r_new,
|
||||||
u_cur, d_cur, l_cur, r_cur,
|
u_cur, d_cur, l_cur, r_cur,
|
||||||
selected_disp_pyr_new, selected_disp_pyr_cur,
|
selected_disp_pyr_new, selected_disp_pyr_cur,
|
||||||
data_cost_selected, data_cost,
|
data_cost_selected, data_cost,
|
||||||
@@ -761,14 +738,11 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
selected_disp_pyr_new, selected_disp_pyr_cur, data_cost_selected, data_cost, msg_step1, msg_step2,
|
selected_disp_pyr_new, selected_disp_pyr_cur, data_cost_selected, data_cost, msg_step1, msg_step2,
|
||||||
h, w, nr_plane, h2, w2, nr_plane2, stream);
|
h, w, nr_plane, h2, w2, nr_plane2, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
//////////////////// calc all iterations /////////////////////
|
//////////////////// calc all iterations /////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace csbp_krnls
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
|
__device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
|
||||||
const T* dst_disp, const T* src_disp, int nr_plane, T* temp)
|
const T* dst_disp, const T* src_disp, int nr_plane, T* temp)
|
||||||
@@ -829,17 +803,15 @@ namespace csbp_krnls
|
|||||||
message_per_pixel(data, r, u + cmsg_step1, d - cmsg_step1, r - 1, disp, disp + 1, nr_plane, temp);
|
message_per_pixel(data, r, u + cmsg_step1, d - cmsg_step1, r - 1, disp, disp + 1, nr_plane, temp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace csbp
|
|
||||||
{
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void calc_all_iterations_tmpl(T* u, T* d, T* l, T* r, const T* data_cost_selected,
|
void calc_all_iterations_tmpl(T* u, T* d, T* l, T* r, const T* data_cost_selected,
|
||||||
const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream)
|
const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
size_t disp_step = msg_step * h;
|
size_t disp_step = msg_step * h;
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdisp_step1, &disp_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmsg_step1, &msg_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step1, &msg_step, sizeof(size_t)) );
|
||||||
|
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@@ -849,7 +821,7 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
for(int t = 0; t < iters; ++t)
|
for(int t = 0; t < iters; ++t)
|
||||||
{
|
{
|
||||||
csbp_krnls::compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
|
compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -868,14 +840,12 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
calc_all_iterations_tmpl(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, msg_step, h, w, nr_plane, iters, stream);
|
calc_all_iterations_tmpl(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, msg_step, h, w, nr_plane, iters, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////////////// output ////////////////////////////
|
/////////////////////////// output ////////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace csbp_krnls
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void compute_disp(const T* u_, const T* d_, const T* l_, const T* r_,
|
__global__ void compute_disp(const T* u_, const T* d_, const T* l_, const T* r_,
|
||||||
const T* data_cost_selected, const T* disp_selected_pyr,
|
const T* data_cost_selected, const T* disp_selected_pyr,
|
||||||
@@ -910,17 +880,15 @@ namespace csbp_krnls
|
|||||||
disp[res_step * y + x] = best;
|
disp[res_step * y + x] = best;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace csbp
|
|
||||||
{
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void compute_disp_tmpl(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
void compute_disp_tmpl(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
||||||
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream)
|
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
size_t disp_step = disp.rows * msg_step;
|
size_t disp_step = disp.rows * msg_step;
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cdisp_step1, &disp_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(csbp_krnls::cmsg_step1, &msg_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step1, &msg_step, sizeof(size_t)) );
|
||||||
|
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@@ -928,8 +896,8 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
grid.x = divUp(disp.cols, threads.x);
|
grid.x = divUp(disp.cols, threads.x);
|
||||||
grid.y = divUp(disp.rows, threads.y);
|
grid.y = divUp(disp.rows, threads.y);
|
||||||
|
|
||||||
csbp_krnls::compute_disp<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, disp_selected,
|
compute_disp<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, disp_selected,
|
||||||
disp.ptr, disp.step / disp.elemSize(), disp.cols, disp.rows, nr_plane);
|
disp.data, disp.step / disp.elemSize(), disp.cols, disp.rows, nr_plane);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
|
@@ -56,7 +56,7 @@ namespace cv
|
|||||||
typedef unsigned short ushort;
|
typedef unsigned short ushort;
|
||||||
typedef unsigned int uint;
|
typedef unsigned int uint;
|
||||||
|
|
||||||
// Integer division of a by b, bumped by one whenever the division leaves a
// remainder. Used by the launch helpers to size a grid from an extent and a
// block dimension. b must be non-zero.
static inline int divUp(int a, int b)
{
    const int quotient = a / b;
    if (a % b != 0)
        return quotient + 1;
    return quotient;
}
|
// Number of chunks of size `grain` needed to cover `total` items: adds
// grain - 1 to total before dividing. Callers pass an image extent and a
// block dimension to obtain a grid dimension. grain must be non-zero.
static inline int divUp(int total, int grain)
{
    const int padded = total + grain - 1;
    return padded / grain;
}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
static inline void uploadConstant(const char* name, const T& value) { cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); }
|
static inline void uploadConstant(const char* name, const T& value) { cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); }
|
||||||
|
@@ -128,8 +128,8 @@ namespace cv { namespace gpu { namespace filters
|
|||||||
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
|
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
|
||||||
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
||||||
|
|
||||||
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.ptr, src.elem_step,
|
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.data, src.step/src.elemSize(),
|
||||||
dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
|
dst.data, dst.step/dst.elemSize(), anchor, src.cols, src.rows);
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
@@ -152,10 +152,12 @@ namespace cv { namespace gpu { namespace filters
|
|||||||
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
|
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
|
template void linearRowFilter_gpu<4, uchar4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
|
||||||
|
|
||||||
|
/* void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
|
||||||
{
|
{
|
||||||
linearRowFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor);
|
linearRowFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor);
|
||||||
}
|
}*/
|
||||||
void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
|
void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
|
||||||
{
|
{
|
||||||
linearRowFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor);
|
linearRowFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor);
|
||||||
@@ -262,8 +264,8 @@ namespace cv { namespace gpu { namespace filters
|
|||||||
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
|
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
|
||||||
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
||||||
|
|
||||||
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.ptr, src.elem_step,
|
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.data, src.step/src.elemSize(),
|
||||||
dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
|
dst.data, dst.step/dst.elemSize(), anchor, src.cols, src.rows);
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
@@ -357,7 +359,7 @@ namespace cv { namespace gpu { namespace bf
|
|||||||
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)
|
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );
|
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.ptr, sizeof(table_space.ptr)) );
|
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.data, sizeof(table_space.data)) );
|
||||||
size_t table_space_step = table_space.step / sizeof(float);
|
size_t table_space_step = table_space.step / sizeof(float);
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );
|
||||||
|
|
||||||
@@ -491,15 +493,15 @@ namespace cv { namespace gpu { namespace bf
|
|||||||
case 1:
|
case 1:
|
||||||
for (int i = 0; i < iters; ++i)
|
for (int i = 0; i < iters; ++i)
|
||||||
{
|
{
|
||||||
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
|
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||||
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
|
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
for (int i = 0; i < iters; ++i)
|
for (int i = 0; i < iters; ++i)
|
||||||
{
|
{
|
||||||
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
|
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||||
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);
|
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@@ -45,7 +45,7 @@
|
|||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
|
|
||||||
/////////////////////////////////// Remap ///////////////////////////////////////////////
|
/////////////////////////////////// Remap ///////////////////////////////////////////////
|
||||||
namespace imgproc_krnls
|
namespace cv { namespace gpu { namespace imgproc
|
||||||
{
|
{
|
||||||
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
|
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
|
||||||
|
|
||||||
@@ -121,10 +121,7 @@ namespace imgproc_krnls
|
|||||||
*(dst + y * dst_step + 3 * x + 2) = out.z;
|
*(dst + y * dst_step + 3 * x + 2) = out.z;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
|
||||||
{
|
|
||||||
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
|
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
|
||||||
{
|
{
|
||||||
dim3 threads(16, 16, 1);
|
dim3 threads(16, 16, 1);
|
||||||
@@ -132,15 +129,15 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
grid.x = divUp(dst.cols, threads.x);
|
grid.x = divUp(dst.cols, threads.x);
|
||||||
grid.y = divUp(dst.rows, threads.y);
|
grid.y = divUp(dst.rows, threads.y);
|
||||||
|
|
||||||
imgproc_krnls::tex_remap.filterMode = cudaFilterModeLinear;
|
tex_remap.filterMode = cudaFilterModeLinear;
|
||||||
imgproc_krnls::tex_remap.addressMode[0] = imgproc_krnls::tex_remap.addressMode[1] = cudaAddressModeWrap;
|
tex_remap.addressMode[0] = tex_remap.addressMode[1] = cudaAddressModeWrap;
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||||
cudaSafeCall( cudaBindTexture2D(0, imgproc_krnls::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );
|
cudaSafeCall( cudaBindTexture2D(0, tex_remap, src.data, desc, src.cols, src.rows, src.step) );
|
||||||
|
|
||||||
imgproc_krnls::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
|
remap_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
cudaSafeCall( cudaUnbindTexture(imgproc_krnls::tex_remap) );
|
cudaSafeCall( cudaUnbindTexture(tex_remap) );
|
||||||
}
|
}
|
||||||
|
|
||||||
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
|
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
|
||||||
@@ -150,17 +147,13 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
grid.x = divUp(dst.cols, threads.x);
|
grid.x = divUp(dst.cols, threads.x);
|
||||||
grid.y = divUp(dst.rows, threads.y);
|
grid.y = divUp(dst.rows, threads.y);
|
||||||
|
|
||||||
imgproc_krnls::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
|
remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
||||||
|
|
||||||
namespace imgproc_krnls
|
|
||||||
{
|
|
||||||
texture<uchar4, 2> tex_meanshift;
|
texture<uchar4, 2> tex_meanshift;
|
||||||
|
|
||||||
__device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
|
__device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
|
||||||
@@ -252,10 +245,7 @@ namespace imgproc_krnls
|
|||||||
*(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
|
*(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
|
||||||
{
|
|
||||||
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)
|
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)
|
||||||
{
|
{
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@@ -264,11 +254,11 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
|
||||||
cudaSafeCall( cudaBindTexture2D( 0, imgproc_krnls::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
|
cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
|
||||||
|
|
||||||
imgproc_krnls::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
|
meanshift_kernel<<< grid, threads >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
cudaSafeCall( cudaUnbindTexture( imgproc_krnls::tex_meanshift ) );
|
cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
||||||
}
|
}
|
||||||
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
|
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
|
||||||
{
|
{
|
||||||
@@ -278,18 +268,15 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
|
||||||
cudaSafeCall( cudaBindTexture2D( 0, imgproc_krnls::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
|
cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
|
||||||
|
|
||||||
imgproc_krnls::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
|
meanshiftproc_kernel<<< grid, threads >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
cudaSafeCall( cudaUnbindTexture( imgproc_krnls::tex_meanshift ) );
|
cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
|
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
|
||||||
|
|
||||||
namespace imgproc_krnls
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
|
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
|
||||||
{
|
{
|
||||||
@@ -389,10 +376,8 @@ namespace imgproc_krnls
|
|||||||
line[x >> 1] = res;
|
line[x >> 1] = res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
|
||||||
{
|
|
||||||
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
|
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 threads(16, 16, 1);
|
dim3 threads(16, 16, 1);
|
||||||
@@ -400,7 +385,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
grid.x = divUp(src.cols, threads.x << 2);
|
grid.x = divUp(src.cols, threads.x << 2);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);
|
drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -413,17 +398,14 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
grid.x = divUp(src.cols, threads.x << 1);
|
grid.x = divUp(src.cols, threads.x << 1);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);
|
drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
|
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
|
||||||
|
|
||||||
namespace imgproc_krnls
|
|
||||||
{
|
|
||||||
__constant__ float cq[16];
|
__constant__ float cq[16];
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@@ -455,10 +437,7 @@ namespace imgproc_krnls
|
|||||||
*(float4*)(xyzw + xyzw_step * y + (x * 4)) = v;
|
*(float4*)(xyzw + xyzw_step * y + (x * 4)) = v;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
|
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
@@ -467,9 +446,9 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
grid.x = divUp(disp.cols, threads.x);
|
grid.x = divUp(disp.cols, threads.x);
|
||||||
grid.y = divUp(disp.rows, threads.y);
|
grid.y = divUp(disp.rows, threads.y);
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cq, q, 16 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
|
||||||
|
|
||||||
imgproc_krnls::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);
|
reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.data, disp.step / sizeof(T), xyzw.data, xyzw.step / sizeof(float), disp.rows, disp.cols);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
|
@@ -41,9 +41,7 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "cuda_shared.hpp"
|
#include "cuda_shared.hpp"
|
||||||
#include "saturate_cast.hpp"
|
|
||||||
#include "transform.hpp"
|
#include "transform.hpp"
|
||||||
#include "vecmath.hpp"
|
|
||||||
|
|
||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
|
|
||||||
@@ -54,7 +52,7 @@ using namespace cv::gpu;
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Cart <-> Polar
|
// Cart <-> Polar
|
||||||
|
|
||||||
namespace mathfunc_krnls
|
namespace cv { namespace gpu { namespace mathfunc
|
||||||
{
|
{
|
||||||
struct Nothing
|
struct Nothing
|
||||||
{
|
{
|
||||||
@@ -133,10 +131,7 @@ namespace mathfunc_krnls
|
|||||||
yptr[y * y_step + x] = mag_data * sin_a;
|
yptr[y * y_step + x] = mag_data * sin_a;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc
|
|
||||||
{
|
|
||||||
template <typename Mag, typename Angle>
|
template <typename Mag, typename Angle>
|
||||||
void cartToPolar_caller(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream)
|
void cartToPolar_caller(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@@ -148,9 +143,9 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
|
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
|
||||||
|
|
||||||
mathfunc_krnls::cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
|
cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
|
||||||
x.ptr, x.elem_step, y.ptr, y.elem_step,
|
x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
|
||||||
mag.ptr, mag.elem_step, angle.ptr, angle.elem_step, scale, x.cols, x.rows);
|
mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -163,27 +158,27 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
{
|
{
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
cartToPolar_caller<mathfunc_krnls::Magnitude, mathfunc_krnls::Atan2>,
|
cartToPolar_caller<Magnitude, Atan2>,
|
||||||
cartToPolar_caller<mathfunc_krnls::Magnitude, mathfunc_krnls::Nothing>
|
cartToPolar_caller<Magnitude, Nothing>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
cartToPolar_caller<mathfunc_krnls::MagnitudeSqr, mathfunc_krnls::Atan2>,
|
cartToPolar_caller<MagnitudeSqr, Atan2>,
|
||||||
cartToPolar_caller<mathfunc_krnls::MagnitudeSqr, mathfunc_krnls::Nothing>,
|
cartToPolar_caller<MagnitudeSqr, Nothing>,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
cartToPolar_caller<mathfunc_krnls::Nothing, mathfunc_krnls::Atan2>,
|
cartToPolar_caller<Nothing, Atan2>,
|
||||||
cartToPolar_caller<mathfunc_krnls::Nothing, mathfunc_krnls::Nothing>
|
cartToPolar_caller<Nothing, Nothing>
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
cartToPolar_caller<mathfunc_krnls::Nothing, mathfunc_krnls::Atan2>,
|
cartToPolar_caller<Nothing, Atan2>,
|
||||||
cartToPolar_caller<mathfunc_krnls::Nothing, mathfunc_krnls::Nothing>,
|
cartToPolar_caller<Nothing, Nothing>,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
callers[mag.ptr == 0][magSqr][angle.ptr == 0](x, y, mag, angle, angleInDegrees, stream);
|
callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Mag>
|
template <typename Mag>
|
||||||
@@ -197,8 +192,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
|
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
|
||||||
|
|
||||||
mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.elem_step,
|
polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
|
||||||
angle.ptr, angle.elem_step, scale, x.ptr, x.elem_step, y.ptr, y.elem_step, mag.cols, mag.rows);
|
angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -209,19 +204,16 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
typedef void (*caller_t)(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
|
typedef void (*caller_t)(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
|
||||||
static const caller_t callers[2] =
|
static const caller_t callers[2] =
|
||||||
{
|
{
|
||||||
polarToCart_caller<mathfunc_krnls::NonEmptyMag>,
|
polarToCart_caller<NonEmptyMag>,
|
||||||
polarToCart_caller<mathfunc_krnls::EmptyMag>
|
polarToCart_caller<EmptyMag>
|
||||||
};
|
};
|
||||||
|
|
||||||
callers[mag.ptr == 0](mag, angle, x, y, angleInDegrees, stream);
|
callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Compare
|
// Compare
|
||||||
|
|
||||||
namespace mathfunc_krnls
|
|
||||||
{
|
|
||||||
template <typename T1, typename T2>
|
template <typename T1, typename T2>
|
||||||
struct NotEqual
|
struct NotEqual
|
||||||
{
|
{
|
||||||
@@ -230,14 +222,11 @@ namespace mathfunc_krnls
|
|||||||
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
|
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc
|
|
||||||
{
|
|
||||||
template <typename T1, typename T2>
|
template <typename T1, typename T2>
|
||||||
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
|
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
|
||||||
{
|
{
|
||||||
mathfunc_krnls::NotEqual<T1, T2> op;
|
NotEqual<T1, T2> op;
|
||||||
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
|
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -40,16 +40,11 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include "cuda_shared.hpp"
|
#include "cuda_shared.hpp"
|
||||||
#include "cuda_runtime.h"
|
|
||||||
#include "saturate_cast.hpp"
|
#include "saturate_cast.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
namespace cv { namespace gpu { namespace matrix_operations {
|
||||||
|
|
||||||
namespace matop_krnls
|
|
||||||
{
|
|
||||||
template <typename T> struct shift_and_sizeof;
|
template <typename T> struct shift_and_sizeof;
|
||||||
template <> struct shift_and_sizeof<char> { enum { shift = 0 }; };
|
template <> struct shift_and_sizeof<char> { enum { shift = 0 }; };
|
||||||
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
|
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
|
||||||
@@ -115,14 +110,11 @@ namespace matop_krnls
|
|||||||
typedef int2 read_type;
|
typedef int2 read_type;
|
||||||
typedef short2 write_type;
|
typedef short2 write_type;
|
||||||
};
|
};
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
////////////////////////////////// CopyTo /////////////////////////////////
|
////////////////////////////////// CopyTo /////////////////////////////////
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace matop_krnls
|
|
||||||
{
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
|
__global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
|
||||||
{
|
{
|
||||||
@@ -136,10 +128,6 @@ namespace matop_krnls
|
|||||||
mat_dst[idx] = mat_src[idx];
|
mat_dst[idx] = mat_src[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace matrix_operations
|
|
||||||
{
|
|
||||||
typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
|
typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@@ -147,18 +135,13 @@ namespace cv { namespace gpu { namespace matrix_operations
|
|||||||
{
|
{
|
||||||
dim3 threadsPerBlock(16,16, 1);
|
dim3 threadsPerBlock(16,16, 1);
|
||||||
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
|
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
|
||||||
|
|
||||||
|
copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
|
||||||
|
((T*)mat_src.data, (T*)mat_dst.data, (unsigned char*)mask.data, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
{
|
|
||||||
::matop_krnls::copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
|
|
||||||
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
|
|
||||||
cudaSafeCall ( cudaThreadSynchronize() );
|
cudaSafeCall ( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
::matop_krnls::copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
|
|
||||||
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
|
void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
@@ -180,14 +163,11 @@ namespace cv { namespace gpu { namespace matrix_operations
|
|||||||
|
|
||||||
func(mat_src, mat_dst, mask, channels, stream);
|
func(mat_src, mat_dst, mask, channels, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
////////////////////////////////// SetTo //////////////////////////////////
|
////////////////////////////////// SetTo //////////////////////////////////
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace matop_krnls
|
|
||||||
{
|
|
||||||
__constant__ double scalar_d[4];
|
__constant__ double scalar_d[4];
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@@ -216,10 +196,6 @@ namespace matop_krnls
|
|||||||
mat[idx] = scalar_d[ x % channels ];
|
mat[idx] = scalar_d[ x % channels ];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace matrix_operations
|
|
||||||
{
|
|
||||||
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
|
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
|
||||||
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
|
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
|
||||||
|
|
||||||
@@ -229,17 +205,10 @@ namespace cv { namespace gpu { namespace matrix_operations
|
|||||||
dim3 threadsPerBlock(32, 8, 1);
|
dim3 threadsPerBlock(32, 8, 1);
|
||||||
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
||||||
|
|
||||||
|
set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.data, (unsigned char *)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
{
|
|
||||||
::matop_krnls::set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
|
|
||||||
cudaSafeCall ( cudaThreadSynchronize() );
|
cudaSafeCall ( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
::matop_krnls::set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
|
void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
|
||||||
@@ -247,20 +216,15 @@ namespace cv { namespace gpu { namespace matrix_operations
|
|||||||
dim3 threadsPerBlock(32, 8, 1);
|
dim3 threadsPerBlock(32, 8, 1);
|
||||||
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
||||||
|
|
||||||
|
set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
{
|
|
||||||
matop_krnls::set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
|
|
||||||
cudaSafeCall ( cudaThreadSynchronize() );
|
cudaSafeCall ( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
matop_krnls::set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
|
void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(matop_krnls::scalar_d, scalar, sizeof(double) * 4));
|
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, scalar, sizeof(double) * 4));
|
||||||
|
|
||||||
static SetToFunc_without_mask tab[8] =
|
static SetToFunc_without_mask tab[8] =
|
||||||
{
|
{
|
||||||
@@ -284,7 +248,7 @@ namespace cv { namespace gpu { namespace matrix_operations
|
|||||||
|
|
||||||
void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
|
void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(matop_krnls::scalar_d, scalar, sizeof(double) * 4));
|
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, scalar, sizeof(double) * 4));
|
||||||
|
|
||||||
static SetToFunc_with_mask tab[8] =
|
static SetToFunc_with_mask tab[8] =
|
||||||
{
|
{
|
||||||
@@ -305,14 +269,11 @@ namespace cv { namespace gpu { namespace matrix_operations
|
|||||||
|
|
||||||
func(mat, mask, channels, stream);
|
func(mat, mask, channels, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
//////////////////////////////// ConvertTo ////////////////////////////////
|
//////////////////////////////// ConvertTo ////////////////////////////////
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace matop_krnls
|
|
||||||
{
|
|
||||||
template <typename T, typename DT>
|
template <typename T, typename DT>
|
||||||
__global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
|
__global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
|
||||||
{
|
{
|
||||||
@@ -348,30 +309,21 @@ namespace matop_krnls
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace matrix_operations
|
|
||||||
{
|
|
||||||
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
|
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
|
||||||
|
|
||||||
template<typename T, typename DT>
|
template<typename T, typename DT>
|
||||||
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
|
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
const int shift = ::matop_krnls::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
|
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
|
||||||
|
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
|
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
|
||||||
|
|
||||||
|
convert_to<T, DT><<<grid, block, 0, stream>>>(src.data, src.step, dst.data, dst.step, width, height, alpha, beta);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
{
|
|
||||||
matop_krnls::convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
matop_krnls::convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
|
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
|
@@ -230,9 +230,9 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
dim3 blockDim(32, 8);
|
dim3 blockDim(32, 8);
|
||||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||||
mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
|
mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||||
src[0].ptr, src[0].step,
|
src[0].data, src[0].step,
|
||||||
src[1].ptr, src[1].step,
|
src[1].data, src[1].step,
|
||||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
dst.rows, dst.cols, dst.data, dst.step);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
cudaSafeCall(cudaThreadSynchronize());
|
||||||
}
|
}
|
||||||
@@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
dim3 blockDim(32, 8);
|
dim3 blockDim(32, 8);
|
||||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||||
mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
|
mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||||
src[0].ptr, src[0].step,
|
src[0].data, src[0].step,
|
||||||
src[1].ptr, src[1].step,
|
src[1].data, src[1].step,
|
||||||
src[2].ptr, src[2].step,
|
src[2].data, src[2].step,
|
||||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
dst.rows, dst.cols, dst.data, dst.step);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
cudaSafeCall(cudaThreadSynchronize());
|
||||||
}
|
}
|
||||||
@@ -259,11 +259,11 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
dim3 blockDim(32, 8);
|
dim3 blockDim(32, 8);
|
||||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||||
mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
|
mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||||
src[0].ptr, src[0].step,
|
src[0].data, src[0].step,
|
||||||
src[1].ptr, src[1].step,
|
src[1].data, src[1].step,
|
||||||
src[2].ptr, src[2].step,
|
src[2].data, src[2].step,
|
||||||
src[3].ptr, src[3].step,
|
src[3].data, src[3].step,
|
||||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
dst.rows, dst.cols, dst.data, dst.step);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
cudaSafeCall(cudaThreadSynchronize());
|
||||||
}
|
}
|
||||||
@@ -433,9 +433,9 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
dim3 blockDim(32, 8);
|
dim3 blockDim(32, 8);
|
||||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||||
splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
|
splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||||
src.ptr, src.step, src.rows, src.cols,
|
src.data, src.step, src.rows, src.cols,
|
||||||
dst[0].ptr, dst[0].step,
|
dst[0].data, dst[0].step,
|
||||||
dst[1].ptr, dst[1].step);
|
dst[1].data, dst[1].step);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
cudaSafeCall(cudaThreadSynchronize());
|
||||||
}
|
}
|
||||||
@@ -447,10 +447,10 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
dim3 blockDim(32, 8);
|
dim3 blockDim(32, 8);
|
||||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||||
splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
|
splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||||
src.ptr, src.step, src.rows, src.cols,
|
src.data, src.step, src.rows, src.cols,
|
||||||
dst[0].ptr, dst[0].step,
|
dst[0].data, dst[0].step,
|
||||||
dst[1].ptr, dst[1].step,
|
dst[1].data, dst[1].step,
|
||||||
dst[2].ptr, dst[2].step);
|
dst[2].data, dst[2].step);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
cudaSafeCall(cudaThreadSynchronize());
|
||||||
}
|
}
|
||||||
@@ -462,11 +462,11 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
dim3 blockDim(32, 8);
|
dim3 blockDim(32, 8);
|
||||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||||
splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
|
splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||||
src.ptr, src.step, src.rows, src.cols,
|
src.data, src.step, src.rows, src.cols,
|
||||||
dst[0].ptr, dst[0].step,
|
dst[0].data, dst[0].step,
|
||||||
dst[1].ptr, dst[1].step,
|
dst[1].data, dst[1].step,
|
||||||
dst[2].ptr, dst[2].step,
|
dst[2].data, dst[2].step,
|
||||||
dst[3].ptr, dst[3].step);
|
dst[3].data, dst[3].step);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
cudaSafeCall(cudaThreadSynchronize());
|
||||||
}
|
}
|
||||||
|
@@ -43,8 +43,7 @@
|
|||||||
//#include "cuda_shared.hpp"
|
//#include "cuda_shared.hpp"
|
||||||
#include "opencv2/gpu/devmem2d.hpp"
|
#include "opencv2/gpu/devmem2d.hpp"
|
||||||
#include "safe_call.hpp"
|
#include "safe_call.hpp"
|
||||||
static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
|
static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
@@ -55,7 +54,7 @@ using namespace cv::gpu;
|
|||||||
|
|
||||||
#define ROWSperTHREAD 21 // the number of rows a thread will process
|
#define ROWSperTHREAD 21 // the number of rows a thread will process
|
||||||
|
|
||||||
namespace stereobm_gpu
|
namespace cv { namespace gpu { namespace bm
|
||||||
{
|
{
|
||||||
|
|
||||||
#define BLOCK_W 128 // the thread block width (464)
|
#define BLOCK_W 128 // the thread block width (464)
|
||||||
@@ -233,7 +232,7 @@ __device__ void InitColSSD(int x_tex, int y_tex, int im_pitch, unsigned char* im
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<int RADIUS>
|
template<int RADIUS>
|
||||||
__global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, unsigned char* disp, size_t disp_pitch, int maxdisp)
|
__global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, PtrStep disp, int maxdisp)
|
||||||
{
|
{
|
||||||
extern __shared__ unsigned int col_ssd_cache[];
|
extern __shared__ unsigned int col_ssd_cache[];
|
||||||
volatile unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x;
|
volatile unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x;
|
||||||
@@ -246,7 +245,7 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t i
|
|||||||
//int Y = blockIdx.y * ROWSperTHREAD + RADIUS;
|
//int Y = blockIdx.y * ROWSperTHREAD + RADIUS;
|
||||||
|
|
||||||
unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
|
unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
|
||||||
unsigned char* disparImage = disp + X + Y * disp_pitch;
|
unsigned char* disparImage = disp.data + X + Y * disp.step;
|
||||||
/* if (X < cwidth)
|
/* if (X < cwidth)
|
||||||
{
|
{
|
||||||
unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
|
unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
|
||||||
@@ -305,7 +304,7 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t i
|
|||||||
uint2 minSSD = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd);
|
uint2 minSSD = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd);
|
||||||
if (minSSD.x < minSSDImage[idx])
|
if (minSSD.x < minSSDImage[idx])
|
||||||
{
|
{
|
||||||
disparImage[disp_pitch * row] = (unsigned char)(d + minSSD.y);
|
disparImage[disp.step * row] = (unsigned char)(d + minSSD.y);
|
||||||
minSSDImage[idx] = minSSD.x;
|
minSSDImage[idx] = minSSD.x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -313,12 +312,8 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t i
|
|||||||
} // for d loop
|
} // for d loop
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, cudaStream_t & stream)
|
||||||
namespace cv { namespace gpu { namespace bm
|
|
||||||
{
|
|
||||||
template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream)
|
|
||||||
{
|
{
|
||||||
dim3 grid(1,1,1);
|
dim3 grid(1,1,1);
|
||||||
dim3 threads(BLOCK_W, 1, 1);
|
dim3 threads(BLOCK_W, 1, 1);
|
||||||
@@ -329,19 +324,12 @@ namespace cv { namespace gpu { namespace bm
|
|||||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
|
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
|
||||||
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
|
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
|
||||||
|
|
||||||
|
stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.data, right.data, left.step, disp, maxdisp);
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
{
|
|
||||||
stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream);
|
typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, cudaStream_t & stream);
|
||||||
|
|
||||||
const static kernel_caller_t callers[] =
|
const static kernel_caller_t callers[] =
|
||||||
{
|
{
|
||||||
@@ -356,7 +344,7 @@ namespace cv { namespace gpu { namespace bm
|
|||||||
};
|
};
|
||||||
const int calles_num = sizeof(callers)/sizeof(callers[0]);
|
const int calles_num = sizeof(callers)/sizeof(callers[0]);
|
||||||
|
|
||||||
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, const cudaStream_t & stream)
|
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
int winsz2 = winsz >> 1;
|
int winsz2 = winsz >> 1;
|
||||||
|
|
||||||
@@ -366,35 +354,31 @@ namespace cv { namespace gpu { namespace bm
|
|||||||
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) );
|
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) );
|
||||||
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) );
|
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemset2D(disp.ptr, disp.step, 0, disp.cols, disp.rows) );
|
cudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
|
||||||
cudaSafeCall( cudaMemset2D(minSSD_buf.ptr, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
|
cudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cwidth, &left.cols, sizeof(left.cols) ) );
|
cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cheight, &left.rows, sizeof(left.rows) ) );
|
cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cminSSDImage, &minSSD_buf.ptr, sizeof(minSSD_buf.ptr) ) );
|
cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
|
||||||
|
|
||||||
size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
|
size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
|
||||||
cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
|
cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
|
||||||
|
|
||||||
callers[winsz2](left, right, disp, maxdisp, stream);
|
callers[winsz2](left, right, disp, maxdisp, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
/////////////////////////////////////// Sobel Prefiler ///////////////////////////////////////////
|
/////////////////////////////////////// Sobel Prefiler ///////////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace stereobm_gpu
|
|
||||||
{
|
|
||||||
|
|
||||||
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
|
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
|
||||||
|
|
||||||
extern "C" __global__ void prefilter_kernel(unsigned char *output, size_t step, int width, int height, int prefilterCap)
|
extern "C" __global__ void prefilter_kernel(DevMem2D output, int prefilterCap)
|
||||||
{
|
{
|
||||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
|
|
||||||
if (x < width && y < height)
|
if (x < output.cols && y < output.rows)
|
||||||
{
|
{
|
||||||
int conv = (int)tex2D(texForSobel, x - 1, y - 1) * (-1) + (int)tex2D(texForSobel, x + 1, y - 1) * (1) +
|
int conv = (int)tex2D(texForSobel, x - 1, y - 1) * (-1) + (int)tex2D(texForSobel, x + 1, y - 1) * (1) +
|
||||||
(int)tex2D(texForSobel, x - 1, y ) * (-2) + (int)tex2D(texForSobel, x + 1, y ) * (2) +
|
(int)tex2D(texForSobel, x - 1, y ) * (-2) + (int)tex2D(texForSobel, x + 1, y ) * (2) +
|
||||||
@@ -402,18 +386,15 @@ extern "C" __global__ void prefilter_kernel(unsigned char *output, size_t step,
|
|||||||
|
|
||||||
|
|
||||||
conv = min(min(max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
|
conv = min(min(max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
|
||||||
output[y * step + x] = conv & 0xFF;
|
output.ptr(y)[x] = conv & 0xFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bm
|
extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap, cudaStream_t & stream)
|
||||||
{
|
|
||||||
extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap, const cudaStream_t & stream)
|
|
||||||
{
|
{
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||||
cudaSafeCall( cudaBindTexture2D( 0, stereobm_gpu::texForSobel, input.ptr, desc, input.cols, input.rows, input.step ) );
|
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
|
||||||
|
|
||||||
dim3 threads(16, 16, 1);
|
dim3 threads(16, 16, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@@ -421,29 +402,19 @@ namespace cv { namespace gpu { namespace bm
|
|||||||
grid.x = divUp(input.cols, threads.x);
|
grid.x = divUp(input.cols, threads.x);
|
||||||
grid.y = divUp(input.rows, threads.y);
|
grid.y = divUp(input.rows, threads.y);
|
||||||
|
|
||||||
|
prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
{
|
|
||||||
stereobm_gpu::prefilter_kernel<<<grid, threads>>>(output.ptr, output.step, output.cols, output.rows, prefilterCap);
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
|
||||||
else
|
cudaSafeCall( cudaUnbindTexture (texForSobel ) );
|
||||||
{
|
|
||||||
stereobm_gpu::prefilter_kernel<<<grid, threads, 0, stream>>>(output.ptr, output.step, output.cols, output.rows, prefilterCap);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaSafeCall( cudaUnbindTexture (stereobm_gpu::texForSobel ) );
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}}}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
/////////////////////////////////// Textureness filtering ////////////////////////////////////////
|
/////////////////////////////////// Textureness filtering ////////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace stereobm_gpu
|
|
||||||
{
|
|
||||||
|
|
||||||
texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF;
|
texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF;
|
||||||
|
|
||||||
__device__ float sobel(int x, int y)
|
__device__ float sobel(int x, int y)
|
||||||
@@ -478,7 +449,7 @@ __device__ float CalcSums(float *cols, float *cols_cache, int winsz)
|
|||||||
|
|
||||||
#define RpT (2 * ROWSperTHREAD) // got experimentally
|
#define RpT (2 * ROWSperTHREAD) // got experimentally
|
||||||
|
|
||||||
extern "C" __global__ void textureness_kernel(unsigned char *disp, size_t disp_step, int winsz, float threshold, int width, int height)
|
extern "C" __global__ void textureness_kernel(DevMem2D disp, int winsz, float threshold)
|
||||||
{
|
{
|
||||||
int winsz2 = winsz/2;
|
int winsz2 = winsz/2;
|
||||||
int n_dirty_pixels = (winsz2) * 2;
|
int n_dirty_pixels = (winsz2) * 2;
|
||||||
@@ -489,9 +460,9 @@ extern "C" __global__ void textureness_kernel(unsigned char *disp, size_t disp_s
|
|||||||
|
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int beg_row = blockIdx.y * RpT;
|
int beg_row = blockIdx.y * RpT;
|
||||||
int end_row = min(beg_row + RpT, height);
|
int end_row = min(beg_row + RpT, disp.rows);
|
||||||
|
|
||||||
if (x < width)
|
if (x < disp.cols)
|
||||||
{
|
{
|
||||||
int y = beg_row;
|
int y = beg_row;
|
||||||
|
|
||||||
@@ -512,7 +483,7 @@ extern "C" __global__ void textureness_kernel(unsigned char *disp, size_t disp_s
|
|||||||
|
|
||||||
float sum_win = CalcSums(cols, cols_cache + threadIdx.x, winsz) * 255;
|
float sum_win = CalcSums(cols, cols_cache + threadIdx.x, winsz) * 255;
|
||||||
if (sum_win < threshold)
|
if (sum_win < threshold)
|
||||||
disp[y * disp_step + x] = 0;
|
disp.data[y * disp.step + x] = 0;
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
@@ -530,26 +501,23 @@ extern "C" __global__ void textureness_kernel(unsigned char *disp, size_t disp_s
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
float sum_win = CalcSums(cols, cols_cache + threadIdx.x, winsz) * 255;
|
float sum_win = CalcSums(cols, cols_cache + threadIdx.x, winsz) * 255;
|
||||||
if (sum_win < threshold)
|
if (sum_win < threshold)
|
||||||
disp[y * disp_step + x] = 0;
|
disp.data[y * disp.step + x] = 0;
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bm
|
extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avgTexturenessThreshold, const DevMem2D& disp, cudaStream_t & stream)
|
||||||
{
|
|
||||||
extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avgTexturenessThreshold, const DevMem2D& disp, const cudaStream_t & stream)
|
|
||||||
{
|
{
|
||||||
avgTexturenessThreshold *= winsz * winsz;
|
avgTexturenessThreshold *= winsz * winsz;
|
||||||
|
|
||||||
stereobm_gpu::texForTF.filterMode = cudaFilterModeLinear;
|
texForTF.filterMode = cudaFilterModeLinear;
|
||||||
stereobm_gpu::texForTF.addressMode[0] = cudaAddressModeWrap;
|
texForTF.addressMode[0] = cudaAddressModeWrap;
|
||||||
stereobm_gpu::texForTF.addressMode[1] = cudaAddressModeWrap;
|
texForTF.addressMode[1] = cudaAddressModeWrap;
|
||||||
|
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||||
cudaSafeCall( cudaBindTexture2D( 0, stereobm_gpu::texForTF, input.ptr, desc, input.cols, input.rows, input.step ) );
|
cudaSafeCall( cudaBindTexture2D( 0, texForTF, input.data, desc, input.cols, input.rows, input.step ) );
|
||||||
|
|
||||||
dim3 threads(128, 1, 1);
|
dim3 threads(128, 1, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@@ -558,17 +526,12 @@ namespace cv { namespace gpu { namespace bm
|
|||||||
grid.y = divUp(input.rows, RpT);
|
grid.y = divUp(input.rows, RpT);
|
||||||
|
|
||||||
size_t smem_size = (threads.x + threads.x + (winsz/2) * 2 ) * sizeof(float);
|
size_t smem_size = (threads.x + threads.x + (winsz/2) * 2 ) * sizeof(float);
|
||||||
|
textureness_kernel<<<grid, threads, smem_size, stream>>>(disp, winsz, avgTexturenessThreshold);
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
{
|
|
||||||
stereobm_gpu::textureness_kernel<<<grid, threads, smem_size>>>(disp.ptr, disp.step, winsz, avgTexturenessThreshold, disp.cols, disp.rows);
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
}
|
cudaSafeCall( cudaUnbindTexture (texForTF) );
|
||||||
else
|
|
||||||
{
|
|
||||||
stereobm_gpu::textureness_kernel<<<grid, threads, smem_size, stream>>>(disp.ptr, disp.step, winsz, avgTexturenessThreshold, disp.cols, disp.rows);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaSafeCall( cudaUnbindTexture (stereobm_gpu::texForTF) );
|
|
||||||
}
|
|
||||||
}}}
|
}}}
|
||||||
|
@@ -44,36 +44,32 @@
|
|||||||
#define __OPENCV_GPU_TRANSFORM_HPP__
|
#define __OPENCV_GPU_TRANSFORM_HPP__
|
||||||
|
|
||||||
#include "cuda_shared.hpp"
|
#include "cuda_shared.hpp"
|
||||||
#include "saturate_cast.hpp"
|
|
||||||
#include "vecmath.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace algo_krnls
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T, typename D, typename UnOp>
|
template <typename T, typename D, typename UnOp>
|
||||||
static __global__ void transform(const T* src, size_t src_step,
|
static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, UnOp op)
|
||||||
D* dst, size_t dst_step, int width, int height, UnOp op)
|
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
|
|
||||||
if (x < width && y < height)
|
if (x < src.cols && y < src.rows)
|
||||||
{
|
{
|
||||||
T src_data = src[y * src_step + x];
|
T src_data = src.ptr(y)[x];
|
||||||
dst[y * dst_step + x] = op(src_data, x, y);
|
dst.ptr(y)[x] = op(src_data, x, y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <typename T1, typename T2, typename D, typename BinOp>
|
template <typename T1, typename T2, typename D, typename BinOp>
|
||||||
static __global__ void transform(const T1* src1, size_t src1_step, const T2* src2, size_t src2_step,
|
static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, BinOp op)
|
||||||
D* dst, size_t dst_step, int width, int height, BinOp op)
|
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
|
|
||||||
if (x < width && y < height)
|
if (x < src1.cols && y < src1.rows)
|
||||||
{
|
{
|
||||||
T1 src1_data = src1[y * src1_step + x];
|
T1 src1_data = src1.ptr(y)[x];
|
||||||
T2 src2_data = src2[y * src2_step + x];
|
T2 src2_data = src2.ptr(y)[x];
|
||||||
dst[y * dst_step + x] = op(src1_data, src2_data, x, y);
|
dst.ptr(y)[x] = op(src1_data, src2_data, x, y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
@@ -83,7 +79,7 @@ namespace cv
|
|||||||
namespace gpu
|
namespace gpu
|
||||||
{
|
{
|
||||||
template <typename T, typename D, typename UnOp>
|
template <typename T, typename D, typename UnOp>
|
||||||
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)
|
static void transform2(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(16, 16, 1);
|
dim3 threads(16, 16, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@@ -91,8 +87,7 @@ namespace cv
|
|||||||
grid.x = divUp(src.cols, threads.x);
|
grid.x = divUp(src.cols, threads.x);
|
||||||
grid.y = divUp(src.rows, threads.y);
|
grid.y = divUp(src.rows, threads.y);
|
||||||
|
|
||||||
algo_krnls::transform<<<grid, threads, 0, stream>>>(src.ptr, src.elem_step,
|
device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, op);
|
||||||
dst.ptr, dst.elem_step, src.cols, src.rows, op);
|
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
@@ -106,8 +101,7 @@ namespace cv
|
|||||||
grid.x = divUp(src1.cols, threads.x);
|
grid.x = divUp(src1.cols, threads.x);
|
||||||
grid.y = divUp(src1.rows, threads.y);
|
grid.y = divUp(src1.rows, threads.y);
|
||||||
|
|
||||||
algo_krnls::transform<<<grid, threads, 0, stream>>>(src1.ptr, src1.elem_step,
|
device::transform<T1, T2, D, BinOp><<<grid, threads, 0, stream>>>(src1, src2, dst, op);
|
||||||
src2.ptr, src2.elem_step, dst.ptr, dst.elem_step, src1.cols, src1.rows, op);
|
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
cudaSafeCall( cudaThreadSynchronize() );
|
||||||
|
@@ -384,7 +384,14 @@ namespace cv
|
|||||||
|
|
||||||
template <typename VecD, typename VecS> static __device__ VecD saturate_cast_caller(const VecS& v)
|
template <typename VecD, typename VecS> static __device__ VecD saturate_cast_caller(const VecS& v)
|
||||||
{
|
{
|
||||||
SatCast<VecTraits<VecD>::cn, VecD> cast;
|
SatCast<
|
||||||
|
|
||||||
|
VecTraits<VecD>::cn,
|
||||||
|
|
||||||
|
VecD
|
||||||
|
>
|
||||||
|
|
||||||
|
cast;
|
||||||
return cast(v);
|
return cast(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -577,7 +577,10 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace filters
|
namespace cv { namespace gpu { namespace filters
|
||||||
{
|
{
|
||||||
void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
template <int CN, typename T, typename D>
|
||||||
|
void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
||||||
|
|
||||||
|
//void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
||||||
void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
||||||
void linearRowFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
void linearRowFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
||||||
void linearRowFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
void linearRowFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
|
||||||
@@ -653,7 +656,8 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
|
|||||||
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
|
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
|
||||||
static const gpuFilter1D_t gpuFilter1D_callers[6][6] =
|
static const gpuFilter1D_t gpuFilter1D_callers[6][6] =
|
||||||
{
|
{
|
||||||
{linearRowFilter_gpu_8u_8u_c4,linearRowFilter_gpu_8u_8s_c4,0,0,0,0},
|
{linearRowFilter_gpu<4, uchar4, uchar4>/*linearRowFilter_gpu_8u_8u_c4*/,linearRowFilter_gpu_8u_8s_c4,0,0,0,0},
|
||||||
|
|
||||||
{linearRowFilter_gpu_8s_8u_c4,linearRowFilter_gpu_8s_8s_c4,0,0,0,0},
|
{linearRowFilter_gpu_8s_8u_c4,linearRowFilter_gpu_8s_8s_c4,0,0,0,0},
|
||||||
{0,0,linearRowFilter_gpu_16u_16u_c2,linearRowFilter_gpu_16u_16s_c2,0,0},
|
{0,0,linearRowFilter_gpu_16u_16u_c2,linearRowFilter_gpu_16u_16s_c2,0,0},
|
||||||
{0,0,linearRowFilter_gpu_16s_16u_c2,linearRowFilter_gpu_16s_16s_c2,0,0},
|
{0,0,linearRowFilter_gpu_16s_16u_c2,linearRowFilter_gpu_16s_16s_c2,0,0},
|
||||||
|
@@ -61,9 +61,9 @@ namespace cv { namespace gpu
|
|||||||
namespace bm
|
namespace bm
|
||||||
{
|
{
|
||||||
//extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
|
//extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
|
||||||
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf, const cudaStream_t & stream);
|
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf, cudaStream_t & stream);
|
||||||
extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap /*= 31*/, const cudaStream_t & stream);
|
extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D output, int prefilterCap /*= 31*/, cudaStream_t & stream);
|
||||||
extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avgTexturenessThreshold, const DevMem2D& disp, const cudaStream_t & stream);
|
extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avgTexturenessThreshold, const DevMem2D& disp, cudaStream_t & stream);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
|
|
||||||
@@ -98,7 +98,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void stereo_bm_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, const cudaStream_t & stream)
|
static void stereo_bm_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
|
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
|
||||||
CV_DbgAssert(left.type() == CV_8UC1);
|
CV_DbgAssert(left.type() == CV_8UC1);
|
||||||
|
Reference in New Issue
Block a user