Added support for 4-channel images to StereoConstantSpaceBP.
Refactored transpose_gpu into a non-template function.
This commit is contained in:
parent
c18aa438ec
commit
905e5f1739
@ -272,20 +272,11 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
|
||||
|
||||
namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
template <typename T>
|
||||
void transpose_gpu(const DevMem2D& src, const DevMem2D& dst);
|
||||
void transpose_gpu(const DevMem2Di& src, const DevMem2Di& dst);
|
||||
}}}
|
||||
|
||||
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
||||
{
|
||||
using namespace cv::gpu::mathfunc;
|
||||
typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
transpose_gpu<uchar4>, transpose_gpu<char4>, transpose_gpu<ushort2>, transpose_gpu<short2>,
|
||||
transpose_gpu<int>, transpose_gpu<float>
|
||||
};
|
||||
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4
|
||||
|| src.type() == CV_16UC2 || src.type() == CV_16SC2 || src.type() == CV_32SC1 || src.type() == CV_32FC1);
|
||||
|
||||
@ -301,7 +292,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
||||
}
|
||||
else
|
||||
{
|
||||
funcs[src.depth()](src, dst);
|
||||
mathfunc::transpose_gpu(src, dst);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,7 +141,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
|
||||
CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
|
||||
&& left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
|
||||
|
||||
CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3));
|
||||
CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4));
|
||||
|
||||
const Scalar zero = Scalar::all(0);
|
||||
|
||||
|
@ -99,8 +99,15 @@ namespace cv { namespace gpu { namespace csbp
|
||||
/////////////////////// init data cost ////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <int channels>
|
||||
struct DataCostPerPixel
|
||||
template <int channels> struct DataCostPerPixel;
|
||||
// Specialization of DataCostPerPixel for channels == 1.
// compute() dereferences one uchar from each pointer and returns the absolute
// difference scaled by cdata_weight, capped at cdata_weight * cmax_data_term.
// NOTE(review): cdata_weight and cmax_data_term are declared outside this
// excerpt — presumably device-side constants; confirm against the full file.
template <> struct DataCostPerPixel<1>
|
||||
{
|
||||
static __device__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
// The (int) cast makes the subtraction an explicit signed-int operation before abs().
return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term);
|
||||
}
|
||||
};
|
||||
template <> struct DataCostPerPixel<3>
|
||||
{
|
||||
static __device__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
@ -111,13 +118,18 @@ namespace cv { namespace gpu { namespace csbp
|
||||
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct DataCostPerPixel<1>
|
||||
template <> struct DataCostPerPixel<4>
|
||||
{
|
||||
static __device__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term);
|
||||
uchar4 l = *((const uchar4*)left);
|
||||
uchar4 r = *((const uchar4*)right);
|
||||
|
||||
float tb = 0.114f * abs((int)l.x - r.x);
|
||||
float tg = 0.587f * abs((int)l.y - r.y);
|
||||
float tr = 0.299f * abs((int)l.z - r.z);
|
||||
|
||||
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
||||
}
|
||||
};
|
||||
|
||||
@ -327,6 +339,7 @@ namespace cv { namespace gpu { namespace csbp
|
||||
{
|
||||
case 1: init_data_cost<T, 1><<<grid, threads, 0, stream>>>(h, w, level); break;
|
||||
case 3: init_data_cost<T, 3><<<grid, threads, 0, stream>>>(h, w, level); break;
|
||||
case 4: init_data_cost<T, 4><<<grid, threads, 0, stream>>>(h, w, level); break;
|
||||
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
@ -345,6 +358,7 @@ namespace cv { namespace gpu { namespace csbp
|
||||
{
|
||||
case 1: init_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
||||
case 3: init_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
||||
case 4: init_data_cost_reduce<T, winsz, 4><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
||||
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
@ -517,6 +531,7 @@ namespace cv { namespace gpu { namespace csbp
|
||||
{
|
||||
case 1: compute_data_cost<T, 1><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
||||
case 3: compute_data_cost<T, 3><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
||||
case 4: compute_data_cost<T, 4><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
||||
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
@ -536,6 +551,7 @@ namespace cv { namespace gpu { namespace csbp
|
||||
{
|
||||
case 1: compute_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
||||
case 3: compute_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
||||
case 4: compute_data_cost_reduce<T, winsz, 4><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
||||
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
|
@ -1254,10 +1254,9 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// transpose
|
||||
|
||||
template <typename T>
|
||||
__global__ void transpose(const DevMem2D_<T> src, PtrStep_<T> dst)
|
||||
__global__ void transpose(const DevMem2Di src, PtrStepi dst)
|
||||
{
|
||||
__shared__ T s_mem[16 * 17];
|
||||
__shared__ int s_mem[16 * 17];
|
||||
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@ -1280,22 +1279,14 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void transpose_gpu(const DevMem2D& src, const DevMem2D& dst)
|
||||
void transpose_gpu(const DevMem2Di& src, const DevMem2Di& dst)
|
||||
{
|
||||
dim3 threads(16, 16, 1);
|
||||
dim3 grid(divUp(src.cols, 16), divUp(src.rows, 16), 1);
|
||||
|
||||
transpose<T><<<grid, threads>>>((DevMem2D_<T>)src, (DevMem2D_<T>)dst);
|
||||
transpose<<<grid, threads>>>(src, dst);
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
|
||||
template void transpose_gpu<uchar4 >(const DevMem2D& src, const DevMem2D& dst);
|
||||
template void transpose_gpu<char4 >(const DevMem2D& src, const DevMem2D& dst);
|
||||
template void transpose_gpu<ushort2>(const DevMem2D& src, const DevMem2D& dst);
|
||||
template void transpose_gpu<short2 >(const DevMem2D& src, const DevMem2D& dst);
|
||||
template void transpose_gpu<int >(const DevMem2D& src, const DevMem2D& dst);
|
||||
template void transpose_gpu<float >(const DevMem2D& src, const DevMem2D& dst);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// min/max
|
||||
|
@ -62,6 +62,9 @@ struct CV_GpuStereoCSBPTest : public CvTest
|
||||
|
||||
try
|
||||
{
|
||||
{cv::Mat temp; cv::cvtColor(img_l, temp, CV_BGR2BGRA); cv::swap(temp, img_l);}
|
||||
{cv::Mat temp; cv::cvtColor(img_r, temp, CV_BGR2BGRA); cv::swap(temp, img_r);}
|
||||
|
||||
cv::gpu::GpuMat disp;
|
||||
cv::gpu::StereoConstantSpaceBP bpm(128, 16, 4, 4);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user