fixed some compile-time problems (under Ubuntu)
This commit is contained in:
parent
bf25758159
commit
12b7f3a0d0
@ -96,72 +96,6 @@ namespace cv { namespace gpu { namespace split_merge {
|
||||
//------------------------------------------------------------
|
||||
// Merge
|
||||
|
||||
template <typename T>
|
||||
static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||
mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src[0].ptr, src[0].step,
|
||||
src[1].ptr, src[1].step,
|
||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||
mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src[0].ptr, src[0].step,
|
||||
src[1].ptr, src[1].step,
|
||||
src[2].ptr, src[2].step,
|
||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||
mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src[0].ptr, src[0].step,
|
||||
src[1].ptr, src[1].step,
|
||||
src[2].ptr, src[2].step,
|
||||
src[3].ptr, src[3].step,
|
||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
|
||||
int total_channels, int elem_size,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
static MergeFunction merge_func_tbl[] =
|
||||
{
|
||||
mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
|
||||
mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
|
||||
mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
|
||||
};
|
||||
|
||||
int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
|
||||
MergeFunction merge_func = merge_func_tbl[merge_func_id];
|
||||
|
||||
if (merge_func == 0)
|
||||
cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
|
||||
|
||||
merge_func(src, dst, stream);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void mergeC2_(const uchar* src0, size_t src0_step,
|
||||
const uchar* src1, size_t src1_step,
|
||||
@ -289,76 +223,78 @@ namespace cv { namespace gpu { namespace split_merge {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Split
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
|
||||
static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||
splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src.ptr, src.step, src.rows, src.cols,
|
||||
dst[0].ptr, dst[0].step,
|
||||
dst[1].ptr, dst[1].step);
|
||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||
mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src[0].ptr, src[0].step,
|
||||
src[1].ptr, src[1].step,
|
||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
|
||||
static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||
splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src.ptr, src.step, src.rows, src.cols,
|
||||
dst[0].ptr, dst[0].step,
|
||||
dst[1].ptr, dst[1].step,
|
||||
dst[2].ptr, dst[2].step);
|
||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||
mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src[0].ptr, src[0].step,
|
||||
src[1].ptr, src[1].step,
|
||||
src[2].ptr, src[2].step,
|
||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
|
||||
static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||
splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src.ptr, src.step, src.rows, src.cols,
|
||||
dst[0].ptr, dst[0].step,
|
||||
dst[1].ptr, dst[1].step,
|
||||
dst[2].ptr, dst[2].step,
|
||||
dst[3].ptr, dst[3].step);
|
||||
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
|
||||
mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src[0].ptr, src[0].step,
|
||||
src[1].ptr, src[1].step,
|
||||
src[2].ptr, src[2].step,
|
||||
src[3].ptr, src[3].step,
|
||||
dst.rows, dst.cols, dst.ptr, dst.step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
|
||||
int num_channels, int elem_size1,
|
||||
extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
|
||||
int total_channels, int elem_size,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
static SplitFunction split_func_tbl[] =
|
||||
static MergeFunction merge_func_tbl[] =
|
||||
{
|
||||
splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
|
||||
splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
|
||||
splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
|
||||
mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
|
||||
mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
|
||||
mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
|
||||
};
|
||||
|
||||
int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
|
||||
SplitFunction split_func = split_func_tbl[split_func_id];
|
||||
int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
|
||||
MergeFunction merge_func = merge_func_tbl[merge_func_id];
|
||||
|
||||
if (split_func == 0)
|
||||
if (merge_func == 0)
|
||||
cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
|
||||
|
||||
split_func(src, dst, stream);
|
||||
merge_func(src, dst, stream);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Split
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void splitC2_(const uchar* src, size_t src_step,
|
||||
int rows, int cols,
|
||||
@ -491,4 +427,69 @@ namespace cv { namespace gpu { namespace split_merge {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||
splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src.ptr, src.step, src.rows, src.cols,
|
||||
dst[0].ptr, dst[0].step,
|
||||
dst[1].ptr, dst[1].step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||
splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src.ptr, src.step, src.rows, src.cols,
|
||||
dst[0].ptr, dst[0].step,
|
||||
dst[1].ptr, dst[1].step,
|
||||
dst[2].ptr, dst[2].step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 blockDim(32, 8);
|
||||
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
|
||||
splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
|
||||
src.ptr, src.step, src.rows, src.cols,
|
||||
dst[0].ptr, dst[0].step,
|
||||
dst[1].ptr, dst[1].step,
|
||||
dst[2].ptr, dst[2].step,
|
||||
dst[3].ptr, dst[3].step);
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
|
||||
int num_channels, int elem_size1,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
static SplitFunction split_func_tbl[] =
|
||||
{
|
||||
splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
|
||||
splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
|
||||
splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
|
||||
};
|
||||
|
||||
int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
|
||||
SplitFunction split_func = split_func_tbl[split_func_id];
|
||||
|
||||
if (split_func == 0)
|
||||
cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
|
||||
|
||||
split_func(src, dst, stream);
|
||||
}
|
||||
|
||||
}}} // namespace cv::gpu::split_merge
|
@ -100,7 +100,8 @@ namespace cv { namespace gpu { namespace split_merge
|
||||
for(size_t i = 0; i < n; ++i)
|
||||
src_as_devmem[i] = src[i];
|
||||
|
||||
split_merge::merge_caller(src_as_devmem, (DevMem2D)dst,
|
||||
DevMem2D dst_as_devmem(dst);
|
||||
split_merge::merge_caller(src_as_devmem, dst_as_devmem,
|
||||
total_channels, CV_ELEM_SIZE(depth),
|
||||
stream);
|
||||
}
|
||||
@ -130,7 +131,8 @@ namespace cv { namespace gpu { namespace split_merge
|
||||
for (int i = 0; i < num_channels; ++i)
|
||||
dst_as_devmem[i] = dst[i];
|
||||
|
||||
split_merge::split_caller((DevMem2D)src, dst_as_devmem,
|
||||
DevMem2D src_as_devmem(src);
|
||||
split_merge::split_caller(src_as_devmem, dst_as_devmem,
|
||||
num_channels, src.elemSize1(),
|
||||
stream);
|
||||
}
|
||||
|
@ -379,7 +379,7 @@ struct CV_GpuNppImageIntegralTest : public CV_GpuImageProcTest
|
||||
{
|
||||
CV_GpuNppImageIntegralTest() : CV_GpuImageProcTest( "GPU-NppImageIntegral", "integral" ) {}
|
||||
|
||||
int CV_GpuNppImageIntegralTest::test(const Mat& img)
|
||||
int test(const Mat& img)
|
||||
{
|
||||
if (img.type() != CV_8UC1)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user