fixed some compile-time problems (under Ubuntu)
parent bf25758159
commit 12b7f3a0d0
@@ -94,73 +94,7 @@ namespace cv { namespace gpu { namespace split_merge {
     typedef void (*SplitFunction)(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream);

     //------------------------------------------------------------
     // Merge

-    template <typename T>
-    static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
-    {
-        dim3 blockDim(32, 8);
-        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
-        mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
-                src[0].ptr, src[0].step,
-                src[1].ptr, src[1].step,
-                dst.rows, dst.cols, dst.ptr, dst.step);
-        if (stream == 0)
-            cudaSafeCall(cudaThreadSynchronize());
-    }
-
-
-    template <typename T>
-    static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
-    {
-        dim3 blockDim(32, 8);
-        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
-        mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
-                src[0].ptr, src[0].step,
-                src[1].ptr, src[1].step,
-                src[2].ptr, src[2].step,
-                dst.rows, dst.cols, dst.ptr, dst.step);
-        if (stream == 0)
-            cudaSafeCall(cudaThreadSynchronize());
-    }
-
-
-    template <typename T>
-    static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
-    {
-        dim3 blockDim(32, 8);
-        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
-        mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
-                src[0].ptr, src[0].step,
-                src[1].ptr, src[1].step,
-                src[2].ptr, src[2].step,
-                src[3].ptr, src[3].step,
-                dst.rows, dst.cols, dst.ptr, dst.step);
-        if (stream == 0)
-            cudaSafeCall(cudaThreadSynchronize());
-    }
-
-
-    extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
-                                 int total_channels, int elem_size,
-                                 const cudaStream_t& stream)
-    {
-        static MergeFunction merge_func_tbl[] =
-        {
-            mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
-            mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
-            mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
-        };
-
-        int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
-        MergeFunction merge_func = merge_func_tbl[merge_func_id];
-
-        if (merge_func == 0)
-            cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
-
-        merge_func(src, dst, stream);
-    }
-
-
     template <typename T>
     __global__ void mergeC2_(const uchar* src0, size_t src0_step,
@@ -289,76 +223,78 @@ namespace cv { namespace gpu { namespace split_merge {
     }
     }

-    //------------------------------------------------------------
-    // Split
-
     template <typename T>
-    static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
     {
         dim3 blockDim(32, 8);
-        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
-        splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
-                src.ptr, src.step, src.rows, src.cols,
-                dst[0].ptr, dst[0].step,
-                dst[1].ptr, dst[1].step);
+        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
+        mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
+                src[0].ptr, src[0].step,
+                src[1].ptr, src[1].step,
+                dst.rows, dst.cols, dst.ptr, dst.step);
         if (stream == 0)
             cudaSafeCall(cudaThreadSynchronize());
     }


     template <typename T>
-    static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
     {
         dim3 blockDim(32, 8);
-        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
-        splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
-                src.ptr, src.step, src.rows, src.cols,
-                dst[0].ptr, dst[0].step,
-                dst[1].ptr, dst[1].step,
-                dst[2].ptr, dst[2].step);
+        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
+        mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
+                src[0].ptr, src[0].step,
+                src[1].ptr, src[1].step,
+                src[2].ptr, src[2].step,
+                dst.rows, dst.cols, dst.ptr, dst.step);
         if (stream == 0)
             cudaSafeCall(cudaThreadSynchronize());
     }


     template <typename T>
-    static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
     {
         dim3 blockDim(32, 8);
-        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
-        splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
-                src.ptr, src.step, src.rows, src.cols,
-                dst[0].ptr, dst[0].step,
-                dst[1].ptr, dst[1].step,
-                dst[2].ptr, dst[2].step,
-                dst[3].ptr, dst[3].step);
+        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
+        mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
+                src[0].ptr, src[0].step,
+                src[1].ptr, src[1].step,
+                src[2].ptr, src[2].step,
+                src[3].ptr, src[3].step,
+                dst.rows, dst.cols, dst.ptr, dst.step);
         if (stream == 0)
             cudaSafeCall(cudaThreadSynchronize());
     }


-    extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
-                                 int num_channels, int elem_size1,
+    extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
+                                 int total_channels, int elem_size,
                                  const cudaStream_t& stream)
     {
-        static SplitFunction split_func_tbl[] =
+        static MergeFunction merge_func_tbl[] =
         {
-            splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
-            splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
-            splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
+            mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
+            mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
+            mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
         };

-        int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
-        SplitFunction split_func = split_func_tbl[split_func_id];
+        int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
+        MergeFunction merge_func = merge_func_tbl[merge_func_id];

-        if (split_func == 0)
+        if (merge_func == 0)
             cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

-        split_func(src, dst, stream);
+        merge_func(src, dst, stream);
     }


+    //------------------------------------------------------------
+    // Split
+
     template <typename T>
     __global__ void splitC2_(const uchar* src, size_t src_step,
                              int rows, int cols,
@@ -491,4 +427,69 @@ namespace cv { namespace gpu { namespace split_merge {
     }
     }

+    template <typename T>
+    static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
+        splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
+                src.ptr, src.step, src.rows, src.cols,
+                dst[0].ptr, dst[0].step,
+                dst[1].ptr, dst[1].step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    template <typename T>
+    static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
+        splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
+                src.ptr, src.step, src.rows, src.cols,
+                dst[0].ptr, dst[0].step,
+                dst[1].ptr, dst[1].step,
+                dst[2].ptr, dst[2].step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    template <typename T>
+    static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
+        splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
+                src.ptr, src.step, src.rows, src.cols,
+                dst[0].ptr, dst[0].step,
+                dst[1].ptr, dst[1].step,
+                dst[2].ptr, dst[2].step,
+                dst[3].ptr, dst[3].step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
+                                 int num_channels, int elem_size1,
+                                 const cudaStream_t& stream)
+    {
+        static SplitFunction split_func_tbl[] =
+        {
+            splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
+            splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
+            splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
+        };
+
+        int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
+        SplitFunction split_func = split_func_tbl[split_func_id];
+
+        if (split_func == 0)
+            cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
+
+        split_func(src, dst, stream);
+    }
+
 }}} // namespace cv::gpu::split_merge
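Note on the reordering in the CUDA source above: the host-side wrappers (mergeC2_/mergeC3_/mergeC4_ and merge_caller, and likewise the split wrappers) now come after the __global__ kernel templates they launch. A plausible reading of the "compile-time problems (under Ubuntu)" is GCC's two-phase name lookup: in the old layout, mergeC2_<T><<<...>>> was used before any template named mergeC2_ (the kernel) was visible, which g++/nvcc on Linux rejects even though the MSVC front end accepts it. The sketch below is only an illustration of that ordering rule with hypothetical names (scaleKernel, scaleCaller); it is not code from this commit.

#include <cuda_runtime.h>

// Kernel template: declared/defined before any host code that launches it.
template <typename T>
__global__ void scaleKernel(T* data, int n, T factor)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] *= factor;
}

// Host wrapper: because scaleKernel is already visible, g++ can resolve
// scaleKernel<T> when this template is defined. If the kernel were only
// defined further down the file, g++ would reject this launch while the
// MSVC front end would let it slide.
template <typename T>
static void scaleCaller(T* d_data, int n, T factor, cudaStream_t stream)
{
    dim3 block(256);
    dim3 grid((n + block.x - 1) / block.x);
    scaleKernel<T><<<grid, block, 0, stream>>>(d_data, n, factor);
    if (stream == 0)
        cudaDeviceSynchronize();
}

// Explicit use so the templates are instantiated when compiling this sketch.
void scaleFloats(float* d_data, int n) { scaleCaller<float>(d_data, n, 2.0f, 0); }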
@@ -100,8 +100,9 @@ namespace cv { namespace gpu { namespace split_merge
         for(size_t i = 0; i < n; ++i)
             src_as_devmem[i] = src[i];

-        split_merge::merge_caller(src_as_devmem, (DevMem2D)dst,
+        DevMem2D dst_as_devmem(dst);
+        split_merge::merge_caller(src_as_devmem, dst_as_devmem,
                                   total_channels, CV_ELEM_SIZE(depth),
                                   stream);
     }
 }
@@ -130,7 +131,8 @@ namespace cv { namespace gpu { namespace split_merge
         for (int i = 0; i < num_channels; ++i)
             dst_as_devmem[i] = dst[i];

-        split_merge::split_caller((DevMem2D)src, dst_as_devmem,
+        DevMem2D src_as_devmem(src);
+        split_merge::split_caller(src_as_devmem, dst_as_devmem,
                                   num_channels, src.elemSize1(),
                                   stream);
     }
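The two caller changes above replace a functional cast with a named DevMem2D local. For merge_caller, whose destination parameter is a non-const DevMem2D&, the old form (DevMem2D)dst produced a temporary, and g++ refuses to bind a temporary to a non-const lvalue reference (MSVC allows it as an extension); the split_caller change looks like the same pattern applied for consistency. Below is a minimal, hypothetical sketch of that rule; View and Image are stand-ins, not the real DevMem2D/GpuMat types.

struct View  { unsigned char* data; int rows, cols; };        // stand-in for DevMem2D
struct Image                                                  // stand-in for GpuMat
{
    unsigned char* data; int rows, cols;
    operator View() const                                     // conversion used by the cast
    {
        View v = { data, rows, cols };
        return v;
    }
};

void consume(View& v) { ++v.rows; }   // non-const reference, like merge_caller's dst

void caller(Image& img)
{
    // consume((View)img);   // g++: cannot bind a temporary to a non-const View&
    View v(img);             // named lvalue, mirroring "DevMem2D dst_as_devmem(dst);"
    consume(v);
}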
@@ -190,4 +192,4 @@ void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream
     split_merge::split(src, &dst[0], StreamAccessor::getStream(stream));
 }

 #endif /* !defined (HAVE_CUDA) */
@@ -379,7 +379,7 @@ struct CV_GpuNppImageIntegralTest : public CV_GpuImageProcTest
 {
     CV_GpuNppImageIntegralTest() : CV_GpuImageProcTest( "GPU-NppImageIntegral", "integral" ) {}

-    int CV_GpuNppImageIntegralTest::test(const Mat& img)
+    int test(const Mat& img)
     {
         if (img.type() != CV_8UC1)
         {
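The test change above drops the class name from a member defined inside its own class body. Writing int CV_GpuNppImageIntegralTest::test(...) inside the struct is an "extra qualification" that MSVC tolerates but g++ rejects, so the unqualified int test(...) form is what builds on Ubuntu. A tiny, self-contained illustration with a hypothetical class:

struct IntegralTest
{
    // int IntegralTest::run(int x) { return x + 1; }  // g++: error: extra qualification
    //                                                 // 'IntegralTest::' on member 'run'
    int run(int x) { return x + 1; }                   // unqualified form compiles everywhere
};

int main() { return IntegralTest().run(41) == 42 ? 0 : 1; }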
@@ -554,4 +554,4 @@ CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test;
 CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test;
 CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test;
 CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test;
 CV_GpuCvtColorTest CV_GpuCvtColor_test;