Fixed some compile-time problems (under Ubuntu).

This commit is contained in:
Alexey Spizhevoy 2010-09-28 13:11:21 +00:00
parent bf25758159
commit 12b7f3a0d0
3 changed files with 115 additions and 112 deletions

View File

@ -94,73 +94,7 @@ namespace cv { namespace gpu { namespace split_merge {
typedef void (*SplitFunction)(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream); typedef void (*SplitFunction)(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream);
//------------------------------------------------------------ //------------------------------------------------------------
// Merge // Merge
// Host wrapper for the 2-channel merge kernel: interleaves the planes
// src[0] and src[1] into dst. T is the per-channel element type
// (char/short/int/double), selected by the caller's dispatch table.
// Assumes src[0] and src[1] are dst.rows x dst.cols planes — TODO confirm
// against callers. Asynchronous on `stream`; blocks only for the default stream.
template <typename T>
static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
    dim3 blockDim(32, 8);
    dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));

    mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
            src[0].ptr, src[0].step,
            src[1].ptr, src[1].step,
            dst.rows, dst.cols, dst.ptr, dst.step);

    // A kernel launch does not return an error itself; pick up
    // launch-configuration failures immediately instead of losing them.
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Host wrapper for the 3-channel merge kernel: interleaves the planes
// src[0..2] into dst. T is the per-channel element type.
// Assumes all three source planes are dst.rows x dst.cols — TODO confirm
// against callers. Asynchronous on `stream`; blocks only for the default stream.
template <typename T>
static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
    dim3 blockDim(32, 8);
    dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));

    mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
            src[0].ptr, src[0].step,
            src[1].ptr, src[1].step,
            src[2].ptr, src[2].step,
            dst.rows, dst.cols, dst.ptr, dst.step);

    // Surface launch-configuration errors right away (launches themselves
    // return nothing).
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Host wrapper for the 4-channel merge kernel: interleaves the planes
// src[0..3] into dst. T is the per-channel element type.
// Assumes all four source planes are dst.rows x dst.cols — TODO confirm
// against callers. Asynchronous on `stream`; blocks only for the default stream.
template <typename T>
static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
    dim3 blockDim(32, 8);
    dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));

    mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
            src[0].ptr, src[0].step,
            src[1].ptr, src[1].step,
            src[2].ptr, src[2].step,
            src[3].ptr, src[3].step,
            dst.rows, dst.cols, dst.ptr, dst.step);

    // Surface launch-configuration errors right away (launches themselves
    // return nothing).
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Entry point: merges `total_channels` single-channel planes src[0..n-1]
// into the interleaved image `dst`.
//   total_channels - number of source planes; 2, 3 or 4 are supported.
//   elem_size      - per-channel element size in bytes; 1, 2, 4 or 8.
// Raises via cv::gpu::error() for any unsupported combination.
extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
                             int total_channels, int elem_size,
                             const cudaStream_t& stream)
{
    // Rows: channel count (2..4); columns: elem_size >> 1 maps
    // 1->0, 2->1, 4->2, 8->4 (slot 3 is intentionally empty).
    static MergeFunction merge_func_tbl[] =
    {
        mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
        mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
        mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
    };

    // Validate before indexing: the original computed the index unchecked,
    // so e.g. total_channels > 4 or elem_size > 8 read past the table.
    bool bad_channels = total_channels < 2 || total_channels > 4;
    bool bad_elem_size = elem_size != 1 && elem_size != 2 &&
                         elem_size != 4 && elem_size != 8;
    if (bad_channels || bad_elem_size)
        cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

    int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
    MergeFunction merge_func = merge_func_tbl[merge_func_id];

    // Defensive: keep the null-entry check even though the validation
    // above makes the empty slot unreachable.
    if (merge_func == 0)
        cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

    merge_func(src, dst, stream);
}
template <typename T> template <typename T>
__global__ void mergeC2_(const uchar* src0, size_t src0_step, __global__ void mergeC2_(const uchar* src0, size_t src0_step,
@ -289,76 +223,78 @@ namespace cv { namespace gpu { namespace split_merge {
} }
} }
//------------------------------------------------------------
// Split
template <typename T>
template <typename T> static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
{ {
dim3 blockDim(32, 8); dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
splitC2_<T><<<gridDim, blockDim, 0, stream>>>( mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
src.ptr, src.step, src.rows, src.cols, src[0].ptr, src[0].step,
dst[0].ptr, dst[0].step, src[1].ptr, src[1].step,
dst[1].ptr, dst[1].step); dst.rows, dst.cols, dst.ptr, dst.step);
if (stream == 0) if (stream == 0)
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
} }
template <typename T> template <typename T>
static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream) static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{ {
dim3 blockDim(32, 8); dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
splitC3_<T><<<gridDim, blockDim, 0, stream>>>( mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
src.ptr, src.step, src.rows, src.cols, src[0].ptr, src[0].step,
dst[0].ptr, dst[0].step, src[1].ptr, src[1].step,
dst[1].ptr, dst[1].step, src[2].ptr, src[2].step,
dst[2].ptr, dst[2].step); dst.rows, dst.cols, dst.ptr, dst.step);
if (stream == 0) if (stream == 0)
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
} }
template <typename T> template <typename T>
static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream) static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{ {
dim3 blockDim(32, 8); dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
splitC4_<T><<<gridDim, blockDim, 0, stream>>>( mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
src.ptr, src.step, src.rows, src.cols, src[0].ptr, src[0].step,
dst[0].ptr, dst[0].step, src[1].ptr, src[1].step,
dst[1].ptr, dst[1].step, src[2].ptr, src[2].step,
dst[2].ptr, dst[2].step, src[3].ptr, src[3].step,
dst[3].ptr, dst[3].step); dst.rows, dst.cols, dst.ptr, dst.step);
if (stream == 0) if (stream == 0)
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
} }
extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst, extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
int num_channels, int elem_size1, int total_channels, int elem_size,
const cudaStream_t& stream) const cudaStream_t& stream)
{ {
static SplitFunction split_func_tbl[] = static MergeFunction merge_func_tbl[] =
{ {
splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>, mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>, mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>, mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
}; };
int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1); int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
SplitFunction split_func = split_func_tbl[split_func_id]; MergeFunction merge_func = merge_func_tbl[merge_func_id];
if (split_func == 0) if (merge_func == 0)
cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__); cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
split_func(src, dst, stream); merge_func(src, dst, stream);
} }
//------------------------------------------------------------
// Split
template <typename T> template <typename T>
__global__ void splitC2_(const uchar* src, size_t src_step, __global__ void splitC2_(const uchar* src, size_t src_step,
int rows, int cols, int rows, int cols,
@ -491,4 +427,69 @@ namespace cv { namespace gpu { namespace split_merge {
} }
} }
}}} // namespace cv::gpu::split_merge template <typename T>
// Host wrapper for the 2-channel split kernel: scatters the interleaved
// image `src` into the planes dst[0] and dst[1]. T is the per-channel
// element type. Assumes both destination planes are src.rows x src.cols —
// TODO confirm against callers. Asynchronous on `stream`; blocks only for
// the default stream.
static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
{
    dim3 blockDim(32, 8);
    dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));

    splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
            src.ptr, src.step, src.rows, src.cols,
            dst[0].ptr, dst[0].step,
            dst[1].ptr, dst[1].step);

    // A kernel launch does not return an error itself; pick up
    // launch-configuration failures immediately instead of losing them.
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Host wrapper for the 3-channel split kernel: scatters the interleaved
// image `src` into the planes dst[0..2]. T is the per-channel element type.
// Assumes all three destination planes are src.rows x src.cols — TODO
// confirm against callers. Asynchronous on `stream`; blocks only for the
// default stream.
template <typename T>
static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
{
    dim3 blockDim(32, 8);
    dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));

    splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
            src.ptr, src.step, src.rows, src.cols,
            dst[0].ptr, dst[0].step,
            dst[1].ptr, dst[1].step,
            dst[2].ptr, dst[2].step);

    // Surface launch-configuration errors right away (launches themselves
    // return nothing).
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Host wrapper for the 4-channel split kernel: scatters the interleaved
// image `src` into the planes dst[0..3]. T is the per-channel element type.
// Assumes all four destination planes are src.rows x src.cols — TODO
// confirm against callers. Asynchronous on `stream`; blocks only for the
// default stream.
template <typename T>
static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
{
    dim3 blockDim(32, 8);
    dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));

    splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
            src.ptr, src.step, src.rows, src.cols,
            dst[0].ptr, dst[0].step,
            dst[1].ptr, dst[1].step,
            dst[2].ptr, dst[2].step,
            dst[3].ptr, dst[3].step);

    // Surface launch-configuration errors right away (launches themselves
    // return nothing).
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Entry point: splits the interleaved image `src` into `num_channels`
// single-channel planes dst[0..n-1].
//   num_channels - number of destination planes; 2, 3 or 4 are supported.
//   elem_size1   - per-channel element size in bytes; 1, 2, 4 or 8.
// Raises via cv::gpu::error() for any unsupported combination.
extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
                             int num_channels, int elem_size1,
                             const cudaStream_t& stream)
{
    // Rows: channel count (2..4); columns: elem_size1 >> 1 maps
    // 1->0, 2->1, 4->2, 8->4 (slot 3 is intentionally empty).
    static SplitFunction split_func_tbl[] =
    {
        splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
        splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
        splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
    };

    // Validate before indexing: the original computed the index unchecked,
    // so e.g. num_channels > 4 or elem_size1 > 8 read past the table.
    bool bad_channels = num_channels < 2 || num_channels > 4;
    bool bad_elem_size = elem_size1 != 1 && elem_size1 != 2 &&
                         elem_size1 != 4 && elem_size1 != 8;
    if (bad_channels || bad_elem_size)
        cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

    int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
    SplitFunction split_func = split_func_tbl[split_func_id];

    // Defensive: keep the null-entry check even though the validation
    // above makes the empty slot unreachable.
    if (split_func == 0)
        cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

    split_func(src, dst, stream);
}
}}} // namespace cv::gpu::split_merge

View File

@ -100,8 +100,9 @@ namespace cv { namespace gpu { namespace split_merge
for(size_t i = 0; i < n; ++i) for(size_t i = 0; i < n; ++i)
src_as_devmem[i] = src[i]; src_as_devmem[i] = src[i];
split_merge::merge_caller(src_as_devmem, (DevMem2D)dst, DevMem2D dst_as_devmem(dst);
total_channels, CV_ELEM_SIZE(depth), split_merge::merge_caller(src_as_devmem, dst_as_devmem,
total_channels, CV_ELEM_SIZE(depth),
stream); stream);
} }
} }
@ -130,7 +131,8 @@ namespace cv { namespace gpu { namespace split_merge
for (int i = 0; i < num_channels; ++i) for (int i = 0; i < num_channels; ++i)
dst_as_devmem[i] = dst[i]; dst_as_devmem[i] = dst[i];
split_merge::split_caller((DevMem2D)src, dst_as_devmem, DevMem2D src_as_devmem(src);
split_merge::split_caller(src_as_devmem, dst_as_devmem,
num_channels, src.elemSize1(), num_channels, src.elemSize1(),
stream); stream);
} }
@ -190,4 +192,4 @@ void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream
split_merge::split(src, &dst[0], StreamAccessor::getStream(stream)); split_merge::split(src, &dst[0], StreamAccessor::getStream(stream));
} }
#endif /* !defined (HAVE_CUDA) */ #endif /* !defined (HAVE_CUDA) */

View File

@ -379,7 +379,7 @@ struct CV_GpuNppImageIntegralTest : public CV_GpuImageProcTest
{ {
CV_GpuNppImageIntegralTest() : CV_GpuImageProcTest( "GPU-NppImageIntegral", "integral" ) {} CV_GpuNppImageIntegralTest() : CV_GpuImageProcTest( "GPU-NppImageIntegral", "integral" ) {}
int CV_GpuNppImageIntegralTest::test(const Mat& img) int test(const Mat& img)
{ {
if (img.type() != CV_8UC1) if (img.type() != CV_8UC1)
{ {
@ -554,4 +554,4 @@ CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test;
CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test; CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test;
CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test; CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test;
CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test; CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test;
CV_GpuCvtColorTest CV_GpuCvtColor_test; CV_GpuCvtColorTest CV_GpuCvtColor_test;