Fixed some compile-time problems (under Ubuntu).

This commit is contained in:
Alexey Spizhevoy 2010-09-28 13:11:21 +00:00
parent bf25758159
commit 12b7f3a0d0
3 changed files with 115 additions and 112 deletions

View File

@ -94,73 +94,7 @@ namespace cv { namespace gpu { namespace split_merge {
typedef void (*SplitFunction)(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream);
//------------------------------------------------------------
// Merge
// Host-side launcher for the 2-channel merge kernel (same-named __global__
// overload defined below). The grid is sized so 32x8 blocks cover every
// pixel of dst; on the default stream (0) the call blocks until the kernel
// has finished, preserving synchronous semantics.
template <typename T>
static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    mergeC2_<T><<<grid, block, 0, stream>>>(src[0].ptr, src[0].step,
                                            src[1].ptr, src[1].step,
                                            dst.rows, dst.cols, dst.ptr, dst.step);

    // Default stream: wait for completion so the caller sees finished results.
    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Host-side launcher for the 3-channel merge kernel (same-named __global__
// overload defined below). 32x8 thread blocks are laid out to cover dst;
// launching on the default stream (0) makes the call synchronous.
template <typename T>
static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    mergeC3_<T><<<grid, block, 0, stream>>>(src[0].ptr, src[0].step,
                                            src[1].ptr, src[1].step,
                                            src[2].ptr, src[2].step,
                                            dst.rows, dst.cols, dst.ptr, dst.step);

    // Default stream: block until the kernel completes.
    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Host-side launcher for the 4-channel merge kernel (same-named __global__
// overload defined below). Grid of 32x8 blocks covers dst's extent; on the
// default stream (0) the launcher waits for the kernel to finish.
template <typename T>
static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    mergeC4_<T><<<grid, block, 0, stream>>>(src[0].ptr, src[0].step,
                                            src[1].ptr, src[1].step,
                                            src[2].ptr, src[2].step,
                                            src[3].ptr, src[3].step,
                                            dst.rows, dst.cols, dst.ptr, dst.step);

    // Default stream: block until the kernel completes.
    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Dispatch entry point for channel merging.
//
// Selects the merge launcher matching (total_channels, elem_size) from a
// static table and invokes it. Rows index channel count (2..4); columns
// index element size via elem_size >> 1, which maps sizes 1,2,4,8 bytes
// to columns 0,1,2,4 (column 3 is an unused hole, kept 0).
//
// src            - array of total_channels single-channel source images
// dst            - interleaved destination image
// total_channels - number of source channels, must be 2..4
// elem_size      - per-channel element size in bytes, must be 1..8
// stream         - CUDA stream for the launch (0 => synchronous)
//
// Raises cv::gpu::error on any unsupported channel count or element size.
extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
                             int total_channels, int elem_size,
                             const cudaStream_t& stream)
{
    static MergeFunction merge_func_tbl[] =
    {
        mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
        mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
        mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
    };

    // Validate the inputs before indexing the table: without this check an
    // out-of-range channel count or element size reads past the table's end
    // (undefined behavior) instead of reaching the error path below.
    if (total_channels < 2 || total_channels > 4 || elem_size < 1 || elem_size > 8)
        cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

    int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
    MergeFunction merge_func = merge_func_tbl[merge_func_id];

    // Table holes (e.g. 3-byte elements) are unsupported combinations.
    if (merge_func == 0)
        cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

    merge_func(src, dst, stream);
}
// Merge
template <typename T>
__global__ void mergeC2_(const uchar* src0, size_t src0_step,
@ -289,76 +223,78 @@ namespace cv { namespace gpu { namespace split_merge {
}
}
//------------------------------------------------------------
// Split
template <typename T>
static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
template <typename T>
static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
src.ptr, src.step, src.rows, src.cols,
dst[0].ptr, dst[0].step,
dst[1].ptr, dst[1].step);
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
src[0].ptr, src[0].step,
src[1].ptr, src[1].step,
dst.rows, dst.cols, dst.ptr, dst.step);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
template <typename T>
static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
template <typename T>
static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
src.ptr, src.step, src.rows, src.cols,
dst[0].ptr, dst[0].step,
dst[1].ptr, dst[1].step,
dst[2].ptr, dst[2].step);
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
src[0].ptr, src[0].step,
src[1].ptr, src[1].step,
src[2].ptr, src[2].step,
dst.rows, dst.cols, dst.ptr, dst.step);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
template <typename T>
static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
template <typename T>
static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
{
dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
src.ptr, src.step, src.rows, src.cols,
dst[0].ptr, dst[0].step,
dst[1].ptr, dst[1].step,
dst[2].ptr, dst[2].step,
dst[3].ptr, dst[3].step);
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
src[0].ptr, src[0].step,
src[1].ptr, src[1].step,
src[2].ptr, src[2].step,
src[3].ptr, src[3].step,
dst.rows, dst.cols, dst.ptr, dst.step);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
int num_channels, int elem_size1,
const cudaStream_t& stream)
extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
int total_channels, int elem_size,
const cudaStream_t& stream)
{
static SplitFunction split_func_tbl[] =
static MergeFunction merge_func_tbl[] =
{
splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
};
int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
SplitFunction split_func = split_func_tbl[split_func_id];
int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
MergeFunction merge_func = merge_func_tbl[merge_func_id];
if (split_func == 0)
if (merge_func == 0)
cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
split_func(src, dst, stream);
merge_func(src, dst, stream);
}
//------------------------------------------------------------
// Split
template <typename T>
__global__ void splitC2_(const uchar* src, size_t src_step,
int rows, int cols,
@ -491,4 +427,69 @@ namespace cv { namespace gpu { namespace split_merge {
}
}
}}} // namespace cv::gpu::split_merge
// Host-side launcher for the 2-channel split kernel (same-named __global__
// overload defined above). 32x8 thread blocks are laid out to cover src;
// launching on the default stream (0) makes the call synchronous.
template <typename T>
static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

    splitC2_<T><<<grid, block, 0, stream>>>(src.ptr, src.step, src.rows, src.cols,
                                            dst[0].ptr, dst[0].step,
                                            dst[1].ptr, dst[1].step);

    // Default stream: wait for completion so the caller sees finished results.
    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Host-side launcher for the 3-channel split kernel (same-named __global__
// overload defined above). Grid of 32x8 blocks covers src's extent; on the
// default stream (0) the launcher waits for the kernel to finish.
template <typename T>
static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

    splitC3_<T><<<grid, block, 0, stream>>>(src.ptr, src.step, src.rows, src.cols,
                                            dst[0].ptr, dst[0].step,
                                            dst[1].ptr, dst[1].step,
                                            dst[2].ptr, dst[2].step);

    // Default stream: block until the kernel completes.
    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Host-side launcher for the 4-channel split kernel (same-named __global__
// overload defined above). The grid is sized so 32x8 blocks cover every
// pixel of src; on the default stream (0) the call blocks until done.
template <typename T>
static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

    splitC4_<T><<<grid, block, 0, stream>>>(src.ptr, src.step, src.rows, src.cols,
                                            dst[0].ptr, dst[0].step,
                                            dst[1].ptr, dst[1].step,
                                            dst[2].ptr, dst[2].step,
                                            dst[3].ptr, dst[3].step);

    // Default stream: block until the kernel completes.
    if (stream == 0)
        cudaSafeCall(cudaThreadSynchronize());
}
// Dispatch entry point for channel splitting.
//
// Selects the split launcher matching (num_channels, elem_size1) from a
// static table and invokes it. Rows index channel count (2..4); columns
// index element size via elem_size1 >> 1, which maps sizes 1,2,4,8 bytes
// to columns 0,1,2,4 (column 3 is an unused hole, kept 0).
//
// src          - interleaved source image
// dst          - array of num_channels single-channel destination images
// num_channels - number of destination channels, must be 2..4
// elem_size1   - per-channel element size in bytes, must be 1..8
// stream       - CUDA stream for the launch (0 => synchronous)
//
// Raises cv::gpu::error on any unsupported channel count or element size.
extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
                             int num_channels, int elem_size1,
                             const cudaStream_t& stream)
{
    static SplitFunction split_func_tbl[] =
    {
        splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
        splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
        splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
    };

    // Validate the inputs before indexing the table: without this check an
    // out-of-range channel count or element size reads past the table's end
    // (undefined behavior) instead of reaching the error path below.
    if (num_channels < 2 || num_channels > 4 || elem_size1 < 1 || elem_size1 > 8)
        cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

    int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
    SplitFunction split_func = split_func_tbl[split_func_id];

    // Table holes (e.g. 3-byte elements) are unsupported combinations.
    if (split_func == 0)
        cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);

    split_func(src, dst, stream);
}
}}} // namespace cv::gpu::split_merge

View File

@ -100,8 +100,9 @@ namespace cv { namespace gpu { namespace split_merge
for(size_t i = 0; i < n; ++i)
src_as_devmem[i] = src[i];
split_merge::merge_caller(src_as_devmem, (DevMem2D)dst,
total_channels, CV_ELEM_SIZE(depth),
DevMem2D dst_as_devmem(dst);
split_merge::merge_caller(src_as_devmem, dst_as_devmem,
total_channels, CV_ELEM_SIZE(depth),
stream);
}
}
@ -130,7 +131,8 @@ namespace cv { namespace gpu { namespace split_merge
for (int i = 0; i < num_channels; ++i)
dst_as_devmem[i] = dst[i];
split_merge::split_caller((DevMem2D)src, dst_as_devmem,
DevMem2D src_as_devmem(src);
split_merge::split_caller(src_as_devmem, dst_as_devmem,
num_channels, src.elemSize1(),
stream);
}
@ -190,4 +192,4 @@ void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream
split_merge::split(src, &dst[0], StreamAccessor::getStream(stream));
}
#endif /* !defined (HAVE_CUDA) */
#endif /* !defined (HAVE_CUDA) */

View File

@ -379,7 +379,7 @@ struct CV_GpuNppImageIntegralTest : public CV_GpuImageProcTest
{
CV_GpuNppImageIntegralTest() : CV_GpuImageProcTest( "GPU-NppImageIntegral", "integral" ) {}
int CV_GpuNppImageIntegralTest::test(const Mat& img)
int test(const Mat& img)
{
if (img.type() != CV_8UC1)
{
@ -554,4 +554,4 @@ CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test;
CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test;
CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test;
CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test;
CV_GpuCvtColorTest CV_GpuCvtColor_test;
CV_GpuCvtColorTest CV_GpuCvtColor_test;