Refactored and fixed bugs in the GPU warp functions (remap, resize, warpAffine, warpPerspective).

Wrote more thorough tests for them.
Implemented our own versions of warpAffine and warpPerspective to support additional border interpolation types.
Refactored some GPU tests.
This commit is contained in:
Vladislav Vinogradov
2012-03-14 15:54:17 +00:00
parent 6e2507c197
commit ade7394e77
33 changed files with 6243 additions and 4545 deletions

View File

@@ -47,15 +47,11 @@ using namespace cv::gpu;
// Stub implementations used when OpenCV is built without CUDA support:
// every entry point keeps its public signature but immediately reports
// that no GPU support is available via throw_nogpu().
#if !defined (HAVE_CUDA)
void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, const Scalar&, Stream&){ throw_nogpu(); }
void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_nogpu(); }
void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_nogpu(); }
void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }
void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }
void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }
void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }
void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
@@ -105,64 +101,6 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat&, Size, Stream&) const { throw_nogpu
#else /* !defined (HAVE_CUDA) */
////////////////////////////////////////////////////////////////////////
// remap
// Forward declaration of the device-side remap implementation
// (defined in the corresponding .cu file and dispatched on element type T).
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename T>
        void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst,
            int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
    }
}}}
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue, Stream& stream)
{
using namespace ::cv::gpu::device::imgproc;
typedef void (*caller_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
static const caller_t callers[6][4] =
{
{remap_gpu<uchar>, 0/*remap_gpu<uchar2>*/, remap_gpu<uchar3>, remap_gpu<uchar4>},
{0/*remap_gpu<schar>*/, 0/*remap_gpu<char2>*/, 0/*remap_gpu<char3>*/, 0/*remap_gpu<char4>*/},
{remap_gpu<ushort>, 0/*remap_gpu<ushort2>*/, remap_gpu<ushort3>, remap_gpu<ushort4>},
{remap_gpu<short>, 0/*remap_gpu<short2>*/, remap_gpu<short3>, remap_gpu<short4>},
{0/*remap_gpu<int>*/, 0/*remap_gpu<int2>*/, 0/*remap_gpu<int3>*/, 0/*remap_gpu<int4>*/},
{remap_gpu<float>, 0/*remap_gpu<float2>*/, remap_gpu<float3>, remap_gpu<float4>}
};
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());
caller_t func = callers[src.depth()][src.channels() - 1];
CV_Assert(func != 0);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
dst.create(xmap.size(), src.type());
Scalar_<float> borderValueFloat;
borderValueFloat = borderValue;
DeviceInfo info;
int cc = info.majorVersion() * 10 + info.minorVersion();
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), cc);
}
////////////////////////////////////////////////////////////////////////
// meanShiftFiltering_GPU
@@ -308,106 +246,6 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));
}
////////////////////////////////////////////////////////////////////////
// resize
// Forward declaration of the device-side resize implementation
// (defined in the corresponding .cu file and dispatched on element type T).
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
            DevMem2Db dst, int interpolation, cudaStream_t stream);
    }
}}}
// Resizes src into dst, either to an explicit dsize or by scale factors
// (fx, fy). Uses NPP for the common 8UC1/8UC4 nearest/linear cases and the
// custom device kernels otherwise. Work is enqueued on stream s; when s is
// the default stream the NPP path synchronizes before returning.
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
{
    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    // Either a non-empty destination size or positive scale factors required.
    CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );

    // Derive whichever of dsize / (fx, fy) was not supplied from the other.
    if( dsize == Size() )
    {
        dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
    }
    else
    {
        fx = (double)dsize.width / src.cols;
        fy = (double)dsize.height / src.rows;
    }

    dst.create(dsize, src.type());

    // Identity resize degenerates to a copy (asynchronous when a stream is given).
    if (dsize == src.size())
    {
        if (s)
            s.enqueueCopy(src, dst);
        else
            src.copyTo(dst);
        return;
    }

    cudaStream_t stream = StreamAccessor::getStream(s);

    // The source may be a ROI of a larger matrix; both back ends need the
    // whole allocation plus the ROI offset.
    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    if ((src.type() == CV_8UC1 || src.type() == CV_8UC4) && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR))
    {
        // Fast path: NPP handles 8-bit 1/4-channel nearest and linear resizes.
        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};

        NppiSize srcsz;
        srcsz.width = wholeSize.width;
        srcsz.height = wholeSize.height;
        NppiRect srcrect;
        srcrect.x = ofs.x;
        srcrect.y = ofs.y;
        srcrect.width = src.cols;
        srcrect.height = src.rows;
        NppiSize dstsz;
        dstsz.width = dst.cols;
        dstsz.height = dst.rows;

        // Routes NPP calls through the requested stream for the scope below.
        NppStreamHandler h(stream);

        if (src.type() == CV_8UC1)
        {
            nppSafeCall( nppiResize_8u_C1R(src.datastart, srcsz, static_cast<int>(src.step), srcrect,
                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
        }
        else
        {
            nppSafeCall( nppiResize_8u_C4R(src.datastart, srcsz, static_cast<int>(src.step), srcrect,
                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
        }

        // Preserve synchronous semantics on the default stream.
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
    else
    {
        // Generic path: custom device kernels dispatched by [depth][channels - 1].
        using namespace ::cv::gpu::device::imgproc;

        typedef void (*caller_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);

        // 0 entries mark depth/channel combinations with no implementation.
        static const caller_t callers[6][4] =
        {
            {resize_gpu<uchar>, 0/*resize_gpu<uchar2>*/, resize_gpu<uchar3>, resize_gpu<uchar4>},
            {0/*resize_gpu<schar>*/, 0/*resize_gpu<char2>*/, 0/*resize_gpu<char3>*/, 0/*resize_gpu<char4>*/},
            {resize_gpu<ushort>, 0/*resize_gpu<ushort2>*/, resize_gpu<ushort3>, resize_gpu<ushort4>},
            {resize_gpu<short>, 0/*resize_gpu<short2>*/, resize_gpu<short3>, resize_gpu<short4>},
            {0/*resize_gpu<int>*/, 0/*resize_gpu<int2>*/, 0/*resize_gpu<int3>*/, 0/*resize_gpu<int4>*/},
            {resize_gpu<float>, 0/*resize_gpu<float2>*/, resize_gpu<float3>, resize_gpu<float4>}
        };

        // Bug fix: validate the dispatch entry before invoking it. Previously an
        // unsupported combination (e.g. CV_32SC1 or any 2-channel type) called
        // through a null pointer and crashed; now it fails with a clear assertion,
        // matching the behavior of cv::gpu::remap above.
        const caller_t func = callers[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
            static_cast<float>(fx), static_cast<float>(fy), dst, interpolation, stream);
    }
}
////////////////////////////////////////////////////////////////////////
// copyMakeBorder
@@ -511,175 +349,6 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
}
}
////////////////////////////////////////////////////////////////////////
// warp
namespace
{
    // Signatures of the NPP warp entry points, one typedef per element type.
    // Each matches both the forward (nppiWarp*) and inverse (nppiWarp*Back)
    // variants, which share a signature.
    typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,
                                       int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                       int interpolation);
    typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,
                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                        int interpolation);
    typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,
                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                        int interpolation);
    typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,
                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                        int interpolation);

    // Shared driver for warpAffine/warpPerspective. Picks the NPP function from
    // the per-type tables (indexed by [channels][forward/inverse]), sets up the
    // whole-image/ROI geometry, and runs the call on the given stream. When the
    // stream is the default one, synchronizes before returning.
    void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,
                       npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],
                       npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2], cudaStream_t stream)
    {
        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

        // The interpolation code lives in the low bits of flags.
        int interpolation = flags & INTER_MAX;

        // 2-channel images are rejected because the tables have no entries for them.
        CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);
        CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);

        dst.create(dsize, src.type());

        // The source may be a ROI of a larger matrix; NPP needs the whole
        // allocation plus the ROI rectangle.
        Size wholeSize;
        Point ofs;
        src.locateROI(wholeSize, ofs);

        NppiSize srcsz;
        srcsz.height = wholeSize.height;
        srcsz.width = wholeSize.width;
        NppiRect srcroi;
        srcroi.x = ofs.x;
        srcroi.y = ofs.y;
        srcroi.height = src.rows;
        srcroi.width = src.cols;
        NppiRect dstroi;
        dstroi.x = dstroi.y = 0;
        dstroi.height = dst.rows;
        dstroi.width = dst.cols;

        // Selects the inverse ("Back") variant when WARP_INVERSE_MAP is set
        // (presumably flag value 16, so >> 4 yields index 0 or 1 — confirm against cv enum).
        int warpInd = (flags & WARP_INVERSE_MAP) >> 4;

        // Routes NPP calls through the requested stream for the scope below.
        NppStreamHandler h(stream);

        switch (src.depth())
        {
        case CV_8U:
            nppSafeCall( npp_warp_8u[src.channels()][warpInd]((Npp8u*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
            break;
        case CV_16U:
            nppSafeCall( npp_warp_16u[src.channels()][warpInd]((Npp16u*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
                dst.ptr<Npp16u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
            break;
        case CV_32S:
            nppSafeCall( npp_warp_32s[src.channels()][warpInd]((Npp32s*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
                dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
            break;
        case CV_32F:
            nppSafeCall( npp_warp_32f[src.channels()][warpInd]((Npp32f*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
                dst.ptr<Npp32f>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
            break;
        default:
            // Unreachable: depth was validated above.
            CV_Assert(!"Unsupported source type");
        }

        // Preserve synchronous semantics on the default stream.
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
}
void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)
{
static npp_warp_8u_t npp_warpAffine_8u[][2] =
{
{0, 0},
{nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},
{0, 0},
{nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},
{nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}
};
static npp_warp_16u_t npp_warpAffine_16u[][2] =
{
{0, 0},
{nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},
{0, 0},
{nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},
{nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}
};
static npp_warp_32s_t npp_warpAffine_32s[][2] =
{
{0, 0},
{nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},
{0, 0},
{nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},
{nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}
};
static npp_warp_32f_t npp_warpAffine_32f[][2] =
{
{0, 0},
{nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},
{0, 0},
{nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},
{nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}
};
CV_Assert(M.rows == 2 && M.cols == 3);
double coeffs[2][3];
Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
M.convertTo(coeffsMat, coeffsMat.type());
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f, StreamAccessor::getStream(s));
}
void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)
{
static npp_warp_8u_t npp_warpPerspective_8u[][2] =
{
{0, 0},
{nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},
{0, 0},
{nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},
{nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}
};
static npp_warp_16u_t npp_warpPerspective_16u[][2] =
{
{0, 0},
{nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},
{0, 0},
{nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},
{nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}
};
static npp_warp_32s_t npp_warpPerspective_32s[][2] =
{
{0, 0},
{nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},
{0, 0},
{nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},
{nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}
};
static npp_warp_32f_t npp_warpPerspective_32f[][2] =
{
{0, 0},
{nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},
{0, 0},
{nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},
{nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}
};
CV_Assert(M.rows == 3 && M.cols == 3);
double coeffs[3][3];
Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
M.convertTo(coeffsMat, coeffsMat.type());
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s));
}
//////////////////////////////////////////////////////////////////////////////
// buildWarpPlaneMaps