Refactored and fixed bugs in the gpu warp functions (remap, resize, warpAffine, warpPerspective).
Wrote more complicated tests for them; implemented own versions of warpAffine and warpPerspective for different border interpolation types; refactored some gpu tests.
This commit is contained in:
@@ -47,15 +47,11 @@ using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, const Scalar&, Stream&){ throw_nogpu(); }
|
||||
void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
@@ -105,64 +101,6 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat&, Size, Stream&) const { throw_nogpu
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// remap
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename T>
|
||||
void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst,
|
||||
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue, Stream& stream)
|
||||
{
|
||||
using namespace ::cv::gpu::device::imgproc;
|
||||
|
||||
typedef void (*caller_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
static const caller_t callers[6][4] =
|
||||
{
|
||||
{remap_gpu<uchar>, 0/*remap_gpu<uchar2>*/, remap_gpu<uchar3>, remap_gpu<uchar4>},
|
||||
{0/*remap_gpu<schar>*/, 0/*remap_gpu<char2>*/, 0/*remap_gpu<char3>*/, 0/*remap_gpu<char4>*/},
|
||||
{remap_gpu<ushort>, 0/*remap_gpu<ushort2>*/, remap_gpu<ushort3>, remap_gpu<ushort4>},
|
||||
{remap_gpu<short>, 0/*remap_gpu<short2>*/, remap_gpu<short3>, remap_gpu<short4>},
|
||||
{0/*remap_gpu<int>*/, 0/*remap_gpu<int2>*/, 0/*remap_gpu<int3>*/, 0/*remap_gpu<int4>*/},
|
||||
{remap_gpu<float>, 0/*remap_gpu<float2>*/, remap_gpu<float3>, remap_gpu<float4>}
|
||||
};
|
||||
|
||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
||||
CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());
|
||||
|
||||
caller_t func = callers[src.depth()][src.channels() - 1];
|
||||
CV_Assert(func != 0);
|
||||
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
|
||||
CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
|
||||
int gpuBorderType;
|
||||
CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
|
||||
|
||||
dst.create(xmap.size(), src.type());
|
||||
|
||||
Scalar_<float> borderValueFloat;
|
||||
borderValueFloat = borderValue;
|
||||
|
||||
DeviceInfo info;
|
||||
int cc = info.majorVersion() * 10 + info.minorVersion();
|
||||
|
||||
Size wholeSize;
|
||||
Point ofs;
|
||||
src.locateROI(wholeSize, ofs);
|
||||
|
||||
func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
|
||||
dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), cc);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// meanShiftFiltering_GPU
|
||||
|
||||
@@ -308,106 +246,6 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
|
||||
reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// resize
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
|
||||
DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
|
||||
{
|
||||
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
|
||||
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
|
||||
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
|
||||
|
||||
if( dsize == Size() )
|
||||
{
|
||||
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
|
||||
}
|
||||
else
|
||||
{
|
||||
fx = (double)dsize.width / src.cols;
|
||||
fy = (double)dsize.height / src.rows;
|
||||
}
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
if (dsize == src.size())
|
||||
{
|
||||
if (s)
|
||||
s.enqueueCopy(src, dst);
|
||||
else
|
||||
src.copyTo(dst);
|
||||
return;
|
||||
}
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
Size wholeSize;
|
||||
Point ofs;
|
||||
src.locateROI(wholeSize, ofs);
|
||||
|
||||
if ((src.type() == CV_8UC1 || src.type() == CV_8UC4) && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR))
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.width = wholeSize.width;
|
||||
srcsz.height = wholeSize.height;
|
||||
|
||||
NppiRect srcrect;
|
||||
srcrect.x = ofs.x;
|
||||
srcrect.y = ofs.y;
|
||||
srcrect.width = src.cols;
|
||||
srcrect.height = src.rows;
|
||||
|
||||
NppiSize dstsz;
|
||||
dstsz.width = dst.cols;
|
||||
dstsz.height = dst.rows;
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
{
|
||||
nppSafeCall( nppiResize_8u_C1R(src.datastart, srcsz, static_cast<int>(src.step), srcrect,
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nppSafeCall( nppiResize_8u_C4R(src.datastart, srcsz, static_cast<int>(src.step), srcrect,
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
|
||||
}
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
else
|
||||
{
|
||||
using namespace ::cv::gpu::device::imgproc;
|
||||
|
||||
typedef void (*caller_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[6][4] =
|
||||
{
|
||||
{resize_gpu<uchar>, 0/*resize_gpu<uchar2>*/, resize_gpu<uchar3>, resize_gpu<uchar4>},
|
||||
{0/*resize_gpu<schar>*/, 0/*resize_gpu<char2>*/, 0/*resize_gpu<char3>*/, 0/*resize_gpu<char4>*/},
|
||||
{resize_gpu<ushort>, 0/*resize_gpu<ushort2>*/, resize_gpu<ushort3>, resize_gpu<ushort4>},
|
||||
{resize_gpu<short>, 0/*resize_gpu<short2>*/, resize_gpu<short3>, resize_gpu<short4>},
|
||||
{0/*resize_gpu<int>*/, 0/*resize_gpu<int2>*/, 0/*resize_gpu<int3>*/, 0/*resize_gpu<int4>*/},
|
||||
{resize_gpu<float>, 0/*resize_gpu<float2>*/, resize_gpu<float3>, resize_gpu<float4>}
|
||||
};
|
||||
|
||||
callers[src.depth()][src.channels() - 1](src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
|
||||
static_cast<float>(fx), static_cast<float>(fy), dst, interpolation, stream);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// copyMakeBorder
|
||||
|
||||
@@ -511,175 +349,6 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// warp
|
||||
|
||||
namespace
|
||||
{
|
||||
typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
|
||||
void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,
|
||||
npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],
|
||||
npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2], cudaStream_t stream)
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
|
||||
|
||||
int interpolation = flags & INTER_MAX;
|
||||
|
||||
CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
Size wholeSize;
|
||||
Point ofs;
|
||||
src.locateROI(wholeSize, ofs);
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.height = wholeSize.height;
|
||||
srcsz.width = wholeSize.width;
|
||||
|
||||
NppiRect srcroi;
|
||||
srcroi.x = ofs.x;
|
||||
srcroi.y = ofs.y;
|
||||
srcroi.height = src.rows;
|
||||
srcroi.width = src.cols;
|
||||
|
||||
NppiRect dstroi;
|
||||
dstroi.x = dstroi.y = 0;
|
||||
dstroi.height = dst.rows;
|
||||
dstroi.width = dst.cols;
|
||||
|
||||
int warpInd = (flags & WARP_INVERSE_MAP) >> 4;
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
switch (src.depth())
|
||||
{
|
||||
case CV_8U:
|
||||
nppSafeCall( npp_warp_8u[src.channels()][warpInd]((Npp8u*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_16U:
|
||||
nppSafeCall( npp_warp_16u[src.channels()][warpInd]((Npp16u*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
|
||||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_32S:
|
||||
nppSafeCall( npp_warp_32s[src.channels()][warpInd]((Npp32s*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
|
||||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_32F:
|
||||
nppSafeCall( npp_warp_32f[src.channels()][warpInd]((Npp32f*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
|
||||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
default:
|
||||
CV_Assert(!"Unsupported source type");
|
||||
}
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)
|
||||
{
|
||||
static npp_warp_8u_t npp_warpAffine_8u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},
|
||||
{nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}
|
||||
};
|
||||
static npp_warp_16u_t npp_warpAffine_16u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},
|
||||
{nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}
|
||||
};
|
||||
static npp_warp_32s_t npp_warpAffine_32s[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},
|
||||
{nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}
|
||||
};
|
||||
static npp_warp_32f_t npp_warpAffine_32f[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},
|
||||
{nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}
|
||||
};
|
||||
|
||||
CV_Assert(M.rows == 2 && M.cols == 3);
|
||||
|
||||
double coeffs[2][3];
|
||||
Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
|
||||
M.convertTo(coeffsMat, coeffsMat.type());
|
||||
|
||||
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f, StreamAccessor::getStream(s));
|
||||
}
|
||||
|
||||
void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)
|
||||
{
|
||||
static npp_warp_8u_t npp_warpPerspective_8u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},
|
||||
{nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}
|
||||
};
|
||||
static npp_warp_16u_t npp_warpPerspective_16u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},
|
||||
{nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}
|
||||
};
|
||||
static npp_warp_32s_t npp_warpPerspective_32s[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},
|
||||
{nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}
|
||||
};
|
||||
static npp_warp_32f_t npp_warpPerspective_32f[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},
|
||||
{nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}
|
||||
};
|
||||
|
||||
CV_Assert(M.rows == 3 && M.cols == 3);
|
||||
|
||||
double coeffs[3][3];
|
||||
Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
|
||||
M.convertTo(coeffsMat, coeffsMat.type());
|
||||
|
||||
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpPlaneMaps
|
||||
|
||||
|
Reference in New Issue
Block a user