Added GPU versions of LUT, integral, boxFilter and cvtColor (RGB <-> YCrCb), based on NPP.
Minor refactoring of the GPU module and GPU tests; split the arithm and imgproc parts.
This commit is contained in:
@@ -52,38 +52,22 @@ void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::transpose(const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
|
||||
double cv::gpu::threshold(const GpuMat&, GpuMat&, double) { throw_nogpu(); return 0.0; }
|
||||
|
||||
void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_nogpu(); }
|
||||
|
||||
double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
|
||||
double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; }
|
||||
|
||||
void cv::gpu::flip(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int) { throw_nogpu(); }
|
||||
|
||||
Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); }
|
||||
|
||||
void cv::gpu::minMax(const GpuMat&, double*, double*) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
|
||||
// Stub used on the no-CUDA side of the HAVE_CUDA switch: reports the missing GPU backend through throw_nogpu().
// Parameters are left unnamed, like the sibling stubs, so they do not trigger unused-parameter warnings.
void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// add subtract multiply divide
|
||||
|
||||
namespace
|
||||
{
|
||||
typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
|
||||
@@ -147,6 +131,9 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
|
||||
nppFuncCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32f_C1R);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// transpose
|
||||
|
||||
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
@@ -160,6 +147,9 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
||||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// absdiff
|
||||
|
||||
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
|
||||
{
|
||||
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||
@@ -186,21 +176,8 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
|
||||
}
|
||||
}
|
||||
|
||||
double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32FC1)
|
||||
|
||||
dst.create( src.size(), src.type() );
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
|
||||
dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
|
||||
|
||||
return thresh;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// compare
|
||||
|
||||
namespace cv { namespace gpu { namespace matrix_operations
|
||||
{
|
||||
@@ -250,6 +227,9 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// meanStdDev
|
||||
|
||||
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
@@ -261,6 +241,9 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
|
||||
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), src.step, sz, mean.val, stddev.val) );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// norm
|
||||
|
||||
double cv::gpu::norm(const GpuMat& src1, int normType)
|
||||
{
|
||||
return norm(src1, GpuMat(src1.size(), src1.type(), Scalar::all(0.0)), normType);
|
||||
@@ -292,6 +275,9 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
|
||||
return retVal[0];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// flip
|
||||
|
||||
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||
@@ -316,50 +302,8 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation)
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
|
||||
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
|
||||
|
||||
CV_Assert( src.size().area() > 0 );
|
||||
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
|
||||
|
||||
if( dsize == Size() )
|
||||
{
|
||||
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
|
||||
}
|
||||
else
|
||||
{
|
||||
fx = (double)dsize.width / src.cols;
|
||||
fy = (double)dsize.height / src.rows;
|
||||
}
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.width = src.cols;
|
||||
srcsz.height = src.rows;
|
||||
NppiRect srcrect;
|
||||
srcrect.x = srcrect.y = 0;
|
||||
srcrect.width = src.cols;
|
||||
srcrect.height = src.rows;
|
||||
NppiSize dstsz;
|
||||
dstsz.width = dst.cols;
|
||||
dstsz.height = dst.rows;
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
{
|
||||
nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, src.step, srcrect,
|
||||
dst.ptr<Npp8u>(), dst.step, dstsz, fx, fy, npp_inter[interpolation]) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, src.step, srcrect,
|
||||
dst.ptr<Npp8u>(), dst.step, dstsz, fx, fy, npp_inter[interpolation]) );
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// sum
|
||||
|
||||
Scalar cv::gpu::sum(const GpuMat& src)
|
||||
{
|
||||
@@ -383,6 +327,9 @@ Scalar cv::gpu::sum(const GpuMat& src)
|
||||
return res;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// minMax
|
||||
|
||||
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
@@ -402,232 +349,37 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
|
||||
*maxVal = max_res;
|
||||
}
|
||||
|
||||
void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value)
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// LUT
|
||||
|
||||
void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1);
|
||||
|
||||
dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.width = src.cols;
|
||||
srcsz.height = src.rows;
|
||||
NppiSize dstsz;
|
||||
dstsz.width = dst.cols;
|
||||
dstsz.height = dst.rows;
|
||||
|
||||
switch (src.type())
|
||||
class LevelsInit
|
||||
{
|
||||
case CV_8UC1:
|
||||
{
|
||||
Npp8u nVal = static_cast<Npp8u>(value[0]);
|
||||
nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), src.step, srcsz,
|
||||
dst.ptr<Npp8u>(), dst.step, dstsz, top, left, nVal) );
|
||||
break;
|
||||
}
|
||||
case CV_8UC4:
|
||||
{
|
||||
Npp8u nVal[] = {static_cast<Npp8u>(value[0]), static_cast<Npp8u>(value[1]), static_cast<Npp8u>(value[2]), static_cast<Npp8u>(value[3])};
|
||||
nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), src.step, srcsz,
|
||||
dst.ptr<Npp8u>(), dst.step, dstsz, top, left, nVal) );
|
||||
break;
|
||||
}
|
||||
case CV_32SC1:
|
||||
{
|
||||
Npp32s nVal = static_cast<Npp32s>(value[0]);
|
||||
nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), src.step, srcsz,
|
||||
dst.ptr<Npp32s>(), dst.step, dstsz, top, left, nVal) );
|
||||
break;
|
||||
}
|
||||
default:
|
||||
CV_Assert(!"Unsupported source type");
|
||||
}
|
||||
}
|
||||
public:
|
||||
Npp32s pLevels[256];
|
||||
|
||||
namespace
|
||||
{
|
||||
typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
|
||||
void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,
|
||||
npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],
|
||||
npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2])
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
|
||||
|
||||
int interpolation = flags & INTER_MAX;
|
||||
|
||||
CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.height = src.rows;
|
||||
srcsz.width = src.cols;
|
||||
NppiRect srcroi;
|
||||
srcroi.x = srcroi.y = 0;
|
||||
srcroi.height = src.rows;
|
||||
srcroi.width = src.cols;
|
||||
NppiRect dstroi;
|
||||
dstroi.x = dstroi.y = 0;
|
||||
dstroi.height = dst.rows;
|
||||
dstroi.width = dst.cols;
|
||||
|
||||
int warpInd = (flags & WARP_INVERSE_MAP) >> 4;
|
||||
|
||||
switch (src.depth())
|
||||
{
|
||||
case CV_8U:
|
||||
nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr<Npp8u>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp8u>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_16U:
|
||||
nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr<Npp16u>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp16u>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_32S:
|
||||
nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr<Npp32s>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp32s>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_32F:
|
||||
nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr<Npp32f>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp32f>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
default:
|
||||
CV_Assert(!"Unsupported source type");
|
||||
LevelsInit()
|
||||
{
|
||||
for (int i = 0; i < 256; ++i)
|
||||
pLevels[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
static LevelsInit lvls;
|
||||
|
||||
void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags)
|
||||
{
|
||||
static npp_warp_8u_t npp_warpAffine_8u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},
|
||||
{nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}
|
||||
};
|
||||
static npp_warp_16u_t npp_warpAffine_16u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},
|
||||
{nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}
|
||||
};
|
||||
static npp_warp_32s_t npp_warpAffine_32s[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},
|
||||
{nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}
|
||||
};
|
||||
static npp_warp_32f_t npp_warpAffine_32f[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},
|
||||
{nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}
|
||||
};
|
||||
int cn = src.channels();
|
||||
|
||||
CV_Assert(M.rows == 2 && M.cols == 3);
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
CV_Assert(lut.depth() == CV_32SC1 && lut.rows * lut.cols == 256 && lut.isContinuous());
|
||||
|
||||
double coeffs[2][3];
|
||||
Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
|
||||
M.convertTo(coeffsMat, coeffsMat.type());
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f);
|
||||
}
|
||||
NppiSize sz;
|
||||
sz.height = src.rows;
|
||||
sz.width = src.cols;
|
||||
|
||||
void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags)
|
||||
{
|
||||
static npp_warp_8u_t npp_warpPerspective_8u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},
|
||||
{nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}
|
||||
};
|
||||
static npp_warp_16u_t npp_warpPerspective_16u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},
|
||||
{nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}
|
||||
};
|
||||
static npp_warp_32s_t npp_warpPerspective_32s[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},
|
||||
{nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}
|
||||
};
|
||||
static npp_warp_32f_t npp_warpPerspective_32f[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},
|
||||
{nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}
|
||||
};
|
||||
|
||||
CV_Assert(M.rows == 3 && M.cols == 3);
|
||||
|
||||
double coeffs[3][3];
|
||||
Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
|
||||
M.convertTo(coeffsMat, coeffsMat.type());
|
||||
|
||||
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f);
|
||||
}
|
||||
|
||||
void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation)
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
|
||||
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.height = src.rows;
|
||||
srcsz.width = src.cols;
|
||||
NppiRect srcroi;
|
||||
srcroi.x = srcroi.y = 0;
|
||||
srcroi.height = src.rows;
|
||||
srcroi.width = src.cols;
|
||||
NppiRect dstroi;
|
||||
dstroi.x = dstroi.y = 0;
|
||||
dstroi.height = dst.rows;
|
||||
dstroi.width = dst.cols;
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
{
|
||||
nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp8u>(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp8u>(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
|
||||
}
|
||||
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
|
||||
lut.ptr<Npp32s>(), lvls.pLevels, 256) );
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
@@ -46,20 +46,30 @@
|
||||
using namespace cv::gpu;
|
||||
|
||||
#ifndef CV_DESCALE
|
||||
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
#endif
|
||||
|
||||
namespace imgproc
|
||||
{
|
||||
template<typename _Tp> struct ColorChannel
|
||||
{
|
||||
};
|
||||
// TypeVec<T, N>::vec_t names the N-component vector type built from element type T.
// The primary template is intentionally empty: only the (T, N) pairs specialized
// below provide vec_t.
template<typename T, int N> struct TypeVec {};

// uchar
template<> struct TypeVec<uchar, 1>          { typedef uchar1  vec_t; };
template<> struct TypeVec<uchar, 2>          { typedef uchar2  vec_t; };
template<> struct TypeVec<uchar, 3>          { typedef uchar3  vec_t; };
template<> struct TypeVec<uchar, 4>          { typedef uchar4  vec_t; };

// unsigned short
template<> struct TypeVec<unsigned short, 1> { typedef ushort1 vec_t; };
template<> struct TypeVec<unsigned short, 2> { typedef ushort2 vec_t; };
template<> struct TypeVec<unsigned short, 3> { typedef ushort3 vec_t; };
template<> struct TypeVec<unsigned short, 4> { typedef ushort4 vec_t; };

// float
template<> struct TypeVec<float, 1>          { typedef float1  vec_t; };
template<> struct TypeVec<float, 2>          { typedef float2  vec_t; };
template<> struct TypeVec<float, 3>          { typedef float3  vec_t; };
template<> struct TypeVec<float, 4>          { typedef float4  vec_t; };
|
||||
|
||||
template<typename _Tp> struct ColorChannel {};
|
||||
|
||||
template<> struct ColorChannel<uchar>
|
||||
{
|
||||
typedef float worktype_f;
|
||||
typedef uchar3 vec3_t;
|
||||
typedef uchar4 vec4_t;
|
||||
static __device__ unsigned char max() { return UCHAR_MAX; }
|
||||
static __device__ unsigned char half() { return (unsigned char)(max()/2 + 1); }
|
||||
};
|
||||
@@ -67,8 +77,6 @@ namespace imgproc
|
||||
template<> struct ColorChannel<unsigned short>
|
||||
{
|
||||
typedef float worktype_f;
|
||||
typedef ushort3 vec3_t;
|
||||
typedef ushort4 vec4_t;
|
||||
static __device__ unsigned short max() { return USHRT_MAX; }
|
||||
static __device__ unsigned short half() { return (unsigned short)(max()/2 + 1); }
|
||||
};
|
||||
@@ -76,94 +84,114 @@ namespace imgproc
|
||||
template<> struct ColorChannel<float>
|
||||
{
|
||||
typedef float worktype_f;
|
||||
typedef float3 vec3_t;
|
||||
typedef float4 vec4_t;
|
||||
static __device__ float max() { return 1.f; }
|
||||
static __device__ float half() { return 0.5f; }
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
//////////////////////////////////////// SwapChannels /////////////////////////////////////
|
||||
|
||||
namespace imgproc
{
    // Channel permutation read by swapChannels: output channel i is taken from
    // input channel ccoeffs[i].
    __constant__ int ccoeffs[4];

    // Permutes the channels of every pixel of a CN-channel image according to ccoeffs.
    // One thread handles one pixel; threads outside rows x cols do nothing.
    //
    // src_, dst_         - first element of the source / destination image.
    // src_step, dst_step - row strides counted in elements of T (they are added to T pointers).
    // rows, cols         - image dimensions in pixels.
    template <int CN, typename T>
    __global__ void swapChannels(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)
    {
        typedef typename TypeVec<T, CN>::vec_t vec_t;

        const int x = blockDim.x * blockIdx.x + threadIdx.x;
        const int y = blockDim.y * blockIdx.y + threadIdx.y;

        if (y >= rows || x >= cols)
            return;

        // Read the whole pixel before writing anything, then view both pixels
        // as plain arrays of T so channels can be addressed by index.
        vec_t pixIn = *(const vec_t*)(src_ + y * src_step + x * CN);
        vec_t pixOut;

        const T* chIn = (const T*)(&pixIn);
        T* chOut = (T*)(&pixOut);

        for (int c = 0; c < CN; ++c)
            chOut[c] = chIn[ccoeffs[c]];

        *(vec_t*)(dst_ + y * dst_step + x * CN) = pixOut;
    }
}
|
||||
|
||||
namespace cv { namespace gpu { namespace improc
{
    // Uploads the channel permutation to imgproc::ccoeffs and launches
    // imgproc::swapChannels over the whole image.
    //
    // src, dst - source / destination images; their byte steps are converted to element steps.
    // cn       - channel count; 3 and 4 launch a kernel, anything else is reported
    //            through cv::gpu::error.
    // coeffs   - host array of cn ints: output channel i comes from input channel coeffs[i].
    // stream   - CUDA stream for the launch; for stream 0 the function also calls
    //            cudaThreadSynchronize() before returning.
    template <typename T>
    void swapChannels_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int cn, const int* coeffs, cudaStream_t stream)
    {
        dim3 threads(32, 8, 1);
        dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

        cudaSafeCall( cudaMemcpyToSymbol(imgproc::ccoeffs, coeffs, cn * sizeof(int)) );

        if (cn == 3)
        {
            imgproc::swapChannels<3><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
        }
        else if (cn == 4)
        {
            imgproc::swapChannels<4><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
        }
        else
        {
            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
        }

        if (stream == 0)
            cudaSafeCall( cudaThreadSynchronize() );
    }

    // Non-template entry point for DevMem2D images.
    void swapChannels_gpu(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream)
    {
        swapChannels_caller(src, dst, cn, coeffs, stream);
    }

    // Non-template entry point for unsigned short images.
    void swapChannels_gpu(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, int cn, const int* coeffs, cudaStream_t stream)
    {
        swapChannels_caller(src, dst, cn, coeffs, stream);
    }

    // Non-template entry point for DevMem2Df images.
    void swapChannels_gpu(const DevMem2Df& src, const DevMem2Df& dst, int cn, const int* coeffs, cudaStream_t stream)
    {
        swapChannels_caller(src, dst, cn, coeffs, stream);
    }
}}}
|
||||
|
||||
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
|
||||
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename T>
|
||||
__global__ void RGB2RGB_3_3(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols, int bidx)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
const T* src = src_ + y * src_step + x * 3;
|
||||
T* dst = dst_ + y * dst_step + x * 3;
|
||||
|
||||
T t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
|
||||
dst[0] = t0; dst[1] = t1; dst[2] = t2;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void RGB2RGB_4_3(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols, int bidx)
|
||||
{
|
||||
typedef typename ColorChannel<T>::vec4_t vec4_t;
|
||||
template <int SRCCN, int DSTCN, typename T>
|
||||
__global__ void RGB2RGB(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols, int bidx)
|
||||
{
|
||||
typedef typename TypeVec<T, SRCCN>::vec_t src_t;
|
||||
typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
|
||||
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
vec4_t src = *(vec4_t*)(src_ + y * src_step + (x << 2));
|
||||
T* dst = dst_ + y * dst_step + x * 3;
|
||||
src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN);
|
||||
dst_t dst;
|
||||
|
||||
T t0 = ((T*)(&src))[bidx], t1 = src.y, t2 = ((T*)(&src))[bidx ^ 2];
|
||||
dst[0] = t0; dst[1] = t1; dst[2] = t2;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void RGB2RGB_3_4(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols, int bidx)
|
||||
{
|
||||
typedef typename ColorChannel<T>::vec4_t vec4_t;
|
||||
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
const T* src = src_ + y * src_step + x * 3;
|
||||
|
||||
vec4_t dst;
|
||||
|
||||
dst.x = src[bidx];
|
||||
dst.y = src[1];
|
||||
dst.z = src[bidx ^ 2];
|
||||
dst.w = ColorChannel<T>::max();
|
||||
*(vec4_t*)(dst_ + y * dst_step + (x << 2)) = dst;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void RGB2RGB_4_4(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols, int bidx)
|
||||
{
|
||||
typedef typename ColorChannel<T>::vec4_t vec4_t;
|
||||
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
vec4_t src = *(const vec4_t*)(src_ + y * src_step + (x << 2));
|
||||
vec4_t dst;
|
||||
|
||||
dst.x = ((T*)(&src))[bidx];
|
||||
dst.x = ((const T*)(&src))[bidx];
|
||||
dst.y = src.y;
|
||||
dst.z = ((T*)(&src))[bidx ^ 2];
|
||||
dst.w = src.w;
|
||||
|
||||
*(vec4_t*)(dst_ + y * dst_step + (x << 2)) = dst;
|
||||
}
|
||||
dst.z = ((const T*)(&src))[bidx ^ 2];
|
||||
if (DSTCN == 4)
|
||||
((T*)(&dst))[3] = ColorChannel<T>::max();
|
||||
|
||||
*(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace improc
|
||||
@@ -183,12 +211,15 @@ namespace cv { namespace gpu { namespace improc
|
||||
switch (srccn)
|
||||
{
|
||||
case 3:
|
||||
imgproc::RGB2RGB_3_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T),
|
||||
src.rows, src.cols, bidx);
|
||||
{
|
||||
int coeffs[] = {2, 1, 0};
|
||||
cudaSafeCall( cudaMemcpyToSymbol(imgproc::ccoeffs, coeffs, 3 * sizeof(int)) );
|
||||
imgproc::swapChannels<3><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
imgproc::RGB2RGB_4_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T),
|
||||
src.rows, src.cols, bidx);
|
||||
imgproc::RGB2RGB<4, 3><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T),
|
||||
src.rows, src.cols, bidx);
|
||||
break;
|
||||
default:
|
||||
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||
@@ -199,13 +230,16 @@ namespace cv { namespace gpu { namespace improc
|
||||
switch (srccn)
|
||||
{
|
||||
case 3:
|
||||
imgproc::RGB2RGB_3_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T),
|
||||
src.rows, src.cols, bidx);
|
||||
imgproc::RGB2RGB<3, 4><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T),
|
||||
src.rows, src.cols, bidx);
|
||||
break;
|
||||
case 4:
|
||||
imgproc::RGB2RGB_4_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T),
|
||||
src.rows, src.cols, bidx);
|
||||
{
|
||||
int coeffs[] = {2, 1, 0, 3};
|
||||
cudaSafeCall( cudaMemcpyToSymbol(imgproc::ccoeffs, coeffs, 4 * sizeof(int)) );
|
||||
imgproc::swapChannels<4><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
|
||||
break;
|
||||
@@ -319,8 +353,8 @@ namespace imgproc
|
||||
template <typename T>
|
||||
__global__ void Gray2RGB_3(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
@@ -335,7 +369,7 @@ namespace imgproc
|
||||
template <typename T>
|
||||
__global__ void Gray2RGB_4(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)
|
||||
{
|
||||
typedef typename ColorChannel<T>::vec4_t vec4_t;
|
||||
typedef typename TypeVec<T, 4>::vec_t vec4_t;
|
||||
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
@@ -50,7 +50,7 @@ using namespace cv::gpu;
|
||||
|
||||
void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
|
||||
void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
|
||||
void morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
|
||||
void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
|
||||
|
||||
#else
|
||||
|
||||
@@ -132,7 +132,6 @@ void cv::gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& k
|
||||
temp = dst;
|
||||
dilate( src, temp, kernel, anchor, iterations);
|
||||
erode( temp, temp, kernel, anchor, iterations);
|
||||
dst = temp - src;
|
||||
subtract(temp, src, dst);
|
||||
break;
|
||||
default:
|
||||
|
@@ -55,6 +55,14 @@ void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&) { throw_nog
|
||||
void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, const Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
|
||||
void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, const Stream&) { throw_nogpu(); }
|
||||
double cv::gpu::threshold(const GpuMat&, GpuMat&, double) { throw_nogpu(); return 0.0; }
|
||||
void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int) { throw_nogpu(); }
|
||||
void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&) { throw_nogpu(); }
|
||||
void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
|
||||
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
|
||||
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
|
||||
void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
@@ -73,6 +81,10 @@ namespace cv { namespace gpu
|
||||
void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
|
||||
void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
|
||||
|
||||
void swapChannels_gpu(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);
|
||||
void swapChannels_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int cn, const int* coeffs, cudaStream_t stream);
|
||||
void swapChannels_gpu(const DevMem2Df& src, const DevMem2Df& dst, int cn, const int* coeffs, cudaStream_t stream);
|
||||
|
||||
void RGB2RGB_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
|
||||
void RGB2RGB_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int dstcn, int bidx, cudaStream_t stream);
|
||||
void RGB2RGB_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int dstcn, int bidx, cudaStream_t stream);
|
||||
@@ -218,6 +230,10 @@ namespace
|
||||
if (dst.data != src.data)
|
||||
out = dst;
|
||||
|
||||
NppiSize nppsz;
|
||||
nppsz.height = src.rows;
|
||||
nppsz.width = src.cols;
|
||||
|
||||
switch (code)
|
||||
{
|
||||
case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
|
||||
@@ -305,6 +321,31 @@ namespace
|
||||
// CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));
|
||||
// break;
|
||||
|
||||
case CV_RGB2YCrCb:
|
||||
CV_Assert(scn == 3 && depth == CV_8U);
|
||||
|
||||
out.create(sz, CV_MAKETYPE(depth, 3));
|
||||
|
||||
nppSafeCall( nppiRGBToYCbCr_8u_C3R(src.ptr<Npp8u>(), src.step, out.ptr<Npp8u>(), out.step, nppsz) );
|
||||
{
|
||||
static int coeffs[] = {0, 2, 1};
|
||||
improc::swapChannels_gpu((DevMem2D)out, (DevMem2D)out, 3, coeffs, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case CV_YCrCb2RGB:
|
||||
CV_Assert(scn == 3 && depth == CV_8U);
|
||||
|
||||
out.create(sz, CV_MAKETYPE(depth, 3));
|
||||
|
||||
{
|
||||
static int coeffs[] = {0, 2, 1};
|
||||
GpuMat src1(src.size(), src.type());
|
||||
improc::swapChannels_gpu((DevMem2D)src, (DevMem2D)src1, 3, coeffs, 0);
|
||||
nppSafeCall( nppiYCbCrToRGB_8u_C3R(src1.ptr<Npp8u>(), src1.step, out.ptr<Npp8u>(), out.step, nppsz) );
|
||||
}
|
||||
break;
|
||||
|
||||
//case CV_BGR2YCrCb: case CV_RGB2YCrCb:
|
||||
//case CV_BGR2YUV: case CV_RGB2YUV:
|
||||
// {
|
||||
@@ -526,4 +567,366 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const
|
||||
cvtColor_caller(src, dst, code, dcn, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// threshold
|
||||
|
||||
double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32FC1)
|
||||
|
||||
dst.create( src.size(), src.type() );
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
|
||||
dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
|
||||
|
||||
return thresh;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// resize
|
||||
|
||||
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation)
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
|
||||
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
|
||||
|
||||
CV_Assert( src.size().area() > 0 );
|
||||
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
|
||||
|
||||
if( dsize == Size() )
|
||||
{
|
||||
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
|
||||
}
|
||||
else
|
||||
{
|
||||
fx = (double)dsize.width / src.cols;
|
||||
fy = (double)dsize.height / src.rows;
|
||||
}
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.width = src.cols;
|
||||
srcsz.height = src.rows;
|
||||
NppiRect srcrect;
|
||||
srcrect.x = srcrect.y = 0;
|
||||
srcrect.width = src.cols;
|
||||
srcrect.height = src.rows;
|
||||
NppiSize dstsz;
|
||||
dstsz.width = dst.cols;
|
||||
dstsz.height = dst.rows;
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
{
|
||||
nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, src.step, srcrect,
|
||||
dst.ptr<Npp8u>(), dst.step, dstsz, fx, fy, npp_inter[interpolation]) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, src.step, srcrect,
|
||||
dst.ptr<Npp8u>(), dst.step, dstsz, fx, fy, npp_inter[interpolation]) );
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// copyMakeBorder
|
||||
|
||||
void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1);
|
||||
|
||||
dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.width = src.cols;
|
||||
srcsz.height = src.rows;
|
||||
NppiSize dstsz;
|
||||
dstsz.width = dst.cols;
|
||||
dstsz.height = dst.rows;
|
||||
|
||||
switch (src.type())
|
||||
{
|
||||
case CV_8UC1:
|
||||
{
|
||||
Npp8u nVal = static_cast<Npp8u>(value[0]);
|
||||
nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), src.step, srcsz,
|
||||
dst.ptr<Npp8u>(), dst.step, dstsz, top, left, nVal) );
|
||||
break;
|
||||
}
|
||||
case CV_8UC4:
|
||||
{
|
||||
Npp8u nVal[] = {static_cast<Npp8u>(value[0]), static_cast<Npp8u>(value[1]), static_cast<Npp8u>(value[2]), static_cast<Npp8u>(value[3])};
|
||||
nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), src.step, srcsz,
|
||||
dst.ptr<Npp8u>(), dst.step, dstsz, top, left, nVal) );
|
||||
break;
|
||||
}
|
||||
case CV_32SC1:
|
||||
{
|
||||
Npp32s nVal = static_cast<Npp32s>(value[0]);
|
||||
nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), src.step, srcsz,
|
||||
dst.ptr<Npp32s>(), dst.step, dstsz, top, left, nVal) );
|
||||
break;
|
||||
}
|
||||
default:
|
||||
CV_Assert(!"Unsupported source type");
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// warp
|
||||
|
||||
namespace
|
||||
{
|
||||
typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,
|
||||
int dstStep, NppiRect dstRoi, const double coeffs[][3],
|
||||
int interpolation);
|
||||
|
||||
void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,
|
||||
npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],
|
||||
npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2])
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
|
||||
|
||||
int interpolation = flags & INTER_MAX;
|
||||
|
||||
CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.height = src.rows;
|
||||
srcsz.width = src.cols;
|
||||
NppiRect srcroi;
|
||||
srcroi.x = srcroi.y = 0;
|
||||
srcroi.height = src.rows;
|
||||
srcroi.width = src.cols;
|
||||
NppiRect dstroi;
|
||||
dstroi.x = dstroi.y = 0;
|
||||
dstroi.height = dst.rows;
|
||||
dstroi.width = dst.cols;
|
||||
|
||||
int warpInd = (flags & WARP_INVERSE_MAP) >> 4;
|
||||
|
||||
switch (src.depth())
|
||||
{
|
||||
case CV_8U:
|
||||
nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr<Npp8u>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp8u>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_16U:
|
||||
nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr<Npp16u>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp16u>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_32S:
|
||||
nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr<Npp32s>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp32s>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
case CV_32F:
|
||||
nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr<Npp32f>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp32f>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
|
||||
break;
|
||||
default:
|
||||
CV_Assert(!"Unsupported source type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags)
|
||||
{
|
||||
static npp_warp_8u_t npp_warpAffine_8u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},
|
||||
{nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}
|
||||
};
|
||||
static npp_warp_16u_t npp_warpAffine_16u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},
|
||||
{nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}
|
||||
};
|
||||
static npp_warp_32s_t npp_warpAffine_32s[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},
|
||||
{nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}
|
||||
};
|
||||
static npp_warp_32f_t npp_warpAffine_32f[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},
|
||||
{nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}
|
||||
};
|
||||
|
||||
CV_Assert(M.rows == 2 && M.cols == 3);
|
||||
|
||||
double coeffs[2][3];
|
||||
Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
|
||||
M.convertTo(coeffsMat, coeffsMat.type());
|
||||
|
||||
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f);
|
||||
}
|
||||
|
||||
void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags)
|
||||
{
|
||||
static npp_warp_8u_t npp_warpPerspective_8u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},
|
||||
{nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}
|
||||
};
|
||||
static npp_warp_16u_t npp_warpPerspective_16u[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},
|
||||
{nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}
|
||||
};
|
||||
static npp_warp_32s_t npp_warpPerspective_32s[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},
|
||||
{nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}
|
||||
};
|
||||
static npp_warp_32f_t npp_warpPerspective_32f[][2] =
|
||||
{
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},
|
||||
{0, 0},
|
||||
{nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},
|
||||
{nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}
|
||||
};
|
||||
|
||||
CV_Assert(M.rows == 3 && M.cols == 3);
|
||||
|
||||
double coeffs[3][3];
|
||||
Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
|
||||
M.convertTo(coeffsMat, coeffsMat.type());
|
||||
|
||||
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// rotate
|
||||
|
||||
void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation)
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
|
||||
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.height = src.rows;
|
||||
srcsz.width = src.cols;
|
||||
NppiRect srcroi;
|
||||
srcroi.x = srcroi.y = 0;
|
||||
srcroi.height = src.rows;
|
||||
srcroi.width = src.cols;
|
||||
NppiRect dstroi;
|
||||
dstroi.x = dstroi.y = 0;
|
||||
dstroi.height = dst.rows;
|
||||
dstroi.width = dst.cols;
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
{
|
||||
nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp8u>(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, src.step, srcroi,
|
||||
dst.ptr<Npp8u>(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// integral
|
||||
|
||||
// Computes the integral image and the squared integral image of an 8-bit
// single-channel image in one NPP call.
//
// src   - input image; must be CV_8UC1 (asserted).
// sum   - output integral image, (src.rows + 1) x (src.cols + 1), CV_32S.
// sqsum - output squared integral image, same size as sum, CV_32F.
void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
{
    CV_Assert(src.type() == CV_8UC1);

    // Both outputs carry one extra row and one extra column.
    const int outRows = src.rows + 1;
    const int outCols = src.cols + 1;

    sum.create(outRows, outCols, CV_32S);
    sqsum.create(outRows, outCols, CV_32F);

    NppiSize roi;
    roi.height = src.rows;
    roi.width  = src.cols;

    // The two literal arguments are passed as zero; the last argument is the
    // height of the output images.
    nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(src.ptr<Npp8u>(), src.step,
        sum.ptr<Npp32s>(), sum.step,
        sqsum.ptr<Npp32f>(), sqsum.step,
        roi, 0, 0.0f, outRows) );
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// boxFilter
|
||||
|
||||
void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||
CV_Assert(ksize.height == 3 || ksize.height == 5 || ksize.height == 7);
|
||||
CV_Assert(ksize.height == ksize.width);
|
||||
|
||||
if (anchor.x == -1)
|
||||
anchor.x = 0;
|
||||
if (anchor.y == -1)
|
||||
anchor.y = 0;
|
||||
|
||||
CV_Assert(anchor.x == 0 && anchor.y == 0);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.height = src.rows;
|
||||
srcsz.width = src.cols;
|
||||
NppiSize masksz;
|
||||
masksz.height = ksize.height;
|
||||
masksz.width = ksize.width;
|
||||
NppiPoint anc;
|
||||
anc.x = anchor.x;
|
||||
anc.y = anchor.y;
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
{
|
||||
nppSafeCall( nppiFilterBox_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nppSafeCall( nppiFilterBox_8u_C4R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
Reference in New Issue
Block a user