added Sobel, GaussianBlur, Canny to gpu module.

minor fix of matrix_operations.cpp.
Vladislav Vinogradov 2010-10-04 11:42:40 +00:00
parent 12656df19a
commit 49fa536c62
6 changed files with 589 additions and 178 deletions


@@ -533,9 +533,18 @@ namespace cv
//! applies an advanced morphological operation to the image
CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations);
//! 1D mask window sum for 8-bit images
CV_EXPORTS void sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);
CV_EXPORTS void sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);
//! applies the generalized Sobel operator to the image
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);
//! smooths the image using a Gaussian filter
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0);
//! applies the Canny edge detector and produces the edge map
CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize = 3);
//////////////////////////////// Image Labeling ////////////////////////////////
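For reference, a minimal usage sketch of the three new entry points. This is not part of the commit: it assumes an OpenCV 2.x build with the gpu module, a CUDA-capable device, and a hypothetical input file "lena.jpg".

#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat host = cv::imread("lena.jpg", 0);       // load as 8-bit, single channel
    cv::gpu::GpuMat src(host), dx, blurred, edges;  // this constructor uploads to the device

    cv::gpu::Sobel(src, dx, -1, 1, 0, 3);                      // first x-derivative, 3x3 kernel
    cv::gpu::GaussianBlur(src, blurred, cv::Size(5, 5), 2.0);  // sigma2 = 0 -> reuse sigma1
    cv::gpu::Canny(src, edges, 50.0, 150.0);                   // apertureSize defaults to 3

    cv::Mat result;
    edges.download(result);                         // copy the edge map back to the host
    return 0;
}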


@@ -54,6 +54,8 @@ void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int)
void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }
void cv::gpu::sumWindowColumn(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
void cv::gpu::sumWindowRow(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double) { throw_nogpu(); }
void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double) { throw_nogpu(); }
#else
@@ -237,4 +239,186 @@ void cv::gpu::sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor
sumWindowCaller(nppiSumWindowRow_8u32f_C1R, src, dst, ksize, anchor);
}
////////////////////////////////////////////////////////////////////////
// Filter Engine
namespace
{
typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);
void applyRowFilter(const GpuMat& src, GpuMat& dst, const GpuMat& rowKernel, Npp32s anchor = -1, Npp32s nDivisor = 1)
{
static const nppFilter1D_t nppFilter1D_callers[] = {nppiFilterRow_8u_C1R, nppiFilterRow_8u_C4R};
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
int kRowSize = rowKernel.cols;
dst.create(src.size(), src.type());
dst = Scalar();
NppiSize oROI;
oROI.width = src.cols - kRowSize + 1;
oROI.height = src.rows;
if (anchor < 0)
anchor = kRowSize >> 1;
GpuMat srcROI = src.colRange(kRowSize-1, oROI.width);
GpuMat dstROI = dst.colRange(kRowSize-1, oROI.width);
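// src.channels() is 1 or 4 (asserted above), so channels() >> 2 yields index 0 (C1) or 1 (C4)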
nppFilter1D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI,
rowKernel.ptr<Npp32s>(), kRowSize, anchor, nDivisor);
}
void applyColumnFilter(const GpuMat& src, GpuMat& dst, const GpuMat& columnKernel, Npp32s anchor = -1, Npp32s nDivisor = 1)
{
static const nppFilter1D_t nppFilter1D_callers[] = {nppiFilterColumn_8u_C1R, nppiFilterColumn_8u_C4R};
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
int kColSize = columnKernel.cols;
dst.create(src.size(), src.type());
dst = Scalar();
NppiSize oROI;
oROI.width = src.cols;
oROI.height = src.rows - kColSize + 1;
if (anchor < 0)
anchor = kColSize >> 1;
GpuMat srcROI = src.rowRange(kColSize-1, oROI.height);
GpuMat dstROI = dst.rowRange(kColSize-1, oROI.height);
nppFilter1D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI,
columnKernel.ptr<Npp32s>(), kColSize, anchor, nDivisor);
}
inline void applySeparableFilter(const GpuMat& src, GpuMat& dst, const GpuMat& rowKernel, const GpuMat& columnKernel,
const cv::Point& anchor = cv::Point(-1, -1), Npp32s nDivisor = 1)
{
GpuMat dstBuf;
applyRowFilter(src, dstBuf, rowKernel, anchor.x, nDivisor);
applyColumnFilter(dstBuf, dst, columnKernel, anchor.y, nDivisor);
}
void makeNppKernel(Mat kernel, GpuMat& dst)
{
// NPP filter functions take integer (Npp32s) coefficients with an explicit divisor
kernel.convertTo(kernel, CV_32S);
// getDerivKernels/getGaussianKernel return ksize x 1 column vectors; NPP expects a contiguous row
kernel = kernel.t();
// mirror the coefficients before upload (NPP applies the kernel in reverse order
// relative to OpenCV's correlation convention)
int ksize = kernel.cols;
for (int i = 0; i < ksize / 2; ++i)
{
std::swap(kernel.at<int>(0, i), kernel.at<int>(0, ksize - 1 - i));
}
dst.upload(kernel);
}
void applyFilter2D(const GpuMat& src, GpuMat& dst, const GpuMat& kernel, cv::Point anchor = cv::Point(-1, -1), Npp32s nDivisor = 1)
{
static const nppFilter2D_t nppFilter2D_callers[] = {nppiFilter_8u_C1R, nppiFilter_8u_C4R};
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
dst.create(src.size(), src.type());
dst = Scalar();
NppiSize oROI;
oROI.width = src.cols - kernel.cols + 1;
oROI.height = src.rows - kernel.rows + 1;
if (anchor.x < 0)
anchor.x = kernel.cols >> 1;
if (anchor.y < 0)
anchor.y = kernel.rows >> 1;
GpuMat srcROI = src(Range(kernel.rows-1, oROI.height), Range(kernel.cols-1, oROI.width));
GpuMat dstROI = dst(Range(kernel.rows-1, oROI.height), Range(kernel.cols-1, oROI.width));
NppiSize oKernelSize;
oKernelSize.height = kernel.rows;
oKernelSize.width = kernel.cols;
NppiPoint oAnchor;
oAnchor.x = anchor.x;
oAnchor.y = anchor.y;
nppFilter2D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI,
kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor);
}
}
////////////////////////////////////////////////////////////////////////
// Sobel
void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale)
{
Mat kx, ky;
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
if (scale != 1)
{
// usually the smoothing part is the slowest to compute,
// so try to scale it instead of the faster differentiating part
if (dx == 0)
kx *= scale;
else
ky *= scale;
}
GpuMat rowKernel; makeNppKernel(kx, rowKernel);
GpuMat columnKernel; makeNppKernel(ky, columnKernel);
applySeparableFilter(src, dst, rowKernel, columnKernel);
}
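To see what ends up in rowKernel and columnKernel, the same getDerivKernels call can be run on the host. A CPU-only sketch (the values below are the standard 3x3 Sobel pair):

#include <cstdio>
#include <opencv2/imgproc/imgproc.hpp>

int main()
{
    cv::Mat kx, ky;
    cv::getDerivKernels(kx, ky, 1, 0, 3, false, CV_32F);  // dx = 1, dy = 0, ksize = 3
    // ksize x 1 column vectors: kx is the differentiating part, ky the smoothing part
    for (int i = 0; i < kx.rows; ++i)
        std::printf("kx[%d] = %4.1f   ky[%d] = %4.1f\n", i, kx.at<float>(i, 0), i, ky.at<float>(i, 0));
    // prints kx = [-1 0 1], ky = [1 2 1]
    return 0;
}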
////////////////////////////////////////////////////////////////////////
// GaussianBlur
void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2)
{
if (ksize.width == 1 && ksize.height == 1)
{
src.copyTo(dst);
return;
}
int depth = src.depth();
if (sigma2 <= 0)
sigma2 = sigma1;
// automatic detection of kernel size from sigma
if (ksize.width <= 0 && sigma1 > 0)
ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
if (ksize.height <= 0 && sigma2 > 0)
ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
CV_Assert(ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1);
sigma1 = std::max(sigma1, 0.0);
sigma2 = std::max(sigma2, 0.0);
const int scaleFactor = 256;
Mat kx = getGaussianKernel(ksize.width, sigma1, std::max(depth, CV_32F));
kx.convertTo(kx, kx.depth(), scaleFactor);
Mat ky;
if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON)
ky = kx;
else
{
ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
ky.convertTo(ky, ky.depth(), scaleFactor);
}
GpuMat rowKernel; makeNppKernel(kx, rowKernel);
GpuMat columnKernel; makeNppKernel(ky, columnKernel);
applySeparableFilter(src, dst, rowKernel, columnKernel, cv::Point(-1, -1), scaleFactor);
}
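The scaleFactor here is a fixed-point trick: NPP's 8-bit filters take integer kernels plus a divisor, so the float Gaussian weights are scaled by 256 before makeNppKernel rounds them to Npp32s, and the matching division is handed back to NPP as nDivisor. A standalone sketch of the quantization (the kernel values are illustrative, not taken from getGaussianKernel):

#include <cstdio>

int main()
{
    const int scaleFactor = 256;              // same constant as in GaussianBlur above
    const double k[3] = { 0.25, 0.5, 0.25 };  // illustrative normalized 1D kernel
    for (int i = 0; i < 3; ++i)
    {
        int fixed = (int)(k[i] * scaleFactor + 0.5);  // quantize to an integer coefficient
        std::printf("%.4f -> %3d / %d = %.4f\n", k[i], fixed, scaleFactor, fixed / (double)scaleFactor);
    }
    return 0;
}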
#endif


@@ -62,6 +62,7 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
@@ -986,4 +987,33 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) );
}
////////////////////////////////////////////////////////////////////////
// Canny
void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize)
{
CV_Assert(image.type() == CV_8UC1);
GpuMat srcDx, srcDy;
Sobel(image, srcDx, -1, 1, 0, apertureSize);
Sobel(image, srcDy, -1, 0, 1, apertureSize);
srcDx.convertTo(srcDx, CV_32F);
srcDy.convertTo(srcDy, CV_32F);
edges.create(image.size(), CV_8UC1);
NppiSize sz;
sz.height = image.rows;
sz.width = image.cols;
int bufsz;
nppSafeCall( nppiCannyGetBufferSize(sz, &bufsz) );
GpuMat buf(1, bufsz, CV_8UC1);
nppSafeCall( nppiCanny_32f8u_C1R(srcDx.ptr<Npp32f>(), srcDx.step, srcDy.ptr<Npp32f>(), srcDy.step,
edges.ptr<Npp8u>(), edges.step, sz, (Npp32f)threshold1, (Npp32f)threshold2, buf.ptr<Npp8u>()) );
}
#endif /* !defined (HAVE_CUDA) */


@@ -124,6 +124,61 @@ void cv::gpu::GpuMat::copyTo( GpuMat& mat, const GpuMat& mask ) const
}
}
namespace
{
template<int n> struct NPPTypeTraits;
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
template<int SDEPTH, int DDEPTH> struct NppConvertFunc
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);
};
template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>
{
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
};
template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
static void cvt(const GpuMat& src, GpuMat& dst)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( func(src.ptr<src_t>(), src.step, dst.ptr<dst_t>(), dst.step, sz) );
}
};
template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
{
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
static void cvt(const GpuMat& src, GpuMat& dst)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( func(src.ptr<Npp32f>(), src.step, dst.ptr<dst_t>(), dst.step, sz, NPP_RND_NEAR) );
}
};
void convertToKernelCaller(const GpuMat& src, GpuMat& dst)
{
matrix_operations::convert_to(src, src.depth(), dst, dst.depth(), src.channels(), 1.0, 0.0);
}
}
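NppCvt bakes the concrete NPP function into the type through a function-pointer non-type template parameter; the partial specialization for CV_32F sources exists because those NPP conversions take an extra NppRoundMode argument. A stripped-down sketch of the pattern, with hypothetical stand-ins for the NPP calls:

#include <cstdio>

// hypothetical stand-ins for two NPP-like signatures (illustration only)
typedef int (*plain_fn)(int);
typedef int (*rounded_fn)(int, int /* rounding mode */);

int cvt_plain(int x) { return x * 2; }
int cvt_rounded(int x, int rm) { return x * 2 + rm; }

// the function pointer is a template parameter, so the dispatch is resolved at compile time
template <plain_fn F> struct Cvt
{
    static int call(int x) { return F(x); }
};

// the CV_32F specialization above does the same but supplies NPP_RND_NEAR itself;
// modeled here as a second template for brevity
template <rounded_fn F> struct CvtRounded
{
    static int call(int x) { return F(x, 0); }
};

int main()
{
    std::printf("%d %d\n", Cvt<cvt_plain>::call(21), CvtRounded<cvt_rounded>::call(21));
    return 0;
}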
void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double beta ) const
{
bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
@@ -133,7 +188,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
else
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
int stype = type();
int scn = channels();
int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
if( sdepth == ddepth && noScale )
{
@@ -152,44 +207,85 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta);
else
{
NppiSize sz;
sz.width = cols;
sz.height = rows;
typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst);
static const convert_caller_t convert_callers[8][8][4] =
{
{
{0,0,0,0},
{convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
{NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
{NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0}
},
{
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0}
},
{
{NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0}
},
{
{NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0},
{NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0}
},
{
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0}
},
{
{NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0}
},
{
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
{0,0,0,0},
{0,0,0,0}
},
{
{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}
}
};
if (stype == CV_8UC1 && ddepth == CV_16U)
nppSafeCall( nppiConvert_8u16u_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz) );
else if (stype == CV_16UC1 && ddepth == CV_8U)
nppSafeCall( nppiConvert_16u8u_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
else if (stype == CV_8UC4 && ddepth == CV_16U)
nppSafeCall( nppiConvert_8u16u_C4R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz) );
else if (stype == CV_16UC4 && ddepth == CV_8U)
nppSafeCall( nppiConvert_16u8u_C4R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
else if (stype == CV_8UC1 && ddepth == CV_16S)
nppSafeCall( nppiConvert_8u16s_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz) );
else if (stype == CV_16SC1 && ddepth == CV_8U)
nppSafeCall( nppiConvert_16s8u_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
else if (stype == CV_8UC4 && ddepth == CV_16S)
nppSafeCall( nppiConvert_8u16s_C4R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz) );
else if (stype == CV_16SC4 && ddepth == CV_8U)
nppSafeCall( nppiConvert_16s8u_C4R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
else if (stype == CV_16SC1 && ddepth == CV_32F)
nppSafeCall( nppiConvert_16s32f_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
else if (stype == CV_32FC1 && ddepth == CV_16S)
nppSafeCall( nppiConvert_32f16s_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz, NPP_RND_NEAR) );
else if (stype == CV_8UC1 && ddepth == CV_32F)
nppSafeCall( nppiConvert_8u32f_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
else if (stype == CV_32FC1 && ddepth == CV_8U)
nppSafeCall( nppiConvert_32f8u_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz, NPP_RND_NEAR) );
else if (stype == CV_16UC1 && ddepth == CV_32F)
nppSafeCall( nppiConvert_16u32f_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
else if (stype == CV_32FC1 && ddepth == CV_16U)
nppSafeCall( nppiConvert_32f16u_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz, NPP_RND_NEAR) );
else if (stype == CV_16UC1 && ddepth == CV_32S)
nppSafeCall( nppiConvert_16u32s_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp32s>(), dst.step, sz) );
else if (stype == CV_16SC1 && ddepth == CV_32S)
nppSafeCall( nppiConvert_16s32s_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp32s>(), dst.step, sz) );
else
matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), 1.0, 0.0);
convert_callers[sdepth][ddepth][scn-1](*psrc, dst);
}
}
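A short usage sketch for the refactored convertTo (assumes a CUDA device; per the noScale test above, any non-trivial alpha or beta bypasses the NPP table and goes straight to the custom conversion kernel):

#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat host(480, 640, CV_8UC1, cv::Scalar(100));
    cv::gpu::GpuMat src(host), dst32f, dstScaled;

    src.convertTo(dst32f, CV_32F);                  // NPP path: NppCvt<CV_8U, CV_32F, ...>::cvt
    src.convertTo(dstScaled, CV_32F, 1.0 / 255.0);  // scaled: handled by the GPU conversion kernel
    return 0;
}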
@@ -199,6 +295,99 @@ GpuMat& GpuMat::operator = (const Scalar& s)
return *this;
}
namespace
{
template<int SDEPTH, int SCN> struct NppSetFunc
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
};
template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
};
template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
static void set(GpuMat& src, const Scalar& s)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Scalar_<src_t> nppS = s;
nppSafeCall( func(nppS.val, src.ptr<src_t>(), src.step, sz) );
}
};
template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
static void set(GpuMat& src, const Scalar& s)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Scalar_<src_t> nppS = s;
nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz) );
}
};
void kernelSet(GpuMat& src, const Scalar& s)
{
matrix_operations::set_to_without_mask(src, src.depth(), s.val, src.channels());
}
template<int SDEPTH, int SCN> struct NppSetMaskFunc
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
};
template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
};
template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
static void set(GpuMat& src, const Scalar& s, const GpuMat& mask)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Scalar_<src_t> nppS = s;
nppSafeCall( func(nppS.val, src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );
}
};
template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
static void set(GpuMat& src, const Scalar& s, const GpuMat& mask)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Scalar_<src_t> nppS = s;
nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );
}
};
void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask)
{
matrix_operations::set_to_with_mask(src, src.depth(), s.val, mask, src.channels());
}
}
GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
{
CV_Assert(mask.type() == CV_8UC1);
@@ -211,151 +400,35 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
if (mask.empty())
{
switch (type())
typedef void (*set_caller_t)(GpuMat& src, const Scalar& s);
static const set_caller_t set_callers[8][4] =
{
case CV_8UC1:
{
Npp8u nVal = (Npp8u)s[0];
nppSafeCall( nppiSet_8u_C1R(nVal, ptr<Npp8u>(), step, sz) );
break;
}
case CV_8UC4:
{
Scalar_<Npp8u> nVal = s;
nppSafeCall( nppiSet_8u_C4R(nVal.val, ptr<Npp8u>(), step, sz) );
break;
}
case CV_16UC1:
{
Npp16u nVal = (Npp16u)s[0];
nppSafeCall( nppiSet_16u_C1R(nVal, ptr<Npp16u>(), step, sz) );
break;
}
/*case CV_16UC2:
{
Scalar_<Npp16u> nVal = s;
nppSafeCall( nppiSet_16u_C2R(nVal.val, ptr<Npp16u>(), step, sz) );
break;
}*/
case CV_16UC4:
{
Scalar_<Npp16u> nVal = s;
nppSafeCall( nppiSet_16u_C4R(nVal.val, ptr<Npp16u>(), step, sz) );
break;
}
case CV_16SC1:
{
Npp16s nVal = (Npp16s)s[0];
nppSafeCall( nppiSet_16s_C1R(nVal, ptr<Npp16s>(), step, sz) );
break;
}
/*case CV_16SC2:
{
Scalar_<Npp16s> nVal = s;
nppSafeCall( nppiSet_16s_C2R(nVal.val, ptr<Npp16s>(), step, sz) );
break;
}*/
case CV_16SC4:
{
Scalar_<Npp16s> nVal = s;
nppSafeCall( nppiSet_16s_C4R(nVal.val, ptr<Npp16s>(), step, sz) );
break;
}
case CV_32SC1:
{
Npp32s nVal = (Npp32s)s[0];
nppSafeCall( nppiSet_32s_C1R(nVal, ptr<Npp32s>(), step, sz) );
break;
}
case CV_32SC4:
{
Scalar_<Npp32s> nVal = s;
nppSafeCall( nppiSet_32s_C4R(nVal.val, ptr<Npp32s>(), step, sz) );
break;
}
case CV_32FC1:
{
Npp32f nVal = (Npp32f)s[0];
nppSafeCall( nppiSet_32f_C1R(nVal, ptr<Npp32f>(), step, sz) );
break;
}
case CV_32FC4:
{
Scalar_<Npp32f> nVal = s;
nppSafeCall( nppiSet_32f_C4R(nVal.val, ptr<Npp32f>(), step, sz) );
break;
}
default:
matrix_operations::set_to_without_mask( *this, depth(), s.val, channels());
}
{NppSet<CV_8U, 1, nppiSet_8u_C1R>::set,kernelSet,kernelSet,NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
{kernelSet,kernelSet,kernelSet,kernelSet},
{NppSet<CV_16U, 1, nppiSet_16u_C1R>::set,kernelSet,kernelSet,NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
{NppSet<CV_16S, 1, nppiSet_16s_C1R>::set,kernelSet,kernelSet,NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
{NppSet<CV_32S, 1, nppiSet_32s_C1R>::set,kernelSet,kernelSet,NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::set,kernelSet,kernelSet,NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
{kernelSet,kernelSet,kernelSet,kernelSet},
{0,0,0,0}
};
set_callers[depth()][channels()-1](*this, s);
}
else
{
switch (type())
typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask);
static const set_caller_t set_callers[8][4] =
{
case CV_8UC1:
{
Npp8u nVal = (Npp8u)s[0];
nppSafeCall( nppiSet_8u_C1MR(nVal, ptr<Npp8u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_8UC4:
{
Scalar_<Npp8u> nVal = s;
nppSafeCall( nppiSet_8u_C4MR(nVal.val, ptr<Npp8u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_16UC1:
{
Npp16u nVal = (Npp16u)s[0];
nppSafeCall( nppiSet_16u_C1MR(nVal, ptr<Npp16u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_16UC4:
{
Scalar_<Npp16u> nVal = s;
nppSafeCall( nppiSet_16u_C4MR(nVal.val, ptr<Npp16u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_16SC1:
{
Npp16s nVal = (Npp16s)s[0];
nppSafeCall( nppiSet_16s_C1MR(nVal, ptr<Npp16s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_16SC4:
{
Scalar_<Npp16s> nVal = s;
nppSafeCall( nppiSet_16s_C4MR(nVal.val, ptr<Npp16s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_32SC1:
{
Npp32s nVal = (Npp32s)s[0];
nppSafeCall( nppiSet_32s_C1MR(nVal, ptr<Npp32s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_32SC4:
{
Scalar_<Npp32s> nVal = s;
nppSafeCall( nppiSet_32s_C4MR(nVal.val, ptr<Npp32s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_32FC1:
{
Npp32f nVal = (Npp32f)s[0];
nppSafeCall( nppiSet_32f_C1MR(nVal, ptr<Npp32f>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
case CV_32FC4:
{
Scalar_<Npp32f> nVal = s;
nppSafeCall( nppiSet_32f_C4MR(nVal.val, ptr<Npp32f>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
break;
}
default:
matrix_operations::set_to_with_mask( *this, depth(), s.val, mask, channels());
}
{NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},
{kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask},
{NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},
{NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},
{NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},
{kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask},
{0,0,0,0}
};
set_callers[depth()][channels()-1](*this, s, mask);
}
return *this;
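And a companion sketch for the table-driven setTo (mask, when given, must be CV_8UC1 per the assertion above; assumes the GpuMat constructors of the 2.x gpu module):

#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::gpu::GpuMat img(480, 640, CV_8UC3);
    cv::gpu::GpuMat mask(480, 640, CV_8UC1, cv::Scalar(0));

    img.setTo(cv::Scalar(0, 255, 0));       // CV_8UC3 -> kernelSet (no NPP C3 setter in the table)
    img.setTo(cv::Scalar::all(255), mask);  // masked path -> kernelSetMask for 3-channel data
    return 0;
}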


@@ -61,6 +61,9 @@ const char* blacklist[] =
//"GPU-NppImageLog", // different precision
//"GPU-NppImageMagnitude", // different precision
//"GPU-NppImageSumWindow", // different border interpolation
//"GPU-NppImageSobel", // ???
//"GPU-NppImageGaussianBlur", // different border interpolation
"GPU-NppImageCanny", // NPP_TEXTURE_BIND_ERROR
0
};


@@ -492,6 +492,115 @@ struct CV_GpuNppImageSumWindowTest : public CV_GpuImageProcTest
}
};
////////////////////////////////////////////////////////////////////////////////
// Sobel
struct CV_GpuNppImageSobelTest : public CV_GpuImageProcTest
{
CV_GpuNppImageSobelTest() : CV_GpuImageProcTest( "GPU-NppImageSobel", "Sobel" ) {}
int test(const Mat& img)
{
if (img.type() != CV_8UC1 && img.type() != CV_8UC4)
{
ts->printf(CvTS::LOG, "\nUnsupported type\n");
return CvTS::OK;
}
int ksizes[] = {3, 5, 7};
int ksizes_num = sizeof(ksizes) / sizeof(int);
int dx = 1, dy = 0;
int test_res = CvTS::OK;
for (int i = 0; i < ksizes_num; ++i)
{
ts->printf(CvTS::LOG, "\nksize = %d\n", ksizes[i]);
Mat cpudst;
cv::Sobel(img, cpudst, -1, dx, dy, ksizes[i]);
GpuMat gpu1(img);
GpuMat gpudst;
cv::gpu::Sobel(gpu1, gpudst, -1, dx, dy, ksizes[i]);
if (CheckNorm(cpudst, gpudst) != CvTS::OK)
test_res = CvTS::FAIL_GENERIC;
}
return test_res;
}
};
////////////////////////////////////////////////////////////////////////////////
// GaussianBlur
struct CV_GpuNppImageGaussianBlurTest : public CV_GpuImageProcTest
{
CV_GpuNppImageGaussianBlurTest() : CV_GpuImageProcTest( "GPU-NppImageGaussianBlur", "GaussianBlur" ) {}
int test(const Mat& img)
{
if (img.type() != CV_8UC1 && img.type() != CV_8UC4)
{
ts->printf(CvTS::LOG, "\nUnsupported type\n");
return CvTS::OK;
}
int ksizes[] = {3, 5, 7};
int ksizes_num = sizeof(ksizes) / sizeof(int);
int test_res = CvTS::OK;
const double sigma1 = 3.0;
for (int i = 0; i < ksizes_num; ++i)
{
for (int j = 0; j < ksizes_num; ++j)
{
ts->printf(CvTS::LOG, "\nksize = (%dx%d)\n", ksizes[i], ksizes[j]);
Mat cpudst;
cv::GaussianBlur(img, cpudst, cv::Size(ksizes[i], ksizes[j]), sigma1);
GpuMat gpu1(img);
GpuMat gpudst;
cv::gpu::GaussianBlur(gpu1, gpudst, cv::Size(ksizes[i], ksizes[j]), sigma1);
if (CheckNorm(cpudst, gpudst) != CvTS::OK)
test_res = CvTS::FAIL_GENERIC;
}
}
return test_res;
}
};
////////////////////////////////////////////////////////////////////////////////
// Canny
struct CV_GpuNppImageCannyTest : public CV_GpuImageProcTest
{
CV_GpuNppImageCannyTest() : CV_GpuImageProcTest( "GPU-NppImageCanny", "Canny" ) {}
int test(const Mat& img)
{
if (img.type() != CV_8UC1)
{
ts->printf(CvTS::LOG, "\nUnsupported type\n");
return CvTS::OK;
}
const double threshold1 = 1.0, threshold2 = 10.0;
Mat cpudst;
cv::Canny(img, cpudst, threshold1, threshold2);
GpuMat gpu1(img);
GpuMat gpudst;
cv::gpu::Canny(gpu1, gpudst, threshold1, threshold2);
return CheckNorm(cpudst, gpudst);
}
};
////////////////////////////////////////////////////////////////////////////////
// cvtColor
class CV_GpuCvtColorTest : public CvTest
@@ -598,4 +707,7 @@ CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test;
CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test;
CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test;
CV_GpuNppImageSumWindowTest CV_GpuNppImageSumWindow_test;
CV_GpuNppImageSobelTest CV_GpuNppImageSobel_test;
CV_GpuNppImageGaussianBlurTest CV_GpuNppImageGaussianBlur_test;
CV_GpuNppImageCannyTest CV_GpuNppImageCanny_test;
CV_GpuCvtColorTest CV_GpuCvtColor_test;