fix gpu module compilation under linux

Vladislav Vinogradov
2010-11-08 09:55:10 +00:00
parent 4cdcf37139
commit 863d61e9eb
4 changed files with 425 additions and 252 deletions


@@ -84,162 +84,230 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool,
#else /* !defined (HAVE_CUDA) */
#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR)
#if (defined(_WIN32) || defined(_WIN64)) && (NPP_VERSION >= 32)
# define NPP_HAVE_COMPLEX_TYPE
#endif
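// NPP_VERSION folds major.minor into a single integer, so NPP 3.2 yields 32;
// the complex-valued (Npp32fc) NPP functions are only assumed available on Windows builds against NPP >= 3.2.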
////////////////////////////////////////////////////////////////////////
// add subtract multiply divide
namespace
{
typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
NppiSize oSizeROI, int nScaleFactor);
typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst,
int nDstStep, NppiSize oSizeROI);
typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst,
int nDstStep, NppiSize oSizeROI);
void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
#if NPP_VERSION >= 32
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
#else
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
#endif
dst.create( src1.size(), src1.type() );
NppiSize sz;
sz.width = src1.cols;
sz.height = src1.rows;
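// The 8-bit NPP functions take an extra scale factor; passing 0 leaves the result unscaled.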
switch (src1.type())
{
case CV_8UC1:
nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz, 0) );
break;
case CV_8UC4:
nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz, 0) );
break;
#if NPP_VERSION >= 32
case CV_32SC1:
nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step,
src2.ptr<Npp32s>(), src2.step,
dst.ptr<Npp32s>(), dst.step, sz) );
break;
#endif
case CV_32FC1:
nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step,
src2.ptr<Npp32f>(), src2.step,
dst.ptr<Npp32f>(), dst.step, sz) );
break;
default:
CV_Assert(!"Unsupported source type");
}
}
template<int SCN> struct NppArithmScalarFunc;
template<> struct NppArithmScalarFunc<1>
{
typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst,
int nDstStep, NppiSize oSizeROI);
};
#ifdef NPP_HAVE_COMPLEX_TYPE
template<> struct NppArithmScalarFunc<2>
{
typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst,
int nDstStep, NppiSize oSizeROI);
};
#endif
template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
{
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
dst.create(src.size(), src.type());
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
}
};
#ifdef NPP_HAVE_COMPLEX_TYPE
template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
{
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
dst.create(src.size(), src.type());
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Npp32fc nValue;
nValue.re = (Npp32f)sc[0];
nValue.im = (Npp32f)sc[1];
nppSafeCall( func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz) );
}
};
#endif
}
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
#else
nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, 0, nppiAdd_32f_C1R);
#endif
}
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
#else
nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, 0, nppiSub_32f_C1R);
#endif
}
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
#else
nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, 0, nppiMul_32f_C1R);
#endif
}
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
#else
nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, 0, nppiDiv_32f_C1R);
#endif
}
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
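// Indexed by src.channels() (1 or 2), so slot 0 is an unused placeholder.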
static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc};
CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
NppArithmScalar<1, nppiAddC_32f_C1R>::calc(src, sc, dst);
# else
CV_Assert(!"This function doesn't supported");
# endif
#endif
}
void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc};
CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
NppArithmScalar<1, nppiSubC_32f_C1R>::calc(src, sc, dst);
# else
CV_Assert(!"This function doesn't supported");
# endif
#endif
}
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};
CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
NppArithmScalar<1, nppiMulC_32f_C1R>::calc(src, sc, dst);
# else
CV_Assert(!"This function doesn't supported");
# endif
#endif
}
void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};
CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
NppArithmScalar<1, nppiDivC_32f_C1R>::calc(src, sc, dst);
# else
CV_Assert(!"This function doesn't supported");
# endif
#endif
}
////////////////////////////////////////////////////////////////////////
@@ -263,9 +331,13 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
#if NPP_VERSION >= 32
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
#else
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
#endif
dst.create( src1.size(), src1.type() );
@@ -276,20 +348,22 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
switch (src1.type())
{
case CV_8UC1:
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz) );
break;
case CV_8UC4:
nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz) );
break;
#if NPP_VERSION >= 32
case CV_32SC1:
nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step,
src2.ptr<Npp32s>(), src2.step,
dst.ptr<Npp32s>(), dst.step, sz) );
break;
#endif
case CV_32FC1:
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step,
src2.ptr<Npp32f>(), src2.step,
@@ -302,7 +376,8 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
{
#if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
dst.create( src.size(), src.type() );
@@ -311,6 +386,9 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
sz.height = src.rows;
nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );
#else
CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
@@ -322,7 +400,7 @@ namespace cv { namespace gpu { namespace mathfunc
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
}}}
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
@@ -340,8 +418,8 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
{
if (cmpop != CMP_NE)
{
nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
}
else
@@ -367,7 +445,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
////////////////////////////////////////////////////////////////////////
// meanStdDev
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
{
CV_Assert(src.type() == CV_8UC1);
@@ -381,7 +459,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
////////////////////////////////////////////////////////////////////////
// norm
double cv::gpu::norm(const GpuMat& src1, int normType)
{
return norm(src1, GpuMat(src1.size(), src1.type(), Scalar::all(0.0)), normType);
}
@@ -393,7 +471,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
CV_Assert(src1.type() == CV_8UC1);
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
NppiSize oSizeROI, Npp64f* pRetVal);
static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
@@ -405,8 +483,8 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
int funcIdx = normType >> 1;
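// NORM_INF (1), NORM_L1 (2) and NORM_L2 (4) shift right to indices 0, 1 and 2 of the table above.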
double retVal;
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
sz, &retVal) );
return retVal;
@@ -427,14 +505,14 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
if (src.type() == CV_8UC1)
{
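// OpenCV flipCode: 0 flips around the x-axis, positive values around the y-axis, negative around both; mapped onto the NPP mirror axes below.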
nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), src.step,
dst.ptr<Npp8u>(), dst.step, sz,
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
}
else
{
nppSafeCall( nppiMirror_8u_C4R(src.ptr<Npp8u>(), src.step,
dst.ptr<Npp8u>(), dst.step, sz,
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
}
}
@@ -444,33 +522,40 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
Scalar cv::gpu::sum(const GpuMat& src)
{
CV_Assert(!"disabled until fix crash");
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Scalar res;
#if NPP_VERSION >= 32
CV_Assert(!"disabled until fix crash");
int bufsz;
if (src.type() == CV_8UC1)
{
nppiReductionGetBufferHostSize_8u_C1R(sz, &bufsz);
GpuMat buf(1, bufsz, CV_32S);
Scalar res;
nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
return res;
}
else
{
nppiReductionGetBufferHostSize_8u_C4R(sz, &bufsz);
GpuMat buf(1, bufsz, CV_32S);
Scalar res;
nppSafeCall( nppiSum_8u_C4R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
return res;
}
#else
if (src.type() == CV_8UC1)
nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, res.val) );
else
nppSafeCall( nppiSum_8u_C4R(src.ptr<Npp8u>(), src.step, sz, res.val) );
#endif
return res;
}
////////////////////////////////////////////////////////////////////////
@@ -501,22 +586,30 @@ namespace
sz.width = src.cols;
sz.height = src.rows;
Npp8u* cuMem;
#if NPP_VERSION >= 32
cuMem = nppsMalloc_8u(8);
#else
cudaSafeCall( cudaMalloc((void**)&cuMem, 8 * sizeof(Npp8u)) );
#endif
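// cuMem holds the four per-channel minima followed by the four per-channel maxima.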
nppSafeCall( nppiMinMax_8u_C4R(src.ptr<Npp8u>(), src.step, sz, cuMem, cuMem + 4) );
if (minVal)
cudaMemcpy(minVal, cuMem, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost);
if (maxVal)
cudaMemcpy(maxVal, cuMem + 4, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost);
#if NPP_VERSION >= 32
nppsFree(cuMem);
#else
cudaSafeCall( cudaFree(cuMem) );
#endif
}
}
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
{
typedef void (*minMax_t)(const GpuMat& src, double* minVal, double* maxVal);
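// Dispatch table indexed by src.channels(); only 1- and 4-channel 8-bit inputs are handled.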
static const minMax_t minMax_callers[] = {0, minMax_c1, 0, 0, minMax_c4};
@@ -559,13 +652,13 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
NppiSize sz;
sz.height = src.rows;
sz.width = src.cols;
Mat nppLut;
lut.convertTo(nppLut, CV_32S);
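// nppiLUT_Linear expects 32-bit integer lookup values, so the 8-bit LUT is widened once up front.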
if (src.type() == CV_8UC1)
{
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
}
else
@@ -578,10 +671,10 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
{
cv::split(nppLut, nppLut3);
pValues3[0] = nppLut3[0].ptr<Npp32s>();
pValues3[1] = nppLut3[1].ptr<Npp32s>();
pValues3[2] = nppLut3[2].ptr<Npp32s>();
}
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
pValues3, lvls.pLevels3, lvls.nValues3) );
}
}
@@ -591,6 +684,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
{
#if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
@@ -600,6 +694,9 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
sz.height = src.rows;
nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
#else
CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
@@ -607,6 +704,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
void cv::gpu::log(const GpuMat& src, GpuMat& dst)
{
#if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
@@ -616,11 +714,15 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst)
sz.height = src.rows;
nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
#else
CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
// NPP magnitude
#ifdef NPP_HAVE_COMPLEX_TYPE
namespace
{
typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
@@ -638,21 +740,30 @@ namespace
nppSafeCall( func(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
}
}
#endif
void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
::npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R);
#else
CV_Assert(!"This function doesn't supported");
#endif
}
void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
::npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R);
#else
CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
// Polar <-> Cart
namespace cv { namespace gpu { namespace mathfunc
{
void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream);
void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
@@ -721,7 +832,7 @@ void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleI
}
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, const Stream& stream)
{
::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
}