fixed several bugs in gpu arithm functions

refactored tests for them
This commit is contained in:
Vladislav Vinogradov
2012-03-19 14:18:12 +00:00
parent f58c40bfab
commit 844bdea5ac
5 changed files with 1713 additions and 892 deletions

View File

@@ -71,8 +71,8 @@ void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&,
void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::rshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::lshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::rshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::lshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
@@ -101,11 +101,11 @@ namespace
template <int DEPTH> struct NppArithmFunc
{
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pSrc2, int nSrc2Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
};
template <> struct NppArithmFunc<CV_32F>
{
{
typedef NppTypeTraits<CV_32F>::npp_t npp_t;
typedef NppStatus (*func_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
@@ -123,7 +123,7 @@ namespace
sz.width = src1.cols;
sz.height = src1.rows;
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
(npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );
if (stream == 0)
@@ -145,8 +145,8 @@ namespace
NppiSize sz;
sz.width = src1.cols;
sz.height = src1.rows;
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
(npp_t*)dst.data, static_cast<int>(dst.step), sz) );
if (stream == 0)
@@ -162,12 +162,12 @@ namespace
////////////////////////////////////////////////////////////////////////
// add
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D>
template <typename T, typename D>
void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
template <typename T, typename D>
template <typename T, typename D>
void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
}}}
@@ -177,7 +177,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{add_gpu<unsigned char, unsigned char>, 0/*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},
{0/*add_gpu<signed char, unsigned char>*/, 0/*add_gpu<signed char, signed char>*/, 0/*add_gpu<signed char, unsigned short>*/, 0/*add_gpu<signed char, short>*/, 0/*add_gpu<signed char, int>*/, 0/*add_gpu<signed char, float>*/, 0/*add_gpu<signed char, double>*/},
@@ -188,7 +188,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
{0/*add_gpu<double, unsigned char>*/, 0/*add_gpu<double, signed char>*/, 0/*add_gpu<double, unsigned short>*/, 0/*add_gpu<double, short>*/, 0/*add_gpu<double, int>*/, 0/*add_gpu<double, float>*/, add_gpu<double, double>}
};
static const func_t npp_funcs[7] =
static const func_t npp_funcs[7] =
{
NppArithm<CV_8U, nppiAdd_8u_C1RSfs>::call,
0,
@@ -228,21 +228,21 @@ namespace
{
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pConstants,
typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pConstants,
npp_t* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
};
template<int DEPTH> struct NppArithmScalarFunc<DEPTH, 1>
{
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t pConstants,
typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t pConstants,
npp_t* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
};
template<int DEPTH> struct NppArithmScalarFunc<DEPTH, 2>
{
typedef typename NppTypeTraits<DEPTH>::npp_complex_type npp_complex_type;
typedef NppStatus (*func_ptr)(const npp_complex_type* pSrc1, int nSrc1Step, const npp_complex_type pConstants,
typedef NppStatus (*func_ptr)(const npp_complex_type* pSrc1, int nSrc1Step, const npp_complex_type pConstants,
npp_complex_type* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
};
template<int cn> struct NppArithmScalarFunc<CV_32F, cn>
@@ -313,7 +313,7 @@ namespace
nConstant.re = saturate_cast<npp_t>(sc.val[0]);
nConstant.im = saturate_cast<npp_t>(sc.val[1]);
nppSafeCall( func(src.ptr<npp_complex_type>(), static_cast<int>(src.step), nConstant,
nppSafeCall( func(src.ptr<npp_complex_type>(), static_cast<int>(src.step), nConstant,
dst.ptr<npp_complex_type>(), static_cast<int>(dst.step), sz, 0) );
if (stream == 0)
@@ -382,7 +382,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{add_gpu<unsigned char, unsigned char>, 0/*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},
{0/*add_gpu<signed char, unsigned char>*/, 0/*add_gpu<signed char, signed char>*/, 0/*add_gpu<signed char, unsigned short>*/, 0/*add_gpu<signed char, short>*/, 0/*add_gpu<signed char, int>*/, 0/*add_gpu<signed char, float>*/, 0/*add_gpu<signed char, double>*/},
@@ -394,7 +394,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
};
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
static const npp_func_t npp_funcs[7][4] =
static const npp_func_t npp_funcs[7][4] =
{
{NppArithmScalar<CV_8U, 1, nppiAddC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiAddC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiAddC_8u_C4RSfs>::call},
{0,0,0,0},
@@ -436,12 +436,12 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
////////////////////////////////////////////////////////////////////////
// subtract
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D>
template <typename T, typename D>
void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
template <typename T, typename D>
template <typename T, typename D>
void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
}}}
@@ -451,7 +451,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{subtract_gpu<unsigned char, unsigned char>, 0/*subtract_gpu<unsigned char, signed char>*/, subtract_gpu<unsigned char, unsigned short>, subtract_gpu<unsigned char, short>, subtract_gpu<unsigned char, int>, subtract_gpu<unsigned char, float>, subtract_gpu<unsigned char, double>},
{0/*subtract_gpu<signed char, unsigned char>*/, 0/*subtract_gpu<signed char, signed char>*/, 0/*subtract_gpu<signed char, unsigned short>*/, 0/*subtract_gpu<signed char, short>*/, 0/*subtract_gpu<signed char, int>*/, 0/*subtract_gpu<signed char, float>*/, 0/*subtract_gpu<signed char, double>*/},
@@ -462,15 +462,14 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
{0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}
};
static const func_t npp_funcs[7] =
static const func_t npp_funcs[6] =
{
NppArithm<CV_8U, nppiSub_8u_C1RSfs>::call,
0,
NppArithm<CV_16U, nppiSub_16u_C1RSfs>::call,
NppArithm<CV_16S, nppiSub_16s_C1RSfs>::call,
NppArithm<CV_32S, nppiSub_32s_C1RSfs>::call,
NppArithm<CV_32F, nppiSub_32f_C1R>::call,
subtract_gpu<double, double>
NppArithm<CV_32F, nppiSub_32f_C1R>::call
};
CV_Assert(src1.type() != CV_8S);
@@ -484,7 +483,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
cudaStream_t stream = StreamAccessor::getStream(s);
if (mask.empty() && dst.type() == src1.type())
if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)
{
npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream);
return;
@@ -502,7 +501,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{subtract_gpu<unsigned char, unsigned char>, 0/*subtract_gpu<unsigned char, signed char>*/, subtract_gpu<unsigned char, unsigned short>, subtract_gpu<unsigned char, short>, subtract_gpu<unsigned char, int>, subtract_gpu<unsigned char, float>, subtract_gpu<unsigned char, double>},
{0/*subtract_gpu<signed char, unsigned char>*/, 0/*subtract_gpu<signed char, signed char>*/, 0/*subtract_gpu<signed char, unsigned short>*/, 0/*subtract_gpu<signed char, short>*/, 0/*subtract_gpu<signed char, int>*/, 0/*subtract_gpu<signed char, float>*/, 0/*subtract_gpu<signed char, double>*/},
@@ -514,7 +513,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
};
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
static const npp_func_t npp_funcs[7][4] =
static const npp_func_t npp_funcs[7][4] =
{
{NppArithmScalar<CV_8U, 1, nppiSubC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiSubC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiSubC_8u_C4RSfs>::call},
{0,0,0,0},
@@ -556,15 +555,15 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
////////////////////////////////////////////////////////////////////////
// multiply
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
template <typename T, typename D>
template <typename T, typename D>
void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
template <typename T, typename D>
template <typename T, typename D>
void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
}}}
@@ -574,7 +573,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{multiply_gpu<unsigned char, unsigned char>, 0/*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>},
{0/*multiply_gpu<signed char, unsigned char>*/, 0/*multiply_gpu<signed char, signed char>*/, 0/*multiply_gpu<signed char, unsigned short>*/, 0/*multiply_gpu<signed char, short>*/, 0/*multiply_gpu<signed char, int>*/, 0/*multiply_gpu<signed char, float>*/, 0/*multiply_gpu<signed char, double>*/},
@@ -585,7 +584,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
{0/*multiply_gpu<double, unsigned char>*/, 0/*multiply_gpu<double, signed char>*/, 0/*multiply_gpu<double, unsigned short>*/, 0/*multiply_gpu<double, short>*/, 0/*multiply_gpu<double, int>*/, 0/*multiply_gpu<double, float>*/, multiply_gpu<double, double>}
};
static const func_t npp_funcs[7] =
static const func_t npp_funcs[7] =
{
NppArithm<CV_8U, nppiMul_8u_C1RSfs>::call,
0,
@@ -651,7 +650,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{multiply_gpu<unsigned char, unsigned char>, 0/*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>},
{0/*multiply_gpu<signed char, unsigned char>*/, 0/*multiply_gpu<signed char, signed char>*/, 0/*multiply_gpu<signed char, unsigned short>*/, 0/*multiply_gpu<signed char, short>*/, 0/*multiply_gpu<signed char, int>*/, 0/*multiply_gpu<signed char, float>*/, 0/*multiply_gpu<signed char, double>*/},
@@ -663,7 +662,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
};
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
static const npp_func_t npp_funcs[7][4] =
static const npp_func_t npp_funcs[7][4] =
{
{NppArithmScalar<CV_8U, 1, nppiMulC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiMulC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiMulC_8u_C4RSfs>::call},
{0,0,0,0},
@@ -702,18 +701,18 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
////////////////////////////////////////////////////////////////////////
// divide
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
template <typename T, typename D>
template <typename T, typename D>
void divide_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
template <typename T, typename D>
template <typename T, typename D>
void divide_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
template <typename T, typename D>
template <typename T, typename D>
void divide_gpu(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
}}}
@@ -723,7 +722,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},
@@ -734,15 +733,14 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
{0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}
};
static const func_t npp_funcs[7] =
static const func_t npp_funcs[6] =
{
NppArithm<CV_8U, nppiDiv_8u_C1RSfs>::call,
0,
NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,
NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,
NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,
NppArithm<CV_32F, nppiDiv_32f_C1R>::call,
divide_gpu<double, double>
NppArithm<CV_32F, nppiDiv_32f_C1R>::call
};
cudaStream_t stream = StreamAccessor::getStream(s);
@@ -753,7 +751,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
dst.create(src1.size(), src1.type());
multiply_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
divide_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
}
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
{
@@ -761,10 +759,10 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
dst.create(src1.size(), src1.type());
multiply_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
divide_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
}
else
{
{
CV_Assert(src1.type() != CV_8S);
CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());
@@ -773,7 +771,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
if (scale == 1 && dst.type() == src1.type())
if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
{
npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream);
return;
@@ -792,7 +790,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},
@@ -804,7 +802,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc
};
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
static const npp_func_t npp_funcs[7][4] =
static const npp_func_t npp_funcs[7][4] =
{
{NppArithmScalar<CV_8U, 1, nppiDivC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiDivC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiDivC_8u_C4RSfs>::call},
{0,0,0,0},
@@ -846,7 +844,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
static const func_t funcs[7][7] =
static const func_t funcs[7][7] =
{
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},
@@ -875,12 +873,12 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
//////////////////////////////////////////////////////////////////////////////
// absdiff
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template <typename T>
void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template <typename T>
template <typename T>
void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
}}}
@@ -890,7 +888,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
absdiff_gpu<unsigned char>, absdiff_gpu<signed char>, absdiff_gpu<unsigned short>, absdiff_gpu<short>, absdiff_gpu<int>, absdiff_gpu<float>, absdiff_gpu<double>
};
@@ -909,7 +907,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
{
NppStreamHandler h(stream);
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
if (stream == 0)
@@ -919,7 +917,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
{
NppStreamHandler h(stream);
nppSafeCall( nppiAbsDiff_16u_C1R(src1.ptr<Npp16u>(), static_cast<int>(src1.step), src2.ptr<Npp16u>(), static_cast<int>(src2.step),
nppSafeCall( nppiAbsDiff_16u_C1R(src1.ptr<Npp16u>(), static_cast<int>(src1.step), src2.ptr<Npp16u>(), static_cast<int>(src2.step),
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) );
if (stream == 0)
@@ -929,7 +927,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
{
NppStreamHandler h(stream);
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step),
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step),
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
if (stream == 0)
@@ -969,7 +967,7 @@ namespace
sz.width = src1.cols;
sz.height = src1.rows;
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (npp_t*)dst.data, static_cast<int>(dst.step),
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (npp_t*)dst.data, static_cast<int>(dst.step),
sz, static_cast<npp_t>(val)) );
if (stream == 0)
@@ -984,14 +982,14 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
NppAbsDiffC<CV_8U, nppiAbsDiffC_8u_C1R>::call,
absdiff_gpu<signed char>,
NppAbsDiffC<CV_16U, nppiAbsDiffC_16u_C1R>::call,
NppAbsDiffC<CV_8U, nppiAbsDiffC_8u_C1R>::call,
absdiff_gpu<signed char>,
NppAbsDiffC<CV_16U, nppiAbsDiffC_16u_C1R>::call,
absdiff_gpu<short>,
absdiff_gpu<int>,
NppAbsDiffC<CV_32F, nppiAbsDiffC_32f_C1R>::call,
absdiff_gpu<int>,
NppAbsDiffC<CV_32F, nppiAbsDiffC_32f_C1R>::call,
absdiff_gpu<double>
};
@@ -1132,7 +1130,7 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
NppSqr<CV_8U, nppiSqr_8u_C1RSfs, nppiSqr_8u_C4RSfs>::call,
0,
@@ -1209,7 +1207,7 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
NppOneSource<CV_8U, nppiSqrt_8u_C1RSfs>::call,
0,
@@ -1233,7 +1231,7 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
NppOneSource<CV_8U, nppiLn_8u_C1RSfs>::call,
0,
@@ -1257,7 +1255,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
NppOneSource<CV_8U, nppiExp_8u_C1RSfs>::call,
0,
@@ -1277,7 +1275,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream)
//////////////////////////////////////////////////////////////////////////////
// Comparison of two matrixes
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template <typename T> void compare_ne(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@@ -1291,7 +1289,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
static const func_t funcs[7][4] =
static const func_t funcs[7][4] =
{
{compare_eq<unsigned char>, compare_ne<unsigned char>, compare_lt<unsigned char>, compare_le<unsigned char>},
{compare_eq<signed char>, compare_ne<signed char>, compare_lt<signed char>, compare_le<signed char>},
@@ -1353,7 +1351,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
//////////////////////////////////////////////////////////////////////////////
// Unary bitwise logical operations
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src, PtrStepb dst, cudaStream_t stream);
@@ -1377,9 +1375,9 @@ namespace
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
static Caller callers[] =
static Caller callers[] =
{
bitwiseMaskNotCaller<unsigned char>, bitwiseMaskNotCaller<unsigned char>,
bitwiseMaskNotCaller<unsigned char>, bitwiseMaskNotCaller<unsigned char>,
bitwiseMaskNotCaller<unsigned short>, bitwiseMaskNotCaller<unsigned short>,
bitwiseMaskNotCaller<unsigned int>, bitwiseMaskNotCaller<unsigned int>,
bitwiseMaskNotCaller<unsigned int>
@@ -1410,7 +1408,7 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
//////////////////////////////////////////////////////////////////////////////
// Binary bitwise logical operations
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
@@ -1444,9 +1442,9 @@ namespace
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
static Caller callers[] =
static Caller callers[] =
{
bitwiseMaskOrCaller<unsigned char>, bitwiseMaskOrCaller<unsigned char>,
bitwiseMaskOrCaller<unsigned char>, bitwiseMaskOrCaller<unsigned char>,
bitwiseMaskOrCaller<unsigned short>, bitwiseMaskOrCaller<unsigned short>,
bitwiseMaskOrCaller<unsigned int>, bitwiseMaskOrCaller<unsigned int>,
bitwiseMaskOrCaller<unsigned int>
@@ -1478,9 +1476,9 @@ namespace
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
static Caller callers[] =
static Caller callers[] =
{
bitwiseMaskAndCaller<unsigned char>, bitwiseMaskAndCaller<unsigned char>,
bitwiseMaskAndCaller<unsigned char>, bitwiseMaskAndCaller<unsigned char>,
bitwiseMaskAndCaller<unsigned short>, bitwiseMaskAndCaller<unsigned short>,
bitwiseMaskAndCaller<unsigned int>, bitwiseMaskAndCaller<unsigned int>,
bitwiseMaskAndCaller<unsigned int>
@@ -1512,9 +1510,9 @@ namespace
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
static Caller callers[] =
static Caller callers[] =
{
bitwiseMaskXorCaller<unsigned char>, bitwiseMaskXorCaller<unsigned char>,
bitwiseMaskXorCaller<unsigned char>, bitwiseMaskXorCaller<unsigned char>,
bitwiseMaskXorCaller<unsigned short>, bitwiseMaskXorCaller<unsigned short>,
bitwiseMaskXorCaller<unsigned int>, bitwiseMaskXorCaller<unsigned int>,
bitwiseMaskXorCaller<unsigned int>
@@ -1584,7 +1582,7 @@ namespace
const npp_t pConstants[] = {static_cast<npp_t>(sc.val[0]), static_cast<npp_t>(sc.val[1]), static_cast<npp_t>(sc.val[2]), static_cast<npp_t>(sc.val[3])};
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
@@ -1602,7 +1600,7 @@ namespace
oSizeROI.width = src.cols;
oSizeROI.height = src.rows;
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
@@ -1614,7 +1612,7 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
{
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
static const func_t funcs[5][4] =
{
{NppBitwiseC<CV_8U, 1, nppiOrC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiOrC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiOrC_8u_C4R>::call},
{0,0,0,0},
@@ -1635,7 +1633,7 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
{
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
static const func_t funcs[5][4] =
{
{NppBitwiseC<CV_8U, 1, nppiAndC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiAndC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiAndC_8u_C4R>::call},
{0,0,0,0},
@@ -1656,7 +1654,7 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
{
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
static const func_t funcs[5][4] =
{
{NppBitwiseC<CV_8U, 1, nppiXorC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiXorC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiXorC_8u_C4R>::call},
{0,0,0,0},
@@ -1704,7 +1702,7 @@ namespace
oSizeROI.height = src.rows;
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), sc.val, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
@@ -1722,17 +1720,17 @@ namespace
oSizeROI.height = src.rows;
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), sc.val[0], dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
void cv::gpu::rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
static const func_t funcs[5][4] =
{
{NppShift<CV_8U , 1, nppiRShiftC_8u_C1R >::call, 0, NppShift<CV_8U , 3, nppiRShiftC_8u_C3R >::call, NppShift<CV_8U , 4, nppiRShiftC_8u_C4R>::call },
{NppShift<CV_8S , 1, nppiRShiftC_8s_C1R >::call, 0, NppShift<CV_8S , 3, nppiRShiftC_8s_C3R >::call, NppShift<CV_8S , 4, nppiRShiftC_8s_C4R>::call },
@@ -1749,10 +1747,10 @@ void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& s
funcs[src.depth()][src.channels() - 1](src, sc, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
static const func_t funcs[5][4] =
{
{NppShift<CV_8U , 1, nppiLShiftC_8u_C1R>::call , 0, NppShift<CV_8U , 3, nppiLShiftC_8u_C3R>::call , NppShift<CV_8U , 4, nppiLShiftC_8u_C4R>::call },
{0 , 0, 0 , 0 },
@@ -1772,7 +1770,7 @@ void cv::gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& s
//////////////////////////////////////////////////////////////////////////////
// Minimum and maximum operations
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
@@ -1803,7 +1801,7 @@ namespace
dst.create(src1.size(), src1.type());
::cv::gpu::device::min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
}
template <typename T>
void max_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
{
@@ -1820,58 +1818,58 @@ namespace
}
}
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.depth() != CV_64F) ||
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
min_caller<float>, min_caller<double>
};
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
{
CV_Assert((src1.depth() != CV_64F) ||
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
min_caller<float>, min_caller<double>
};
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.depth() != CV_64F) ||
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
max_caller<float>, max_caller<double>
};
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
{
CV_Assert((src1.depth() != CV_64F) ||
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
max_caller<float>, max_caller<double>
};
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
@@ -1880,7 +1878,7 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
////////////////////////////////////////////////////////////////////////
// threshold
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template <typename T>
void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type, cudaStream_t stream);
@@ -1921,10 +1919,10 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
{
typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream);
static const caller_t callers[] =
static const caller_t callers[] =
{
threshold_caller<unsigned char>, threshold_caller<signed char>,
threshold_caller<unsigned short>, threshold_caller<short>,
threshold_caller<unsigned char>, threshold_caller<signed char>,
threshold_caller<unsigned short>, threshold_caller<short>,
threshold_caller<int>, threshold_caller<float>, threshold_caller<double>
};
@@ -1943,7 +1941,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
////////////////////////////////////////////////////////////////////////
// pow
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template<typename T>
void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
@@ -1958,10 +1956,10 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
typedef void (*caller_t)(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
static const caller_t callers[] =
static const caller_t callers[] =
{
pow_caller<unsigned char>, pow_caller<signed char>,
pow_caller<unsigned short>, pow_caller<short>,
pow_caller<unsigned char>, pow_caller<signed char>,
pow_caller<unsigned short>, pow_caller<short>,
pow_caller<int>, pow_caller<float>
};
@@ -1992,7 +1990,7 @@ namespace
oSizeROI.width = img1.cols;
oSizeROI.height = img2.rows;
nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );
if (stream == 0)
@@ -2021,7 +2019,7 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
typedef void (*func_t)(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[] =
{
NppAlphaComp<CV_8U, nppiAlphaComp_8u_AC4R>::call,
0,
@@ -2046,7 +2044,7 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
////////////////////////////////////////////////////////////////////////
// addWeighted
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template <typename T1, typename T2, typename D>
void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);