gpu module refactoring: moved per-element operations into a separate file

2010-12-20 09:07:19 +00:00
parent 6891a60149
commit 0465b89e7e
6 changed files with 1048 additions and 915 deletions
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -48,18 +48,7 @@ using namespace std;

 #if !defined (HAVE_CUDA)

-void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::transpose(const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); }
 void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_nogpu(); }
 double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
 double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; }
@@ -89,18 +78,6 @@ void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool)
 void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); }
 void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_nogpu(); }
 void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); }
-void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
-void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
-void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
-void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
-void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
-void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
-void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
-void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
-cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat&) { throw_nogpu(); return GpuMat(); }
-cv::gpu::GpuMat cv::gpu::operator | (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
-cv::gpu::GpuMat cv::gpu::operator & (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
-cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
 void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
 void cv::gpu::min(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
@@ -112,164 +89,6 @@ void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu()

 #else /* !defined (HAVE_CUDA) */

-////////////////////////////////////////////////////////////////////////
-// add subtract multiply divide
-
-namespace
-{
-    typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
-                                         NppiSize oSizeROI, int nScaleFactor);
-    typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst,
-                                          int nDstStep, NppiSize oSizeROI);
-    typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst,
-                                          int nDstStep, NppiSize oSizeROI);
-
-    void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
-                         npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
-                         npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
-    {
-        CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
-
-        CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
-
-        dst.create( src1.size(), src1.type() );
-
-        NppiSize sz;
-        sz.width  = src1.cols;
-        sz.height = src1.rows;
-
-        switch (src1.type())
-        {
-        case CV_8UC1:
-            nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step,
-                src2.ptr<Npp8u>(), src2.step,
-                dst.ptr<Npp8u>(), dst.step, sz, 0) );
-            break;
-        case CV_8UC4:
-            nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step,
-                src2.ptr<Npp8u>(), src2.step,
-                dst.ptr<Npp8u>(), dst.step, sz, 0) );
-            break;
-        case CV_32SC1:
-            nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step,
-                src2.ptr<Npp32s>(), src2.step,
-                dst.ptr<Npp32s>(), dst.step, sz) );
-            break;
-        case CV_32FC1:
-            nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step,
-                src2.ptr<Npp32f>(), src2.step,
-                dst.ptr<Npp32f>(), dst.step, sz) );
-            break;
-        default:
-            CV_Assert(!"Unsupported source type");
-        }
-    }
-
-    template<int SCN> struct NppArithmScalarFunc;
-    template<> struct NppArithmScalarFunc<1>
-    {
-        typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst,
-                                      int nDstStep, NppiSize oSizeROI);
-    };
-    template<> struct NppArithmScalarFunc<2>
-    {
-        typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst,
-                                      int nDstStep, NppiSize oSizeROI);
-    };
-
-    template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
-    template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
-    {
-        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
-        {
-            dst.create(src.size(), src.type());
-
-            NppiSize sz;
-            sz.width  = src.cols;
-            sz.height = src.rows;
-
-            nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
-        }
-    };
-    template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
-    {
-        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
-        {
-            dst.create(src.size(), src.type());
-
-            NppiSize sz;
-            sz.width  = src.cols;
-            sz.height = src.rows;
-
-            Npp32fc nValue;
-            nValue.re = (Npp32f)sc[0];
-            nValue.im = (Npp32f)sc[1];
-
-            nppSafeCall( func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz) );
-        }
-    };
-}
-
-void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
-{
-    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
-}
-
-void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
-{
-    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
-}
-
-void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
-{
-    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
-}
-
-void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
-{
-    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
-}
-
-void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
-{
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
-    static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc};
-
-    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
-
-    callers[src.channels()](src, sc, dst);
-}
-
-void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
-{
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
-    static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc};
-
-    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
-
-    callers[src.channels()](src, sc, dst);
-}
-
-void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
-{
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
-    static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};
-
-    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
-
-    callers[src.channels()](src, sc, dst);
-}
-
-void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
-{
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
-    static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};
-
-    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
-
-    callers[src.channels()](src, sc, dst);
-}
-
 ////////////////////////////////////////////////////////////////////////
 // transpose

@@ -299,112 +118,6 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
    }
 }

-////////////////////////////////////////////////////////////////////////
-// absdiff
-
-void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
-{
-    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
-
-    CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
-
-    dst.create( src1.size(), src1.type() );
-
-    NppiSize sz;
-    sz.width  = src1.cols;
-    sz.height = src1.rows;
-
-    switch (src1.type())
-    {
-    case CV_8UC1:
-        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step,
-            src2.ptr<Npp8u>(), src2.step,
-            dst.ptr<Npp8u>(), dst.step, sz) );
-        break;
-    case CV_8UC4:
-        nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step,
-            src2.ptr<Npp8u>(), src2.step,
-            dst.ptr<Npp8u>(), dst.step, sz) );
-        break;
-    case CV_32SC1:
-        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step,
-            src2.ptr<Npp32s>(), src2.step,
-            dst.ptr<Npp32s>(), dst.step, sz) );
-        break;
-    case CV_32FC1:
-        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step,
-            src2.ptr<Npp32f>(), src2.step,
-            dst.ptr<Npp32f>(), dst.step, sz) );
-        break;
-    default:
-        CV_Assert(!"Unsupported source type");
-    }
-}
-
-void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
-{
-    CV_Assert(src.type() == CV_32FC1);
-
-    dst.create( src.size(), src.type() );
-
-    NppiSize sz;
-    sz.width  = src.cols;
-    sz.height = src.rows;
-
-    nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );
-}
-
-////////////////////////////////////////////////////////////////////////
-// compare
-
-namespace cv { namespace gpu { namespace mathfunc
-{
-    void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
-    void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
-}}}
-
-void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
-{
-    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
-
-    CV_Assert(src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
-
-    dst.create( src1.size(), CV_8UC1 );
-
-    static const NppCmpOp nppCmpOp[] = { NPP_CMP_EQ, NPP_CMP_GREATER, NPP_CMP_GREATER_EQ, NPP_CMP_LESS, NPP_CMP_LESS_EQ };
-
-    NppiSize sz;
-    sz.width  = src1.cols;
-    sz.height = src1.rows;
-
-    if (src1.type() == CV_8UC4)
-    {
-        if (cmpop != CMP_NE)
-        {
-            nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step,
-                src2.ptr<Npp8u>(), src2.step,
-                dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
-        }
-        else
-        {
-            mathfunc::compare_ne_8uc4(src1, src2, dst);
-        }
-    }
-    else
-    {
-        if (cmpop != CMP_NE)
-        {
-            nppSafeCall( nppiCompare_32f_C1R(src1.ptr<Npp32f>(), src1.step,
-                src2.ptr<Npp32f>(), src2.step,
-                dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
-        }
-        else
-        {
-            mathfunc::compare_ne_32f(src1, src2, dst);
-        }
-    }
-}
-
 ////////////////////////////////////////////////////////////////////////
 // meanStdDev

@@ -997,249 +710,6 @@ void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat&
    ::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
 }

-//////////////////////////////////////////////////////////////////////////////
-// Per-element bit-wise logical matrix operations
-
-namespace cv { namespace gpu { namespace mathfunc
-{
-    void bitwise_not_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream);
-
-    template <typename T>
-    void bitwise_mask_not_caller(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream);
-
-    void bitwise_or_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
-
-    template <typename T>
-    void bitwise_mask_or_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
-
-    void bitwise_and_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
-
-    template <typename T>
-    void bitwise_mask_and_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
-
-    void bitwise_xor_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
-
-    template <typename T>
-    void bitwise_mask_xor_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
-}}}
-
-namespace
-{
-    void bitwise_not_caller(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
-    {
-        dst.create(src.size(), src.type());
-
-        cv::gpu::mathfunc::bitwise_not_caller(src.rows, src.cols, src.elemSize1(), 
-                                              dst.channels(), src, dst, stream);
-    }
-
-
-    void bitwise_not_caller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
-    {
-        using namespace cv::gpu;
-
-        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
-        static Caller callers[] = {mathfunc::bitwise_mask_not_caller<unsigned char>, mathfunc::bitwise_mask_not_caller<unsigned char>, 
-                                   mathfunc::bitwise_mask_not_caller<unsigned short>, mathfunc::bitwise_mask_not_caller<unsigned short>,
-                                   mathfunc::bitwise_mask_not_caller<unsigned int>, mathfunc::bitwise_mask_not_caller<unsigned int>,
-                                   mathfunc::bitwise_mask_not_caller<unsigned int>};
-
-        CV_Assert(mask.type() == CV_8U && mask.size() == src.size());
-        dst.create(src.size(), src.type());
-
-        Caller caller = callers[src.depth()];
-        CV_Assert(caller);
-
-        int cn = src.depth() != CV_64F ? src.channels() : src.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(src.rows, src.cols, cn, src, mask, dst, stream);
-    }
-
-
-    void bitwise_or_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-
-        cv::gpu::mathfunc::bitwise_or_caller(dst.rows, dst.cols, dst.elemSize1(), 
-                                             dst.channels(), src1, src2, dst, stream);
-    }
-
-
-    void bitwise_or_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
-    {
-        using namespace cv::gpu;
-
-        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
-        static Caller callers[] = {mathfunc::bitwise_mask_or_caller<unsigned char>, mathfunc::bitwise_mask_or_caller<unsigned char>, 
-                                   mathfunc::bitwise_mask_or_caller<unsigned short>, mathfunc::bitwise_mask_or_caller<unsigned short>,
-                                   mathfunc::bitwise_mask_or_caller<unsigned int>, mathfunc::bitwise_mask_or_caller<unsigned int>,
-                                   mathfunc::bitwise_mask_or_caller<unsigned int>};
-
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-
-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
-
-        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
-    }
-
-
-    void bitwise_and_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-
-        cv::gpu::mathfunc::bitwise_and_caller(dst.rows, dst.cols, dst.elemSize1(), 
-                                              dst.channels(), src1, src2, dst, stream);
-    }
-
-
-    void bitwise_and_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
-    {
-        using namespace cv::gpu;
-
-        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
-        static Caller callers[] = {mathfunc::bitwise_mask_and_caller<unsigned char>, mathfunc::bitwise_mask_and_caller<unsigned char>, 
-                                   mathfunc::bitwise_mask_and_caller<unsigned short>, mathfunc::bitwise_mask_and_caller<unsigned short>,
-                                   mathfunc::bitwise_mask_and_caller<unsigned int>, mathfunc::bitwise_mask_and_caller<unsigned int>,
-                                   mathfunc::bitwise_mask_and_caller<unsigned int>};
-
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-
-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
-
-        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
-    }
-
-
-    void bitwise_xor_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-
-        cv::gpu::mathfunc::bitwise_xor_caller(dst.rows, dst.cols, dst.elemSize1(), 
-                                              dst.channels(), src1, src2, dst, stream);
-    }
-
-
-    void bitwise_xor_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
-    {
-        using namespace cv::gpu;
-
-        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
-        static Caller callers[] = {mathfunc::bitwise_mask_xor_caller<unsigned char>, mathfunc::bitwise_mask_xor_caller<unsigned char>, 
-                                   mathfunc::bitwise_mask_xor_caller<unsigned short>, mathfunc::bitwise_mask_xor_caller<unsigned short>,
-                                   mathfunc::bitwise_mask_xor_caller<unsigned int>, mathfunc::bitwise_mask_xor_caller<unsigned int>,
-                                   mathfunc::bitwise_mask_xor_caller<unsigned int>};
-
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-
-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
-
-        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
-    }
-}
-
-void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask)
-{
-    if (mask.empty())
-        ::bitwise_not_caller(src, dst, 0);
-    else
-        ::bitwise_not_caller(src, dst, mask, 0);
-}
-
-void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream)
-{
-    if (mask.empty())
-        ::bitwise_not_caller(src, dst, StreamAccessor::getStream(stream));
-    else
-        ::bitwise_not_caller(src, dst, mask, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
-{
-    if (mask.empty())
-        ::bitwise_or_caller(src1, src2, dst, 0);
-    else
-        ::bitwise_or_caller(src1, src2, dst, mask, 0);
-}
-
-void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
-{
-    if (mask.empty())
-        ::bitwise_or_caller(src1, src2, dst, StreamAccessor::getStream(stream));
-    else
-        ::bitwise_or_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
-{
-    if (mask.empty())
-        ::bitwise_and_caller(src1, src2, dst, 0);
-    else
-        ::bitwise_and_caller(src1, src2, dst, mask, 0);
-}
-
-void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
-{
-    if (mask.empty())
-        ::bitwise_and_caller(src1, src2, dst, StreamAccessor::getStream(stream));
-    else
-        ::bitwise_and_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
-{
-    if (mask.empty())
-        ::bitwise_xor_caller(src1, src2, dst, 0);
-    else
-        ::bitwise_xor_caller(src1, src2, dst, mask, 0);
-}
-
-void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
-{
-    if (mask.empty())
-        ::bitwise_xor_caller(src1, src2, dst, StreamAccessor::getStream(stream));
-    else
-        ::bitwise_xor_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
-
-}
-
-cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat& src)
-{
-    GpuMat dst;
-    bitwise_not(src, dst);
-    return dst;
-}
-
-cv::gpu::GpuMat cv::gpu::operator | (const GpuMat& src1, const GpuMat& src2)
-{
-    GpuMat dst;
-    bitwise_or(src1, src2, dst);
-    return dst;
-}
-
-cv::gpu::GpuMat cv::gpu::operator & (const GpuMat& src1, const GpuMat& src2)
-{
-    GpuMat dst;
-    bitwise_and(src1, src2, dst);
-    return dst;
-}
-
-cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat& src1, const GpuMat& src2)
-{
-    GpuMat dst;
-    bitwise_xor(src1, src2, dst);
-    return dst;
-}

 //////////////////////////////////////////////////////////////////////////////
 // min/max