/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"

using namespace cv::gpu;
using namespace cv::gpu::device;

namespace arithm
{
    template <size_t src_size, size_t dst_size> struct ArithmFuncTraits
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 1 };
    };

    template <> struct ArithmFuncTraits<1, 1>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };

    template <> struct ArithmFuncTraits<1, 2>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };

    template <> struct ArithmFuncTraits<1, 4>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };

    template <> struct ArithmFuncTraits<2, 1>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };

    template <> struct ArithmFuncTraits<2, 2>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };

    template <> struct ArithmFuncTraits<2, 4>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };

    template <> struct ArithmFuncTraits<4, 1>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };

    template <> struct ArithmFuncTraits<4, 2>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };

    template <> struct ArithmFuncTraits<4, 4>
    {
        enum { simple_block_dim_x = 32 }; enum { simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32 }; enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 };
    };
}

//////////////////////////////////////////////////////////////////////////
// addMat

namespace arithm
{
    template <typename T, typename D> struct VAdd4;

    template <> struct VAdd4<uint, uint> : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            uint res = 0;
        #if __CUDA_ARCH__ >= 300
            asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #elif __CUDA_ARCH__ >= 200
            asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #endif
            return res;
        }

        __device__ __forceinline__ VAdd4() {}
        __device__ __forceinline__ VAdd4(const VAdd4& other) {}
    };

    template <> struct VAdd4<int, uint> : binary_function<int, int, uint>
    {
        __device__ __forceinline__ uint operator ()(int a, int b) const
        {
            uint res = 0;
        #if __CUDA_ARCH__ >= 300
            asm("vadd4.u32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #elif __CUDA_ARCH__ >= 200
            asm("vadd.u32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.u32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.u32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.u32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #endif
            return res;
        }

        __device__ __forceinline__ VAdd4() {}
        __device__ __forceinline__ VAdd4(const VAdd4& other) {}
    };

    template <> struct VAdd4<uint, int> : binary_function<uint, uint, int>
    {
        __device__ __forceinline__ int operator ()(uint a, uint b) const
        {
            int res = 0;
        #if __CUDA_ARCH__ >= 300
            asm("vadd4.s32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #elif __CUDA_ARCH__ >= 200
            asm("vadd.s32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.s32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.s32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.s32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #endif
            return res;
        }

        __device__ __forceinline__ VAdd4() {}
        __device__ __forceinline__ VAdd4(const VAdd4& other) {}
    };

    template <> struct VAdd4<int, int> : binary_function<int, int, int>
    {
        __device__ __forceinline__ int operator ()(int a, int b) const
        {
            int res = 0;
        #if __CUDA_ARCH__ >= 300
            asm("vadd4.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #elif __CUDA_ARCH__ >= 200
            asm("vadd.s32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.s32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.s32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.s32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #endif
            return res;
        }

        __device__ __forceinline__ VAdd4() {}
        __device__ __forceinline__ VAdd4(const VAdd4& other) {}
    };

    ////////////////////////////////////

    template <typename T, typename D> struct VAdd2;

    template <> struct VAdd2<uint, uint> : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            uint res = 0;
        #if __CUDA_ARCH__ >= 300
            asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #elif __CUDA_ARCH__ >= 200
            asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #endif
            return res;
        }

        __device__ __forceinline__ VAdd2() {}
        __device__ __forceinline__ VAdd2(const VAdd2& other) {}
    };

    template <> struct VAdd2<uint, int> : binary_function<uint, uint, int>
    {
        __device__ __forceinline__ int operator ()(uint a, uint b) const
        {
            int res = 0;
        #if __CUDA_ARCH__ >= 300
            asm("vadd2.s32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #elif __CUDA_ARCH__ >= 200
            asm("vadd.s32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.s32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #endif
            return res;
        }

        __device__ __forceinline__ VAdd2() {}
        __device__ __forceinline__ VAdd2(const VAdd2& other) {}
    };

    template <> struct VAdd2<int, uint> : binary_function<int, int, uint>
    {
        __device__ __forceinline__ uint operator ()(int a, int b) const
        {
            uint res = 0;
        #if __CUDA_ARCH__ >= 300
            asm("vadd2.u32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #elif __CUDA_ARCH__ >= 200
            asm("vadd.u32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.u32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #endif
            return res;
        }

        __device__ __forceinline__ VAdd2() {}
        __device__ __forceinline__ VAdd2(const VAdd2& other) {}
    };

    template <> struct VAdd2<int, int> : binary_function<int, int, int>
    {
        __device__ __forceinline__ int operator ()(int a, int b) const
        {
            int res = 0;
        #if __CUDA_ARCH__ >= 300
            asm("vadd2.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #elif __CUDA_ARCH__ >= 200
            asm("vadd.s32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
            asm("vadd.s32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
        #endif
            return res;
        }

        __device__ __forceinline__ VAdd2() {}
        __device__ __forceinline__ VAdd2(const VAdd2& other) {}
    };

    ////////////////////////////////////

    template <typename T, typename D> struct AddMat : binary_function<T, T, D>
    {
        __device__ __forceinline__ D operator ()(T a, T b) const
        {
            return saturate_cast<D>(a + b);
        }

        __device__ __forceinline__ AddMat() {}
        __device__ __forceinline__ AddMat(const AddMat& other) {}
    };
}

namespace cv { namespace gpu { namespace device
{
    template <typename T, typename D> struct TransformFunctorTraits< arithm::VAdd4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
    };

    ////////////////////////////////////

    template <typename T, typename D> struct TransformFunctorTraits< arithm::VAdd2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
    };

    ////////////////////////////////////

    template <typename T, typename D> struct TransformFunctorTraits< arithm::AddMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
    };
}}}

namespace arithm
{
    template <typename T, typename D>
    void vadd4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
        transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, VAdd4<T, D>(), WithOutMask(), stream);
    }

    template void vadd4<uint, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
    template void vadd4<uint, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
    template void vadd4<int, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
    template void vadd4<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);

    template <typename T, typename D>
    void vadd2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
        transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, VAdd2<T, D>(), WithOutMask(), stream);
    }

    template void vadd2<uint, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
    template void vadd2<uint, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
    template void vadd2<int, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
    template void vadd2<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);

    template <typename T, typename D>
    void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
            transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
        else
            transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
    }

    template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
    template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
    template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
    template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
    template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
    template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
    template void
addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, 
cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // addScalar namespace arithm { template struct AddScalar : unary_function { S val; explicit AddScalar(S val_) : val(val_) {} __device__ __forceinline__ D operator ()(T a) const { return saturate_cast(a + val); } }; } namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< arithm::AddScalar > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) { AddScalar op(static_cast(val)); if (mask.data) transform((PtrStepSz) src1, (PtrStepSz) dst, op, mask, stream); else transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); } template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, 
cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); 
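    // AddScalar<T, S, D> stores the scalar already converted to the working type S (float or
    // double), so the per-element kernel is just saturate_cast<D>(a + val) with a single
    // saturation at the end. Illustrative host-side call (not part of this file):
    //
    //     cv::gpu::GpuMat src, dst;                      // e.g. CV_8UC1
    //     cv::gpu::add(src, cv::Scalar(12.5), dst);      // per pixel: saturate_cast<uchar>(x + 12.5)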
} ////////////////////////////////////////////////////////////////////////// // subMat namespace arithm { template struct VSub4; template <> struct VSub4 : binary_function { __device__ __forceinline__ uint operator ()(uint a, uint b) const { uint res = 0; #if __CUDA_ARCH__ >= 300 asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VSub4() {} __device__ __forceinline__ VSub4(const VSub4& other) {} }; template <> struct VSub4 : binary_function { __device__ __forceinline__ uint operator ()(int a, int b) const { uint res = 0; #if __CUDA_ARCH__ >= 300 asm("vsub4.u32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vsub.u32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.u32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.u32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.u32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VSub4() {} __device__ __forceinline__ VSub4(const VSub4& other) {} }; template <> struct VSub4 : binary_function { __device__ __forceinline__ int operator ()(uint a, uint b) const { int res = 0; #if __CUDA_ARCH__ >= 300 asm("vsub4.s32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vsub.s32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.s32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.s32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.s32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VSub4() {} __device__ __forceinline__ VSub4(const VSub4& other) {} }; template <> struct VSub4 : binary_function { __device__ __forceinline__ int operator ()(int a, int b) const { int res = 0; #if __CUDA_ARCH__ >= 300 asm("vsub4.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vsub.s32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.s32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.s32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.s32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VSub4() {} __device__ __forceinline__ VSub4(const VSub4& other) {} }; //////////////////////////////////// template struct VSub2; template <> struct VSub2 : binary_function { __device__ __forceinline__ uint operator ()(uint a, uint b) const { uint res = 0; #if __CUDA_ARCH__ >= 300 asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); 
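            // On sm_3x the single vsub2 instruction above subtracts both 16-bit halves at once;
            // this sm_2x fallback issues one scalar video instruction per halfword (the previous
            // line handles .h0, the next one .h1). Worked example for this unsigned variant with
            // packed ushort pairs:
            //     a = 0x00050003, b = 0x00070001
            //     low half: 3 - 1 = 2, high half: 5 - 7 saturates to 0  ->  res = 0x00000002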
asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VSub2() {} __device__ __forceinline__ VSub2(const VSub2& other) {} }; template <> struct VSub2 : binary_function { __device__ __forceinline__ int operator ()(uint a, uint b) const { int res = 0; #if __CUDA_ARCH__ >= 300 asm("vsub2.s32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vsub.s32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.s32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VSub2() {} __device__ __forceinline__ VSub2(const VSub2& other) {} }; template <> struct VSub2 : binary_function { __device__ __forceinline__ uint operator ()(int a, int b) const { uint res = 0; #if __CUDA_ARCH__ >= 300 asm("vsub2.u32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vsub.u32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.u32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VSub2() {} __device__ __forceinline__ VSub2(const VSub2& other) {} }; template <> struct VSub2 : binary_function { __device__ __forceinline__ int operator ()(int a, int b) const { int res = 0; #if __CUDA_ARCH__ >= 300 asm("vsub2.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vsub.s32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vsub.s32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VSub2() {} __device__ __forceinline__ VSub2(const VSub2& other) {} }; //////////////////////////////////// template struct SubMat : binary_function { __device__ __forceinline__ D operator ()(T a, T b) const { return saturate_cast(a - b); } __device__ __forceinline__ SubMat() {} __device__ __forceinline__ SubMat(const SubMat& other) {} }; } namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// template struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// template struct TransformFunctorTraits< arithm::SubMat > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void vsub4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, VSub4(), WithOutMask(), stream); } template void vsub4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vsub4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vsub4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vsub4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vsub2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, VSub2(), WithOutMask(), stream); } template void vsub2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vsub2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void 
vsub2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vsub2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) { if (mask.data) transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, SubMat(), mask, stream); else transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, SubMat(), WithOutMask(), stream); } template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb 
src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // subScalar namespace arithm { template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) { AddScalar op(-static_cast(val)); if (mask.data) transform((PtrStepSz) src1, (PtrStepSz) dst, op, mask, stream); else transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); } template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb 
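    // subScalar() has no functor of its own: it reuses AddScalar with the negated value, so
    // "src - val" is evaluated as "src + (-val)" in the working type S and saturated once at
    // the end. For example, with an 8-bit source and val = 10, a pixel of 3 becomes
    // saturate_cast<uchar>(3 + (-10.0f)) == 0.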
src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb 
dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // mulMat namespace arithm { struct Mul_8uc4_32f : binary_function { __device__ __forceinline__ uint operator ()(uint a, float b) const { uint res = 0; res |= (saturate_cast((0xffu & (a )) * b) ); res |= (saturate_cast((0xffu & (a >> 8)) * b) << 8); res |= (saturate_cast((0xffu & (a >> 16)) * b) << 16); res |= (saturate_cast((0xffu & (a >> 24)) * b) << 24); return res; } __device__ __forceinline__ Mul_8uc4_32f() {} __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {} }; struct Mul_16sc4_32f : binary_function { __device__ __forceinline__ short4 operator ()(short4 a, float b) const { return make_short4(saturate_cast(a.x * b), saturate_cast(a.y * b), saturate_cast(a.z * b), saturate_cast(a.w * b)); } __device__ __forceinline__ Mul_16sc4_32f() {} __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {} }; template struct Mul : binary_function { __device__ __forceinline__ D operator ()(T a, T b) const { return saturate_cast(a * b); } __device__ __forceinline__ Mul() {} __device__ __forceinline__ Mul(const Mul& other) {} }; template struct MulScale : binary_function { S scale; explicit MulScale(S scale_) : scale(scale_) {} __device__ __forceinline__ D operator ()(T a, T b) const { return saturate_cast(scale * a * b); } }; } namespace cv { namespace gpu { namespace device { template <> struct TransformFunctorTraits : arithm::ArithmFuncTraits { }; template struct TransformFunctorTraits< arithm::Mul > : arithm::ArithmFuncTraits { }; template struct TransformFunctorTraits< arithm::MulScale > : arithm::ArithmFuncTraits { }; }}} namespace arithm { void mulMat_8uc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) { transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream); } void mulMat_16sc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) { transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream); } template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream) { if (scale == 1) { Mul op; transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); } else { MulScale op(static_cast(scale)); transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); } } template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double 
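    // Mul_8uc4_32f above unpacks the four 8-bit channels of a packed CV_8UC4 pixel, scales
    // each one by the float taken from the second matrix, then saturates and repacks, e.g.
    // for a = 0x01020304 and b = 100.0f:
    //     byte0: 4 * 100 -> 255 (saturated), byte1: 3 * 100 -> 255,
    //     byte2: 2 * 100 -> 200,             byte3: 1 * 100 -> 100,   result = 0x64C8FFFF
    // mulMat() itself uses the plain Mul functor when scale == 1 and MulScale (which folds
    // the extra scale factor into the product before saturating) otherwise.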
scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double 
scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // mulScalar namespace arithm { template struct MulScalar : unary_function { S val; explicit MulScalar(S val_) : val(val_) {} __device__ __forceinline__ D operator ()(T a) const { return saturate_cast(a * val); } }; } namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< arithm::MulScalar > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) { MulScalar op(static_cast(val)); transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); } template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void 
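    // MulScalar receives the scalar already converted to the working type S on the host, so
    // the kernel performs one multiply plus saturate_cast per element. Illustrative host-side
    // call (not part of this file):
    //
    //     cv::gpu::GpuMat src, dst;                        // e.g. CV_16UC1
    //     cv::gpu::multiply(src, cv::Scalar(0.5), dst);    // per pixel: saturate_cast<ushort>(x * 0.5)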
mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // divMat namespace arithm { struct Div_8uc4_32f : binary_function { __device__ __forceinline__ uint operator ()(uint a, float b) const { uint res = 0; if (b != 0) { b = 1.0f / b; res |= (saturate_cast((0xffu & (a )) * b) ); res |= (saturate_cast((0xffu & (a >> 8)) * b) << 8); res |= (saturate_cast((0xffu & (a >> 16)) * b) << 16); res |= (saturate_cast((0xffu & (a >> 24)) * b) << 24); } return res; } }; struct Div_16sc4_32f : binary_function { __device__ __forceinline__ short4 operator ()(short4 a, float b) const { return b != 0 ? 
make_short4(saturate_cast(a.x / b), saturate_cast(a.y / b), saturate_cast(a.z / b), saturate_cast(a.w / b)) : make_short4(0,0,0,0); } }; template struct Div : binary_function { __device__ __forceinline__ D operator ()(T a, T b) const { return b != 0 ? saturate_cast(a / b) : 0; } __device__ __forceinline__ Div() {} __device__ __forceinline__ Div(const Div& other) {} }; template struct Div : binary_function { __device__ __forceinline__ float operator ()(T a, T b) const { return b != 0 ? static_cast(a) / b : 0; } __device__ __forceinline__ Div() {} __device__ __forceinline__ Div(const Div& other) {} }; template struct Div : binary_function { __device__ __forceinline__ double operator ()(T a, T b) const { return b != 0 ? static_cast(a) / b : 0; } __device__ __forceinline__ Div() {} __device__ __forceinline__ Div(const Div& other) {} }; template struct DivScale : binary_function { S scale; explicit DivScale(S scale_) : scale(scale_) {} __device__ __forceinline__ D operator ()(T a, T b) const { return b != 0 ? saturate_cast(scale * a / b) : 0; } }; } namespace cv { namespace gpu { namespace device { template <> struct TransformFunctorTraits : arithm::ArithmFuncTraits { }; template struct TransformFunctorTraits< arithm::Div > : arithm::ArithmFuncTraits { }; template struct TransformFunctorTraits< arithm::DivScale > : arithm::ArithmFuncTraits { }; }}} namespace arithm { void divMat_8uc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) { transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream); } void divMat_16sc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) { transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream); } template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream) { if (scale == 1) { Div op; transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); } else { DivScale op(static_cast(scale)); transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); } } template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, 
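    // All Div functors map a zero divisor to 0 rather than dividing: the integer form returns
    // saturate_cast<D>(a / b) only when b != 0 (e.g. 7 / 2 -> 3 for uchar operands), the
    // float/double forms return the plain quotient, and Div_8uc4_32f replaces the four
    // per-channel divides with one reciprocal (b = 1.0f / b) followed by four multiplies.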
cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, 
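// The explicit instantiation lists enumerate the source/destination type pairs
// that the host wrappers actually dispatch to; pairs that are never requested
// are left as commented-out lines, which keeps the full type matrix visible
// without adding compile time or code size for unused kernels.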
double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // divScalar namespace arithm { template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) { MulScalar op(static_cast(1.0 / val)); transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); } template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, 
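// divScalar reuses MulScalar with the reciprocal: the host computes 1.0 / val
// once and the kernel multiplies every element by it. There is no zero check
// here, so val == 0 follows ordinary floating-point rules (1.0 / 0.0 is
// infinity) before the product is saturated to the destination type, unlike the
// element-wise divMat above where a zero divisor maps to 0.
//
// Host-side sketch (illustrative only; the wrapper call and element types are
// assumptions, not taken from this file):
//
//   cv::gpu::GpuMat src(480, 640, CV_32FC1), dst;
//   cv::gpu::divide(src, cv::Scalar(2.0), dst);   // eventually reaches divScalar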
PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // divInv namespace arithm { template struct DivInv : unary_function { S val; explicit DivInv(S val_) : val(val_) {} __device__ __forceinline__ D operator ()(T a) const { return a != 0 ? 
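// DivInv covers the "scalar divided by matrix" case: each element a maps to
// val / a, saturated to the destination type, and a == 0 maps to 0, mirroring
// the zero-divisor convention of Div with the operands swapped.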
saturate_cast(val / a) : 0; } }; } namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< arithm::DivInv > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) { DivInv op(static_cast(val)); transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); } template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, 
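// divInv follows the same launch pattern as the other scalar kernels: val is
// converted to the functor's working type once on the host, and transform()
// streams the whole image through the functor with WithOutMask (no mask applied).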
cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // absDiffMat namespace arithm { template struct VAbsDiff4; template <> struct VAbsDiff4 : binary_function { __device__ __forceinline__ uint operator ()(uint a, uint b) const { uint res = 0; #if __CUDA_ARCH__ >= 300 asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VAbsDiff4() {} __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} }; template <> struct VAbsDiff4 : binary_function { __device__ __forceinline__ int operator ()(int a, int b) const { int res = 0; #if __CUDA_ARCH__ >= 300 asm("vabsdiff4.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vabsdiff.s32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vabsdiff.s32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vabsdiff.s32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vabsdiff.s32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VAbsDiff4() {} __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} }; //////////////////////////////////// template struct VAbsDiff2; template <> struct VAbsDiff2 : binary_function { __device__ __forceinline__ uint operator ()(uint a, uint b) const { uint res = 0; #if __CUDA_ARCH__ >= 300 asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VAbsDiff2() {} __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) 
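// VAbsDiff4 / VAbsDiff2 compute per-byte and per-halfword absolute differences
// of packed values with the vabsdiff video instructions: a single vabsdiff4 /
// vabsdiff2 on sm_30 and newer, per-lane vabsdiff instructions on sm_20, and a
// plain 0 result when compiled for anything older. For example, the per-byte
// absolute difference of 0x10203040 and 0x20102050 is 0x10101010.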
{} }; template <> struct VAbsDiff2 : binary_function { __device__ __forceinline__ int operator ()(int a, int b) const { int res = 0; #if __CUDA_ARCH__ >= 300 asm("vabsdiff2.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #elif __CUDA_ARCH__ >= 200 asm("vabsdiff.s32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); asm("vabsdiff.s32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res)); #endif return res; } __device__ __forceinline__ VAbsDiff2() {} __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {} }; //////////////////////////////////// __device__ __forceinline__ int _abs(int a) { return ::abs(a); } __device__ __forceinline__ float _abs(float a) { return ::fabsf(a); } __device__ __forceinline__ double _abs(double a) { return ::fabs(a); } template struct AbsDiffMat : binary_function { __device__ __forceinline__ T operator ()(T a, T b) const { return saturate_cast(_abs(a - b)); } __device__ __forceinline__ AbsDiffMat() {} __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {} }; } namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// template struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// template struct TransformFunctorTraits< arithm::AbsDiffMat > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void vabsDiff4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, VAbsDiff4(), WithOutMask(), stream); } template void vabsDiff4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vabsDiff4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vabsDiff2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, VAbsDiff2(), WithOutMask(), stream); } template void vabsDiff2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void vabsDiff2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, AbsDiffMat(), WithOutMask(), stream); } template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // absDiffScalar namespace arithm { template struct AbsDiffScalar : unary_function { S val; explicit AbsDiffScalar(S val_) : val(val_) {} __device__ __forceinline__ T operator ()(T a) const { abs_func f; return saturate_cast(f(a - val)); } }; } namespace cv { namespace gpu { namespace device { 
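// The TransformFunctorTraits specializations plug each functor into the generic
// transform() kernel: they inherit the block dimensions and smart_shift values
// from ArithmFuncTraits, keyed on the source and destination element sizes, so
// every arithmetic operator in this file gets a consistent launch configuration.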
template struct TransformFunctorTraits< arithm::AbsDiffScalar > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) { AbsDiffScalar op(static_cast(val)); transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); } template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // absMat namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< abs_func > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src, (PtrStepSz) dst, abs_func(), WithOutMask(), stream); } template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // sqrMat namespace arithm { template struct Sqr : unary_function { __device__ __forceinline__ T operator ()(T x) const { return saturate_cast(x * x); } __device__ __forceinline__ Sqr() {} __device__ __forceinline__ Sqr(const Sqr& other) {} }; } namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< arithm::Sqr > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src, (PtrStepSz) dst, Sqr(), WithOutMask(), stream); } template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // sqrtMat namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< sqrt_func > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src, (PtrStepSz) dst, sqrt_func(), WithOutMask(), stream); } template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, 
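// absMat, sqrMat and sqrtMat are straight unary transforms: abs_func and
// sqrt_func come from the device functional utilities, while Sqr saturates
// x * x back to the element type, so squaring a uchar value of 200 clamps to
// 255 instead of wrapping around.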
cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // logMat namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< log_func > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src, (PtrStepSz) dst, log_func(), WithOutMask(), stream); } template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// // expMat namespace arithm { template struct Exp : unary_function { __device__ __forceinline__ T operator ()(T x) const { exp_func f; return saturate_cast(f(x)); } __device__ __forceinline__ Exp() {} __device__ __forceinline__ Exp(const Exp& other) {} }; } namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< arithm::Exp > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src, (PtrStepSz) dst, Exp(), WithOutMask(), stream); } template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////////////////// // cmpMat namespace arithm { template struct Cmp : binary_function { __device__ __forceinline__ uchar operator()(T a, T b) const { Op op; return -op(a, b); } }; } namespace cv { namespace gpu { namespace device { template struct TransformFunctorTraits< arithm::Cmp > : arithm::ArithmFuncTraits { }; }}} namespace arithm { template
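// expMat saturates the result of the exponential back to the element type, so
// integer inputs clamp rather than overflow, while logMat applies log_func
// directly. Cmp, used by the comparison kernels that follow, wraps a predicate
// such as equal_to or less and returns -op(a, b): the predicate yields 0 or 1,
// and negating it in an unsigned char produces the OpenCV mask convention of
// 0 for false and 255 for true.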